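# Qwen3-32B (dense)   [tp4, GMU0.8, MBT8192]   TPU v6e-4, vLLM 0.20.0, KV=fp8
# key (presumed vLLM engine args, confirm against launch cmd): tp = tensor_parallel_size, GMU = gpu_memory_utilization, MBT = max_num_batched_tokens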

## PREFILL (conc=1, out=1)
# ctx  p50_ttft_ms  p99_ttft_ms  prefill_tok_s
   512        37.81        38.13      13540.9
  1024        57.62        58.22      17772.4
  2048       104.24       105.61      19647.8
  4096       207.77       209.71      19714.3
  8192       460.58       463.20      17786.2

## DECODE (out=128, max-conc=bs; agg_tok_s is derived from p50 tpot)
# ctx   bs  p50_tpot_ms  p99_tpot_ms  agg_tok_s(bs*1e3/tpot)  req_s
  1024    1        17.90        17.93                   55.9   0.43
  1024    4        18.20        19.39                  219.8   1.58
  1024   16        22.37        27.41                  715.1   4.57
  1024   64        58.40        65.28                 1096.0   7.59
  4096    1        19.06        19.25                   52.5   0.40
  4096    4        21.44        25.18                  186.5   1.20
  4096   16        37.71       130.61                  424.3   1.82
  4096   64        82.54        92.82                  775.4   2.84

## E2E (ISL/OSL=1024/1024)
# rate  req_s  out_tok_s  TTFT p50/p99(ms)  TPOT p50/p99(ms)  E2E p50/p99(ms)
  0.15   0.14      143.8          115/2243         18.0/18.3      18542/20566
   0.3   0.26      270.5          115/1042         18.9/20.1      19493/20698
  0.45   0.37      379.8           116/295         19.5/20.9      20201/21492
   0.6   0.45      465.8          123/3983         21.0/28.1      21651/28822
   inf   0.88      901.1         1190/9249         43.2/44.0      45401/45443
