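# Qwen3-30B-A3B (MoE, 3B active)   [tp4, GMU0.8, MBT8192]   TPU v6e-4, vLLM 0.20.0, KV=fp8
# legend: tp = tensor-parallel size, GMU = gpu_memory_utilization, MBT = max_num_batched_tokens, KV = KV-cache dtype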

## PREFILL (conc=1, out=1)
# ctx  p50_ttft_ms  p99_ttft_ms  prefill_tok_s
   512        35.02        35.35      14618.4
  1024        50.97        51.44      20090.6
  2048        84.02        85.46      24374.3
  4096       157.16       158.49      26063.1
  8192       317.92       322.08      25767.7

## DECODE (out=128, max-conc=bs)
# ctx   bs  p50_tpot_ms  p99_tpot_ms  agg_tok_s(bs*1e3/tpot)  req_s
  1024    1         6.98         7.10                  143.4   1.07
  1024    4         8.78         9.93                  455.5   3.12
  1024   16        14.94        17.47                 1071.3   3.86
  1024   64        48.10       152.69                 1330.5   7.50
  4096    1         6.99         7.11                  143.0   0.96
  4096    4         9.74        12.94                  410.7   2.30
  4096   16        22.27        28.53                  718.5   4.12
  4096   64        96.76       213.91                  661.4   4.25

## E2E (ISL/OSL=1024/1024)
# rate = offered request rate in req/s (inf = no rate limit); req_s = achieved request throughput
# rate  req_s  out_tok_s  TTFT p50/p99(ms)  TPOT p50/p99(ms)  E2E p50/p99(ms)
  0.2   0.19     197.3  90/2682  7.9/9.0  8233/11090
  0.4   0.36     365.9  92/190  9.2/10.1  9472/10444
  0.6   0.52     536.6  94/1331  11.2/13.8  11572/14202
  0.8   0.62     635.3  116/6335  18.8/28.7  24647/29578
  inf   1.27    1303.0  712/1092  22.3/22.8  23558/23572
