# Qwen3.5-4B   [tp1, GDN, MBT2048]   TPU v6e-4, vLLM 0.20.0, KV=fp8

## PREFILL (conc=1, out=1; prefill_tok_s = ctx*1e3/p50_ttft_ms)
# ctx  p50_ttft_ms  p99_ttft_ms  prefill_tok_s
   512       432.43       433.28       1184.0
  1024       452.04       453.11       2265.3
  2048       495.40       497.01       4134.0
  4096       982.95       986.74       4167.1
  8192      1961.47      1969.91       4176.5

## DECODE (out=128, max-conc=bs; agg_tok_s is computed from p50 tpot)
# ctx   bs  p50_tpot_ms  p99_tpot_ms  agg_tok_s(bs*1e3/tpot)  req_s
  1024    1        10.72        10.72                   93.3   0.55
  1024    4        16.18        18.31                  247.3   1.42
  1024   16        41.39       647.30                  386.6   0.99
  1024   64        80.50        97.15                  795.0   1.94
  4096    1        10.76        10.77                   92.9   0.43
  4096    4        25.32        34.06                  158.0   0.71
  4096   16        84.98        89.23                  188.3   0.86
  4096   64        84.97        89.23                  753.2   0.86

## E2E (ISL/OSL=1024/1024; rate = offered load in req/s)
# rate  req_s  out_tok_s  TTFT p50/p99(ms)  TPOT p50/p99(ms)  E2E p50/p99(ms)
  0.2   0.19     193.6  509/33147  12.3/15.4  13083/48882
  0.3   0.28     282.7  507/1185  12.8/14.3  13665/15097
  0.4   0.36     367.0  506/1099  13.4/15.7  14369/16598
 0.45   0.40     407.3  507/1141  13.8/15.9  14609/16802
  inf   0.14     143.0  6762/219855  133.8/237.2  206829/285887
