
Commit 64925c5

Run latency and throughput benchmark for Qwen3 and Gemma3 (#86)
Signed-off-by: Huy Do <[email protected]>
1 parent ee0085b commit 64925c5

3 files changed: +48 −2 lines


.github/workflows/vllm-benchmark.yml

Lines changed: 2 additions & 2 deletions
@@ -2,8 +2,8 @@ name: vLLM Benchmark
 
 on:
   schedule:
-    # Run every 6 hours
-    - cron: '0 */6 * * *'
+    # Run every 12 hours
+    - cron: '0 */12 * * *'
   workflow_dispatch:
     inputs:
       vllm_branch:
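The new cron expression '0 */12 * * *' fires at minute 0 of every 12th hour (00:00 and 12:00 UTC), so the scheduled benchmark runs drop from four to two per day. A minimal sketch to check the expression, assuming the third-party croniter package (not something the workflow itself uses):

# Sketch: enumerate the next firings of the new schedule.
# Assumes the third-party "croniter" package, which is not part of this repo.
from datetime import datetime, timezone

from croniter import croniter

schedule = "0 */12 * * *"  # minute 0 of every 12th hour (00:00 and 12:00 UTC)
it = croniter(schedule, datetime(2025, 1, 1, tzinfo=timezone.utc))
for _ in range(4):
    print(it.get_next(datetime))  # 2025-01-01 12:00, 2025-01-02 00:00, ...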

vllm-benchmarks/benchmarks/cuda/latency-tests.json

Lines changed: 22 additions & 0 deletions
@@ -105,5 +105,27 @@
       "num_iters": 15,
       "max_model_len": 8192
     }
+  },
+  {
+    "test_name": "latency_gemma_3_27b_it_tp8",
+    "parameters": {
+      "model": "google/gemma-3-27b-it",
+      "tensor_parallel_size": 8,
+      "load_format": "dummy",
+      "num_iters_warmup": 5,
+      "num_iters": 15,
+      "max_model_len": 8192
+    }
+  },
+  {
+    "test_name": "latency_qwen3_30b_a3b_tp8",
+    "parameters": {
+      "model": "Qwen/Qwen3-30B-A3B",
+      "tensor_parallel_size": 8,
+      "load_format": "dummy",
+      "num_iters_warmup": 5,
+      "num_iters": 15,
+      "max_model_len": 8192
+    }
   }
 ]
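Both new latency entries reuse the shape of the existing ones: dummy weights (load_format "dummy"), tensor_parallel_size 8, 5 warmup iterations and 15 measured iterations at max_model_len 8192. A minimal sketch of how such an entry could be expanded into a benchmark_latency.py command line, assuming (as the vLLM benchmark suites do) that each JSON parameter maps to the matching --kebab-case flag; the script path below is illustrative:

# Sketch: expand a latency-tests.json entry into a CLI invocation.
# Assumption: every JSON parameter maps to the matching --kebab-case flag of
# vLLM's benchmarks/benchmark_latency.py; boolean values become bare flags.
import json
import shlex


def to_cli_args(params: dict) -> list[str]:
    args = []
    for key, value in params.items():
        flag = "--" + key.replace("_", "-")
        if isinstance(value, bool):
            if value:
                args.append(flag)
        else:
            args.extend([flag, str(value)])
    return args


with open("vllm-benchmarks/benchmarks/cuda/latency-tests.json") as f:
    tests = json.load(f)

for test in tests:
    cmd = ["python", "benchmarks/benchmark_latency.py", *to_cli_args(test["parameters"])]
    print(test["test_name"], "->", shlex.join(cmd))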

vllm-benchmarks/benchmarks/cuda/throughput-tests.json

Lines changed: 24 additions & 0 deletions
@@ -115,5 +115,29 @@
       "backend": "vllm",
       "max_model_len": 8192
     }
+  },
+  {
+    "test_name": "throughput_gemma_3_27b_it_tp8",
+    "parameters": {
+      "model": "google/gemma-3-27b-it",
+      "tensor_parallel_size": 8,
+      "load_format": "dummy",
+      "dataset": "./ShareGPT_V3_unfiltered_cleaned_split.json",
+      "num_prompts": 200,
+      "backend": "vllm",
+      "max_model_len": 8192
+    }
+  },
+  {
+    "test_name": "throughput_qwen3_30b_a3b_tp8",
+    "parameters": {
+      "model": "Qwen/Qwen3-30B-A3B",
+      "tensor_parallel_size": 8,
+      "load_format": "dummy",
+      "dataset": "./ShareGPT_V3_unfiltered_cleaned_split.json",
+      "num_prompts": 200,
+      "backend": "vllm",
+      "max_model_len": 8192
+    }
   }
 ]
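The throughput entries add a dataset path and prompt count on top of the latency parameters: 200 ShareGPT prompts per run against the vllm backend. A small pre-flight sketch, assuming the ShareGPT_V3_unfiltered_cleaned_split.json file has already been downloaded into the working directory by an earlier step (that provisioning is not part of this commit):

# Sketch: sanity-check the throughput configs before launching a run.
# Assumption: the ShareGPT file referenced by "dataset" is fetched elsewhere;
# this commit only adds the test entries, not the download step.
import json
from pathlib import Path

with open("vllm-benchmarks/benchmarks/cuda/throughput-tests.json") as f:
    tests = json.load(f)

for test in tests:
    params = test["parameters"]
    dataset = params.get("dataset")
    if dataset and not Path(dataset).exists():
        raise FileNotFoundError(
            f"{test['test_name']}: missing dataset {dataset}; "
            "download ShareGPT_V3_unfiltered_cleaned_split.json first"
        )
    print(f"{test['test_name']}: {params['model']} "
          f"(tp={params['tensor_parallel_size']}, num_prompts={params.get('num_prompts')})")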
