Presidentlin committed on
Commit
90ec376
·
1 Parent(s): 3999c2c
src/lib/benchmarks/index.ts CHANGED
@@ -4,6 +4,7 @@ import { googleBenchmarks } from "./google";
4
  import { anthropicBenchmarks } from "./anthropic";
5
  import { openaiBenchmarks } from "./openai";
6
  import { deepseekBenchmarks } from "./deepseek";
 
7
 
8
 
9
  export const benchmarkData: Benchmark[] = [
@@ -12,5 +13,5 @@ export const benchmarkData: Benchmark[] = [
12
  ...anthropicBenchmarks,
13
  ...openaiBenchmarks,
14
  ...deepseekBenchmarks,
15
-
16
  ];
 
4
  import { anthropicBenchmarks } from "./anthropic";
5
  import { openaiBenchmarks } from "./openai";
6
  import { deepseekBenchmarks } from "./deepseek";
7
+ import { qwenBenchmarks } from "./qwen";
8
 
9
 
10
  export const benchmarkData: Benchmark[] = [
 
13
  ...anthropicBenchmarks,
14
  ...openaiBenchmarks,
15
  ...deepseekBenchmarks,
16
+ ...qwenBenchmarks,
17
  ];
src/lib/benchmarks/qwen.ts ADDED
@@ -0,0 +1,720 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import { Benchmark } from "./types";
3
+
4
+ export const qwenBenchmarks: Benchmark[] = [
5
+ {
6
+ model: "Qwen3-235B-A22B (Base Model)",
7
+ provider: "Qwen",
8
+ inputPrice: 0,
9
+ outputPrice: 0,
10
+ benchmark: {
11
+ mmlu: 87.81,
12
+ //mmluredux: 87.40,
13
+ ////"mmlu-pro": 68.18,
14
+ ////supergpqa: 44.06,
15
+ ////bbh: 88.87,
16
+ gpqa: 47.47,
17
+ ////gsm8k: 94.39,
18
+ ////math: 71.84,
19
+ ////evalplus: 77.60,
20
+ //multiple: 65.94,
21
+ mbpp: 81.40,
22
+ //cruxo: 79.00,
23
+ ////mgsm: 83.53,
24
+ mmmlu: 86.70,
25
+ ////include: 73.46,
26
+ },
27
+ source: "",
28
+ },
29
+ {
30
+ model: "Qwen3-32B (Base Model)",
31
+ provider: "Qwen",
32
+ inputPrice: 0,
33
+ outputPrice: 0,
34
+ benchmark: {
35
+ mmlu: 83.61,
36
+ //"mmlu-redux": 83.41,
37
+ //"mmlu-pro": 65.54,
38
+ //supergpqa: 39.78,
39
+ //bbh: 87.38,
40
+ gpqa: 49.49,
41
+ //gsm8k: 93.40,
42
+ //math: 61.62,
43
+ //evalplus: 72.05,
44
+ //"multipl-e":: 67.06,
45
+ mbpp: 78.20,
46
+ // "crux-o":: 72.50,
47
+ //mgsm: 83.06,
48
+ mmmlu: 83.83,
49
+ //include: 67.87,
50
+ },
51
+ source: "",
52
+ },
53
+ {
54
+ model: "Qwen3-14B (Base Model)",
55
+ provider: "Qwen",
56
+ inputPrice: 0,
57
+ outputPrice: 0,
58
+ benchmark: {
59
+ mmlu: 81.05,
60
+ //"mmlu-redux": 79.88,
61
+ //"mmlu-pro": 61.03,
62
+ //supergpqa: 34.27,
63
+ //bbh: 81.07,
64
+ gpqa: 39.90,
65
+ //gsm8k: 92.49,
66
+ //math: 62.02,
67
+ //evalplus: 72.23,
68
+ //"multipl-e":: 61.69,
69
+ mbpp: 73.40,
70
+ // "crux-o":: 68.60,
71
+ //mgsm: 79.20,
72
+ mmmlu: 81.46,
73
+ //include: 64.55,
74
+ },
75
+ source: "",
76
+ },
77
+ {
78
+ model: "Qwen3-30B-A3B (Base Model)",
79
+ provider: "Qwen",
80
+ inputPrice: 0,
81
+ outputPrice: 0,
82
+ benchmark: {
83
+ mmlu: 81.38,
84
+ //"mmlu-redux": 81.17,
85
+ //"mmlu-pro": 61.49,
86
+ //supergpqa: 35.72,
87
+ //bbh: 81.54,
88
+ gpqa: 43.94,
89
+ //gsm8k: 91.81,
90
+ //math: 59.04,
91
+ //evalplus: 71.45,
92
+ //"multipl-e":: 66.53,
93
+ mbpp: 74.40,
94
+ // "crux-o":: 67.20,
95
+ //mgsm: 79.11,
96
+ mmmlu: 81.46,
97
+ //include: 67.00,
98
+ },
99
+ source: "",
100
+ },
101
+ {
102
+ model: "Qwen3-8B (Base Model)",
103
+ provider: "Qwen",
104
+ inputPrice: 0,
105
+ outputPrice: 0,
106
+ benchmark: {
107
+ mmlu: 76.89,
108
+ //"mmlu-redux": 76.17,
109
+ //"mmlu-pro": 56.73,
110
+ //supergpqa: 31.64,
111
+ //bbh: 78.40,
112
+ gpqa: 44.44,
113
+ //gsm8k: 89.84,
114
+ //math: 60.80,
115
+ //evalplus: 67.65,
116
+ //"multipl-e":: 58.75,
117
+ mbpp: 69.80,
118
+ // "crux-o":: 62.00,
119
+ //mgsm: 76.02,
120
+ mmmlu: 75.72,
121
+ //include: 59.40,
122
+ },
123
+ source: "",
124
+ },
125
+ {
126
+ model: "Qwen3-4B (Base Model)",
127
+ provider: "Qwen",
128
+ inputPrice: 0,
129
+ outputPrice: 0,
130
+ benchmark: {
131
+ mmlu: 72.99,
132
+ //"mmlu-redux": 72.79,
133
+ //"mmlu-pro": 50.58,
134
+ //supergpqa: 28.43,
135
+ //bbh: 72.59,
136
+ gpqa: 36.87,
137
+ //gsm8k: 87.79,
138
+ //math: 54.10,
139
+ //evalplus: 63.53,
140
+ //"multipl-e":: 53.13,
141
+ mbpp: 67.00,
142
+ // "crux-o":: 55.00,
143
+ //mgsm: 67.74,
144
+ mmmlu: 71.42,
145
+ //include: 56.29,
146
+ },
147
+ source: "",
148
+ },
149
+ {
150
+ model: "Qwen3-1.7B (Base Model)",
151
+ provider: "Qwen",
152
+ inputPrice: 0,
153
+ outputPrice: 0,
154
+ benchmark: {
155
+ mmlu: 62.63,
156
+ //"mmlu-redux": 61.66,
157
+ //"mmlu-pro": 36.76,
158
+ //supergpqa: 20.92,
159
+ //bbh: 54.47,
160
+ gpqa: 28.28,
161
+ //gsm8k: 75.44,
162
+ //math: 43.50,
163
+ //evalplus: 52.70,
164
+ //"multipl-e":: 42.71,
165
+ mbpp: 55.40,
166
+ // "crux-o":: 36.40,
167
+ //mgsm: 50.71,
168
+ mmmlu: 63.27,
169
+ //include: 45.57,
170
+ },
171
+ source: "",
172
+ },
173
+ {
174
+ model: "Qwen3-0.6B (Base Model)",
175
+ provider: "Qwen",
176
+ inputPrice: 0,
177
+ outputPrice: 0,
178
+ benchmark: {
179
+ mmlu: 52.81,
180
+ //"mmlu-redux": 51.26,
181
+ //"mmlu-pro": 24.74,
182
+ //supergpqa: 15.03,
183
+ //bbh: 41.47,
184
+ gpqa: 26.77,
185
+ //gsm8k: 59.59,
186
+ //math: 32.44,
187
+ //evalplus: 36.23,
188
+ //"multipl-e":: 24.58,
189
+ mbpp: 36.60,
190
+ // "crux-o":: 27.00,
191
+ //mgsm: 30.99,
192
+ mmmlu: 50.16,
193
+ //include: 34.26,
194
+ },
195
+ source: "",
196
+ },
197
+ {
198
+ model: "Qwen3-235B-A22B (Thinking Mode)",
199
+ provider: "Qwen",
200
+ inputPrice: 0,
201
+ outputPrice: 0,
202
+ benchmark: {
203
+ //"mmlu-redux": 92.7,
204
+ gpqa_diamond: 71.1,
205
+ //"c-eval": 89.6,
206
+ //"livebench-2024-11-25": 77.1,
207
+ //"ifeval-strict-prompt": 83.4,
208
+
209
+ //"arena-hard": 95.6,
210
+ //"alignbench-v1.1": 8.94,
211
+ //"creative-writing-v3": 84.6,
212
+ //writingbench: 8.03,
213
+ //"math-500": 98.0,
214
+ aime_24: 85.7,
215
+ aime_2025: 81.5,
216
+ //zebralogic: 80.3,
217
+ //autologi: 89.0,
218
+ //"bfcl-v3": 70.8,
219
+ //"livecodebench-v5": 70.7,
220
+ //"codeforces-rating": 2056,
221
+ //"codeforces-percentile": 98.2,
222
+ //"multi-if": 71.9,
223
+ //include: 78.7,
224
+ // "mmmlu-14-languages": 84.3,
225
+ //"mt-aime2024": 80.8,
226
+ //poly//math: 54.7,
227
+ //mlogiqa: 77.1,
228
+ },
229
+ source: "",
230
+ },
231
+ {
232
+ model: "Qwen3-235B-A22B (Non-thinking Mode)",
233
+ provider: "Qwen",
234
+ inputPrice: 0,
235
+ outputPrice: 0,
236
+ benchmark: {
237
+ //"mmlu-redux": 89.2,
238
+ gpqa_diamond: 62.9,
239
+ //"c-eval": 86.1,
240
+ //"livebench-2024-11-25": 62.5,
241
+ //"ifeval-strict-prompt": 83.2,
242
+ //"arena-hard": 96.1,
243
+ //"alignbench-v1.1": 8.91,
244
+ //"creative-writing-v3": 80.4,
245
+ //writingbench: 7.70,
246
+ //"math-500": 91.2,
247
+
248
+ aime_24: 40.1,
249
+
250
+ aime_2025: 24.7,
251
+ //zebralogic: 37.7,
252
+ //autologi: 83.3,
253
+ //"bfcl-v3": 68.0,
254
+ //"livecodebench-v5": 35.3,
255
+ //"codeforces-rating": 1387,
256
+ //"codeforces-percentile": 75.7,
257
+ //"multi-if": 70.2,
258
+ //include: 75.6,
259
+ // "mmmlu-14-languages": 79.8,
260
+ //"mt-aime2024": 32.4,
261
+ //poly//math: 27.0,
262
+ //mlogiqa: 67.6,
263
+ },
264
+ source: "",
265
+ },
266
+ {
267
+ model: "Qwen3-32B (Thinking Mode)",
268
+ provider: "Qwen",
269
+ inputPrice: 0,
270
+ outputPrice: 0,
271
+ benchmark: {
272
+ //"mmlu-redux": 90.9,
273
+ gpqa_diamond: 68.4,
274
+ //"c-eval": 87.3,
275
+ //"livebench-2024-11-25": 74.9,
276
+ //"ifeval-strict-prompt": 85.0,
277
+ //"arena-hard": 93.8,
278
+ //"alignbench-v1.1": 8.72,
279
+ //"creative-writing-v3": 81.0,
280
+ //writingbench: 7.90,
281
+ //"math-500": 97.2,
282
+ aime_24: 81.4,
283
+ aime_2025: 72.9,
284
+ //zebralogic: 88.8,
285
+ //autologi: 87.3,
286
+ //"bfcl-v3": 70.3,
287
+ //"livecodebench-v5": 65.7,
288
+ //"codeforces-rating": 1977,
289
+ //"codeforces-percentile": 97.7,
290
+ //"multi-if": 73.0,
291
+ //include: 73.7,
292
+ // "mmmlu-14-languages": 80.6,
293
+ //"mt-aime2024": 75.0,
294
+ //poly//math: 47.4,
295
+ //mlogiqa: 76.3,
296
+ },
297
+ source: "",
298
+ },
299
+ {
300
+ model: "Qwen3-32B (Non-thinking Mode)",
301
+ provider: "Qwen",
302
+ inputPrice: 0,
303
+ outputPrice: 0,
304
+ benchmark: {
305
+ //"mmlu-redux": 85.7,
306
+ gpqa_diamond: 54.6,
307
+ //"c-eval": 83.3,
308
+ //"livebench-2024-11-25": 59.8,
309
+ //"ifeval-strict-prompt": 83.2,
310
+ //"arena-hard": 92.8,
311
+ //"alignbench-v1.1": 8.58,
312
+ //"creative-writing-v3": 78.3,
313
+ //writingbench: 7.54,
314
+ //"math-500": 88.6,
315
+ aime_24: 31.0,
316
+ aime_2025: 20.2,
317
+ //zebralogic: 29.2,
318
+ //autologi: 78.5,
319
+ //"bfcl-v3": 63.0,
320
+
321
+ //"livecodebench-v5": 31.3,
322
+ //"codeforces-rating": 1353,
323
+ //"codeforces-percentile": 71.0,
324
+ //"multi-if": 70.7,
325
+ //include: 70.9,
326
+ // "mmmlu-14-languages": 76.5,
327
+ //"mt-aime2024": 24.1,
328
+ //poly//math: 22.5,
329
+ //mlogiqa: 62.9,
330
+ },
331
+ source: "",
332
+ },
333
+ {
334
+ model: "Qwen3-14B (Thinking Mode)",
335
+ provider: "Qwen",
336
+ inputPrice: 0,
337
+ outputPrice: 0,
338
+ benchmark: {
339
+ //"mmlu-redux": 88.6,
340
+ gpqa_diamond: 64.0,
341
+ //"c-eval": 86.2,
342
+ //"livebench-2024-11-25": 71.3,
343
+ //"ifeval-strict-prompt": 85.4,
344
+ //"arena-hard": 91.7,
345
+ //"alignbench-v1.1": 8.56,
346
+ //"creative-writing-v3": 80.3,
347
+ //writingbench: 7.80,
348
+ //"math-500": 96.8,
349
+ aime_24: 79.3,
350
+ aime_2025: 70.4,
351
+ //zebralogic: 88.5,
352
+ //autologi: 89.2,
353
+ //"bfcl-v3": 70.4,
354
+ //"livecodebench-v5": 63.5,
355
+ //"codeforces-rating": 1766,
356
+ //"codeforces-percentile": 95.3,
357
+ //"multi-if": 74.8,
358
+ //include: 71.7,
359
+ // "mmmlu-14-languages": 77.9,
360
+ //"mt-aime2024": 73.3,
361
+ //poly//math: 45.8,
362
+ //mlogiqa: 71.1,
363
+ },
364
+ source: "",
365
+ },
366
+ {
367
+ model: "Qwen3-30B-A3B (Thinking Mode)",
368
+ provider: "Qwen",
369
+ inputPrice: 0,
370
+ outputPrice: 0,
371
+ benchmark: {
372
+ //"mmlu-redux": 89.5,
373
+ gpqa_diamond: 65.8,
374
+ //"c-eval": 86.6,
375
+ //"livebench-2024-11-25": 74.3,
376
+ //"ifeval-strict-prompt": 86.5,
377
+ //"arena-hard": 91.0,
378
+ //"alignbench-v1.1": 8.70,
379
+ //"creative-writing-v3": 79.1,
380
+ //writingbench: 7.70,
381
+ //"math-500": 98.0,
382
+ aime_24: 80.4,
383
+ aime_2025: 70.9,
384
+ //zebralogic: 89.5,
385
+ //autologi: 88.7,
386
+ //"bfcl-v3": 69.1,
387
+ //"livecodebench-v5": 62.6,
388
+ //"codeforces-rating": 1974,
389
+ //"codeforces-percentile": 97.7,
390
+ //"multi-if": 72.2,
391
+ //include: 71.9,
392
+ // "mmmlu-14-languages": 78.4,
393
+ //"mt-aime2024": 73.9,
394
+ //poly//math: 46.1,
395
+ //mlogiqa: 70.1,
396
+ },
397
+ source: "",
398
+ },
399
+ {
400
+ model: "Qwen3-14B (Non-thinking Mode)",
401
+ provider: "Qwen",
402
+ inputPrice: 0,
403
+ outputPrice: 0,
404
+ benchmark: {
405
+ //"mmlu-redux": 82.0,
406
+ gpqa_diamond: 54.8,
407
+ //"c-eval": 81.0,
408
+ //"livebench-2024-11-25": 59.6,
409
+ //"ifeval-strict-prompt": 84.8,
410
+ //"arena-hard": 86.3,
411
+ //"alignbench-v1.1": 8.52,
412
+ //"creative-writing-v3": 73.1,
413
+ //writingbench: 7.24,
414
+ //"math-500": 90.0,
415
+ aime_24: 31.7,
416
+ aime_2025: 23.3,
417
+ //zebralogic: 33.0,
418
+ //autologi: 82.0,
419
+ //"bfcl-v3": 61.5,
420
+ //"livecodebench-v5": 29.0,
421
+ //"codeforces-rating": 1200,
422
+ //"codeforces-percentile": 58.6,
423
+ //"multi-if": 72.9,
424
+ //include: 67.8,
425
+ // "mmmlu-14-languages": 72.6,
426
+ //"mt-aime2024": 23.2,
427
+ //poly//math: 22.0,
428
+ //mlogiqa: 58.9,
429
+ },
430
+ source: "",
431
+ },
432
+ {
433
+ model: "Qwen3-30B-A3B (Non-thinking Mode)",
434
+ provider: "Qwen",
435
+ inputPrice: 0,
436
+ outputPrice: 0,
437
+ benchmark: {
438
+ //"mmlu-redux": 84.1,
439
+ gpqa_diamond: 54.8,
440
+ //"c-eval": 82.9,
441
+ //"livebench-2024-11-25": 59.4,
442
+ //"ifeval-strict-prompt": 83.7,
443
+ //"arena-hard": 88.0,
444
+ //"alignbench-v1.1": 8.55,
445
+ //"creative-writing-v3": 68.1,
446
+ //writingbench: 7.22,
447
+ //"math-500": 89.8,
448
+ aime_24: 32.8,
449
+ aime_2025: 21.6,
450
+ //zebralogic: 33.2,
451
+ //autologi: 81.5,
452
+ //"bfcl-v3": 58.6,
453
+ //"livecodebench-v5": 29.8,
454
+ //"codeforces-rating": 1267,
455
+ //"codeforces-percentile": 64.1,
456
+ //"multi-if": 70.8,
457
+ //include: 67.8,
458
+ // "mmmlu-14-languages": 73.8,
459
+ //"mt-aime2024": 24.6,
460
+ //poly//math: 23.3,
461
+ //mlogiqa: 53.3,
462
+ },
463
+ source: "",
464
+ },
465
+ {
466
+ model: "Qwen3-4B (Thinking Mode)",
467
+ provider: "Qwen",
468
+ inputPrice: 0,
469
+ outputPrice: 0,
470
+ benchmark: {
471
+ //"mmlu-redux": 83.7,
472
+ gpqa_diamond: 55.9,
473
+ //"c-eval": 77.5,
474
+ //"livebench-2024-11-25": 63.6,
475
+ //"ifeval-strict-prompt": 81.9,
476
+ //"arena-hard": 76.6,
477
+ //"alignbench-v1.1": 8.30,
478
+ //"creative-writing-v3": 61.1,
479
+ //writingbench: 7.35,
480
+ //"math-500": 97.0,
481
+ aime_24: 73.8,
482
+ aime_2025: 65.6,
483
+ //zebralogic: 81.0,
484
+ //autologi: 87.9,
485
+ //"bfcl-v3": 65.9,
486
+ //"livecodebench-v5": 54.2,
487
+ //"codeforces-rating": 1671,
488
+ //"codeforces-percentile": 92.8,
489
+ //"multi-if": 66.3,
490
+ //include: 61.8,
491
+ // "mmmlu-14-languages": 69.8,
492
+ //"mt-aime2024": 60.7,
493
+ //poly//math: 40.0,
494
+ //mlogiqa: 65.9,
495
+ },
496
+ source: "",
497
+ },
498
+ {
499
+ model: "Qwen3-8B (Thinking Mode)",
500
+ provider: "Qwen",
501
+ inputPrice: 0,
502
+ outputPrice: 0,
503
+ benchmark: {
504
+ //"mmlu-redux": 87.5,
505
+ gpqa_diamond: 62.0,
506
+ //"c-eval": 83.4,
507
+ //"livebench-2024-11-25": 67.1,
508
+ //"ifeval-strict-prompt": 85.0,
509
+ //"arena-hard": 85.8,
510
+ //"alignbench-v1.1": 8.46,
511
+ //"creative-writing-v3": 75.0,
512
+ //writingbench: 7.59,
513
+ //"math-500": 97.4,
514
+ aime_24: 76.0,
515
+ aime_2025: 67.3,
516
+ //zebralogic: 84.8,
517
+ //autologi: 89.1,
518
+ //"bfcl-v3": 68.1,
519
+ //"livecodebench-v5": 57.5,
520
+ //"codeforces-rating": 1785,
521
+ //"codeforces-percentile": 95.6,
522
+ //"multi-if": 71.2,
523
+ //include: 67.8,
524
+ // "mmmlu-14-languages": 74.4,
525
+ //"mt-aime2024": 65.4,
526
+ //poly//math: 42.7,
527
+ //mlogiqa: 69.0,
528
+ },
529
+ source: "",
530
+ },
531
+ {
532
+ model: "Qwen3-4B (Non-thinking Mode)",
533
+ provider: "Qwen",
534
+ inputPrice: 0,
535
+ outputPrice: 0,
536
+ benchmark: {
537
+ //"mmlu-redux": 77.3,
538
+ gpqa_diamond: 41.7,
539
+ //"c-eval": 72.2,
540
+ //"livebench-2024-11-25": 48.4,
541
+ //"ifeval-strict-prompt": 81.2,
542
+ //"arena-hard": 66.2,
543
+ //"alignbench-v1.1": 8.10,
544
+ //"creative-writing-v3": 53.6,
545
+ //writingbench: 6.85,
546
+ //"math-500": 84.8,
547
+ aime_24: 25.0,
548
+ aime_2025: 19.1,
549
+ //zebralogic: 35.2,
550
+ //autologi: 76.3,
551
+ //"bfcl-v3": 57.6,
552
+ //"livecodebench-v5": 21.3,
553
+ //"codeforces-rating": 842,
554
+ //"codeforces-percentile": 33.7,
555
+ //"multi-if": 61.3,
556
+ //include: 53.8,
557
+ // "mmmlu-14-languages": 61.7,
558
+ //"mt-aime2024": 13.9,
559
+ //poly//math: 16.6,
560
+ //mlogiqa: 49.9,
561
+ },
562
+ source: "",
563
+ },
564
+ {
565
+ model: "Qwen3-8B (Non-thinking Mode)",
566
+ provider: "Qwen",
567
+ inputPrice: 0,
568
+ outputPrice: 0,
569
+ benchmark: {
570
+ //"mmlu-redux": 79.5,
571
+ gpqa_diamond: 39.3,
572
+ //"c-eval": 77.9,
573
+ //"livebench-2024-11-25": 53.5,
574
+ //"ifeval-strict-prompt": 83.0,
575
+ //"arena-hard": 79.6,
576
+ //"alignbench-v1.1": 8.38,
577
+ //"creative-writing-v3": 64.5,
578
+ //writingbench: 7.15,
579
+ //"math-500": 87.4,
580
+ aime_24: 29.1,
581
+ aime_2025: 20.9,
582
+ //zebralogic: 26.7,
583
+ //autologi: 76.5,
584
+ //"bfcl-v3": 60.2,
585
+ //"livecodebench-v5": 22.8,
586
+ //"codeforces-rating": 1110,
587
+ //"codeforces-percentile": 52.4,
588
+ //"multi-if": 69.2,
589
+ //include: 62.5,
590
+ // "mmmlu-14-languages": 66.9,
591
+ //"mt-aime2024": 16.6,
592
+ //poly//math: 18.8,
593
+ //mlogiqa: 51.4,
594
+ },
595
+ source: "",
596
+ },
597
+ {
598
+ model: "Qwen3-0.6B (Thinking Mode)",
599
+ provider: "Qwen",
600
+ inputPrice: 0,
601
+ outputPrice: 0,
602
+ benchmark: {
603
+ //"mmlu-redux": 55.6,
604
+ gpqa_diamond: 27.9,
605
+ //"c-eval": 50.4,
606
+ //"livebench-2024-11-25": 30.3,
607
+ //"ifeval-strict-prompt": 59.2,
608
+ //"arena-hard": 8.5,
609
+ //"alignbench-v1.1": 6.10,
610
+ //"creative-writing-v3": 30.6,
611
+ //writingbench: 5.61,
612
+ //"math-500": 77.6,
613
+ aime_24: 10.7,
614
+ aime_2025: 15.1,
615
+ //zebralogic: 30.3,
616
+ //autologi: 61.6,
617
+ //"bfcl-v3": 46.4,
618
+ //"livecodebench-v5": 12.3,
619
+ //"multi-if": 36.1,
620
+ //include: 35.9,
621
+ // "mmmlu-14-languages": 43.1,
622
+ //"mt-aime2024": 7.8,
623
+ //poly//math: 11.4,
624
+ //mlogiqa: 40.9,
625
+ },
626
+ source: "",
627
+ },
628
+ {
629
+ model: "Qwen3-1.7B (Thinking Mode)",
630
+ provider: "Qwen",
631
+ inputPrice: 0,
632
+ outputPrice: 0,
633
+ benchmark: {
634
+ //"mmlu-redux": 73.9,
635
+ gpqa_diamond: 40.1,
636
+ //"c-eval": 68.1,
637
+ //"livebench-2024-11-25": 51.1,
638
+ //"ifeval-strict-prompt": 72.5,
639
+ //"arena-hard": 43.1,
640
+ //"alignbench-v1.1": 7.60,
641
+ //"creative-writing-v3": 48.0,
642
+ //writingbench: 7.02,
643
+ //"math-500": 93.4,
644
+ aime_24: 48.3,
645
+ aime_2025: 36.8,
646
+ //zebralogic: 63.2,
647
+ //autologi: 83.2,
648
+ //"bfcl-v3": 56.6,
649
+ //"livecodebench-v5": 33.2,
650
+ //"multi-if": 51.2,
651
+ //include: 51.8,
652
+ // "mmmlu-14-languages": 59.1,
653
+ //"mt-aime2024": 36.1,
654
+ //poly//math: 25.2,
655
+ //mlogiqa: 56.0,
656
+ },
657
+ source: "",
658
+ },
659
+ {
660
+ model: "Qwen3-0.6B (Non-thinking Mode)",
661
+ provider: "Qwen",
662
+ inputPrice: 0,
663
+ outputPrice: 0,
664
+ benchmark: {
665
+ //"mmlu-redux": 44.6,
666
+ gpqa_diamond: 22.9,
667
+ //"c-eval": 42.6,
668
+ //"livebench-2024-11-25": 21.8,
669
+ //"ifeval-strict-prompt": 54.5,
670
+ //"arena-hard": 6.5,
671
+ //"alignbench-v1.1": 5.60,
672
+ //"creative-writing-v3": 28.4,
673
+ //writingbench: 5.13,
674
+ //"math-500": 55.2,
675
+ aime_24: 3.4,
676
+ aime_2025: 2.6,
677
+ //zebralogic: 4.2,
678
+ //autologi: 37.4,
679
+ //"bfcl-v3": 44.1,
680
+ //"livecodebench-v5": 3.6,
681
+ //"multi-if": 33.3,
682
+ //include: 34.4,
683
+ // "mmmlu-14-languages": 37.1,
684
+ //"mt-aime2024": 1.5,
685
+ //poly//math: 4.6,
686
+ //mlogiqa: 37.3,
687
+ },
688
+ source: "",
689
+ },
690
+ {
691
+ model: "Qwen3-1.7B (Non-thinking Mode)",
692
+ provider: "Qwen",
693
+ inputPrice: 0,
694
+ outputPrice: 0,
695
+ benchmark: {
696
+ //"mmlu-redux": 64.4,
697
+ gpqa_diamond: 28.6,
698
+ //"c-eval": 61.0,
699
+ //"livebench-2024-11-25": 35.6,
700
+ //"ifeval-strict-prompt": 68.2,
701
+ //"arena-hard": 36.9,
702
+ //"alignbench-v1.1": 7.20,
703
+ //"creative-writing-v3": 43.6,
704
+ //writingbench: 6.54,
705
+ //"math-500": 73.0,
706
+ aime_24: 13.4,
707
+ aime_2025: 9.8,
708
+ //zebralogic: 12.8,
709
+ //autologi: 59.8,
710
+ //"bfcl-v3": 52.2,
711
+ //"livecodebench-v5": 11.6,
712
+ //"multi-if": 44.7,
713
+ //include: 42.6,
714
+ // "mmmlu-14-languages": 48.3,
715
+ //"mt-aime2024": 4.9,
716
+ //poly//math: 10.3,
717
+ //mlogiqa: 41.1,
718
+ },
719
+ source: "",
720
+ },];