Commit
·
80fc9d2
1
Parent(s):
90ec376
- src/lib/benchmarks/index.ts +3 -3
- src/lib/benchmarks/qwen.ts +24 -24
src/lib/benchmarks/index.ts
CHANGED
@@ -11,7 +11,7 @@ export const benchmarkData: Benchmark[] = [
|
|
11 |
...xaiBenchmarks,
|
12 |
...googleBenchmarks,
|
13 |
...anthropicBenchmarks,
|
14 |
-
|
15 |
-
|
16 |
-
...qwenBenchmarks,
|
17 |
];
|
|
|
11 |
...xaiBenchmarks,
|
12 |
...googleBenchmarks,
|
13 |
...anthropicBenchmarks,
|
14 |
+
...openaiBenchmarks,
|
15 |
+
...deepseekBenchmarks,
|
16 |
+
...qwenBenchmarks,
|
17 |
];
|
src/lib/benchmarks/qwen.ts
CHANGED
@@ -24,7 +24,7 @@ export const qwenBenchmarks: Benchmark[] = [
|
|
24 |
mmmlu: 86.70,
|
25 |
////include: 73.46,
|
26 |
},
|
27 |
-
source: "",
|
28 |
},
|
29 |
{
|
30 |
model: "Qwen3-32B (Base Model)",
|
@@ -48,7 +48,7 @@ export const qwenBenchmarks: Benchmark[] = [
|
|
48 |
mmmlu: 83.83,
|
49 |
//include: 67.87,
|
50 |
},
|
51 |
-
source: "",
|
52 |
},
|
53 |
{
|
54 |
model: "Qwen3-14B (Base Model)",
|
@@ -72,7 +72,7 @@ export const qwenBenchmarks: Benchmark[] = [
|
|
72 |
mmmlu: 81.46,
|
73 |
//include: 64.55,
|
74 |
},
|
75 |
-
source: "",
|
76 |
},
|
77 |
{
|
78 |
model: "Qwen3-30B-A3B (Base Model)",
|
@@ -96,7 +96,7 @@ export const qwenBenchmarks: Benchmark[] = [
|
|
96 |
mmmlu: 81.46,
|
97 |
//include: 67.00,
|
98 |
},
|
99 |
-
source: "",
|
100 |
},
|
101 |
{
|
102 |
model: "Qwen3-8B (Base Model)",
|
@@ -120,7 +120,7 @@ export const qwenBenchmarks: Benchmark[] = [
|
|
120 |
mmmlu: 75.72,
|
121 |
//include: 59.40,
|
122 |
},
|
123 |
-
source: "",
|
124 |
},
|
125 |
{
|
126 |
model: "Qwen3-4B (Base Model)",
|
@@ -144,7 +144,7 @@ export const qwenBenchmarks: Benchmark[] = [
|
|
144 |
mmmlu: 71.42,
|
145 |
//include: 56.29,
|
146 |
},
|
147 |
-
source: "",
|
148 |
},
|
149 |
{
|
150 |
model: "Qwen3-1.7B (Base Model)",
|
@@ -168,7 +168,7 @@ export const qwenBenchmarks: Benchmark[] = [
|
|
168 |
mmmlu: 63.27,
|
169 |
//include: 45.57,
|
170 |
},
|
171 |
-
source: "",
|
172 |
},
|
173 |
{
|
174 |
model: "Qwen3-0.6B (Base Model)",
|
@@ -192,7 +192,7 @@ export const qwenBenchmarks: Benchmark[] = [
|
|
192 |
mmmlu: 50.16,
|
193 |
//include: 34.26,
|
194 |
},
|
195 |
-
source: "",
|
196 |
},
|
197 |
{
|
198 |
model: "Qwen3-235B-A22B (Thinking Mode)",
|
@@ -226,7 +226,7 @@ export const qwenBenchmarks: Benchmark[] = [
|
|
226 |
//poly//math: 54.7,
|
227 |
//mlogiqa: 77.1,
|
228 |
},
|
229 |
-
source: "",
|
230 |
},
|
231 |
{
|
232 |
model: "Qwen3-235B-A22B (Non-thinking Mode)",
|
@@ -261,7 +261,7 @@ export const qwenBenchmarks: Benchmark[] = [
|
|
261 |
//poly//math: 27.0,
|
262 |
//mlogiqa: 67.6,
|
263 |
},
|
264 |
-
source: "",
|
265 |
},
|
266 |
{
|
267 |
model: "Qwen3-32B (Thinking Mode)",
|
@@ -294,7 +294,7 @@ export const qwenBenchmarks: Benchmark[] = [
|
|
294 |
//poly//math: 47.4,
|
295 |
//mlogiqa: 76.3,
|
296 |
},
|
297 |
-
source: "",
|
298 |
},
|
299 |
{
|
300 |
model: "Qwen3-32B (Non-thinking Mode)",
|
@@ -328,7 +328,7 @@ export const qwenBenchmarks: Benchmark[] = [
|
|
328 |
//poly//math: 22.5,
|
329 |
//mlogiqa: 62.9,
|
330 |
},
|
331 |
-
source: "",
|
332 |
},
|
333 |
{
|
334 |
model: "Qwen3-14B (Thinking Mode)",
|
@@ -361,7 +361,7 @@ export const qwenBenchmarks: Benchmark[] = [
|
|
361 |
//poly//math: 45.8,
|
362 |
//mlogiqa: 71.1,
|
363 |
},
|
364 |
-
source: "",
|
365 |
},
|
366 |
{
|
367 |
model: "Qwen3-30B-A3B (Thinking Mode)",
|
@@ -394,7 +394,7 @@ export const qwenBenchmarks: Benchmark[] = [
|
|
394 |
//poly//math: 46.1,
|
395 |
//mlogiqa: 70.1,
|
396 |
},
|
397 |
-
source: "",
|
398 |
},
|
399 |
{
|
400 |
model: "Qwen3-14B (Non-thinking Mode)",
|
@@ -427,7 +427,7 @@ export const qwenBenchmarks: Benchmark[] = [
|
|
427 |
//poly//math: 22.0,
|
428 |
//mlogiqa: 58.9,
|
429 |
},
|
430 |
-
source: "",
|
431 |
},
|
432 |
{
|
433 |
model: "Qwen3-30B-A3B (Non-thinking Mode)",
|
@@ -460,7 +460,7 @@ export const qwenBenchmarks: Benchmark[] = [
|
|
460 |
//poly//math: 23.3,
|
461 |
//mlogiqa: 53.3,
|
462 |
},
|
463 |
-
source: "",
|
464 |
},
|
465 |
{
|
466 |
model: "Qwen3-4B (Thinking Mode)",
|
@@ -493,7 +493,7 @@ export const qwenBenchmarks: Benchmark[] = [
|
|
493 |
//poly//math: 40.0,
|
494 |
//mlogiqa: 65.9,
|
495 |
},
|
496 |
-
source: "",
|
497 |
},
|
498 |
{
|
499 |
model: "Qwen3-8B (Thinking Mode)",
|
@@ -526,7 +526,7 @@ export const qwenBenchmarks: Benchmark[] = [
|
|
526 |
//poly//math: 42.7,
|
527 |
//mlogiqa: 69.0,
|
528 |
},
|
529 |
-
source: "",
|
530 |
},
|
531 |
{
|
532 |
model: "Qwen3-4B (Non-thinking Mode)",
|
@@ -559,7 +559,7 @@ export const qwenBenchmarks: Benchmark[] = [
|
|
559 |
//poly//math: 16.6,
|
560 |
//mlogiqa: 49.9,
|
561 |
},
|
562 |
-
source: "",
|
563 |
},
|
564 |
{
|
565 |
model: "Qwen3-8B (Non-thinking Mode)",
|
@@ -592,7 +592,7 @@ export const qwenBenchmarks: Benchmark[] = [
|
|
592 |
//poly//math: 18.8,
|
593 |
//mlogiqa: 51.4,
|
594 |
},
|
595 |
-
source: "",
|
596 |
},
|
597 |
{
|
598 |
model: "Qwen3-0.6B (Thinking Mode)",
|
@@ -623,7 +623,7 @@ export const qwenBenchmarks: Benchmark[] = [
|
|
623 |
//poly//math: 11.4,
|
624 |
//mlogiqa: 40.9,
|
625 |
},
|
626 |
-
source: "",
|
627 |
},
|
628 |
{
|
629 |
model: "Qwen3-1.7B (Thinking Mode)",
|
@@ -654,7 +654,7 @@ export const qwenBenchmarks: Benchmark[] = [
|
|
654 |
//poly//math: 25.2,
|
655 |
//mlogiqa: 56.0,
|
656 |
},
|
657 |
-
source: "",
|
658 |
},
|
659 |
{
|
660 |
model: "Qwen3-0.6B (Non-thinking Mode)",
|
@@ -685,7 +685,7 @@ export const qwenBenchmarks: Benchmark[] = [
|
|
685 |
//poly//math: 4.6,
|
686 |
//mlogiqa: 37.3,
|
687 |
},
|
688 |
-
source: "",
|
689 |
},
|
690 |
{
|
691 |
model: "Qwen3-1.7B (Non-thinking Mode)",
|
@@ -716,5 +716,5 @@ export const qwenBenchmarks: Benchmark[] = [
|
|
716 |
//poly//math: 10.3,
|
717 |
//mlogiqa: 41.1,
|
718 |
},
|
719 |
-
source: "",
|
720 |
},];
|
|
|
24 |
mmmlu: 86.70,
|
25 |
////include: 73.46,
|
26 |
},
|
27 |
+
source: "https://arxiv.org/pdf/2505.09388",
|
28 |
},
|
29 |
{
|
30 |
model: "Qwen3-32B (Base Model)",
|
|
|
48 |
mmmlu: 83.83,
|
49 |
//include: 67.87,
|
50 |
},
|
51 |
+
source: "https://arxiv.org/pdf/2505.09388",
|
52 |
},
|
53 |
{
|
54 |
model: "Qwen3-14B (Base Model)",
|
|
|
72 |
mmmlu: 81.46,
|
73 |
//include: 64.55,
|
74 |
},
|
75 |
+
source: "https://arxiv.org/pdf/2505.09388",
|
76 |
},
|
77 |
{
|
78 |
model: "Qwen3-30B-A3B (Base Model)",
|
|
|
96 |
mmmlu: 81.46,
|
97 |
//include: 67.00,
|
98 |
},
|
99 |
+
source: "https://arxiv.org/pdf/2505.09388",
|
100 |
},
|
101 |
{
|
102 |
model: "Qwen3-8B (Base Model)",
|
|
|
120 |
mmmlu: 75.72,
|
121 |
//include: 59.40,
|
122 |
},
|
123 |
+
source: "https://arxiv.org/pdf/2505.09388",
|
124 |
},
|
125 |
{
|
126 |
model: "Qwen3-4B (Base Model)",
|
|
|
144 |
mmmlu: 71.42,
|
145 |
//include: 56.29,
|
146 |
},
|
147 |
+
source: "https://arxiv.org/pdf/2505.09388",
|
148 |
},
|
149 |
{
|
150 |
model: "Qwen3-1.7B (Base Model)",
|
|
|
168 |
mmmlu: 63.27,
|
169 |
//include: 45.57,
|
170 |
},
|
171 |
+
source: "https://arxiv.org/pdf/2505.09388",
|
172 |
},
|
173 |
{
|
174 |
model: "Qwen3-0.6B (Base Model)",
|
|
|
192 |
mmmlu: 50.16,
|
193 |
//include: 34.26,
|
194 |
},
|
195 |
+
source: "https://arxiv.org/pdf/2505.09388",
|
196 |
},
|
197 |
{
|
198 |
model: "Qwen3-235B-A22B (Thinking Mode)",
|
|
|
226 |
//poly//math: 54.7,
|
227 |
//mlogiqa: 77.1,
|
228 |
},
|
229 |
+
source: "https://arxiv.org/pdf/2505.09388",
|
230 |
},
|
231 |
{
|
232 |
model: "Qwen3-235B-A22B (Non-thinking Mode)",
|
|
|
261 |
//poly//math: 27.0,
|
262 |
//mlogiqa: 67.6,
|
263 |
},
|
264 |
+
source: "https://arxiv.org/pdf/2505.09388",
|
265 |
},
|
266 |
{
|
267 |
model: "Qwen3-32B (Thinking Mode)",
|
|
|
294 |
//poly//math: 47.4,
|
295 |
//mlogiqa: 76.3,
|
296 |
},
|
297 |
+
source: "https://arxiv.org/pdf/2505.09388",
|
298 |
},
|
299 |
{
|
300 |
model: "Qwen3-32B (Non-thinking Mode)",
|
|
|
328 |
//poly//math: 22.5,
|
329 |
//mlogiqa: 62.9,
|
330 |
},
|
331 |
+
source: "https://arxiv.org/pdf/2505.09388",
|
332 |
},
|
333 |
{
|
334 |
model: "Qwen3-14B (Thinking Mode)",
|
|
|
361 |
//poly//math: 45.8,
|
362 |
//mlogiqa: 71.1,
|
363 |
},
|
364 |
+
source: "https://arxiv.org/pdf/2505.09388",
|
365 |
},
|
366 |
{
|
367 |
model: "Qwen3-30B-A3B (Thinking Mode)",
|
|
|
394 |
//poly//math: 46.1,
|
395 |
//mlogiqa: 70.1,
|
396 |
},
|
397 |
+
source: "https://arxiv.org/pdf/2505.09388",
|
398 |
},
|
399 |
{
|
400 |
model: "Qwen3-14B (Non-thinking Mode)",
|
|
|
427 |
//poly//math: 22.0,
|
428 |
//mlogiqa: 58.9,
|
429 |
},
|
430 |
+
source: "https://arxiv.org/pdf/2505.09388",
|
431 |
},
|
432 |
{
|
433 |
model: "Qwen3-30B-A3B (Non-thinking Mode)",
|
|
|
460 |
//poly//math: 23.3,
|
461 |
//mlogiqa: 53.3,
|
462 |
},
|
463 |
+
source: "https://arxiv.org/pdf/2505.09388",
|
464 |
},
|
465 |
{
|
466 |
model: "Qwen3-4B (Thinking Mode)",
|
|
|
493 |
//poly//math: 40.0,
|
494 |
//mlogiqa: 65.9,
|
495 |
},
|
496 |
+
source: "https://arxiv.org/pdf/2505.09388",
|
497 |
},
|
498 |
{
|
499 |
model: "Qwen3-8B (Thinking Mode)",
|
|
|
526 |
//poly//math: 42.7,
|
527 |
//mlogiqa: 69.0,
|
528 |
},
|
529 |
+
source: "https://arxiv.org/pdf/2505.09388",
|
530 |
},
|
531 |
{
|
532 |
model: "Qwen3-4B (Non-thinking Mode)",
|
|
|
559 |
//poly//math: 16.6,
|
560 |
//mlogiqa: 49.9,
|
561 |
},
|
562 |
+
source: "https://arxiv.org/pdf/2505.09388",
|
563 |
},
|
564 |
{
|
565 |
model: "Qwen3-8B (Non-thinking Mode)",
|
|
|
592 |
//poly//math: 18.8,
|
593 |
//mlogiqa: 51.4,
|
594 |
},
|
595 |
+
source: "https://arxiv.org/pdf/2505.09388",
|
596 |
},
|
597 |
{
|
598 |
model: "Qwen3-0.6B (Thinking Mode)",
|
|
|
623 |
//poly//math: 11.4,
|
624 |
//mlogiqa: 40.9,
|
625 |
},
|
626 |
+
source: "https://arxiv.org/pdf/2505.09388",
|
627 |
},
|
628 |
{
|
629 |
model: "Qwen3-1.7B (Thinking Mode)",
|
|
|
654 |
//poly//math: 25.2,
|
655 |
//mlogiqa: 56.0,
|
656 |
},
|
657 |
+
source: "https://arxiv.org/pdf/2505.09388",
|
658 |
},
|
659 |
{
|
660 |
model: "Qwen3-0.6B (Non-thinking Mode)",
|
|
|
685 |
//poly//math: 4.6,
|
686 |
//mlogiqa: 37.3,
|
687 |
},
|
688 |
+
source: "https://arxiv.org/pdf/2505.09388",
|
689 |
},
|
690 |
{
|
691 |
model: "Qwen3-1.7B (Non-thinking Mode)",
|
|
|
716 |
//poly//math: 10.3,
|
717 |
//mlogiqa: 41.1,
|
718 |
},
|
719 |
+
source: "https://arxiv.org/pdf/2505.09388",
|
720 |
},];
|