Presidentlin committed on
Commit
80fc9d2
·
1 Parent(s): 90ec376
src/lib/benchmarks/ index.ts CHANGED
@@ -11,7 +11,7 @@ export const benchmarkData: Benchmark[] = [
11
  ...xaiBenchmarks,
12
  ...googleBenchmarks,
13
  ...anthropicBenchmarks,
14
- ...openaiBenchmarks,
15
- ...deepseekBenchmarks,
16
- ...qwenBenchmarks,
17
  ];
 
11
  ...xaiBenchmarks,
12
  ...googleBenchmarks,
13
  ...anthropicBenchmarks,
14
+ ...openaiBenchmarks,
15
+ ...deepseekBenchmarks,
16
+ ...qwenBenchmarks,
17
  ];
src/lib/benchmarks/qwen.ts CHANGED
@@ -24,7 +24,7 @@ export const qwenBenchmarks: Benchmark[] = [
24
  mmmlu: 86.70,
25
  ////include: 73.46,
26
  },
27
- source: "",
28
  },
29
  {
30
  model: "Qwen3-32B (Base Model)",
@@ -48,7 +48,7 @@ export const qwenBenchmarks: Benchmark[] = [
48
  mmmlu: 83.83,
49
  //include: 67.87,
50
  },
51
- source: "",
52
  },
53
  {
54
  model: "Qwen3-14B (Base Model)",
@@ -72,7 +72,7 @@ export const qwenBenchmarks: Benchmark[] = [
72
  mmmlu: 81.46,
73
  //include: 64.55,
74
  },
75
- source: "",
76
  },
77
  {
78
  model: "Qwen3-30B-A3B (Base Model)",
@@ -96,7 +96,7 @@ export const qwenBenchmarks: Benchmark[] = [
96
  mmmlu: 81.46,
97
  //include: 67.00,
98
  },
99
- source: "",
100
  },
101
  {
102
  model: "Qwen3-8B (Base Model)",
@@ -120,7 +120,7 @@ export const qwenBenchmarks: Benchmark[] = [
120
  mmmlu: 75.72,
121
  //include: 59.40,
122
  },
123
- source: "",
124
  },
125
  {
126
  model: "Qwen3-4B (Base Model)",
@@ -144,7 +144,7 @@ export const qwenBenchmarks: Benchmark[] = [
144
  mmmlu: 71.42,
145
  //include: 56.29,
146
  },
147
- source: "",
148
  },
149
  {
150
  model: "Qwen3-1.7B (Base Model)",
@@ -168,7 +168,7 @@ export const qwenBenchmarks: Benchmark[] = [
168
  mmmlu: 63.27,
169
  //include: 45.57,
170
  },
171
- source: "",
172
  },
173
  {
174
  model: "Qwen3-0.6B (Base Model)",
@@ -192,7 +192,7 @@ export const qwenBenchmarks: Benchmark[] = [
192
  mmmlu: 50.16,
193
  //include: 34.26,
194
  },
195
- source: "",
196
  },
197
  {
198
  model: "Qwen3-235B-A22B (Thinking Mode)",
@@ -226,7 +226,7 @@ export const qwenBenchmarks: Benchmark[] = [
226
  //poly//math: 54.7,
227
  //mlogiqa: 77.1,
228
  },
229
- source: "",
230
  },
231
  {
232
  model: "Qwen3-235B-A22B (Non-thinking Mode)",
@@ -261,7 +261,7 @@ export const qwenBenchmarks: Benchmark[] = [
261
  //poly//math: 27.0,
262
  //mlogiqa: 67.6,
263
  },
264
- source: "",
265
  },
266
  {
267
  model: "Qwen3-32B (Thinking Mode)",
@@ -294,7 +294,7 @@ export const qwenBenchmarks: Benchmark[] = [
294
  //poly//math: 47.4,
295
  //mlogiqa: 76.3,
296
  },
297
- source: "",
298
  },
299
  {
300
  model: "Qwen3-32B (Non-thinking Mode)",
@@ -328,7 +328,7 @@ export const qwenBenchmarks: Benchmark[] = [
328
  //poly//math: 22.5,
329
  //mlogiqa: 62.9,
330
  },
331
- source: "",
332
  },
333
  {
334
  model: "Qwen3-14B (Thinking Mode)",
@@ -361,7 +361,7 @@ export const qwenBenchmarks: Benchmark[] = [
361
  //poly//math: 45.8,
362
  //mlogiqa: 71.1,
363
  },
364
- source: "",
365
  },
366
  {
367
  model: "Qwen3-30B-A3B (Thinking Mode)",
@@ -394,7 +394,7 @@ export const qwenBenchmarks: Benchmark[] = [
394
  //poly//math: 46.1,
395
  //mlogiqa: 70.1,
396
  },
397
- source: "",
398
  },
399
  {
400
  model: "Qwen3-14B (Non-thinking Mode)",
@@ -427,7 +427,7 @@ export const qwenBenchmarks: Benchmark[] = [
427
  //poly//math: 22.0,
428
  //mlogiqa: 58.9,
429
  },
430
- source: "",
431
  },
432
  {
433
  model: "Qwen3-30B-A3B (Non-thinking Mode)",
@@ -460,7 +460,7 @@ export const qwenBenchmarks: Benchmark[] = [
460
  //poly//math: 23.3,
461
  //mlogiqa: 53.3,
462
  },
463
- source: "",
464
  },
465
  {
466
  model: "Qwen3-4B (Thinking Mode)",
@@ -493,7 +493,7 @@ export const qwenBenchmarks: Benchmark[] = [
493
  //poly//math: 40.0,
494
  //mlogiqa: 65.9,
495
  },
496
- source: "",
497
  },
498
  {
499
  model: "Qwen3-8B (Thinking Mode)",
@@ -526,7 +526,7 @@ export const qwenBenchmarks: Benchmark[] = [
526
  //poly//math: 42.7,
527
  //mlogiqa: 69.0,
528
  },
529
- source: "",
530
  },
531
  {
532
  model: "Qwen3-4B (Non-thinking Mode)",
@@ -559,7 +559,7 @@ export const qwenBenchmarks: Benchmark[] = [
559
  //poly//math: 16.6,
560
  //mlogiqa: 49.9,
561
  },
562
- source: "",
563
  },
564
  {
565
  model: "Qwen3-8B (Non-thinking Mode)",
@@ -592,7 +592,7 @@ export const qwenBenchmarks: Benchmark[] = [
592
  //poly//math: 18.8,
593
  //mlogiqa: 51.4,
594
  },
595
- source: "",
596
  },
597
  {
598
  model: "Qwen3-0.6B (Thinking Mode)",
@@ -623,7 +623,7 @@ export const qwenBenchmarks: Benchmark[] = [
623
  //poly//math: 11.4,
624
  //mlogiqa: 40.9,
625
  },
626
- source: "",
627
  },
628
  {
629
  model: "Qwen3-1.7B (Thinking Mode)",
@@ -654,7 +654,7 @@ export const qwenBenchmarks: Benchmark[] = [
654
  //poly//math: 25.2,
655
  //mlogiqa: 56.0,
656
  },
657
- source: "",
658
  },
659
  {
660
  model: "Qwen3-0.6B (Non-thinking Mode)",
@@ -685,7 +685,7 @@ export const qwenBenchmarks: Benchmark[] = [
685
  //poly//math: 4.6,
686
  //mlogiqa: 37.3,
687
  },
688
- source: "",
689
  },
690
  {
691
  model: "Qwen3-1.7B (Non-thinking Mode)",
@@ -716,5 +716,5 @@ export const qwenBenchmarks: Benchmark[] = [
716
  //poly//math: 10.3,
717
  //mlogiqa: 41.1,
718
  },
719
- source: "",
720
  },];
 
24
  mmmlu: 86.70,
25
  ////include: 73.46,
26
  },
27
+ source: "https://arxiv.org/pdf/2505.09388",
28
  },
29
  {
30
  model: "Qwen3-32B (Base Model)",
 
48
  mmmlu: 83.83,
49
  //include: 67.87,
50
  },
51
+ source: "https://arxiv.org/pdf/2505.09388",
52
  },
53
  {
54
  model: "Qwen3-14B (Base Model)",
 
72
  mmmlu: 81.46,
73
  //include: 64.55,
74
  },
75
+ source: "https://arxiv.org/pdf/2505.09388",
76
  },
77
  {
78
  model: "Qwen3-30B-A3B (Base Model)",
 
96
  mmmlu: 81.46,
97
  //include: 67.00,
98
  },
99
+ source: "https://arxiv.org/pdf/2505.09388",
100
  },
101
  {
102
  model: "Qwen3-8B (Base Model)",
 
120
  mmmlu: 75.72,
121
  //include: 59.40,
122
  },
123
+ source: "https://arxiv.org/pdf/2505.09388",
124
  },
125
  {
126
  model: "Qwen3-4B (Base Model)",
 
144
  mmmlu: 71.42,
145
  //include: 56.29,
146
  },
147
+ source: "https://arxiv.org/pdf/2505.09388",
148
  },
149
  {
150
  model: "Qwen3-1.7B (Base Model)",
 
168
  mmmlu: 63.27,
169
  //include: 45.57,
170
  },
171
+ source: "https://arxiv.org/pdf/2505.09388",
172
  },
173
  {
174
  model: "Qwen3-0.6B (Base Model)",
 
192
  mmmlu: 50.16,
193
  //include: 34.26,
194
  },
195
+ source: "https://arxiv.org/pdf/2505.09388",
196
  },
197
  {
198
  model: "Qwen3-235B-A22B (Thinking Mode)",
 
226
  //poly//math: 54.7,
227
  //mlogiqa: 77.1,
228
  },
229
+ source: "https://arxiv.org/pdf/2505.09388",
230
  },
231
  {
232
  model: "Qwen3-235B-A22B (Non-thinking Mode)",
 
261
  //poly//math: 27.0,
262
  //mlogiqa: 67.6,
263
  },
264
+ source: "https://arxiv.org/pdf/2505.09388",
265
  },
266
  {
267
  model: "Qwen3-32B (Thinking Mode)",
 
294
  //poly//math: 47.4,
295
  //mlogiqa: 76.3,
296
  },
297
+ source: "https://arxiv.org/pdf/2505.09388",
298
  },
299
  {
300
  model: "Qwen3-32B (Non-thinking Mode)",
 
328
  //poly//math: 22.5,
329
  //mlogiqa: 62.9,
330
  },
331
+ source: "https://arxiv.org/pdf/2505.09388",
332
  },
333
  {
334
  model: "Qwen3-14B (Thinking Mode)",
 
361
  //poly//math: 45.8,
362
  //mlogiqa: 71.1,
363
  },
364
+ source: "https://arxiv.org/pdf/2505.09388",
365
  },
366
  {
367
  model: "Qwen3-30B-A3B (Thinking Mode)",
 
394
  //poly//math: 46.1,
395
  //mlogiqa: 70.1,
396
  },
397
+ source: "https://arxiv.org/pdf/2505.09388",
398
  },
399
  {
400
  model: "Qwen3-14B (Non-thinking Mode)",
 
427
  //poly//math: 22.0,
428
  //mlogiqa: 58.9,
429
  },
430
+ source: "https://arxiv.org/pdf/2505.09388",
431
  },
432
  {
433
  model: "Qwen3-30B-A3B (Non-thinking Mode)",
 
460
  //poly//math: 23.3,
461
  //mlogiqa: 53.3,
462
  },
463
+ source: "https://arxiv.org/pdf/2505.09388",
464
  },
465
  {
466
  model: "Qwen3-4B (Thinking Mode)",
 
493
  //poly//math: 40.0,
494
  //mlogiqa: 65.9,
495
  },
496
+ source: "https://arxiv.org/pdf/2505.09388",
497
  },
498
  {
499
  model: "Qwen3-8B (Thinking Mode)",
 
526
  //poly//math: 42.7,
527
  //mlogiqa: 69.0,
528
  },
529
+ source: "https://arxiv.org/pdf/2505.09388",
530
  },
531
  {
532
  model: "Qwen3-4B (Non-thinking Mode)",
 
559
  //poly//math: 16.6,
560
  //mlogiqa: 49.9,
561
  },
562
+ source: "https://arxiv.org/pdf/2505.09388",
563
  },
564
  {
565
  model: "Qwen3-8B (Non-thinking Mode)",
 
592
  //poly//math: 18.8,
593
  //mlogiqa: 51.4,
594
  },
595
+ source: "https://arxiv.org/pdf/2505.09388",
596
  },
597
  {
598
  model: "Qwen3-0.6B (Thinking Mode)",
 
623
  //poly//math: 11.4,
624
  //mlogiqa: 40.9,
625
  },
626
+ source: "https://arxiv.org/pdf/2505.09388",
627
  },
628
  {
629
  model: "Qwen3-1.7B (Thinking Mode)",
 
654
  //poly//math: 25.2,
655
  //mlogiqa: 56.0,
656
  },
657
+ source: "https://arxiv.org/pdf/2505.09388",
658
  },
659
  {
660
  model: "Qwen3-0.6B (Non-thinking Mode)",
 
685
  //poly//math: 4.6,
686
  //mlogiqa: 37.3,
687
  },
688
+ source: "https://arxiv.org/pdf/2505.09388",
689
  },
690
  {
691
  model: "Qwen3-1.7B (Non-thinking Mode)",
 
716
  //poly//math: 10.3,
717
  //mlogiqa: 41.1,
718
  },
719
+ source: "https://arxiv.org/pdf/2505.09388",
720
  },];