Commit
·
3999c2c
1
Parent(s):
fffbe5d
- src/lib/benchmarks/ index.ts +2 -2
- src/lib/benchmarks/qwen.ts +0 -84
src/lib/benchmarks/ index.ts
CHANGED
@@ -4,7 +4,7 @@ import { googleBenchmarks } from "./google";
|
|
4 |
import { anthropicBenchmarks } from "./anthropic";
|
5 |
import { openaiBenchmarks } from "./openai";
|
6 |
import { deepseekBenchmarks } from "./deepseek";
|
7 |
-
|
8 |
|
9 |
export const benchmarkData: Benchmark[] = [
|
10 |
...xaiBenchmarks,
|
@@ -12,5 +12,5 @@ export const benchmarkData: Benchmark[] = [
|
|
12 |
...anthropicBenchmarks,
|
13 |
...openaiBenchmarks,
|
14 |
...deepseekBenchmarks,
|
15 |
-
|
16 |
];
|
|
|
4 |
import { anthropicBenchmarks } from "./anthropic";
|
5 |
import { openaiBenchmarks } from "./openai";
|
6 |
import { deepseekBenchmarks } from "./deepseek";
|
7 |
+
|
8 |
|
9 |
export const benchmarkData: Benchmark[] = [
|
10 |
...xaiBenchmarks,
|
|
|
12 |
...anthropicBenchmarks,
|
13 |
...openaiBenchmarks,
|
14 |
...deepseekBenchmarks,
|
15 |
+
|
16 |
];
|
src/lib/benchmarks/qwen.ts
DELETED
@@ -1,84 +0,0 @@
|
|
1 |
-
import { Benchmark } from "./types";
|
2 |
-
|
3 |
-
export const qwenBenchmarks: Benchmark[] = [
|
4 |
-
{
|
5 |
-
model: "Qwen3-235B-A22B",
|
6 |
-
provider: "Qwen",
|
7 |
-
inputPrice: 0.0,
|
8 |
-
outputPrice: 0.0,
|
9 |
-
benchmark: {
|
10 |
-
aime_24: 85.7,
|
11 |
-
aime_2025: 81.5,
|
12 |
-
gpqa_diamond: 44.06,
|
13 |
-
// livecodebench_v6: 70.7,
|
14 |
-
mmlu_pro: 68.18,
|
15 |
-
mmlu: 87.81,
|
16 |
-
mmmu: 71.84,
|
17 |
-
// gsm8k: 47.47,
|
18 |
-
// math: 71.84,
|
19 |
-
// bigbench_extra_hard: 59.54,
|
20 |
-
// global_mmlu_lite: 87.40,
|
21 |
-
// evalplus: 77.60,
|
22 |
-
// humaneval: 79.00,
|
23 |
-
// mbpp: 81.40,
|
24 |
-
// cruxeval_c: 79.00,
|
25 |
-
simpleqa: 85.8,
|
26 |
-
egoschema: 81.1,
|
27 |
-
},
|
28 |
-
source: "https://qwenlm.github.io/blog/qwen3/",
|
29 |
-
},
|
30 |
-
{
|
31 |
-
model: "Qwen3-32B",
|
32 |
-
provider: "Qwen",
|
33 |
-
inputPrice: 0.0,
|
34 |
-
outputPrice: 0.0,
|
35 |
-
benchmark: {
|
36 |
-
aime_24: 81.4,
|
37 |
-
aime_2025: 72.9,
|
38 |
-
// livecodebench_v6: 65.7,
|
39 |
-
//: 1977,
|
40 |
-
//aider_polyglot: 50.2,
|
41 |
-
// livebench: 74.9,
|
42 |
-
// bfcl: 70.3,
|
43 |
-
// multillm: 73.0,
|
44 |
-
},
|
45 |
-
source: "https://qwenlm.github.io/blog/qwe,n3/ (image table)",
|
46 |
-
},
|
47 |
-
|
48 |
-
{
|
49 |
-
model: "Qwen3-30B-A3B",
|
50 |
-
provider: "Qwen",
|
51 |
-
inputPrice: 0.0,
|
52 |
-
outputPrice: 0.0,
|
53 |
-
benchmark: {
|
54 |
-
aime_24: 80.4,
|
55 |
-
aime_2025: 70.9,
|
56 |
-
//livecodebench_v6: 62.6,
|
57 |
-
//codeforces: 1974,
|
58 |
-
gpqa: 65.8,
|
59 |
-
// livebench: 74.3,
|
60 |
-
// bfcl: 69.1,
|
61 |
-
// multillm: 72.2,
|
62 |
-
},
|
63 |
-
source: "https://qwenlm.github.io/blog/qwen3/ (image table)",
|
64 |
-
},
|
65 |
-
{
|
66 |
-
model: "Qwen3-4B",
|
67 |
-
provider: "Qwen",
|
68 |
-
inputPrice: 0.0,
|
69 |
-
outputPrice: 0.0,
|
70 |
-
benchmark: {
|
71 |
-
aime_24: 73.8,
|
72 |
-
aime_2025: 65.6,
|
73 |
-
//livecodebench_v6: 54.2,
|
74 |
-
// codeforces: 1671,
|
75 |
-
gpqa: 55.9,
|
76 |
-
|
77 |
-
// bfcl: 65.9,
|
78 |
-
// math: 62.12,
|
79 |
-
//multillm: 66.3,
|
80 |
-
},
|
81 |
-
source: "https://qwenlm.github.io/blog/qwen3/ (image table)",
|
82 |
-
}
|
83 |
-
|
84 |
-
];
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|