Presidentlin committed on
Commit
e955c4c
·
1 Parent(s): 58de9fb
Files changed (1) hide show
  1. src/lib/benchmarks/anthropic.ts +19 -3
src/lib/benchmarks/anthropic.ts CHANGED
@@ -80,13 +80,29 @@ export const anthropicBenchmarks: Benchmark[] = [
80
  mmmu: 70.4,
81
  aime_24: 16.0, // average of 16.0 & 65.4
82
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83
  },
84
  {
85
  model: "Claude 3 Opus",
86
  provider: "Anthropic",
87
  inputPrice: 15.0,
88
  outputPrice: 75.0,
89
- source: "https://www.anthropic.com/news/claude-3-opus-release",
90
  benchmark: {
91
  gpqa_diamond: 50.4,
92
  mmmlu: 86.8,
@@ -110,7 +126,7 @@ export const anthropicBenchmarks: Benchmark[] = [
110
  provider: "Anthropic",
111
  inputPrice: 3.0,
112
  outputPrice: 15.0,
113
- source: "https://www.anthropic.com/news/claude-3-opus-release",
114
  benchmark: {
115
  gpqa_diamond: 40.4,
116
  mmmlu: 79.0,
@@ -134,7 +150,7 @@ export const anthropicBenchmarks: Benchmark[] = [
134
  provider: "Anthropic",
135
  inputPrice: 0.25,
136
  outputPrice: 1.25,
137
- source: "https://www.anthropic.com/news/claude-3-opus-release",
138
  benchmark: {
139
  gpqa_diamond: 33.3,
140
  mmmlu: 75.2,
 
80
  mmmu: 70.4,
81
  aime_24: 16.0, // average of 16.0 & 65.4
82
  },
83
+ },
84
+ {
85
+ model: "Claude 3.5 Haiku",
86
+ provider: "Anthropic",
87
+ inputPrice: 3.0,
88
+ outputPrice: 15.0,
89
+ source: "https://www.anthropic.com/news/3-5-models-and-computer-use",
90
+ benchmark: {
91
+ gpqa_diamond: 41.6,
92
+ swe_bench_verified: 49.0,
93
+ // tau_bench_retail: 51.0,
94
+ // tau_bench_airline: 22.8,
95
+ humaneval:88.1,
96
+ mmmlu: 65.0,
97
+ aime_24: 5.3,
98
+ },
99
  },
100
  {
101
  model: "Claude 3 Opus",
102
  provider: "Anthropic",
103
  inputPrice: 15.0,
104
  outputPrice: 75.0,
105
+ source: "https://www.anthropic.com/news/claude-3-family",
106
  benchmark: {
107
  gpqa_diamond: 50.4,
108
  mmmlu: 86.8,
 
126
  provider: "Anthropic",
127
  inputPrice: 3.0,
128
  outputPrice: 15.0,
129
+ source: "https://www.anthropic.com/news/claude-3-family",
130
  benchmark: {
131
  gpqa_diamond: 40.4,
132
  mmmlu: 79.0,
 
150
  provider: "Anthropic",
151
  inputPrice: 0.25,
152
  outputPrice: 1.25,
153
+ source: "https://www.anthropic.com/news/claude-3-family",
154
  benchmark: {
155
  gpqa_diamond: 33.3,
156
  mmmlu: 75.2,