benchbench / cache /aggregate_scoress_cache_bdd8d629730630ed8e73cb56f53f335b.csv
Yotam-Perlitz
update cahce
dcfe1ca
raw
history blame
2.55 kB
model,score
gpt_4o_2024_05_13,0.9950869236583522
gpt_4_turbo_2024_04_09,0.9639153609872332
gpt_4_0125_preview,0.9393939393939394
claude_3_opus_20240229,0.9220898724004315
yi_large_preview,0.8845096932053453
llama3.1_70b_instruct,0.8832459207459207
qwen2_72b_instruct,0.8719243719243719
gpt_4_0613,0.8648074952422778
gemma_2_27b_it,0.8510517260517261
llama3_70b_instruct,0.849670938694048
mistral_large,0.8470006184291898
yi_1.5_34b_chat,0.7714889277389277
llama3_70b,0.7611156781553201
claude_3_sonnet_20240229,0.7541101640480523
mixtral_8x22b_instruct_v0.1,0.7425976800976801
qwen1.5_110b_chat,0.7320934784170079
mistral_medium,0.7056277056277057
phi_3_medium_4k_instruct,0.696301247771836
qwen1.5_32b_chat,0.6812609659248315
claude_3_haiku_20240307,0.6757563807253248
mistral_large_2402,0.6705753184014054
qwen1.5_72b,0.6692868451728518
qwen1.5_72b_chat,0.6604933136636727
command_r_plus,0.6508289415898112
yi_34b,0.6270777876367939
phi_3_mini_4k_instruct,0.6218470721621983
gemma_2_9b_it,0.6216006216006217
qwen1.5_32b,0.6175551742436105
llama_2_70b,0.585811187829169
gpt_3.5_turbo_0125,0.5795841063698206
yi_34b_chat,0.5793988997113997
gemini_1.0_pro,0.5205318491032778
gpt_3.5_turbo_0613,0.5196837944664031
llama3_8b_instruct,0.5166348882210227
yi_1.5_9b_chat,0.5150335775335776
qwen2_7b_instruct,0.4803418803418803
phi_3_mini_128k_instruct,0.47774531890678346
mixtral_8x7b_instruct_v0.1,0.47245816635618004
starling_lm_7b_beta,0.46037960638800973
qwen1.5_14b_chat,0.45760855624301
tulu_2_dpo_70b,0.44326821637746006
llama3.1_8b_instruct,0.4429972804972805
command_r,0.42476786498525626
openhermes_2.5_mistral_7b,0.4133219954648526
mistral_7b_instruct_v0.2,0.38528271156947624
yi_1.5_6b_chat,0.3616155902920609
llama_2_70b_chat,0.3429423284465301
starling_lm_7b_alpha,0.3192125334982478
llama3_8b,0.3191327255539897
gemma_1.1_7b_it,0.3093664233370116
zephyr_7b_beta,0.3012633624878523
qwen1.5_7b,0.29290520556048505
gemma_7b,0.29047416067876425
vicuna_33b_v1.3,0.28480038480038483
qwen1.5_7b_chat,0.27333417049803604
llama_2_13b,0.2660179289106947
mistral_7b_instruct_v0.3,0.21805555555555559
yi_6b_chat,0.20091647770219198
yi_6b,0.19687349577512162
qwen1.5_4b,0.16834733893557421
vicuna_7b_v1.5,0.14973544973544975
gemma_7b_it,0.14039115646258504
qwen1.5_4b_chat,0.14036924293526934
phi_2,0.1326664119754182
llama_2_7b_chat,0.11841995751468631
gemma_1.1_2b_it,0.09431689342403628
llama_2_7b,0.06859327581964463
qwen1.5_1.8b_chat,0.06363517629273932
gemma_2b,0.05727290916366547
olmo_7b,0.011775543550753635
qwen1.5_0.5b_chat,0.008163265306122448