Yotam-Perlitz committed
Commit a50e6f5
1 Parent(s): 23926f6

update cache

Signed-off-by: Yotam-Perlitz <[email protected]>
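
The cache files below are plain CSVs with a model,score header, one row per model, with scores in the [0, 1] range. A minimal sketch of inspecting one of them (assuming pandas is available; the path is one of the hashed cache files touched by this commit):

import pandas as pd

# One of the hashed cache CSVs from this commit; the others share the same layout.
path = "cache/aggregate_scoress_cache_2916d5df57069c497d825c9f1fac0bd4.csv"

scores = pd.read_csv(path)  # columns: model, score
# Show the ten highest-scoring models in the cache.
print(scores.sort_values("score", ascending=False).head(10))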

cache/aggregate_scoress_cache_2916d5df57069c497d825c9f1fac0bd4.csv ADDED
@@ -0,0 +1,62 @@
+ model,score
+ claude_3_5_sonnet_20240620,1.0
+ gpt_4o_2024_05_13,0.9833333333333333
+ gpt_4_0125_preview,0.9666666666666667
+ gpt_4o_2024_08_06,0.95
+ athene_70b,0.9333333333333333
+ gpt_4o_mini,0.9166666666666666
+ gemini_1_5_pro_api_preview,0.9
+ mistral_large_2407,0.8833333333333333
+ llama3_1_405b_instruct,0.8666666666666667
+ glm_4_0520,0.85
+ yi_large,0.8333333333333334
+ deepseek_coder_v2,0.8166666666666667
+ claude_3_opus_20240229,0.8
+ gemma_2_27b_it,0.7833333333333333
+ llama3_1_70b_instruct,0.75
+ glm_4_0116,0.75
+ glm_4_air,0.7333333333333333
+ gpt_4_0314,0.7166666666666667
+ gemini_1_5_flash_api_preview,0.7
+ qwen2_72b_instruct,0.6833333333333333
+ claude_3_sonnet_20240229,0.6666666666666666
+ llama3_70b_instruct,0.65
+ claude_3_haiku_20240307,0.6333333333333333
+ gpt_4_0613,0.6166666666666667
+ mistral_large_2402,0.6
+ mixtral_8x22b_instruct_v0_1,0.5833333333333334
+ qwen1_5_72b_chat,0.5666666666666667
+ phi_3_medium_4k_instruct,0.55
+ command_r_plus,0.5333333333333333
+ mistral_medium,0.5166666666666667
+ internlm2_5_20b_chat,0.5
+ phi_3_small_8k_instruct,0.48333333333333334
+ mistral_next,0.4666666666666667
+ gpt_3_5_turbo_0613,0.45
+ dbrx_instructruct_preview,0.43333333333333335
+ internlm2_20b_chat,0.4166666666666667
+ claude_2_0,0.4
+ mixtral_8x7b_instruct_v0_1,0.38333333333333336
+ gpt_3_5_turbo_0125,0.36666666666666664
+ yi_34b_chat,0.35
+ starling_lm_7b_beta,0.3333333333333333
+ claude_2_1,0.31666666666666665
+ llama3_1_8b_instruct,0.3
+ snorkel_mistral_pairrm_dpo,0.2833333333333333
+ llama3_8b_instruct,0.26666666666666666
+ gpt_3_5_turbo_1106,0.25
+ gpt_3_5_turbo_0301,0.23333333333333334
+ gemini_1_0_pro,0.21666666666666667
+ snowflake_arctic_instruct,0.2
+ command_r,0.18333333333333332
+ phi_3_mini_128k_instruct,0.16666666666666666
+ tulu_2_dpo_70b,0.15
+ starling_lm_7b_alpha,0.13333333333333333
+ mistral_7b_instruct,0.11666666666666667
+ gemma_1_1_7b_it,0.1
+ llama_2_70b_chat,0.08333333333333333
+ vicuna_33b_v1_3,0.06666666666666667
+ gemma_7b_it,0.05
+ llama_2_7b_chat,0.03333333333333333
+ gemma_1_1_2b_it,0.016666666666666666
+ gemma_2b_it,0.0
cache/aggregate_scoress_cache_e798cd9f99d4c09af5b81734eecc6592.csv CHANGED
@@ -1,13 +1,122 @@
  model,score
- gpt_4o_2024_05_13,1.0
- qwen2_72b_instruct,0.8437710437710438
- gemma_2_27b_it,0.8225108225108225
- llama3_70b_instruct,0.7288840788840788
- qwen1.5_110b_chat,0.6958874458874459
- command_r_plus,0.5340909090909091
- qwen1.5_72b_chat,0.510909090909091
- llama3_8b_instruct,0.2897065897065897
- mixtral_8x7b_instruct_v0.1,0.27759740259740256
- command_r,0.1515151515151515
- qwen1.5_7b_chat,0.06897546897546898
- llama_2_7b_chat,0.0
+ gpt_4o_2024_05_13,0.9847612958226769
+ claude_3_5_sonnet_20240620,0.982905982905983
+ gpt_4o_2024_08_06,0.9575873827791986
+ gpt_4_turbo_2024_04_09,0.9428463693169576
+ gpt_4_0125_preview,0.9171132221004344
+ mistral_large_2407,0.8868286445012787
+ llama3_1_405b_instruct,0.8672150411280846
+ yi_large_preview,0.8641553641553642
+ hermes_3_llama3_1_70b,0.8626160990712074
+ smaug_qwen2_72b_instruct,0.8593911248710011
+ claude_3_opus_20240229,0.8573567665639277
+ llama3_1_70b_instruct,0.8528408270971201
+ athene_70b,0.8493788819875776
+ deepseek_coder_v2,0.8444160272804775
+ qwen2_72b_instruct,0.8354710666091739
+ yi_large,0.8346273291925466
+ gpt_4_0613,0.8146763722211293
+ llama3_70b_instruct,0.8127546753337573
+ llama3_70b,0.8105600539811066
+ gemma_2_27b_it,0.8045273029120115
+ gpt_4o_mini_2024_07_18,0.8032033326150972
+ gemma_2_9b_it_dpo,0.790057915057915
+ llama3_instruct_8b_simpo,0.7884068278805121
+ phi_3_5_moe_instruct,0.7808307533539731
+ qwen1_5_110b_chat,0.776004448721167
+ qwen1_5_32b,0.7658569500674763
+ yi_1_5_34b_chat,0.7553884711779449
+ llama_2_70b,0.7303193882141251
+ mixtral_8x22b_instruct_v0_1,0.7256023690940907
+ gemma_2_9b_it_simpo,0.7199248120300753
+ qwen1_5_32b_chat,0.7149122807017544
+ mixtral_8x22b_v0_1,0.7135490753911806
+ yi_34b,0.7128879892037787
+ internlm2_5_20b_chat,0.6842105263157895
+ phi_3_small_128k_instruct,0.66937564499484
+ phi_3_medium_4k_instruct,0.6675079642841117
+ claude_3_sonnet_20240229,0.653911731916847
+ gemma_2_9b_it,0.6422797189051059
+ infinity_instruct_3m_0625_llama3_8b,0.6273115220483642
+ mistral_v0_1_7b,0.6239316239316239
+ phi_3_5_mini_instruct,0.6202270381836945
+ mistral_medium,0.6122209165687427
+ mistral_large_2402,0.6058211467418628
+ claude_instant_1_2,0.6049896049896051
+ claude_2_0,0.6020066889632107
+ yi_1_5_9b_chat,0.5881787802840435
+ qwen1_5_14b,0.5770917678812416
+ command_r_plus,0.5761033510394125
+ llama_65b,0.5736992052781527
+ gpt_3_5_turbo_0613,0.5724018332713985
+ qwen1_5_72b_chat,0.5668371367348349
+ phi_3_mini_4k_instruct,0.5548245614035088
+ deepseek_llm_67b_chat,0.5506756756756757
+ claude_3_haiku_20240307,0.549424005945745
+ yi_34b_chat,0.5455449728905107
+ dbrx_instructruct,0.5344129554655871
+ jurassic_2_jumbo_178b,0.532051282051282
+ llama3_1_8b_instruct,0.5175232440678665
+ claude_2_1,0.5110980545763154
+ qwen2_7b_instruct,0.5034227726178191
+ mistral_small_2402,0.49924585218702866
+ mixtral_8x7b_v0_1,0.49324324324324326
+ glm_4_9b_chat,0.46499582289055974
+ qwen1_5_14b_chat,0.4621068436857911
+ phi_3_small_8k_instruct,0.45481670929241264
+ gpt_3_5_turbo_0301,0.4528985507246377
+ snorkel_mistral_pairrm_dpo,0.4521151586368978
+ gemma_7b,0.4471997300944669
+ gpt_3_5_turbo_0125,0.4401920188365201
+ llama3_8b,0.43302968960863697
+ dbrx_instruct,0.4266409266409266
+ llama3_8b_instruct,0.420135922511747
+ phi_3_mini_128k_instruct,0.4153205904787544
+ llama_2_13b,0.41490478332583597
+ jurassic_2_grande_17b,0.39529914529914534
+ openhermes_2_5_mistral_7b,0.3832617447168531
+ mistral_7b_v0_3,0.3737553342816501
+ mixtral_8x7b_instruct_v0_1,0.3713078251895724
+ qwen1_5_7b,0.3508771929824561
+ yi_1_5_6b_chat,0.3354636591478697
+ falcon_40b,0.32812265707002547
+ command_r,0.32386140074759
+ internlm2_chat_20b,0.32252252252252256
+ mistral_7b_v0_2,0.31970128022759603
+ luminous_supreme_70b,0.30128205128205127
+ starling_lm_7b_alpha,0.29823530624445954
+ yi_6b,0.29234143049932526
+ mistral_7b_instruct_v0_2,0.28609513981031004
+ zephyr_7b_alpha,0.2838442157327606
+ zephyr_7b_beta,0.2666234345800909
+ gemma_1_1_7b_it,0.26226051061156724
+ mistral_7b_instruct_v0_3,0.2537839697282422
+ starling_lm_7b_beta,0.25234441602728047
+ llama_2_7b,0.2391288049182786
+ luminous_extended_30b,0.2329059829059829
+ alpaca_7b,0.22072072072072071
+ vicuna_33b_v1_3,0.2056404230317274
+ phi_2,0.20087901666849037
+ qwen2_1_5b_instruct,0.19711042311661506
+ yi_6b_chat,0.1938854489164087
+ qwen1_5_7b_chat,0.1916569245052217
+ tulu_2_dpo_70b,0.17624223602484473
+ qwen1_5_4b_chat,0.1674406604747162
+ llama_2_70b_chat,0.15527950310559005
+ gpt_neox_20b,0.14400584795321636
+ vicuna_7b_v1_5,0.13619501854795973
+ falcon_40b_instruct,0.13264580369843526
+ gemma_7b_it,0.12136319058515854
+ falcon_7b,0.11407257459889038
+ gpt_j_6b,0.10160818713450293
+ luminous_base_13b,0.08333333333333333
+ llama_2_7b_chat,0.08304448781801049
+ gemma_1_1_2b_it,0.07665903890160183
+ olmo_7b,0.06545209176788123
+ gemma_2b_it,0.05921052631578947
+ qwen1_5_1_8b_chat,0.059167526659786716
+ qwen2_0_5b_instruct,0.059081527347781215
+ pythia_12b,0.054093567251461985
+ pythia_6_9b,0.019736842105263157
+ falcon_7b_instruct,0.013513513513513514
+ qwen1_5_0_5b_chat,0.013157894736842105
cache/agreements_cache_2916d5df57069c497d825c9f1fac0bd4.csv ADDED
The diff for this file is too large to render. See raw diff
 
cache/agreements_cache_e798cd9f99d4c09af5b81734eecc6592.csv CHANGED
The diff for this file is too large to render. See raw diff