farimafatahi committed on
Commit
3670f4d
·
verified ·
1 Parent(s): d697d0c

Update tiered_models_data.csv

Browse files
Files changed (1) hide show
  1. tiered_models_data.csv +4 -4
tiered_models_data.csv CHANGED
@@ -1,8 +1,8 @@
1
  tier,model,factuality_score,hallucination_score,avg_tokens,avg_factual_units,avg_undecidable_units,avg_unsupported_units
2
- Tier 1: Hard,GPT4-o,75.69,0.64,561.72,23.91,4.61,1.01
3
- Tier 1: Hard,Gemini1.5-Pro,73.81,0.68,516.41,22.23,4.47,1.12
4
- Tier 1: Hard,Llama3.1-70B-Instruct,70.01,0.89,531.35,27.09,5.67,2.13
5
- Tier 1: Hard,Llama3.1-405B-Instruct,68.64,0.93,550.74,26.6,6.15,2.19
6
  Tier 1: Hard,Claude-3.5-Sonnet,74.95,0.65,395.77,22.64,4.03,1.19
7
  Tier 1: Hard,CommandR+,73.15,0.71,440.93,23.55,4.51,1.4
8
  Tier 1: Hard,Mistral-Large-2,75.19,0.67,485.58,23.21,4.09,1.36
 
1
  tier,model,factuality_score,hallucination_score,avg_tokens,avg_factual_units,avg_undecidable_units,avg_unsupported_units
2
+ Tier 1: Hard,GPT4-o,75.65,0.64,563.15,24.01,4.62,1.01
3
+ Tier 1: Hard,Gemini1.5-Pro,73.78,0.68,517.31,22.25,4.48,1.13
4
+ Tier 1: Hard,Llama3.1-70B-Instruct,70.07,0.89,532.41,27.17,5.67,2.13
5
+ Tier 1: Hard,Llama3.1-405B-Instruct,68.59,0.93,551.28,26.71,6.19,2.2
6
  Tier 1: Hard,Claude-3.5-Sonnet,74.95,0.65,395.77,22.64,4.03,1.19
7
  Tier 1: Hard,CommandR+,73.15,0.71,440.93,23.55,4.51,1.4
8
  Tier 1: Hard,Mistral-Large-2,75.19,0.67,485.58,23.21,4.09,1.36