Adding Evaluation Results

#4
Files changed (1) hide show
  1. README.md +32 -0
README.md CHANGED
@@ -32,6 +32,9 @@ model-index:
32
  - type: inst_level_strict_acc and prompt_level_strict_acc
33
  value: 55.37
34
  name: strict accuracy
 
 
 
35
  source:
36
  url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=meditsolutions/Llama-3.2-SUN-2.4B-v1.0.0
37
  name: Open LLM Leaderboard
@@ -50,6 +53,9 @@ model-index:
50
  - type: acc_norm
51
  value: 7.17
52
  name: normalized accuracy
 
 
 
53
  source:
54
  url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=meditsolutions/Llama-3.2-SUN-2.4B-v1.0.0
55
  name: Open LLM Leaderboard
@@ -68,6 +74,9 @@ model-index:
68
  - type: exact_match
69
  value: 1.28
70
  name: exact match
 
 
 
71
  source:
72
  url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=meditsolutions/Llama-3.2-SUN-2.4B-v1.0.0
73
  name: Open LLM Leaderboard
@@ -86,6 +95,9 @@ model-index:
86
  - type: acc_norm
87
  value: 0.45
88
  name: acc_norm
 
 
 
89
  source:
90
  url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=meditsolutions/Llama-3.2-SUN-2.4B-v1.0.0
91
  name: Open LLM Leaderboard
@@ -104,6 +116,9 @@ model-index:
104
  - type: acc_norm
105
  value: 0.13
106
  name: acc_norm
 
 
 
107
  source:
108
  url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=meditsolutions/Llama-3.2-SUN-2.4B-v1.0.0
109
  name: Open LLM Leaderboard
@@ -124,6 +139,9 @@ model-index:
124
  - type: acc
125
  value: 7.17
126
  name: accuracy
 
 
 
127
  source:
128
  url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=meditsolutions/Llama-3.2-SUN-2.4B-v1.0.0
129
  name: Open LLM Leaderboard
@@ -179,3 +197,17 @@ Detailed results can be found [here](https://huggingface.co/datasets/open-llm-le
179
  |MuSR (0-shot) | 0.13|
180
  |MMLU-PRO (5-shot) | 7.17|
181
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  - type: inst_level_strict_acc and prompt_level_strict_acc
33
  value: 55.37
34
  name: strict accuracy
35
+ - type: inst_level_strict_acc and prompt_level_strict_acc
36
+ value: 55.37
37
+ name: strict accuracy
38
  source:
39
  url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=meditsolutions/Llama-3.2-SUN-2.4B-v1.0.0
40
  name: Open LLM Leaderboard
 
53
  - type: acc_norm
54
  value: 7.17
55
  name: normalized accuracy
56
+ - type: acc_norm
57
+ value: 7.17
58
+ name: normalized accuracy
59
  source:
60
  url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=meditsolutions/Llama-3.2-SUN-2.4B-v1.0.0
61
  name: Open LLM Leaderboard
 
74
  - type: exact_match
75
  value: 1.28
76
  name: exact match
77
+ - type: exact_match
78
+ value: 1.28
79
+ name: exact match
80
  source:
81
  url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=meditsolutions/Llama-3.2-SUN-2.4B-v1.0.0
82
  name: Open LLM Leaderboard
 
95
  - type: acc_norm
96
  value: 0.45
97
  name: acc_norm
98
+ - type: acc_norm
99
+ value: 0.45
100
+ name: acc_norm
101
  source:
102
  url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=meditsolutions/Llama-3.2-SUN-2.4B-v1.0.0
103
  name: Open LLM Leaderboard
 
116
  - type: acc_norm
117
  value: 0.13
118
  name: acc_norm
119
+ - type: acc_norm
120
+ value: 0.13
121
+ name: acc_norm
122
  source:
123
  url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=meditsolutions/Llama-3.2-SUN-2.4B-v1.0.0
124
  name: Open LLM Leaderboard
 
139
  - type: acc
140
  value: 7.17
141
  name: accuracy
142
+ - type: acc
143
+ value: 7.17
144
+ name: accuracy
145
  source:
146
  url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=meditsolutions/Llama-3.2-SUN-2.4B-v1.0.0
147
  name: Open LLM Leaderboard
 
197
  |MuSR (0-shot) | 0.13|
198
  |MMLU-PRO (5-shot) | 7.17|
199
 
200
+
201
+ # [Open LLM Leaderboard Evaluation Results](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard)
202
+ Detailed results can be found [here](https://huggingface.co/datasets/open-llm-leaderboard/details_meditsolutions__Llama-3.2-SUN-2.4B-v1.0.0)
203
+
204
+ | Metric |Value|
205
+ |-------------------|----:|
206
+ |Avg. |11.93|
207
+ |IFEval (0-Shot) |55.37|
208
+ |BBH (3-Shot) | 7.17|
209
+ |MATH Lvl 5 (4-Shot)| 1.28|
210
+ |GPQA (0-shot) | 0.45|
211
+ |MuSR (0-shot) | 0.13|
212
+ |MMLU-PRO (5-shot) | 7.17|
213
+