Update README.md
README.md CHANGED
@@ -114,9 +114,10 @@ The library used is [lm-evaluation-harness repository](https://github.com/Eleuth

 #### Main Results

-| Model | ARC | HellaSwag | MMLU |
-|-------|-----|-----------|------|
-| **Llama-3.1-8B-Instruct** | **
+| Model | ARC | HellaSwag | MMLU | IFEval |
+|------------------------|----------|--------|------|--------|
+| **Llama-3.1-8B-Instruct** | **52.05** | **** | **42.07** | **42.14** |
+| **Llama-3.1-10B-Instruct** | **50.42** | **57.81** | **35.62** | **35.67** |

 #### Scripts to generate evaluation results

@@ -127,7 +128,7 @@ pip install lm-eval>=0.4.7

 from lm_eval import evaluator

-tasks_list = ["arc_challenge", "
+tasks_list = ["arc_challenge", "ifeval", "mmlu_pro", "hellaswag"]  # Benchmark datasets

 model_path='rwmasood/llama-3.1-10b-instruct'
 model_name_or_path = "./output/checkpoint-2800"
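The diff only shows the head of the evaluation script. Below is a minimal sketch of how the remaining lm-eval call might look; the `simple_evaluate` invocation, the `bfloat16` dtype, and the batch size are assumptions for illustration, not the README's actual script.

```python
# Minimal sketch (assumed): completing the snippet above with lm-eval's
# evaluator.simple_evaluate entrypoint. Argument values are illustrative.
import json

from lm_eval import evaluator

tasks_list = ["arc_challenge", "ifeval", "mmlu_pro", "hellaswag"]  # Benchmark datasets

model_path = "rwmasood/llama-3.1-10b-instruct"   # published checkpoint on the Hub
model_name_or_path = "./output/checkpoint-2800"  # or a local fine-tuned checkpoint

# Load the model through the Hugging Face backend and run every task in tasks_list.
results = evaluator.simple_evaluate(
    model="hf",
    model_args=f"pretrained={model_path},dtype=bfloat16",  # dtype is an assumption
    tasks=tasks_list,
    batch_size=8,  # assumed; adjust to available GPU memory
)

# results["results"] maps each task name to its metric dictionary.
print(json.dumps(results["results"], indent=2, default=str))
```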