flunardelli committed on
Commit
834ed82
·
verified ·
1 Parent(s): 74c4e4f

Upload 2 files

Browse files
llm_metaeval_eval_harness_pub.ipynb CHANGED
@@ -4,7 +4,7 @@
4
  "metadata": {
5
  "colab": {
6
  "provenance": [],
7
- "gpuType": "L4",
8
  "machine_shape": "hm"
9
  },
10
  "kernelspec": {
@@ -121,7 +121,7 @@
121
  "fewshot_split: test\n",
122
  "fewshot_config:\n",
123
  " sampler: first_n\n",
124
- "num_fewshot: 10\n",
125
  "output_type: multiple_choice\n",
126
  "doc_to_text: \"{{pretext.strip()}}\\n Options:\\n__options__\\nAnswer:\"\n",
127
  "doc_to_choice: \"{{options}}\"\n",
@@ -130,9 +130,6 @@
130
  " - metric: acc\n",
131
  " aggregation: mean\n",
132
  " higher_is_better: true\n",
133
- " - metric: acc_norm\n",
134
- " aggregation: mean\n",
135
- " higher_is_better: true\n",
136
  "\"\"\"\n",
137
  "tasks = []\n",
138
  "for t in YAML_template_pub_tasks:\n",
@@ -166,11 +163,11 @@
166
  {
167
  "cell_type": "code",
168
  "source": [
169
- "!for i in $(echo $TASKS|tr ',' ' '); do accelerate launch -m lm_eval \\\n",
170
- "--model hf --model_args pretrained=meta-llama/Llama-3.2-1B-Instruct,parallelize=True \\\n",
171
  "--tasks $i \\\n",
172
- "--include_path $TASK_FOLDER/. --output $OUTPUT_FOLDER --use_cache cache --log_samples \\\n",
173
- "--batch_size 8; done"
174
  ],
175
  "metadata": {
176
  "id": "NOwy6ZlY3Mw7"
@@ -192,11 +189,11 @@
192
  {
193
  "cell_type": "code",
194
  "source": [
195
- "!for i in $(echo $TASKS|tr ',' ' '); do accelerate launch -m lm_eval \\\n",
196
- "--model hf --model_args pretrained=meta-llama/Llama-3.2-3B-Instruct,parallelize=True \\\n",
197
  "--tasks $i \\\n",
198
- "--include_path $TASK_FOLDER/. --output $OUTPUT_FOLDER --use_cache cache --log_samples \\\n",
199
- "--batch_size 8; done"
200
  ],
201
  "metadata": {
202
  "id": "oIACOAhDW5ow"
@@ -218,11 +215,11 @@
218
  {
219
  "cell_type": "code",
220
  "source": [
221
- "!for i in $(echo $TASKS|tr ',' ' '); do accelerate launch -m lm_eval \\\n",
222
- "--model hf --model_args pretrained=meta-llama/Meta-Llama-3-8B,parallelize=True \\\n",
223
  "--tasks $i \\\n",
224
- "--include_path $TASK_FOLDER/. --output $OUTPUT_FOLDER --use_cache cache --log_samples \\\n",
225
- "--batch_size 8; done"
226
  ],
227
  "metadata": {
228
  "id": "1Nxw4WNxZUyb"
@@ -253,11 +250,11 @@
253
  {
254
  "cell_type": "code",
255
  "source": [
256
- "!for i in $(echo $TASKS|tr ',' ' '); do accelerate launch -m lm_eval \\\n",
257
- "--model hf --model_args pretrained=mistralai/Mixtral-8x7B-Instruct-v0.1,parallelize=True \\\n",
258
  "--tasks $i \\\n",
259
- "--include_path $TASK_FOLDER/. --output $OUTPUT_FOLDER --use_cache cache --log_samples \\\n",
260
- "--batch_size 8; done"
261
  ],
262
  "metadata": {
263
  "id": "E3dBWV1V9C-O"
@@ -279,11 +276,11 @@
279
  {
280
  "cell_type": "code",
281
  "source": [
282
- "!for i in $(echo $TASKS|tr ',' ' '); do accelerate launch -m lm_eval \\\n",
283
- "--model hf --model_args pretrained=mistralai/Mixtral-8x22B-v0.1,parallelize=True \\\n",
284
  "--tasks $i \\\n",
285
- "--include_path $TASK_FOLDER/. --output $OUTPUT_FOLDER --use_cache cache --log_samples \\\n",
286
- "--batch_size 8; done"
287
  ],
288
  "metadata": {
289
  "id": "LPqTo2z29RKx"
 
4
  "metadata": {
5
  "colab": {
6
  "provenance": [],
7
+ "gpuType": "T4",
8
  "machine_shape": "hm"
9
  },
10
  "kernelspec": {
 
121
  "fewshot_split: test\n",
122
  "fewshot_config:\n",
123
  " sampler: first_n\n",
124
+ "num_fewshot: 5\n",
125
  "output_type: multiple_choice\n",
126
  "doc_to_text: \"{{pretext.strip()}}\\n Options:\\n__options__\\nAnswer:\"\n",
127
  "doc_to_choice: \"{{options}}\"\n",
 
130
  " - metric: acc\n",
131
  " aggregation: mean\n",
132
  " higher_is_better: true\n",
 
 
 
133
  "\"\"\"\n",
134
  "tasks = []\n",
135
  "for t in YAML_template_pub_tasks:\n",
 
163
  {
164
  "cell_type": "code",
165
  "source": [
166
+ "!for i in $(echo $TASKS|tr ',' ' '); do lm_eval \\\n",
167
+ "--model hf --model_args pretrained=meta-llama/Llama-3.2-1B-Instruct,revision=d0a2081ed47e20ce524e8bc5d132f3fad2f69ff0,trust_remote_code=False,dtype=bfloat16,parallelize=True \\\n",
168
  "--tasks $i \\\n",
169
+ "--include_path $TASK_FOLDER/. --output $OUTPUT_FOLDER --log_samples \\\n",
170
+ "--batch_size auto; done &> run.log"
171
  ],
172
  "metadata": {
173
  "id": "NOwy6ZlY3Mw7"
 
189
  {
190
  "cell_type": "code",
191
  "source": [
192
+ "!for i in $(echo $TASKS|tr ',' ' '); do lm_eval \\\n",
193
+ "--model hf --model_args pretrained=meta-llama/Llama-3.2-3B-Instruct,revision=392a143b624368100f77a3eafaa4a2468ba50a72,trust_remote_code=False,dtype=bfloat16,parallelize=False \\\n",
194
  "--tasks $i \\\n",
195
+ "--include_path $TASK_FOLDER/. --output $OUTPUT_FOLDER --log_samples \\\n",
196
+ "--batch_size auto; done &> run.log"
197
  ],
198
  "metadata": {
199
  "id": "oIACOAhDW5ow"
 
215
  {
216
  "cell_type": "code",
217
  "source": [
218
+ "!for i in $(echo $TASKS|tr ',' ' '); do lm_eval \\\n",
219
+ "--model hf --model_args pretrained=meta-llama/Meta-Llama-3-8B,revision=62bd457b6fe961a42a631306577e622c83876cb6,trust_remote_code=False,dtype=bfloat16,parallelize=False \\\n",
220
  "--tasks $i \\\n",
221
+ "--include_path $TASK_FOLDER/. --output $OUTPUT_FOLDER --log_samples \\\n",
222
+ "--batch_size auto; done &> run.log"
223
  ],
224
  "metadata": {
225
  "id": "1Nxw4WNxZUyb"
 
250
  {
251
  "cell_type": "code",
252
  "source": [
253
+ "!for i in $(echo $TASKS|tr ',' ' '); do lm_eval \\\n",
254
+ "--model hf --model_args pretrained=mistralai/Mixtral-8x7B-Instruct-v0.1,revision=41bd4c9e7e4fb318ca40e721131d4933966c2cc1,trust_remote_code=False,dtype=bfloat16,parallelize=True \\\n",
255
  "--tasks $i \\\n",
256
+ "--include_path $TASK_FOLDER/. --output $OUTPUT_FOLDER --log_samples \\\n",
257
+ "--batch_size auto; done &> run.log"
258
  ],
259
  "metadata": {
260
  "id": "E3dBWV1V9C-O"
 
276
  {
277
  "cell_type": "code",
278
  "source": [
279
+ "!for i in $(echo $TASKS|tr ',' ' '); do lm_eval \\\n",
280
+ "--model hf --model_args pretrained=mistralai/Mixtral-8x22B-v0.1,revision=b03e260818710044a2f088d88fab12bb220884fb,trust_remote_code=False,dtype=bfloat16,parallelize=True \\\n",
281
  "--tasks $i \\\n",
282
+ "--include_path $TASK_FOLDER/. --output $OUTPUT_FOLDER --log_samples \\\n",
283
+ "--batch_size auto; done &> run.log"
284
  ],
285
  "metadata": {
286
  "id": "LPqTo2z29RKx"
llm_metaeval_eval_harness_results.ipynb ADDED
The diff for this file is too large to render. See raw diff