Upload 2 files
Browse files
llm_metaeval_eval_harness_pub.ipynb
CHANGED
@@ -4,7 +4,7 @@
|
|
4 |
"metadata": {
|
5 |
"colab": {
|
6 |
"provenance": [],
|
7 |
-
"gpuType": "
|
8 |
"machine_shape": "hm"
|
9 |
},
|
10 |
"kernelspec": {
|
@@ -121,7 +121,7 @@
|
|
121 |
"fewshot_split: test\n",
|
122 |
"fewshot_config:\n",
|
123 |
" sampler: first_n\n",
|
124 |
-
"num_fewshot:
|
125 |
"output_type: multiple_choice\n",
|
126 |
"doc_to_text: \"{{pretext.strip()}}\\n Options:\\n__options__\\nAnswer:\"\n",
|
127 |
"doc_to_choice: \"{{options}}\"\n",
|
@@ -130,9 +130,6 @@
|
|
130 |
" - metric: acc\n",
|
131 |
" aggregation: mean\n",
|
132 |
" higher_is_better: true\n",
|
133 |
-
" - metric: acc_norm\n",
|
134 |
-
" aggregation: mean\n",
|
135 |
-
" higher_is_better: true\n",
|
136 |
"\"\"\"\n",
|
137 |
"tasks = []\n",
|
138 |
"for t in YAML_template_pub_tasks:\n",
|
@@ -166,11 +163,11 @@
|
|
166 |
{
|
167 |
"cell_type": "code",
|
168 |
"source": [
|
169 |
-
"!for i in $(echo $TASKS|tr ',' ' '); do
|
170 |
-
"--model hf --model_args pretrained=meta-llama/Llama-3.2-1B-Instruct,parallelize=True \\\n",
|
171 |
"--tasks $i \\\n",
|
172 |
-
"--include_path $TASK_FOLDER/. --output $OUTPUT_FOLDER --
|
173 |
-
"--batch_size
|
174 |
],
|
175 |
"metadata": {
|
176 |
"id": "NOwy6ZlY3Mw7"
|
@@ -192,11 +189,11 @@
|
|
192 |
{
|
193 |
"cell_type": "code",
|
194 |
"source": [
|
195 |
-
"!for i in $(echo $TASKS|tr ',' ' '); do
|
196 |
-
"--model hf --model_args pretrained=meta-llama/Llama-3.2-3B-Instruct,parallelize=
|
197 |
"--tasks $i \\\n",
|
198 |
-
"--include_path $TASK_FOLDER/. --output $OUTPUT_FOLDER --
|
199 |
-
"--batch_size
|
200 |
],
|
201 |
"metadata": {
|
202 |
"id": "oIACOAhDW5ow"
|
@@ -218,11 +215,11 @@
|
|
218 |
{
|
219 |
"cell_type": "code",
|
220 |
"source": [
|
221 |
-
"!for i in $(echo $TASKS|tr ',' ' '); do
|
222 |
-
"--model hf --model_args pretrained=meta-llama/Meta-Llama-3-8B,parallelize=
|
223 |
"--tasks $i \\\n",
|
224 |
-
"--include_path $TASK_FOLDER/. --output $OUTPUT_FOLDER --
|
225 |
-
"--batch_size
|
226 |
],
|
227 |
"metadata": {
|
228 |
"id": "1Nxw4WNxZUyb"
|
@@ -253,11 +250,11 @@
|
|
253 |
{
|
254 |
"cell_type": "code",
|
255 |
"source": [
|
256 |
-
"!for i in $(echo $TASKS|tr ',' ' '); do
|
257 |
-
"--model hf --model_args pretrained=mistralai/Mixtral-8x7B-Instruct-v0.1,parallelize=True \\\n",
|
258 |
"--tasks $i \\\n",
|
259 |
-
"--include_path $TASK_FOLDER/. --output $OUTPUT_FOLDER --
|
260 |
-
"--batch_size
|
261 |
],
|
262 |
"metadata": {
|
263 |
"id": "E3dBWV1V9C-O"
|
@@ -279,11 +276,11 @@
|
|
279 |
{
|
280 |
"cell_type": "code",
|
281 |
"source": [
|
282 |
-
"!for i in $(echo $TASKS|tr ',' ' '); do
|
283 |
-
"--model hf --model_args pretrained=mistralai/Mixtral-8x22B-v0.1,parallelize=True \\\n",
|
284 |
"--tasks $i \\\n",
|
285 |
-
"--include_path $TASK_FOLDER/. --output $OUTPUT_FOLDER --
|
286 |
-
"--batch_size
|
287 |
],
|
288 |
"metadata": {
|
289 |
"id": "LPqTo2z29RKx"
|
|
|
4 |
"metadata": {
|
5 |
"colab": {
|
6 |
"provenance": [],
|
7 |
+
"gpuType": "T4",
|
8 |
"machine_shape": "hm"
|
9 |
},
|
10 |
"kernelspec": {
|
|
|
121 |
"fewshot_split: test\n",
|
122 |
"fewshot_config:\n",
|
123 |
" sampler: first_n\n",
|
124 |
+
"num_fewshot: 5\n",
|
125 |
"output_type: multiple_choice\n",
|
126 |
"doc_to_text: \"{{pretext.strip()}}\\n Options:\\n__options__\\nAnswer:\"\n",
|
127 |
"doc_to_choice: \"{{options}}\"\n",
|
|
|
130 |
" - metric: acc\n",
|
131 |
" aggregation: mean\n",
|
132 |
" higher_is_better: true\n",
|
|
|
|
|
|
|
133 |
"\"\"\"\n",
|
134 |
"tasks = []\n",
|
135 |
"for t in YAML_template_pub_tasks:\n",
|
|
|
163 |
{
|
164 |
"cell_type": "code",
|
165 |
"source": [
|
166 |
+
"!for i in $(echo $TASKS|tr ',' ' '); do lm_eval \\\n",
|
167 |
+
"--model hf --model_args pretrained=meta-llama/Llama-3.2-1B-Instruct,revision=d0a2081ed47e20ce524e8bc5d132f3fad2f69ff0,trust_remote_code=False,dtype=bfloat16,parallelize=True \\\n",
|
168 |
"--tasks $i \\\n",
|
169 |
+
"--include_path $TASK_FOLDER/. --output $OUTPUT_FOLDER --log_samples \\\n",
|
170 |
+
"--batch_size auto; done &> run.log"
|
171 |
],
|
172 |
"metadata": {
|
173 |
"id": "NOwy6ZlY3Mw7"
|
|
|
189 |
{
|
190 |
"cell_type": "code",
|
191 |
"source": [
|
192 |
+
"!for i in $(echo $TASKS|tr ',' ' '); do lm_eval \\\n",
|
193 |
+
"--model hf --model_args pretrained=meta-llama/Llama-3.2-3B-Instruct,revision=392a143b624368100f77a3eafaa4a2468ba50a72,trust_remote_code=False,dtype=bfloat16,parallelize=False \\\n",
|
194 |
"--tasks $i \\\n",
|
195 |
+
"--include_path $TASK_FOLDER/. --output $OUTPUT_FOLDER --log_samples \\\n",
|
196 |
+
"--batch_size auto; done &> run.log"
|
197 |
],
|
198 |
"metadata": {
|
199 |
"id": "oIACOAhDW5ow"
|
|
|
215 |
{
|
216 |
"cell_type": "code",
|
217 |
"source": [
|
218 |
+
"!for i in $(echo $TASKS|tr ',' ' '); do lm_eval \\\n",
|
219 |
+
"--model hf --model_args pretrained=meta-llama/Meta-Llama-3-8B,revision=62bd457b6fe961a42a631306577e622c83876cb6,trust_remote_code=False,dtype=bfloat16,parallelize=False \\\n",
|
220 |
"--tasks $i \\\n",
|
221 |
+
"--include_path $TASK_FOLDER/. --output $OUTPUT_FOLDER --log_samples \\\n",
|
222 |
+
"--batch_size auto; done &> run.log"
|
223 |
],
|
224 |
"metadata": {
|
225 |
"id": "1Nxw4WNxZUyb"
|
|
|
250 |
{
|
251 |
"cell_type": "code",
|
252 |
"source": [
|
253 |
+
"!for i in $(echo $TASKS|tr ',' ' '); do lm_eval \\\n",
|
254 |
+
"--model hf --model_args pretrained=mistralai/Mixtral-8x7B-Instruct-v0.1,revision=41bd4c9e7e4fb318ca40e721131d4933966c2cc1,trust_remote_code=False,dtype=bfloat16,parallelize=True \\\n",
|
255 |
"--tasks $i \\\n",
|
256 |
+
"--include_path $TASK_FOLDER/. --output $OUTPUT_FOLDER --log_samples \\\n",
|
257 |
+
"--batch_size auto; done &> run.log"
|
258 |
],
|
259 |
"metadata": {
|
260 |
"id": "E3dBWV1V9C-O"
|
|
|
276 |
{
|
277 |
"cell_type": "code",
|
278 |
"source": [
|
279 |
+
"!for i in $(echo $TASKS|tr ',' ' '); do lm_eval \\\n",
|
280 |
+
"--model hf --model_args pretrained=mistralai/Mixtral-8x22B-v0.1,revision=b03e260818710044a2f088d88fab12bb220884fb,trust_remote_code=False,dtype=bfloat16,parallelize=True \\\n",
|
281 |
"--tasks $i \\\n",
|
282 |
+
"--include_path $TASK_FOLDER/. --output $OUTPUT_FOLDER --log_samples \\\n",
|
283 |
+
"--batch_size auto; done &> run.log"
|
284 |
],
|
285 |
"metadata": {
|
286 |
"id": "LPqTo2z29RKx"
|
llm_metaeval_eval_harness_results.ipynb
ADDED
The diff for this file is too large to render.
See raw diff
|
|