flunardelli commited on
Commit
b0617ff
·
verified ·
1 Parent(s): edfa2e7

Upload 2 files

Browse files
llm_metaeval_eval_harness_Mixtral_8x22B_v0_1_mmlu.ipynb ADDED
@@ -0,0 +1,162 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "metadata": {
6
+ "id": "U8RTc2PmnX-v"
7
+ },
8
+ "source": [
9
+ "Initial setup"
10
+ ]
11
+ },
12
+ {
13
+ "cell_type": "code",
14
+ "execution_count": null,
15
+ "metadata": {
16
+ "id": "kGW7vfRkrqHe"
17
+ },
18
+ "outputs": [],
19
+ "source": [
20
+ "!pip install -r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt"
21
+ ]
22
+ },
23
+ {
24
+ "cell_type": "code",
25
+ "execution_count": null,
26
+ "metadata": {
27
+ "id": "2I850FIsCVNw"
28
+ },
29
+ "outputs": [],
30
+ "source": [
31
+ "from datetime import datetime\n",
32
+ "import os\n",
33
+ "from huggingface_hub import login, upload_folder\n",
34
+ "from google.colab import userdata\n",
35
+ "import shutil\n",
36
+ "\n",
37
+ "HF_TOKEN = userdata.get('HF_TOKEN')  # token comes from google.colab userdata, it is never hard-coded\n",
38
+ "login(HF_TOKEN, add_to_git_credential=True)  # keyword makes the meaning of the flag explicit\n",
39
+ "BASE_DATASET='mmlu'\n",
40
+ "REPO_ID='flunardelli/llm-metaeval'\n",
41
+ "BASE_FOLDER=f\"/content/{BASE_DATASET}/\"#{datetime.now().strftime('%Y-%m-%dT%H-%M-%S')}\n",
42
+ "OUTPUT_FOLDER=os.path.join(BASE_FOLDER,'output')\n",
43
+ "TASK_FOLDER=os.path.join(BASE_FOLDER,'tasks')\n",
44
+ "#shutil.rmtree(BASE_FOLDER)\n",
45
+ "os.makedirs(OUTPUT_FOLDER, exist_ok=True)  # exist_ok: re-running this cell must not raise FileExistsError\n",
46
+ "os.makedirs(TASK_FOLDER, exist_ok=True)\n",
47
+ "os.environ['HF_TOKEN'] = HF_TOKEN\n",
48
+ "os.environ['OUTPUT_FOLDER'] = OUTPUT_FOLDER  # expanded as $OUTPUT_FOLDER by the `!` shell cell\n",
49
+ "os.environ['TASK_FOLDER'] = TASK_FOLDER  # expanded as $TASK_FOLDER by the `!` shell cell\n",
50
+ "\n",
51
+ "def hf_upload_folder(folder_path):  # push a local folder to the REPO_ID dataset repository\n",
52
+ "    upload_folder(\n",
53
+ "        repo_id=REPO_ID,\n",
54
+ "        repo_type=\"dataset\",\n",
55
+ "        token=HF_TOKEN,\n",
56
+ "        folder_path=folder_path,\n",
57
+ "        path_in_repo=\"evals/\"  # destination prefix inside the repository\n",
58
+ "    )\n",
59
+ "\n",
60
+ "def create_task(content, filename):  # write `content` to TASK_FOLDER/<filename>, overwriting any existing file\n",
61
+ "    filename_path = os.path.join(TASK_FOLDER, filename)\n",
62
+ "    with open(filename_path, \"w\", encoding=\"utf-8\") as f:  # explicit encoding instead of the locale default\n",
63
+ "        f.write(content)"
64
+ ]
65
+ },
66
+ {
67
+ "cell_type": "markdown",
68
+ "metadata": {
69
+ "id": "Jd2JwKZaPkNS"
70
+ },
71
+ "source": [
72
+ "Create the task for the MMLU `all` dataset configuration"
73
+ ]
74
+ },
75
+ {
76
+ "cell_type": "code",
77
+ "execution_count": null,
78
+ "metadata": {
79
+ "id": "xP0cC_sHih7C"
80
+ },
81
+ "outputs": [],
82
+ "source": [
83
+ "YAML_mmlu_en_us_string = r\"\"\"\n",
84
+ "task: mmlu_all\n",
85
+ "dataset_path: cais/mmlu\n",
86
+ "dataset_name: all\n",
87
+ "description: \"MMLU dataset\"\n",
88
+ "test_split: test\n",
89
+ "fewshot_split: dev\n",
90
+ "fewshot_config:\n",
91
+ "  sampler: first_n\n",
92
+ "output_type: multiple_choice\n",
93
+ "doc_to_text: \"{{question.strip()}}\\nA. {{choices[0]}}\\nB. {{choices[1]}}\\nC. {{choices[2]}}\\nD. {{choices[3]}}\\nAnswer:\"\n",
94
+ "doc_to_choice: [\"A\", \"B\", \"C\", \"D\"]\n",
95
+ "doc_to_target: answer\n",
96
+ "metric_list:\n",
97
+ "  - metric: acc\n",
98
+ "    aggregation: mean\n",
99
+ "    higher_is_better: true\n",
100
+ "  - metric: acc_norm\n",
101
+ "    aggregation: mean\n",
102
+ "    higher_is_better: true\n",
103
+ "\"\"\"  # raw string: the \\n escapes must reach the YAML parser, which turns them into newlines in the prompt\n",
104
+ "create_task(YAML_mmlu_en_us_string, 'mmlu_en_us.yaml')  # the file lands in TASK_FOLDER\n",
105
+ "os.environ['TASKS'] = 'mmlu_all'  # expanded as $TASKS by the `!` shell cell\n"
106
+ ]
107
+ },
108
+ {
109
+ "cell_type": "markdown",
110
+ "metadata": {
111
+ "id": "1fEX-49hQ-Be"
112
+ },
113
+ "source": [
114
+ "Mistral Models"
115
+ ]
116
+ },
117
+ {
118
+ "cell_type": "code",
119
+ "execution_count": null,
120
+ "metadata": {
121
+ "id": "3cHI2qxN2fJ0"
122
+ },
123
+ "outputs": [],
124
+ "source": [
125
+ "!accelerate launch -m lm_eval \\\n",
126
+ "--model hf --model_args pretrained=mistralai/Mixtral-8x22B-v0.1 \\\n",
127
+ "--tasks $TASKS \\\n",
128
+ "--include_path $TASK_FOLDER/. --output $OUTPUT_FOLDER --use_cache cache --log_samples \\\n",
129
+ "--batch_size auto\n",
130
+ "#--limit 10 \\"
131
+ ]
132
+ },
133
+ {
134
+ "cell_type": "code",
135
+ "execution_count": null,
136
+ "metadata": {
137
+ "id": "mGGdqBNBzFYL"
138
+ },
139
+ "outputs": [],
140
+ "source": [
141
+ "hf_upload_folder(BASE_FOLDER)"
142
+ ]
143
+ }
144
+ ],
145
+ "metadata": {
146
+ "accelerator": "GPU",
147
+ "colab": {
148
+ "gpuType": "L4",
149
+ "machine_shape": "hm",
150
+ "provenance": []
151
+ },
152
+ "kernelspec": {
153
+ "display_name": "Python 3",
154
+ "name": "python3"
155
+ },
156
+ "language_info": {
157
+ "name": "python"
158
+ }
159
+ },
160
+ "nbformat": 4,
161
+ "nbformat_minor": 0
162
+ }
llm_metaeval_eval_harness_Mixtral_8x22B_v0_1_pub.ipynb ADDED
@@ -0,0 +1,209 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "nbformat": 4,
3
+ "nbformat_minor": 0,
4
+ "metadata": {
5
+ "colab": {
6
+ "provenance": [],
7
+ "gpuType": "T4",
8
+ "machine_shape": "hm"
9
+ },
10
+ "kernelspec": {
11
+ "name": "python3",
12
+ "display_name": "Python 3"
13
+ },
14
+ "language_info": {
15
+ "name": "python"
16
+ },
17
+ "accelerator": "GPU"
18
+ },
19
+ "cells": [
20
+ {
21
+ "cell_type": "markdown",
22
+ "source": [
23
+ "Initial setup"
24
+ ],
25
+ "metadata": {
26
+ "id": "U8RTc2PmnX-v"
27
+ }
28
+ },
29
+ {
30
+ "cell_type": "code",
31
+ "source": [
32
+ "!pip install -r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt"
33
+ ],
34
+ "metadata": {
35
+ "id": "kGW7vfRkrqHe"
36
+ },
37
+ "execution_count": null,
38
+ "outputs": []
39
+ },
40
+ {
41
+ "cell_type": "code",
42
+ "source": [
43
+ "from datetime import datetime\n",
44
+ "import os\n",
45
+ "from huggingface_hub import login, upload_folder\n",
46
+ "from google.colab import userdata\n",
47
+ "import shutil\n",
48
+ "\n",
49
+ "HF_TOKEN = userdata.get('HF_TOKEN')  # token comes from google.colab userdata, it is never hard-coded\n",
50
+ "login(HF_TOKEN, add_to_git_credential=True)  # keyword makes the meaning of the flag explicit\n",
51
+ "BASE_DATASET='pub'\n",
52
+ "REPO_ID='flunardelli/llm-metaeval'\n",
53
+ "BASE_FOLDER=f\"/content/{BASE_DATASET}/\"#{datetime.now().strftime('%Y-%m-%dT%H-%M-%S')}\n",
54
+ "OUTPUT_FOLDER=os.path.join(BASE_FOLDER,'output')\n",
55
+ "TASK_FOLDER=os.path.join(BASE_FOLDER,'tasks')\n",
56
+ "#shutil.rmtree(BASE_FOLDER)\n",
57
+ "os.makedirs(OUTPUT_FOLDER, exist_ok=True)  # exist_ok: re-running this cell must not raise FileExistsError\n",
58
+ "os.makedirs(TASK_FOLDER, exist_ok=True)\n",
59
+ "os.environ['HF_TOKEN'] = HF_TOKEN\n",
60
+ "os.environ['OUTPUT_FOLDER'] = OUTPUT_FOLDER  # expanded as $OUTPUT_FOLDER by the `!` shell cell\n",
61
+ "os.environ['TASK_FOLDER'] = TASK_FOLDER  # expanded as $TASK_FOLDER by the `!` shell cell\n",
62
+ "\n",
63
+ "def hf_upload_folder(folder_path):  # push a local folder to the REPO_ID dataset repository\n",
64
+ "    upload_folder(\n",
65
+ "        repo_id=REPO_ID,\n",
66
+ "        repo_type=\"dataset\",\n",
67
+ "        token=HF_TOKEN,\n",
68
+ "        folder_path=folder_path,\n",
69
+ "        path_in_repo=\"evals/\"  # destination prefix inside the repository\n",
70
+ "    )\n",
71
+ "\n",
72
+ "def create_task(content, filename):  # write `content` to TASK_FOLDER/<filename>, overwriting any existing file\n",
73
+ "    filename_path = os.path.join(TASK_FOLDER, filename)\n",
74
+ "    with open(filename_path, \"w\", encoding=\"utf-8\") as f:  # explicit encoding instead of the locale default\n",
75
+ "        f.write(content)"
76
+ ],
77
+ "metadata": {
78
+ "id": "IHxFvAC4eSnW"
79
+ },
80
+ "execution_count": null,
81
+ "outputs": []
82
+ },
83
+ {
84
+ "cell_type": "markdown",
85
+ "source": [
86
+ "Create one task per PUB dataset configuration"
87
+ ],
88
+ "metadata": {
89
+ "id": "Jd2JwKZaPkNS"
90
+ }
91
+ },
92
+ {
93
+ "cell_type": "code",
94
+ "source": [
95
+ "YAML_template_pub_tasks = [  # (dataset configuration name, number of answer options)\n",
96
+ "    (\"task_1\", 2),\n",
97
+ "    (\"task_2\", 5),\n",
98
+ "    (\"task_3\", 5),\n",
99
+ "    (\"task_4\", 3),\n",
100
+ "    (\"task_5\", 2),\n",
101
+ "    (\"task_6\", 2),\n",
102
+ "    (\"task_7\", 2),\n",
103
+ "    (\"task_8\", 2),\n",
104
+ "    (\"task_9\", 2),\n",
105
+ "    (\"task_10\", 3),\n",
106
+ "    (\"task_11\", 3),\n",
107
+ "    (\"task_12\", 2),\n",
108
+ "    (\"task_13\", 2),\n",
109
+ "    (\"task_14\", 4)\n",
110
+ "]\n",
111
+ "\n",
112
+ "default_doc_to_text = \"{{pretext.strip()}}\\n {{options[0]}}\\n{{options[1]}}\\\\n{{options[2]}}\\\\n{{options[3]}}\\\\n{{options[4]}}\\\\nAnswer:\"  # NOTE(review): not referenced anywhere in this cell\n",
113
+ "\n",
114
+ "\n",
115
+ "YAML_template_pub_base = r\"\"\"\n",
116
+ "task: __task_name__\n",
117
+ "dataset_path: flunardelli/PUB\n",
118
+ "dataset_name: __dataset_name__\n",
119
+ "description: \"PUB\"\n",
120
+ "test_split: test\n",
121
+ "fewshot_split: test\n",
122
+ "fewshot_config:\n",
123
+ "  sampler: first_n\n",
124
+ "num_fewshot: 10\n",
125
+ "output_type: multiple_choice\n",
126
+ "doc_to_text: \"{{pretext.strip()}}\\n Options:\\n__options__\\nAnswer:\"\n",
127
+ "doc_to_choice: \"{{options}}\"\n",
128
+ "doc_to_target: \"correct answer\"\n",
129
+ "metric_list:\n",
130
+ "  - metric: acc\n",
131
+ "    aggregation: mean\n",
132
+ "    higher_is_better: true\n",
133
+ "  - metric: acc_norm\n",
134
+ "    aggregation: mean\n",
135
+ "    higher_is_better: true\n",
136
+ "\"\"\"  # raw string: the \\n escapes must reach the YAML parser, which turns them into newlines in the prompt\n",
137
+ "tasks = []\n",
138
+ "for t in YAML_template_pub_tasks:\n",
139
+ "    dataset_name, num_choices = t\n",
140
+ "    task_name = f\"pub_{dataset_name}\"\n",
141
+ "    tasks.append(task_name)\n",
142
+ "    template_choices = '\\\\n'.join([\"{{options[__i__]}}\".replace('__i__',str(i)) for i in range(num_choices)])  # literal \\n: a YAML escape, not a raw line break\n",
143
+ "    template = (YAML_template_pub_base\n",
144
+ "        .replace('__options__',template_choices)\n",
145
+ "        .replace('__dataset_name__',dataset_name).replace('__task_name__',task_name)\n",
146
+ "    )\n",
147
+ "    create_task(template, f\"pub_{dataset_name}.yaml\")\n",
148
+ "\n",
149
+ "os.environ['TASKS'] = ','.join(tasks)"
150
+ ],
151
+ "metadata": {
152
+ "id": "xP0cC_sHih7C"
153
+ },
154
+ "execution_count": null,
155
+ "outputs": []
156
+ },
157
+ {
158
+ "cell_type": "markdown",
159
+ "source": [
160
+ "Mistral Models"
161
+ ],
162
+ "metadata": {
163
+ "id": "1fEX-49hQ-Be"
164
+ }
165
+ },
166
+ {
167
+ "cell_type": "code",
168
+ "source": [
169
+ "!for i in $(echo $TASKS|tr ',' ' '); do accelerate launch -m lm_eval \\\n",
170
+ "--model hf --model_args pretrained=mistralai/Mixtral-8x22B-v0.1 \\\n",
171
+ "--tasks $i \\\n",
172
+ "--include_path $TASK_FOLDER/. --output $OUTPUT_FOLDER --use_cache cache --log_samples \\\n",
173
+ "--batch_size auto; done"
174
+ ],
175
+ "metadata": {
176
+ "id": "LPqTo2z29RKx"
177
+ },
178
+ "execution_count": null,
179
+ "outputs": []
180
+ },
181
+ {
182
+ "cell_type": "markdown",
183
+ "source": [
184
+ "Save output results"
185
+ ],
186
+ "metadata": {
187
+ "id": "U8qh9BEbgBy7"
188
+ }
189
+ },
190
+ {
191
+ "cell_type": "code",
192
+ "source": [
193
+ "hf_upload_folder(BASE_FOLDER)"
194
+ ],
195
+ "metadata": {
196
+ "id": "ZQl05b1rf83u"
197
+ },
198
+ "execution_count": null,
199
+ "outputs": []
200
+ },
201
+ {
202
+ "cell_type": "markdown",
203
+ "source": [],
204
+ "metadata": {
205
+ "id": "ZUTPHnV0kMB1"
206
+ }
207
+ }
208
+ ]
209
+ }