Spaces:
Runtime error
Runtime error
Delete app/cookies_openai_model_eval.ipynb
Browse files
app/cookies_openai_model_eval.ipynb
DELETED
@@ -1,797 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"cells": [
|
3 |
-
{
|
4 |
-
"attachments": {},
|
5 |
-
"cell_type": "markdown",
|
6 |
-
"metadata": {},
|
7 |
-
"source": [
|
8 |
-
"This Notebook is to test the various OpenAI models, prompts, and number of few-shot examples to see how they perform on the same task."
|
9 |
-
]
|
10 |
-
},
|
11 |
-
{
|
12 |
-
"cell_type": "code",
|
13 |
-
"execution_count": 1,
|
14 |
-
"metadata": {},
|
15 |
-
"outputs": [],
|
16 |
-
"source": [
|
17 |
-
"!pip install wandb --upgrade openai datasets -qU"
|
18 |
-
]
|
19 |
-
},
|
20 |
-
{
|
21 |
-
"cell_type": "code",
|
22 |
-
"execution_count": 2,
|
23 |
-
"metadata": {},
|
24 |
-
"outputs": [],
|
25 |
-
"source": [
|
26 |
-
"import os\n",
|
27 |
-
"from dotenv import load_dotenv\n",
|
28 |
-
"load_dotenv()\n",
|
29 |
-
"\n",
|
30 |
-
"import openai\n",
|
31 |
-
"\n",
|
32 |
-
"# set OPENAI_API_KEY environment variable from .env file\n",
|
33 |
-
"openai.api_key = os.getenv(\"OPENAI_API_KEY\")\n",
|
34 |
-
"\n",
|
35 |
-
"# import OpenAIChatCompletions class from openai_chat_completion.py file and compare_completion_and_prediction function from util.py file\n",
|
36 |
-
"from openai_chat_completion import OpenAIChatCompletions\n",
|
37 |
-
"from util import compare_completion_and_prediction"
|
38 |
-
]
|
39 |
-
},
|
40 |
-
{
|
41 |
-
"attachments": {},
|
42 |
-
"cell_type": "markdown",
|
43 |
-
"metadata": {},
|
44 |
-
"source": [
|
45 |
-
"Models:\n",
|
46 |
-
"- gpt-3.5-turbo\n",
|
47 |
-
"- gpt-4\n",
|
48 |
-
"\n",
|
49 |
-
"Prompts:\n",
|
50 |
-
"- gpt4-system-message.txt\n",
|
51 |
-
"\n",
|
52 |
-
"Few-shot examples:\n",
|
53 |
-
 0 ... 10"">
"> 0, 1, 2, 3, 5"
|
54 |
-
]
|
55 |
-
},
|
56 |
-
{
|
57 |
-
"attachments": {},
|
58 |
-
"cell_type": "markdown",
|
59 |
-
"metadata": {},
|
60 |
-
"source": [
|
61 |
-
"wandb setup:\n",
|
62 |
-
"- entity: kaleidoscope-data\n",
|
63 |
-
"- project: cookies_llm_experimental_eval\n",
|
64 |
-
"- tags: gpt-3.5-turbo, gpt-4, gpt4-system-message, few-shot"
|
65 |
-
]
|
66 |
-
},
|
67 |
-
{
|
68 |
-
"cell_type": "code",
|
69 |
-
"execution_count": 3,
|
70 |
-
"metadata": {},
|
71 |
-
"outputs": [
|
72 |
-
{
|
73 |
-
"name": "stderr",
|
74 |
-
"output_type": "stream",
|
75 |
-
"text": [
|
76 |
-
"Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.\n",
|
77 |
-
"\u001b[34m\u001b[1mwandb\u001b[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)\n",
|
78 |
-
"\u001b[34m\u001b[1mwandb\u001b[0m: You can find your API key in your browser here: https://wandb.ai/authorize\n",
|
79 |
-
"\u001b[34m\u001b[1mwandb\u001b[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit:\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[32m\u001b[41mERROR\u001b[0m API key must be 40 characters long, yours was 48\n",
|
80 |
-
"\u001b[34m\u001b[1mwandb\u001b[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)\n",
|
81 |
-
"\u001b[34m\u001b[1mwandb\u001b[0m: You can find your API key in your browser here: https://wandb.ai/authorize\n",
|
82 |
-
"\u001b[34m\u001b[1mwandb\u001b[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit:\u001b[34m\u001b[1mwandb\u001b[0m: Appending key for api.wandb.ai to your netrc file: /home/cmagganas/.netrc\n"
|
83 |
-
]
|
84 |
-
},
|
85 |
-
{
|
86 |
-
"data": {
|
87 |
-
"text/html": [
|
88 |
-
"Tracking run with wandb version 0.15.4"
|
89 |
-
],
|
90 |
-
"text/plain": [
|
91 |
-
"<IPython.core.display.HTML object>"
|
92 |
-
]
|
93 |
-
},
|
94 |
-
"metadata": {},
|
95 |
-
"output_type": "display_data"
|
96 |
-
},
|
97 |
-
{
|
98 |
-
"data": {
|
99 |
-
"text/html": [
|
100 |
-
"Run data is saved locally in <code>/home/cmagganas/kaleidoscope/llm_data_cleaner/app/wandb/run-20230626_114056-rbtf91s6</code>"
|
101 |
-
],
|
102 |
-
"text/plain": [
|
103 |
-
"<IPython.core.display.HTML object>"
|
104 |
-
]
|
105 |
-
},
|
106 |
-
"metadata": {},
|
107 |
-
"output_type": "display_data"
|
108 |
-
},
|
109 |
-
{
|
110 |
-
"data": {
|
111 |
-
"text/html": [
|
112 |
-
"Syncing run <strong><a href='https://wandb.ai/kaleidoscope-data/cookies_llm_experimental_eval/runs/rbtf91s6' target=\"_blank\">rose-puddle-7</a></strong> to <a href='https://wandb.ai/kaleidoscope-data/cookies_llm_experimental_eval' target=\"_blank\">Weights & Biases</a> (<a href='https://wandb.me/run' target=\"_blank\">docs</a>)<br/>"
|
113 |
-
],
|
114 |
-
"text/plain": [
|
115 |
-
"<IPython.core.display.HTML object>"
|
116 |
-
]
|
117 |
-
},
|
118 |
-
"metadata": {},
|
119 |
-
"output_type": "display_data"
|
120 |
-
},
|
121 |
-
{
|
122 |
-
"data": {
|
123 |
-
"text/html": [
|
124 |
-
" View project at <a href='https://wandb.ai/kaleidoscope-data/cookies_llm_experimental_eval' target=\"_blank\">https://wandb.ai/kaleidoscope-data/cookies_llm_experimental_eval</a>"
|
125 |
-
],
|
126 |
-
"text/plain": [
|
127 |
-
"<IPython.core.display.HTML object>"
|
128 |
-
]
|
129 |
-
},
|
130 |
-
"metadata": {},
|
131 |
-
"output_type": "display_data"
|
132 |
-
},
|
133 |
-
{
|
134 |
-
"data": {
|
135 |
-
"text/html": [
|
136 |
-
" View run at <a href='https://wandb.ai/kaleidoscope-data/cookies_llm_experimental_eval/runs/rbtf91s6' target=\"_blank\">https://wandb.ai/kaleidoscope-data/cookies_llm_experimental_eval/runs/rbtf91s6</a>"
|
137 |
-
],
|
138 |
-
"text/plain": [
|
139 |
-
"<IPython.core.display.HTML object>"
|
140 |
-
]
|
141 |
-
},
|
142 |
-
"metadata": {},
|
143 |
-
"output_type": "display_data"
|
144 |
-
}
|
145 |
-
],
|
146 |
-
"source": [
|
147 |
-
"from wandb.integration.openai import autolog\n",
|
148 |
-
"\n",
|
149 |
-
"autolog({\"project\":\"cookies_llm_experimental_eval\",\n",
|
150 |
-
" \"entity\": \"kaleidoscope-data\",\n",
|
151 |
-
" \"group\": \"cookies\",\n",
|
152 |
-
" \"job_type\": \"eval\"})"
|
153 |
-
]
|
154 |
-
},
|
155 |
-
{
|
156 |
-
"cell_type": "code",
|
157 |
-
"execution_count": 4,
|
158 |
-
"metadata": {},
|
159 |
-
"outputs": [],
|
160 |
-
"source": [
|
161 |
-
"# create an empty dataframe to store predictions\n",
|
162 |
-
"import pandas as pd\n",
|
163 |
-
"predictions_df = pd.DataFrame(columns=['model', 'system_message', 'n_shot', 'prompt', 'completion', 'prediction'])\n",
|
164 |
-
"\n",
|
165 |
-
"models_to_test = [\"gpt-4\", \"gpt-3.5-turbo\"]\n",
|
166 |
-
"sys_mes_to_test = [\"../prompts/gpt4-system-message.txt\", \"../prompts/gpt4-system-message2.txt\"] # names are arbitrary, same prompts but with \"####\" in system message 2\n",
|
167 |
-
"n_shots_to_test = [None, 1, 2, 3, 5]"
|
168 |
-
]
|
169 |
-
},
|
170 |
-
{
|
171 |
-
"cell_type": "code",
|
172 |
-
"execution_count": 6,
|
173 |
-
"metadata": {},
|
174 |
-
"outputs": [],
|
175 |
-
"source": [
|
176 |
-
"# if rerunning the below cell is required, set the following to True\n",
|
177 |
-
"rerun = False\n",
|
178 |
-
"if rerun:\n",
|
179 |
-
" predictions_df = pd.read_csv('../data/cookies_llm_eval_predictions.csv')"
|
180 |
-
]
|
181 |
-
},
|
182 |
-
{
|
183 |
-
"cell_type": "code",
|
184 |
-
"execution_count": 178,
|
185 |
-
"metadata": {},
|
186 |
-
"outputs": [],
|
187 |
-
"source": [
|
188 |
-
"# get predictions for all combinations of models, prompts, and n_shot values\n",
|
189 |
-
"# save predictions to dataframe and then to csv in data folder after each iteration\n",
|
190 |
-
"\n",
|
191 |
-
"# loop through models_to_test\n",
|
192 |
-
"for model in models_to_test:\n",
|
193 |
-
" # loop through prompts_to_test\n",
|
194 |
-
" for system_message in sys_mes_to_test:\n",
|
195 |
-
" # instantiate OpenAIChatCompletions class\n",
|
196 |
-
" chat = OpenAIChatCompletions(model=model, system_message=system_message)\n",
|
197 |
-
" # loop through n_shots_to_test\n",
|
198 |
-
" for n_shot in n_shots_to_test:\n",
|
199 |
-
" sys_mes_var = 1 if system_message == \"../prompts/gpt4-system-message.txt\" else 2\n",
|
200 |
-
" n_shot_var = 0 if n_shot == None else n_shot\n",
|
201 |
-
" # check if predictions for this model, system_message, and n_shot value have already been made\n",
|
202 |
-
" if predictions_df[(predictions_df['model'] == model) & (predictions_df['system_message'] == sys_mes_var) & (predictions_df['n_shot'] == n_shot_var)].shape[0] == 0:\n",
|
203 |
-
" prompts, completions, predictions = chat.predict_jsonl(n_shot=n_shot)\n",
|
204 |
-
" else:\n",
|
205 |
-
" # skip if predictions for this model, system_message, and n_shot value have already been made\n",
|
206 |
-
" continue\n",
|
207 |
-
" # save predictions to dataframe\n",
|
208 |
-
" df_to_append = pd.DataFrame({'model': model, 'system_message': sys_mes_var, 'n_shot': n_shot_var, 'prompt': prompts, 'completion': completions, 'prediction': predictions})\n",
|
209 |
-
" df_right = df_to_append['prediction'].apply(pd.Series)\n",
|
210 |
-
" df_right['prediction'] = df_right['choices'].apply(lambda x: x[0]['message']['content']).drop(columns=['choices'])\n",
|
211 |
-
" df_to_append = pd.concat([df_to_append[['model', 'system_message', 'n_shot', 'prompt', 'completion']], df_right], axis=1)\n",
|
212 |
-
" df_to_append.columns = ['model', 'system_message', 'n_shot', 'prompt', 'completion', 'id', 'object', 'created', 'openai_model', 'choices', 'usage', 'prediction']\n",
|
213 |
-
" # save predictions to dataframe\n",
|
214 |
-
" predictions_df = pd.concat([predictions_df, df_to_append], ignore_index=True)\n",
|
215 |
-
" # delete duplicates from dataframe\n",
|
216 |
-
" predictions_df = predictions_df[~predictions_df.duplicated(subset=['model', 'system_message', 'n_shot', 'prompt'])]\n",
|
217 |
-
" predictions_df.to_csv('../data/cookies_llm_eval_predictions.csv', index=False)"
|
218 |
-
]
|
219 |
-
},
|
220 |
-
{
|
221 |
-
"cell_type": "code",
|
222 |
-
"execution_count": 179,
|
223 |
-
"metadata": {},
|
224 |
-
"outputs": [],
|
225 |
-
"source": [
|
226 |
-
"predictions_df = predictions_df[~predictions_df.duplicated(subset=['model', 'system_message', 'n_shot', 'prompt'])]"
|
227 |
-
]
|
228 |
-
},
|
229 |
-
{
|
230 |
-
"cell_type": "code",
|
231 |
-
"execution_count": 180,
|
232 |
-
"metadata": {},
|
233 |
-
"outputs": [
|
234 |
-
{
|
235 |
-
"data": {
|
236 |
-
"text/plain": [
|
237 |
-
"(400, 12)"
|
238 |
-
]
|
239 |
-
},
|
240 |
-
"execution_count": 180,
|
241 |
-
"metadata": {},
|
242 |
-
"output_type": "execute_result"
|
243 |
-
}
|
244 |
-
],
|
245 |
-
"source": [
|
246 |
-
"predictions_df.shape"
|
247 |
-
]
|
248 |
-
},
|
249 |
-
{
|
250 |
-
"cell_type": "code",
|
251 |
-
"execution_count": 143,
|
252 |
-
"metadata": {},
|
253 |
-
"outputs": [],
|
254 |
-
"source": [
|
255 |
-
"# import numpy as np\n",
|
256 |
-
"\n",
|
257 |
-
"# ids = predictions_df['id'].isna()\n",
|
258 |
-
"# # apply pd.Series to predictions column for rows where id is not null and change system_message {0,1} to {1,2}\n",
|
259 |
-
"# new_df_right = predictions_df.loc[ids, 'prediction'].apply(pd.Series)\n",
|
260 |
-
"# new_df_right['prediction'] = new_df_right['choices'].apply(lambda x: x[0]['message']['content']).drop(columns=['choices'])\n",
|
261 |
-
"# new_df_left = predictions_df.loc[ids, ['model', 'system_message', 'n_shot', 'prompt', 'completion']].replace({0:1, 1:2})\n",
|
262 |
-
"# new_df = pd.concat([new_df_left, new_df_right], axis=1)\n",
|
263 |
-
"\n",
|
264 |
-
"# predictions_df.columns = ['model', 'system_message', 'n_shot', 'prompt', 'completion', 'id', 'object', 'created', 'openai_model', 'choices', 'usage', 'prediction']\n",
|
265 |
-
"# new_df.columns = ['model', 'system_message', 'n_shot', 'prompt', 'completion', 'id', 'object', 'created', 'openai_model', 'choices', 'usage', 'prediction']\n",
|
266 |
-
"# predictions_df.loc[ids] = new_df"
|
267 |
-
]
|
268 |
-
},
|
269 |
-
{
|
270 |
-
"cell_type": "code",
|
271 |
-
"execution_count": 155,
|
272 |
-
"metadata": {},
|
273 |
-
"outputs": [],
|
274 |
-
"source": [
|
275 |
-
"# for col in ['model','system_message','n_shot']:\n",
|
276 |
-
"# print(predictions_df[col].value_counts())"
|
277 |
-
]
|
278 |
-
},
|
279 |
-
{
|
280 |
-
"cell_type": "code",
|
281 |
-
"execution_count": 84,
|
282 |
-
"metadata": {},
|
283 |
-
"outputs": [],
|
284 |
-
"source": [
|
285 |
-
"# import numpy as np\n",
|
286 |
-
"\n",
|
287 |
-
"# # create a copy of predictions_df to manipulate\n",
|
288 |
-
"# new_predictions_df = predictions_df\n",
|
289 |
-
"\n",
|
290 |
-
"# # replace names with 1 or 2\n",
|
291 |
-
"# def replace_sys_mes_name(x):\n",
|
292 |
-
"# if x == \"../prompts/gpt4-system-message.txt\":\n",
|
293 |
-
"# return \"1\"\n",
|
294 |
-
"# elif x == \"../prompts/gpt4-system-message2.txt\":\n",
|
295 |
-
"# return \"2\"\n",
|
296 |
-
"# else:\n",
|
297 |
-
"# return x\n",
|
298 |
-
"# new_predictions_df['system_message'] = new_predictions_df['system_message'].apply(lambda x: replace_sys_mes_name(x))\n",
|
299 |
-
"# # replace None with 0\n",
|
300 |
-
"# new_predictions_df['n_shot'] = new_predictions_df['n_shot'].apply(lambda x: 0 if x == None or np.nan else x)\n",
|
301 |
-
"\n",
|
302 |
-
"# # break up prediction column into sub columns by each of json keys\n",
|
303 |
-
"# new_predictions_df = pd.concat([new_predictions_df, new_predictions_df['prediction'].apply(pd.Series)], axis=1)"
|
304 |
-
]
|
305 |
-
},
|
306 |
-
{
|
307 |
-
"cell_type": "code",
|
308 |
-
"execution_count": 168,
|
309 |
-
"metadata": {},
|
310 |
-
"outputs": [],
|
311 |
-
"source": [
|
312 |
-
"# predictions_df.drop(columns=['num_correct'], inplace=True)"
|
313 |
-
]
|
314 |
-
},
|
315 |
-
{
|
316 |
-
"cell_type": "code",
|
317 |
-
"execution_count": 181,
|
318 |
-
"metadata": {},
|
319 |
-
"outputs": [
|
320 |
-
{
|
321 |
-
"data": {
|
322 |
-
"text/html": [
|
323 |
-
"<div>\n",
|
324 |
-
"<style scoped>\n",
|
325 |
-
" .dataframe tbody tr th:only-of-type {\n",
|
326 |
-
" vertical-align: middle;\n",
|
327 |
-
" }\n",
|
328 |
-
"\n",
|
329 |
-
" .dataframe tbody tr th {\n",
|
330 |
-
" vertical-align: top;\n",
|
331 |
-
" }\n",
|
332 |
-
"\n",
|
333 |
-
" .dataframe thead th {\n",
|
334 |
-
" text-align: right;\n",
|
335 |
-
" }\n",
|
336 |
-
"</style>\n",
|
337 |
-
"<table border=\"1\" class=\"dataframe\">\n",
|
338 |
-
" <thead>\n",
|
339 |
-
" <tr style=\"text-align: right;\">\n",
|
340 |
-
" <th></th>\n",
|
341 |
-
" <th>model</th>\n",
|
342 |
-
" <th>system_message</th>\n",
|
343 |
-
" <th>n_shot</th>\n",
|
344 |
-
" <th>prompt</th>\n",
|
345 |
-
" <th>completion</th>\n",
|
346 |
-
" <th>id</th>\n",
|
347 |
-
" <th>object</th>\n",
|
348 |
-
" <th>created</th>\n",
|
349 |
-
" <th>openai_model</th>\n",
|
350 |
-
" <th>choices</th>\n",
|
351 |
-
" <th>usage</th>\n",
|
352 |
-
" <th>prediction</th>\n",
|
353 |
-
" </tr>\n",
|
354 |
-
" </thead>\n",
|
355 |
-
" <tbody>\n",
|
356 |
-
" <tr>\n",
|
357 |
-
" <th>0</th>\n",
|
358 |
-
" <td>gpt-4</td>\n",
|
359 |
-
" <td>1</td>\n",
|
360 |
-
" <td>0</td>\n",
|
361 |
-
" <td>co-2MFE5QVF,Chill Medicated - Watermelon - Syr...</td>\n",
|
362 |
-
" <td>Chill Medicated,Edible,Beverage,nan,nan</td>\n",
|
363 |
-
" <td>chatcmpl-7VlTkjAqXNRWfltMPpr5v37uBJIsg</td>\n",
|
364 |
-
" <td>chat.completion</td>\n",
|
365 |
-
" <td>1.687805e+09</td>\n",
|
366 |
-
" <td>gpt-4-0314</td>\n",
|
367 |
-
" <td>[<OpenAIObject at 0x7fcf7fde94e0> JSON: {\\n \"...</td>\n",
|
368 |
-
" <td>{\\n \"prompt_tokens\": 54,\\n \"completion_token...</td>\n",
|
369 |
-
" <td>Hello! It looks like you mentioned a product: ...</td>\n",
|
370 |
-
" </tr>\n",
|
371 |
-
" <tr>\n",
|
372 |
-
" <th>1</th>\n",
|
373 |
-
" <td>gpt-4</td>\n",
|
374 |
-
" <td>1</td>\n",
|
375 |
-
" <td>0</td>\n",
|
376 |
-
" <td>bl-111630024545,Feelz - Space Cowboy 3.5g,nan,...</td>\n",
|
377 |
-
" <td>Feelz,Flower,Bud,Space Cowboy,3.5</td>\n",
|
378 |
-
" <td>chatcmpl-7VlTtGF3RGsngfKB1BXufxoTixX2v</td>\n",
|
379 |
-
" <td>chat.completion</td>\n",
|
380 |
-
" <td>1.687805e+09</td>\n",
|
381 |
-
" <td>gpt-4-0314</td>\n",
|
382 |
-
" <td>[<OpenAIObject at 0x7fcf7f49d2b0> JSON: {\\n \"...</td>\n",
|
383 |
-
" <td>{\\n \"prompt_tokens\": 51,\\n \"completion_token...</td>\n",
|
384 |
-
" <td>Hello! It seems like you are referring to a pr...</td>\n",
|
385 |
-
" </tr>\n",
|
386 |
-
" <tr>\n",
|
387 |
-
" <th>2</th>\n",
|
388 |
-
" <td>gpt-4</td>\n",
|
389 |
-
" <td>1</td>\n",
|
390 |
-
" <td>0</td>\n",
|
391 |
-
" <td>fl-8voAjt83sD,Champelli | Xclusivo 3.5g | Eigh...</td>\n",
|
392 |
-
" <td>Champelli,Flower,Bud,Xclusivo,3.5</td>\n",
|
393 |
-
" <td>chatcmpl-7VlU80b0m00VaiGymtj9dbqOggTgR</td>\n",
|
394 |
-
" <td>chat.completion</td>\n",
|
395 |
-
" <td>1.687805e+09</td>\n",
|
396 |
-
" <td>gpt-4-0314</td>\n",
|
397 |
-
" <td>[<OpenAIObject at 0x7fcf7e306890> JSON: {\\n \"...</td>\n",
|
398 |
-
" <td>{\\n \"prompt_tokens\": 71,\\n \"completion_token...</td>\n",
|
399 |
-
" <td>Hello! It seems like you're interested in the ...</td>\n",
|
400 |
-
" </tr>\n",
|
401 |
-
" <tr>\n",
|
402 |
-
" <th>3</th>\n",
|
403 |
-
" <td>gpt-4</td>\n",
|
404 |
-
" <td>1</td>\n",
|
405 |
-
" <td>0</td>\n",
|
406 |
-
" <td>bl-073133213364,CAM - Mellowz #7 7g,nan,FLOWER...</td>\n",
|
407 |
-
" <td>CAM,Flower,Bud,Mellowz #7,7</td>\n",
|
408 |
-
" <td>chatcmpl-7VlUHqbsG2kpFHDxAWfsryh6pHmC9</td>\n",
|
409 |
-
" <td>chat.completion</td>\n",
|
410 |
-
" <td>1.687805e+09</td>\n",
|
411 |
-
" <td>gpt-4-0314</td>\n",
|
412 |
-
" <td>[<OpenAIObject at 0x7fcf7e33d940> JSON: {\\n \"...</td>\n",
|
413 |
-
" <td>{\\n \"prompt_tokens\": 49,\\n \"completion_token...</td>\n",
|
414 |
-
" <td>It seems like you are looking for information ...</td>\n",
|
415 |
-
" </tr>\n",
|
416 |
-
" <tr>\n",
|
417 |
-
" <th>4</th>\n",
|
418 |
-
" <td>gpt-4</td>\n",
|
419 |
-
" <td>1</td>\n",
|
420 |
-
" <td>0</td>\n",
|
421 |
-
" <td>fl-fwJQL2AWnS,Backpack Boyz | Bubblegum Gelato...</td>\n",
|
422 |
-
" <td>Backpack Boyz,Edible,CBD Tincture/Caps/etc,nan...</td>\n",
|
423 |
-
" <td>chatcmpl-7VlUYvcad2wahIMHavhDEkYrgvjpw</td>\n",
|
424 |
-
" <td>chat.completion</td>\n",
|
425 |
-
" <td>1.687805e+09</td>\n",
|
426 |
-
" <td>gpt-4-0314</td>\n",
|
427 |
-
" <td>[<OpenAIObject at 0x7fcf7e306980> JSON: {\\n \"...</td>\n",
|
428 |
-
" <td>{\\n \"prompt_tokens\": 59,\\n \"completion_token...</td>\n",
|
429 |
-
" <td>Hello! It seems like you are looking for infor...</td>\n",
|
430 |
-
" </tr>\n",
|
431 |
-
" <tr>\n",
|
432 |
-
" <th>...</th>\n",
|
433 |
-
" <td>...</td>\n",
|
434 |
-
" <td>...</td>\n",
|
435 |
-
" <td>...</td>\n",
|
436 |
-
" <td>...</td>\n",
|
437 |
-
" <td>...</td>\n",
|
438 |
-
" <td>...</td>\n",
|
439 |
-
" <td>...</td>\n",
|
440 |
-
" <td>...</td>\n",
|
441 |
-
" <td>...</td>\n",
|
442 |
-
" <td>...</td>\n",
|
443 |
-
" <td>...</td>\n",
|
444 |
-
" <td>...</td>\n",
|
445 |
-
" </tr>\n",
|
446 |
-
" <tr>\n",
|
447 |
-
" <th>395</th>\n",
|
448 |
-
" <td>gpt-3.5-turbo</td>\n",
|
449 |
-
" <td>2</td>\n",
|
450 |
-
" <td>1</td>\n",
|
451 |
-
" <td>co-76GP441T,Minntz - Emerald Cut - Indoor - Jo...</td>\n",
|
452 |
-
" <td>Minntz,Preroll,Joint,Emerald Cut,1</td>\n",
|
453 |
-
" <td>chatcmpl-7VrjRMvs2l8EJd4PVecpSRPCvV9Hk</td>\n",
|
454 |
-
" <td>chat.completion</td>\n",
|
455 |
-
" <td>1.687829e+09</td>\n",
|
456 |
-
" <td>gpt-3.5-turbo-0301</td>\n",
|
457 |
-
" <td>[{'index': 0, 'message': {'role': 'assistant',...</td>\n",
|
458 |
-
" <td>{'prompt_tokens': 125, 'completion_tokens': 23...</td>\n",
|
459 |
-
" <td>Minntz,Joint,Indoor,Emerald Cut,1g,co-76GP441T.</td>\n",
|
460 |
-
" </tr>\n",
|
461 |
-
" <tr>\n",
|
462 |
-
" <th>396</th>\n",
|
463 |
-
" <td>gpt-3.5-turbo</td>\n",
|
464 |
-
" <td>2</td>\n",
|
465 |
-
" <td>1</td>\n",
|
466 |
-
" <td>co-5RAWYHYQ,The Growers Circle - Double Down -...</td>\n",
|
467 |
-
" <td>The Growers Circle,Flower,Bud,Double Down,3.5</td>\n",
|
468 |
-
" <td>chatcmpl-7VrjT3wfVoLtq3G6xksfVtLz4FloJ</td>\n",
|
469 |
-
" <td>chat.completion</td>\n",
|
470 |
-
" <td>1.687829e+09</td>\n",
|
471 |
-
" <td>gpt-3.5-turbo-0301</td>\n",
|
472 |
-
" <td>[{'index': 0, 'message': {'role': 'assistant',...</td>\n",
|
473 |
-
" <td>{'prompt_tokens': 123, 'completion_tokens': 22...</td>\n",
|
474 |
-
" <td>The Growers Circle,Double Down,Indoor,3.5g,5RA...</td>\n",
|
475 |
-
" </tr>\n",
|
476 |
-
" <tr>\n",
|
477 |
-
" <th>397</th>\n",
|
478 |
-
" <td>gpt-3.5-turbo</td>\n",
|
479 |
-
" <td>2</td>\n",
|
480 |
-
" <td>1</td>\n",
|
481 |
-
" <td>md-1195389,Blue Dream Roll Your Own Sugar Shak...</td>\n",
|
482 |
-
" <td>Pacific Stone,Flower,Bud,nan,14</td>\n",
|
483 |
-
" <td>chatcmpl-7VrjVafi1eGBXYfgmGBN0H3b0FzYO</td>\n",
|
484 |
-
" <td>chat.completion</td>\n",
|
485 |
-
" <td>1.687829e+09</td>\n",
|
486 |
-
" <td>gpt-3.5-turbo-0301</td>\n",
|
487 |
-
" <td>[{'index': 0, 'message': {'role': 'assistant',...</td>\n",
|
488 |
-
" <td>{'prompt_tokens': 119, 'completion_tokens': 20...</td>\n",
|
489 |
-
" <td>Pacific Stone,Sugar Shake,Blue Dream,Roll Your...</td>\n",
|
490 |
-
" </tr>\n",
|
491 |
-
" <tr>\n",
|
492 |
-
" <th>398</th>\n",
|
493 |
-
" <td>gpt-3.5-turbo</td>\n",
|
494 |
-
" <td>2</td>\n",
|
495 |
-
" <td>1</td>\n",
|
496 |
-
" <td>co-847ZXF37,The Grower Circle - Zoo Dawg x Cos...</td>\n",
|
497 |
-
" <td>The Growers Circle,Preroll,Joint,Zoo Dawg x Co...</td>\n",
|
498 |
-
" <td>chatcmpl-7VrjWQpcRxJTdr3f4BUd7totDZpdF</td>\n",
|
499 |
-
" <td>chat.completion</td>\n",
|
500 |
-
" <td>1.687829e+09</td>\n",
|
501 |
-
" <td>gpt-3.5-turbo-0301</td>\n",
|
502 |
-
" <td>[{'index': 0, 'message': {'role': 'assistant',...</td>\n",
|
503 |
-
" <td>{'prompt_tokens': 133, 'completion_tokens': 32...</td>\n",
|
504 |
-
" <td>Multi Joint,Zoo Dawg x Cosa Nostra,The Grower ...</td>\n",
|
505 |
-
" </tr>\n",
|
506 |
-
" <tr>\n",
|
507 |
-
" <th>399</th>\n",
|
508 |
-
" <td>gpt-3.5-turbo</td>\n",
|
509 |
-
" <td>2</td>\n",
|
510 |
-
" <td>1</td>\n",
|
511 |
-
" <td>co-8EMW15ZM,Flight Bites - S'mores - Gummy - 1...</td>\n",
|
512 |
-
" <td>Flight Bites,Edible,Gummies,nan,nan</td>\n",
|
513 |
-
" <td>chatcmpl-7VrjXiUHiyUyH7udPXIjANVmAUrra</td>\n",
|
514 |
-
" <td>chat.completion</td>\n",
|
515 |
-
" <td>1.687829e+09</td>\n",
|
516 |
-
" <td>gpt-3.5-turbo-0301</td>\n",
|
517 |
-
" <td>[{'index': 0, 'message': {'role': 'assistant',...</td>\n",
|
518 |
-
" <td>{'prompt_tokens': 129, 'completion_tokens': 21...</td>\n",
|
519 |
-
" <td>Flight Bites,Gummy,S'mores,10 count,100mg CO₂ ...</td>\n",
|
520 |
-
" </tr>\n",
|
521 |
-
" </tbody>\n",
|
522 |
-
"</table>\n",
|
523 |
-
"<p>400 rows × 12 columns</p>\n",
|
524 |
-
"</div>"
|
525 |
-
],
|
526 |
-
"text/plain": [
|
527 |
-
" model system_message n_shot \\\n",
|
528 |
-
"0 gpt-4 1 0 \n",
|
529 |
-
"1 gpt-4 1 0 \n",
|
530 |
-
"2 gpt-4 1 0 \n",
|
531 |
-
"3 gpt-4 1 0 \n",
|
532 |
-
"4 gpt-4 1 0 \n",
|
533 |
-
".. ... ... ... \n",
|
534 |
-
"395 gpt-3.5-turbo 2 1 \n",
|
535 |
-
"396 gpt-3.5-turbo 2 1 \n",
|
536 |
-
"397 gpt-3.5-turbo 2 1 \n",
|
537 |
-
"398 gpt-3.5-turbo 2 1 \n",
|
538 |
-
"399 gpt-3.5-turbo 2 1 \n",
|
539 |
-
"\n",
|
540 |
-
" prompt \\\n",
|
541 |
-
"0 co-2MFE5QVF,Chill Medicated - Watermelon - Syr... \n",
|
542 |
-
"1 bl-111630024545,Feelz - Space Cowboy 3.5g,nan,... \n",
|
543 |
-
"2 fl-8voAjt83sD,Champelli | Xclusivo 3.5g | Eigh... \n",
|
544 |
-
"3 bl-073133213364,CAM - Mellowz #7 7g,nan,FLOWER... \n",
|
545 |
-
"4 fl-fwJQL2AWnS,Backpack Boyz | Bubblegum Gelato... \n",
|
546 |
-
".. ... \n",
|
547 |
-
"395 co-76GP441T,Minntz - Emerald Cut - Indoor - Jo... \n",
|
548 |
-
"396 co-5RAWYHYQ,The Growers Circle - Double Down -... \n",
|
549 |
-
"397 md-1195389,Blue Dream Roll Your Own Sugar Shak... \n",
|
550 |
-
"398 co-847ZXF37,The Grower Circle - Zoo Dawg x Cos... \n",
|
551 |
-
"399 co-8EMW15ZM,Flight Bites - S'mores - Gummy - 1... \n",
|
552 |
-
"\n",
|
553 |
-
" completion \\\n",
|
554 |
-
"0 Chill Medicated,Edible,Beverage,nan,nan \n",
|
555 |
-
"1 Feelz,Flower,Bud,Space Cowboy,3.5 \n",
|
556 |
-
"2 Champelli,Flower,Bud,Xclusivo,3.5 \n",
|
557 |
-
"3 CAM,Flower,Bud,Mellowz #7,7 \n",
|
558 |
-
"4 Backpack Boyz,Edible,CBD Tincture/Caps/etc,nan... \n",
|
559 |
-
".. ... \n",
|
560 |
-
"395 Minntz,Preroll,Joint,Emerald Cut,1 \n",
|
561 |
-
"396 The Growers Circle,Flower,Bud,Double Down,3.5 \n",
|
562 |
-
"397 Pacific Stone,Flower,Bud,nan,14 \n",
|
563 |
-
"398 The Growers Circle,Preroll,Joint,Zoo Dawg x Co... \n",
|
564 |
-
"399 Flight Bites,Edible,Gummies,nan,nan \n",
|
565 |
-
"\n",
|
566 |
-
" id object created \\\n",
|
567 |
-
"0 chatcmpl-7VlTkjAqXNRWfltMPpr5v37uBJIsg chat.completion 1.687805e+09 \n",
|
568 |
-
"1 chatcmpl-7VlTtGF3RGsngfKB1BXufxoTixX2v chat.completion 1.687805e+09 \n",
|
569 |
-
"2 chatcmpl-7VlU80b0m00VaiGymtj9dbqOggTgR chat.completion 1.687805e+09 \n",
|
570 |
-
"3 chatcmpl-7VlUHqbsG2kpFHDxAWfsryh6pHmC9 chat.completion 1.687805e+09 \n",
|
571 |
-
"4 chatcmpl-7VlUYvcad2wahIMHavhDEkYrgvjpw chat.completion 1.687805e+09 \n",
|
572 |
-
".. ... ... ... \n",
|
573 |
-
"395 chatcmpl-7VrjRMvs2l8EJd4PVecpSRPCvV9Hk chat.completion 1.687829e+09 \n",
|
574 |
-
"396 chatcmpl-7VrjT3wfVoLtq3G6xksfVtLz4FloJ chat.completion 1.687829e+09 \n",
|
575 |
-
"397 chatcmpl-7VrjVafi1eGBXYfgmGBN0H3b0FzYO chat.completion 1.687829e+09 \n",
|
576 |
-
"398 chatcmpl-7VrjWQpcRxJTdr3f4BUd7totDZpdF chat.completion 1.687829e+09 \n",
|
577 |
-
"399 chatcmpl-7VrjXiUHiyUyH7udPXIjANVmAUrra chat.completion 1.687829e+09 \n",
|
578 |
-
"\n",
|
579 |
-
" openai_model choices \\\n",
|
580 |
-
"0 gpt-4-0314 [<OpenAIObject at 0x7fcf7fde94e0> JSON: {\\n \"... \n",
|
581 |
-
"1 gpt-4-0314 [<OpenAIObject at 0x7fcf7f49d2b0> JSON: {\\n \"... \n",
|
582 |
-
"2 gpt-4-0314 [<OpenAIObject at 0x7fcf7e306890> JSON: {\\n \"... \n",
|
583 |
-
"3 gpt-4-0314 [<OpenAIObject at 0x7fcf7e33d940> JSON: {\\n \"... \n",
|
584 |
-
"4 gpt-4-0314 [<OpenAIObject at 0x7fcf7e306980> JSON: {\\n \"... \n",
|
585 |
-
".. ... ... \n",
|
586 |
-
"395 gpt-3.5-turbo-0301 [{'index': 0, 'message': {'role': 'assistant',... \n",
|
587 |
-
"396 gpt-3.5-turbo-0301 [{'index': 0, 'message': {'role': 'assistant',... \n",
|
588 |
-
"397 gpt-3.5-turbo-0301 [{'index': 0, 'message': {'role': 'assistant',... \n",
|
589 |
-
"398 gpt-3.5-turbo-0301 [{'index': 0, 'message': {'role': 'assistant',... \n",
|
590 |
-
"399 gpt-3.5-turbo-0301 [{'index': 0, 'message': {'role': 'assistant',... \n",
|
591 |
-
"\n",
|
592 |
-
" usage \\\n",
|
593 |
-
"0 {\\n \"prompt_tokens\": 54,\\n \"completion_token... \n",
|
594 |
-
"1 {\\n \"prompt_tokens\": 51,\\n \"completion_token... \n",
|
595 |
-
"2 {\\n \"prompt_tokens\": 71,\\n \"completion_token... \n",
|
596 |
-
"3 {\\n \"prompt_tokens\": 49,\\n \"completion_token... \n",
|
597 |
-
"4 {\\n \"prompt_tokens\": 59,\\n \"completion_token... \n",
|
598 |
-
".. ... \n",
|
599 |
-
"395 {'prompt_tokens': 125, 'completion_tokens': 23... \n",
|
600 |
-
"396 {'prompt_tokens': 123, 'completion_tokens': 22... \n",
|
601 |
-
"397 {'prompt_tokens': 119, 'completion_tokens': 20... \n",
|
602 |
-
"398 {'prompt_tokens': 133, 'completion_tokens': 32... \n",
|
603 |
-
"399 {'prompt_tokens': 129, 'completion_tokens': 21... \n",
|
604 |
-
"\n",
|
605 |
-
" prediction \n",
|
606 |
-
"0 Hello! It looks like you mentioned a product: ... \n",
|
607 |
-
"1 Hello! It seems like you are referring to a pr... \n",
|
608 |
-
"2 Hello! It seems like you're interested in the ... \n",
|
609 |
-
"3 It seems like you are looking for information ... \n",
|
610 |
-
"4 Hello! It seems like you are looking for infor... \n",
|
611 |
-
".. ... \n",
|
612 |
-
"395 Minntz,Joint,Indoor,Emerald Cut,1g,co-76GP441T. \n",
|
613 |
-
"396 The Growers Circle,Double Down,Indoor,3.5g,5RA... \n",
|
614 |
-
"397 Pacific Stone,Sugar Shake,Blue Dream,Roll Your... \n",
|
615 |
-
"398 Multi Joint,Zoo Dawg x Cosa Nostra,The Grower ... \n",
|
616 |
-
"399 Flight Bites,Gummy,S'mores,10 count,100mg CO₂ ... \n",
|
617 |
-
"\n",
|
618 |
-
"[400 rows x 12 columns]"
|
619 |
-
]
|
620 |
-
},
|
621 |
-
"execution_count": 181,
|
622 |
-
"metadata": {},
|
623 |
-
"output_type": "execute_result"
|
624 |
-
}
|
625 |
-
],
|
626 |
-
"source": [
|
627 |
-
"predictions_df"
|
628 |
-
]
|
629 |
-
},
|
630 |
-
{
|
631 |
-
"cell_type": "code",
|
632 |
-
"execution_count": 182,
|
633 |
-
"metadata": {},
|
634 |
-
"outputs": [
|
635 |
-
{
|
636 |
-
"data": {
|
637 |
-
"text/plain": [
|
638 |
-
"669"
|
639 |
-
]
|
640 |
-
},
|
641 |
-
"execution_count": 182,
|
642 |
-
"metadata": {},
|
643 |
-
"output_type": "execute_result"
|
644 |
-
}
|
645 |
-
],
|
646 |
-
"source": [
|
647 |
-
"from util import compare_completion_and_prediction\n",
|
648 |
-
"\n",
|
649 |
-
"# Function that uses compare_completion_and_prediction to return num_correct and return zero if there is an error\n",
|
650 |
-
"def get_num_correct(completion, prediction):\n",
|
651 |
-
" try:\n",
|
652 |
-
" return compare_completion_and_prediction(completion, prediction)['num_correct']\n",
|
653 |
-
" except:\n",
|
654 |
-
" return 0 # this will be the case when format is incorrect\n",
|
655 |
-
" \n",
|
656 |
-
"# Apply get_num_correct function to predictions_df dataframe\n",
|
657 |
-
"predictions_df['num_correct'] = predictions_df.apply(lambda row: get_num_correct(row['completion'], row['prediction']), axis=1)\n",
|
658 |
-
"predictions_df['num_correct'].sum() # out of 1000 possible correct predictions (20 samples * 5 cols per sample) * (2 system messages * 2 models * 5 n_shot values)"
|
659 |
-
]
|
660 |
-
},
|
661 |
-
{
|
662 |
-
"cell_type": "code",
|
663 |
-
"execution_count": 187,
|
664 |
-
"metadata": {},
|
665 |
-
"outputs": [
|
666 |
-
{
|
667 |
-
"data": {
|
668 |
-
"text/plain": [
|
669 |
-
"model system_message n_shot\n",
|
670 |
-
"gpt-3.5-turbo 1 0 0.00\n",
|
671 |
-
" 1 0.00\n",
|
672 |
-
" 2 0 0.00\n",
|
673 |
-
"gpt-4 1 0 0.00\n",
|
674 |
-
" 1 0.00\n",
|
675 |
-
" 2 0 0.00\n",
|
676 |
-
"gpt-3.5-turbo 1 2 0.24\n",
|
677 |
-
" 2 1 0.24\n",
|
678 |
-
" 2 0.27\n",
|
679 |
-
" 3 0.36\n",
|
680 |
-
" 1 3 0.40\n",
|
681 |
-
" 5 0.44\n",
|
682 |
-
"gpt-4 2 2 0.45\n",
|
683 |
-
" 1 2 0.45\n",
|
684 |
-
" 2 1 0.47\n",
|
685 |
-
"gpt-3.5-turbo 2 5 0.56\n",
|
686 |
-
"gpt-4 1 3 0.62\n",
|
687 |
-
" 2 3 0.67\n",
|
688 |
-
" 5 0.73\n",
|
689 |
-
" 1 5 0.79\n",
|
690 |
-
"Name: num_correct, dtype: float64"
|
691 |
-
]
|
692 |
-
},
|
693 |
-
"execution_count": 187,
|
694 |
-
"metadata": {},
|
695 |
-
"output_type": "execute_result"
|
696 |
-
}
|
697 |
-
],
|
698 |
-
"source": [
|
699 |
-
"predictions_df.groupby(['model', 'system_message', 'n_shot'])['num_correct'].sum().sort_values() / 100 # out of 100 possible correct predictions (20 samples * 5 cols per sample)"
|
700 |
-
]
|
701 |
-
},
|
702 |
-
{
|
703 |
-
"cell_type": "code",
|
704 |
-
"execution_count": 184,
|
705 |
-
"metadata": {},
|
706 |
-
"outputs": [],
|
707 |
-
"source": [
|
708 |
-
"new_predictions_df.to_csv('../data/cookies_llm_eval_proc_preds.csv', index=False)"
|
709 |
-
]
|
710 |
-
},
|
711 |
-
{
|
712 |
-
"cell_type": "code",
|
713 |
-
"execution_count": 76,
|
714 |
-
"metadata": {},
|
715 |
-
"outputs": [
|
716 |
-
{
|
717 |
-
"data": {
|
718 |
-
"text/html": [
|
719 |
-
"Waiting for W&B process to finish... <strong style=\"color:green\">(success).</strong>"
|
720 |
-
],
|
721 |
-
"text/plain": [
|
722 |
-
"<IPython.core.display.HTML object>"
|
723 |
-
]
|
724 |
-
},
|
725 |
-
"metadata": {},
|
726 |
-
"output_type": "display_data"
|
727 |
-
},
|
728 |
-
{
|
729 |
-
"data": {
|
730 |
-
"text/html": [
|
731 |
-
"<style>\n",
|
732 |
-
" table.wandb td:nth-child(1) { padding: 0 10px; text-align: left ; width: auto;} td:nth-child(2) {text-align: left ; width: 100%}\n",
|
733 |
-
" .wandb-row { display: flex; flex-direction: row; flex-wrap: wrap; justify-content: flex-start; width: 100% }\n",
|
734 |
-
" .wandb-col { display: flex; flex-direction: column; flex-basis: 100%; flex: 1; padding: 10px; }\n",
|
735 |
-
" </style>\n",
|
736 |
-
"<div class=\"wandb-row\"><div class=\"wandb-col\"><h3>Run history:</h3><br/><table class=\"wandb\"><tr><td>usage/completion_tokens</td><td>▆▆▁▁▁▁▁▁▁▁█▄▁▁▁▁▁▁▁▃▁▁▁▆▂▆▃▅▄▅▆▄▃▁▁▁▁▁▁▁</td></tr><tr><td>usage/elapsed_time</td><td>▄▆▁▁▁▁▂▁▂▁█▃▁▁▁▂▁▁▂▁▁▁▁▄▂▄▂▃▃▄▅▂▁▁▁▁▂▁▁▁</td></tr><tr><td>usage/prompt_tokens</td><td>▁▁▂▂▄▄▆▅██▁▁▃▃▄▅▅██▁▁▃▃▁▁▁▁▁▁▂▁▂▁▄▄▆▆██▁</td></tr><tr><td>usage/total_tokens</td><td>▄▄▂▂▃▃▅▅█▇▆▃▂▂▄▅▅▇▇▂▁▃▂▄▂▄▂▄▃▄▄▃▂▄▃▅▆██▁</td></tr></table><br/></div><div class=\"wandb-col\"><h3>Run summary:</h3><br/><table class=\"wandb\"><tr><td>usage/completion_tokens</td><td>62</td></tr><tr><td>usage/elapsed_time</td><td>2.40086</td></tr><tr><td>usage/prompt_tokens</td><td>54</td></tr><tr><td>usage/total_tokens</td><td>116</td></tr></table><br/></div></div>"
|
737 |
-
],
|
738 |
-
"text/plain": [
|
739 |
-
"<IPython.core.display.HTML object>"
|
740 |
-
]
|
741 |
-
},
|
742 |
-
"metadata": {},
|
743 |
-
"output_type": "display_data"
|
744 |
-
},
|
745 |
-
{
|
746 |
-
"data": {
|
747 |
-
"text/html": [
|
748 |
-
" View run <strong style=\"color:#cdcd00\">rose-puddle-7</strong> at: <a href='https://wandb.ai/kaleidoscope-data/cookies_llm_experimental_eval/runs/rbtf91s6' target=\"_blank\">https://wandb.ai/kaleidoscope-data/cookies_llm_experimental_eval/runs/rbtf91s6</a><br/>Synced 6 W&B file(s), 422 media file(s), 422 artifact file(s) and 0 other file(s)"
|
749 |
-
],
|
750 |
-
"text/plain": [
|
751 |
-
"<IPython.core.display.HTML object>"
|
752 |
-
]
|
753 |
-
},
|
754 |
-
"metadata": {},
|
755 |
-
"output_type": "display_data"
|
756 |
-
},
|
757 |
-
{
|
758 |
-
"data": {
|
759 |
-
"text/html": [
|
760 |
-
"Find logs at: <code>./wandb/run-20230626_114056-rbtf91s6/logs</code>"
|
761 |
-
],
|
762 |
-
"text/plain": [
|
763 |
-
"<IPython.core.display.HTML object>"
|
764 |
-
]
|
765 |
-
},
|
766 |
-
"metadata": {},
|
767 |
-
"output_type": "display_data"
|
768 |
-
}
|
769 |
-
],
|
770 |
-
"source": [
|
771 |
-
"autolog.disable()"
|
772 |
-
]
|
773 |
-
}
|
774 |
-
],
|
775 |
-
"metadata": {
|
776 |
-
"kernelspec": {
|
777 |
-
"display_name": "kd-llm-dc",
|
778 |
-
"language": "python",
|
779 |
-
"name": "python3"
|
780 |
-
},
|
781 |
-
"language_info": {
|
782 |
-
"codemirror_mode": {
|
783 |
-
"name": "ipython",
|
784 |
-
"version": 3
|
785 |
-
},
|
786 |
-
"file_extension": ".py",
|
787 |
-
"mimetype": "text/x-python",
|
788 |
-
"name": "python",
|
789 |
-
"nbconvert_exporter": "python",
|
790 |
-
"pygments_lexer": "ipython3",
|
791 |
-
"version": "3.10.11"
|
792 |
-
},
|
793 |
-
"orig_nbformat": 4
|
794 |
-
},
|
795 |
-
"nbformat": 4,
|
796 |
-
"nbformat_minor": 2
|
797 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|