Spaces:

kaleidoscope-data
/

data-cleaning-llm

Runtime error

App Files Files Community

cmagganas commited on Jul 20, 2023

Commit

23722a9

•

1 Parent(s): 03e5095

Delete app/cookies_openai_model_eval.ipynb

Browse files

Files changed (1) hide show

app/cookies_openai_model_eval.ipynb +0 -797

app/cookies_openai_model_eval.ipynb DELETED Viewed

@@ -1,797 +0,0 @@
-{
- "cells": [
-  {
-   "attachments": {},
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "This notebook tests various OpenAI models, prompts, and numbers of few-shot examples to compare how they perform on the same task."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "%pip install -qU wandb openai datasets"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import os\n",
-    "from dotenv import load_dotenv\n",
-    "load_dotenv()\n",
-    "\n",
-    "import openai\n",
-    "\n",
-    "# set OPENAI_API_KEY environment variable from .env file\n",
-    "openai.api_key = os.getenv(\"OPENAI_API_KEY\")\n",
-    "\n",
-    "# import OpenAIChatCompletions class from openai_chat_completion.py file and compare_completion_and_prediction function from util.py file\n",
-    "from openai_chat_completion import OpenAIChatCompletions\n",
-    "from util import compare_completion_and_prediction"
-   ]
-  },
-  {
-   "attachments": {},
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Models:\n",
-    "- gpt-3.5-turbo\n",
-    "- gpt-4\n",
-    "\n",
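-    "Prompts:\n",
-    "- gpt4-system-message.txt\n",
-    "- gpt4-system-message2.txt (same prompt, but with \"####\" in the system message)\n",
-    "\n",
-    "Few-shot examples:\n",
-    "- 0 to 10 (this run tests 0, 1, 2, 3, and 5)"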
-   ]
-  },
-  {
-   "attachments": {},
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "wandb setup:\n",
-    "- entity: kaleidoscope-data\n",
-    "- project: cookies_llm_experimental_eval\n",
-    "- tags: gpt-3.5-turbo, gpt-4, gpt4-system-message, few-shot"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.\n",
-      "\u001b[34m\u001b[1mwandb\u001b[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)\n",
-      "\u001b[34m\u001b[1mwandb\u001b[0m: You can find your API key in your browser here: https://wandb.ai/authorize\n",
-      "\u001b[34m\u001b[1mwandb\u001b[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit:\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[32m\u001b[41mERROR\u001b[0m API key must be 40 characters long, yours was 48\n",
-      "\u001b[34m\u001b[1mwandb\u001b[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)\n",
-      "\u001b[34m\u001b[1mwandb\u001b[0m: You can find your API key in your browser here: https://wandb.ai/authorize\n",
-      "\u001b[34m\u001b[1mwandb\u001b[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit:\u001b[34m\u001b[1mwandb\u001b[0m: Appending key for api.wandb.ai to your netrc file: /home/cmagganas/.netrc\n"
-     ]
-    },
-    {
-     "data": {
-      "text/html": [
-       "Tracking run with wandb version 0.15.4"
-      ],
-      "text/plain": [
-       "<IPython.core.display.HTML object>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       "Run data is saved locally in <code>/home/cmagganas/kaleidoscope/llm_data_cleaner/app/wandb/run-20230626_114056-rbtf91s6</code>"
-      ],
-      "text/plain": [
-       "<IPython.core.display.HTML object>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       "Syncing run <strong><a href='https://wandb.ai/kaleidoscope-data/cookies_llm_experimental_eval/runs/rbtf91s6' target=\"_blank\">rose-puddle-7</a></strong> to <a href='https://wandb.ai/kaleidoscope-data/cookies_llm_experimental_eval' target=\"_blank\">Weights & Biases</a> (<a href='https://wandb.me/run' target=\"_blank\">docs</a>)<br/>"
-      ],
-      "text/plain": [
-       "<IPython.core.display.HTML object>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       " View project at <a href='https://wandb.ai/kaleidoscope-data/cookies_llm_experimental_eval' target=\"_blank\">https://wandb.ai/kaleidoscope-data/cookies_llm_experimental_eval</a>"
-      ],
-      "text/plain": [
-       "<IPython.core.display.HTML object>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       " View run at <a href='https://wandb.ai/kaleidoscope-data/cookies_llm_experimental_eval/runs/rbtf91s6' target=\"_blank\">https://wandb.ai/kaleidoscope-data/cookies_llm_experimental_eval/runs/rbtf91s6</a>"
-      ],
-      "text/plain": [
-       "<IPython.core.display.HTML object>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
-   "source": [
-    "from wandb.integration.openai import autolog\n",
-    "\n",
-    "autolog({\"project\":\"cookies_llm_experimental_eval\",\n",
-    "         \"entity\": \"kaleidoscope-data\",\n",
-    "         \"group\": \"cookies\",\n",
-    "         \"job_type\": \"eval\"})"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# create an empty dataframe to store predictions\n",
-    "import pandas as pd\n",
-    "predictions_df = pd.DataFrame(columns=['model', 'system_message', 'n_shot', 'prompt', 'completion', 'prediction'])\n",
-    "\n",
-    "models_to_test = [\"gpt-4\", \"gpt-3.5-turbo\"]\n",
-    "sys_mes_to_test = [\"../prompts/gpt4-system-message.txt\", \"../prompts/gpt4-system-message2.txt\"] # names are arbitrary, same prompts but with \"####\" in system message 2\n",
-    "n_shots_to_test = [None, 1, 2, 3, 5]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# if rerunning the below cell is required, set the following to True\n",
-    "rerun = False\n",
-    "if rerun:\n",
-    "    predictions_df = pd.read_csv('../data/cookies_llm_eval_predictions.csv')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 178,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Get predictions for every combination of model, system message, and n_shot value.\n",
-    "# Each finished combination is appended to predictions_df and written to CSV,\n",
-    "# so the skip check below avoids repeating it when this cell is run again.\n",
-    "\n",
-    "for model in models_to_test:\n",
-    "    for system_message in sys_mes_to_test:\n",
-    "        # system messages are recorded as 1 or 2 instead of by file path\n",
-    "        sys_mes_var = 1 if system_message == \"../prompts/gpt4-system-message.txt\" else 2\n",
-    "        chat = OpenAIChatCompletions(model=model, system_message=system_message)\n",
-    "        for n_shot in n_shots_to_test:\n",
-    "            # n_shot=None is passed to predict_jsonl as-is but recorded as 0\n",
-    "            n_shot_var = 0 if n_shot is None else n_shot\n",
-    "            # skip combinations that already have rows in predictions_df\n",
-    "            already_done = (\n",
-    "                (predictions_df['model'] == model)\n",
-    "                & (predictions_df['system_message'] == sys_mes_var)\n",
-    "                & (predictions_df['n_shot'] == n_shot_var)\n",
-    "            ).any()\n",
-    "            if already_done:\n",
-    "                continue\n",
-    "            prompts, completions, predictions = chat.predict_jsonl(n_shot=n_shot)\n",
-    "            df_to_append = pd.DataFrame({'model': model, 'system_message': sys_mes_var, 'n_shot': n_shot_var, 'prompt': prompts, 'completion': completions, 'prediction': predictions})\n",
-    "            # expand each raw response into one column per top-level key\n",
-    "            df_right = df_to_append['prediction'].apply(pd.Series)\n",
-    "            # keep the message text of the first choice as the prediction;\n",
-    "            # 'choices' itself stays because the column list below names it\n",
-    "            df_right['prediction'] = df_right['choices'].apply(lambda x: x[0]['message']['content'])\n",
-    "            df_to_append = pd.concat([df_to_append[['model', 'system_message', 'n_shot', 'prompt', 'completion']], df_right], axis=1)\n",
-    "            # positional rename: the second 'model' column (from the response) becomes 'openai_model'\n",
-    "            df_to_append.columns = ['model', 'system_message', 'n_shot', 'prompt', 'completion', 'id', 'object', 'created', 'openai_model', 'choices', 'usage', 'prediction']\n",
-    "            predictions_df = pd.concat([predictions_df, df_to_append], ignore_index=True)\n",
-    "            # drop repeated (model, system_message, n_shot, prompt) rows, keeping the first\n",
-    "            predictions_df = predictions_df[~predictions_df.duplicated(subset=['model', 'system_message', 'n_shot', 'prompt'])]\n",
-    "            predictions_df.to_csv('../data/cookies_llm_eval_predictions.csv', index=False)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 179,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "predictions_df = predictions_df.drop_duplicates(subset=['model', 'system_message', 'n_shot', 'prompt'])"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 180,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "(400, 12)"
-      ]
-     },
-     "execution_count": 180,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "predictions_df.shape"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 143,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# import numpy as np\n",
-    "\n",
-    "# ids = predictions_df['id'].isna()\n",
-    "# # apply pd.Series to predictions column for rows where id is not null and change system_message {0,1} to {1,2}\n",
-    "# new_df_right = predictions_df.loc[ids, 'prediction'].apply(pd.Series)\n",
-    "# new_df_right['prediction'] = new_df_right['choices'].apply(lambda x: x[0]['message']['content']).drop(columns=['choices'])\n",
-    "# new_df_left = predictions_df.loc[ids, ['model', 'system_message', 'n_shot', 'prompt', 'completion']].replace({0:1, 1:2})\n",
-    "# new_df = pd.concat([new_df_left, new_df_right], axis=1)\n",
-    "\n",
-    "# predictions_df.columns = ['model', 'system_message', 'n_shot', 'prompt', 'completion', 'id', 'object', 'created', 'openai_model', 'choices', 'usage', 'prediction']\n",
-    "# new_df.columns = ['model', 'system_message', 'n_shot', 'prompt', 'completion', 'id', 'object', 'created', 'openai_model', 'choices', 'usage', 'prediction']\n",
-    "# predictions_df.loc[ids] = new_df"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 155,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# for col in ['model','system_message','n_shot']:\n",
-    "#     print(predictions_df[col].value_counts())"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 84,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# import numpy as np\n",
-    "\n",
-    "# # create a copy of predictions_df to manipulate\n",
-    "# new_predictions_df = predictions_df\n",
-    "\n",
-    "# # replace names with 1 or 2\n",
-    "# def replace_sys_mes_name(x):\n",
-    "#     if x == \"../prompts/gpt4-system-message.txt\":\n",
-    "#         return \"1\"\n",
-    "#     elif x == \"../prompts/gpt4-system-message2.txt\":\n",
-    "#         return \"2\"\n",
-    "#     else:\n",
-    "#         return x\n",
-    "# new_predictions_df['system_message'] = new_predictions_df['system_message'].apply(lambda x: replace_sys_mes_name(x))\n",
-    "# # replace None with 0\n",
-    "# new_predictions_df['n_shot'] = new_predictions_df['n_shot'].apply(lambda x: 0 if x == None or np.nan else x)\n",
-    "\n",
-    "# # break up prediction column into sub columns by each of json keys\n",
-    "# new_predictions_df = pd.concat([new_predictions_df, new_predictions_df['prediction'].apply(pd.Series)], axis=1)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 168,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# predictions_df.drop(columns=['num_correct'], inplace=True)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 181,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>model</th>\n",
-       "      <th>system_message</th>\n",
-       "      <th>n_shot</th>\n",
-       "      <th>prompt</th>\n",
-       "      <th>completion</th>\n",
-       "      <th>id</th>\n",
-       "      <th>object</th>\n",
-       "      <th>created</th>\n",
-       "      <th>openai_model</th>\n",
-       "      <th>choices</th>\n",
-       "      <th>usage</th>\n",
-       "      <th>prediction</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>gpt-4</td>\n",
-       "      <td>1</td>\n",
-       "      <td>0</td>\n",
-       "      <td>co-2MFE5QVF,Chill Medicated - Watermelon - Syr...</td>\n",
-       "      <td>Chill Medicated,Edible,Beverage,nan,nan</td>\n",
-       "      <td>chatcmpl-7VlTkjAqXNRWfltMPpr5v37uBJIsg</td>\n",
-       "      <td>chat.completion</td>\n",
-       "      <td>1.687805e+09</td>\n",
-       "      <td>gpt-4-0314</td>\n",
-       "      <td>[&lt;OpenAIObject at 0x7fcf7fde94e0&gt; JSON: {\\n  \"...</td>\n",
-       "      <td>{\\n  \"prompt_tokens\": 54,\\n  \"completion_token...</td>\n",
-       "      <td>Hello! It looks like you mentioned a product: ...</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>gpt-4</td>\n",
-       "      <td>1</td>\n",
-       "      <td>0</td>\n",
-       "      <td>bl-111630024545,Feelz - Space Cowboy 3.5g,nan,...</td>\n",
-       "      <td>Feelz,Flower,Bud,Space Cowboy,3.5</td>\n",
-       "      <td>chatcmpl-7VlTtGF3RGsngfKB1BXufxoTixX2v</td>\n",
-       "      <td>chat.completion</td>\n",
-       "      <td>1.687805e+09</td>\n",
-       "      <td>gpt-4-0314</td>\n",
-       "      <td>[&lt;OpenAIObject at 0x7fcf7f49d2b0&gt; JSON: {\\n  \"...</td>\n",
-       "      <td>{\\n  \"prompt_tokens\": 51,\\n  \"completion_token...</td>\n",
-       "      <td>Hello! It seems like you are referring to a pr...</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>gpt-4</td>\n",
-       "      <td>1</td>\n",
-       "      <td>0</td>\n",
-       "      <td>fl-8voAjt83sD,Champelli | Xclusivo 3.5g | Eigh...</td>\n",
-       "      <td>Champelli,Flower,Bud,Xclusivo,3.5</td>\n",
-       "      <td>chatcmpl-7VlU80b0m00VaiGymtj9dbqOggTgR</td>\n",
-       "      <td>chat.completion</td>\n",
-       "      <td>1.687805e+09</td>\n",
-       "      <td>gpt-4-0314</td>\n",
-       "      <td>[&lt;OpenAIObject at 0x7fcf7e306890&gt; JSON: {\\n  \"...</td>\n",
-       "      <td>{\\n  \"prompt_tokens\": 71,\\n  \"completion_token...</td>\n",
-       "      <td>Hello! It seems like you're interested in the ...</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>gpt-4</td>\n",
-       "      <td>1</td>\n",
-       "      <td>0</td>\n",
-       "      <td>bl-073133213364,CAM - Mellowz #7 7g,nan,FLOWER...</td>\n",
-       "      <td>CAM,Flower,Bud,Mellowz #7,7</td>\n",
-       "      <td>chatcmpl-7VlUHqbsG2kpFHDxAWfsryh6pHmC9</td>\n",
-       "      <td>chat.completion</td>\n",
-       "      <td>1.687805e+09</td>\n",
-       "      <td>gpt-4-0314</td>\n",
-       "      <td>[&lt;OpenAIObject at 0x7fcf7e33d940&gt; JSON: {\\n  \"...</td>\n",
-       "      <td>{\\n  \"prompt_tokens\": 49,\\n  \"completion_token...</td>\n",
-       "      <td>It seems like you are looking for information ...</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td>gpt-4</td>\n",
-       "      <td>1</td>\n",
-       "      <td>0</td>\n",
-       "      <td>fl-fwJQL2AWnS,Backpack Boyz | Bubblegum Gelato...</td>\n",
-       "      <td>Backpack Boyz,Edible,CBD Tincture/Caps/etc,nan...</td>\n",
-       "      <td>chatcmpl-7VlUYvcad2wahIMHavhDEkYrgvjpw</td>\n",
-       "      <td>chat.completion</td>\n",
-       "      <td>1.687805e+09</td>\n",
-       "      <td>gpt-4-0314</td>\n",
-       "      <td>[&lt;OpenAIObject at 0x7fcf7e306980&gt; JSON: {\\n  \"...</td>\n",
-       "      <td>{\\n  \"prompt_tokens\": 59,\\n  \"completion_token...</td>\n",
-       "      <td>Hello! It seems like you are looking for infor...</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>...</th>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>395</th>\n",
-       "      <td>gpt-3.5-turbo</td>\n",
-       "      <td>2</td>\n",
-       "      <td>1</td>\n",
-       "      <td>co-76GP441T,Minntz - Emerald Cut - Indoor - Jo...</td>\n",
-       "      <td>Minntz,Preroll,Joint,Emerald Cut,1</td>\n",
-       "      <td>chatcmpl-7VrjRMvs2l8EJd4PVecpSRPCvV9Hk</td>\n",
-       "      <td>chat.completion</td>\n",
-       "      <td>1.687829e+09</td>\n",
-       "      <td>gpt-3.5-turbo-0301</td>\n",
-       "      <td>[{'index': 0, 'message': {'role': 'assistant',...</td>\n",
-       "      <td>{'prompt_tokens': 125, 'completion_tokens': 23...</td>\n",
-       "      <td>Minntz,Joint,Indoor,Emerald Cut,1g,co-76GP441T.</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>396</th>\n",
-       "      <td>gpt-3.5-turbo</td>\n",
-       "      <td>2</td>\n",
-       "      <td>1</td>\n",
-       "      <td>co-5RAWYHYQ,The Growers Circle - Double Down -...</td>\n",
-       "      <td>The Growers Circle,Flower,Bud,Double Down,3.5</td>\n",
-       "      <td>chatcmpl-7VrjT3wfVoLtq3G6xksfVtLz4FloJ</td>\n",
-       "      <td>chat.completion</td>\n",
-       "      <td>1.687829e+09</td>\n",
-       "      <td>gpt-3.5-turbo-0301</td>\n",
-       "      <td>[{'index': 0, 'message': {'role': 'assistant',...</td>\n",
-       "      <td>{'prompt_tokens': 123, 'completion_tokens': 22...</td>\n",
-       "      <td>The Growers Circle,Double Down,Indoor,3.5g,5RA...</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>397</th>\n",
-       "      <td>gpt-3.5-turbo</td>\n",
-       "      <td>2</td>\n",
-       "      <td>1</td>\n",
-       "      <td>md-1195389,Blue Dream Roll Your Own Sugar Shak...</td>\n",
-       "      <td>Pacific Stone,Flower,Bud,nan,14</td>\n",
-       "      <td>chatcmpl-7VrjVafi1eGBXYfgmGBN0H3b0FzYO</td>\n",
-       "      <td>chat.completion</td>\n",
-       "      <td>1.687829e+09</td>\n",
-       "      <td>gpt-3.5-turbo-0301</td>\n",
-       "      <td>[{'index': 0, 'message': {'role': 'assistant',...</td>\n",
-       "      <td>{'prompt_tokens': 119, 'completion_tokens': 20...</td>\n",
-       "      <td>Pacific Stone,Sugar Shake,Blue Dream,Roll Your...</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>398</th>\n",
-       "      <td>gpt-3.5-turbo</td>\n",
-       "      <td>2</td>\n",
-       "      <td>1</td>\n",
-       "      <td>co-847ZXF37,The Grower Circle - Zoo Dawg x Cos...</td>\n",
-       "      <td>The Growers Circle,Preroll,Joint,Zoo Dawg x Co...</td>\n",
-       "      <td>chatcmpl-7VrjWQpcRxJTdr3f4BUd7totDZpdF</td>\n",
-       "      <td>chat.completion</td>\n",
-       "      <td>1.687829e+09</td>\n",
-       "      <td>gpt-3.5-turbo-0301</td>\n",
-       "      <td>[{'index': 0, 'message': {'role': 'assistant',...</td>\n",
-       "      <td>{'prompt_tokens': 133, 'completion_tokens': 32...</td>\n",
-       "      <td>Multi Joint,Zoo Dawg x Cosa Nostra,The Grower ...</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>399</th>\n",
-       "      <td>gpt-3.5-turbo</td>\n",
-       "      <td>2</td>\n",
-       "      <td>1</td>\n",
-       "      <td>co-8EMW15ZM,Flight Bites - S'mores - Gummy - 1...</td>\n",
-       "      <td>Flight Bites,Edible,Gummies,nan,nan</td>\n",
-       "      <td>chatcmpl-7VrjXiUHiyUyH7udPXIjANVmAUrra</td>\n",
-       "      <td>chat.completion</td>\n",
-       "      <td>1.687829e+09</td>\n",
-       "      <td>gpt-3.5-turbo-0301</td>\n",
-       "      <td>[{'index': 0, 'message': {'role': 'assistant',...</td>\n",
-       "      <td>{'prompt_tokens': 129, 'completion_tokens': 21...</td>\n",
-       "      <td>Flight Bites,Gummy,S'mores,10 count,100mg CO₂ ...</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "<p>400 rows × 12 columns</p>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "             model  system_message  n_shot  \\\n",
-       "0            gpt-4               1       0   \n",
-       "1            gpt-4               1       0   \n",
-       "2            gpt-4               1       0   \n",
-       "3            gpt-4               1       0   \n",
-       "4            gpt-4               1       0   \n",
-       "..             ...             ...     ...   \n",
-       "395  gpt-3.5-turbo               2       1   \n",
-       "396  gpt-3.5-turbo               2       1   \n",
-       "397  gpt-3.5-turbo               2       1   \n",
-       "398  gpt-3.5-turbo               2       1   \n",
-       "399  gpt-3.5-turbo               2       1   \n",
-       "\n",
-       "                                                prompt  \\\n",
-       "0    co-2MFE5QVF,Chill Medicated - Watermelon - Syr...   \n",
-       "1    bl-111630024545,Feelz - Space Cowboy 3.5g,nan,...   \n",
-       "2    fl-8voAjt83sD,Champelli | Xclusivo 3.5g | Eigh...   \n",
-       "3    bl-073133213364,CAM - Mellowz #7 7g,nan,FLOWER...   \n",
-       "4    fl-fwJQL2AWnS,Backpack Boyz | Bubblegum Gelato...   \n",
-       "..                                                 ...   \n",
-       "395  co-76GP441T,Minntz - Emerald Cut - Indoor - Jo...   \n",
-       "396  co-5RAWYHYQ,The Growers Circle - Double Down -...   \n",
-       "397  md-1195389,Blue Dream Roll Your Own Sugar Shak...   \n",
-       "398  co-847ZXF37,The Grower Circle - Zoo Dawg x Cos...   \n",
-       "399  co-8EMW15ZM,Flight Bites - S'mores - Gummy - 1...   \n",
-       "\n",
-       "                                            completion  \\\n",
-       "0              Chill Medicated,Edible,Beverage,nan,nan   \n",
-       "1                    Feelz,Flower,Bud,Space Cowboy,3.5   \n",
-       "2                    Champelli,Flower,Bud,Xclusivo,3.5   \n",
-       "3                          CAM,Flower,Bud,Mellowz #7,7   \n",
-       "4    Backpack Boyz,Edible,CBD Tincture/Caps/etc,nan...   \n",
-       "..                                                 ...   \n",
-       "395                 Minntz,Preroll,Joint,Emerald Cut,1   \n",
-       "396      The Growers Circle,Flower,Bud,Double Down,3.5   \n",
-       "397                    Pacific Stone,Flower,Bud,nan,14   \n",
-       "398  The Growers Circle,Preroll,Joint,Zoo Dawg x Co...   \n",
-       "399                Flight Bites,Edible,Gummies,nan,nan   \n",
-       "\n",
-       "                                         id           object       created  \\\n",
-       "0    chatcmpl-7VlTkjAqXNRWfltMPpr5v37uBJIsg  chat.completion  1.687805e+09   \n",
-       "1    chatcmpl-7VlTtGF3RGsngfKB1BXufxoTixX2v  chat.completion  1.687805e+09   \n",
-       "2    chatcmpl-7VlU80b0m00VaiGymtj9dbqOggTgR  chat.completion  1.687805e+09   \n",
-       "3    chatcmpl-7VlUHqbsG2kpFHDxAWfsryh6pHmC9  chat.completion  1.687805e+09   \n",
-       "4    chatcmpl-7VlUYvcad2wahIMHavhDEkYrgvjpw  chat.completion  1.687805e+09   \n",
-       "..                                      ...              ...           ...   \n",
-       "395  chatcmpl-7VrjRMvs2l8EJd4PVecpSRPCvV9Hk  chat.completion  1.687829e+09   \n",
-       "396  chatcmpl-7VrjT3wfVoLtq3G6xksfVtLz4FloJ  chat.completion  1.687829e+09   \n",
-       "397  chatcmpl-7VrjVafi1eGBXYfgmGBN0H3b0FzYO  chat.completion  1.687829e+09   \n",
-       "398  chatcmpl-7VrjWQpcRxJTdr3f4BUd7totDZpdF  chat.completion  1.687829e+09   \n",
-       "399  chatcmpl-7VrjXiUHiyUyH7udPXIjANVmAUrra  chat.completion  1.687829e+09   \n",
-       "\n",
-       "           openai_model                                            choices  \\\n",
-       "0            gpt-4-0314  [<OpenAIObject at 0x7fcf7fde94e0> JSON: {\\n  \"...   \n",
-       "1            gpt-4-0314  [<OpenAIObject at 0x7fcf7f49d2b0> JSON: {\\n  \"...   \n",
-       "2            gpt-4-0314  [<OpenAIObject at 0x7fcf7e306890> JSON: {\\n  \"...   \n",
-       "3            gpt-4-0314  [<OpenAIObject at 0x7fcf7e33d940> JSON: {\\n  \"...   \n",
-       "4            gpt-4-0314  [<OpenAIObject at 0x7fcf7e306980> JSON: {\\n  \"...   \n",
-       "..                  ...                                                ...   \n",
-       "395  gpt-3.5-turbo-0301  [{'index': 0, 'message': {'role': 'assistant',...   \n",
-       "396  gpt-3.5-turbo-0301  [{'index': 0, 'message': {'role': 'assistant',...   \n",
-       "397  gpt-3.5-turbo-0301  [{'index': 0, 'message': {'role': 'assistant',...   \n",
-       "398  gpt-3.5-turbo-0301  [{'index': 0, 'message': {'role': 'assistant',...   \n",
-       "399  gpt-3.5-turbo-0301  [{'index': 0, 'message': {'role': 'assistant',...   \n",
-       "\n",
-       "                                                 usage  \\\n",
-       "0    {\\n  \"prompt_tokens\": 54,\\n  \"completion_token...   \n",
-       "1    {\\n  \"prompt_tokens\": 51,\\n  \"completion_token...   \n",
-       "2    {\\n  \"prompt_tokens\": 71,\\n  \"completion_token...   \n",
-       "3    {\\n  \"prompt_tokens\": 49,\\n  \"completion_token...   \n",
-       "4    {\\n  \"prompt_tokens\": 59,\\n  \"completion_token...   \n",
-       "..                                                 ...   \n",
-       "395  {'prompt_tokens': 125, 'completion_tokens': 23...   \n",
-       "396  {'prompt_tokens': 123, 'completion_tokens': 22...   \n",
-       "397  {'prompt_tokens': 119, 'completion_tokens': 20...   \n",
-       "398  {'prompt_tokens': 133, 'completion_tokens': 32...   \n",
-       "399  {'prompt_tokens': 129, 'completion_tokens': 21...   \n",
-       "\n",
-       "                                            prediction  \n",
-       "0    Hello! It looks like you mentioned a product: ...  \n",
-       "1    Hello! It seems like you are referring to a pr...  \n",
-       "2    Hello! It seems like you're interested in the ...  \n",
-       "3    It seems like you are looking for information ...  \n",
-       "4    Hello! It seems like you are looking for infor...  \n",
-       "..                                                 ...  \n",
-       "395    Minntz,Joint,Indoor,Emerald Cut,1g,co-76GP441T.  \n",
-       "396  The Growers Circle,Double Down,Indoor,3.5g,5RA...  \n",
-       "397  Pacific Stone,Sugar Shake,Blue Dream,Roll Your...  \n",
-       "398  Multi Joint,Zoo Dawg x Cosa Nostra,The Grower ...  \n",
-       "399  Flight Bites,Gummy,S'mores,10 count,100mg CO₂ ...  \n",
-       "\n",
-       "[400 rows x 12 columns]"
-      ]
-     },
-     "execution_count": 181,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "predictions_df"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 182,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "669"
-      ]
-     },
-     "execution_count": 182,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "from util import compare_completion_and_prediction\n",
-    "\n",
-    "def get_num_correct(completion, prediction):\n",
-    "    \"\"\"Return the 'num_correct' value from compare_completion_and_prediction, or 0 if the comparison raises.\"\"\"\n",
-    "    try:\n",
-    "        return compare_completion_and_prediction(completion, prediction)['num_correct']\n",
-    "    except Exception:\n",
-    "        # scored as 0 when the comparison fails, e.g. the prediction has an incorrect format\n",
-    "        return 0\n",
-    "\n",
-    "# Apply get_num_correct function to predictions_df dataframe\n",
-    "predictions_df['num_correct'] = predictions_df.apply(lambda row: get_num_correct(row['completion'], row['prediction']), axis=1)\n",
-    "predictions_df['num_correct'].sum() # out of 2000 possible correct predictions (20 samples * 5 cols per sample) * (2 system messages * 2 models * 5 n_shot values)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 187,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "model          system_message  n_shot\n",
-       "gpt-3.5-turbo  1               0         0.00\n",
-       "                               1         0.00\n",
-       "               2               0         0.00\n",
-       "gpt-4          1               0         0.00\n",
-       "                               1         0.00\n",
-       "               2               0         0.00\n",
-       "gpt-3.5-turbo  1               2         0.24\n",
-       "               2               1         0.24\n",
-       "                               2         0.27\n",
-       "                               3         0.36\n",
-       "               1               3         0.40\n",
-       "                               5         0.44\n",
-       "gpt-4          2               2         0.45\n",
-       "               1               2         0.45\n",
-       "               2               1         0.47\n",
-       "gpt-3.5-turbo  2               5         0.56\n",
-       "gpt-4          1               3         0.62\n",
-       "               2               3         0.67\n",
-       "                               5         0.73\n",
-       "               1               5         0.79\n",
-       "Name: num_correct, dtype: float64"
-      ]
-     },
-     "execution_count": 187,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "predictions_df.groupby(['model', 'system_message', 'n_shot'])['num_correct'].sum().sort_values() / 100 # out of 100 possible correct predictions (20 samples * 5 cols per sample)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 184,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "predictions_df.to_csv('../data/cookies_llm_eval_proc_preds.csv', index=False)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 76,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "Waiting for W&B process to finish... <strong style=\"color:green\">(success).</strong>"
-      ],
-      "text/plain": [
-       "<IPython.core.display.HTML object>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       "<style>\n",
-       "    table.wandb td:nth-child(1) { padding: 0 10px; text-align: left ; width: auto;} td:nth-child(2) {text-align: left ; width: 100%}\n",
-       "    .wandb-row { display: flex; flex-direction: row; flex-wrap: wrap; justify-content: flex-start; width: 100% }\n",
-       "    .wandb-col { display: flex; flex-direction: column; flex-basis: 100%; flex: 1; padding: 10px; }\n",
-       "    </style>\n",
-       "<div class=\"wandb-row\"><div class=\"wandb-col\"><h3>Run history:</h3><br/><table class=\"wandb\"><tr><td>usage/completion_tokens</td><td>▆▆▁▁▁▁▁▁▁▁█▄▁▁▁▁▁▁▁▃▁▁▁▆▂▆▃▅▄▅▆▄▃▁▁▁▁▁▁▁</td></tr><tr><td>usage/elapsed_time</td><td>▄▆▁▁▁▁▂▁▂▁█▃▁▁▁▂▁▁▂▁▁▁▁▄▂▄▂▃▃▄▅▂▁▁▁▁▂▁▁▁</td></tr><tr><td>usage/prompt_tokens</td><td>▁▁▂▂▄▄▆▅██▁▁▃▃▄▅▅██▁▁▃▃▁▁▁▁▁▁▂▁▂▁▄▄▆▆██▁</td></tr><tr><td>usage/total_tokens</td><td>▄▄▂▂▃▃▅▅█▇▆▃▂▂▄▅▅▇▇▂▁▃▂▄▂▄▂▄▃▄▄▃▂▄▃▅▆██▁</td></tr></table><br/></div><div class=\"wandb-col\"><h3>Run summary:</h3><br/><table class=\"wandb\"><tr><td>usage/completion_tokens</td><td>62</td></tr><tr><td>usage/elapsed_time</td><td>2.40086</td></tr><tr><td>usage/prompt_tokens</td><td>54</td></tr><tr><td>usage/total_tokens</td><td>116</td></tr></table><br/></div></div>"
-      ],
-      "text/plain": [
-       "<IPython.core.display.HTML object>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       " View run <strong style=\"color:#cdcd00\">rose-puddle-7</strong> at: <a href='https://wandb.ai/kaleidoscope-data/cookies_llm_experimental_eval/runs/rbtf91s6' target=\"_blank\">https://wandb.ai/kaleidoscope-data/cookies_llm_experimental_eval/runs/rbtf91s6</a><br/>Synced 6 W&B file(s), 422 media file(s), 422 artifact file(s) and 0 other file(s)"
-      ],
-      "text/plain": [
-       "<IPython.core.display.HTML object>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       "Find logs at: <code>./wandb/run-20230626_114056-rbtf91s6/logs</code>"
-      ],
-      "text/plain": [
-       "<IPython.core.display.HTML object>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
-   "source": [
-    "autolog.disable()"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "kd-llm-dc",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.10.11"
-  },
-  "orig_nbformat": 4
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}