## This notebook evaluates how well an LLM cleans the cookies dataset.

#### Using OpenAI API

Try calling a function from the newly created module `openai_chat_completion.py`.

In [3]:
import os
from dotenv import load_dotenv
load_dotenv()

import openai

# set OPENAI_API_KEY environment variable from .env file
openai.api_key = os.getenv("OPENAI_API_KEY")

# import OpenAIChatCompletions class from openai_chat_completion.py file located in llm_data_cleaner/scripts folder (this notebook is located in llm_data_cleaner/notebooks folder)
from openai_chat_completion import OpenAIChatCompletions, compare_completion_and_prediction

# read in llm-data-cleaner/prompts/gpt4-system-message.txt file into variable system_message
# (the context manager closes the file handle as soon as the text has been read,
# instead of leaving it open until garbage collection)
with open('../prompts/gpt4-system-message.txt', 'r') as system_message_file:
    system_message = system_message_file.read()

# create an instance of the OpenAIChatCompletions class and use the openai_chat_completion method to get chat completions
chat_completions = OpenAIChatCompletions(model="gpt-4", system_message=system_message)

# One raw record from the dataset, sent as the user prompt for a single completion
prompt = "co-2MFE5QVF,Chill Medicated - Watermelon - Syrup - 250mg,Chill Medicated,nan,nan,nan"
completion = chat_completions.openai_chat_completion(prompt, n_shot=1)
print(completion)

{
  "id": "chatcmpl-7UecmbvjkbwamEpYXyIvGO4w4Ae8i",
  "object": "chat.completion",
  "created": 1687540356,
  "model": "gpt-4-0314",
  "choices": [
    {
      "index": 0,
      "message": {
        "role": "assistant",
        "content": "Chill Medicated,Edible,Beverage,Watermelon,250"
      },
      "finish_reason": "stop"
    }
  ],
  "usage": {
    "prompt_tokens": 636,
    "completion_tokens": 15,
    "total_tokens": 651
  }
}


In [7]:
from openai_chat_completion import OpenAIChatCompletions

# Call predict_jsonl once, then unpack the three values it returns:
# prompts, completions and predictions.
jsonl_outputs = chat_completions.predict_jsonl(n_shot=1)
test_prompts, test_completions, test_predictions = jsonl_outputs

In [44]:
import pandas as pd

# Pull the generated text out of each chat-completion response
predicted_contents = [ele['choices'][0]['message']['content'] for ele in test_predictions]

# Build the frame column by column instead of zipping rows together: zip() silently
# stops at the shortest input, whereas pandas raises a ValueError when the columns of
# a dict have different lengths, so a prompt without a prediction cannot go unnoticed.
test_results = pd.DataFrame({
    'prompt': list(test_prompts),
    'completion': list(test_completions),
    'prediction': predicted_contents,
})
test_results

Unnamed: 0,prompt,completion,prediction
0,"co-2MFE5QVF,Chill Medicated - Watermelon - Syr...","Chill Medicated,Edible,Beverage,nan,nan","Chill Medicated,Edible,Beverage,Watermelon,250"
1,"bl-111630024545,Feelz - Space Cowboy 3.5g,nan,...","Feelz,Flower,Bud,Space Cowboy,3.5","Feelz,Flower,Bud,Space Cowboy,3.5"
2,"fl-8voAjt83sD,Champelli | Xclusivo 3.5g | Eigh...","Champelli,Flower,Bud,Xclusivo,3.5","Champelli,Flower,Bud,Xclusivo,3.5"
3,"bl-073133213364,CAM - Mellowz #7 7g,nan,FLOWER...","CAM,Flower,Bud,Mellowz #7,7","CAM - Mellowz #7 7g,CAM,Flower,Bud,Mellowz,7"
4,"fl-fwJQL2AWnS,Backpack Boyz | Bubblegum Gelato...","Backpack Boyz,Edible,CBD Tincture/Caps/etc,nan...","Backpack Boyz,Edible,Syrup,Bubblegum Gelato,1"
5,"fl-dXE5cH45AQ,Raw Garden | Pink Lemonade Crush...","Raw Garden,Concentrate,Diamonds,Pink Lemonade,1","Raw Garden,Concentrate,Diamonds,Pink Lemonade,1.0"
6,"md-1159983,Baby Jeeter Peaches | Infused Prero...","Jeeter,Preroll,Infused Joint,Peaches,12.5","Jeeter,Preroll,Infused Joint,Peaches,12.5"
7,"co-6WGV1Z0H,Ocean Breeze Cultivators - Truffle...","Ocean Breeze Cultivators,Preroll,Blunt,Truffle...","Ocean Breeze Cultivators,Blunt,Truffle Pupil,1.0"
8,"fl-doWkMzvFq2,Cookies | Triple Scoop Preroll 1...","Cookies,Preroll,Joint,Triple Scoop,1","Cookies,Preroll,Joint,Triple Scoop,1.0"
9,"bl-842922110296,STIIIZY - Birthday Cake Pod 1g...","STIIIZY,Vape,Vape,Birthday Cake,1","Birthday Cake Pod,STIIIZY,Vape,Birthday Cake,1.0"


In [27]:
from util import compare_completion_and_prediction

# Compare the first completion with the message text of the first prediction
first_completion = test_completions[0]
first_predicted_text = test_predictions[0]["choices"][0]["message"]["content"]
compare_completion_and_prediction(first_completion, first_predicted_text)

{'completion': ['Chill Medicated', 'Edible', 'Beverage', 'nan', 'nan'],
 'prediction': ['Chill Medicated', 'Edible', 'Beverage', 'Watermelon', '250'],
 'matches': [True, True, True, False, False],
 'num_correct': 3}

In [58]:
# Write function that uses compare_completion_and_prediction to return num_correct and return zero if there is an error
def get_num_correct(completion, prediction):
    """Return the ``num_correct`` score for one completion/prediction pair.

    Delegates to ``compare_completion_and_prediction`` and reads the
    ``'num_correct'`` entry of the result it returns.

    Args:
        completion: Value passed as the first argument to the comparison helper.
        prediction: Value passed as the second argument to the comparison helper.

    Returns:
        The helper's ``num_correct`` value, or 0 if the comparison (or the
        lookup of ``'num_correct'``) raises an ordinary exception.
    """
    try:
        return compare_completion_and_prediction(completion, prediction)['num_correct']
    except Exception:
        # Catch Exception rather than using a bare except so that
        # KeyboardInterrupt / SystemExit still propagate and the notebook
        # can be interrupted while scoring.
        return 0  # this will be the case when format is incorrect

# Score every row: hand each (completion, prediction) pair to get_num_correct
# and store the per-row result in a new 'num_correct' column.
test_results['num_correct'] = test_results[['completion', 'prediction']].apply(
    lambda pair: get_num_correct(*pair),
    axis=1,
)
# Total score; out of 100 possible correct predictions (20 samples * 5 cols per sample)
test_results['num_correct'].sum()

66