{
 "cells": [
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## This notebook evaluates how well the LLM cleans the cookies dataset."
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Using OpenAI API"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Try calling a function from the newly created module `openai_chat_completion.py`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{\n",
      " \"id\": \"chatcmpl-7UecmbvjkbwamEpYXyIvGO4w4Ae8i\",\n",
      " \"object\": \"chat.completion\",\n",
      " \"created\": 1687540356,\n",
      " \"model\": \"gpt-4-0314\",\n",
      " \"choices\": [\n",
      " {\n",
      " \"index\": 0,\n",
      " \"message\": {\n",
      " \"role\": \"assistant\",\n",
      " \"content\": \"Chill Medicated,Edible,Beverage,Watermelon,250\"\n",
      " },\n",
      " \"finish_reason\": \"stop\"\n",
      " }\n",
      " ],\n",
      " \"usage\": {\n",
      " \"prompt_tokens\": 636,\n",
      " \"completion_tokens\": 15,\n",
      " \"total_tokens\": 651\n",
      " }\n",
      "}\n"
     ]
    }
   ],
   "source": [
    "import os\n",
    "from dotenv import load_dotenv\n",
    "# load the variables defined in the .env file before the API key is read from the environment below\n",
    "load_dotenv()\n",
    "\n",
    "import openai\n",
    "\n",
    "# set the OpenAI API key from the OPENAI_API_KEY environment variable\n",
    "openai.api_key = os.getenv(\"OPENAI_API_KEY\")\n",
    "\n",
    "# import OpenAIChatCompletions class from openai_chat_completion.py file located in llm_data_cleaner/scripts folder (this notebook is located in llm_data_cleaner/notebooks folder)\n",
    "from openai_chat_completion import OpenAIChatCompletions, compare_completion_and_prediction\n",
    "\n",
    "# read llm-data-cleaner/prompts/gpt4-system-message.txt into system_message;\n",
    "# the with-block closes the file handle as soon as the text has been read\n",
    "with open('../prompts/gpt4-system-message.txt', 'r') as system_message_file:\n",
    "    system_message = system_message_file.read()\n",
    "\n",
    "# create an instance of the OpenAIChatCompletions class and use the openai_chat_completion method to get chat completions\n",
    "chat_completions = OpenAIChatCompletions(model=\"gpt-4\", system_message=system_message)\n",
    "\n",
    "prompt = \"co-2MFE5QVF,Chill Medicated - Watermelon - Syrup - 250mg,Chill Medicated,nan,nan,nan\"\n",
    "completion = chat_completions.openai_chat_completion(prompt, n_shot=1)\n",
    "print(completion)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "from openai_chat_completion import OpenAIChatCompletions\n",
    "\n",
    "# call predict_jsonl with n_shot=1; its three return values are unpacked as prompts, completions and predictions\n",
    "test_prompts, test_completions, test_predictions = chat_completions.predict_jsonl(n_shot=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [ "
\n", " | prompt | \n", "completion | \n", "prediction | \n", "
---|---|---|---|
0 | \n", "co-2MFE5QVF,Chill Medicated - Watermelon - Syr... | \n", "Chill Medicated,Edible,Beverage,nan,nan | \n", "Chill Medicated,Edible,Beverage,Watermelon,250 | \n", "
1 | \n", "bl-111630024545,Feelz - Space Cowboy 3.5g,nan,... | \n", "Feelz,Flower,Bud,Space Cowboy,3.5 | \n", "Feelz,Flower,Bud,Space Cowboy,3.5 | \n", "
2 | \n", "fl-8voAjt83sD,Champelli | Xclusivo 3.5g | Eigh... | \n", "Champelli,Flower,Bud,Xclusivo,3.5 | \n", "Champelli,Flower,Bud,Xclusivo,3.5 | \n", "
3 | \n", "bl-073133213364,CAM - Mellowz #7 7g,nan,FLOWER... | \n", "CAM,Flower,Bud,Mellowz #7,7 | \n", "CAM - Mellowz #7 7g,CAM,Flower,Bud,Mellowz,7 | \n", "
4 | \n", "fl-fwJQL2AWnS,Backpack Boyz | Bubblegum Gelato... | \n", "Backpack Boyz,Edible,CBD Tincture/Caps/etc,nan... | \n", "Backpack Boyz,Edible,Syrup,Bubblegum Gelato,1 | \n", "
5 | \n", "fl-dXE5cH45AQ,Raw Garden | Pink Lemonade Crush... | \n", "Raw Garden,Concentrate,Diamonds,Pink Lemonade,1 | \n", "Raw Garden,Concentrate,Diamonds,Pink Lemonade,1.0 | \n", "
6 | \n", "md-1159983,Baby Jeeter Peaches | Infused Prero... | \n", "Jeeter,Preroll,Infused Joint,Peaches,12.5 | \n", "Jeeter,Preroll,Infused Joint,Peaches,12.5 | \n", "
7 | \n", "co-6WGV1Z0H,Ocean Breeze Cultivators - Truffle... | \n", "Ocean Breeze Cultivators,Preroll,Blunt,Truffle... | \n", "Ocean Breeze Cultivators,Blunt,Truffle Pupil,1.0 | \n", "
8 | \n", "fl-doWkMzvFq2,Cookies | Triple Scoop Preroll 1... | \n", "Cookies,Preroll,Joint,Triple Scoop,1 | \n", "Cookies,Preroll,Joint,Triple Scoop,1.0 | \n", "
9 | \n", "bl-842922110296,STIIIZY - Birthday Cake Pod 1g... | \n", "STIIIZY,Vape,Vape,Birthday Cake,1 | \n", "Birthday Cake Pod,STIIIZY,Vape,Birthday Cake,1.0 | \n", "
10 | \n", "co-6GGKA0GV,Cookies x The Grower Circle - Tang... | \n", "Cookies,Preroll,Infused Joint,TangEray,1 | \n", "Here is the missing information:\\n\\nproduct_na... | \n", "
11 | \n", "co-7YGGNHBR,Kanha - Nano Blood Orange Bliss In... | \n", "Kanha,Edible,Gummies,nan,nan | \n", "Kanha,Edible,Gummies,nan,nan | \n", "
12 | \n", "md-1242517,BOBBI HYLL | Indoor - 3.5g,Fresca,F... | \n", "Fresca,Flower,Bud,BOBBI HYLL,3.5 | \n", "Fresca,Flower,Bud,BOBBI HYLL,3.5 | \n", "
13 | \n", "fl-9nEPGsnYtY,Flav | Blueberry Belts 100mg,Fla... | \n", "Flav,Edible,Gummies,nan,nan | \n", "Flav,Edible,Gummies,Blueberry,100 | \n", "
14 | \n", "md-1347497,Cherry Pie - 1g - Pod,Left Coast Ex... | \n", "Left Coast Extracts,Vape,510 cart,Cherry Pie,1 | \n", "Cherry Pie - 1g - Pod,Left Coast Extracts,Vape... | \n", "
15 | \n", "co-76GP441T,Minntz - Emerald Cut - Indoor - Jo... | \n", "Minntz,Preroll,Joint,Emerald Cut,1 | \n", "Minntz,Preroll,Joint,Emerald Cut,1 | \n", "
16 | \n", "co-5RAWYHYQ,The Growers Circle - Double Down -... | \n", "The Growers Circle,Flower,Bud,Double Down,3.5 | \n", "The Growers Circle,Flower,Bud,Double Down,3.5 | \n", "
17 | \n", "md-1195389,Blue Dream Roll Your Own Sugar Shak... | \n", "Pacific Stone,Flower,Bud,nan,14 | \n", "Pacific Stone,Flower,Bud,Blue Dream,14 | \n", "
18 | \n", "co-847ZXF37,The Grower Circle - Zoo Dawg x Cos... | \n", "The Growers Circle,Preroll,Joint,Zoo Dawg x Co... | \n", "The Grower Circle,Preroll,Joint,Zoo Dawg x Cos... | \n", "
19 | \n", "co-8EMW15ZM,Flight Bites - S'mores - Gummy - 1... | \n", "Flight Bites,Edible,Gummies,nan,nan | \n", "Flight Bites,Edible,Gummies,S'mores,10 | \n", "