{ "cells": [ { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "## This notebook shows how to load CSV data and convert it into JSONL format for the LLM data cleaner.\n", "\n", "First, we load the data." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | sku | \n", "product_name (pos) | \n", "brand (pos) | \n", "product_category (pos) | \n", "strain_name (pos) | \n", "product_weight_grams (pos) | \n", "brand (manual review) | \n", "product_category (manual review) | \n", "sub_product_category (manual review) | \n", "strain_name (manual review) | \n", "product_weight_grams (manual review) | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "bl-842922110296 | \n", "STIIIZY - Birthday Cake Pod 1g | \n", "NaN | \n", "VAPE PENS 1G | \n", "NaN | \n", "1.0 | \n", "STIIIZY | \n", "Vape | \n", "Vape | \n", "Birthday Cake | \n", "1 | \n", "
1 | \n", "co-6ARLLX12 | \n", "SMASH Hits - Hippie Slayer - Indoor - 1g | \n", "SMASH Hits | \n", "NaN | \n", "Hippie Slayer | \n", "NaN | \n", "SMASH Hits | \n", "Preroll | \n", "Joint | \n", "Hippie Slayer | \n", "1 | \n", "
2 | \n", "bl-090035986141 | \n", "Eighth Brothers - Black Jack 1g Preroll | \n", "NaN | \n", "PREROLLS | \n", "NaN | \n", "NaN | \n", "Eighth Brothers | \n", "Preroll | \n", "Joint | \n", "Black Jack | \n", "1 | \n", "
3 | \n", "bl-850002822274 | \n", "GRIZZLY PEAK - Indica Bone 0.5g 7PK Prerolls | \n", "NaN | \n", "PREROLL PACKS | \n", "NaN | \n", "NaN | \n", "GRIZZLY PEAK | \n", "Preroll | \n", "Joint | \n", "NaN | \n", "3.5 | \n", "
4 | \n", "co-76GP441T | \n", "Minntz - Emerald Cut - Indoor - Joint - 1g | \n", "Minntz | \n", "NaN | \n", "Emerald Cut | \n", "NaN | \n", "Minntz | \n", "Preroll | \n", "Joint | \n", "Emerald Cut | \n", "1 | \n", "