{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import os\n", "import faiss\n", "import requests\n", "import warnings\n", "import pandas as pd\n", "import nest_asyncio\n", "from llama_parse import LlamaParse\n", "from llama_index.core import Settings\n", "from llama_index.vector_stores.faiss import FaissVectorStore\n", "from llama_index.embeddings.huggingface import HuggingFaceEmbedding\n", "from llama_index.core import VectorStoreIndex, StorageContext\n", "\n", "nest_asyncio.apply()\n", "warnings.filterwarnings(\"ignore\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# **Data Pre-Processing**" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | ORDERNUMBER | \n", "QUANTITYORDERED | \n", "PRICEEACH | \n", "ORDERLINENUMBER | \n", "SALES | \n", "ORDERDATE | \n", "STATUS | \n", "QTR_ID | \n", "MONTH_ID | \n", "YEAR_ID | \n", "... | \n", "ADDRESSLINE1 | \n", "ADDRESSLINE2 | \n", "CITY | \n", "STATE | \n", "POSTALCODE | \n", "COUNTRY | \n", "TERRITORY | \n", "CONTACTLASTNAME | \n", "CONTACTFIRSTNAME | \n", "DEALSIZE | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "10107 | \n", "30 | \n", "95.70 | \n", "2 | \n", "2871.00 | \n", "2/24/2003 0:00 | \n", "Shipped | \n", "1 | \n", "2 | \n", "2003 | \n", "... | \n", "897 Long Airport Avenue | \n", "NaN | \n", "NYC | \n", "NY | \n", "10022 | \n", "USA | \n", "NaN | \n", "Yu | \n", "Kwai | \n", "Small | \n", "
1 | \n", "10121 | \n", "34 | \n", "81.35 | \n", "5 | \n", "2765.90 | \n", "5/7/2003 0:00 | \n", "Shipped | \n", "2 | \n", "5 | \n", "2003 | \n", "... | \n", "59 rue de l'Abbaye | \n", "NaN | \n", "Reims | \n", "NaN | \n", "51100 | \n", "France | \n", "EMEA | \n", "Henriot | \n", "Paul | \n", "Small | \n", "
2 | \n", "10134 | \n", "41 | \n", "94.74 | \n", "2 | \n", "3884.34 | \n", "7/1/2003 0:00 | \n", "Shipped | \n", "3 | \n", "7 | \n", "2003 | \n", "... | \n", "27 rue du Colonel Pierre Avia | \n", "NaN | \n", "Paris | \n", "NaN | \n", "75508 | \n", "France | \n", "EMEA | \n", "Da Cunha | \n", "Daniel | \n", "Medium | \n", "
3 | \n", "10145 | \n", "45 | \n", "83.26 | \n", "6 | \n", "3746.70 | \n", "8/25/2003 0:00 | \n", "Shipped | \n", "3 | \n", "8 | \n", "2003 | \n", "... | \n", "78934 Hillside Dr. | \n", "NaN | \n", "Pasadena | \n", "CA | \n", "90003 | \n", "USA | \n", "NaN | \n", "Young | \n", "Julie | \n", "Medium | \n", "
4 | \n", "10159 | \n", "49 | \n", "100.00 | \n", "14 | \n", "5205.27 | \n", "10/10/2003 0:00 | \n", "Shipped | \n", "4 | \n", "10 | \n", "2003 | \n", "... | \n", "7734 Strong St. | \n", "NaN | \n", "San Francisco | \n", "CA | \n", "NaN | \n", "USA | \n", "NaN | \n", "Brown | \n", "Julie | \n", "Medium | \n", "
5 rows × 25 columns
\n", "