Vrushali's picture
Update requirements.txt, base.py, settings.py, llm_vision.py, product_description.py, and vectorsearch.py
1dc30e5
import os
from config import OPENAI_API_KEY, file_Directory
from langchain_community.document_loaders.csv_loader import CSVLoader
from langchain_openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.vectorstores import Chroma
import pandas as pd
import chromadb,uuid
from chromadb.utils import embedding_functions
os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY
db_path = os.path.join(file_Directory,"vectorstore")
client = chromadb.PersistentClient(path=db_path)
def generate_uuid():
return str(uuid.uuid4())
emmbedding_model = "text-embedding-3-large"
openai_ef = embedding_functions.OpenAIEmbeddingFunction(model_name=emmbedding_model,api_key=OPENAI_API_KEY)
collection = client.get_or_create_collection(name="products")
def add_document_chroma_collection(collection_object, document_list, embedding_list, metadata):
metadata_list = [metadata for i in range(len(document_list))]
ids_gen = [generate_uuid() for i in range(len(document_list))]
collection_object.add(embeddings = embedding_list,documents = document_list,metadatas = metadata_list , ids = ids_gen)
if collection_object:
return True
def create_vector():
df = pd.read_csv(r"/home/vrush/Catalog-Digitization-/src/app/api/module/data/data.csv")
for i , items in df.iterrows():
print(items['name'])
metadata = {"empty":""}
doc_embed = openai_ef([items['name']])
add_document_chroma_collection(collection_object = collection, document_list = [items["name"]], embedding_list = doc_embed ,metadata = metadata)
def search(query):
embbed_text_search = openai_ef(query)
data = collection.query(query_embeddings = embbed_text_search, n_results=10)
return data
def get_detail_df(name):
print(name)
df = pd.read_excel(r"/home/vrush/Catalog-Digitization-/src/app/api/module/data/Catalog/Data_Images/ONDCSampleData.xlsx")
for i,item in df.iterrows():
if str(item['name']) == str(name).split(":")[1].strip():
return item
else:
continue
if __name__ == "__main__":
# create_vector()
name = search("Atta")
print(name)
# # # print(get_detail_df(name))