Spaces:
Sleeping
Sleeping
import os | |
from config import OPENAI_API_KEY, file_Directory | |
from langchain_community.document_loaders.csv_loader import CSVLoader | |
from langchain_openai import OpenAIEmbeddings | |
from langchain.text_splitter import CharacterTextSplitter | |
from langchain_community.vectorstores import Chroma | |
import pandas as pd | |
import chromadb,uuid | |
from chromadb.utils import embedding_functions | |
# Copy the configured OpenAI key into the process environment.
os.environ.update({'OPENAI_API_KEY': OPENAI_API_KEY})

# The Chroma database is persisted in a "vectorstore" folder under the
# configured base directory.
db_path = os.path.join(file_Directory, "vectorstore")
client = chromadb.PersistentClient(path=db_path)
def generate_uuid() -> str:
    """Return a newly generated random (version 4) UUID in string form."""
    fresh_id = uuid.uuid4()
    return str(fresh_id)
# OpenAI model used by this module's embedding function.
emmbedding_model = "text-embedding-3-large"

# Callable that turns a list of texts into embedding vectors.
openai_ef = embedding_functions.OpenAIEmbeddingFunction(
    api_key=OPENAI_API_KEY,
    model_name=emmbedding_model,
)

# The "products" collection: opened when it already exists, created otherwise.
collection = client.get_or_create_collection(name="products")
def add_document_chroma_collection(collection_object, document_list, embedding_list, metadata):
    """Add documents with precomputed embeddings to a collection.

    Each document receives a freshly generated UUID as its id and is tagged
    with the same ``metadata`` object.

    Args:
        collection_object: Object exposing an
            ``add(embeddings=..., documents=..., metadatas=..., ids=...)``
            method (the Chroma collection in this module).
        document_list: Document texts to store.
        embedding_list: Embedding vectors, passed through unchanged; expected
            to line up one-to-one with ``document_list``.
        metadata: Metadata attached to every document of this call.

    Returns:
        ``True`` once ``add`` has returned without raising, ``False`` when
        ``document_list`` is empty and nothing was sent.
    """
    # len() rather than truthiness so array-like inputs are handled too.
    if len(document_list) == 0:
        # Nothing to store; skip the call instead of sending empty lists.
        return False

    metadata_list = [metadata for _ in document_list]
    ids_gen = [generate_uuid() for _ in document_list]
    collection_object.add(
        embeddings=embedding_list,
        documents=document_list,
        metadatas=metadata_list,
        ids=ids_gen,
    )
    # Success means add() returned without raising; the truthiness of the
    # collection object says nothing about the insert, so do not test it.
    return True
def create_vector(csv_path=None, batch_size=100):
    """Embed the product names of a CSV file and store them in ``collection``.

    The ``name`` column is read, rows without a name are skipped, and the
    remaining names are embedded and added to the module-level collection
    in batches. Each name is printed as it is processed.

    Args:
        csv_path: CSV file with a ``name`` column. Defaults to the data file
            this module has always used.
        batch_size: Number of names embedded and stored per request.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")
    if csv_path is None:
        csv_path = r"/home/vrush/Catalog-Digitization-/src/app/api/module/data/data.csv"

    df = pd.read_csv(csv_path)
    # A missing name cannot be embedded; everything else is normalised to text.
    names = df["name"].dropna().astype(str).tolist()
    # The same placeholder metadata is attached to every document.
    metadata = {"empty": ""}

    for start in range(0, len(names), batch_size):
        batch = names[start:start + batch_size]
        for product_name in batch:
            print(product_name)
        # One embedding request per batch instead of one per row.
        doc_embed = openai_ef(batch)
        add_document_chroma_collection(
            collection_object=collection,
            document_list=batch,
            embedding_list=doc_embed,
            metadata=metadata,
        )
def search(query, n_results=10):
    """Return the entries of ``collection`` closest to ``query``.

    Args:
        query: A single query string, or an iterable of query strings.
        n_results: Maximum number of matches requested per query text.

    Returns:
        Whatever ``collection.query`` returns for the embedded query text(s).
    """
    # The embedding function is given a list of texts everywhere else in this
    # module. A bare string is itself a sequence and could be embedded piece
    # by piece, so wrap it to make sure it is embedded as one text.
    query_texts = [query] if isinstance(query, str) else list(query)
    embbed_text_search = openai_ef(query_texts)
    return collection.query(
        query_embeddings=embbed_text_search,
        n_results=n_results,
    )
def get_detail_df(name, excel_path=None):
    """Return the catalog row whose ``name`` column matches ``name``.

    The lookup key is the text between the first and second colon of
    ``name`` (or everything after the first colon when there is only one),
    with surrounding whitespace removed. A value without any colon is used
    as the key unchanged instead of raising ``IndexError``. The key is
    compared with the text form of each entry of the ``name`` column.

    Args:
        name: Value to look up, e.g. ``"<label>: <product name>"``.
        excel_path: Workbook to search. Defaults to the sample catalog this
            module has always used.

    Returns:
        The first matching row as a ``pandas.Series``, or ``None`` when no
        row matches.
    """
    print(name)
    if excel_path is None:
        excel_path = r"/home/vrush/Catalog-Digitization-/src/app/api/module/data/Catalog/Data_Images/ONDCSampleData.xlsx"

    # The key does not depend on the row, so compute it once up front.
    parts = str(name).split(":")
    target = (parts[1] if len(parts) > 1 else parts[0]).strip()

    df = pd.read_excel(excel_path)
    matches = df[df["name"].map(str) == target]
    if matches.empty:
        return None
    return matches.iloc[0]
if __name__ == "__main__":
    # Manual smoke test: look up one sample query and show the raw result.
    # Uncomment the next line to fill the collection first.
    # create_vector()
    results = search("Atta")
    print(results)
    # print(get_detail_df(results))