Vrushali committed on
Commit
6e67de0
·
1 Parent(s): 5383897

Add vector search module

Browse files
Files changed (1) hide show
  1. src/module/vectorsearch.py +38 -0
src/module/vectorsearch.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from config import OPENAI_API_KEY, file_Directory
3
+ from langchain_community.document_loaders.csv_loader import CSVLoader
4
+ from langchain_openai import OpenAIEmbeddings
5
+ from langchain.text_splitter import CharacterTextSplitter
6
+ from langchain_community.vectorstores import Chroma
7
+ import pandas as pd
8
+
9
# Copy the key imported from config into the process environment.
# NOTE(review): presumably this is how the OpenAIEmbeddings() calls below,
# which pass no key explicitly, obtain their credentials — confirm.
+ os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY
10
+
11
+
12
+ # df = pd.read_excel(r"/home/vrush/Catalog-Digitization-/src/module/data/Catalog Digitization/ONDC Test Data _ Images/ONDCSampleData.xlsx")
13
+ # df_new = pd.DataFrame(columns=["id", "name"])
14
+ # df_new = df['name']
15
+ # df_new.to_csv(r"data/data.csv", index=False)
16
+
17
def create_vector(csv_path: str = "data/data.csv"):
    """Build a Chroma vector store from a CSV file and persist it to disk.

    The CSV is loaded with ``CSVLoader``, split with a
    ``CharacterTextSplitter`` (chunk size 1000, no overlap), embedded with
    ``OpenAIEmbeddings`` and stored in the ``vectorstore`` directory under
    ``file_Directory``.

    Args:
        csv_path: Path of the CSV file to index. Defaults to
            ``"data/data.csv"``, the path that was previously hard-coded.

    Returns:
        The store object returned by ``Chroma.from_documents``.
    """
    rows = CSVLoader(file_path=csv_path).load()
    splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    chunks = splitter.split_documents(rows)

    db_path = os.path.join(file_Directory, "vectorstore")
    os.makedirs(db_path, exist_ok=True)

    # Index the split chunks. The earlier version computed them but then
    # passed the unsplit documents, leaving the splitter output unused.
    # NOTE(review): running this again against an existing directory
    # presumably adds the same rows a second time — confirm and clear the
    # directory first if duplicates matter.
    return Chroma.from_documents(
        chunks, OpenAIEmbeddings(), persist_directory=db_path
    )
26
+
27
def search(query):
    """Look up ``query`` in the persisted Chroma store and report the first hit.

    Opens the store in the ``vectorstore`` directory under
    ``file_Directory``, embeds ``query`` with ``OpenAIEmbeddings`` and runs
    ``similarity_search_by_vector`` on the resulting vector.

    Args:
        query: Text to search for.

    Returns:
        The ``page_content`` of the first returned document, which is also
        printed, or ``None`` when the search returns no documents.
    """
    embeddings = OpenAIEmbeddings()
    db_path = os.path.join(file_Directory, "vectorstore")
    db = Chroma(persist_directory=db_path, embedding_function=embeddings)

    # Reuse the embedding client created above instead of building a second one.
    query_vector = embeddings.embed_query(query)
    matches = db.similarity_search_by_vector(query_vector)

    # An empty store (or no hits) used to raise IndexError on matches[0].
    if not matches:
        return None

    first_hit = matches[0].page_content
    print(first_hit)
    return first_hit
34
+
35
+
36
if __name__ == "__main__":
    # Script entry point: build the vector store, then run one sample query.
    create_vector()
    sample_query = "Choco Creme Wafers"
    search(sample_query)