anindya-hf-2002 committed on
Commit
24ca106
·
verified ·
1 Parent(s): 09e42ee

Update src/vectorstore/pinecone_db.py

Browse files
Files changed (1) hide show
  1. src/vectorstore/pinecone_db.py +34 -34
src/vectorstore/pinecone_db.py CHANGED
@@ -1,5 +1,5 @@
1
- from data_processing.loader import MultiFormatDocumentLoader
2
- from data_processing.chunker import SDPMChunker, BGEM3Embeddings
3
 
4
  import pandas as pd
5
  from typing import List, Dict, Any
@@ -13,7 +13,7 @@ import os
13
  load_dotenv()
14
 
15
  # API Keys
16
- PINECONE_API_KEY = os.getenv('PINECONE_API_KEY')
17
 
18
  embedding_model = BGEM3Embeddings(model_name="BAAI/bge-m3")
19
 
@@ -222,20 +222,20 @@ def get_retriever(
222
  embedding_generator=embedding_model
223
  )
224
 
225
- def main():
226
- # Initialize Pinecone client
227
- pc = Pinecone(api_key=PINECONE_API_KEY)
228
 
229
- # Define input files
230
- file_paths=[
231
- # './data/2404.19756v1.pdf',
232
- # './data/OD429347375590223100.pdf',
233
- # './data/Project Report Format.docx',
234
- './data/UNIT 2 GENDER BASED VIOLENCE.pptx'
235
- ]
236
 
237
- # Process pipeline
238
- try:
239
  # Step 1: Load and combine documents
240
  # print("Loading documents...")
241
  # markdown_path = load_documents(file_paths)
@@ -257,26 +257,26 @@ def main():
257
  # pinecone_client=pc,
258
  # )
259
 
260
- # Step 5: Test retrieval
261
- print("\nTesting retrieval...")
262
- retriever = get_retriever(
263
- pinecone_client=pc,
264
- index_name="vector-index",
265
- namespace="rag"
266
- )
267
 
268
- results = retriever.invoke(
269
- question="describe the gender based violence",
270
- top_k=5
271
- )
272
 
273
- for i, doc in enumerate(results, 1):
274
- print(f"\nResult {i}:")
275
- print(f"Content: {doc['page_content']}...")
276
- print(f"Score: {doc['score']}")
277
 
278
- except Exception as e:
279
- print(f"Error in pipeline: {str(e)}")
280
 
281
- if __name__ == "__main__":
282
- main()
 
1
+ from src.data_processing.loader import MultiFormatDocumentLoader
2
+ from src.data_processing.chunker import SDPMChunker, BGEM3Embeddings
3
 
4
  import pandas as pd
5
  from typing import List, Dict, Any
 
13
  load_dotenv()
14
 
15
  # API Keys
16
+ # PINECONE_API_KEY = os.getenv('PINECONE_API_KEY')
17
 
18
  embedding_model = BGEM3Embeddings(model_name="BAAI/bge-m3")
19
 
 
222
  embedding_generator=embedding_model
223
  )
224
 
225
+ # def main():
226
+ # # Initialize Pinecone client
227
+ # pc = Pinecone(api_key=PINECONE_API_KEY)
228
 
229
+ # # Define input files
230
+ # file_paths=[
231
+ # # './data/2404.19756v1.pdf',
232
+ # # './data/OD429347375590223100.pdf',
233
+ # # './data/Project Report Format.docx',
234
+ # './data/UNIT 2 GENDER BASED VIOLENCE.pptx'
235
+ # ]
236
 
237
+ # # Process pipeline
238
+ # try:
239
  # Step 1: Load and combine documents
240
  # print("Loading documents...")
241
  # markdown_path = load_documents(file_paths)
 
257
  # pinecone_client=pc,
258
  # )
259
 
260
+ # # Step 5: Test retrieval
261
+ # print("\nTesting retrieval...")
262
+ # retriever = get_retriever(
263
+ # pinecone_client=pc,
264
+ # index_name="vector-index",
265
+ # namespace="rag"
266
+ # )
267
 
268
+ # results = retriever.invoke(
269
+ # question="describe the gender based violence",
270
+ # top_k=5
271
+ # )
272
 
273
+ # for i, doc in enumerate(results, 1):
274
+ # print(f"\nResult {i}:")
275
+ # print(f"Content: {doc['page_content']}...")
276
+ # print(f"Score: {doc['score']}")
277
 
278
+ # except Exception as e:
279
+ # print(f"Error in pipeline: {str(e)}")
280
 
281
+ # if __name__ == "__main__":
282
+ # main()