Spaces:

anindya-hf-2002
/

Adaptive-RAG

Sleeping

App Files Files Community

anindya-hf-2002 committed on Dec 22, 2024

Commit

24ca106

verified ·

1 Parent(s): 09e42ee

Update src/vectorstore/pinecone_db.py

Browse files

Files changed (1) hide show

src/vectorstore/pinecone_db.py +34 -34

src/vectorstore/pinecone_db.py CHANGED Viewed

@@ -1,5 +1,5 @@
-from data_processing.loader import MultiFormatDocumentLoader
-from data_processing.chunker import SDPMChunker, BGEM3Embeddings
 import pandas as pd
 from typing import List, Dict, Any
@@ -13,7 +13,7 @@ import os
 load_dotenv()
 # API Keys
-PINECONE_API_KEY = os.getenv('PINECONE_API_KEY')
 embedding_model = BGEM3Embeddings(model_name="BAAI/bge-m3")
@@ -222,20 +222,20 @@ def get_retriever(
         embedding_generator=embedding_model
     )
-def main():
-    # Initialize Pinecone client
-    pc = Pinecone(api_key=PINECONE_API_KEY)
-    # Define input files
-    file_paths=[
-        # './data/2404.19756v1.pdf',
-        # './data/OD429347375590223100.pdf',
-        # './data/Project Report Format.docx',
-        './data/UNIT 2 GENDER BASED VIOLENCE.pptx'
-    ]
-    # Process pipeline
-    try:
         # Step 1: Load and combine documents
         # print("Loading documents...")
         # markdown_path = load_documents(file_paths)
@@ -257,26 +257,26 @@ def main():
         #     pinecone_client=pc,
         # )
-        # Step 5: Test retrieval
-        print("\nTesting retrieval...")
-        retriever = get_retriever(
-            pinecone_client=pc,
-            index_name="vector-index",
-            namespace="rag"
-        )
-        results = retriever.invoke(
-            question="describe the gender based violence",
-            top_k=5
-        )
-        for i, doc in enumerate(results, 1):
-            print(f"\nResult {i}:")
-            print(f"Content: {doc['page_content']}...")
-            print(f"Score: {doc['score']}")
-    except Exception as e:
-        print(f"Error in pipeline: {str(e)}")
-if __name__ == "__main__":
-    main()

+from src.data_processing.loader import MultiFormatDocumentLoader
+from src.data_processing.chunker import SDPMChunker, BGEM3Embeddings
 import pandas as pd
 from typing import List, Dict, Any
 load_dotenv()
 # API Keys
+# PINECONE_API_KEY = os.getenv('PINECONE_API_KEY')
 embedding_model = BGEM3Embeddings(model_name="BAAI/bge-m3")
         embedding_generator=embedding_model
     )
+# def main():
+#     # Initialize Pinecone client
+#     pc = Pinecone(api_key=PINECONE_API_KEY)
+#     # Define input files
+#     file_paths=[
+#         # './data/2404.19756v1.pdf',
+#         # './data/OD429347375590223100.pdf',
+#         # './data/Project Report Format.docx',
+#         './data/UNIT 2 GENDER BASED VIOLENCE.pptx'
+#     ]
+#     # Process pipeline
+#     try:
         # Step 1: Load and combine documents
         # print("Loading documents...")
         # markdown_path = load_documents(file_paths)
         #     pinecone_client=pc,
         # )
+#         # Step 5: Test retrieval
+#         print("\nTesting retrieval...")
+#         retriever = get_retriever(
+#             pinecone_client=pc,
+#             index_name="vector-index",
+#             namespace="rag"
+#         )
+#         results = retriever.invoke(
+#             question="describe the gender based violence",
+#             top_k=5
+#         )
+#         for i, doc in enumerate(results, 1):
+#             print(f"\nResult {i}:")
+#             print(f"Content: {doc['page_content']}...")
+#             print(f"Score: {doc['score']}")
+#     except Exception as e:
+#         print(f"Error in pipeline: {str(e)}")
+# if __name__ == "__main__":
+#     main()