Spaces:
Sleeping
Sleeping
Update src/vectorstore/pinecone_db.py
Browse files- src/vectorstore/pinecone_db.py +34 -34
src/vectorstore/pinecone_db.py
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
-
from data_processing.loader import MultiFormatDocumentLoader
|
2 |
-
from data_processing.chunker import SDPMChunker, BGEM3Embeddings
|
3 |
|
4 |
import pandas as pd
|
5 |
from typing import List, Dict, Any
|
@@ -13,7 +13,7 @@ import os
|
|
13 |
load_dotenv()
|
14 |
|
15 |
# API Keys
|
16 |
-
PINECONE_API_KEY = os.getenv('PINECONE_API_KEY')
|
17 |
|
18 |
embedding_model = BGEM3Embeddings(model_name="BAAI/bge-m3")
|
19 |
|
@@ -222,20 +222,20 @@ def get_retriever(
|
|
222 |
embedding_generator=embedding_model
|
223 |
)
|
224 |
|
225 |
-
def main():
|
226 |
-
|
227 |
-
|
228 |
|
229 |
-
|
230 |
-
|
231 |
-
|
232 |
-
|
233 |
-
|
234 |
-
|
235 |
-
|
236 |
|
237 |
-
|
238 |
-
|
239 |
# Step 1: Load and combine documents
|
240 |
# print("Loading documents...")
|
241 |
# markdown_path = load_documents(file_paths)
|
@@ -257,26 +257,26 @@ def main():
|
|
257 |
# pinecone_client=pc,
|
258 |
# )
|
259 |
|
260 |
-
|
261 |
-
|
262 |
-
|
263 |
-
|
264 |
-
|
265 |
-
|
266 |
-
|
267 |
|
268 |
-
|
269 |
-
|
270 |
-
|
271 |
-
|
272 |
|
273 |
-
|
274 |
-
|
275 |
-
|
276 |
-
|
277 |
|
278 |
-
|
279 |
-
|
280 |
|
281 |
-
if __name__ == "__main__":
|
282 |
-
|
|
|
1 |
+
from src.data_processing.loader import MultiFormatDocumentLoader
|
2 |
+
from src.data_processing.chunker import SDPMChunker, BGEM3Embeddings
|
3 |
|
4 |
import pandas as pd
|
5 |
from typing import List, Dict, Any
|
|
|
13 |
load_dotenv()
|
14 |
|
15 |
# API Keys
|
16 |
+
# PINECONE_API_KEY = os.getenv('PINECONE_API_KEY')
|
17 |
|
18 |
embedding_model = BGEM3Embeddings(model_name="BAAI/bge-m3")
|
19 |
|
|
|
222 |
embedding_generator=embedding_model
|
223 |
)
|
224 |
|
225 |
+
# def main():
|
226 |
+
# # Initialize Pinecone client
|
227 |
+
# pc = Pinecone(api_key=PINECONE_API_KEY)
|
228 |
|
229 |
+
# # Define input files
|
230 |
+
# file_paths=[
|
231 |
+
# # './data/2404.19756v1.pdf',
|
232 |
+
# # './data/OD429347375590223100.pdf',
|
233 |
+
# # './data/Project Report Format.docx',
|
234 |
+
# './data/UNIT 2 GENDER BASED VIOLENCE.pptx'
|
235 |
+
# ]
|
236 |
|
237 |
+
# # Process pipeline
|
238 |
+
# try:
|
239 |
# Step 1: Load and combine documents
|
240 |
# print("Loading documents...")
|
241 |
# markdown_path = load_documents(file_paths)
|
|
|
257 |
# pinecone_client=pc,
|
258 |
# )
|
259 |
|
260 |
+
# # Step 5: Test retrieval
|
261 |
+
# print("\nTesting retrieval...")
|
262 |
+
# retriever = get_retriever(
|
263 |
+
# pinecone_client=pc,
|
264 |
+
# index_name="vector-index",
|
265 |
+
# namespace="rag"
|
266 |
+
# )
|
267 |
|
268 |
+
# results = retriever.invoke(
|
269 |
+
# question="describe the gender based violence",
|
270 |
+
# top_k=5
|
271 |
+
# )
|
272 |
|
273 |
+
# for i, doc in enumerate(results, 1):
|
274 |
+
# print(f"\nResult {i}:")
|
275 |
+
# print(f"Content: {doc['page_content']}...")
|
276 |
+
# print(f"Score: {doc['score']}")
|
277 |
|
278 |
+
# except Exception as e:
|
279 |
+
# print(f"Error in pipeline: {str(e)}")
|
280 |
|
281 |
+
# if __name__ == "__main__":
|
282 |
+
# main()
|