Spaces:
Running
Running
Daniel Foley
committed on
Commit
·
42a5a2d
1
Parent(s):
2329708
testing new fields for reranking and scaling up initial retrieved docs
Browse files
RAG.py
CHANGED
@@ -56,7 +56,7 @@ class RunLogger:
|
|
56 |
"""Ensure logs are output if logger is garbage collected"""
|
57 |
self.output_logs()
|
58 |
|
59 |
-
def retrieve(query: str,vectorstore:PineconeVectorStore, k: int =
|
60 |
start = time.time()
|
61 |
# pinecone_api_key = os.getenv("PINECONE_API_KEY")
|
62 |
# pc = Pinecone(api_key=pinecone_api_key)
|
@@ -97,7 +97,7 @@ def extract_text_from_json(json_data: Dict) -> str:
|
|
97 |
text_parts = []
|
98 |
|
99 |
# Handle direct text fields
|
100 |
-
text_fields = ["title_info_primary_tsi","abstract_tsi","subject_geographic_sim","genre_specific_ssim"]
|
101 |
for field in text_fields:
|
102 |
if field in json_data['data']['attributes'] and json_data['data']['attributes'][field]:
|
103 |
# print(json_data[field])
|
@@ -162,7 +162,7 @@ def parse_xml_and_check(xml_string: str) -> str:
|
|
162 |
|
163 |
return parsed_response.get('RESPONSE', "No response found in the output")
|
164 |
|
165 |
-
def RAG(llm: Any, query: str,vectorstore:PineconeVectorStore, top: int = 10, k: int =
|
166 |
"""Main RAG function with improved error handling and validation."""
|
167 |
start = time.time()
|
168 |
try:
|
|
|
56 |
"""Ensure logs are output if logger is garbage collected"""
|
57 |
self.output_logs()
|
58 |
|
59 |
+
def retrieve(query: str,vectorstore:PineconeVectorStore, k: int = 100) -> Tuple[List[Document], List[float]]:
|
60 |
start = time.time()
|
61 |
# pinecone_api_key = os.getenv("PINECONE_API_KEY")
|
62 |
# pc = Pinecone(api_key=pinecone_api_key)
|
|
|
97 |
text_parts = []
|
98 |
|
99 |
# Handle direct text fields
|
100 |
+
text_fields = ["title_info_primary_tsi","abstract_tsi","subject_geographic_sim","genre_basic_ssim","genre_specific_ssim","date_tsim"]
|
101 |
for field in text_fields:
|
102 |
if field in json_data['data']['attributes'] and json_data['data']['attributes'][field]:
|
103 |
# print(json_data[field])
|
|
|
162 |
|
163 |
return parsed_response.get('RESPONSE', "No response found in the output")
|
164 |
|
165 |
+
def RAG(llm: Any, query: str,vectorstore:PineconeVectorStore, top: int = 10, k: int = 1000) -> Tuple[str, List[Document]]:
|
166 |
"""Main RAG function with improved error handling and validation."""
|
167 |
start = time.time()
|
168 |
try:
|