Daniel Foley committed on
Commit
42a5a2d
·
1 Parent(s): 2329708

testing new fields for reranking and scaling up initial retrieved docs

Browse files
Files changed (1) hide show
  1. RAG.py +3 -3
RAG.py CHANGED
@@ -56,7 +56,7 @@ class RunLogger:
56
  """Ensure logs are output if logger is garbage collected"""
57
  self.output_logs()
58
 
59
- def retrieve(query: str,vectorstore:PineconeVectorStore, k: int = 1000) -> Tuple[List[Document], List[float]]:
60
  start = time.time()
61
  # pinecone_api_key = os.getenv("PINECONE_API_KEY")
62
  # pc = Pinecone(api_key=pinecone_api_key)
@@ -97,7 +97,7 @@ def extract_text_from_json(json_data: Dict) -> str:
97
  text_parts = []
98
 
99
  # Handle direct text fields
100
- text_fields = ["title_info_primary_tsi","abstract_tsi","subject_geographic_sim","genre_specific_ssim"]
101
  for field in text_fields:
102
  if field in json_data['data']['attributes'] and json_data['data']['attributes'][field]:
103
  # print(json_data[field])
@@ -162,7 +162,7 @@ def parse_xml_and_check(xml_string: str) -> str:
162
 
163
  return parsed_response.get('RESPONSE', "No response found in the output")
164
 
165
- def RAG(llm: Any, query: str,vectorstore:PineconeVectorStore, top: int = 10, k: int = 100) -> Tuple[str, List[Document]]:
166
  """Main RAG function with improved error handling and validation."""
167
  start = time.time()
168
  try:
 
56
  """Ensure logs are output if logger is garbage collected"""
57
  self.output_logs()
58
 
59
+ def retrieve(query: str,vectorstore:PineconeVectorStore, k: int = 100) -> Tuple[List[Document], List[float]]:
60
  start = time.time()
61
  # pinecone_api_key = os.getenv("PINECONE_API_KEY")
62
  # pc = Pinecone(api_key=pinecone_api_key)
 
97
  text_parts = []
98
 
99
  # Handle direct text fields
100
+ text_fields = ["title_info_primary_tsi","abstract_tsi","subject_geographic_sim","genre_basic_ssim","genre_specific_ssim","date_tsim"]
101
  for field in text_fields:
102
  if field in json_data['data']['attributes'] and json_data['data']['attributes'][field]:
103
  # print(json_data[field])
 
162
 
163
  return parsed_response.get('RESPONSE', "No response found in the output")
164
 
165
+ def RAG(llm: Any, query: str,vectorstore:PineconeVectorStore, top: int = 10, k: int = 1000) -> Tuple[str, List[Document]]:
166
  """Main RAG function with improved error handling and validation."""
167
  start = time.time()
168
  try: