karthikvarunn committed on
Commit
8503da7
·
verified ·
1 Parent(s): 30b777c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -56
app.py CHANGED
@@ -179,101 +179,74 @@ def generate_output(context, query):
179
  # context_data, combined_context = search_documents(query)
180
 
181
  # document_titles = list({os.path.basename(doc["title"]) for doc in context_data}) # Get only file names
182
-
183
  # formatted_titles = " " + "\n".join(document_titles)
184
 
 
 
185
  # results = {
186
  # "results": [
187
  # {
188
  # "natural_language_output": generate_output(doc["relevant_text"], query),
189
- # "doc_id": doc["doc_id"],
190
  # "chunk_id": doc["chunk_id"],
 
191
  # "title": doc["title"],
192
  # "relevant_text": doc["relevant_text"],
193
  # "page_number": doc["page_number"],
194
  # "score": doc["score"],
195
  # }
196
  # for doc in context_data
197
- # ]
 
198
  # }
199
 
200
  # return results, formatted_titles # Return results and formatted document titles
201
  # except Exception as e:
202
- # return {"results": []}, f"Error in workflow: {str(e)}"
 
203
 
204
  def complete_workflow(query):
205
  try:
206
- context_data, combined_context = search_documents(query)
 
 
 
 
 
 
207
 
208
- document_titles = list({os.path.basename(doc["title"]) for doc in context_data}) # Get only file names
209
- formatted_titles = " " + "\n".join(document_titles)
 
 
 
 
 
210
 
211
- total_results = len(context_data) # Count the total number of results
 
 
212
 
213
  results = {
214
  "results": [
215
  {
216
- "natural_language_output": generate_output(doc["relevant_text"], query),
217
  "chunk_id": doc["chunk_id"],
218
- "document_id": doc["doc_id"], # Assuming doc_id is the UUID
219
  "title": doc["title"],
220
  "relevant_text": doc["relevant_text"],
221
  "page_number": doc["page_number"],
222
  "score": doc["score"],
 
223
  }
224
  for doc in context_data
225
  ],
226
- "total_results": total_results # Added total_results field
227
  }
228
 
229
- return results, formatted_titles # Return results and formatted document titles
230
  except Exception as e:
231
  return {"results": [], "total_results": 0}, f"Error in workflow: {str(e)}"
232
 
233
-
234
- # def complete_workflow(query):
235
- # try:
236
- # # 🔹 Step 1: Perform Hybrid Search (Vector + BM25)
237
- # context_data, combined_context = hybrid_search_documents(query)
238
-
239
- # # 🔹 Step 2: Generate LLM-based Natural Language Output
240
- # llm = ChatOpenAI(model="gpt-4", openai_api_key=openai.api_key, temperature=0.7)
241
- # prompt_template = """
242
- # Use the following context to answer the question as accurately as possible:
243
-
244
- # Context: {context}
245
- # Question: {question}
246
-
247
- # Answer:
248
- # """
249
- # prompt = prompt_template.format(context=combined_context, question=query)
250
- # response = llm([HumanMessage(content=prompt)])
251
-
252
- # # 🔹 Step 3: Format Results
253
- # document_titles = list({os.path.basename(doc["title"]) for doc in context_data}) # Extract unique file names
254
- # formatted_titles = "\n".join(document_titles)
255
-
256
- # results = {
257
- # "results": [
258
- # {
259
- # "natural_language_output": response.content,
260
- # "chunk_id": doc["chunk_id"],
261
- # "document_id": doc["doc_id"],
262
- # "title": doc["title"],
263
- # "relevant_text": doc["relevant_text"],
264
- # "page_number": doc["page_number"],
265
- # "score": doc["score"],
266
- # "method": doc["method"], # "vector" or "bm25"
267
- # }
268
- # for doc in context_data
269
- # ],
270
- # "total_results": len(context_data), # Return total number of retrieved results
271
- # }
272
-
273
- # return results, formatted_titles # Return both results and formatted document titles
274
- # except Exception as e:
275
- # return {"results": [], "total_results": 0}, f"Error in workflow: {str(e)}"
276
-
277
  def gradio_app():
278
  with gr.Blocks(css=".result-output {width: 150%; font-size: 16px; padding: 10px;}") as app:
279
  gr.Markdown("### Intelligent Document Search Prototype-v0.1.2 ")
 
179
  # context_data, combined_context = search_documents(query)
180
 
181
  # document_titles = list({os.path.basename(doc["title"]) for doc in context_data}) # Get only file names
 
182
  # formatted_titles = " " + "\n".join(document_titles)
183
 
184
+ # total_results = len(context_data) # Count the total number of results
185
+
186
  # results = {
187
  # "results": [
188
  # {
189
  # "natural_language_output": generate_output(doc["relevant_text"], query),
 
190
  # "chunk_id": doc["chunk_id"],
191
+ # "document_id": doc["doc_id"], # Assuming doc_id is the UUID
192
  # "title": doc["title"],
193
  # "relevant_text": doc["relevant_text"],
194
  # "page_number": doc["page_number"],
195
  # "score": doc["score"],
196
  # }
197
  # for doc in context_data
198
+ # ],
199
+ # "total_results": total_results # Added total_results field
200
  # }
201
 
202
  # return results, formatted_titles # Return results and formatted document titles
203
  # except Exception as e:
204
+ # return {"results": [], "total_results": 0}, f"Error in workflow: {str(e)}"
205
+
206
 
207
  def complete_workflow(query):
208
  try:
209
+ # 🔹 Step 1: Perform Hybrid Search (Vector + BM25)
210
+ context_data, combined_context = hybrid_search_documents(query)
211
+
212
+ # 🔹 Step 2: Generate LLM-based Natural Language Output
213
+ llm = ChatOpenAI(model="gpt-4", openai_api_key=openai.api_key, temperature=0.7)
214
+ prompt_template = """
215
+ Use the following context to answer the question as accurately as possible:
216
 
217
+ Context: {context}
218
+ Question: {question}
219
+
220
+ Answer:
221
+ """
222
+ prompt = prompt_template.format(context=combined_context, question=query)
223
+ response = llm([HumanMessage(content=prompt)])
224
 
225
+ # 🔹 Step 3: Format Results
226
+ document_titles = list({os.path.basename(doc["title"]) for doc in context_data}) # Extract unique file names
227
+ formatted_titles = "\n".join(document_titles)
228
 
229
  results = {
230
  "results": [
231
  {
232
+ "natural_language_output": response.content,
233
  "chunk_id": doc["chunk_id"],
234
+ "document_id": doc["doc_id"],
235
  "title": doc["title"],
236
  "relevant_text": doc["relevant_text"],
237
  "page_number": doc["page_number"],
238
  "score": doc["score"],
239
+ "method": doc["method"], # "vector" or "bm25"
240
  }
241
  for doc in context_data
242
  ],
243
+ "total_results": len(context_data), # Return total number of retrieved results
244
  }
245
 
246
+ return results, formatted_titles # Return both results and formatted document titles
247
  except Exception as e:
248
  return {"results": [], "total_results": 0}, f"Error in workflow: {str(e)}"
249
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
250
  def gradio_app():
251
  with gr.Blocks(css=".result-output {width: 150%; font-size: 16px; padding: 10px;}") as app:
252
  gr.Markdown("### Intelligent Document Search Prototype-v0.1.2 ")