karthikvarunn committed on
Commit
61b65cd
·
verified ·
1 Parent(s): c2785d6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +52 -23
app.py CHANGED
@@ -201,50 +201,79 @@ def generate_output(context, query):
201
  # except Exception as e:
202
  # return {"results": []}, f"Error in workflow: {str(e)}"
203
 
204
-
205
  def complete_workflow(query):
206
  try:
207
- # 🔹 Step 1: Perform Hybrid Search (Vector + BM25)
208
- context_data, combined_context = hybrid_search_documents(query)
209
-
210
- # 🔹 Step 2: Generate LLM-based Natural Language Output
211
- llm = ChatOpenAI(model="gpt-4", openai_api_key=openai.api_key, temperature=0.7)
212
- prompt_template = """
213
- Use the following context to answer the question as accurately as possible:
214
-
215
- Context: {context}
216
- Question: {question}
217
 
218
- Answer:
219
- """
220
- prompt = prompt_template.format(context=combined_context, question=query)
221
- response = llm([HumanMessage(content=prompt)])
222
 
223
- # 🔹 Step 3: Format Results
224
- document_titles = list({os.path.basename(doc["title"]) for doc in context_data}) # Extract unique file names
225
- formatted_titles = "\n".join(document_titles)
226
 
227
  results = {
228
  "results": [
229
  {
230
- "natural_language_output": response.content,
231
  "chunk_id": doc["chunk_id"],
232
- "document_id": doc["doc_id"],
233
  "title": doc["title"],
234
  "relevant_text": doc["relevant_text"],
235
  "page_number": doc["page_number"],
236
  "score": doc["score"],
237
- "method": doc["method"], # "vector" or "bm25"
238
  }
239
  for doc in context_data
240
  ],
241
- "total_results": len(context_data), # Return total number of retrieved results
242
  }
243
 
244
- return results, formatted_titles # Return both results and formatted document titles
245
  except Exception as e:
246
  return {"results": [], "total_results": 0}, f"Error in workflow: {str(e)}"
247
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
248
  def gradio_app():
249
  with gr.Blocks(css=".result-output {width: 150%; font-size: 16px; padding: 10px;}") as app:
250
  gr.Markdown("### Intelligent Document Search Prototype-v0.1.2 ")
 
201
  # except Exception as e:
202
  # return {"results": []}, f"Error in workflow: {str(e)}"
203
 
 
204
  def complete_workflow(query):
205
  try:
206
+ context_data, combined_context = search_documents(query)
 
 
 
 
 
 
 
 
 
207
 
208
+ document_titles = list({os.path.basename(doc["title"]) for doc in context_data}) # Get only file names
209
+ formatted_titles = " " + "\n".join(document_titles)
 
 
210
 
211
+ total_results = len(context_data) # Count the total number of results
 
 
212
 
213
  results = {
214
  "results": [
215
  {
216
+ "natural_language_output": generate_output(doc["relevant_text"], query),
217
  "chunk_id": doc["chunk_id"],
218
+ "document_id": doc["doc_id"], # Assuming doc_id is the UUID
219
  "title": doc["title"],
220
  "relevant_text": doc["relevant_text"],
221
  "page_number": doc["page_number"],
222
  "score": doc["score"],
 
223
  }
224
  for doc in context_data
225
  ],
226
+ "total_results": total_results # Added total_results field
227
  }
228
 
229
+ return results, formatted_titles # Return results and formatted document titles
230
  except Exception as e:
231
  return {"results": [], "total_results": 0}, f"Error in workflow: {str(e)}"
232
 
233
+
234
+ # def complete_workflow(query):
235
+ # try:
236
+ # # 🔹 Step 1: Perform Hybrid Search (Vector + BM25)
237
+ # context_data, combined_context = hybrid_search_documents(query)
238
+
239
+ # # 🔹 Step 2: Generate LLM-based Natural Language Output
240
+ # llm = ChatOpenAI(model="gpt-4", openai_api_key=openai.api_key, temperature=0.7)
241
+ # prompt_template = """
242
+ # Use the following context to answer the question as accurately as possible:
243
+
244
+ # Context: {context}
245
+ # Question: {question}
246
+
247
+ # Answer:
248
+ # """
249
+ # prompt = prompt_template.format(context=combined_context, question=query)
250
+ # response = llm([HumanMessage(content=prompt)])
251
+
252
+ # # 🔹 Step 3: Format Results
253
+ # document_titles = list({os.path.basename(doc["title"]) for doc in context_data}) # Extract unique file names
254
+ # formatted_titles = "\n".join(document_titles)
255
+
256
+ # results = {
257
+ # "results": [
258
+ # {
259
+ # "natural_language_output": response.content,
260
+ # "chunk_id": doc["chunk_id"],
261
+ # "document_id": doc["doc_id"],
262
+ # "title": doc["title"],
263
+ # "relevant_text": doc["relevant_text"],
264
+ # "page_number": doc["page_number"],
265
+ # "score": doc["score"],
266
+ # "method": doc["method"], # "vector" or "bm25"
267
+ # }
268
+ # for doc in context_data
269
+ # ],
270
+ # "total_results": len(context_data), # Return total number of retrieved results
271
+ # }
272
+
273
+ # return results, formatted_titles # Return both results and formatted document titles
274
+ # except Exception as e:
275
+ # return {"results": [], "total_results": 0}, f"Error in workflow: {str(e)}"
276
+
277
  def gradio_app():
278
  with gr.Blocks(css=".result-output {width: 150%; font-size: 16px; padding: 10px;}") as app:
279
  gr.Markdown("### Intelligent Document Search Prototype-v0.1.2 ")