karthikvarunn committed on
Commit
e6ae15d
·
verified ·
1 Parent(s): 8503da7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +52 -52
app.py CHANGED
@@ -174,79 +174,79 @@ def generate_output(context, query):
174
  except Exception as e:
175
  return f"Error generating output: {str(e)}"
176
 
177
- # def complete_workflow(query):
178
- # try:
179
- # context_data, combined_context = search_documents(query)
180
-
181
- # document_titles = list({os.path.basename(doc["title"]) for doc in context_data}) # Get only file names
182
- # formatted_titles = " " + "\n".join(document_titles)
183
-
184
- # total_results = len(context_data) # Count the total number of results
185
-
186
- # results = {
187
- # "results": [
188
- # {
189
- # "natural_language_output": generate_output(doc["relevant_text"], query),
190
- # "chunk_id": doc["chunk_id"],
191
- # "document_id": doc["doc_id"], # Assuming doc_id is the UUID
192
- # "title": doc["title"],
193
- # "relevant_text": doc["relevant_text"],
194
- # "page_number": doc["page_number"],
195
- # "score": doc["score"],
196
- # }
197
- # for doc in context_data
198
- # ],
199
- # "total_results": total_results # Added total_results field
200
- # }
201
-
202
- # return results, formatted_titles # Return results and formatted document titles
203
- # except Exception as e:
204
- # return {"results": [], "total_results": 0}, f"Error in workflow: {str(e)}"
205
-
206
-
207
  def complete_workflow(query):
208
  try:
209
- # 🔹 Step 1: Perform Hybrid Search (Vector + BM25)
210
- context_data, combined_context = hybrid_search_documents(query)
211
-
212
- # 🔹 Step 2: Generate LLM-based Natural Language Output
213
- llm = ChatOpenAI(model="gpt-4", openai_api_key=openai.api_key, temperature=0.7)
214
- prompt_template = """
215
- Use the following context to answer the question as accurately as possible:
216
-
217
- Context: {context}
218
- Question: {question}
219
 
220
- Answer:
221
- """
222
- prompt = prompt_template.format(context=combined_context, question=query)
223
- response = llm([HumanMessage(content=prompt)])
224
 
225
- # 🔹 Step 3: Format Results
226
- document_titles = list({os.path.basename(doc["title"]) for doc in context_data}) # Extract unique file names
227
- formatted_titles = "\n".join(document_titles)
228
 
229
  results = {
230
  "results": [
231
  {
232
- "natural_language_output": response.content,
233
  "chunk_id": doc["chunk_id"],
234
- "document_id": doc["doc_id"],
235
  "title": doc["title"],
236
  "relevant_text": doc["relevant_text"],
237
  "page_number": doc["page_number"],
238
  "score": doc["score"],
239
- "method": doc["method"], # "vector" or "bm25"
240
  }
241
  for doc in context_data
242
  ],
243
- "total_results": len(context_data), # Return total number of retrieved results
244
  }
245
 
246
- return results, formatted_titles # Return both results and formatted document titles
247
  except Exception as e:
248
  return {"results": [], "total_results": 0}, f"Error in workflow: {str(e)}"
249
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
250
  def gradio_app():
251
  with gr.Blocks(css=".result-output {width: 150%; font-size: 16px; padding: 10px;}") as app:
252
  gr.Markdown("### Intelligent Document Search Prototype-v0.1.2 ")
 
174
  except Exception as e:
175
  return f"Error generating output: {str(e)}"
176
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
177
  def complete_workflow(query):
178
  try:
179
+ context_data, combined_context = search_documents(query)
 
 
 
 
 
 
 
 
 
180
 
181
+ document_titles = list({os.path.basename(doc["title"]) for doc in context_data}) # Get only file names
182
+ formatted_titles = " " + "\n".join(document_titles)
 
 
183
 
184
+ total_results = len(context_data) # Count the total number of results
 
 
185
 
186
  results = {
187
  "results": [
188
  {
189
+ "natural_language_output": generate_output(doc["relevant_text"], query),
190
  "chunk_id": doc["chunk_id"],
191
+ "document_id": doc["doc_id"], # Assuming doc_id is the UUID
192
  "title": doc["title"],
193
  "relevant_text": doc["relevant_text"],
194
  "page_number": doc["page_number"],
195
  "score": doc["score"],
 
196
  }
197
  for doc in context_data
198
  ],
199
+ "total_results": total_results # Added total_results field
200
  }
201
 
202
+ return results, formatted_titles # Return results and formatted document titles
203
  except Exception as e:
204
  return {"results": [], "total_results": 0}, f"Error in workflow: {str(e)}"
205
 
206
+
207
+ # def complete_workflow(query):
208
+ # try:
209
+ # # 🔹 Step 1: Perform Hybrid Search (Vector + BM25)
210
+ # context_data, combined_context = hybrid_search_documents(query)
211
+
212
+ # # 🔹 Step 2: Generate LLM-based Natural Language Output
213
+ # llm = ChatOpenAI(model="gpt-4", openai_api_key=openai.api_key, temperature=0.7)
214
+ # prompt_template = """
215
+ # Use the following context to answer the question as accurately as possible:
216
+
217
+ # Context: {context}
218
+ # Question: {question}
219
+
220
+ # Answer:
221
+ # """
222
+ # prompt = prompt_template.format(context=combined_context, question=query)
223
+ # response = llm([HumanMessage(content=prompt)])
224
+
225
+ # # 🔹 Step 3: Format Results
226
+ # document_titles = list({os.path.basename(doc["title"]) for doc in context_data}) # Extract unique file names
227
+ # formatted_titles = "\n".join(document_titles)
228
+
229
+ # results = {
230
+ # "results": [
231
+ # {
232
+ # "natural_language_output": response.content,
233
+ # "chunk_id": doc["chunk_id"],
234
+ # "document_id": doc["doc_id"],
235
+ # "title": doc["title"],
236
+ # "relevant_text": doc["relevant_text"],
237
+ # "page_number": doc["page_number"],
238
+ # "score": doc["score"],
239
+ # "method": doc["method"], # "vector" or "bm25"
240
+ # }
241
+ # for doc in context_data
242
+ # ],
243
+ # "total_results": len(context_data), # Return total number of retrieved results
244
+ # }
245
+
246
+ # return results, formatted_titles # Return both results and formatted document titles
247
+ # except Exception as e:
248
+ # return {"results": [], "total_results": 0}, f"Error in workflow: {str(e)}"
249
+
250
  def gradio_app():
251
  with gr.Blocks(css=".result-output {width: 150%; font-size: 16px; padding: 10px;}") as app:
252
  gr.Markdown("### Intelligent Document Search Prototype-v0.1.2 ")