Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -201,50 +201,79 @@ def generate_output(context, query):
|
|
201 |
# except Exception as e:
|
202 |
# return {"results": []}, f"Error in workflow: {str(e)}"
|
203 |
|
204 |
-
|
205 |
def complete_workflow(query):
|
206 |
try:
|
207 |
-
|
208 |
-
context_data, combined_context = hybrid_search_documents(query)
|
209 |
-
|
210 |
-
# 🔹 Step 2: Generate LLM-based Natural Language Output
|
211 |
-
llm = ChatOpenAI(model="gpt-4", openai_api_key=openai.api_key, temperature=0.7)
|
212 |
-
prompt_template = """
|
213 |
-
Use the following context to answer the question as accurately as possible:
|
214 |
-
|
215 |
-
Context: {context}
|
216 |
-
Question: {question}
|
217 |
|
218 |
-
|
219 |
-
"""
|
220 |
-
prompt = prompt_template.format(context=combined_context, question=query)
|
221 |
-
response = llm([HumanMessage(content=prompt)])
|
222 |
|
223 |
-
# 🔹 Step 3: Format Results
|
224 |
-
document_titles = list({os.path.basename(doc["title"]) for doc in context_data}) # Extract unique file names
|
225 |
-
formatted_titles = "\n".join(document_titles)
|
226 |
|
227 |
results = {
|
228 |
"results": [
|
229 |
{
|
230 |
-
"natural_language_output": response.content,
|
231 |
"chunk_id": doc["chunk_id"],
|
232 |
-
"document_id": doc["doc_id"],
|
233 |
"title": doc["title"],
|
234 |
"relevant_text": doc["relevant_text"],
|
235 |
"page_number": doc["page_number"],
|
236 |
"score": doc["score"],
|
237 |
-
"method": doc["method"], # "vector" or "bm25"
|
238 |
}
|
239 |
for doc in context_data
|
240 |
],
|
241 |
-
"total_results": len(context_data),  # Return total number of retrieved results
|
242 |
}
|
243 |
|
244 |
-
return results, formatted_titles  # Return both results and formatted document titles
|
245 |
except Exception as e:
|
246 |
return {"results": [], "total_results": 0}, f"Error in workflow: {str(e)}"
|
247 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
248 |
def gradio_app():
|
249 |
with gr.Blocks(css=".result-output {width: 150%; font-size: 16px; padding: 10px;}") as app:
|
250 |
gr.Markdown("### Intelligent Document Search Prototype-v0.1.2 ")
|
|
|
201 |
# except Exception as e:
|
202 |
# return {"results": []}, f"Error in workflow: {str(e)}"
|
203 |
|
|
|
204 |
def complete_workflow(query):
    """Run a document search for *query* and package the hits.

    Every hit returned by ``search_documents`` is paired with a
    natural-language answer produced by ``generate_output`` from that hit's
    own ``relevant_text``.

    Args:
        query: The search string, passed to ``search_documents`` and
            ``generate_output``.

    Returns:
        A ``(results, message)`` tuple. ``results`` is a dict holding a
        ``"results"`` list (one entry per hit) and a ``"total_results"``
        count. On success ``message`` is the unique source file names joined
        by newlines (prefixed with a single space); on failure ``results`` is
        empty and ``message`` is ``"Error in workflow: <reason>"``.
    """
    try:
        # The second value returned by search_documents (the combined context)
        # is not used here: answers are generated per hit instead.
        context_data, _ = search_documents(query)

        # dict.fromkeys de-duplicates while keeping the order in which the
        # titles first appear in context_data; a set would shuffle the file
        # names from one run to the next.
        document_titles = list(
            dict.fromkeys(os.path.basename(doc["title"]) for doc in context_data)
        )
        formatted_titles = " " + "\n".join(document_titles)

        results = {
            "results": [
                {
                    "natural_language_output": generate_output(doc["relevant_text"], query),
                    "chunk_id": doc["chunk_id"],
                    "document_id": doc["doc_id"],  # Assuming doc_id is the UUID
                    "title": doc["title"],
                    "relevant_text": doc["relevant_text"],
                    "page_number": doc["page_number"],
                    "score": doc["score"],
                }
                for doc in context_data
            ],
            "total_results": len(context_data),
        }

        return results, formatted_titles
    except Exception as e:
        # Deliberate catch-all: any failure is reported to the caller as an
        # empty result set plus a readable message rather than a traceback.
        return {"results": [], "total_results": 0}, f"Error in workflow: {str(e)}"
|
232 |
|
233 |
+
|
234 |
+
# def complete_workflow(query):
|
235 |
+
# try:
|
236 |
+
# # 🔹 Step 1: Perform Hybrid Search (Vector + BM25)
|
237 |
+
# context_data, combined_context = hybrid_search_documents(query)
|
238 |
+
|
239 |
+
# # 🔹 Step 2: Generate LLM-based Natural Language Output
|
240 |
+
# llm = ChatOpenAI(model="gpt-4", openai_api_key=openai.api_key, temperature=0.7)
|
241 |
+
# prompt_template = """
|
242 |
+
# Use the following context to answer the question as accurately as possible:
|
243 |
+
|
244 |
+
# Context: {context}
|
245 |
+
# Question: {question}
|
246 |
+
|
247 |
+
# Answer:
|
248 |
+
# """
|
249 |
+
# prompt = prompt_template.format(context=combined_context, question=query)
|
250 |
+
# response = llm([HumanMessage(content=prompt)])
|
251 |
+
|
252 |
+
# # 🔹 Step 3: Format Results
|
253 |
+
# document_titles = list({os.path.basename(doc["title"]) for doc in context_data}) # Extract unique file names
|
254 |
+
# formatted_titles = "\n".join(document_titles)
|
255 |
+
|
256 |
+
# results = {
|
257 |
+
# "results": [
|
258 |
+
# {
|
259 |
+
# "natural_language_output": response.content,
|
260 |
+
# "chunk_id": doc["chunk_id"],
|
261 |
+
# "document_id": doc["doc_id"],
|
262 |
+
# "title": doc["title"],
|
263 |
+
# "relevant_text": doc["relevant_text"],
|
264 |
+
# "page_number": doc["page_number"],
|
265 |
+
# "score": doc["score"],
|
266 |
+
# "method": doc["method"], # "vector" or "bm25"
|
267 |
+
# }
|
268 |
+
# for doc in context_data
|
269 |
+
# ],
|
270 |
+
# "total_results": len(context_data), # Return total number of retrieved results
|
271 |
+
# }
|
272 |
+
|
273 |
+
# return results, formatted_titles # Return both results and formatted document titles
|
274 |
+
# except Exception as e:
|
275 |
+
# return {"results": [], "total_results": 0}, f"Error in workflow: {str(e)}"
|
276 |
+
|
277 |
def gradio_app():
|
278 |
with gr.Blocks(css=".result-output {width: 150%; font-size: 16px; padding: 10px;}") as app:
|
279 |
gr.Markdown("### Intelligent Document Search Prototype-v0.1.2 ")
|