Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -174,79 +174,79 @@ def generate_output(context, query):
|
|
174 |
except Exception as e:
|
175 |
return f"Error generating output: {str(e)}"
|
176 |
|
177 |
-
# def complete_workflow(query):
|
178 |
-
# try:
|
179 |
-
# context_data, combined_context = search_documents(query)
|
180 |
-
|
181 |
-
# document_titles = list({os.path.basename(doc["title"]) for doc in context_data}) # Get only file names
|
182 |
-
# formatted_titles = " " + "\n".join(document_titles)
|
183 |
-
|
184 |
-
# total_results = len(context_data) # Count the total number of results
|
185 |
-
|
186 |
-
# results = {
|
187 |
-
# "results": [
|
188 |
-
# {
|
189 |
-
# "natural_language_output": generate_output(doc["relevant_text"], query),
|
190 |
-
# "chunk_id": doc["chunk_id"],
|
191 |
-
# "document_id": doc["doc_id"], # Assuming doc_id is the UUID
|
192 |
-
# "title": doc["title"],
|
193 |
-
# "relevant_text": doc["relevant_text"],
|
194 |
-
# "page_number": doc["page_number"],
|
195 |
-
# "score": doc["score"],
|
196 |
-
# }
|
197 |
-
# for doc in context_data
|
198 |
-
# ],
|
199 |
-
# "total_results": total_results # Added total_results field
|
200 |
-
# }
|
201 |
-
|
202 |
-
# return results, formatted_titles # Return results and formatted document titles
|
203 |
-
# except Exception as e:
|
204 |
-
# return {"results": [], "total_results": 0}, f"Error in workflow: {str(e)}"
|
205 |
-
|
206 |
-
|
207 |
def complete_workflow(query):
|
208 |
try:
|
209 |
-
|
210 |
-
context_data, combined_context = hybrid_search_documents(query)
|
211 |
-
|
212 |
-
# 🔹 Step 2: Generate LLM-based Natural Language Output
|
213 |
-
llm = ChatOpenAI(model="gpt-4", openai_api_key=openai.api_key, temperature=0.7)
|
214 |
-
prompt_template = """
|
215 |
-
Use the following context to answer the question as accurately as possible:
|
216 |
-
|
217 |
-
Context: {context}
|
218 |
-
Question: {question}
|
219 |
|
220 |
-
|
221 |
-
"""
|
222 |
-
prompt = prompt_template.format(context=combined_context, question=query)
|
223 |
-
response = llm([HumanMessage(content=prompt)])
|
224 |
|
225 |
-
#
|
226 |
-
document_titles = list({os.path.basename(doc["title"]) for doc in context_data}) # Extract unique file names
|
227 |
-
formatted_titles = "\n".join(document_titles)
|
228 |
|
229 |
results = {
|
230 |
"results": [
|
231 |
{
|
232 |
-
"natural_language_output":
|
233 |
"chunk_id": doc["chunk_id"],
|
234 |
-
"document_id": doc["doc_id"],
|
235 |
"title": doc["title"],
|
236 |
"relevant_text": doc["relevant_text"],
|
237 |
"page_number": doc["page_number"],
|
238 |
"score": doc["score"],
|
239 |
-
"method": doc["method"], # "vector" or "bm25"
|
240 |
}
|
241 |
for doc in context_data
|
242 |
],
|
243 |
-
"total_results":
|
244 |
}
|
245 |
|
246 |
-
return results, formatted_titles # Return
|
247 |
except Exception as e:
|
248 |
return {"results": [], "total_results": 0}, f"Error in workflow: {str(e)}"
|
249 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
250 |
def gradio_app():
|
251 |
with gr.Blocks(css=".result-output {width: 150%; font-size: 16px; padding: 10px;}") as app:
|
252 |
gr.Markdown("### Intelligent Document Search Prototype-v0.1.2 ")
|
|
|
174 |
except Exception as e:
|
175 |
return f"Error generating output: {str(e)}"
|
176 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def complete_workflow(query):
    """Run the end-to-end document search workflow for *query*.

    Steps:
      1. Retrieve matching chunks and combined context via ``search_documents``.
      2. Generate a natural-language answer per chunk via ``generate_output``.
      3. Package everything into a JSON-serializable results dict.

    Returns:
        tuple: ``(results, formatted_titles)`` where ``results`` is
        ``{"results": [...], "total_results": int}`` and ``formatted_titles``
        is a newline-joined string of the unique source file names.
        On any failure, returns an empty results dict and an error string
        instead of raising.
    """
    try:
        context_data, combined_context = search_documents(query)

        # Unique source file names, sorted so the displayed list is
        # deterministic (iterating a raw set varies between runs).
        document_titles = sorted({os.path.basename(doc["title"]) for doc in context_data})
        # Leading space kept for UI spacing in the Gradio output box.
        formatted_titles = " " + "\n".join(document_titles)

        total_results = len(context_data)  # Count the total number of results

        results = {
            "results": [
                {
                    # NOTE: one LLM call per retrieved chunk — can be slow
                    # for large result sets.
                    "natural_language_output": generate_output(doc["relevant_text"], query),
                    "chunk_id": doc["chunk_id"],
                    "document_id": doc["doc_id"],  # assumed to be the document UUID — confirm upstream
                    "title": doc["title"],
                    "relevant_text": doc["relevant_text"],
                    "page_number": doc["page_number"],
                    "score": doc["score"],
                }
                for doc in context_data
            ],
            "total_results": total_results,
        }

        return results, formatted_titles  # Results plus formatted document titles
    except Exception as e:
        # Boundary handler: surface the failure to the UI rather than crash.
        return {"results": [], "total_results": 0}, f"Error in workflow: {str(e)}"
|
205 |
|
206 |
+
|
207 |
+
# def complete_workflow(query):
|
208 |
+
# try:
|
209 |
+
# # 🔹 Step 1: Perform Hybrid Search (Vector + BM25)
|
210 |
+
# context_data, combined_context = hybrid_search_documents(query)
|
211 |
+
|
212 |
+
# # 🔹 Step 2: Generate LLM-based Natural Language Output
|
213 |
+
# llm = ChatOpenAI(model="gpt-4", openai_api_key=openai.api_key, temperature=0.7)
|
214 |
+
# prompt_template = """
|
215 |
+
# Use the following context to answer the question as accurately as possible:
|
216 |
+
|
217 |
+
# Context: {context}
|
218 |
+
# Question: {question}
|
219 |
+
|
220 |
+
# Answer:
|
221 |
+
# """
|
222 |
+
# prompt = prompt_template.format(context=combined_context, question=query)
|
223 |
+
# response = llm([HumanMessage(content=prompt)])
|
224 |
+
|
225 |
+
# # 🔹 Step 3: Format Results
|
226 |
+
# document_titles = list({os.path.basename(doc["title"]) for doc in context_data}) # Extract unique file names
|
227 |
+
# formatted_titles = "\n".join(document_titles)
|
228 |
+
|
229 |
+
# results = {
|
230 |
+
# "results": [
|
231 |
+
# {
|
232 |
+
# "natural_language_output": response.content,
|
233 |
+
# "chunk_id": doc["chunk_id"],
|
234 |
+
# "document_id": doc["doc_id"],
|
235 |
+
# "title": doc["title"],
|
236 |
+
# "relevant_text": doc["relevant_text"],
|
237 |
+
# "page_number": doc["page_number"],
|
238 |
+
# "score": doc["score"],
|
239 |
+
# "method": doc["method"], # "vector" or "bm25"
|
240 |
+
# }
|
241 |
+
# for doc in context_data
|
242 |
+
# ],
|
243 |
+
# "total_results": len(context_data), # Return total number of retrieved results
|
244 |
+
# }
|
245 |
+
|
246 |
+
# return results, formatted_titles # Return both results and formatted document titles
|
247 |
+
# except Exception as e:
|
248 |
+
# return {"results": [], "total_results": 0}, f"Error in workflow: {str(e)}"
|
249 |
+
|
250 |
def gradio_app():
|
251 |
with gr.Blocks(css=".result-output {width: 150%; font-size: 16px; padding: 10px;}") as app:
|
252 |
gr.Markdown("### Intelligent Document Search Prototype-v0.1.2 ")
|