Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -174,79 +174,79 @@ def generate_output(context, query):
|
|
174 |
except Exception as e:
|
175 |
return f"Error generating output: {str(e)}"
|
176 |
|
177 |
-
# def complete_workflow(query):
|
178 |
-
# try:
|
179 |
-
# context_data, combined_context = search_documents(query)
|
180 |
-
|
181 |
-
# document_titles = list({os.path.basename(doc["title"]) for doc in context_data}) # Get only file names
|
182 |
-
# formatted_titles = " " + "\n".join(document_titles)
|
183 |
-
|
184 |
-
# total_results = len(context_data) # Count the total number of results
|
185 |
-
|
186 |
-
# results = {
|
187 |
-
# "results": [
|
188 |
-
# {
|
189 |
-
# "natural_language_output": generate_output(doc["relevant_text"], query),
|
190 |
-
# "chunk_id": doc["chunk_id"],
|
191 |
-
# "document_id": doc["doc_id"], # Assuming doc_id is the UUID
|
192 |
-
# "title": doc["title"],
|
193 |
-
# "relevant_text": doc["relevant_text"],
|
194 |
-
# "page_number": doc["page_number"],
|
195 |
-
# "score": doc["score"],
|
196 |
-
# }
|
197 |
-
# for doc in context_data
|
198 |
-
# ],
|
199 |
-
# "total_results": total_results # Added total_results field
|
200 |
-
# }
|
201 |
-
|
202 |
-
# return results, formatted_titles # Return results and formatted document titles
|
203 |
-
# except Exception as e:
|
204 |
-
# return {"results": [], "total_results": 0}, f"Error in workflow: {str(e)}"
|
205 |
-
|
206 |
-
|
207 |
def complete_workflow(query):
|
208 |
try:
|
209 |
-
|
210 |
-
context_data, combined_context = hybrid_search_documents(query)
|
211 |
-
|
212 |
-
# 🔹 Step 2: Generate LLM-based Natural Language Output
|
213 |
-
llm = ChatOpenAI(model="gpt-4", openai_api_key=openai.api_key, temperature=0.7)
|
214 |
-
prompt_template = """
|
215 |
-
Use the following context to answer the question as accurately as possible:
|
216 |
-
|
217 |
-
Context: {context}
|
218 |
-
Question: {question}
|
219 |
|
220 |
-
|
221 |
-
"""
|
222 |
-
prompt = prompt_template.format(context=combined_context, question=query)
|
223 |
-
response = llm([HumanMessage(content=prompt)])
|
224 |
|
225 |
-
#
|
226 |
-
document_titles = list({os.path.basename(doc["title"]) for doc in context_data}) # Extract unique file names
|
227 |
-
formatted_titles = "\n".join(document_titles)
|
228 |
|
229 |
results = {
|
230 |
"results": [
|
231 |
{
|
232 |
-
"natural_language_output":
|
233 |
"chunk_id": doc["chunk_id"],
|
234 |
-
"document_id": doc["doc_id"],
|
235 |
"title": doc["title"],
|
236 |
"relevant_text": doc["relevant_text"],
|
237 |
"page_number": doc["page_number"],
|
238 |
"score": doc["score"],
|
239 |
-
"method": doc["method"], # "vector" or "bm25"
|
240 |
}
|
241 |
for doc in context_data
|
242 |
],
|
243 |
-
"total_results":
|
244 |
}
|
245 |
|
246 |
-
return results, formatted_titles # Return
|
247 |
except Exception as e:
|
248 |
return {"results": [], "total_results": 0}, f"Error in workflow: {str(e)}"
|
249 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
250 |
def gradio_app():
|
251 |
with gr.Blocks(css=".result-output {width: 150%; font-size: 16px; padding: 10px;}") as app:
|
252 |
gr.Markdown("### Intelligent Document Search Prototype-v0.1.2 ")
|
|
|
174 |
except Exception as e:
|
175 |
return f"Error generating output: {str(e)}"
|
176 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def complete_workflow(query):
    """Run the end-to-end document search workflow for *query*.

    Steps:
      1. Retrieve matching chunks and combined context via ``search_documents``.
      2. Generate a natural-language answer per chunk via ``generate_output``.
      3. Package everything into a JSON-serializable results dict.

    Returns:
        tuple: ``(results, formatted_titles)`` where ``results`` is
        ``{"results": [...], "total_results": int}`` and ``formatted_titles``
        is a newline-joined string of the unique source file names.
        On any failure, returns an empty results dict and an error string
        instead of raising.
    """
    try:
        context_data, combined_context = search_documents(query)

        # Unique source file names, sorted so the displayed list is
        # deterministic (iterating a raw set varies between runs).
        document_titles = sorted({os.path.basename(doc["title"]) for doc in context_data})
        # Leading space kept for UI spacing in the Gradio output box.
        formatted_titles = " " + "\n".join(document_titles)

        total_results = len(context_data)  # Count the total number of results

        results = {
            "results": [
                {
                    # NOTE: one LLM call per retrieved chunk — can be slow
                    # for large result sets.
                    "natural_language_output": generate_output(doc["relevant_text"], query),
                    "chunk_id": doc["chunk_id"],
                    "document_id": doc["doc_id"],  # assumed to be the document UUID — confirm upstream
                    "title": doc["title"],
                    "relevant_text": doc["relevant_text"],
                    "page_number": doc["page_number"],
                    "score": doc["score"],
                }
                for doc in context_data
            ],
            "total_results": total_results,
        }

        return results, formatted_titles  # Results plus formatted document titles
    except Exception as e:
        # Boundary handler: surface the failure to the UI rather than crash.
        return {"results": [], "total_results": 0}, f"Error in workflow: {str(e)}"
|
205 |
|
206 |
+
|
207 |
+
# def complete_workflow(query):
|
208 |
+
# try:
|
209 |
+
# # 🔹 Step 1: Perform Hybrid Search (Vector + BM25)
|
210 |
+
# context_data, combined_context = hybrid_search_documents(query)
|
211 |
+
|
212 |
+
# # 🔹 Step 2: Generate LLM-based Natural Language Output
|
213 |
+
# llm = ChatOpenAI(model="gpt-4", openai_api_key=openai.api_key, temperature=0.7)
|
214 |
+
# prompt_template = """
|
215 |
+
# Use the following context to answer the question as accurately as possible:
|
216 |
+
|
217 |
+
# Context: {context}
|
218 |
+
# Question: {question}
|
219 |
+
|
220 |
+
# Answer:
|
221 |
+
# """
|
222 |
+
# prompt = prompt_template.format(context=combined_context, question=query)
|
223 |
+
# response = llm([HumanMessage(content=prompt)])
|
224 |
+
|
225 |
+
# # 🔹 Step 3: Format Results
|
226 |
+
# document_titles = list({os.path.basename(doc["title"]) for doc in context_data}) # Extract unique file names
|
227 |
+
# formatted_titles = "\n".join(document_titles)
|
228 |
+
|
229 |
+
# results = {
|
230 |
+
# "results": [
|
231 |
+
# {
|
232 |
+
# "natural_language_output": response.content,
|
233 |
+
# "chunk_id": doc["chunk_id"],
|
234 |
+
# "document_id": doc["doc_id"],
|
235 |
+
# "title": doc["title"],
|
236 |
+
# "relevant_text": doc["relevant_text"],
|
237 |
+
# "page_number": doc["page_number"],
|
238 |
+
# "score": doc["score"],
|
239 |
+
# "method": doc["method"], # "vector" or "bm25"
|
240 |
+
# }
|
241 |
+
# for doc in context_data
|
242 |
+
# ],
|
243 |
+
# "total_results": len(context_data), # Return total number of retrieved results
|
244 |
+
# }
|
245 |
+
|
246 |
+
# return results, formatted_titles # Return both results and formatted document titles
|
247 |
+
# except Exception as e:
|
248 |
+
# return {"results": [], "total_results": 0}, f"Error in workflow: {str(e)}"
|
249 |
+
|
250 |
def gradio_app():
|
251 |
with gr.Blocks(css=".result-output {width: 150%; font-size: 16px; padding: 10px;}") as app:
|
252 |
gr.Markdown("### Intelligent Document Search Prototype-v0.1.2 ")
|