radarbackend11262024v11

Runtime error

App Files Files Community

Pijush2023 committed on Sep 15, 2024

Commit

8b5d3bd

verified ·

1 Parent(s): 072f1b0

Update app.py

Browse files

Files changed (1) hide show

app.py +63 -10

app.py CHANGED Viewed

@@ -604,6 +604,55 @@ import re
 #     return final_response
 def clean_response(response_text):
     # Remove system and user tags
     response_text = re.sub(r'<\|system\|>.*?<\|end\|>', '', response_text, flags=re.DOTALL)
@@ -623,31 +672,35 @@ def clean_response(response_text):
     # Remove the entire 'Document(metadata=...' and any mention of it from the response
     response_text = re.sub(r'Document\(metadata=\{.*?\},page_content=', '', response_text, flags=re.DOTALL)
-    # Extract top 5 fetched results (assuming the top 5 results are the first 5 lines of content)
     top_results = response_text.split('\n')[:5]  # Adjust this as per your actual data structure
     # Remove any unwanted escape characters like \u and \u00
     response_text = re.sub(r'\\u[0-9A-Fa-f]{4}', '', response_text)
-    # Ensure proper spacing between words and dates
-    response_text = re.sub(r'([a-zA-Z])(\d)', r'\1 \2', response_text)
-    response_text = re.sub(r'(\d)([a-zA-Z])', r'\1 \2', response_text)
-    # Remove the phrase "Sure! The Responses are as follows:" from the actual content
-    response_text = re.sub(r'Sure! The Responses are as follows:', '', response_text).strip()
     # Clean up the text by removing extra whitespace
     cleaned_response = re.sub(r'\s+', ' ', response_text).strip()
     # Format the final response with bullet points
     top_five_formatted = ''.join([f'{i+1}. {result.strip()}\n' for i, result in enumerate(top_results)])
     final_response = (
         f"Sure! Here is the response for your Query:\n"
         f"• Document name - {document_name}\n"
         f"• Page No - {page_number}\n"
-        f"• Top 5 Fetched Results:\n{top_five_formatted}"
-        f"• Actual Response - {cleaned_response}"
     )
     return final_response

 #     return final_response
+# def clean_response(response_text):
+#     # Remove system and user tags
+#     response_text = re.sub(r'<\|system\|>.*?<\|end\|>', '', response_text, flags=re.DOTALL)
+#     response_text = re.sub(r'<\|user\|>.*?<\|end\|>', '', response_text, flags=re.DOTALL)
+#     response_text = re.sub(r'<\|assistant\|>', '', response_text, flags=re.DOTALL)
+#     # Extract the document name and page number
+#     document_match = re.search(r"Document\(metadata=\{'source': '(.+?)', 'page': (\d+)\}", response_text)
+#     if document_match:
+#         document_name = document_match.group(1).split('/')[-1]  # Get the document name
+#         page_number = document_match.group(2)  # Get the page number
+#     else:
+#         document_name = "Unknown"
+#         page_number = "Unknown"
+#     # Remove the entire 'Document(metadata=...' and any mention of it from the response
+#     response_text = re.sub(r'Document\(metadata=\{.*?\},page_content=', '', response_text, flags=re.DOTALL)
+#     # Extract top 5 fetched results (assuming the top 5 results are the first 5 lines of content)
+#     top_results = response_text.split('\n')[:5]  # Adjust this as per your actual data structure
+#     # Remove any unwanted escape characters like \u and \u00
+#     response_text = re.sub(r'\\u[0-9A-Fa-f]{4}', '', response_text)
+#     # Ensure proper spacing between words and dates
+#     response_text = re.sub(r'([a-zA-Z])(\d)', r'\1 \2', response_text)
+#     response_text = re.sub(r'(\d)([a-zA-Z])', r'\1 \2', response_text)
+#     # Remove the phrase "Sure! The Responses are as follows:" from the actual content
+#     response_text = re.sub(r'Sure! The Responses are as follows:', '', response_text).strip()
+#     # Clean up the text by removing extra whitespace
+#     cleaned_response = re.sub(r'\s+', ' ', response_text).strip()
+#     # Format the final response with bullet points
+#     top_five_formatted = ''.join([f'{i+1}. {result.strip()}\n' for i, result in enumerate(top_results)])
+#     final_response = (
+#         f"Sure! Here is the response for your Query:\n"
+#         f"• Document name - {document_name}\n"
+#         f"• Page No - {page_number}\n"
+#         f"• Top 5 Fetched Results:\n{top_five_formatted}"
+#         f"• Actual Response - {cleaned_response}"
+#     )
+#     return final_response
 def clean_response(response_text):
     # Remove system and user tags
     response_text = re.sub(r'<\|system\|>.*?<\|end\|>', '', response_text, flags=re.DOTALL)
     # Remove the entire 'Document(metadata=...' and any mention of it from the response
     response_text = re.sub(r'Document\(metadata=\{.*?\},page_content=', '', response_text, flags=re.DOTALL)
+    # Extract the actual content from the response
+    content_match = re.search(r'page_content="(.+?)"', response_text)
+    if content_match:
+        actual_content = content_match.group(1)
+    else:
+        actual_content = "No content available."
+    # Limit the actual content to a short, precise snippet
+    actual_content = actual_content[:200] + "..." if len(actual_content) > 200 else actual_content
+    # Extract top 5 fetched results (currently just the first 5 lines of the remaining text)
     top_results = response_text.split('\n')[:5]  # Adjust this as per your actual data structure
     # Remove any unwanted escape characters like \u and \u00
     response_text = re.sub(r'\\u[0-9A-Fa-f]{4}', '', response_text)
     # Clean up the text by removing extra whitespace
     cleaned_response = re.sub(r'\s+', ' ', response_text).strip()
     # Format the final response with bullet points
     top_five_formatted = ''.join([f'{i+1}. {result.strip()}\n' for i, result in enumerate(top_results)])
+    # Final formatted response
     final_response = (
         f"Sure! Here is the response for your Query:\n"
         f"• Document name - {document_name}\n"
         f"• Page No - {page_number}\n"
+        f"• Top 5 Fetched Results:\n{top_five_formatted if top_five_formatted.strip() else 'No results found.'}"
+        f"• Actual Response - {actual_content}"
     )
     return final_response