radarbackend11262024v11

Runtime error

App Files Files Community

Pijush2023 committed on Sep 13, 2024

Commit

9628571

verified ·

1 Parent(s): d6acf59

Update app.py

Browse files

Files changed (1) hide show

app.py +52 -11

app.py CHANGED Viewed

@@ -527,12 +527,55 @@ import re
 #     return final_response
 def clean_response(response_text):
     # Remove system and user tags
     response_text = re.sub(r'<\|system\|>.*?<\|end\|>', '', response_text, flags=re.DOTALL)
     response_text = re.sub(r'<\|user\|>.*?<\|end\|>', '', response_text, flags=re.DOTALL)
     response_text = re.sub(r'<\|assistant\|>', '', response_text, flags=re.DOTALL)
     # Extract the document name and page number
     document_match = re.search(r"Document\(metadata=\{'source':'(.+?)','page':(\d+)\}", response_text)
     if document_match:
@@ -541,27 +584,25 @@ def clean_response(response_text):
     else:
         document_name = "Unknown"
         page_number = "Unknown"
     # Remove the 'Document(metadata=...' part and keep only the page content
     response_text = re.sub(r'Document\(metadata=\{.*?\},page_content=', '', response_text, flags=re.DOTALL)
     # Remove any unwanted escape characters like \u and \u00
     response_text = re.sub(r'\\u[0-9A-Fa-f]{4}', '', response_text)
-    # Add spaces between words and dates (e.g., "born04/04/1963" becomes "born 04/04/1963")
     response_text = re.sub(r'([a-zA-Z])(\d)', r'\1 \2', response_text)
     response_text = re.sub(r'(\d)([a-zA-Z])', r'\1 \2', response_text)
-    # Clean up the text by removing extra whitespace
-    cleaned_response = response_text.strip()
-    cleaned_response = re.sub(r'\s+', ' ', cleaned_response)
-    # Ensure the response is conversational and organized by removing any prefixes like "Document:"
-    cleaned_response = re.sub(r"^Sure! The Responses are as follows: - Document: \"", '', cleaned_response)
-    # Return the cleaned response with bullet points
     final_response = f"""
-Sure! The Responses are as follows:
 • Document name - {document_name}
 • Page No - {page_number}
 • Response - {cleaned_response}

 #     return final_response
+# def clean_response(response_text):
+#     # Remove system and user tags
+#     response_text = re.sub(r'<\|system\|>.*?<\|end\|>', '', response_text, flags=re.DOTALL)
+#     response_text = re.sub(r'<\|user\|>.*?<\|end\|>', '', response_text, flags=re.DOTALL)
+#     response_text = re.sub(r'<\|assistant\|>', '', response_text, flags=re.DOTALL)
+#     # Extract the document name and page number
+#     document_match = re.search(r"Document\(metadata=\{'source':'(.+?)','page':(\d+)\}", response_text)
+#     if document_match:
+#         document_name = document_match.group(1).split('/')[-1]  # Get the document name
+#         page_number = document_match.group(2)  # Get the page number
+#     else:
+#         document_name = "Unknown"
+#         page_number = "Unknown"
+#     # Remove the 'Document(metadata=...' part and keep only the page content
+#     response_text = re.sub(r'Document\(metadata=\{.*?\},page_content=', '', response_text, flags=re.DOTALL)
+#     # Remove any unwanted escape characters like \u and \u00
+#     response_text = re.sub(r'\\u[0-9A-Fa-f]{4}', '', response_text)
+#     # Add spaces between words and dates (e.g., "born04/04/1963" becomes "born 04/04/1963")
+#     response_text = re.sub(r'([a-zA-Z])(\d)', r'\1 \2', response_text)
+#     response_text = re.sub(r'(\d)([a-zA-Z])', r'\1 \2', response_text)
+#     # Clean up the text by removing extra whitespace
+#     cleaned_response = response_text.strip()
+#     cleaned_response = re.sub(r'\s+', ' ', cleaned_response)
+#     # Ensure the response is conversational and organized by removing any prefixes like "Document:"
+#     cleaned_response = re.sub(r"^Sure! The Responses are as follows: - Document: \"", '', cleaned_response)
+#     # Return the cleaned response with bullet points
+#     final_response = f"""
+# Sure! The Responses are as follows:
+# • Document name - {document_name}
+# • Page No - {page_number}
+# • Response - {cleaned_response}
+# """
+#     return final_response
 def clean_response(response_text):
     # Remove system and user tags
     response_text = re.sub(r'<\|system\|>.*?<\|end\|>', '', response_text, flags=re.DOTALL)
     response_text = re.sub(r'<\|user\|>.*?<\|end\|>', '', response_text, flags=re.DOTALL)
     response_text = re.sub(r'<\|assistant\|>', '', response_text, flags=re.DOTALL)
     # Extract the document name and page number
     document_match = re.search(r"Document\(metadata=\{'source':'(.+?)','page':(\d+)\}", response_text)
     if document_match:
     else:
         document_name = "Unknown"
         page_number = "Unknown"
     # Remove the 'Document(metadata=...' part and keep only the page content
     response_text = re.sub(r'Document\(metadata=\{.*?\},page_content=', '', response_text, flags=re.DOTALL)
     # Remove any unwanted escape characters like \u and \u00
     response_text = re.sub(r'\\u[0-9A-Fa-f]{4}', '', response_text)
+    # Ensure proper spacing between words and dates
     response_text = re.sub(r'([a-zA-Z])(\d)', r'\1 \2', response_text)
     response_text = re.sub(r'(\d)([a-zA-Z])', r'\1 \2', response_text)
+    # Remove the phrase "Sure! The Responses are as follows:" from the actual content
+    response_text = re.sub(r'Sure! The Responses are as follows:', '', response_text).strip()
+    # Clean up the text by removing extra whitespace
+    cleaned_response = re.sub(r'\s+', ' ', response_text).strip()
+    # Format the final response with bullet points
     final_response = f"""
 • Document name - {document_name}
 • Page No - {page_number}
 • Response - {cleaned_response}