radarbackend11262024v11

Runtime error

App Files Files Community

Pijush2023 committed on Sep 13, 2024

Commit

893b484

verified ·

1 Parent(s): 7357061

Update app.py

Browse files

Files changed (1) hide show

app.py +14 -58

app.py CHANGED Viewed

@@ -437,70 +437,26 @@ def bot(history, choice, tts_choice, retrieval_mode, model_choice):
-# import re
-# def clean_response(response_text):
-#     # Remove system and user tags
-#     response_text = re.sub(r'<\|system\|>.*?<\|end\|>', '', response_text, flags=re.DOTALL)
-#     response_text = re.sub(r'<\|user\|>.*?<\|end\|>', '', response_text, flags=re.DOTALL)
-#     response_text = re.sub(r'<\|assistant\|>', '', response_text, flags=re.DOTALL)
-#     # Clean up the text by removing extra whitespace
-#     cleaned_response = response_text.strip()
-#     cleaned_response = re.sub(r'\s+', ' ', cleaned_response)
-#     # Ensure the response is conversational and organized
-#     cleaned_response = cleaned_response.replace('1.', '\n1.').replace('2.', '\n2.').replace('3.', '\n3.').replace('4.', '\n4.').replace('5.', '\n5.')
-#     return cleaned_response
-def extract_metadata(response_text):
-    """
-    Extract document metadata like document name and page number from the response.
-    """
-    # Extract document name (source) and page number
-    doc_name_match = re.search(r"'source':\s?'([^']*)'", response_text)
-    page_number_match = re.search(r"'page':\s?(\d+)", response_text)
-    # Get the document name and page number from the matches
-    document_name = doc_name_match.group(1) if doc_name_match else "Unknown Document"
-    page_number = page_number_match.group(1) if page_number_match else "Unknown Page"
-    return document_name, page_number
-def clean_and_format_response(response_text):
-    """
-    Clean the response and format it into a structured format:
-    - Document Name
-    - Document Page No
-    - Response Content
-    """
-    # Extract metadata (document name and page number)
-    document_name, page_number = extract_metadata(response_text)
-    # Remove metadata section from the response
-    response_text = re.sub(r'Document\(metadata=.*?,page_content="', '', response_text)
-    response_text = re.sub(r'<\|system\|>.*?<\|end\|>', '', response_text, flags=re.DOTALL)
-    response_text = re.sub(r'<\|user\|>.*?<\|end\|>', '', response_text, flags=re.DOTALL)
-    response_text = re.sub(r'<\|assistant\|>', '', response_text, flags=re.DOTALL)
-    # Replace encoded characters and clean the content
-    response_text = response_text.replace('\\u2019', "'")  # replace unicode apostrophe
-    response_text = response_text.replace('\\u00e8', 'è')  # replace accented characters
-    response_text = response_text.replace('\\u00e0', 'à')
-    response_text = response_text.replace('\\n', '\n')  # newline characters
-    response_text = response_text.replace('\\\\', '\\')  # backslashes
-    # Remove any trailing document information and unwanted characters
-    response_text = re.sub(r'\\.*$', '', response_text)
-    # Clean up spaces and new lines
-    response_text = response_text.strip()  # Remove leading/trailing whitespace
-    response_text = re.sub(r' +', ' ', response_text)  # Replace multiple spaces with a single space
-    response_text = re.sub(r'\n+', '\n', response_text)  # Replace multiple newlines with a single newline
-    # Return the formatted output
-    return f"Document Name: {document_name}\nDocument Page No: {page_number}\nResponse:\n{response_text}"

+import re
+def clean_response(response_text):
+    # Remove system and user tags
+    response_text = re.sub(r'<\|system\|>.*?<\|end\|>', '', response_text, flags=re.DOTALL)
+    response_text = re.sub(r'<\|user\|>.*?<\|end\|>', '', response_text, flags=re.DOTALL)
+    response_text = re.sub(r'<\|assistant\|>', '', response_text, flags=re.DOTALL)
+    # Clean up the text by removing extra whitespace
+    cleaned_response = response_text.strip()
+    cleaned_response = re.sub(r'\s+', ' ', cleaned_response)
+    # Ensure the response is conversational and organized
+    cleaned_response = cleaned_response.replace('1.', '\n1.').replace('2.', '\n2.').replace('3.', '\n3.').replace('4.', '\n4.').replace('5.', '\n5.')
+    return cleaned_response