radarbackend11262024v11

Runtime error

App Files Files Community

Pijush2023 commited on Sep 13, 2024

Commit

7357061

verified ·

1 Parent(s): 9ade06a

Update app.py

Browse files

Files changed (1) hide show

app.py +35 -15

app.py CHANGED Viewed

@@ -437,7 +437,7 @@ def bot(history, choice, tts_choice, retrieval_mode, model_choice):
-import re
 # def clean_response(response_text):
 #     # Remove system and user tags
@@ -454,37 +454,57 @@ import re
 #     return cleaned_response
-import re
-def clean_response(response_text):
     """
-    This function removes metadata and unnecessary symbols from the document response
-    and formats the output in a readable way.
     """
     # Remove metadata section from the response
     response_text = re.sub(r'Document\(metadata=.*?,page_content="', '', response_text)
-    # Replace encoded characters
     response_text = response_text.replace('\\u2019', "'")  # replace unicode apostrophe
     response_text = response_text.replace('\\u00e8', 'è')  # replace accented characters
     response_text = response_text.replace('\\u00e0', 'à')
     response_text = response_text.replace('\\n', '\n')  # newline characters
     response_text = response_text.replace('\\\\', '\\')  # backslashes
-    # Remove any trailing document information
-    response_text = re.sub(r'\\.*$', '', response_text)
-    # Ensure proper spacing for better readability
-    response_text = re.sub(r'([a-z])([A-Z])', r'\1 \2', response_text)  # Add spaces between words joined together
-    # Properly format new lines and spacing
     response_text = response_text.strip()  # Remove leading/trailing whitespace
     response_text = re.sub(r' +', ' ', response_text)  # Replace multiple spaces with a single space
     response_text = re.sub(r'\n+', '\n', response_text)  # Replace multiple newlines with a single newline
-    return response_text
 # Define a new template specifically for GPT-4o-mini in VDB Details mode

+# import re
 # def clean_response(response_text):
 #     # Remove system and user tags
 #     return cleaned_response
+def extract_metadata(response_text):
+    """
+    Extract document metadata like document name and page number from the response.
+    """
+    # Extract document name (source) and page number
+    doc_name_match = re.search(r"'source':\s?'([^']*)'", response_text)
+    page_number_match = re.search(r"'page':\s?(\d+)", response_text)
+    # Get the document name and page number from the matches
+    document_name = doc_name_match.group(1) if doc_name_match else "Unknown Document"
+    page_number = page_number_match.group(1) if page_number_match else "Unknown Page"
+    return document_name, page_number
+def clean_and_format_response(response_text):
     """
+    Clean the response and format it into a structured format:
+    - Document Name
+    - Document Page No
+    - Response Content
     """
+    # Extract metadata (document name and page number)
+    document_name, page_number = extract_metadata(response_text)
     # Remove metadata section from the response
     response_text = re.sub(r'Document\(metadata=.*?,page_content="', '', response_text)
+    response_text = re.sub(r'<\|system\|>.*?<\|end\|>', '', response_text, flags=re.DOTALL)
+    response_text = re.sub(r'<\|user\|>.*?<\|end\|>', '', response_text, flags=re.DOTALL)
+    response_text = re.sub(r'<\|assistant\|>', '', response_text, flags=re.DOTALL)
+    # Replace encoded characters and clean the content
     response_text = response_text.replace('\\u2019', "'")  # replace unicode apostrophe
     response_text = response_text.replace('\\u00e8', 'è')  # replace accented characters
     response_text = response_text.replace('\\u00e0', 'à')
     response_text = response_text.replace('\\n', '\n')  # newline characters
     response_text = response_text.replace('\\\\', '\\')  # backslashes
+    # Remove any trailing document information and unwanted characters
+    response_text = re.sub(r'\\.*$', '', response_text)
+    # Clean up spaces and new lines
     response_text = response_text.strip()  # Remove leading/trailing whitespace
     response_text = re.sub(r' +', ' ', response_text)  # Replace multiple spaces with a single space
     response_text = re.sub(r'\n+', '\n', response_text)  # Replace multiple newlines with a single newline
+    # Return the formatted output
+    return f"Document Name: {document_name}\nDocument Page No: {page_number}\nResponse:\n{response_text}"
 # Define a new template specifically for GPT-4o-mini in VDB Details mode