Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -437,70 +437,26 @@ def bot(history, choice, tts_choice, retrieval_mode, model_choice):
|
|
437 |
|
438 |
|
439 |
|
440 |
-
|
441 |
|
442 |
-
|
443 |
-
#
|
444 |
-
|
445 |
-
|
446 |
-
|
447 |
|
448 |
-
#
|
449 |
-
|
450 |
-
|
|
|
|
|
|
|
|
|
|
|
451 |
|
452 |
-
# # Ensure the response is conversational and organized
|
453 |
-
# cleaned_response = cleaned_response.replace('1.', '\n1.').replace('2.', '\n2.').replace('3.', '\n3.').replace('4.', '\n4.').replace('5.', '\n5.')
|
454 |
|
455 |
-
# return cleaned_response
|
456 |
|
457 |
-
def extract_metadata(response_text):
|
458 |
-
"""
|
459 |
-
Extract document metadata like document name and page number from the response.
|
460 |
-
"""
|
461 |
-
# Extract document name (source) and page number
|
462 |
-
doc_name_match = re.search(r"'source':\s?'([^']*)'", response_text)
|
463 |
-
page_number_match = re.search(r"'page':\s?(\d+)", response_text)
|
464 |
-
|
465 |
-
# Get the document name and page number from the matches
|
466 |
-
document_name = doc_name_match.group(1) if doc_name_match else "Unknown Document"
|
467 |
-
page_number = page_number_match.group(1) if page_number_match else "Unknown Page"
|
468 |
-
|
469 |
-
return document_name, page_number
|
470 |
|
471 |
-
def clean_and_format_response(response_text):
|
472 |
-
"""
|
473 |
-
Clean the response and format it into a structured format:
|
474 |
-
- Document Name
|
475 |
-
- Document Page No
|
476 |
-
- Response Content
|
477 |
-
"""
|
478 |
-
# Extract metadata (document name and page number)
|
479 |
-
document_name, page_number = extract_metadata(response_text)
|
480 |
-
|
481 |
-
# Remove metadata section from the response
|
482 |
-
response_text = re.sub(r'Document\(metadata=.*?,page_content="', '', response_text)
|
483 |
-
response_text = re.sub(r'<\|system\|>.*?<\|end\|>', '', response_text, flags=re.DOTALL)
|
484 |
-
response_text = re.sub(r'<\|user\|>.*?<\|end\|>', '', response_text, flags=re.DOTALL)
|
485 |
-
response_text = re.sub(r'<\|assistant\|>', '', response_text, flags=re.DOTALL)
|
486 |
-
|
487 |
-
# Replace encoded characters and clean the content
|
488 |
-
response_text = response_text.replace('\\u2019', "'") # replace unicode apostrophe
|
489 |
-
response_text = response_text.replace('\\u00e8', 'è') # replace accented characters
|
490 |
-
response_text = response_text.replace('\\u00e0', 'à')
|
491 |
-
response_text = response_text.replace('\\n', '\n') # newline characters
|
492 |
-
response_text = response_text.replace('\\\\', '\\') # backslashes
|
493 |
-
|
494 |
-
# Remove any trailing document information and unwanted characters
|
495 |
-
response_text = re.sub(r'\\.*$', '', response_text)
|
496 |
-
|
497 |
-
# Clean up spaces and new lines
|
498 |
-
response_text = response_text.strip() # Remove leading/trailing whitespace
|
499 |
-
response_text = re.sub(r' +', ' ', response_text) # Replace multiple spaces with a single space
|
500 |
-
response_text = re.sub(r'\n+', '\n', response_text) # Replace multiple newlines with a single newline
|
501 |
-
|
502 |
-
# Return the formatted output
|
503 |
-
return f"Document Name: {document_name}\nDocument Page No: {page_number}\nResponse:\n{response_text}"
|
504 |
|
505 |
|
506 |
|
|
|
437 |
|
438 |
|
439 |
|
440 |
+
import re
|
441 |
|
442 |
+
def clean_response(response_text):
|
443 |
+
# Remove system and user tags
|
444 |
+
response_text = re.sub(r'<\|system\|>.*?<\|end\|>', '', response_text, flags=re.DOTALL)
|
445 |
+
response_text = re.sub(r'<\|user\|>.*?<\|end\|>', '', response_text, flags=re.DOTALL)
|
446 |
+
response_text = re.sub(r'<\|assistant\|>', '', response_text, flags=re.DOTALL)
|
447 |
|
448 |
+
# Clean up the text by removing extra whitespace
|
449 |
+
cleaned_response = response_text.strip()
|
450 |
+
cleaned_response = re.sub(r'\s+', ' ', cleaned_response)
|
451 |
+
|
452 |
+
# Ensure the response is conversational and organized
|
453 |
+
cleaned_response = cleaned_response.replace('1.', '\n1.').replace('2.', '\n2.').replace('3.', '\n3.').replace('4.', '\n4.').replace('5.', '\n5.')
|
454 |
+
|
455 |
+
return cleaned_response
|
456 |
|
|
|
|
|
457 |
|
|
|
458 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
459 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
460 |
|
461 |
|
462 |
|