Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -557,6 +557,53 @@ import re
|
|
557 |
# return final_response
|
558 |
|
559 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
560 |
import re
|
561 |
|
562 |
def clean_response(response_text):
|
@@ -565,7 +612,7 @@ def clean_response(response_text):
|
|
565 |
response_text = re.sub(r'<\|user\|>.*?<\|end\|>', '', response_text, flags=re.DOTALL)
|
566 |
response_text = re.sub(r'<\|assistant\|>', '', response_text, flags=re.DOTALL)
|
567 |
|
568 |
-
# Extract the document name and page number
|
569 |
document_match = re.search(r"Document\(metadata=\{'source': '(.+?)', 'page': (\d+)\}", response_text)
|
570 |
|
571 |
if document_match:
|
@@ -591,18 +638,24 @@ def clean_response(response_text):
|
|
591 |
# Remove the phrase "Sure! The Responses are as follows:" from the actual content
|
592 |
response_text = re.sub(r'Sure! The Responses are as follows:', '', response_text).strip()
|
593 |
|
|
|
|
|
|
|
|
|
594 |
# Clean up the text by removing extra whitespace
|
595 |
cleaned_response = re.sub(r'\s+', ' ', response_text).strip()
|
596 |
|
597 |
-
# Format the final response with bullet points
|
598 |
final_response = f"""
|
599 |
-
|
600 |
-
|
601 |
-
|
602 |
-
|
603 |
-
|
604 |
|
605 |
-
|
|
|
|
|
606 |
|
607 |
|
608 |
|
|
|
557 |
# return final_response
|
558 |
|
559 |
|
560 |
+
import re
|
561 |
+
|
562 |
+
# def clean_response(response_text):
|
563 |
+
# # Remove system and user tags
|
564 |
+
# response_text = re.sub(r'<\|system\|>.*?<\|end\|>', '', response_text, flags=re.DOTALL)
|
565 |
+
# response_text = re.sub(r'<\|user\|>.*?<\|end\|>', '', response_text, flags=re.DOTALL)
|
566 |
+
# response_text = re.sub(r'<\|assistant\|>', '', response_text, flags=re.DOTALL)
|
567 |
+
|
568 |
+
# # Extract the document name and page number from updated pattern
|
569 |
+
# document_match = re.search(r"Document\(metadata=\{'source': '(.+?)', 'page': (\d+)\}", response_text)
|
570 |
+
|
571 |
+
# if document_match:
|
572 |
+
# document_name = document_match.group(1).split('/')[-1] # Get the document name
|
573 |
+
# page_number = document_match.group(2) # Get the page number
|
574 |
+
# else:
|
575 |
+
# document_name = "Unknown"
|
576 |
+
# page_number = "Unknown"
|
577 |
+
|
578 |
+
# # Remove the entire 'Document(metadata=...' and any mention of it from the response
|
579 |
+
# response_text = re.sub(r'Document\(metadata=\{.*?\},page_content=', '', response_text, flags=re.DOTALL)
|
580 |
+
|
581 |
+
# # Remove any mention of "Document:" in the response
|
582 |
+
# response_text = re.sub(r'- Document:.*', '', response_text)
|
583 |
+
|
584 |
+
# # Remove literal \uXXXX escape sequences (backslash-u followed by four hex digits) from the text
|
585 |
+
# response_text = re.sub(r'\\u[0-9A-Fa-f]{4}', '', response_text)
|
586 |
+
|
587 |
+
# # Insert a space wherever a letter is directly adjacent to a digit (in either order)
|
588 |
+
# response_text = re.sub(r'([a-zA-Z])(\d)', r'\1 \2', response_text)
|
589 |
+
# response_text = re.sub(r'(\d)([a-zA-Z])', r'\1 \2', response_text)
|
590 |
+
|
591 |
+
# # Remove the phrase "Sure! The Responses are as follows:" from the actual content
|
592 |
+
# response_text = re.sub(r'Sure! The Responses are as follows:', '', response_text).strip()
|
593 |
+
|
594 |
+
# # Clean up the text by removing extra whitespace
|
595 |
+
# cleaned_response = re.sub(r'\s+', ' ', response_text).strip()
|
596 |
+
|
597 |
+
# # Format the final response with bullet points
|
598 |
+
# final_response = f"""
|
599 |
+
# Sure! Here is the response for your Query:
|
600 |
+
# • Document name - {document_name}
|
601 |
+
# • Page No - {page_number}
|
602 |
+
# • Responses - {cleaned_response}
|
603 |
+
# """
|
604 |
+
|
605 |
+
# return final_response
|
606 |
+
|
607 |
import re
|
608 |
|
609 |
def clean_response(response_text):
|
|
|
612 |
response_text = re.sub(r'<\|user\|>.*?<\|end\|>', '', response_text, flags=re.DOTALL)
|
613 |
response_text = re.sub(r'<\|assistant\|>', '', response_text, flags=re.DOTALL)
|
614 |
|
615 |
+
# Extract the document name and page number
|
616 |
document_match = re.search(r"Document\(metadata=\{'source': '(.+?)', 'page': (\d+)\}", response_text)
|
617 |
|
618 |
if document_match:
|
|
|
638 |
# Remove the phrase "Sure! The Responses are as follows:" from the actual content
|
639 |
response_text = re.sub(r'Sure! The Responses are as follows:', '', response_text).strip()
|
640 |
|
641 |
+
# Keep only the first 5 lines of the response text as the "top 5" results
|
642 |
+
result_lines = response_text.splitlines()[:5]
|
643 |
+
top_5_results = "\n".join(result_lines)
|
644 |
+
|
645 |
# Clean up the text by removing extra whitespace
|
646 |
cleaned_response = re.sub(r'\s+', ' ', response_text).strip()
|
647 |
|
648 |
+
# Format the final response as labelled sections (document, page, top 5, full result) in Italian
|
649 |
final_response = f"""
|
650 |
+
Nome del documento: {document_name}
|
651 |
+
Numero di pagina: {page_number}
|
652 |
+
|
653 |
+
Risultati principali (Top 5):
|
654 |
+
{top_5_results}
|
655 |
|
656 |
+
Risultato effettivo:
|
657 |
+
{cleaned_response}
|
658 |
+
"""
|
659 |
|
660 |
|
661 |
|