Pijush2023 committed on
Commit
e6f0b98
·
verified ·
1 Parent(s): f694fcb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +61 -8
app.py CHANGED
@@ -557,6 +557,53 @@ import re
557
  # return final_response
558
 
559
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
560
  import re
561
 
562
  def clean_response(response_text):
@@ -565,7 +612,7 @@ def clean_response(response_text):
565
  response_text = re.sub(r'<\|user\|>.*?<\|end\|>', '', response_text, flags=re.DOTALL)
566
  response_text = re.sub(r'<\|assistant\|>', '', response_text, flags=re.DOTALL)
567
 
568
- # Extract the document name and page number from updated pattern
569
  document_match = re.search(r"Document\(metadata=\{'source': '(.+?)', 'page': (\d+)\}", response_text)
570
 
571
  if document_match:
@@ -591,18 +638,24 @@ def clean_response(response_text):
591
  # Remove the phrase "Sure! The Responses are as follows:" from the actual content
592
  response_text = re.sub(r'Sure! The Responses are as follows:', '', response_text).strip()
593
 
 
 
 
 
594
  # Clean up the text by removing extra whitespace
595
  cleaned_response = re.sub(r'\s+', ' ', response_text).strip()
596
 
597
- # Format the final response with bullet points
598
  final_response = f"""
599
- Sure! Here is the response for your Query:
600
- Document name - {document_name}
601
- • Page No - {page_number}
602
- Responses - {cleaned_response}
603
- """
604
 
605
- return final_response
 
 
606
 
607
 
608
 
 
557
  # return final_response
558
 
559
 
560
+ import re
561
+
562
+ # def clean_response(response_text):
563
+ # # Remove system and user tags
564
+ # response_text = re.sub(r'<\|system\|>.*?<\|end\|>', '', response_text, flags=re.DOTALL)
565
+ # response_text = re.sub(r'<\|user\|>.*?<\|end\|>', '', response_text, flags=re.DOTALL)
566
+ # response_text = re.sub(r'<\|assistant\|>', '', response_text, flags=re.DOTALL)
567
+
568
+ # # Extract the document name and page number from updated pattern
569
+ # document_match = re.search(r"Document\(metadata=\{'source': '(.+?)', 'page': (\d+)\}", response_text)
570
+
571
+ # if document_match:
572
+ # document_name = document_match.group(1).split('/')[-1] # Get the document name
573
+ # page_number = document_match.group(2) # Get the page number
574
+ # else:
575
+ # document_name = "Unknown"
576
+ # page_number = "Unknown"
577
+
578
+ # # Remove the entire 'Document(metadata=...' and any mention of it from the response
579
+ # response_text = re.sub(r'Document\(metadata=\{.*?\},page_content=', '', response_text, flags=re.DOTALL)
580
+
581
+ # # Remove any mention of "Document:" in the response
582
+ # response_text = re.sub(r'- Document:.*', '', response_text)
583
+
584
+ # # Remove literal \uXXXX escape sequences (a backslash, 'u', and four hex digits)
585
+ # response_text = re.sub(r'\\u[0-9A-Fa-f]{4}', '', response_text)
586
+
587
+ # # Ensure proper spacing between words and dates
588
+ # response_text = re.sub(r'([a-zA-Z])(\d)', r'\1 \2', response_text)
589
+ # response_text = re.sub(r'(\d)([a-zA-Z])', r'\1 \2', response_text)
590
+
591
+ # # Remove the phrase "Sure! The Responses are as follows:" from the actual content
592
+ # response_text = re.sub(r'Sure! The Responses are as follows:', '', response_text).strip()
593
+
594
+ # # Clean up the text by removing extra whitespace
595
+ # cleaned_response = re.sub(r'\s+', ' ', response_text).strip()
596
+
597
+ # # Format the final response with bullet points
598
+ # final_response = f"""
599
+ # Sure! Here is the response for your Query:
600
+ # • Document name - {document_name}
601
+ # • Page No - {page_number}
602
+ # • Responses - {cleaned_response}
603
+ # """
604
+
605
+ # return final_response
606
+
607
  import re
608
 
609
  def clean_response(response_text):
 
612
  response_text = re.sub(r'<\|user\|>.*?<\|end\|>', '', response_text, flags=re.DOTALL)
613
  response_text = re.sub(r'<\|assistant\|>', '', response_text, flags=re.DOTALL)
614
 
615
+ # Extract the document name and page number
616
  document_match = re.search(r"Document\(metadata=\{'source': '(.+?)', 'page': (\d+)\}", response_text)
617
 
618
  if document_match:
 
638
  # Remove the phrase "Sure! The Responses are as follows:" from the actual content
639
  response_text = re.sub(r'Sure! The Responses are as follows:', '', response_text).strip()
640
 
641
+ # Keep only the first 5 lines of the response text as the "top 5" results
642
+ result_lines = response_text.splitlines()[:5]
643
+ top_5_results = "\n".join(result_lines)
644
+
645
  # Clean up the text by removing extra whitespace
646
  cleaned_response = re.sub(r'\s+', ' ', response_text).strip()
647
 
648
+ # Format the final response with Italian section labels
649
  final_response = f"""
650
+ Nome del documento: {document_name}
651
+ Numero di pagina: {page_number}
652
+
653
+ Risultati principali (Top 5):
654
+ {top_5_results}
655
 
656
+ Risultato effettivo:
657
+ {cleaned_response}
658
+ """
659
 
660
 
661