Pijush2023 committed on
Commit
8b5d3bd
·
verified ·
1 Parent(s): 072f1b0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +63 -10
app.py CHANGED
@@ -604,6 +604,55 @@ import re
604
 
605
  # return final_response
606
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
607
  def clean_response(response_text):
608
  # Remove system and user tags
609
  response_text = re.sub(r'<\|system\|>.*?<\|end\|>', '', response_text, flags=re.DOTALL)
@@ -623,31 +672,35 @@ def clean_response(response_text):
623
  # Remove the entire 'Document(metadata=...' and any mention of it from the response
624
  response_text = re.sub(r'Document\(metadata=\{.*?\},page_content=', '', response_text, flags=re.DOTALL)
625
 
626
- # Extract top 5 fetched results (assuming the top 5 results are the first 5 lines of content)
 
 
 
 
 
 
 
 
 
 
627
  top_results = response_text.split('\n')[:5] # Adjust this as per your actual data structure
628
 
629
  # Remove any unwanted escape characters like \u and \u00
630
  response_text = re.sub(r'\\u[0-9A-Fa-f]{4}', '', response_text)
631
 
632
- # Ensure proper spacing between words and dates
633
- response_text = re.sub(r'([a-zA-Z])(\d)', r'\1 \2', response_text)
634
- response_text = re.sub(r'(\d)([a-zA-Z])', r'\1 \2', response_text)
635
-
636
- # Remove the phrase "Sure! The Responses are as follows:" from the actual content
637
- response_text = re.sub(r'Sure! The Responses are as follows:', '', response_text).strip()
638
-
639
  # Clean up the text by removing extra whitespace
640
  cleaned_response = re.sub(r'\s+', ' ', response_text).strip()
641
 
642
  # Format the final response with bullet points
643
  top_five_formatted = ''.join([f'{i+1}. {result.strip()}\n' for i, result in enumerate(top_results)])
644
 
 
645
  final_response = (
646
  f"Sure! Here is the response for your Query:\n"
647
  f"• Document name - {document_name}\n"
648
  f"• Page No - {page_number}\n"
649
- f"• Top 5 Fetched Results:\n{top_five_formatted}"
650
- f"• Actual Response - {cleaned_response}"
651
  )
652
 
653
  return final_response
 
604
 
605
  # return final_response
606
 
607
+ # def clean_response(response_text):
608
+ # # Remove system and user tags
609
+ # response_text = re.sub(r'<\|system\|>.*?<\|end\|>', '', response_text, flags=re.DOTALL)
610
+ # response_text = re.sub(r'<\|user\|>.*?<\|end\|>', '', response_text, flags=re.DOTALL)
611
+ # response_text = re.sub(r'<\|assistant\|>', '', response_text, flags=re.DOTALL)
612
+
613
+ # # Extract the document name and page number
614
+ # document_match = re.search(r"Document\(metadata=\{'source': '(.+?)', 'page': (\d+)\}", response_text)
615
+
616
+ # if document_match:
617
+ # document_name = document_match.group(1).split('/')[-1] # Get the document name
618
+ # page_number = document_match.group(2) # Get the page number
619
+ # else:
620
+ # document_name = "Unknown"
621
+ # page_number = "Unknown"
622
+
623
+ # # Remove the entire 'Document(metadata=...' and any mention of it from the response
624
+ # response_text = re.sub(r'Document\(metadata=\{.*?\},page_content=', '', response_text, flags=re.DOTALL)
625
+
626
+ # # Extract top 5 fetched results (assuming the top 5 results are the first 5 lines of content)
627
+ # top_results = response_text.split('\n')[:5] # Adjust this as per your actual data structure
628
+
629
+ # # Remove any unwanted escape characters like \u and \u00
630
+ # response_text = re.sub(r'\\u[0-9A-Fa-f]{4}', '', response_text)
631
+
632
+ # # Ensure proper spacing between words and dates
633
+ # response_text = re.sub(r'([a-zA-Z])(\d)', r'\1 \2', response_text)
634
+ # response_text = re.sub(r'(\d)([a-zA-Z])', r'\1 \2', response_text)
635
+
636
+ # # Remove the phrase "Sure! The Responses are as follows:" from the actual content
637
+ # response_text = re.sub(r'Sure! The Responses are as follows:', '', response_text).strip()
638
+
639
+ # # Clean up the text by removing extra whitespace
640
+ # cleaned_response = re.sub(r'\s+', ' ', response_text).strip()
641
+
642
+ # # Format the final response with bullet points
643
+ # top_five_formatted = ''.join([f'{i+1}. {result.strip()}\n' for i, result in enumerate(top_results)])
644
+
645
+ # final_response = (
646
+ # f"Sure! Here is the response for your Query:\n"
647
+ # f"• Document name - {document_name}\n"
648
+ # f"• Page No - {page_number}\n"
649
+ # f"• Top 5 Fetched Results:\n{top_five_formatted}"
650
+ # f"• Actual Response - {cleaned_response}"
651
+ # )
652
+
653
+ # return final_response
654
+
655
+
656
  def clean_response(response_text):
657
  # Remove system and user tags
658
  response_text = re.sub(r'<\|system\|>.*?<\|end\|>', '', response_text, flags=re.DOTALL)
 
672
  # Remove the entire 'Document(metadata=...' and any mention of it from the response
673
  response_text = re.sub(r'Document\(metadata=\{.*?\},page_content=', '', response_text, flags=re.DOTALL)
674
 
675
+ # Extract the actual content from the response
676
+ content_match = re.search(r'page_content="(.+?)"', response_text)
677
+ if content_match:
678
+ actual_content = content_match.group(1)
679
+ else:
680
+ actual_content = "No content available."
681
+
682
+ # Limit the actual content to a short, precise snippet
683
+ actual_content = actual_content[:200] + "..." if len(actual_content) > 200 else actual_content
684
+
685
+ # Extract top 5 fetched results (based on some identifier you have)
686
  top_results = response_text.split('\n')[:5] # Adjust this as per your actual data structure
687
 
688
  # Remove any unwanted escape characters like \u and \u00
689
  response_text = re.sub(r'\\u[0-9A-Fa-f]{4}', '', response_text)
690
 
 
 
 
 
 
 
 
691
  # Clean up the text by removing extra whitespace
692
  cleaned_response = re.sub(r'\s+', ' ', response_text).strip()
693
 
694
  # Format the final response with bullet points
695
  top_five_formatted = ''.join([f'{i+1}. {result.strip()}\n' for i, result in enumerate(top_results)])
696
 
697
+ # Final formatted response
698
  final_response = (
699
  f"Sure! Here is the response for your Query:\n"
700
  f"• Document name - {document_name}\n"
701
  f"• Page No - {page_number}\n"
702
+ f"• Top 5 Fetched Results:\n{top_five_formatted if top_five_formatted.strip() else 'No results found.'}"
703
+ f"• Actual Response - {actual_content}"
704
  )
705
 
706
  return final_response