Pijush2023 committed on
Commit
9628571
·
verified ·
1 Parent(s): d6acf59

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +52 -11
app.py CHANGED
@@ -527,12 +527,55 @@ import re
527
  # return final_response
528
 
529
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
530
  def clean_response(response_text):
531
  # Remove system and user tags
532
  response_text = re.sub(r'<\|system\|>.*?<\|end\|>', '', response_text, flags=re.DOTALL)
533
  response_text = re.sub(r'<\|user\|>.*?<\|end\|>', '', response_text, flags=re.DOTALL)
534
  response_text = re.sub(r'<\|assistant\|>', '', response_text, flags=re.DOTALL)
535
-
536
  # Extract the document name and page number
537
  document_match = re.search(r"Document\(metadata=\{'source':'(.+?)','page':(\d+)\}", response_text)
538
  if document_match:
@@ -541,27 +584,25 @@ def clean_response(response_text):
541
  else:
542
  document_name = "Unknown"
543
  page_number = "Unknown"
544
-
545
  # Remove the 'Document(metadata=...' part and keep only the page content
546
  response_text = re.sub(r'Document\(metadata=\{.*?\},page_content=', '', response_text, flags=re.DOTALL)
547
-
548
  # Remove any unwanted escape characters like \u and \u00
549
  response_text = re.sub(r'\\u[0-9A-Fa-f]{4}', '', response_text)
550
 
551
- # Add spaces between words and dates (e.g., "born04/04/1963" becomes "born 04/04/1963")
552
  response_text = re.sub(r'([a-zA-Z])(\d)', r'\1 \2', response_text)
553
  response_text = re.sub(r'(\d)([a-zA-Z])', r'\1 \2', response_text)
554
 
555
- # Clean up the text by removing extra whitespace
556
- cleaned_response = response_text.strip()
557
- cleaned_response = re.sub(r'\s+', ' ', cleaned_response)
558
 
559
- # Ensure the response is conversational and organized by removing any prefixes like "Document:"
560
- cleaned_response = re.sub(r"^Sure! The Responses are as follows: - Document: \"", '', cleaned_response)
561
 
562
- # Return the cleaned response with bullet points
563
  final_response = f"""
564
- Sure! The Responses are as follows:
565
  • Document name - {document_name}
566
  • Page No - {page_number}
567
  • Response - {cleaned_response}
 
527
  # return final_response
528
 
529
 
530
+ # def clean_response(response_text):
531
+ # # Remove system and user tags
532
+ # response_text = re.sub(r'<\|system\|>.*?<\|end\|>', '', response_text, flags=re.DOTALL)
533
+ # response_text = re.sub(r'<\|user\|>.*?<\|end\|>', '', response_text, flags=re.DOTALL)
534
+ # response_text = re.sub(r'<\|assistant\|>', '', response_text, flags=re.DOTALL)
535
+
536
+ # # Extract the document name and page number
537
+ # document_match = re.search(r"Document\(metadata=\{'source':'(.+?)','page':(\d+)\}", response_text)
538
+ # if document_match:
539
+ # document_name = document_match.group(1).split('/')[-1] # Get the document name
540
+ # page_number = document_match.group(2) # Get the page number
541
+ # else:
542
+ # document_name = "Unknown"
543
+ # page_number = "Unknown"
544
+
545
+ # # Remove the 'Document(metadata=...' part and keep only the page content
546
+ # response_text = re.sub(r'Document\(metadata=\{.*?\},page_content=', '', response_text, flags=re.DOTALL)
547
+
548
+ # # Remove any unwanted escape characters like \u and \u00
549
+ # response_text = re.sub(r'\\u[0-9A-Fa-f]{4}', '', response_text)
550
+
551
+ # # Add spaces between words and dates (e.g., "born04/04/1963" becomes "born 04/04/1963")
552
+ # response_text = re.sub(r'([a-zA-Z])(\d)', r'\1 \2', response_text)
553
+ # response_text = re.sub(r'(\d)([a-zA-Z])', r'\1 \2', response_text)
554
+
555
+ # # Clean up the text by removing extra whitespace
556
+ # cleaned_response = response_text.strip()
557
+ # cleaned_response = re.sub(r'\s+', ' ', cleaned_response)
558
+
559
+ # # Ensure the response is conversational and organized by removing any prefixes like "Document:"
560
+ # cleaned_response = re.sub(r"^Sure! The Responses are as follows: - Document: \"", '', cleaned_response)
561
+
562
+ # # Return the cleaned response with bullet points
563
+ # final_response = f"""
564
+ # Sure! The Responses are as follows:
565
+ # • Document name - {document_name}
566
+ # • Page No - {page_number}
567
+ # • Response - {cleaned_response}
568
+ # """
569
+
570
+ # return final_response
571
+
572
+
573
  def clean_response(response_text):
574
  # Remove system and user tags
575
  response_text = re.sub(r'<\|system\|>.*?<\|end\|>', '', response_text, flags=re.DOTALL)
576
  response_text = re.sub(r'<\|user\|>.*?<\|end\|>', '', response_text, flags=re.DOTALL)
577
  response_text = re.sub(r'<\|assistant\|>', '', response_text, flags=re.DOTALL)
578
+
579
  # Extract the document name and page number
580
  document_match = re.search(r"Document\(metadata=\{'source':'(.+?)','page':(\d+)\}", response_text)
581
  if document_match:
 
584
  else:
585
  document_name = "Unknown"
586
  page_number = "Unknown"
587
+
588
  # Remove the 'Document(metadata=...' part and keep only the page content
589
  response_text = re.sub(r'Document\(metadata=\{.*?\},page_content=', '', response_text, flags=re.DOTALL)
590
+
591
  # Remove any unwanted escape characters like \u and \u00
592
  response_text = re.sub(r'\\u[0-9A-Fa-f]{4}', '', response_text)
593
 
594
+ # Ensure proper spacing between words and dates
595
  response_text = re.sub(r'([a-zA-Z])(\d)', r'\1 \2', response_text)
596
  response_text = re.sub(r'(\d)([a-zA-Z])', r'\1 \2', response_text)
597
 
598
+ # Remove the phrase "Sure! The Responses are as follows:" from the actual content
599
+ response_text = re.sub(r'Sure! The Responses are as follows:', '', response_text).strip()
 
600
 
601
+ # Clean up the text by removing extra whitespace
602
+ cleaned_response = re.sub(r'\s+', ' ', response_text).strip()
603
 
604
+ # Format the final response with bullet points
605
  final_response = f"""
 
606
  • Document name - {document_name}
607
  • Page No - {page_number}
608
  • Response - {cleaned_response}