Pijush2023 committed on
Commit
c783a79
·
verified ·
1 Parent(s): 184e03e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -13
app.py CHANGED
@@ -490,24 +490,41 @@ import re
490
  # response_text = re.sub(r'<\|system\|>.*?<\|end\|>', '', response_text, flags=re.DOTALL)
491
  # response_text = re.sub(r'<\|user\|>.*?<\|end\|>', '', response_text, flags=re.DOTALL)
492
  # response_text = re.sub(r'<\|assistant\|>', '', response_text, flags=re.DOTALL)
493
- # #-------
 
 
 
 
 
 
 
 
 
 
 
 
 
494
  # response_text = re.sub(r'\\u[0-9A-Fa-f]{4}', '', response_text)
495
- # # Add spaces between words and dates (e.g., "born04/04/1963" becomes "born 04/04/1963")
 
496
  # response_text = re.sub(r'([a-zA-Z])(\d)', r'\1 \2', response_text)
497
  # response_text = re.sub(r'(\d)([a-zA-Z])', r'\1 \2', response_text)
498
- # #--------
499
- # # Clean up the text by removing extra whitespace
500
- # cleaned_response = response_text.strip()
501
- # cleaned_response = re.sub(r'\s+', ' ', cleaned_response)
502
-
503
- # # Ensure the response is conversational and organized
504
- # cleaned_response = cleaned_response.replace('1.', '\n1.').replace('2.', '\n2.').replace('3.', '\n3.').replace('4.', '\n4.').replace('5.', '\n5.')
505
-
506
- # return cleaned_response
507
 
 
 
508
 
 
 
509
 
 
 
 
 
 
 
 
510
 
 
511
 
512
  def clean_response(response_text):
513
  # Remove system and user tags
@@ -524,8 +541,11 @@ def clean_response(response_text):
524
  document_name = "Unknown"
525
  page_number = "Unknown"
526
 
527
- # Remove the 'Document(metadata=...' part and keep only the page content
528
  response_text = re.sub(r'Document\(metadata=\{.*?\},page_content=', '', response_text, flags=re.DOTALL)
 
 
 
529
 
530
  # Remove any unwanted escape characters like \u and \u00
531
  response_text = re.sub(r'\\u[0-9A-Fa-f]{4}', '', response_text)
@@ -542,7 +562,7 @@ def clean_response(response_text):
542
 
543
  # Format the final response with bullet points
544
  final_response = f"""
545
- Sure! The Responses are as follows:
546
  • Document name - {document_name}
547
  • Page No - {page_number}
548
  • Response - {cleaned_response}
 
490
  # response_text = re.sub(r'<\|system\|>.*?<\|end\|>', '', response_text, flags=re.DOTALL)
491
  # response_text = re.sub(r'<\|user\|>.*?<\|end\|>', '', response_text, flags=re.DOTALL)
492
  # response_text = re.sub(r'<\|assistant\|>', '', response_text, flags=re.DOTALL)
493
+
494
+ # # Extract the document name and page number
495
+ # document_match = re.search(r"Document\(metadata=\{'source':'(.+?)','page':(\d+)\}", response_text)
496
+ # if document_match:
497
+ # document_name = document_match.group(1).split('/')[-1] # Get the document name
498
+ # page_number = document_match.group(2) # Get the page number
499
+ # else:
500
+ # document_name = "Unknown"
501
+ # page_number = "Unknown"
502
+
503
+ # # Remove the 'Document(metadata=...' part and keep only the page content
504
+ # response_text = re.sub(r'Document\(metadata=\{.*?\},page_content=', '', response_text, flags=re.DOTALL)
505
+
506
+ # # Remove any unwanted escape characters like \u and \u00
507
  # response_text = re.sub(r'\\u[0-9A-Fa-f]{4}', '', response_text)
508
+
509
+ # # Ensure proper spacing between words and dates
510
  # response_text = re.sub(r'([a-zA-Z])(\d)', r'\1 \2', response_text)
511
  # response_text = re.sub(r'(\d)([a-zA-Z])', r'\1 \2', response_text)
 
 
 
 
 
 
 
 
 
512
 
513
+ # # Remove the phrase "Sure! The Responses are as follows:" from the actual content
514
+ # response_text = re.sub(r'Sure! The Responses are as follows:', '', response_text).strip()
515
 
516
+ # # Clean up the text by removing extra whitespace
517
+ # cleaned_response = re.sub(r'\s+', ' ', response_text).strip()
518
 
519
+ # # Format the final response with bullet points
520
+ # final_response = f"""
521
+ # Sure! The Responses are as follows:
522
+ # • Document name - {document_name}
523
+ # • Page No - {page_number}
524
+ # • Response - {cleaned_response}
525
+ # """
526
 
527
+ # return final_response
528
 
529
  def clean_response(response_text):
530
  # Remove system and user tags
 
541
  document_name = "Unknown"
542
  page_number = "Unknown"
543
 
544
+ # Remove the 'Document(metadata={...},page_content=' prefix from the response, keeping the page content
545
  response_text = re.sub(r'Document\(metadata=\{.*?\},page_content=', '', response_text, flags=re.DOTALL)
546
+
547
+ # Remove any "- Document:" mention, from the marker through the end of its line
548
+ response_text = re.sub(r'- Document:.*', '', response_text)
549
 
550
  # Remove any unwanted escape characters like \u and \u00
551
  response_text = re.sub(r'\\u[0-9A-Fa-f]{4}', '', response_text)
 
562
 
563
  # Format the final response with bullet points
564
  final_response = f"""
565
+ Sure! Here is the response for your Query:
566
  • Document name - {document_name}
567
  • Page No - {page_number}
568
  • Response - {cleaned_response}