Pijush2023 committed on
Commit
c2aca9d
·
verified ·
1 Parent(s): 96dfeb0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +46 -2
app.py CHANGED
@@ -484,13 +484,48 @@ import re
484
  # return cleaned_response
485
 
486
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
487
  def clean_response(response_text):
488
  # Remove system and user tags
489
  response_text = re.sub(r'<\|system\|>.*?<\|end\|>', '', response_text, flags=re.DOTALL)
490
  response_text = re.sub(r'<\|user\|>.*?<\|end\|>', '', response_text, flags=re.DOTALL)
491
  response_text = re.sub(r'<\|assistant\|>', '', response_text, flags=re.DOTALL)
492
 
493
- # Remove 'Document(metadata=...' part
 
 
 
 
 
 
 
 
 
494
  response_text = re.sub(r'Document\(metadata=\{.*?\},page_content=', '', response_text, flags=re.DOTALL)
495
 
496
  # Remove any unwanted escape characters like \u and \u00
@@ -507,7 +542,16 @@ def clean_response(response_text):
507
  # Ensure the response is conversational and organized
508
  cleaned_response = cleaned_response.replace('1.', '\n1.').replace('2.', '\n2.').replace('3.', '\n3.').replace('4.', '\n4.').replace('5.', '\n5.')
509
 
510
- return cleaned_response
 
 
 
 
 
 
 
 
 
511
 
512
 
513
 
 
484
  # return cleaned_response
485
 
486
 
487
+ # def clean_response(response_text):
488
+ # # Remove system and user tags
489
+ # response_text = re.sub(r'<\|system\|>.*?<\|end\|>', '', response_text, flags=re.DOTALL)
490
+ # response_text = re.sub(r'<\|user\|>.*?<\|end\|>', '', response_text, flags=re.DOTALL)
491
+ # response_text = re.sub(r'<\|assistant\|>', '', response_text, flags=re.DOTALL)
492
+
493
+ # # Remove 'Document(metadata=...' part
494
+ # response_text = re.sub(r'Document\(metadata=\{.*?\},page_content=', '', response_text, flags=re.DOTALL)
495
+
496
+ # # Remove any unwanted escape characters like \u and \u00
497
+ # response_text = re.sub(r'\\u[0-9A-Fa-f]{4}', '', response_text)
498
+
499
+ # # Add spaces between words and dates (e.g., "born04/04/1963" becomes "born 04/04/1963")
500
+ # response_text = re.sub(r'([a-zA-Z])(\d)', r'\1 \2', response_text)
501
+ # response_text = re.sub(r'(\d)([a-zA-Z])', r'\1 \2', response_text)
502
+
503
+ # # Clean up the text by removing extra whitespace
504
+ # cleaned_response = response_text.strip()
505
+ # cleaned_response = re.sub(r'\s+', ' ', cleaned_response)
506
+
507
+ # # Ensure the response is conversational and organized
508
+ # cleaned_response = cleaned_response.replace('1.', '\n1.').replace('2.', '\n2.').replace('3.', '\n3.').replace('4.', '\n4.').replace('5.', '\n5.')
509
+
510
+ # return cleaned_response
511
+
512
+
513
  def clean_response(response_text):
514
  # Remove system and user tags
515
  response_text = re.sub(r'<\|system\|>.*?<\|end\|>', '', response_text, flags=re.DOTALL)
516
  response_text = re.sub(r'<\|user\|>.*?<\|end\|>', '', response_text, flags=re.DOTALL)
517
  response_text = re.sub(r'<\|assistant\|>', '', response_text, flags=re.DOTALL)
518
 
519
+ # Extract the document name and page number
520
+ document_match = re.search(r"Document\(metadata=\{'source':'(.+?)','page':(\d+)\}", response_text)
521
+ if document_match:
522
+ document_name = document_match.group(1).split('/')[-1] # Get the document name
523
+ page_number = document_match.group(2) # Get the page number
524
+ else:
525
+ document_name = "Unknown"
526
+ page_number = "Unknown"
527
+
528
+ # Remove the 'Document(metadata=...' part and keep only the page content
529
  response_text = re.sub(r'Document\(metadata=\{.*?\},page_content=', '', response_text, flags=re.DOTALL)
530
 
531
  # Remove any unwanted escape characters like \u and \u00
 
542
  # Ensure the response is conversational and organized
543
  cleaned_response = cleaned_response.replace('1.', '\n1.').replace('2.', '\n2.').replace('3.', '\n3.').replace('4.', '\n4.').replace('5.', '\n5.')
544
 
545
+ # Return the cleaned response with the document name and page number
546
+ final_response = f"""
547
+ Sure! The Responses are as follows:
548
+ Document name - {document_name}
549
+ Page No - {page_number}
550
+ Response - {cleaned_response}
551
+ """
552
+
553
+ return final_response
554
+
555
 
556
 
557