Pijush2023 committed on
Commit
d6acf59
·
verified ·
1 Parent(s): c2aca9d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -8
app.py CHANGED
@@ -490,7 +490,16 @@ import re
490
  # response_text = re.sub(r'<\|user\|>.*?<\|end\|>', '', response_text, flags=re.DOTALL)
491
  # response_text = re.sub(r'<\|assistant\|>', '', response_text, flags=re.DOTALL)
492
 
493
- # # Remove 'Document(metadata=...' part
 
 
 
 
 
 
 
 
 
494
  # response_text = re.sub(r'Document\(metadata=\{.*?\},page_content=', '', response_text, flags=re.DOTALL)
495
 
496
  # # Remove any unwanted escape characters like \u and \u00
@@ -507,7 +516,15 @@ import re
507
  # # Ensure the response is conversational and organized
508
  # cleaned_response = cleaned_response.replace('1.', '\n1.').replace('2.', '\n2.').replace('3.', '\n3.').replace('4.', '\n4.').replace('5.', '\n5.')
509
 
510
- # return cleaned_response
 
 
 
 
 
 
 
 
511
 
512
 
513
  def clean_response(response_text):
@@ -539,15 +556,15 @@ def clean_response(response_text):
539
  cleaned_response = response_text.strip()
540
  cleaned_response = re.sub(r'\s+', ' ', cleaned_response)
541
 
542
- # Ensure the response is conversational and organized
543
- cleaned_response = cleaned_response.replace('1.', '\n1.').replace('2.', '\n2.').replace('3.', '\n3.').replace('4.', '\n4.').replace('5.', '\n5.')
544
 
545
- # Return the cleaned response with the document name and page number
546
  final_response = f"""
547
  Sure! The Responses are as follows:
548
- Document name - {document_name}
549
- Page No - {page_number}
550
- Response - {cleaned_response}
551
  """
552
 
553
  return final_response
 
490
  # response_text = re.sub(r'<\|user\|>.*?<\|end\|>', '', response_text, flags=re.DOTALL)
491
  # response_text = re.sub(r'<\|assistant\|>', '', response_text, flags=re.DOTALL)
492
 
493
+ # # Extract the document name and page number
494
+ # document_match = re.search(r"Document\(metadata=\{'source':'(.+?)','page':(\d+)\}", response_text)
495
+ # if document_match:
496
+ # document_name = document_match.group(1).split('/')[-1] # Get the document name
497
+ # page_number = document_match.group(2) # Get the page number
498
+ # else:
499
+ # document_name = "Unknown"
500
+ # page_number = "Unknown"
501
+
502
+ # # Remove the 'Document(metadata=...' part and keep only the page content
503
  # response_text = re.sub(r'Document\(metadata=\{.*?\},page_content=', '', response_text, flags=re.DOTALL)
504
 
505
  # # Remove any unwanted escape characters like \u and \u00
 
516
  # # Ensure the response is conversational and organized
517
  # cleaned_response = cleaned_response.replace('1.', '\n1.').replace('2.', '\n2.').replace('3.', '\n3.').replace('4.', '\n4.').replace('5.', '\n5.')
518
 
519
+ # # Return the cleaned response with the document name and page number
520
+ # final_response = f"""
521
+ # Sure! The Responses are as follows:
522
+ # Document name - {document_name}
523
+ # Page No - {page_number}
524
+ # Response - {cleaned_response}
525
+ # """
526
+
527
+ # return final_response
528
 
529
 
530
  def clean_response(response_text):
 
556
  cleaned_response = response_text.strip()
557
  cleaned_response = re.sub(r'\s+', ' ', cleaned_response)
558
 
559
+ # Ensure the response is conversational and organized by removing any prefixes like "Document:"
560
+ cleaned_response = re.sub(r"^Sure! The Responses are as follows: - Document: \"", '', cleaned_response)
561
 
562
+ # Return the cleaned response together with the document name and page number
563
  final_response = f"""
564
  Sure! The Responses are as follows:
565
+ Document name - {document_name}
566
+ Page No - {page_number}
567
+ Response - {cleaned_response}
568
  """
569
 
570
  return final_response