Pijush2023 committed on
Commit
53237f9
·
verified ·
1 Parent(s): 248ba3c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +46 -46
app.py CHANGED
@@ -452,47 +452,6 @@ def bot(history, choice, tts_choice, retrieval_mode, model_choice):
452
 
453
  import re
454
 
455
- # def clean_response(response_text):
456
- # # Remove system and user tags
457
- # response_text = re.sub(r'<\|system\|>.*?<\|end\|>', '', response_text, flags=re.DOTALL)
458
- # response_text = re.sub(r'<\|user\|>.*?<\|end\|>', '', response_text, flags=re.DOTALL)
459
- # response_text = re.sub(r'<\|assistant\|>', '', response_text, flags=re.DOTALL)
460
-
461
- # # Extract the document name and page number
462
- # document_match = re.search(r"Document\(metadata=\{'source':'(.+?)','page':(\d+)\}", response_text)
463
- # if document_match:
464
- # document_name = document_match.group(1).split('/')[-1] # Get the document name
465
- # page_number = document_match.group(2) # Get the page number
466
- # else:
467
- # document_name = "Unknown"
468
- # page_number = "Unknown"
469
-
470
- # # Remove the 'Document(metadata=...' part and keep only the page content
471
- # response_text = re.sub(r'Document\(metadata=\{.*?\},page_content=', '', response_text, flags=re.DOTALL)
472
-
473
- # # Remove any unwanted escape characters like \u and \u00
474
- # response_text = re.sub(r'\\u[0-9A-Fa-f]{4}', '', response_text)
475
-
476
- # # Ensure proper spacing between words and dates
477
- # response_text = re.sub(r'([a-zA-Z])(\d)', r'\1 \2', response_text)
478
- # response_text = re.sub(r'(\d)([a-zA-Z])', r'\1 \2', response_text)
479
-
480
- # # Remove the phrase "Sure! The Responses are as follows:" from the actual content
481
- # response_text = re.sub(r'Sure! The Responses are as follows:', '', response_text).strip()
482
-
483
- # # Clean up the text by removing extra whitespace
484
- # cleaned_response = re.sub(r'\s+', ' ', response_text).strip()
485
-
486
- # # Format the final response with bullet points
487
- # final_response = f"""
488
- # Sure! The Responses are as follows:
489
- # • Document name - {document_name}
490
- # • Page No - {page_number}
491
- # • Response - {cleaned_response}
492
- # """
493
-
494
- # return final_response
495
-
496
  def clean_response(response_text):
497
  # Remove system and user tags
498
  response_text = re.sub(r'<\|system\|>.*?<\|end\|>', '', response_text, flags=re.DOTALL)
@@ -508,11 +467,8 @@ def clean_response(response_text):
508
  document_name = "Unknown"
509
  page_number = "Unknown"
510
 
511
- # Remove the entire 'Document(metadata=...' and any mention of it from the response
512
  response_text = re.sub(r'Document\(metadata=\{.*?\},page_content=', '', response_text, flags=re.DOTALL)
513
-
514
- # Remove any mention of "Document:" in the response
515
- response_text = re.sub(r'- Document:.*', '', response_text)
516
 
517
  # Remove any unwanted escape characters like \u and \u00
518
  response_text = re.sub(r'\\u[0-9A-Fa-f]{4}', '', response_text)
@@ -529,7 +485,7 @@ def clean_response(response_text):
529
 
530
  # Format the final response with bullet points
531
  final_response = f"""
532
- Sure! Here is the response for your Query:
533
  • Document name - {document_name}
534
  • Page No - {page_number}
535
  • Response - {cleaned_response}
@@ -537,6 +493,50 @@ Sure! Here is the response for your Query:
537
 
538
  return final_response
539
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
540
 
541
 
542
 
 
452
 
453
  import re
454
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
455
  def clean_response(response_text):
456
  # Remove system and user tags
457
  response_text = re.sub(r'<\|system\|>.*?<\|end\|>', '', response_text, flags=re.DOTALL)
 
467
  document_name = "Unknown"
468
  page_number = "Unknown"
469
 
470
+ # Remove the 'Document(metadata=...' part and keep only the page content
471
  response_text = re.sub(r'Document\(metadata=\{.*?\},page_content=', '', response_text, flags=re.DOTALL)
 
 
 
472
 
473
  # Remove any unwanted escape characters like \u and \u00
474
  response_text = re.sub(r'\\u[0-9A-Fa-f]{4}', '', response_text)
 
485
 
486
  # Format the final response with bullet points
487
  final_response = f"""
488
+ Sure! The Responses are as follows:
489
  • Document name - {document_name}
490
  • Page No - {page_number}
491
  • Response - {cleaned_response}
 
493
 
494
  return final_response
495
 
496
+ # def clean_response(response_text):
497
+ # # Remove system and user tags
498
+ # response_text = re.sub(r'<\|system\|>.*?<\|end\|>', '', response_text, flags=re.DOTALL)
499
+ # response_text = re.sub(r'<\|user\|>.*?<\|end\|>', '', response_text, flags=re.DOTALL)
500
+ # response_text = re.sub(r'<\|assistant\|>', '', response_text, flags=re.DOTALL)
501
+
502
+ # # Extract the document name and page number
503
+ # document_match = re.search(r"Document\(metadata=\{'source':'(.+?)','page':(\d+)\}", response_text)
504
+ # if document_match:
505
+ # document_name = document_match.group(1).split('/')[-1] # Get the document name
506
+ # page_number = document_match.group(2) # Get the page number
507
+ # else:
508
+ # document_name = "Unknown"
509
+ # page_number = "Unknown"
510
+
511
+ # # Remove the entire 'Document(metadata=...' and any mention of it from the response
512
+ # response_text = re.sub(r'Document\(metadata=\{.*?\},page_content=', '', response_text, flags=re.DOTALL)
513
+
514
+ # # Remove any mention of "Document:" in the response
515
+ # response_text = re.sub(r'- Document:.*', '', response_text)
516
+
517
+ # # Remove any unwanted escape characters like \u and \u00
518
+ # response_text = re.sub(r'\\u[0-9A-Fa-f]{4}', '', response_text)
519
+
520
+ # # Ensure proper spacing between words and dates
521
+ # response_text = re.sub(r'([a-zA-Z])(\d)', r'\1 \2', response_text)
522
+ # response_text = re.sub(r'(\d)([a-zA-Z])', r'\1 \2', response_text)
523
+
524
+ # # Remove the phrase "Sure! The Responses are as follows:" from the actual content
525
+ # response_text = re.sub(r'Sure! The Responses are as follows:', '', response_text).strip()
526
+
527
+ # # Clean up the text by removing extra whitespace
528
+ # cleaned_response = re.sub(r'\s+', ' ', response_text).strip()
529
+
530
+ # # Format the final response with bullet points
531
+ # final_response = f"""
532
+ # Sure! Here is the response for your Query:
533
+ # • Document name - {document_name}
534
+ # • Page No - {page_number}
535
+ # • Response - {cleaned_response}
536
+ # """
537
+
538
+ # return final_response
539
+
540
 
541
 
542