Pijush2023 committed on
Commit
5a6a6ba
·
verified ·
1 Parent(s): 2978881

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +47 -47
app.py CHANGED
@@ -470,47 +470,6 @@ def bot(history, choice, tts_choice, retrieval_mode, model_choice):
470
 
471
  import re
472
 
473
- def clean_response(response_text):
474
- # Remove system and user tags
475
- response_text = re.sub(r'<\|system\|>.*?<\|end\|>', '', response_text, flags=re.DOTALL)
476
- response_text = re.sub(r'<\|user\|>.*?<\|end\|>', '', response_text, flags=re.DOTALL)
477
- response_text = re.sub(r'<\|assistant\|>', '', response_text, flags=re.DOTALL)
478
-
479
- # Extract the document name and page number
480
- document_match = re.search(r"Document\(metadata=\{'source':'(.+?)','page':(\d+)\}", response_text)
481
- if document_match:
482
- document_name = document_match.group(1).split('/')[-1] # Get the document name
483
- page_number = document_match.group(2) # Get the page number
484
- else:
485
- document_name = "Unknown"
486
- page_number = "Unknown"
487
-
488
- # Remove the 'Document(metadata=...' part and keep only the page content
489
- response_text = re.sub(r'Document\(metadata=\{.*?\},page_content=', '', response_text, flags=re.DOTALL)
490
-
491
- # Remove any unwanted escape characters like \u and \u00
492
- response_text = re.sub(r'\\u[0-9A-Fa-f]{4}', '', response_text)
493
-
494
- # Ensure proper spacing between words and dates
495
- response_text = re.sub(r'([a-zA-Z])(\d)', r'\1 \2', response_text)
496
- response_text = re.sub(r'(\d)([a-zA-Z])', r'\1 \2', response_text)
497
-
498
- # Remove the phrase "Sure! The Responses are as follows:" from the actual content
499
- response_text = re.sub(r'Sure! The Responses are as follows:', '', response_text).strip()
500
-
501
- # Clean up the text by removing extra whitespace
502
- cleaned_response = re.sub(r'\s+', ' ', response_text).strip()
503
-
504
- # Format the final response with bullet points
505
- final_response = f"""
506
- Sure! The Responses are as follows:
507
- • Document name - {document_name}
508
- • Page No - {page_number}
509
- • Response - {cleaned_response}
510
- """
511
-
512
- return final_response
513
-
514
  # def clean_response(response_text):
515
  # # Remove system and user tags
516
  # response_text = re.sub(r'<\|system\|>.*?<\|end\|>', '', response_text, flags=re.DOTALL)
@@ -519,7 +478,6 @@ Sure! The Responses are as follows:
519
 
520
  # # Extract the document name and page number
521
  # document_match = re.search(r"Document\(metadata=\{'source':'(.+?)','page':(\d+)\}", response_text)
522
- # document_match = re.search(r"Document\(metadata=\{'source':'(.+?)','page':(\d+)\}", response_text)
523
  # if document_match:
524
  # document_name = document_match.group(1).split('/')[-1] # Get the document name
525
  # page_number = document_match.group(2) # Get the page number
@@ -527,11 +485,8 @@ Sure! The Responses are as follows:
527
  # document_name = "Unknown"
528
  # page_number = "Unknown"
529
 
530
- # # Remove the entire 'Document(metadata=...' and any mention of it from the response
531
  # response_text = re.sub(r'Document\(metadata=\{.*?\},page_content=', '', response_text, flags=re.DOTALL)
532
-
533
- # # Remove any mention of "Document:" in the response
534
- # response_text = re.sub(r'- Document:.*', '', response_text)
535
 
536
  # # Remove any unwanted escape characters like \u and \u00
537
  # response_text = re.sub(r'\\u[0-9A-Fa-f]{4}', '', response_text)
@@ -548,7 +503,7 @@ Sure! The Responses are as follows:
548
 
549
  # # Format the final response with bullet points
550
  # final_response = f"""
551
- # Sure! Here is the response for your Query:
552
  # • Document name - {document_name}
553
  # • Page No - {page_number}
554
  # • Response - {cleaned_response}
@@ -556,6 +511,51 @@ Sure! The Responses are as follows:
556
 
557
  # return final_response
558
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
559
 
560
 
561
 
 
470
 
471
  import re
472
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
473
  # def clean_response(response_text):
474
  # # Remove system and user tags
475
  # response_text = re.sub(r'<\|system\|>.*?<\|end\|>', '', response_text, flags=re.DOTALL)
 
478
 
479
  # # Extract the document name and page number
480
  # document_match = re.search(r"Document\(metadata=\{'source':'(.+?)','page':(\d+)\}", response_text)
 
481
  # if document_match:
482
  # document_name = document_match.group(1).split('/')[-1] # Get the document name
483
  # page_number = document_match.group(2) # Get the page number
 
485
  # document_name = "Unknown"
486
  # page_number = "Unknown"
487
 
488
+ # # Remove the 'Document(metadata=...' part and keep only the page content
489
  # response_text = re.sub(r'Document\(metadata=\{.*?\},page_content=', '', response_text, flags=re.DOTALL)
 
 
 
490
 
491
  # # Remove any unwanted escape characters like \u and \u00
492
  # response_text = re.sub(r'\\u[0-9A-Fa-f]{4}', '', response_text)
 
503
 
504
  # # Format the final response with bullet points
505
  # final_response = f"""
506
+ # Sure! The Responses are as follows:
507
  # • Document name - {document_name}
508
  # • Page No - {page_number}
509
  # • Response - {cleaned_response}
 
511
 
512
  # return final_response
513
 
514
+ def clean_response(response_text):
515
+ # Remove system and user tags
516
+ response_text = re.sub(r'<\|system\|>.*?<\|end\|>', '', response_text, flags=re.DOTALL)
517
+ response_text = re.sub(r'<\|user\|>.*?<\|end\|>', '', response_text, flags=re.DOTALL)
518
+ response_text = re.sub(r'<\|assistant\|>', '', response_text, flags=re.DOTALL)
519
+
520
+ # Extract the document name and page number
521
+ document_match = re.search(r"Document\(metadata=\{'source':'(.+?)','page':(\d+)\}", response_text)
522
+ document_match = re.search(r"Document\(metadata=\{'source':'(.+?)','page':(\d+)\}", response_text)
523
+ if document_match:
524
+ document_name = document_match.group(1).split('/')[-1] # Get the document name
525
+ page_number = document_match.group(2) # Get the page number
526
+ else:
527
+ document_name = "Unknown"
528
+ page_number = "Unknown"
529
+
530
+ # Remove the entire 'Document(metadata=...' and any mention of it from the response
531
+ response_text = re.sub(r'Document\(metadata=\{.*?\},page_content=', '', response_text, flags=re.DOTALL)
532
+
533
+ # Remove any mention of "Document:" in the response
534
+ response_text = re.sub(r'- Document:.*', '', response_text)
535
+
536
+ # Remove any unwanted escape characters like \u and \u00
537
+ response_text = re.sub(r'\\u[0-9A-Fa-f]{4}', '', response_text)
538
+
539
+ # Ensure proper spacing between words and dates
540
+ response_text = re.sub(r'([a-zA-Z])(\d)', r'\1 \2', response_text)
541
+ response_text = re.sub(r'(\d)([a-zA-Z])', r'\1 \2', response_text)
542
+
543
+ # Remove the phrase "Sure! The Responses are as follows:" from the actual content
544
+ response_text = re.sub(r'Sure! The Responses are as follows:', '', response_text).strip()
545
+
546
+ # Clean up the text by removing extra whitespace
547
+ cleaned_response = re.sub(r'\s+', ' ', response_text).strip()
548
+
549
+ # Format the final response with bullet points
550
+ final_response = f"""
551
+ Sure! Here is the response for your Query:
552
+ • Document name - {document_name}
553
+ • Page No - {page_number}
554
+ • Response - {cleaned_response}
555
+ """
556
+
557
+ return final_response
558
+
559
 
560
 
561