Pijush2023 committed on
Commit
a799bca
·
verified ·
1 Parent(s): 0df7f00

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -55
app.py CHANGED
@@ -484,47 +484,7 @@ import re
484
  # return cleaned_response
485
 
486
 
487
- # def clean_response(response_text):
488
- # # Remove system and user tags
489
- # response_text = re.sub(r'<\|system\|>.*?<\|end\|>', '', response_text, flags=re.DOTALL)
490
- # response_text = re.sub(r'<\|user\|>.*?<\|end\|>', '', response_text, flags=re.DOTALL)
491
- # response_text = re.sub(r'<\|assistant\|>', '', response_text, flags=re.DOTALL)
492
-
493
- # # Extract the document name and page number
494
- # document_match = re.search(r"Document\(metadata=\{'source':'(.+?)','page':(\d+)\}", response_text)
495
- # if document_match:
496
- # document_name = document_match.group(1).split('/')[-1] # Get the document name
497
- # page_number = document_match.group(2) # Get the page number
498
- # else:
499
- # document_name = "Unknown"
500
- # page_number = "Unknown"
501
-
502
- # # Remove the 'Document(metadata=...' part and keep only the page content
503
- # response_text = re.sub(r'Document\(metadata=\{.*?\},page_content=', '', response_text, flags=re.DOTALL)
504
-
505
- # # Remove any unwanted escape characters like \u and \u00
506
- # response_text = re.sub(r'\\u[0-9A-Fa-f]{4}', '', response_text)
507
 
508
- # # Add spaces between words and dates (e.g., "born04/04/1963" becomes "born 04/04/1963")
509
- # response_text = re.sub(r'([a-zA-Z])(\d)', r'\1 \2', response_text)
510
- # response_text = re.sub(r'(\d)([a-zA-Z])', r'\1 \2', response_text)
511
-
512
- # # Clean up the text by removing extra whitespace
513
- # cleaned_response = response_text.strip()
514
- # cleaned_response = re.sub(r'\s+', ' ', cleaned_response)
515
-
516
- # # Ensure the response is conversational and organized
517
- # cleaned_response = cleaned_response.replace('1.', '\n1.').replace('2.', '\n2.').replace('3.', '\n3.').replace('4.', '\n4.').replace('5.', '\n5.')
518
-
519
- # # Return the cleaned response with the document name and page number
520
- # final_response = f"""
521
- # Sure! The Responses are as follows:
522
- # Document name - {document_name}
523
- # Page No - {page_number}
524
- # Response - {cleaned_response}
525
- # """
526
-
527
- # return final_response
528
 
529
 
530
  # def clean_response(response_text):
@@ -532,7 +492,7 @@ import re
532
  # response_text = re.sub(r'<\|system\|>.*?<\|end\|>', '', response_text, flags=re.DOTALL)
533
  # response_text = re.sub(r'<\|user\|>.*?<\|end\|>', '', response_text, flags=re.DOTALL)
534
  # response_text = re.sub(r'<\|assistant\|>', '', response_text, flags=re.DOTALL)
535
-
536
  # # Extract the document name and page number
537
  # document_match = re.search(r"Document\(metadata=\{'source':'(.+?)','page':(\d+)\}", response_text)
538
  # if document_match:
@@ -541,25 +501,24 @@ import re
541
  # else:
542
  # document_name = "Unknown"
543
  # page_number = "Unknown"
544
-
545
  # # Remove the 'Document(metadata=...' part and keep only the page content
546
  # response_text = re.sub(r'Document\(metadata=\{.*?\},page_content=', '', response_text, flags=re.DOTALL)
547
-
548
  # # Remove any unwanted escape characters like \u and \u00
549
  # response_text = re.sub(r'\\u[0-9A-Fa-f]{4}', '', response_text)
550
 
551
- # # Add spaces between words and dates (e.g., "born04/04/1963" becomes "born 04/04/1963")
552
  # response_text = re.sub(r'([a-zA-Z])(\d)', r'\1 \2', response_text)
553
  # response_text = re.sub(r'(\d)([a-zA-Z])', r'\1 \2', response_text)
554
 
555
- # # Clean up the text by removing extra whitespace
556
- # cleaned_response = response_text.strip()
557
- # cleaned_response = re.sub(r'\s+', ' ', cleaned_response)
558
 
559
- # # Ensure the response is conversational and organized by removing any prefixes like "Document:"
560
- # cleaned_response = re.sub(r"^Sure! The Responses are as follows: - Document: \"", '', cleaned_response)
561
 
562
- # # Return the cleaned response with bullet points
563
  # final_response = f"""
564
  # Sure! The Responses are as follows:
565
  # • Document name - {document_name}
@@ -569,7 +528,6 @@ import re
569
 
570
  # return final_response
571
 
572
-
573
  def clean_response(response_text):
574
  # Remove system and user tags
575
  response_text = re.sub(r'<\|system\|>.*?<\|end\|>', '', response_text, flags=re.DOTALL)
@@ -591,9 +549,10 @@ def clean_response(response_text):
591
  # Remove any unwanted escape characters like \u and \u00
592
  response_text = re.sub(r'\\u[0-9A-Fa-f]{4}', '', response_text)
593
 
594
- # Ensure proper spacing between words and dates
595
- response_text = re.sub(r'([a-zA-Z])(\d)', r'\1 \2', response_text)
596
- response_text = re.sub(r'(\d)([a-zA-Z])', r'\1 \2', response_text)
 
597
 
598
  # Remove the phrase "Sure! The Responses are as follows:" from the actual content
599
  response_text = re.sub(r'Sure! The Responses are as follows:', '', response_text).strip()
@@ -618,7 +577,6 @@ Sure! The Responses are as follows:
618
 
619
 
620
 
621
-
622
  # Define a new template specifically for GPT-4o-mini in VDB Details mode
623
  gpt4o_mini_template_details = f"""
624
  As a highly specialized assistant, I provide precise, detailed, and informative responses. On this bright day of {current_date}, I'm equipped to assist with all your queries about Birmingham, Alabama, offering detailed insights tailored to your needs.
 
484
  # return cleaned_response
485
 
486
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
487
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
488
 
489
 
490
  # def clean_response(response_text):
 
492
  # response_text = re.sub(r'<\|system\|>.*?<\|end\|>', '', response_text, flags=re.DOTALL)
493
  # response_text = re.sub(r'<\|user\|>.*?<\|end\|>', '', response_text, flags=re.DOTALL)
494
  # response_text = re.sub(r'<\|assistant\|>', '', response_text, flags=re.DOTALL)
495
+
496
  # # Extract the document name and page number
497
  # document_match = re.search(r"Document\(metadata=\{'source':'(.+?)','page':(\d+)\}", response_text)
498
  # if document_match:
 
501
  # else:
502
  # document_name = "Unknown"
503
  # page_number = "Unknown"
504
+
505
  # # Remove the 'Document(metadata=...' part and keep only the page content
506
  # response_text = re.sub(r'Document\(metadata=\{.*?\},page_content=', '', response_text, flags=re.DOTALL)
507
+
508
  # # Remove any unwanted escape characters like \u and \u00
509
  # response_text = re.sub(r'\\u[0-9A-Fa-f]{4}', '', response_text)
510
 
511
+ # # Ensure proper spacing between words and dates
512
  # response_text = re.sub(r'([a-zA-Z])(\d)', r'\1 \2', response_text)
513
  # response_text = re.sub(r'(\d)([a-zA-Z])', r'\1 \2', response_text)
514
 
515
+ # # Remove the phrase "Sure! The Responses are as follows:" from the actual content
516
+ # response_text = re.sub(r'Sure! The Responses are as follows:', '', response_text).strip()
 
517
 
518
+ # # Clean up the text by removing extra whitespace
519
+ # cleaned_response = re.sub(r'\s+', ' ', response_text).strip()
520
 
521
+ # # Format the final response with bullet points
522
  # final_response = f"""
523
  # Sure! The Responses are as follows:
524
  # • Document name - {document_name}
 
528
 
529
  # return final_response
530
 
 
531
  def clean_response(response_text):
532
  # Remove system and user tags
533
  response_text = re.sub(r'<\|system\|>.*?<\|end\|>', '', response_text, flags=re.DOTALL)
 
549
  # Remove any unwanted escape characters like \u and \u00
550
  response_text = re.sub(r'\\u[0-9A-Fa-f]{4}', '', response_text)
551
 
552
+ # Add spaces between words and dates
553
+ response_text = re.sub(r'([a-zA-Z])(\d)', r'\1 \2', response_text) # Add space between letter and number
554
+ response_text = re.sub(r'(\d)([a-zA-Z])', r'\1 \2', response_text) # Add space between number and letter
555
+ response_text = re.sub(r'([A-Z])([A-Z][a-z])', r'\1 \2', response_text) # Add space between capital letters and words
556
 
557
  # Remove the phrase "Sure! The Responses are as follows:" from the actual content
558
  response_text = re.sub(r'Sure! The Responses are as follows:', '', response_text).strip()
 
577
 
578
 
579
 
 
580
  # Define a new template specifically for GPT-4o-mini in VDB Details mode
581
  gpt4o_mini_template_details = f"""
582
  As a highly specialized assistant, I provide precise, detailed, and informative responses. On this bright day of {current_date}, I'm equipped to assist with all your queries about Birmingham, Alabama, offering detailed insights tailored to your needs.