Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -484,47 +484,7 @@ import re
|
|
484 |
# return cleaned_response
|
485 |
|
486 |
|
487 |
-
# def clean_response(response_text):
|
488 |
-
# # Remove system and user tags
|
489 |
-
# response_text = re.sub(r'<\|system\|>.*?<\|end\|>', '', response_text, flags=re.DOTALL)
|
490 |
-
# response_text = re.sub(r'<\|user\|>.*?<\|end\|>', '', response_text, flags=re.DOTALL)
|
491 |
-
# response_text = re.sub(r'<\|assistant\|>', '', response_text, flags=re.DOTALL)
|
492 |
-
|
493 |
-
# # Extract the document name and page number
|
494 |
-
# document_match = re.search(r"Document\(metadata=\{'source':'(.+?)','page':(\d+)\}", response_text)
|
495 |
-
# if document_match:
|
496 |
-
# document_name = document_match.group(1).split('/')[-1] # Get the document name
|
497 |
-
# page_number = document_match.group(2) # Get the page number
|
498 |
-
# else:
|
499 |
-
# document_name = "Unknown"
|
500 |
-
# page_number = "Unknown"
|
501 |
-
|
502 |
-
# # Remove the 'Document(metadata=...' part and keep only the page content
|
503 |
-
# response_text = re.sub(r'Document\(metadata=\{.*?\},page_content=', '', response_text, flags=re.DOTALL)
|
504 |
-
|
505 |
-
# # Remove any unwanted escape characters like \u and \u00
|
506 |
-
# response_text = re.sub(r'\\u[0-9A-Fa-f]{4}', '', response_text)
|
507 |
|
508 |
-
# # Add spaces between words and dates (e.g., "born04/04/1963" becomes "born 04/04/1963")
|
509 |
-
# response_text = re.sub(r'([a-zA-Z])(\d)', r'\1 \2', response_text)
|
510 |
-
# response_text = re.sub(r'(\d)([a-zA-Z])', r'\1 \2', response_text)
|
511 |
-
|
512 |
-
# # Clean up the text by removing extra whitespace
|
513 |
-
# cleaned_response = response_text.strip()
|
514 |
-
# cleaned_response = re.sub(r'\s+', ' ', cleaned_response)
|
515 |
-
|
516 |
-
# # Ensure the response is conversational and organized
|
517 |
-
# cleaned_response = cleaned_response.replace('1.', '\n1.').replace('2.', '\n2.').replace('3.', '\n3.').replace('4.', '\n4.').replace('5.', '\n5.')
|
518 |
-
|
519 |
-
# # Return the cleaned response with the document name and page number
|
520 |
-
# final_response = f"""
|
521 |
-
# Sure! The Responses are as follows:
|
522 |
-
# Document name - {document_name}
|
523 |
-
# Page No - {page_number}
|
524 |
-
# Response - {cleaned_response}
|
525 |
-
# """
|
526 |
-
|
527 |
-
# return final_response
|
528 |
|
529 |
|
530 |
# def clean_response(response_text):
|
@@ -532,7 +492,7 @@ import re
|
|
532 |
# response_text = re.sub(r'<\|system\|>.*?<\|end\|>', '', response_text, flags=re.DOTALL)
|
533 |
# response_text = re.sub(r'<\|user\|>.*?<\|end\|>', '', response_text, flags=re.DOTALL)
|
534 |
# response_text = re.sub(r'<\|assistant\|>', '', response_text, flags=re.DOTALL)
|
535 |
-
|
536 |
# # Extract the document name and page number
|
537 |
# document_match = re.search(r"Document\(metadata=\{'source':'(.+?)','page':(\d+)\}", response_text)
|
538 |
# if document_match:
|
@@ -541,25 +501,24 @@ import re
|
|
541 |
# else:
|
542 |
# document_name = "Unknown"
|
543 |
# page_number = "Unknown"
|
544 |
-
|
545 |
# # Remove the 'Document(metadata=...' part and keep only the page content
|
546 |
# response_text = re.sub(r'Document\(metadata=\{.*?\},page_content=', '', response_text, flags=re.DOTALL)
|
547 |
-
|
548 |
# # Remove any unwanted escape characters like \u and \u00
|
549 |
# response_text = re.sub(r'\\u[0-9A-Fa-f]{4}', '', response_text)
|
550 |
|
551 |
-
# #
|
552 |
# response_text = re.sub(r'([a-zA-Z])(\d)', r'\1 \2', response_text)
|
553 |
# response_text = re.sub(r'(\d)([a-zA-Z])', r'\1 \2', response_text)
|
554 |
|
555 |
-
# #
|
556 |
-
#
|
557 |
-
# cleaned_response = re.sub(r'\s+', ' ', cleaned_response)
|
558 |
|
559 |
-
# #
|
560 |
-
# cleaned_response = re.sub(r
|
561 |
|
562 |
-
# #
|
563 |
# final_response = f"""
|
564 |
# Sure! The Responses are as follows:
|
565 |
# • Document name - {document_name}
|
@@ -569,7 +528,6 @@ import re
|
|
569 |
|
570 |
# return final_response
|
571 |
|
572 |
-
|
573 |
def clean_response(response_text):
|
574 |
# Remove system and user tags
|
575 |
response_text = re.sub(r'<\|system\|>.*?<\|end\|>', '', response_text, flags=re.DOTALL)
|
@@ -591,9 +549,10 @@ def clean_response(response_text):
|
|
591 |
# Remove literal \uXXXX escape sequences (a backslash, "u", and four hex digits) left in the text
|
592 |
response_text = re.sub(r'\\u[0-9A-Fa-f]{4}', '', response_text)
|
593 |
|
594 |
-
#
|
595 |
-
response_text = re.sub(r'([a-zA-Z])(\d)', r'\1 \2', response_text)
|
596 |
-
response_text = re.sub(r'(\d)([a-zA-Z])', r'\1 \2', response_text)
|
|
|
597 |
|
598 |
# Remove the phrase "Sure! The Responses are as follows:" from the actual content
|
599 |
response_text = re.sub(r'Sure! The Responses are as follows:', '', response_text).strip()
|
@@ -618,7 +577,6 @@ Sure! The Responses are as follows:
|
|
618 |
|
619 |
|
620 |
|
621 |
-
|
622 |
# Define a new template specifically for GPT-4o-mini in VDB Details mode
|
623 |
gpt4o_mini_template_details = f"""
|
624 |
As a highly specialized assistant, I provide precise, detailed, and informative responses. On this bright day of {current_date}, I'm equipped to assist with all your queries about Birmingham, Alabama, offering detailed insights tailored to your needs.
|
|
|
484 |
# return cleaned_response
|
485 |
|
486 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
487 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
488 |
|
489 |
|
490 |
# def clean_response(response_text):
|
|
|
492 |
# response_text = re.sub(r'<\|system\|>.*?<\|end\|>', '', response_text, flags=re.DOTALL)
|
493 |
# response_text = re.sub(r'<\|user\|>.*?<\|end\|>', '', response_text, flags=re.DOTALL)
|
494 |
# response_text = re.sub(r'<\|assistant\|>', '', response_text, flags=re.DOTALL)
|
495 |
+
|
496 |
# # Extract the document name and page number
|
497 |
# document_match = re.search(r"Document\(metadata=\{'source':'(.+?)','page':(\d+)\}", response_text)
|
498 |
# if document_match:
|
|
|
501 |
# else:
|
502 |
# document_name = "Unknown"
|
503 |
# page_number = "Unknown"
|
504 |
+
|
505 |
# # Remove the 'Document(metadata=...' part and keep only the page content
|
506 |
# response_text = re.sub(r'Document\(metadata=\{.*?\},page_content=', '', response_text, flags=re.DOTALL)
|
507 |
+
|
508 |
# # Remove any unwanted escape characters like \u and \u00
|
509 |
# response_text = re.sub(r'\\u[0-9A-Fa-f]{4}', '', response_text)
|
510 |
|
511 |
+
# # Ensure proper spacing between words and dates
|
512 |
# response_text = re.sub(r'([a-zA-Z])(\d)', r'\1 \2', response_text)
|
513 |
# response_text = re.sub(r'(\d)([a-zA-Z])', r'\1 \2', response_text)
|
514 |
|
515 |
+
# # Remove the phrase "Sure! The Responses are as follows:" from the actual content
|
516 |
+
# response_text = re.sub(r'Sure! The Responses are as follows:', '', response_text).strip()
|
|
|
517 |
|
518 |
+
# # Clean up the text by removing extra whitespace
|
519 |
+
# cleaned_response = re.sub(r'\s+', ' ', response_text).strip()
|
520 |
|
521 |
+
# # Format the final response with bullet points
|
522 |
# final_response = f"""
|
523 |
# Sure! The Responses are as follows:
|
524 |
# • Document name - {document_name}
|
|
|
528 |
|
529 |
# return final_response
|
530 |
|
|
|
531 |
def clean_response(response_text):
|
532 |
# Remove system and user tags
|
533 |
response_text = re.sub(r'<\|system\|>.*?<\|end\|>', '', response_text, flags=re.DOTALL)
|
|
|
549 |
# Remove literal \uXXXX escape sequences (a backslash, "u", and four hex digits) left in the text
|
550 |
response_text = re.sub(r'\\u[0-9A-Fa-f]{4}', '', response_text)
|
551 |
|
552 |
+
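# Insert missing spaces at every letter/digit boundary (e.g. "born04/04/1963" becomes "born 04/04/1963") and between a capital letter and a following capitalized word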
|
553 |
+
response_text = re.sub(r'([a-zA-Z])(\d)', r'\1 \2', response_text) # Add space between letter and number
|
554 |
+
response_text = re.sub(r'(\d)([a-zA-Z])', r'\1 \2', response_text) # Add space between number and letter
|
555 |
+
response_text = re.sub(r'([A-Z])([A-Z][a-z])', r'\1 \2', response_text) # Add space between capital letters and words
|
556 |
|
557 |
# Remove the phrase "Sure! The Responses are as follows:" from the actual content
|
558 |
response_text = re.sub(r'Sure! The Responses are as follows:', '', response_text).strip()
|
|
|
577 |
|
578 |
|
579 |
|
|
|
580 |
# Define a new template specifically for GPT-4o-mini in VDB Details mode
|
581 |
gpt4o_mini_template_details = f"""
|
582 |
As a highly specialized assistant, I provide precise, detailed, and informative responses. On this bright day of {current_date}, I'm equipped to assist with all your queries about Birmingham, Alabama, offering detailed insights tailored to your needs.
|