Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -452,47 +452,6 @@ def bot(history, choice, tts_choice, retrieval_mode, model_choice):
|
|
452 |
|
453 |
import re
|
454 |
|
455 |
-
def clean_response(response_text):
|
456 |
-
# Remove system and user tags
|
457 |
-
response_text = re.sub(r'<\|system\|>.*?<\|end\|>', '', response_text, flags=re.DOTALL)
|
458 |
-
response_text = re.sub(r'<\|user\|>.*?<\|end\|>', '', response_text, flags=re.DOTALL)
|
459 |
-
response_text = re.sub(r'<\|assistant\|>', '', response_text, flags=re.DOTALL)
|
460 |
-
|
461 |
-
# Extract the document name and page number
|
462 |
-
document_match = re.search(r"Document\(metadata=\{'source':'(.+?)','page':(\d+)\}", response_text)
|
463 |
-
if document_match:
|
464 |
-
document_name = document_match.group(1).split('/')[-1] # Get the document name
|
465 |
-
page_number = document_match.group(2) # Get the page number
|
466 |
-
else:
|
467 |
-
document_name = "Unknown"
|
468 |
-
page_number = "Unknown"
|
469 |
-
|
470 |
-
# Remove the 'Document(metadata=...' part and keep only the page content
|
471 |
-
response_text = re.sub(r'Document\(metadata=\{.*?\},page_content=', '', response_text, flags=re.DOTALL)
|
472 |
-
|
473 |
-
# Remove any unwanted escape characters like \u and \u00
|
474 |
-
response_text = re.sub(r'\\u[0-9A-Fa-f]{4}', '', response_text)
|
475 |
-
|
476 |
-
# Ensure proper spacing between words and dates
|
477 |
-
response_text = re.sub(r'([a-zA-Z])(\d)', r'\1 \2', response_text)
|
478 |
-
response_text = re.sub(r'(\d)([a-zA-Z])', r'\1 \2', response_text)
|
479 |
-
|
480 |
-
# Remove the phrase "Sure! The Responses are as follows:" from the actual content
|
481 |
-
response_text = re.sub(r'Sure! The Responses are as follows:', '', response_text).strip()
|
482 |
-
|
483 |
-
# Clean up the text by removing extra whitespace
|
484 |
-
cleaned_response = re.sub(r'\s+', ' ', response_text).strip()
|
485 |
-
|
486 |
-
# Format the final response with bullet points
|
487 |
-
final_response = f"""
|
488 |
-
Sure! The Responses are as follows:
|
489 |
-
• Document name - {document_name}
|
490 |
-
• Page No - {page_number}
|
491 |
-
• Response - {cleaned_response}
|
492 |
-
"""
|
493 |
-
|
494 |
-
return final_response
|
495 |
-
|
496 |
# def clean_response(response_text):
|
497 |
# # Remove system and user tags
|
498 |
# response_text = re.sub(r'<\|system\|>.*?<\|end\|>', '', response_text, flags=re.DOTALL)
|
@@ -508,11 +467,8 @@ Sure! The Responses are as follows:
|
|
508 |
# document_name = "Unknown"
|
509 |
# page_number = "Unknown"
|
510 |
|
511 |
-
# # Remove the
|
512 |
# response_text = re.sub(r'Document\(metadata=\{.*?\},page_content=', '', response_text, flags=re.DOTALL)
|
513 |
-
|
514 |
-
# # Remove any mention of "Document:" in the response
|
515 |
-
# response_text = re.sub(r'- Document:.*', '', response_text)
|
516 |
|
517 |
# # Remove any unwanted escape characters like \u and \u00
|
518 |
# response_text = re.sub(r'\\u[0-9A-Fa-f]{4}', '', response_text)
|
@@ -529,7 +485,7 @@ Sure! The Responses are as follows:
|
|
529 |
|
530 |
# # Format the final response with bullet points
|
531 |
# final_response = f"""
|
532 |
-
# Sure!
|
533 |
# • Document name - {document_name}
|
534 |
# • Page No - {page_number}
|
535 |
# • Response - {cleaned_response}
|
@@ -537,6 +493,50 @@ Sure! The Responses are as follows:
|
|
537 |
|
538 |
# return final_response
|
539 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
540 |
|
541 |
|
542 |
|
|
|
452 |
|
453 |
import re
|
454 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
455 |
# def clean_response(response_text):
|
456 |
# # Remove system and user tags
|
457 |
# response_text = re.sub(r'<\|system\|>.*?<\|end\|>', '', response_text, flags=re.DOTALL)
|
|
|
467 |
# document_name = "Unknown"
|
468 |
# page_number = "Unknown"
|
469 |
|
470 |
+
# # Remove the 'Document(metadata=...' part and keep only the page content
|
471 |
# response_text = re.sub(r'Document\(metadata=\{.*?\},page_content=', '', response_text, flags=re.DOTALL)
|
|
|
|
|
|
|
472 |
|
473 |
# # Remove any unwanted escape characters like \u and \u00
|
474 |
# response_text = re.sub(r'\\u[0-9A-Fa-f]{4}', '', response_text)
|
|
|
485 |
|
486 |
# # Format the final response with bullet points
|
487 |
# final_response = f"""
|
488 |
+
# Sure! The Responses are as follows:
|
489 |
# • Document name - {document_name}
|
490 |
# • Page No - {page_number}
|
491 |
# • Response - {cleaned_response}
|
|
|
493 |
|
494 |
# return final_response
|
495 |
|
496 |
+
def clean_response(response_text):
|
497 |
+
# Remove system and user tags
|
498 |
+
response_text = re.sub(r'<\|system\|>.*?<\|end\|>', '', response_text, flags=re.DOTALL)
|
499 |
+
response_text = re.sub(r'<\|user\|>.*?<\|end\|>', '', response_text, flags=re.DOTALL)
|
500 |
+
response_text = re.sub(r'<\|assistant\|>', '', response_text, flags=re.DOTALL)
|
501 |
+
|
502 |
+
# Extract the document name and page number
|
503 |
+
document_match = re.search(r"Document\(metadata=\{'source':'(.+?)','page':(\d+)\}", response_text)
|
504 |
+
if document_match:
|
505 |
+
document_name = document_match.group(1).split('/')[-1] # Get the document name
|
506 |
+
page_number = document_match.group(2) # Get the page number
|
507 |
+
else:
|
508 |
+
document_name = "Unknown"
|
509 |
+
page_number = "Unknown"
|
510 |
+
|
511 |
+
# Remove the entire 'Document(metadata=...' and any mention of it from the response
|
512 |
+
response_text = re.sub(r'Document\(metadata=\{.*?\},page_content=', '', response_text, flags=re.DOTALL)
|
513 |
+
|
514 |
+
# Remove any mention of "Document:" in the response
|
515 |
+
response_text = re.sub(r'- Document:.*', '', response_text)
|
516 |
+
|
517 |
+
# Remove any unwanted escape characters like \u and \u00
|
518 |
+
response_text = re.sub(r'\\u[0-9A-Fa-f]{4}', '', response_text)
|
519 |
+
|
520 |
+
# Ensure proper spacing between words and dates
|
521 |
+
response_text = re.sub(r'([a-zA-Z])(\d)', r'\1 \2', response_text)
|
522 |
+
response_text = re.sub(r'(\d)([a-zA-Z])', r'\1 \2', response_text)
|
523 |
+
|
524 |
+
# Remove the phrase "Sure! The Responses are as follows:" from the actual content
|
525 |
+
response_text = re.sub(r'Sure! The Responses are as follows:', '', response_text).strip()
|
526 |
+
|
527 |
+
# Clean up the text by removing extra whitespace
|
528 |
+
cleaned_response = re.sub(r'\s+', ' ', response_text).strip()
|
529 |
+
|
530 |
+
# Format the final response with bullet points
|
531 |
+
final_response = f"""
|
532 |
+
Sure! Here is the response for your Query:
|
533 |
+
• Document name - {document_name}
|
534 |
+
• Page No - {page_number}
|
535 |
+
• Response - {cleaned_response}
|
536 |
+
"""
|
537 |
+
|
538 |
+
return final_response
|
539 |
+
|
540 |
|
541 |
|
542 |
|