Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -353,15 +353,32 @@ QA_CHAIN_PROMPT_2 = PromptTemplate(input_variables=["context", "question"], temp
|
|
353 |
# Sure! Here's the information:
|
354 |
# """
|
355 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
356 |
phi_custom_template = """
|
357 |
<|system|>
|
358 |
-
|
359 |
-
-
|
360 |
-
-
|
361 |
-
-
|
362 |
-
|
363 |
-
|
364 |
-
|
365 |
<|end|>
|
366 |
<|user|>
|
367 |
{context}
|
@@ -604,55 +621,6 @@ import re
|
|
604 |
|
605 |
# return final_response
|
606 |
|
607 |
-
# def clean_response(response_text):
|
608 |
-
# # Remove system and user tags
|
609 |
-
# response_text = re.sub(r'<\|system\|>.*?<\|end\|>', '', response_text, flags=re.DOTALL)
|
610 |
-
# response_text = re.sub(r'<\|user\|>.*?<\|end\|>', '', response_text, flags=re.DOTALL)
|
611 |
-
# response_text = re.sub(r'<\|assistant\|>', '', response_text, flags=re.DOTALL)
|
612 |
-
|
613 |
-
# # Extract the document name and page number
|
614 |
-
# document_match = re.search(r"Document\(metadata=\{'source': '(.+?)', 'page': (\d+)\}", response_text)
|
615 |
-
|
616 |
-
# if document_match:
|
617 |
-
# document_name = document_match.group(1).split('/')[-1] # Get the document name
|
618 |
-
# page_number = document_match.group(2) # Get the page number
|
619 |
-
# else:
|
620 |
-
# document_name = "Unknown"
|
621 |
-
# page_number = "Unknown"
|
622 |
-
|
623 |
-
# # Remove the entire 'Document(metadata=...' and any mention of it from the response
|
624 |
-
# response_text = re.sub(r'Document\(metadata=\{.*?\},page_content=', '', response_text, flags=re.DOTALL)
|
625 |
-
|
626 |
-
# # Extract top 5 fetched results (assuming the top 5 results are the first 5 lines of content)
|
627 |
-
# top_results = response_text.split('\n')[:5] # Adjust this as per your actual data structure
|
628 |
-
|
629 |
-
# # Remove any unwanted escape characters like \u and \u00
|
630 |
-
# response_text = re.sub(r'\\u[0-9A-Fa-f]{4}', '', response_text)
|
631 |
-
|
632 |
-
# # Ensure proper spacing between words and dates
|
633 |
-
# response_text = re.sub(r'([a-zA-Z])(\d)', r'\1 \2', response_text)
|
634 |
-
# response_text = re.sub(r'(\d)([a-zA-Z])', r'\1 \2', response_text)
|
635 |
-
|
636 |
-
# # Remove the phrase "Sure! The Responses are as follows:" from the actual content
|
637 |
-
# response_text = re.sub(r'Sure! The Responses are as follows:', '', response_text).strip()
|
638 |
-
|
639 |
-
# # Clean up the text by removing extra whitespace
|
640 |
-
# cleaned_response = re.sub(r'\s+', ' ', response_text).strip()
|
641 |
-
|
642 |
-
# # Format the final response with bullet points
|
643 |
-
# top_five_formatted = ''.join([f'{i+1}. {result.strip()}\n' for i, result in enumerate(top_results)])
|
644 |
-
|
645 |
-
# final_response = (
|
646 |
-
# f"Sure! Here is the response for your Query:\n"
|
647 |
-
# f"• Document name - {document_name}\n"
|
648 |
-
# f"• Page No - {page_number}\n"
|
649 |
-
# f"• Top 5 Fetched Results:\n{top_five_formatted}"
|
650 |
-
# f"• Actual Response - {cleaned_response}"
|
651 |
-
# )
|
652 |
-
|
653 |
-
# return final_response
|
654 |
-
|
655 |
-
|
656 |
def clean_response(response_text):
|
657 |
# Remove system and user tags
|
658 |
response_text = re.sub(r'<\|system\|>.*?<\|end\|>', '', response_text, flags=re.DOTALL)
|
@@ -672,35 +640,31 @@ def clean_response(response_text):
|
|
672 |
# Remove the entire 'Document(metadata=...' and any mention of it from the response
|
673 |
response_text = re.sub(r'Document\(metadata=\{.*?\},page_content=', '', response_text, flags=re.DOTALL)
|
674 |
|
675 |
-
# Extract the actual content
|
676 |
-
content_match = re.search(r'page_content="(.+?)"', response_text)
|
677 |
-
if content_match:
|
678 |
-
actual_content = content_match.group(1)
|
679 |
-
else:
|
680 |
-
actual_content = "No content available."
|
681 |
-
|
682 |
-
# Limit the actual content to a short, precise snippet
|
683 |
-
actual_content = actual_content[:200] + "..." if len(actual_content) > 200 else actual_content
|
684 |
-
|
685 |
-
# Extract top 5 fetched results (based on some identifier you have)
|
686 |
top_results = response_text.split('\n')[:5] # Adjust this as per your actual data structure
|
687 |
|
688 |
# Remove literal \uXXXX escape sequences (a backslash, "u", and four hex digits) left in the text
|
689 |
response_text = re.sub(r'\\u[0-9A-Fa-f]{4}', '', response_text)
|
690 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
691 |
# Clean up the text by removing extra whitespace
|
692 |
cleaned_response = re.sub(r'\s+', ' ', response_text).strip()
|
693 |
|
694 |
# Format the final response with bullet points
|
695 |
top_five_formatted = ''.join([f'{i+1}. {result.strip()}\n' for i, result in enumerate(top_results)])
|
696 |
|
697 |
-
# Final formatted response
|
698 |
final_response = (
|
699 |
f"Sure! Here is the response for your Query:\n"
|
700 |
f"• Document name - {document_name}\n"
|
701 |
f"• Page No - {page_number}\n"
|
702 |
-
f"• Top 5 Fetched Results:\n{top_five_formatted
|
703 |
-
f"• Actual Response - {
|
704 |
)
|
705 |
|
706 |
return final_response
|
@@ -709,6 +673,9 @@ def clean_response(response_text):
|
|
709 |
|
710 |
|
711 |
|
|
|
|
|
|
|
712 |
# Define a new template specifically for GPT-4o-mini in VDB Details mode
|
713 |
gpt4o_mini_template_details = f"""
|
714 |
As a highly specialized assistant, I provide precise, detailed, and informative responses. On this bright day of {current_date}, I'm equipped to assist with all your queries about Birmingham, Alabama, offering detailed insights tailored to your needs.
|
|
|
353 |
# Sure! Here's the information:
|
354 |
# """
|
355 |
|
356 |
+
# phi_custom_template = """
|
357 |
+
# <|system|>
|
358 |
+
# Sei un esperto della lingua italiana e un madrelingua italiano. Il tuo compito è fornire risposte concise, dirette e brevi basate sul documento fornito. Dovresti restituire le informazioni nel seguente formato:
|
359 |
+
# - Nome del documento: (il nome del documento)
|
360 |
+
# - Numero di pagina: (numero di pagina)
|
361 |
+
# - Contenuto effettivo: (contenuto rilevante del documento)
|
362 |
+
# Alla fine, fornisci una sezione separata per la risposta nel seguente formato:
|
363 |
+
# - Risposta: (la risposta alla domanda)
|
364 |
+
# Se non riesci a trovare la risposta nel documento, rispondi semplicemente con "Questa domanda è al di là delle mie conoscenze". Ecco i dettagli del documento da considerare:
|
365 |
+
# <|end|>
|
366 |
+
# <|user|>
|
367 |
+
# {context}
|
368 |
+
# Question: {question}<|end|>
|
369 |
+
# <|assistant|>
|
370 |
+
# Sure! The Responses are as follows:
|
371 |
+
# """
|
372 |
+
|
373 |
phi_custom_template = """
|
374 |
<|system|>
|
375 |
+
You are an expert in the Italian language and a native Italian speaker. Your task is to provide concise, direct and short answers based on the document provided. You should return the information in the following format:
|
376 |
+
- Document name: (the name of the document)
|
377 |
+
- Page number: (page number)
|
378 |
+
- Top 5 Relevant Answer
|
379 |
+
-Actual Answer
|
380 |
+
|
381 |
+
If you can't find the answer in the document, simply reply with "This question is beyond my knowledge." Here are the document details to consider:
|
382 |
<|end|>
|
383 |
<|user|>
|
384 |
{context}
|
|
|
621 |
|
622 |
# return final_response
|
623 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
624 |
def clean_response(response_text):
|
625 |
# Remove system and user tags
|
626 |
response_text = re.sub(r'<\|system\|>.*?<\|end\|>', '', response_text, flags=re.DOTALL)
|
|
|
640 |
# Remove the entire 'Document(metadata=...' and any mention of it from the response
|
641 |
response_text = re.sub(r'Document\(metadata=\{.*?\},page_content=', '', response_text, flags=re.DOTALL)
|
642 |
|
643 |
+
# Extract top 5 fetched results (assuming the top 5 results are the first 5 lines of content)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
644 |
top_results = response_text.split('\n')[:5] # Adjust this as per your actual data structure
|
645 |
|
646 |
# Remove literal \uXXXX escape sequences (a backslash, "u", and four hex digits) left in the text
|
647 |
response_text = re.sub(r'\\u[0-9A-Fa-f]{4}', '', response_text)
|
648 |
|
649 |
+
# Insert a space wherever a letter directly touches a digit (e.g. "May2024" -> "May 2024")
|
650 |
+
response_text = re.sub(r'([a-zA-Z])(\d)', r'\1 \2', response_text)
|
651 |
+
response_text = re.sub(r'(\d)([a-zA-Z])', r'\1 \2', response_text)
|
652 |
+
|
653 |
+
# Remove the phrase "Sure! The Responses are as follows:" from the actual content
|
654 |
+
response_text = re.sub(r'Sure! The Responses are as follows:', '', response_text).strip()
|
655 |
+
|
656 |
# Clean up the text by removing extra whitespace
|
657 |
cleaned_response = re.sub(r'\s+', ' ', response_text).strip()
|
658 |
|
659 |
# Format the final response with bullet points
|
660 |
top_five_formatted = ''.join([f'{i+1}. {result.strip()}\n' for i, result in enumerate(top_results)])
|
661 |
|
|
|
662 |
final_response = (
|
663 |
f"Sure! Here is the response for your Query:\n"
|
664 |
f"• Document name - {document_name}\n"
|
665 |
f"• Page No - {page_number}\n"
|
666 |
+
f"• Top 5 Fetched Results:\n{top_five_formatted}"
|
667 |
+
f"• Actual Response - {cleaned_response}"
|
668 |
)
|
669 |
|
670 |
return final_response
|
|
|
673 |
|
674 |
|
675 |
|
676 |
+
|
677 |
+
|
678 |
+
|
679 |
# Define a new template specifically for GPT-4o-mini in VDB Details mode
|
680 |
gpt4o_mini_template_details = f"""
|
681 |
As a highly specialized assistant, I provide precise, detailed, and informative responses. On this bright day of {current_date}, I'm equipped to assist with all your queries about Birmingham, Alabama, offering detailed insights tailored to your needs.
|