Pijush2023 committed on
Commit
87009cb
·
verified ·
1 Parent(s): 8b5d3bd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -70
app.py CHANGED
@@ -353,15 +353,32 @@ QA_CHAIN_PROMPT_2 = PromptTemplate(input_variables=["context", "question"], temp
353
  # Sure! Here's the information:
354
  # """
355
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
356
  phi_custom_template = """
357
  <|system|>
358
- Sei un esperto della lingua italiana e un madrelingua italiano. Il tuo compito è fornire risposte concise, dirette e brevi basate sul documento fornito. Dovresti restituire le informazioni nel seguente formato:
359
- - Nome del documento: (il nome del documento)
360
- - Numero di pagina: (numero di pagina)
361
- - Contenuto effettivo: (contenuto rilevante del documento)
362
- Alla fine, fornisci una sezione separata per la risposta nel seguente formato:
363
- - Risposta: (la risposta alla domanda)
364
- Se non riesci a trovare la risposta nel documento, rispondi semplicemente con "Questa domanda è al di là delle mie conoscenze". Ecco i dettagli del documento da considerare:
365
  <|end|>
366
  <|user|>
367
  {context}
@@ -604,55 +621,6 @@ import re
604
 
605
  # return final_response
606
 
607
- # def clean_response(response_text):
608
- # # Remove system and user tags
609
- # response_text = re.sub(r'<\|system\|>.*?<\|end\|>', '', response_text, flags=re.DOTALL)
610
- # response_text = re.sub(r'<\|user\|>.*?<\|end\|>', '', response_text, flags=re.DOTALL)
611
- # response_text = re.sub(r'<\|assistant\|>', '', response_text, flags=re.DOTALL)
612
-
613
- # # Extract the document name and page number
614
- # document_match = re.search(r"Document\(metadata=\{'source': '(.+?)', 'page': (\d+)\}", response_text)
615
-
616
- # if document_match:
617
- # document_name = document_match.group(1).split('/')[-1] # Get the document name
618
- # page_number = document_match.group(2) # Get the page number
619
- # else:
620
- # document_name = "Unknown"
621
- # page_number = "Unknown"
622
-
623
- # # Remove the entire 'Document(metadata=...' and any mention of it from the response
624
- # response_text = re.sub(r'Document\(metadata=\{.*?\},page_content=', '', response_text, flags=re.DOTALL)
625
-
626
- # # Extract top 5 fetched results (assuming the top 5 results are the first 5 lines of content)
627
- # top_results = response_text.split('\n')[:5] # Adjust this as per your actual data structure
628
-
629
- # # Remove any unwanted escape characters like \u and \u00
630
- # response_text = re.sub(r'\\u[0-9A-Fa-f]{4}', '', response_text)
631
-
632
- # # Ensure proper spacing between words and dates
633
- # response_text = re.sub(r'([a-zA-Z])(\d)', r'\1 \2', response_text)
634
- # response_text = re.sub(r'(\d)([a-zA-Z])', r'\1 \2', response_text)
635
-
636
- # # Remove the phrase "Sure! The Responses are as follows:" from the actual content
637
- # response_text = re.sub(r'Sure! The Responses are as follows:', '', response_text).strip()
638
-
639
- # # Clean up the text by removing extra whitespace
640
- # cleaned_response = re.sub(r'\s+', ' ', response_text).strip()
641
-
642
- # # Format the final response with bullet points
643
- # top_five_formatted = ''.join([f'{i+1}. {result.strip()}\n' for i, result in enumerate(top_results)])
644
-
645
- # final_response = (
646
- # f"Sure! Here is the response for your Query:\n"
647
- # f"• Document name - {document_name}\n"
648
- # f"• Page No - {page_number}\n"
649
- # f"• Top 5 Fetched Results:\n{top_five_formatted}"
650
- # f"• Actual Response - {cleaned_response}"
651
- # )
652
-
653
- # return final_response
654
-
655
-
656
  def clean_response(response_text):
657
  # Remove system and user tags
658
  response_text = re.sub(r'<\|system\|>.*?<\|end\|>', '', response_text, flags=re.DOTALL)
@@ -672,35 +640,31 @@ def clean_response(response_text):
672
  # Remove the entire 'Document(metadata=...' and any mention of it from the response
673
  response_text = re.sub(r'Document\(metadata=\{.*?\},page_content=', '', response_text, flags=re.DOTALL)
674
 
675
- # Extract the actual content from the response
676
- content_match = re.search(r'page_content="(.+?)"', response_text)
677
- if content_match:
678
- actual_content = content_match.group(1)
679
- else:
680
- actual_content = "No content available."
681
-
682
- # Limit the actual content to a short, precise snippet
683
- actual_content = actual_content[:200] + "..." if len(actual_content) > 200 else actual_content
684
-
685
- # Extract top 5 fetched results (based on some identifier you have)
686
  top_results = response_text.split('\n')[:5] # Adjust this as per your actual data structure
687
 
688
  # Remove any unwanted escape characters like \u and \u00
689
  response_text = re.sub(r'\\u[0-9A-Fa-f]{4}', '', response_text)
690
 
 
 
 
 
 
 
 
691
  # Clean up the text by removing extra whitespace
692
  cleaned_response = re.sub(r'\s+', ' ', response_text).strip()
693
 
694
  # Format the final response with bullet points
695
  top_five_formatted = ''.join([f'{i+1}. {result.strip()}\n' for i, result in enumerate(top_results)])
696
 
697
- # Final formatted response
698
  final_response = (
699
  f"Sure! Here is the response for your Query:\n"
700
  f"• Document name - {document_name}\n"
701
  f"• Page No - {page_number}\n"
702
- f"• Top 5 Fetched Results:\n{top_five_formatted if top_five_formatted.strip() else 'No results found.'}"
703
- f"• Actual Response - {actual_content}"
704
  )
705
 
706
  return final_response
@@ -709,6 +673,9 @@ def clean_response(response_text):
709
 
710
 
711
 
 
 
 
712
  # Define a new template specifically for GPT-4o-mini in VDB Details mode
713
  gpt4o_mini_template_details = f"""
714
  As a highly specialized assistant, I provide precise, detailed, and informative responses. On this bright day of {current_date}, I'm equipped to assist with all your queries about Birmingham, Alabama, offering detailed insights tailored to your needs.
 
353
  # Sure! Here's the information:
354
  # """
355
 
356
+ # phi_custom_template = """
357
+ # <|system|>
358
+ # Sei un esperto della lingua italiana e un madrelingua italiano. Il tuo compito è fornire risposte concise, dirette e brevi basate sul documento fornito. Dovresti restituire le informazioni nel seguente formato:
359
+ # - Nome del documento: (il nome del documento)
360
+ # - Numero di pagina: (numero di pagina)
361
+ # - Contenuto effettivo: (contenuto rilevante del documento)
362
+ # Alla fine, fornisci una sezione separata per la risposta nel seguente formato:
363
+ # - Risposta: (la risposta alla domanda)
364
+ # Se non riesci a trovare la risposta nel documento, rispondi semplicemente con "Questa domanda è al di là delle mie conoscenze". Ecco i dettagli del documento da considerare:
365
+ # <|end|>
366
+ # <|user|>
367
+ # {context}
368
+ # Question: {question}<|end|>
369
+ # <|assistant|>
370
+ # Sure! The Responses are as follows:
371
+ # """
372
+
373
  phi_custom_template = """
374
  <|system|>
375
+ You are an expert in the Italian language and a native Italian speaker. Your task is to provide concise, direct and short answers based on the document provided. You should return the information in the following format:
376
+ - Document name: (the name of the document)
377
+ - Page number: (page number)
378
+ - Top 5 Relevant Answer
379
+ -Actual Answer
380
+
381
+ If you can't find the answer in the document, simply reply with "This question is beyond my knowledge." Here are the document details to consider:
382
  <|end|>
383
  <|user|>
384
  {context}
 
621
 
622
  # return final_response
623
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
624
  def clean_response(response_text):
625
  # Remove system and user tags
626
  response_text = re.sub(r'<\|system\|>.*?<\|end\|>', '', response_text, flags=re.DOTALL)
 
640
  # Remove the entire 'Document(metadata=...' and any mention of it from the response
641
  response_text = re.sub(r'Document\(metadata=\{.*?\},page_content=', '', response_text, flags=re.DOTALL)
642
 
643
+ # Extract top 5 fetched results (assuming the top 5 results are the first 5 lines of content)
 
 
 
 
 
 
 
 
 
 
644
  top_results = response_text.split('\n')[:5] # Adjust this as per your actual data structure
645
 
646
  # Remove any unwanted escape characters like \u and \u00
647
  response_text = re.sub(r'\\u[0-9A-Fa-f]{4}', '', response_text)
648
 
649
+ # Ensure proper spacing between words and dates
650
+ response_text = re.sub(r'([a-zA-Z])(\d)', r'\1 \2', response_text)
651
+ response_text = re.sub(r'(\d)([a-zA-Z])', r'\1 \2', response_text)
652
+
653
+ # Remove the phrase "Sure! The Responses are as follows:" from the actual content
654
+ response_text = re.sub(r'Sure! The Responses are as follows:', '', response_text).strip()
655
+
656
  # Clean up the text by removing extra whitespace
657
  cleaned_response = re.sub(r'\s+', ' ', response_text).strip()
658
 
659
  # Format the final response with bullet points
660
  top_five_formatted = ''.join([f'{i+1}. {result.strip()}\n' for i, result in enumerate(top_results)])
661
 
 
662
  final_response = (
663
  f"Sure! Here is the response for your Query:\n"
664
  f"• Document name - {document_name}\n"
665
  f"• Page No - {page_number}\n"
666
+ f"• Top 5 Fetched Results:\n{top_five_formatted}"
667
+ f"• Actual Response - {cleaned_response}"
668
  )
669
 
670
  return final_response
 
673
 
674
 
675
 
676
+
677
+
678
+
679
  # Define a new template specifically for GPT-4o-mini in VDB Details mode
680
  gpt4o_mini_template_details = f"""
681
  As a highly specialized assistant, I provide precise, detailed, and informative responses. On this bright day of {current_date}, I'm equipped to assist with all your queries about Birmingham, Alabama, offering detailed insights tailored to your needs.