Pijush2023 committed on
Commit
34d5ece
·
verified ·
1 Parent(s): 5e878d9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -72
app.py CHANGED
@@ -526,50 +526,6 @@ import re
526
 
527
  # return final_response
528
 
529
- # def clean_response(response_text):
530
- # # Remove system and user tags
531
- # response_text = re.sub(r'<\|system\|>.*?<\|end\|>', '', response_text, flags=re.DOTALL)
532
- # response_text = re.sub(r'<\|user\|>.*?<\|end\|>', '', response_text, flags=re.DOTALL)
533
- # response_text = re.sub(r'<\|assistant\|>', '', response_text, flags=re.DOTALL)
534
-
535
- # # Extract the document name and page number
536
- # document_match = re.search(r"Document\(metadata=\{'source':'(.+?)','page':(\d+)\}", response_text)
537
- # if document_match:
538
- # document_name = document_match.group(1).split('/')[-1] # Get the document name
539
- # page_number = document_match.group(2) # Get the page number
540
- # else:
541
- # document_name = "Unknown"
542
- # page_number = "Unknown"
543
-
544
- # # Remove the entire 'Document(metadata=...' and any mention of it from the response
545
- # response_text = re.sub(r'Document\(metadata=\{.*?\},page_content=', '', response_text, flags=re.DOTALL)
546
-
547
- # # Remove any mention of "Document:" in the response
548
- # response_text = re.sub(r'- Document:.*', '', response_text)
549
-
550
- # # Remove any unwanted escape characters like \u and \u00
551
- # response_text = re.sub(r'\\u[0-9A-Fa-f]{4}', '', response_text)
552
-
553
- # # Ensure proper spacing between words and dates
554
- # response_text = re.sub(r'([a-zA-Z])(\d)', r'\1 \2', response_text)
555
- # response_text = re.sub(r'(\d)([a-zA-Z])', r'\1 \2', response_text)
556
-
557
- # # Remove the phrase "Sure! The Responses are as follows:" from the actual content
558
- # response_text = re.sub(r'Sure! The Responses are as follows:', '', response_text).strip()
559
-
560
- # # Clean up the text by removing extra whitespace
561
- # cleaned_response = re.sub(r'\s+', ' ', response_text).strip()
562
-
563
- # # Format the final response with bullet points
564
- # final_response = f"""
565
- # Sure! Here is the response for your Query:
566
- # • Document name - {document_name}
567
- # • Page No - {page_number}
568
- # • Response - {cleaned_response}
569
- # """
570
-
571
- # return final_response
572
-
573
  def clean_response(response_text):
574
  # Remove system and user tags
575
  response_text = re.sub(r'<\|system\|>.*?<\|end\|>', '', response_text, flags=re.DOTALL)
@@ -585,40 +541,32 @@ def clean_response(response_text):
585
  document_name = "Unknown"
586
  page_number = "Unknown"
587
 
588
- # Remove document metadata and other unnecessary parts
589
- response_text = re.sub(r"Document\(metadata=\{.*?\}\)", '', response_text, flags=re.DOTALL)
 
 
 
590
 
591
- # Remove unwanted parts like "Nome del documento" and "Numero di pagina"
592
- response_text = re.sub(r"Nome del documento:.*\n?", '', response_text)
593
- response_text = re.sub(r"Numero di pagina:.*\n?", '', response_text)
594
 
595
- # Clean up and extract the relevant response content
596
- response_text = re.sub(r'\\u[0-9A-Fa-f]{4}', '', response_text) # Remove any unwanted escape characters
 
597
 
598
- # Ensure proper spacing and remove extra text around the "Contenuto effettivo" and "Risposta" parts
599
- response_text = re.sub(r'Contenuto effettivo: ', '', response_text)
600
- response_text = re.sub(r'Risposta: ', '', response_text)
601
- cleaned_response = response_text.strip()
602
 
603
- # Handle blank or empty responses
604
- if not cleaned_response:
605
- cleaned_response = "No relevant content found in the document."
606
 
607
- # Create a table format for the document name, page number, and response
608
  final_response = f"""
609
- <br>Here is the response of your Query <br>
610
- <table style="width:100%;border: 1px solid black;border-collapse: collapse;">
611
- <tr>
612
- <th style="border: 1px solid black;padding: 8px;text-align: left;">Document Name</th>
613
- <th style="border: 1px solid black;padding: 8px;text-align: left;">Page No</th>
614
- </tr>
615
- <tr>
616
- <td style="border: 1px solid black;padding: 8px;">{document_name}</td>
617
- <td style="border: 1px solid black;padding: 8px;">{page_number}</td>
618
- </tr>
619
- </table>
620
- <p><strong>Response:</strong> {cleaned_response}</p>
621
- """
622
 
623
  return final_response
624
 
@@ -633,6 +581,8 @@ def clean_response(response_text):
633
 
634
 
635
 
 
 
636
  # Define a new template specifically for GPT-4o-mini in VDB Details mode
637
  gpt4o_mini_template_details = f"""
638
  As a highly specialized assistant, I provide precise, detailed, and informative responses. On this bright day of {current_date}, I'm equipped to assist with all your queries about Birmingham, Alabama, offering detailed insights tailored to your needs.
 
526
 
527
  # return final_response
528
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
529
  def clean_response(response_text):
530
  # Remove system and user tags
531
  response_text = re.sub(r'<\|system\|>.*?<\|end\|>', '', response_text, flags=re.DOTALL)
 
541
  document_name = "Unknown"
542
  page_number = "Unknown"
543
 
544
+ # Remove the entire 'Document(metadata=...' and any mention of it from the response
545
+ response_text = re.sub(r'Document\(metadata=\{.*?\},page_content=', '', response_text, flags=re.DOTALL)
546
+
547
+ # Remove any mention of "Document:" in the response
548
+ response_text = re.sub(r'- Document:.*', '', response_text)
549
 
550
+ # Remove any unwanted escape characters like \u and \u00
551
+ response_text = re.sub(r'\\u[0-9A-Fa-f]{4}', '', response_text)
 
552
 
553
+ # Ensure proper spacing between words and dates
554
+ response_text = re.sub(r'([a-zA-Z])(\d)', r'\1 \2', response_text)
555
+ response_text = re.sub(r'(\d)([a-zA-Z])', r'\1 \2', response_text)
556
 
557
+ # Remove the phrase "Sure! The Responses are as follows:" from the actual content
558
+ response_text = re.sub(r'Sure! The Responses are as follows:', '', response_text).strip()
 
 
559
 
560
+ # Clean up the text by removing extra whitespace
561
+ cleaned_response = re.sub(r'\s+', ' ', response_text).strip()
 
562
 
563
+ # Format the final response with bullet points
564
  final_response = f"""
565
+ Sure! Here is the response for your Query:
566
+ Document name - {document_name}
567
+ • Page No - {page_number}
568
+ Response - {cleaned_response}
569
+ """
 
 
 
 
 
 
 
 
570
 
571
  return final_response
572
 
 
581
 
582
 
583
 
584
+
585
+
586
  # Define a new template specifically for GPT-4o-mini in VDB Details mode
587
  gpt4o_mini_template_details = f"""
588
  As a highly specialized assistant, I provide precise, detailed, and informative responses. On this bright day of {current_date}, I'm equipped to assist with all your queries about Birmingham, Alabama, offering detailed insights tailored to your needs.