Pijush2023 committed on
Commit
e9a0eca
·
verified ·
1 Parent(s): 463a37c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +72 -47
app.py CHANGED
@@ -353,6 +353,24 @@ QA_CHAIN_PROMPT_2 = PromptTemplate(input_variables=["context", "question"], temp
353
  # Sure! Here's the information:
354
  # """
355
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
356
  phi_custom_template = """
357
  <|system|>
358
  Sei un esperto della lingua italiana e un madrelingua italiano. Il tuo compito è fornire risposte concise, dirette e brevi basate sul documento fornito. Dovresti restituire le informazioni nel seguente formato:
@@ -369,6 +387,13 @@ Question: {question}<|end|>
369
  <|assistant|>
370
  Sure! The Responses are as follows:
371
  """
 
 
 
 
 
 
 
372
 
373
  def generate_bot_response(history, choice, retrieval_mode, model_choice):
374
  if not history:
@@ -445,47 +470,6 @@ def bot(history, choice, tts_choice, retrieval_mode, model_choice):
445
 
446
  import re
447
 
448
- # def clean_response(response_text):
449
- # # Remove system and user tags
450
- # response_text = re.sub(r'<\|system\|>.*?<\|end\|>', '', response_text, flags=re.DOTALL)
451
- # response_text = re.sub(r'<\|user\|>.*?<\|end\|>', '', response_text, flags=re.DOTALL)
452
- # response_text = re.sub(r'<\|assistant\|>', '', response_text, flags=re.DOTALL)
453
-
454
- # # Extract the document name and page number
455
- # document_match = re.search(r"Document\(metadata=\{'source':'(.+?)','page':(\d+)\}", response_text)
456
- # if document_match:
457
- # document_name = document_match.group(1).split('/')[-1] # Get the document name
458
- # page_number = document_match.group(2) # Get the page number
459
- # else:
460
- # document_name = "Unknown"
461
- # page_number = "Unknown"
462
-
463
- # # Remove the 'Document(metadata=...' part and keep only the page content
464
- # response_text = re.sub(r'Document\(metadata=\{.*?\},page_content=', '', response_text, flags=re.DOTALL)
465
-
466
- # # Remove any unwanted escape characters like \u and \u00
467
- # response_text = re.sub(r'\\u[0-9A-Fa-f]{4}', '', response_text)
468
-
469
- # # Ensure proper spacing between words and dates
470
- # response_text = re.sub(r'([a-zA-Z])(\d)', r'\1 \2', response_text)
471
- # response_text = re.sub(r'(\d)([a-zA-Z])', r'\1 \2', response_text)
472
-
473
- # # Remove the phrase "Sure! The Responses are as follows:" from the actual content
474
- # response_text = re.sub(r'Sure! The Responses are as follows:', '', response_text).strip()
475
-
476
- # # Clean up the text by removing extra whitespace
477
- # cleaned_response = re.sub(r'\s+', ' ', response_text).strip()
478
-
479
- # # Format the final response with bullet points
480
- # final_response = f"""
481
- # Sure! The Responses are as follows:
482
- # • Document name - {document_name}
483
- # • Page No - {page_number}
484
- # • Response - {cleaned_response}
485
- # """
486
-
487
- # return final_response
488
-
489
  def clean_response(response_text):
490
  # Remove system and user tags
491
  response_text = re.sub(r'<\|system\|>.*?<\|end\|>', '', response_text, flags=re.DOTALL)
@@ -494,7 +478,6 @@ def clean_response(response_text):
494
 
495
  # Extract the document name and page number
496
  document_match = re.search(r"Document\(metadata=\{'source':'(.+?)','page':(\d+)\}", response_text)
497
- document_match = re.search(r"Document\(metadata=\{'source':'(.+?)','page':(\d+)\}", response_text)
498
  if document_match:
499
  document_name = document_match.group(1).split('/')[-1] # Get the document name
500
  page_number = document_match.group(2) # Get the page number
@@ -502,11 +485,8 @@ def clean_response(response_text):
502
  document_name = "Unknown"
503
  page_number = "Unknown"
504
 
505
- # Remove the entire 'Document(metadata=...' and any mention of it from the response
506
  response_text = re.sub(r'Document\(metadata=\{.*?\},page_content=', '', response_text, flags=re.DOTALL)
507
-
508
- # Remove any mention of "Document:" in the response
509
- response_text = re.sub(r'- Document:.*', '', response_text)
510
 
511
  # Remove any unwanted escape characters like \u and \u00
512
  response_text = re.sub(r'\\u[0-9A-Fa-f]{4}', '', response_text)
@@ -523,7 +503,7 @@ def clean_response(response_text):
523
 
524
  # Format the final response with bullet points
525
  final_response = f"""
526
- Sure! Here is the response for your Query:
527
  • Document name - {document_name}
528
  • Page No - {page_number}
529
  • Response - {cleaned_response}
@@ -531,6 +511,51 @@ Sure! Here is the response for your Query:
531
 
532
  return final_response
533
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
534
 
535
 
536
 
 
353
  # Sure! Here's the information:
354
  # """
355
 
356
+ # phi_custom_template = """
357
+ # <|system|>
358
+ # Sei un esperto della lingua italiana e un madrelingua italiano. Il tuo compito è fornire risposte concise, dirette e brevi basate sul documento fornito. Dovresti restituire le informazioni nel seguente formato:
359
+ # - Nome del documento: (il nome del documento)
360
+ # - Numero di pagina: (numero di pagina)
361
+ # - Contenuto effettivo: (contenuto rilevante del documento)
362
+ # Alla fine, fornisci una sezione separata per la risposta nel seguente formato:
363
+ # - Risposta: (la risposta alla domanda)
364
+ # Se non riesci a trovare la risposta nel documento, rispondi semplicemente con "Questa domanda è al di là delle mie conoscenze". Ecco i dettagli del documento da considerare:
365
+ # <|end|>
366
+ # <|user|>
367
+ # {context}
368
+ # Question: {question}<|end|>
369
+ # <|assistant|>
370
+ # Sure! The Responses are as follows:
371
+ # """
372
+
373
+
374
  phi_custom_template = """
375
  <|system|>
376
  Sei un esperto della lingua italiana e un madrelingua italiano. Il tuo compito è fornire risposte concise, dirette e brevi basate sul documento fornito. Dovresti restituire le informazioni nel seguente formato:
 
387
  <|assistant|>
388
  Sure! The Responses are as follows:
389
  """
390
+ <|end|>
391
+ <|user|>
392
+ {context}
393
+ Question: {question}<|end|>
394
+ <|assistant|>
395
+ Sure! The Responses are as follows:
396
+ """
397
 
398
  def generate_bot_response(history, choice, retrieval_mode, model_choice):
399
  if not history:
 
470
 
471
  import re
472
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
473
  def clean_response(response_text):
474
  # Remove system and user tags
475
  response_text = re.sub(r'<\|system\|>.*?<\|end\|>', '', response_text, flags=re.DOTALL)
 
478
 
479
  # Extract the document name and page number
480
  document_match = re.search(r"Document\(metadata=\{'source':'(.+?)','page':(\d+)\}", response_text)
 
481
  if document_match:
482
  document_name = document_match.group(1).split('/')[-1] # Get the document name
483
  page_number = document_match.group(2) # Get the page number
 
485
  document_name = "Unknown"
486
  page_number = "Unknown"
487
 
488
+ # Remove the 'Document(metadata=...' part and keep only the page content
489
  response_text = re.sub(r'Document\(metadata=\{.*?\},page_content=', '', response_text, flags=re.DOTALL)
 
 
 
490
 
491
  # Remove any unwanted escape characters like \u and \u00
492
  response_text = re.sub(r'\\u[0-9A-Fa-f]{4}', '', response_text)
 
503
 
504
  # Format the final response with bullet points
505
  final_response = f"""
506
+ Sure! The Responses are as follows:
507
  • Document name - {document_name}
508
  • Page No - {page_number}
509
  • Response - {cleaned_response}
 
511
 
512
  return final_response
513
 
514
+ # def clean_response(response_text):
515
+ # # Remove system and user tags
516
+ # response_text = re.sub(r'<\|system\|>.*?<\|end\|>', '', response_text, flags=re.DOTALL)
517
+ # response_text = re.sub(r'<\|user\|>.*?<\|end\|>', '', response_text, flags=re.DOTALL)
518
+ # response_text = re.sub(r'<\|assistant\|>', '', response_text, flags=re.DOTALL)
519
+
520
+ # # Extract the document name and page number
521
+ # document_match = re.search(r"Document\(metadata=\{'source':'(.+?)','page':(\d+)\}", response_text)
522
+ # document_match = re.search(r"Document\(metadata=\{'source':'(.+?)','page':(\d+)\}", response_text)
523
+ # if document_match:
524
+ # document_name = document_match.group(1).split('/')[-1] # Get the document name
525
+ # page_number = document_match.group(2) # Get the page number
526
+ # else:
527
+ # document_name = "Unknown"
528
+ # page_number = "Unknown"
529
+
530
+ # # Remove the entire 'Document(metadata=...' and any mention of it from the response
531
+ # response_text = re.sub(r'Document\(metadata=\{.*?\},page_content=', '', response_text, flags=re.DOTALL)
532
+
533
+ # # Remove any mention of "Document:" in the response
534
+ # response_text = re.sub(r'- Document:.*', '', response_text)
535
+
536
+ # # Remove any unwanted escape characters like \u and \u00
537
+ # response_text = re.sub(r'\\u[0-9A-Fa-f]{4}', '', response_text)
538
+
539
+ # # Ensure proper spacing between words and dates
540
+ # response_text = re.sub(r'([a-zA-Z])(\d)', r'\1 \2', response_text)
541
+ # response_text = re.sub(r'(\d)([a-zA-Z])', r'\1 \2', response_text)
542
+
543
+ # # Remove the phrase "Sure! The Responses are as follows:" from the actual content
544
+ # response_text = re.sub(r'Sure! The Responses are as follows:', '', response_text).strip()
545
+
546
+ # # Clean up the text by removing extra whitespace
547
+ # cleaned_response = re.sub(r'\s+', ' ', response_text).strip()
548
+
549
+ # # Format the final response with bullet points
550
+ # final_response = f"""
551
+ # Sure! Here is the response for your Query:
552
+ # • Document name - {document_name}
553
+ # • Page No - {page_number}
554
+ # • Response - {cleaned_response}
555
+ # """
556
+
557
+ # return final_response
558
+
559
 
560
 
561