gutgut

Paused

App Files Community

Carlos Rosas committed on Nov 28, 2024

Commit

c7a2ff9

verified ·

1 Parent(s): ca3da3d

Update app.py

Browse files

Files changed (1) hide show

app.py +32 -29

app.py CHANGED Viewed

@@ -77,11 +77,11 @@ class pleiasBot:
         fiches, fiches_html = hybrid_search(user_message)
         detailed_prompt = f"""<|query_start|>{user_message}<|query_end|>\n{fiches}\n<|source_analysis_start|>"""
         # Convert inputs to tensor
         input_ids = tokenizer.encode(detailed_prompt, return_tensors="pt").to(device)
         attention_mask = torch.ones_like(input_ids)
         try:
             output = model.generate(
                 input_ids,
@@ -95,7 +95,7 @@ class pleiasBot:
                 pad_token_id=tokenizer.pad_token_id,
                 eos_token_id=tokenizer.eos_token_id
             )
             # Decode the generated text
             generated_text = tokenizer.decode(output[0][len(input_ids[0]):])
@@ -105,8 +105,11 @@ class pleiasBot:
                 analysis = parts[0].strip()
                 answer = parts[1].replace("<|answer_start|>", "").replace("<|answer_end|>", "").strip()
                 # Format each section with matching h2 titles
-                analysis_text = '<h2 style="text-align:center">Analyse des sources</h2>\n<div class="generation">' + format_references(analysis) + "</div>"
                 answer_text = '<h2 style="text-align:center">Réponse</h2>\n<div class="generation">' + format_references(answer) + "</div>"
             else:
                 analysis_text = ""
@@ -114,40 +117,40 @@ class pleiasBot:
             fiches_html = '<h2 style="text-align:center">Sources</h2>\n' + fiches_html
             return analysis_text, answer_text, fiches_html
         except Exception as e:
             print(f"Error during generation: {str(e)}")
             import traceback
             traceback.print_exc()
             return None, None, None
-def format_references(text):
-    ref_pattern = r'<ref name="([^"]+)">"([^"]+)"</ref>\.\s*'  # Modified pattern to include the period and whitespace after ref
-    parts = []
-    current_pos = 0
-    ref_number = 1
-    for match in re.finditer(ref_pattern, text):
-        # Add text before the reference
-        text_before = text[current_pos:match.start()].rstrip()
-        parts.append(text_before)
-        # Extract reference components
-        ref_id = match.group(1)
-        ref_text = match.group(2).strip()
-        # Add the reference, keeping the existing structure but adding <br> where whitespace was
-        tooltip_html = f'<span class="tooltip"><strong>[{ref_number}]</strong><span class="tooltiptext"><strong>{ref_id}</strong>: {ref_text}</span></span>.<br>'
-        parts.append(tooltip_html)
-        current_pos = match.end()
-        ref_number += 1
-    # Add any remaining text
-    parts.append(text[current_pos:])
-    return ''.join(parts)
 # Initialize the pleiasBot
 pleias_bot = pleiasBot()

         fiches, fiches_html = hybrid_search(user_message)
         detailed_prompt = f"""<|query_start|>{user_message}<|query_end|>\n{fiches}\n<|source_analysis_start|>"""
         # Convert inputs to tensor
         input_ids = tokenizer.encode(detailed_prompt, return_tensors="pt").to(device)
         attention_mask = torch.ones_like(input_ids)
         try:
             output = model.generate(
                 input_ids,
                 pad_token_id=tokenizer.pad_token_id,
                 eos_token_id=tokenizer.eos_token_id
             )
             # Decode the generated text
             generated_text = tokenizer.decode(output[0][len(input_ids[0]):])
                 analysis = parts[0].strip()
                 answer = parts[1].replace("<|answer_start|>", "").replace("<|answer_end|>", "").strip()
+                # Add the prompt display to analysis section
+                prompt_display = f'<div class="generation" style="background-color: #f5f5f5; padding: 1em; margin-bottom: 1em; font-family: monospace; white-space: pre-wrap;">{detailed_prompt}</div>'
                 # Format each section with matching h2 titles
+                analysis_text = '<h2 style="text-align:center">Analyse des sources</h2>\n' + prompt_display + '<div class="generation">' + format_references(analysis) + "</div>"
                 answer_text = '<h2 style="text-align:center">Réponse</h2>\n<div class="generation">' + format_references(answer) + "</div>"
             else:
                 analysis_text = ""
             fiches_html = '<h2 style="text-align:center">Sources</h2>\n' + fiches_html
             return analysis_text, answer_text, fiches_html
         except Exception as e:
             print(f"Error during generation: {str(e)}")
             import traceback
             traceback.print_exc()
             return None, None, None
+    def format_references(text):
+        ref_pattern = r'<ref name="([^"]+)">"([^"]+)"</ref>\.\s*'  # Modified pattern to include the period and whitespace after ref
+        parts = []
+        current_pos = 0
+        ref_number = 1
+        for match in re.finditer(ref_pattern, text):
+            # Add text before the reference
+            text_before = text[current_pos:match.start()].rstrip()
+            parts.append(text_before)
+            # Extract reference components
+            ref_id = match.group(1)
+            ref_text = match.group(2).strip()
+            # Add the reference, keeping the existing structure but adding <br> where whitespace was
+            tooltip_html = f'<span class="tooltip"><strong>[{ref_number}]</strong><span class="tooltiptext"><strong>{ref_id}</strong>: {ref_text}</span></span>.<br>'
+            parts.append(tooltip_html)
+            current_pos = match.end()
+            ref_number += 1
+        # Add any remaining text
+        parts.append(text[current_pos:])
+        return ''.join(parts)
 # Initialize the pleiasBot
 pleias_bot = pleiasBot()