Carlos Rosas committed on
Commit
c7a2ff9
·
verified ·
1 Parent(s): ca3da3d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -29
app.py CHANGED
@@ -77,11 +77,11 @@ class pleiasBot:
77
  fiches, fiches_html = hybrid_search(user_message)
78
 
79
  detailed_prompt = f"""<|query_start|>{user_message}<|query_end|>\n{fiches}\n<|source_analysis_start|>"""
80
-
81
  # Convert inputs to tensor
82
  input_ids = tokenizer.encode(detailed_prompt, return_tensors="pt").to(device)
83
  attention_mask = torch.ones_like(input_ids)
84
-
85
  try:
86
  output = model.generate(
87
  input_ids,
@@ -95,7 +95,7 @@ class pleiasBot:
95
  pad_token_id=tokenizer.pad_token_id,
96
  eos_token_id=tokenizer.eos_token_id
97
  )
98
-
99
  # Decode the generated text
100
  generated_text = tokenizer.decode(output[0][len(input_ids[0]):])
101
 
@@ -105,8 +105,11 @@ class pleiasBot:
105
  analysis = parts[0].strip()
106
  answer = parts[1].replace("<|answer_start|>", "").replace("<|answer_end|>", "").strip()
107
 
 
 
 
108
  # Format each section with matching h2 titles
109
- analysis_text = '<h2 style="text-align:center">Analyse des sources</h2>\n<div class="generation">' + format_references(analysis) + "</div>"
110
  answer_text = '<h2 style="text-align:center">Réponse</h2>\n<div class="generation">' + format_references(answer) + "</div>"
111
  else:
112
  analysis_text = ""
@@ -114,40 +117,40 @@ class pleiasBot:
114
 
115
  fiches_html = '<h2 style="text-align:center">Sources</h2>\n' + fiches_html
116
  return analysis_text, answer_text, fiches_html
117
-
118
  except Exception as e:
119
  print(f"Error during generation: {str(e)}")
120
  import traceback
121
  traceback.print_exc()
122
  return None, None, None
123
-
124
- def format_references(text):
125
- ref_pattern = r'<ref name="([^"]+)">"([^"]+)"</ref>\.\s*' # Modified pattern to include the period and whitespace after ref
126
 
127
- parts = []
128
- current_pos = 0
129
- ref_number = 1
130
-
131
- for match in re.finditer(ref_pattern, text):
132
- # Add text before the reference
133
- text_before = text[current_pos:match.start()].rstrip()
134
- parts.append(text_before)
135
 
136
- # Extract reference components
137
- ref_id = match.group(1)
138
- ref_text = match.group(2).strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
139
 
140
- # Add the reference, keeping the existing structure but adding <br> where whitespace was
141
- tooltip_html = f'<span class="tooltip"><strong>[{ref_number}]</strong><span class="tooltiptext"><strong>{ref_id}</strong>: {ref_text}</span></span>.<br>'
142
- parts.append(tooltip_html)
143
 
144
- current_pos = match.end()
145
- ref_number += 1
146
-
147
- # Add any remaining text
148
- parts.append(text[current_pos:])
149
-
150
- return ''.join(parts)
151
 
152
  # Initialize the pleiasBot
153
  pleias_bot = pleiasBot()
 
77
  fiches, fiches_html = hybrid_search(user_message)
78
 
79
  detailed_prompt = f"""<|query_start|>{user_message}<|query_end|>\n{fiches}\n<|source_analysis_start|>"""
80
+
81
  # Convert inputs to tensor
82
  input_ids = tokenizer.encode(detailed_prompt, return_tensors="pt").to(device)
83
  attention_mask = torch.ones_like(input_ids)
84
+
85
  try:
86
  output = model.generate(
87
  input_ids,
 
95
  pad_token_id=tokenizer.pad_token_id,
96
  eos_token_id=tokenizer.eos_token_id
97
  )
98
+
99
  # Decode the generated text
100
  generated_text = tokenizer.decode(output[0][len(input_ids[0]):])
101
 
 
105
  analysis = parts[0].strip()
106
  answer = parts[1].replace("<|answer_start|>", "").replace("<|answer_end|>", "").strip()
107
 
108
+ # Add the prompt display to analysis section
109
+ prompt_display = f'<div class="generation" style="background-color: #f5f5f5; padding: 1em; margin-bottom: 1em; font-family: monospace; white-space: pre-wrap;">{detailed_prompt}</div>'
110
+
111
  # Format each section with matching h2 titles
112
+ analysis_text = '<h2 style="text-align:center">Analyse des sources</h2>\n' + prompt_display + '<div class="generation">' + format_references(analysis) + "</div>"
113
  answer_text = '<h2 style="text-align:center">Réponse</h2>\n<div class="generation">' + format_references(answer) + "</div>"
114
  else:
115
  analysis_text = ""
 
117
 
118
  fiches_html = '<h2 style="text-align:center">Sources</h2>\n' + fiches_html
119
  return analysis_text, answer_text, fiches_html
120
+
121
  except Exception as e:
122
  print(f"Error during generation: {str(e)}")
123
  import traceback
124
  traceback.print_exc()
125
  return None, None, None
 
 
 
126
 
127
+ def format_references(text):
128
+ ref_pattern = r'<ref name="([^"]+)">"([^"]+)"</ref>\.\s*' # Modified pattern to include the period and whitespace after ref
 
 
 
 
 
 
129
 
130
+ parts = []
131
+ current_pos = 0
132
+ ref_number = 1
133
+
134
+ for match in re.finditer(ref_pattern, text):
135
+ # Add text before the reference
136
+ text_before = text[current_pos:match.start()].rstrip()
137
+ parts.append(text_before)
138
+
139
+ # Extract reference components
140
+ ref_id = match.group(1)
141
+ ref_text = match.group(2).strip()
142
+
143
+ # Add the reference, keeping the existing structure but adding <br> where whitespace was
144
+ tooltip_html = f'<span class="tooltip"><strong>[{ref_number}]</strong><span class="tooltiptext"><strong>{ref_id}</strong>: {ref_text}</span></span>.<br>'
145
+ parts.append(tooltip_html)
146
+
147
+ current_pos = match.end()
148
+ ref_number += 1
149
 
150
+ # Add any remaining text
151
+ parts.append(text[current_pos:])
 
152
 
153
+ return ''.join(parts)
 
 
 
 
 
 
154
 
155
  # Initialize the pleiasBot
156
  pleias_bot = pleiasBot()