Carlos Rosas committed on
Commit
8dbdf3b
·
verified ·
1 Parent(s): ffd3ca6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -23
app.py CHANGED
@@ -121,33 +121,40 @@ class pleiasBot:
121
  traceback.print_exc()
122
  return None, None, None
123
 
124
- def format_references(text):
125
- ref_pattern = r'<ref name="([^"]+)">"([^"]+)"</ref>\.\s*'
126
 
127
- parts = []
128
- current_pos = 0
129
- ref_number = 1
130
-
131
- for match in re.finditer(ref_pattern, text):
132
- # Add text before the reference
133
- text_before = text[current_pos:match.start()].rstrip()
134
- parts.append(text_before)
135
-
136
- # Extract reference components
137
- ref_id = match.group(1)
138
- ref_text = match.group(2).strip()
139
-
140
- # Add the reference with sequential numbering
141
- tooltip_html = f'<span class="tooltip"><strong>[{ref_number}]</strong><span class="tooltiptext"><strong>{ref_id}</strong>: {ref_text}</span></span>.<br>'
142
- parts.append(tooltip_html)
143
 
144
- current_pos = match.end()
145
- ref_number += 1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
146
 
147
- # Add any remaining text
148
- parts.append(text[current_pos:])
149
 
150
- return ''.join(parts)
151
 
152
  # Initialize the pleiasBot
153
  pleias_bot = pleiasBot()
 
121
  traceback.print_exc()
122
  return None, None, None
123
 
124
+ def hybrid_search(text):
125
+ results = table.search(text, query_type="hybrid").limit(5).to_pandas()
126
 
127
+ # Use a list to maintain order
128
+ seen_hashes = []
129
+
130
+ document = []
131
+ document_html = []
132
+ for _, row in results.iterrows():
133
+ hash_id = str(row['hash'])
 
 
 
 
 
 
 
 
 
134
 
135
+ # Skip if we've already seen this hash
136
+ if hash_id in seen_hashes:
137
+ continue
138
+
139
+ seen_hashes.append(hash_id) # append instead of add to maintain order
140
+ title = row['section']
141
+ content = row['text']
142
+
143
+ source_text = f"<|source_start|><|source_id_start|>{hash_id}<|source_id_end|>{title}\n{content}<|source_end|>"
144
+ document.append(source_text)
145
+ document_html.append(f'<div class="source" id="{hash_id}"><p><b>{hash_id}</b> : {title}<br>{content}</div>')
146
+
147
+ # Print for debugging
148
+ print(f"Added source {hash_id}")
149
+ print(f"Length of source text: {len(source_text)}")
150
+
151
+ document = "\n".join(document)
152
+ document_html = '<div id="source_listing">' + "".join(document_html) + "</div>"
153
 
154
+ # Print total length for debugging
155
+ print(f"Total length of document: {len(document)}")
156
 
157
+ return document, document_html
158
 
159
  # Initialize the pleiasBot
160
  pleias_bot = pleiasBot()