add comments (progress print statements around conversion, token calculation, and RAG index creation)
Browse files
app.py
CHANGED
@@ -197,9 +197,13 @@ def auto_convert(file_objs, url, do_ocr, do_table_structure):
|
|
197 |
{} # return an empty state dictionary
|
198 |
)
|
199 |
# Convert the document to markdown.
|
|
|
200 |
markdown = convert_to_markdown(file_objs, url, do_ocr, do_table_structure)
|
|
|
201 |
combined_text = prefix + markdown
|
|
|
202 |
token_count, suggestions, _ = calculate_tokens_suggest_compression_ratio(combined_text, tokenizer, model)
|
|
|
203 |
min_ratio = min(suggestions)
|
204 |
max_ratio = max(suggestions)
|
205 |
default_ratio = suggestions[len(suggestions) // 2]
|
@@ -213,7 +217,9 @@ def auto_convert(file_objs, url, do_ocr, do_table_structure):
|
|
213 |
rag_text = combined_text[len(prefix):]
|
214 |
else:
|
215 |
rag_text = combined_text
|
|
|
216 |
rag_index = create_rag_index(rag_text)
|
|
|
217 |
state = {"rag_index": rag_index}
|
218 |
|
219 |
return (
|
|
|
197 |
{} # return an empty state dictionary
|
198 |
)
|
199 |
# Convert the document to markdown.
|
200 |
+
print("Converting to markdown")
|
201 |
markdown = convert_to_markdown(file_objs, url, do_ocr, do_table_structure)
|
202 |
+
print("Done")
|
203 |
combined_text = prefix + markdown
|
204 |
+
print("Calculating tokens")
|
205 |
token_count, suggestions, _ = calculate_tokens_suggest_compression_ratio(combined_text, tokenizer, model)
|
206 |
+
print("Done")
|
207 |
min_ratio = min(suggestions)
|
208 |
max_ratio = max(suggestions)
|
209 |
default_ratio = suggestions[len(suggestions) // 2]
|
|
|
217 |
rag_text = combined_text[len(prefix):]
|
218 |
else:
|
219 |
rag_text = combined_text
|
220 |
+
print("Creating RAG index")
|
221 |
rag_index = create_rag_index(rag_text)
|
222 |
+
print("Done")
|
223 |
state = {"rag_index": rag_index}
|
224 |
|
225 |
return (
|