Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -3,6 +3,7 @@ import time
|
|
3 |
import datetime
|
4 |
from pathlib import Path
|
5 |
from typing import List
|
|
|
6 |
from PyPDF2 import PdfReader
|
7 |
import streamlit as st
|
8 |
import pandas as pd
|
@@ -121,6 +122,8 @@ if st.button("Convertir"):
|
|
121 |
if uploaded_files:
|
122 |
input_paths = []
|
123 |
generated_files = []
|
|
|
|
|
124 |
total_files = len(uploaded_files)
|
125 |
start_time = time.time() # Chronomètre de démarrage
|
126 |
|
@@ -155,29 +158,38 @@ if st.button("Convertir"):
|
|
155 |
status_placeholder.info(
|
156 |
f"🔄 Traitement de `{file_path.name}` ({i + 1}/{total_files})"
|
157 |
)
|
158 |
-
file_start_time = time.time()
|
159 |
|
160 |
# Conversion du fichier
|
161 |
conv_results = list(converter.convert_all([file_path], raises_on_error=False))
|
162 |
for conv_res in conv_results:
|
163 |
if conv_res.status == ConversionStatus.SUCCESS:
|
164 |
st.success(f"✅ Conversion réussie : `{conv_res.input.file}`")
|
|
|
|
|
165 |
for fmt in export_formats:
|
166 |
output_file = OUTPUT_DIR / f"{conv_res.input.file.stem}.{fmt}"
|
167 |
if fmt == "md":
|
168 |
with open(output_file, "w") as f:
|
169 |
-
f.write(
|
170 |
elif fmt == "json":
|
171 |
with open(output_file, "w", encoding="utf-8") as f:
|
172 |
json.dump(conv_res.document.export_to_dict(), f, ensure_ascii=False, indent=2)
|
173 |
elif fmt == "yaml":
|
174 |
with open(output_file, "w", encoding="utf-8") as f:
|
175 |
yaml.dump(conv_res.document.export_to_dict(), f, allow_unicode=True)
|
176 |
-
elif fmt == "multimodal":
|
177 |
-
# Simulation d'un export multimodal
|
178 |
-
multimodal_output = OUTPUT_DIR / f"{conv_res.input.file.stem}_multimodal.parquet"
|
179 |
-
st.write(f"Multimodal export simulé : `{multimodal_output}`")
|
180 |
generated_files.append(output_file)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
181 |
else:
|
182 |
st.error(f"❌ Échec de la conversion pour : `{conv_res.input.file}`")
|
183 |
|
@@ -192,6 +204,20 @@ if st.button("Convertir"):
|
|
192 |
content = f.read()
|
193 |
st.text_area(f"Prévisualisation : {generated_file.name}", value=content, height=200)
|
194 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
195 |
# Temps total écoulé
|
196 |
total_time = time.time() - start_time
|
197 |
st.success(f"✅ Conversion terminée en {int(total_time)} secondes !")
|
|
|
3 |
import datetime
|
4 |
from pathlib import Path
|
5 |
from typing import List
|
6 |
+
from PIL import Image
|
7 |
from PyPDF2 import PdfReader
|
8 |
import streamlit as st
|
9 |
import pandas as pd
|
|
|
122 |
if uploaded_files:
|
123 |
input_paths = []
|
124 |
generated_files = []
|
125 |
+
figures = []
|
126 |
+
tables = []
|
127 |
total_files = len(uploaded_files)
|
128 |
start_time = time.time() # Chronomètre de démarrage
|
129 |
|
|
|
158 |
status_placeholder.info(
|
159 |
f"🔄 Traitement de `{file_path.name}` ({i + 1}/{total_files})"
|
160 |
)
|
|
|
161 |
|
162 |
# Conversion du fichier
|
163 |
conv_results = list(converter.convert_all([file_path], raises_on_error=False))
|
164 |
for conv_res in conv_results:
|
165 |
if conv_res.status == ConversionStatus.SUCCESS:
|
166 |
st.success(f"✅ Conversion réussie : `{conv_res.input.file}`")
|
167 |
+
|
168 |
+
# Exporter les résultats
|
169 |
for fmt in export_formats:
|
170 |
output_file = OUTPUT_DIR / f"{conv_res.input.file.stem}.{fmt}"
|
171 |
if fmt == "md":
|
172 |
with open(output_file, "w") as f:
|
173 |
+
f.write(conv_res.document.export_to_markdown())
|
174 |
elif fmt == "json":
|
175 |
with open(output_file, "w", encoding="utf-8") as f:
|
176 |
json.dump(conv_res.document.export_to_dict(), f, ensure_ascii=False, indent=2)
|
177 |
elif fmt == "yaml":
|
178 |
with open(output_file, "w", encoding="utf-8") as f:
|
179 |
yaml.dump(conv_res.document.export_to_dict(), f, allow_unicode=True)
|
|
|
|
|
|
|
|
|
180 |
generated_files.append(output_file)
|
181 |
+
|
182 |
+
# Export des figures et tables
|
183 |
+
for element, _ in conv_res.document.iterate_items():
|
184 |
+
if isinstance(element, PictureItem):
|
185 |
+
fig_path = FIGURES_DIR / f"{conv_res.input.file.stem}_figure.png"
|
186 |
+
element.image.pil_image.save(fig_path)
|
187 |
+
figures.append(fig_path)
|
188 |
+
elif isinstance(element, TableItem):
|
189 |
+
table_path = TABLES_DIR / f"{conv_res.input.file.stem}_table.csv"
|
190 |
+
table_df = element.export_to_dataframe()
|
191 |
+
table_df.to_csv(table_path, index=False)
|
192 |
+
tables.append(table_path)
|
193 |
else:
|
194 |
st.error(f"❌ Échec de la conversion pour : `{conv_res.input.file}`")
|
195 |
|
|
|
204 |
content = f.read()
|
205 |
st.text_area(f"Prévisualisation : {generated_file.name}", value=content, height=200)
|
206 |
|
207 |
+
# Affichage des figures extraites
|
208 |
+
if figures:
|
209 |
+
st.subheader("🖼️ Figures extraites")
|
210 |
+
for fig in figures:
|
211 |
+
st.image(Image.open(fig), caption=fig.name)
|
212 |
+
|
213 |
+
# Affichage des tableaux extraits
|
214 |
+
if tables:
|
215 |
+
st.subheader("📋 Tableaux extraits")
|
216 |
+
for table in tables:
|
217 |
+
st.markdown(f"📄 **{table.name}**")
|
218 |
+
table_df = pd.read_csv(table)
|
219 |
+
st.dataframe(table_df)
|
220 |
+
|
221 |
# Temps total écoulé
|
222 |
total_time = time.time() - start_time
|
223 |
st.success(f"✅ Conversion terminée en {int(total_time)} secondes !")
|