Artemis-IA committed on
Commit
fec6a83
·
verified ·
1 Parent(s): 9c04880

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -6
app.py CHANGED
@@ -3,6 +3,7 @@ import time
3
  import datetime
4
  from pathlib import Path
5
  from typing import List
 
6
  from PyPDF2 import PdfReader
7
  import streamlit as st
8
  import pandas as pd
@@ -121,6 +122,8 @@ if st.button("Convertir"):
121
  if uploaded_files:
122
  input_paths = []
123
  generated_files = []
 
 
124
  total_files = len(uploaded_files)
125
  start_time = time.time() # Chronomètre de démarrage
126
 
@@ -155,29 +158,38 @@ if st.button("Convertir"):
155
  status_placeholder.info(
156
  f"🔄 Traitement de `{file_path.name}` ({i + 1}/{total_files})"
157
  )
158
- file_start_time = time.time()
159
 
160
  # Conversion du fichier
161
  conv_results = list(converter.convert_all([file_path], raises_on_error=False))
162
  for conv_res in conv_results:
163
  if conv_res.status == ConversionStatus.SUCCESS:
164
  st.success(f"✅ Conversion réussie : `{conv_res.input.file}`")
 
 
165
  for fmt in export_formats:
166
  output_file = OUTPUT_DIR / f"{conv_res.input.file.stem}.{fmt}"
167
  if fmt == "md":
168
  with open(output_file, "w") as f:
169
- f.write(f"## Contenu converti pour `{conv_res.input.file}`")
170
  elif fmt == "json":
171
  with open(output_file, "w", encoding="utf-8") as f:
172
  json.dump(conv_res.document.export_to_dict(), f, ensure_ascii=False, indent=2)
173
  elif fmt == "yaml":
174
  with open(output_file, "w", encoding="utf-8") as f:
175
  yaml.dump(conv_res.document.export_to_dict(), f, allow_unicode=True)
176
- elif fmt == "multimodal":
177
- # Simulation d'un export multimodal
178
- multimodal_output = OUTPUT_DIR / f"{conv_res.input.file.stem}_multimodal.parquet"
179
- st.write(f"Multimodal export simulé : `{multimodal_output}`")
180
  generated_files.append(output_file)
 
 
 
 
 
 
 
 
 
 
 
 
181
  else:
182
  st.error(f"❌ Échec de la conversion pour : `{conv_res.input.file}`")
183
 
@@ -192,6 +204,20 @@ if st.button("Convertir"):
192
  content = f.read()
193
  st.text_area(f"Prévisualisation : {generated_file.name}", value=content, height=200)
194
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
195
  # Temps total écoulé
196
  total_time = time.time() - start_time
197
  st.success(f"✅ Conversion terminée en {int(total_time)} secondes !")
 
3
  import datetime
4
  from pathlib import Path
5
  from typing import List
6
+ from PIL import Image
7
  from PyPDF2 import PdfReader
8
  import streamlit as st
9
  import pandas as pd
 
122
  if uploaded_files:
123
  input_paths = []
124
  generated_files = []
125
+ figures = []
126
+ tables = []
127
  total_files = len(uploaded_files)
128
  start_time = time.time() # Chronomètre de démarrage
129
 
 
158
  status_placeholder.info(
159
  f"🔄 Traitement de `{file_path.name}` ({i + 1}/{total_files})"
160
  )
 
161
 
162
  # Conversion du fichier
163
  conv_results = list(converter.convert_all([file_path], raises_on_error=False))
164
  for conv_res in conv_results:
165
  if conv_res.status == ConversionStatus.SUCCESS:
166
  st.success(f"✅ Conversion réussie : `{conv_res.input.file}`")
167
+
168
+ # Exporter les résultats
169
  for fmt in export_formats:
170
  output_file = OUTPUT_DIR / f"{conv_res.input.file.stem}.{fmt}"
171
  if fmt == "md":
172
  with open(output_file, "w") as f:
173
+ f.write(conv_res.document.export_to_markdown())
174
  elif fmt == "json":
175
  with open(output_file, "w", encoding="utf-8") as f:
176
  json.dump(conv_res.document.export_to_dict(), f, ensure_ascii=False, indent=2)
177
  elif fmt == "yaml":
178
  with open(output_file, "w", encoding="utf-8") as f:
179
  yaml.dump(conv_res.document.export_to_dict(), f, allow_unicode=True)
 
 
 
 
180
  generated_files.append(output_file)
181
+
182
+ # Export des figures et tables
183
+ for element, _ in conv_res.document.iterate_items():
184
+ if isinstance(element, PictureItem):
185
+ fig_path = FIGURES_DIR / f"{conv_res.input.file.stem}_figure.png"
186
+ element.image.pil_image.save(fig_path)
187
+ figures.append(fig_path)
188
+ elif isinstance(element, TableItem):
189
+ table_path = TABLES_DIR / f"{conv_res.input.file.stem}_table.csv"
190
+ table_df = element.export_to_dataframe()
191
+ table_df.to_csv(table_path, index=False)
192
+ tables.append(table_path)
193
  else:
194
  st.error(f"❌ Échec de la conversion pour : `{conv_res.input.file}`")
195
 
 
204
  content = f.read()
205
  st.text_area(f"Prévisualisation : {generated_file.name}", value=content, height=200)
206
 
207
+ # Affichage des figures extraites
208
+ if figures:
209
+ st.subheader("🖼️ Figures extraites")
210
+ for fig in figures:
211
+ st.image(Image.open(fig), caption=fig.name)
212
+
213
+ # Affichage des tableaux extraits
214
+ if tables:
215
+ st.subheader("📋 Tableaux extraits")
216
+ for table in tables:
217
+ st.markdown(f"📄 **{table.name}**")
218
+ table_df = pd.read_csv(table)
219
+ st.dataframe(table_df)
220
+
221
  # Temps total écoulé
222
  total_time = time.time() - start_time
223
  st.success(f"✅ Conversion terminée en {int(total_time)} secondes !")