RicardoDataScience36 committed on
Commit
e4e1bf2
·
verified ·
1 Parent(s): 341f109

Update app.py

Browse files

He actualizado el archivo app.py para incluir la funcionalidad de llama-index-readers-docling como un método de conversión adicional.

Files changed (1) hide show
  1. app.py +42 -59
app.py CHANGED
@@ -1,71 +1,60 @@
1
-
2
  import gradio as gr
3
  from docling.document_converter import DocumentConverter
 
4
  import json
5
  import tempfile
6
  import os
7
 
8
- def convert_document(file, output_format):
9
- """
10
- Converts a document to Markdown or JSON format using Docling.
11
- Args:
12
- file: Uploaded file to convert.
13
- output_format: Desired output format (Markdown or JSON).
14
- Returns:
15
- Tuple containing the converted text, metadata, and downloadable file.
16
- """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  try:
18
- # Initialize the converter and load the document
19
  converter = DocumentConverter()
20
  result = converter.convert(file.name)
21
-
22
- # Create temporary file for download
23
  temp_dir = tempfile.gettempdir()
24
-
25
  if output_format == "Markdown":
26
  converted_text = result.document.export_to_markdown()
27
  file_extension = ".md"
28
  else:
29
  converted_text = result.document.export_to_json()
30
  file_extension = ".json"
31
-
32
- # Create output file
33
  output_filename = os.path.splitext(os.path.basename(file.name))[0] + file_extension
34
  output_path = os.path.join(temp_dir, output_filename)
35
-
36
- # Write content to file
37
  with open(output_path, 'w', encoding='utf-8') as f:
38
  f.write(converted_text)
39
-
40
  metadata = {
41
  "Filename": file.name,
42
  "File Size": f"{os.path.getsize(file.name) / 1024:.2f} KB",
43
  "Output Format": output_format,
44
- "Conversion Status": "Success"
 
45
  }
46
-
47
- return (
48
- converted_text,
49
- metadata,
50
- output_path,
51
- gr.update(visible=True),
52
- "✅ Document converted successfully!"
53
- )
54
-
55
  except Exception as e:
56
- error_metadata = {
57
- "Error": str(e),
58
- "Status": "Failed"
59
- }
60
- return (
61
- "",
62
- error_metadata,
63
- None,
64
- gr.update(visible=False),
65
- "❌ Error during conversion"
66
- )
67
-
68
- # Custom CSS
69
  custom_css = """
70
  :root {
71
  --primary-color: #2563eb;
@@ -132,7 +121,6 @@ body {
132
  }
133
  """
134
 
135
- # Create Gradio interface
136
  with gr.Blocks(css=custom_css) as app:
137
  gr.HTML(
138
  """
@@ -142,7 +130,6 @@ with gr.Blocks(css=custom_css) as app:
142
  </div>
143
  """
144
  )
145
-
146
  with gr.Row():
147
  with gr.Column(scale=1):
148
  with gr.Group():
@@ -158,19 +145,23 @@ with gr.Blocks(css=custom_css) as app:
158
  value="Markdown",
159
  elem_classes="gr-input"
160
  )
 
 
 
 
 
 
161
  convert_button = gr.Button(
162
  "🔄 Convert Document",
163
  variant="primary",
164
  elem_classes=["gr-button"]
165
  )
166
-
167
  status_message = gr.Textbox(
168
  label="Status",
169
  interactive=False,
170
  visible=False,
171
  elem_classes="gr-padded"
172
  )
173
-
174
  with gr.Column(scale=2):
175
  with gr.Group():
176
  gr.Markdown("### Conversion Output")
@@ -189,19 +180,11 @@ with gr.Blocks(css=custom_css) as app:
189
  visible=False,
190
  elem_classes="gr-padded"
191
  )
192
-
193
- # Event handlers
194
  convert_button.click(
195
- fn=convert_document,
196
- inputs=[file_input, format_input],
197
- outputs=[
198
- output_text,
199
- output_metadata,
200
- download_button,
201
- download_button,
202
- status_message
203
- ]
204
  )
205
 
206
- # Launch the app with share=True
207
- app.launch(debug=True, share=True)
 
 
1
  import gradio as gr
2
  from docling.document_converter import DocumentConverter
3
+ from llama_index.readers.docling import DoclingReader
4
  import json
5
  import tempfile
6
  import os
7
 
8
def convert_document_docling(file, output_format):
    """Convert an uploaded document via llama-index's ``DoclingReader``.

    Args:
        file: Uploaded file object from the Gradio file input. Only its
            ``name`` attribute (the path of the upload on disk) is read.
        output_format: ``"JSON"`` selects the reader's JSON export; any
            other value selects Markdown.

    Returns:
        A 5-tuple ``(converted_text, metadata, output_path,
        download_button_update, status_message)``. On failure the text is
        empty, ``metadata`` carries the error string, ``output_path`` is
        ``None`` and the download button update hides the button.
    """
    try:
        if file is None:
            # Give the user a readable message instead of an AttributeError.
            raise ValueError("No file was uploaded")

        wants_json = output_format == "JSON"
        export_type = (
            DoclingReader.ExportType.JSON
            if wants_json
            else DoclingReader.ExportType.MARKDOWN
        )
        reader = DoclingReader(export_type=export_type)
        docs = reader.load_data(file_path=file.name)
        if not docs:
            # Avoid the opaque "list index out of range" from docs[0].
            raise ValueError("DoclingReader returned no documents")
        converted_text = docs[0].text

        # Use a fresh directory per conversion: writing straight into the
        # shared temp dir lets two uploads with the same base name overwrite
        # each other's download file.
        temp_dir = tempfile.mkdtemp()
        base_name = os.path.splitext(os.path.basename(file.name))[0]
        output_filename = base_name + (".json" if wants_json else ".md")
        output_path = os.path.join(temp_dir, output_filename)
        with open(output_path, 'w', encoding='utf-8') as f:
            f.write(converted_text)

        metadata = {
            "Filename": file.name,
            "File Size": f"{os.path.getsize(file.name) / 1024:.2f} KB",
            "Output Format": output_format,
            "Conversion Status": "Success",
            "Method": "llama-index-readers-docling",
        }
        return (
            converted_text,
            metadata,
            output_path,
            gr.update(visible=True),
            "✅ Document converted successfully!",
        )
    except Exception as e:
        # UI boundary: report the failure in the metadata panel rather than
        # letting the exception escape the event handler.
        error_metadata = {"Error": str(e), "Status": "Failed"}
        return (
            "",
            error_metadata,
            None,
            gr.update(visible=False),
            "❌ Error during conversion",
        )
29
+
30
def convert_document_original(file, output_format):
    """Convert an uploaded document with docling's ``DocumentConverter``.

    Args:
        file: Uploaded file object from the Gradio file input. Only its
            ``name`` attribute (the path of the upload on disk) is read.
        output_format: ``"Markdown"`` selects Markdown export; any other
            value selects JSON.

    Returns:
        A 5-tuple ``(converted_text, metadata, output_path,
        download_button_update, status_message)``. On failure the text is
        empty, ``metadata`` carries the error string, ``output_path`` is
        ``None`` and the download button update hides the button.
    """
    try:
        if file is None:
            # Give the user a readable message instead of an AttributeError.
            raise ValueError("No file was uploaded")

        converter = DocumentConverter()
        result = converter.convert(file.name)

        if output_format == "Markdown":
            converted_text = result.document.export_to_markdown()
            file_extension = ".md"
        else:
            # NOTE(review): the original called export_to_json(), which does
            # not appear in the docling document API as far as I can tell;
            # export_to_dict() plus json.dumps yields the JSON text instead.
            # Confirm against the installed docling version.
            converted_text = json.dumps(
                result.document.export_to_dict(),
                indent=2,
                ensure_ascii=False,
            )
            file_extension = ".json"

        # Use a fresh directory per conversion: writing straight into the
        # shared temp dir lets two uploads with the same base name overwrite
        # each other's download file.
        temp_dir = tempfile.mkdtemp()
        base_name = os.path.splitext(os.path.basename(file.name))[0]
        output_path = os.path.join(temp_dir, base_name + file_extension)
        with open(output_path, 'w', encoding='utf-8') as f:
            f.write(converted_text)

        metadata = {
            "Filename": file.name,
            "File Size": f"{os.path.getsize(file.name) / 1024:.2f} KB",
            "Output Format": output_format,
            "Conversion Status": "Success",
            "Method": "docling",
        }
        return (
            converted_text,
            metadata,
            output_path,
            gr.update(visible=True),
            "✅ Document converted successfully!",
        )
    except Exception as e:
        # UI boundary: report the failure in the metadata panel rather than
        # letting the exception escape the event handler.
        error_metadata = {"Error": str(e), "Status": "Failed"}
        return (
            "",
            error_metadata,
            None,
            gr.update(visible=False),
            "❌ Error during conversion",
        )
56
+
57
+
 
 
 
 
 
 
 
 
 
58
  custom_css = """
59
  :root {
60
  --primary-color: #2563eb;
 
121
  }
122
  """
123
 
 
124
  with gr.Blocks(css=custom_css) as app:
125
  gr.HTML(
126
  """
 
130
  </div>
131
  """
132
  )
 
133
  with gr.Row():
134
  with gr.Column(scale=1):
135
  with gr.Group():
 
145
  value="Markdown",
146
  elem_classes="gr-input"
147
  )
148
+ method_input = gr.Radio(
149
+ choices=["docling", "llama-index-readers-docling"],
150
+ label="Conversion Method",
151
+ value="docling",
152
+ elem_classes="gr-input"
153
+ )
154
  convert_button = gr.Button(
155
  "🔄 Convert Document",
156
  variant="primary",
157
  elem_classes=["gr-button"]
158
  )
 
159
  status_message = gr.Textbox(
160
  label="Status",
161
  interactive=False,
162
  visible=False,
163
  elem_classes="gr-padded"
164
  )
 
165
  with gr.Column(scale=2):
166
  with gr.Group():
167
  gr.Markdown("### Conversion Output")
 
180
  visible=False,
181
  elem_classes="gr-padded"
182
  )
183
+
 
184
  convert_button.click(
185
+ fn=lambda file, format, method: convert_document_docling(file, format) if method == "llama-index-readers-docling" else convert_document_original(file, format),
186
+ inputs=[file_input, format_input, method_input],
187
+ outputs=[output_text, output_metadata, download_button, download_button, status_message]
 
 
 
 
 
 
188
  )
189
 
190
+ app.launch(debug=True, share=True)