RicardoDataScience36 committed on
Commit
74fd6a0
·
verified ·
1 Parent(s): b0c6b7a

Upload 4 files

Browse files

Carga del proyecto

Files changed (4) hide show
  1. DOCLING GRADIO.png +0 -0
  2. README.md +27 -14
  3. app.py +207 -0
  4. requirements.txt +2 -0
DOCLING GRADIO.png ADDED
README.md CHANGED
@@ -1,14 +1,27 @@
1
- ---
2
- title: DoclingDocumentConverter
3
- emoji: 📈
4
- colorFrom: pink
5
- colorTo: purple
6
- sdk: gradio
7
- sdk_version: 5.10.0
8
- app_file: app.py
9
- pinned: false
10
- license: mit
11
- short_description: Transform your documents into Markdown or JSON format with e
12
- ---
13
-
14
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Docling - Conversor de Documentos (Gradio)
2
+
3
+ Esta aplicación Gradio permite convertir documentos a formato Markdown o JSON utilizando la librería `docling`.
4
+
5
+ ## Funcionalidades
6
+
7
+ - Convertir documentos DOC, DOCX, PDF y TXT a Markdown o JSON.
8
+ - Mostrar la información del documento convertido en la interfaz.
9
+ - Descargar el documento convertido en un archivo.
10
+
11
+ ## Cómo usar
12
+
13
+ 1. Ejecuta el archivo `app.py`.
14
+ 2. La aplicación se abrirá en tu navegador web.
15
+ 3. Sube el archivo que deseas convertir utilizando el botón "Upload Your Document".
16
+ 4. Selecciona el formato de salida (Markdown o JSON).
17
+ 5. Haz clic en el botón "Convert Document" para iniciar la conversión.
18
+ 6. Una vez completada la conversión, se mostrará el contenido convertido y la metadata del documento.
19
+ 7. Puedes descargar el documento convertido utilizando el botón "Download Converted File".
20
+
21
+ ## Dependencias
22
+
23
+ - gradio
24
+ - docling
25
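+ - json (biblioteca estándar de Python, no requiere instalación)
26
+ - tempfile (biblioteca estándar de Python, no requiere instalación)
27
+ - os (biblioteca estándar de Python, no requiere instalación)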
app.py ADDED
@@ -0,0 +1,207 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import gradio as gr
3
+ from docling.document_converter import DocumentConverter
4
+ import json
5
+ import tempfile
6
+ import os
7
+
8
def convert_document(file, output_format):
    """Convert an uploaded document to Markdown or JSON using Docling.

    Args:
        file: The uploaded document as delivered by the ``gr.File`` input.
            Either a plain path string or an object whose ``name``
            attribute holds the path. ``None`` means nothing was uploaded.
        output_format: ``"Markdown"`` selects Markdown output; any other
            value selects JSON.

    Returns:
        A 5-tuple lined up with the ``outputs`` list of the click handler:
        the converted text, a metadata dict, the path of the downloadable
        file (``None`` on failure), a ``gr.update`` toggling the download
        component's visibility, and a short status message.
    """
    try:
        if file is None:
            # Clicking "Convert" without an upload would otherwise surface
            # as an opaque "'NoneType' object has no attribute 'name'".
            raise ValueError("No file was uploaded.")

        # Accept both a bare path string and an object exposing ``.name``.
        source_path = getattr(file, "name", file)
        source_name = os.path.basename(source_path)

        # Initialize the converter and load the document
        converter = DocumentConverter()
        result = converter.convert(source_path)

        if output_format == "Markdown":
            converted_text = result.document.export_to_markdown()
            file_extension = ".md"
        else:
            # NOTE(review): Docling documents ``export_to_dict()`` as the
            # JSON-ready export; ``export_to_json()`` is presumed missing on
            # DoclingDocument -- confirm against the installed version.
            converted_text = json.dumps(
                result.document.export_to_dict(),
                ensure_ascii=False,
                indent=2,
            )
            file_extension = ".json"

        # A fresh directory per conversion keeps the download's friendly
        # file name while preventing concurrent requests that upload
        # same-named files from overwriting each other's output.
        output_dir = tempfile.mkdtemp()
        output_filename = os.path.splitext(source_name)[0] + file_extension
        output_path = os.path.join(output_dir, output_filename)

        # Write content to file
        with open(output_path, "w", encoding="utf-8") as f:
            f.write(converted_text)

        metadata = {
            # Base name only: the full value is a server-side temp path.
            "Filename": source_name,
            "File Size": f"{os.path.getsize(source_path) / 1024:.2f} KB",
            "Output Format": output_format,
            "Conversion Status": "Success",
        }

        return (
            converted_text,
            metadata,
            output_path,
            gr.update(visible=True),
            "✅ Document converted successfully!",
        )

    except Exception as e:
        # UI boundary: report the failure in the metadata panel instead of
        # letting the exception escape into the Gradio event loop.
        error_metadata = {
            "Error": str(e),
            "Status": "Failed",
        }
        return (
            "",
            error_metadata,
            None,
            gr.update(visible=False),
            "❌ Error during conversion",
        )
67
+
68
# Dark-theme stylesheet passed to gr.Blocks(css=custom_css).
# Classes such as .gr-input, .gr-button, .gr-textbox and .gr-padded are
# attached to components through ``elem_classes``; .gr-header and
# .gr-subtitle style the HTML banner at the top of the page.
custom_css = """
:root {
    --primary-color: #2563eb;
    --secondary-color: #1e40af;
    --background-color: #1e1e1e;
    --card-background: #262626;
    --text-color: #ffffff;
    --border-radius: 10px;
}

body {
    background-color: var(--background-color);
    color: var(--text-color);
}

.container {
    max-width: 1200px;
    margin: 0 auto;
    padding: 2rem;
}

.gr-button {
    background: var(--primary-color) !important;
    border: none !important;
    color: white !important;
    padding: 10px 20px !important;
    border-radius: var(--border-radius) !important;
    transition: all 0.3s ease !important;
}

.gr-button:hover {
    background: var(--secondary-color) !important;
    transform: translateY(-2px);
    box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
}

.gr-form {
    background-color: var(--card-background);
    padding: 2rem;
    border-radius: var(--border-radius);
    box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
}

.gr-input, .gr-textbox {
    background-color: #333333 !important;
    border: 1px solid #404040 !important;
    color: var(--text-color) !important;
    border-radius: var(--border-radius) !important;
}

.gr-padded {
    padding: 1rem;
}

.gr-header {
    margin-bottom: 2rem;
    text-align: center;
}

.gr-subtitle {
    color: #9ca3af;
    font-size: 1.1rem;
    margin-bottom: 1.5rem;
}
"""
134
+
135
# Build the Gradio interface: a banner, an input column (upload, format
# choice, convert button, status line) and an output column (converted
# text, metadata, download link).
with gr.Blocks(css=custom_css) as app:
    gr.HTML(
        """
        <div class="gr-header">
        <h1 style='font-size: 2.5rem; color: #2563eb; margin-bottom: 1rem;'>📄 Docling Document Converter</h1>
        <p class="gr-subtitle">Transform your documents into Markdown or JSON format with ease</p>
        </div>
        """
    )

    with gr.Row():
        with gr.Column(scale=1):
            with gr.Group():
                gr.Markdown("### Input Settings")
                # NOTE(review): confirm that Docling actually accepts
                # legacy .doc and plain .txt inputs; if not, trim this list
                # so users cannot upload files that always fail.
                file_input = gr.File(
                    label="Upload Your Document",
                    file_types=[".doc", ".docx", ".pdf", ".txt"],
                    elem_classes="gr-input",
                )
                format_input = gr.Radio(
                    choices=["Markdown", "JSON"],
                    label="Output Format",
                    value="Markdown",
                    elem_classes="gr-input",
                )
                convert_button = gr.Button(
                    "🔄 Convert Document",
                    variant="primary",
                    elem_classes=["gr-button"],
                )

            # Left visible on purpose: the click handler only sends a plain
            # string to this box, so a hidden textbox would never reveal
            # the success/error status to the user.
            status_message = gr.Textbox(
                label="Status",
                interactive=False,
                elem_classes="gr-padded",
            )

        with gr.Column(scale=2):
            with gr.Group():
                gr.Markdown("### Conversion Output")
                output_text = gr.Textbox(
                    label="Converted Content",
                    placeholder="The converted text will appear here...",
                    lines=15,
                    elem_classes="gr-textbox",
                )
                output_metadata = gr.JSON(
                    label="Document Metadata",
                    elem_classes="gr-input",
                )
                # Hidden until a conversion succeeds; the handler toggles
                # its visibility through the second download_button slot.
                download_button = gr.File(
                    label="Download Converted File",
                    visible=False,
                    elem_classes="gr-padded",
                )

    # Event handlers
    # The outputs list mirrors the 5-tuple returned by convert_document;
    # download_button appears twice: once for the file path and once for
    # the visibility update.
    convert_button.click(
        fn=convert_document,
        inputs=[file_input, format_input],
        outputs=[
            output_text,
            output_metadata,
            download_button,
            download_button,
            status_message,
        ],
    )

# Launch the app with share=True
app.launch(debug=True, share=True)
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ gradio
2
+ docling