Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -1,25 +1,31 @@
|
|
1 |
import gradio as gr
|
2 |
from docling.document_converter import DocumentConverter
|
3 |
import spaces
|
|
|
|
|
|
|
4 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
|
6 |
def convert_document(file, method):
|
7 |
if method == "Docling":
|
8 |
-
|
9 |
-
|
10 |
-
result
|
11 |
-
|
12 |
-
# Check available attributes in DoclingDocument
|
13 |
-
available_attributes = dir(result.document)
|
14 |
-
document = result.document
|
15 |
-
|
16 |
-
|
17 |
-
# Output
|
18 |
-
converted_text = result.document.export_to_markdown()
|
19 |
-
|
20 |
-
return converted_text
|
21 |
elif method == "Marker":
|
22 |
-
return
|
23 |
else:
|
24 |
return 'unknown method'
|
25 |
|
|
|
1 |
import gradio as gr
|
2 |
from docling.document_converter import DocumentConverter
|
3 |
import spaces
|
4 |
+
from marker.converters.pdf import PdfConverter
|
5 |
+
from marker.models import create_model_dict
|
6 |
+
from marker.config.parser import ConfigParser
|
7 |
|
8 |
+
# Docling
|
9 |
+
docling_converter = DocumentConverter()
|
10 |
+
|
11 |
+
# Marker
|
12 |
+
config_parser = ConfigParser({
|
13 |
+
"output_format": "json"
|
14 |
+
})
|
15 |
+
marker_converter = PdfConverter(
|
16 |
+
config=config_parser.generate_config_dict(),
|
17 |
+
artifact_dict=create_model_dict(),
|
18 |
+
processor_list=config_parser.get_processors(),
|
19 |
+
renderer=config_parser.get_renderer()
|
20 |
+
)
|
21 |
|
22 |
def convert_document(file, method):
|
23 |
if method == "Docling":
|
24 |
+
result = docling_converter.convert(file.name)
|
25 |
+
|
26 |
+
return result.document.export_to_markdown()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
27 |
elif method == "Marker":
|
28 |
+
return marker_converter(file.name)
|
29 |
else:
|
30 |
return 'unknown method'
|
31 |
|