Update app.py
Browse files
app.py
CHANGED
@@ -1,13 +1,26 @@
|
|
1 |
import gradio as gr
|
2 |
import pypandoc
|
3 |
import os
|
4 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
|
6 |
-
try: pypandoc.get_pandoc_version()
|
7 |
-
except OSError: pypandoc.download_pandoc()
|
8 |
-
|
9 |
# Daftar format yang didukung
|
10 |
-
input_supported_formats = [data.upper() for data in sorted(list(pypandoc.get_pandoc_formats()[0]) or [
|
11 |
'BIBLATEX', 'BIBTEX', 'BITS', 'COMMONMARK', 'COMMONMARK_X', 'CREOLE', 'CSLJSON', 'CSV',
|
12 |
'DJOT', 'DOCBOOK', 'DOCX', 'DOKUWIKI', 'ENDNOTEXML', 'EPUB', 'FB2', 'GFM', 'HADDOCK',
|
13 |
'HTML', 'IPYNB', 'JATS', 'JIRA', 'JSON', 'LATEX', 'MAN', 'MARKDOWN', 'MARKDOWN_GITHUB',
|
@@ -25,26 +38,26 @@ output_supported_formats = [data.upper() for data in sorted([
|
|
25 |
"MARKDOWN_MMD", "MARKDOWN_PHPEXTRA", "MARKDOWN_STRICT", "MARKUA", "MEDIAWIKI", "MS",
|
26 |
"MUSE", "NATIVE", "ODT", "OPENDOCUMENT", "OPML", "ORG", "PDF", "PLAIN", "PPTX", "REVEALJS",
|
27 |
"RST", "RTF", "S5", "SLIDEOUS", "SLIDY", "TEI", "TEXINFO", "TEXTILE", "TYPST", "XWIKI", "ZIMWIKI"
|
28 |
-
])
|
29 |
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
|
37 |
def convert_document(doc_file, target_format):
|
38 |
try:
|
39 |
target_format = target_format.lower()
|
40 |
|
41 |
-
#
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
|
49 |
# Get the base name of the file (without extension)
|
50 |
base_name = os.path.splitext(os.path.basename(doc_file))[0]
|
@@ -56,7 +69,10 @@ def convert_document(doc_file, target_format):
|
|
56 |
pypandoc.convert_file(
|
57 |
doc_file,
|
58 |
target_format.lower(), # Convert the format to lowercase
|
59 |
-
outputfile=output_file
|
|
|
|
|
|
|
60 |
)
|
61 |
|
62 |
return output_file
|
@@ -74,4 +90,9 @@ interface = gr.Interface(
|
|
74 |
title="Document Format Converter",
|
75 |
description="Upload a document and select any target format for conversion.",
|
76 |
css="footer {visibility: hidden}"
|
77 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import gradio as gr
|
2 |
import pypandoc
|
3 |
import os
|
4 |
+
from pdf2docx import Converter
|
5 |
+
|
6 |
+
# Install TeX Live at startup (presumably so pandoc has a LaTeX engine for PDF
# output -- confirm against the deployment). `-y` answers apt-get's confirmation
# prompt, which would otherwise abort the install when no terminal is attached.
if os.system('sudo apt-get install -y texlive') != 0:
    # Surface the failure instead of silently continuing without TeX Live.
    print("Warning: 'apt-get install texlive' failed; continuing without it.")
|
7 |
+
|
8 |
+
def ensure_pandoc_installed():
    """Make sure a pandoc binary is available, downloading one if needed.

    Asks pypandoc for the pandoc version as an availability probe; when that
    raises ``OSError`` the binary is fetched with ``pypandoc.download_pandoc``.
    Progress is reported on stdout.
    """
    try:
        # The version query doubles as a "is pandoc reachable?" check.
        pypandoc.get_pandoc_version()
        print("Pandoc is already installed and accessible.")
        return
    except OSError:
        # Fall through to the download path below.
        print("Pandoc not found, downloading...")

    pypandoc.download_pandoc()
    print("Pandoc downloaded successfully.")
|
18 |
+
|
19 |
+
# Pastikan Pandoc terpasang
|
20 |
+
ensure_pandoc_installed()
|
21 |
|
|
|
|
|
|
|
22 |
# Daftar format yang didukung
|
23 |
+
input_supported_formats = [data.upper() for data in sorted(list(pypandoc.get_pandoc_formats()[0]).append('PDF') or [
|
24 |
'BIBLATEX', 'BIBTEX', 'BITS', 'COMMONMARK', 'COMMONMARK_X', 'CREOLE', 'CSLJSON', 'CSV',
|
25 |
'DJOT', 'DOCBOOK', 'DOCX', 'DOKUWIKI', 'ENDNOTEXML', 'EPUB', 'FB2', 'GFM', 'HADDOCK',
|
26 |
'HTML', 'IPYNB', 'JATS', 'JIRA', 'JSON', 'LATEX', 'MAN', 'MARKDOWN', 'MARKDOWN_GITHUB',
|
|
|
38 |
"MARKDOWN_MMD", "MARKDOWN_PHPEXTRA", "MARKDOWN_STRICT", "MARKUA", "MEDIAWIKI", "MS",
|
39 |
"MUSE", "NATIVE", "ODT", "OPENDOCUMENT", "OPML", "ORG", "PDF", "PLAIN", "PPTX", "REVEALJS",
|
40 |
"RST", "RTF", "S5", "SLIDEOUS", "SLIDY", "TEI", "TEXINFO", "TEXTILE", "TYPST", "XWIKI", "ZIMWIKI"
|
41 |
+
])]
|
42 |
|
43 |
+
def convert_pdf_to_docx(pdf_file):
    """Convert a PDF to DOCX using pdf2docx and return the DOCX path.

    Args:
        pdf_file: Path to the PDF as a string, or an object that exposes the
            path through a ``name`` attribute.

    Returns:
        The path of the generated DOCX file: the input path with its
        extension replaced by ``.docx``.
    """
    # convert_document passes a plain string path, so fall back to the value
    # itself when there is no ``name`` attribute to read.
    pdf_path = getattr(pdf_file, "name", pdf_file)
    output_docx = f"{os.path.splitext(pdf_path)[0]}.docx"

    cv = Converter(pdf_path)
    try:
        # start=0, end=None converts every page.
        cv.convert(output_docx, start=0, end=None)
    finally:
        # Release the opened PDF even if the conversion fails.
        cv.close()
    return output_docx
|
49 |
|
50 |
def convert_document(doc_file, target_format):
|
51 |
try:
|
52 |
target_format = target_format.lower()
|
53 |
|
54 |
+
# If the file is a PDF, convert it to DOCX first
|
55 |
+
if isinstance(doc_file, str) and doc_file.lower().endswith('.pdf'):
|
56 |
+
print("Converting PDF to DOCX...")
|
57 |
+
doc_file = convert_pdf_to_docx(doc_file) # Pass the file path directly
|
58 |
+
print("PDF converted to DOCX.")
|
59 |
+
elif hasattr(doc_file, 'name'): # If it's a file-like object
|
60 |
+
doc_file = doc_file.name # Get the file path from the file-like object
|
61 |
|
62 |
# Get the base name of the file (without extension)
|
63 |
base_name = os.path.splitext(os.path.basename(doc_file))[0]
|
|
|
69 |
pypandoc.convert_file(
|
70 |
doc_file,
|
71 |
target_format.lower(), # Convert the format to lowercase
|
72 |
+
outputfile=output_file,
|
73 |
+
extra_args=['-V geometry:margin=1.5cm',
|
74 |
+
# '--pdf-engine=/usr/bin/xelatex',
|
75 |
+
'--metadata', 'title="Converted Document by Flowly AI"']
|
76 |
)
|
77 |
|
78 |
return output_file
|
|
|
90 |
title="Document Format Converter",
|
91 |
description="Upload a document and select any target format for conversion.",
|
92 |
css="footer {visibility: hidden}"
|
93 |
+
)
|
94 |
+
|
95 |
+
# Jalankan aplikasi
|
96 |
+
if __name__ == "__main__":
|
97 |
+
interface.launch()
|
98 |
+
|