File size: 4,049 Bytes
73bb23a
 
 
3d40c3e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73bb23a
 
3d40c3e
73bb23a
 
 
 
 
 
9ff72c7
73bb23a
 
 
 
 
 
 
 
 
 
9ff72c7
73bb23a
3d40c3e
 
 
 
 
 
73bb23a
 
 
 
 
3d40c3e
 
 
 
 
 
 
73bb23a
 
 
 
 
ff51773
73bb23a
 
 
 
 
3d40c3e
73bb23a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3d40c3e
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
import gradio as gr
import pypandoc
import os
from pdf2docx import Converter

# Install TeX Live when the app starts.
# NOTE(review): presumably needed for pandoc's LaTeX-based output - confirm it is still required.
# `-y` answers apt-get's confirmation prompt; without it an unattended run stops at the
# prompt and the package is never installed.
if os.system('sudo apt-get install -y texlive') != 0:
    # Best effort: keep starting the app, but make the failure visible in the logs.
    print("Warning: could not install texlive; conversions that need LaTeX may fail.")

def ensure_pandoc_installed():
    """Make sure a pandoc binary is available, downloading one if needed.

    Asks pypandoc for the pandoc version as an availability probe. When the
    probe raises ``OSError``, pandoc is downloaded through pypandoc.
    Progress is reported on stdout.
    """
    try:
        # Probe: raises OSError when pandoc cannot be found
        pypandoc.get_pandoc_version()
    except OSError:
        # Not available yet: fetch it
        print("Pandoc not found, downloading...")
        pypandoc.download_pandoc()
        print("Pandoc downloaded successfully.")
    else:
        print("Pandoc is already installed and accessible.")

# Make sure pandoc is installed
ensure_pandoc_installed()

# Supported input formats, upper-cased and sorted.
# The list comes from the installed pandoc when it can be queried; the hard-coded
# list below is only a fallback. The previous expression was
# `list(...).append('PDF') or [...]`: `list.append` returns None, so pandoc's answer
# was always thrown away and the fallback was used unconditionally.
try:
    _pandoc_input_formats = list(pypandoc.get_pandoc_formats()[0])
except (OSError, RuntimeError):
    # pandoc is missing or could not be run: use the static list instead
    _pandoc_input_formats = []

input_supported_formats = sorted(
    {
        data.upper()
        for data in _pandoc_input_formats or [
            'BIBLATEX', 'BIBTEX', 'BITS', 'COMMONMARK', 'COMMONMARK_X', 'CREOLE', 'CSLJSON', 'CSV',
            'DJOT', 'DOCBOOK', 'DOCX', 'DOKUWIKI', 'ENDNOTEXML', 'EPUB', 'FB2', 'GFM', 'HADDOCK',
            'HTML', 'IPYNB', 'JATS', 'JIRA', 'JSON', 'LATEX', 'MAN', 'MARKDOWN', 'MARKDOWN_GITHUB',
            'MARKDOWN_MMD', 'MARKDOWN_PHPEXTRA', 'MARKDOWN_STRICT', 'MDOC', 'MEDIAWIKI', 'MUSE',
            'NATIVE', 'ODT', 'OPML', 'ORG', 'PDF', 'POD', 'RIS', 'RST', 'RTF', 'T2T', 'TEXTILE',
            'TIKIWIKI', 'TSV', 'TWIKI', 'TYPST', 'VIMWIKI'
        ]
    }
    # PDF stays excluded from the advertised input formats, as the original filter did
    - {'PDF'}
)

# Supported output formats, upper-cased and sorted; PDF is filtered out.
output_supported_formats = sorted(
    name.upper()
    for name in (
        "ANSI", "ASCIIDOC", "ASCIIDOC_LEGACY", "ASCIIDOCTOR", "BEAMER", "BIBLATEX", "BIBTEX",
        "CHUNKEDHTML", "COMMONMARK", "COMMONMARK_X", "CONTEXT", "CSLJSON", "DJOT", "DOCBOOK",
        "DOCBOOK4", "DOCBOOK5", "DOCX", "DOKUWIKI", "DZSLIDES", "EPUB", "EPUB2", "EPUB3", "FB2",
        "GFM", "HADDOCK", "HTML", "HTML4", "HTML5", "ICML", "IPYNB", "JATS", "JATS_ARCHIVING",
        "JATS_ARTICLEAUTHORING", "JATS_PUBLISHING", "JIRA", "JSON", "LATEX", "MAN", "MARKDOWN",
        "MARKDOWN_GITHUB", "MARKDOWN_MMD", "MARKDOWN_PHPEXTRA", "MARKDOWN_STRICT", "MARKUA",
        "MEDIAWIKI", "MS", "MUSE", "NATIVE", "ODT", "OPENDOCUMENT", "OPML", "ORG", "PDF", "PLAIN",
        "PPTX", "REVEALJS", "RST", "RTF", "S5", "SLIDEOUS", "SLIDY", "TEI", "TEXINFO", "TEXTILE",
        "TYPST", "XWIKI", "ZIMWIKI",
    )
    if name != "PDF"
)

def convert_pdf_to_docx(pdf_file):
    """Convert a PDF to DOCX with pdf2docx and return the path of the DOCX.

    Args:
        pdf_file: Path to the PDF as a string, or an object that exposes the
            path through a ``name`` attribute.

    Returns:
        Path of the generated DOCX: the input path with its extension
        replaced by ``.docx``.
    """
    # Callers in this file pass a plain string path, which has no `.name`;
    # accept both forms instead of failing with AttributeError.
    pdf_path = pdf_file if isinstance(pdf_file, str) else pdf_file.name
    output_docx = f"{os.path.splitext(pdf_path)[0]}.docx"
    cv = Converter(pdf_path)
    try:
        cv.convert(output_docx, start=0, end=None)
    finally:
        # Always release the opened PDF, even when the conversion fails
        cv.close()
    return output_docx

def convert_document(doc_file, target_format):
    """Convert an uploaded document to ``target_format`` with pandoc.

    PDFs are first turned into DOCX via ``convert_pdf_to_docx`` and the
    resulting DOCX is then handed to pandoc.

    Args:
        doc_file: Path of the uploaded document as a string, or an object
            exposing that path through a ``name`` attribute.
        target_format: Name of the pandoc output format; case-insensitive.

    Returns:
        The path of the converted file
        (``flowly_ai_document_converter_<base name>.<format>``, relative to
        the working directory), or a string starting with ``"Error: "`` when
        the conversion could not be performed.
    """
    # Guard clauses: report missing inputs clearly instead of leaking an
    # AttributeError/TypeError message from deeper in the function.
    if doc_file is None:
        return "Error: no document was provided"
    if not target_format:
        return "Error: no target format was selected"

    try:
        target_format = target_format.lower()

        # Resolve the on-disk path first so the PDF check below works for
        # string paths and for file-like objects alike.
        if isinstance(doc_file, str):
            source_path = doc_file
        else:
            source_path = getattr(doc_file, 'name', doc_file)

        # pandoc is not given PDFs directly here: go through DOCX first
        if str(source_path).lower().endswith('.pdf'):
            print("Converting PDF to DOCX...")
            source_path = convert_pdf_to_docx(doc_file)
            print("PDF converted to DOCX.")

        # Base name of the source file, without directory or extension
        base_name = os.path.splitext(os.path.basename(source_path))[0]

        # Output file name
        output_file = f"flowly_ai_document_converter_{base_name}.{target_format}"

        # Use pypandoc to convert the file
        pypandoc.convert_file(
            source_path,
            target_format,
            outputfile=output_file,
        )

        return output_file
    except Exception as e:
        # Top-level boundary for the UI callback: never raise, report instead.
        # NOTE(review): the interface wires this return value into a gr.File
        # output, which will likely treat the message as a file path -
        # consider surfacing failures through the UI framework's error mechanism.
        return f"Error: {e}"

# Gradio interface with custom styling: a file upload plus a dropdown of
# target formats, wired to convert_document; the page footer is hidden via CSS.
interface = gr.Interface(
    title="Document Format Converter",
    description="Upload a document and select any target format for conversion.",
    fn=convert_document,
    inputs=[
        gr.File(
            label="Upload Document",
            # Each supported input format name is offered as a lower-case file extension
            file_types=["." + name.lower() for name in input_supported_formats],
        ),
        gr.Dropdown(choices=output_supported_formats, label="Select Output Format"),
    ],
    outputs=gr.File(label="Converted Document"),
    css="footer {visibility: hidden}",
)

# Run the application
if __name__ == "__main__":
    interface.launch()