Update app.py
Browse files
app.py
CHANGED
@@ -1,25 +1,7 @@
|
|
1 |
import gradio as gr
|
2 |
import pypandoc
|
3 |
import os
|
4 |
-
from pdf2docx import Converter
|
5 |
-
|
6 |
-
def ensure_pandoc_installed():
    """Make sure a pandoc binary is reachable, downloading one if needed.

    Probes for pandoc with ``pypandoc.get_pandoc_version()``. When that
    raises ``OSError``, pandoc is fetched with ``pypandoc.download_pandoc()``
    and ``/home/user/bin`` is added to the process ``PATH``.

    NOTE(review): the indentation of this function was lost in the rendering
    it was recovered from. The PATH update is assumed to belong to the
    download branch -- confirm against the original file.
    """
    try:
        # Try to access pandoc's version
        pypandoc.get_pandoc_version()
        print("Pandoc is already installed and accessible.")
    except OSError:
        # Attempt to download pandoc
        print("Pandoc not found, downloading...")
        pypandoc.download_pandoc()
        print("Pandoc downloaded successfully.")

        # Explicitly set the path to the downloaded pandoc if needed.
        # Read PATH with a default so an unset variable does not raise
        # KeyError, and skip the append when the directory is already
        # listed so repeated calls do not grow PATH.
        pandoc_dir = '/home/user/bin'
        search_path = os.environ.get('PATH', '')
        entries = search_path.split(os.pathsep) if search_path else []
        if pandoc_dir not in entries:
            entries.append(pandoc_dir)
            os.environ['PATH'] = os.pathsep.join(entries)
        print("Pandoc path added to system environment.")
|
20 |
-
|
21 |
-
# Pastikan Pandoc terpasang
|
22 |
-
# ensure_pandoc_installed()
|
23 |
|
24 |
# Daftar format yang didukung
|
25 |
input_supported_formats = [data.upper() for data in sorted(list(pypandoc.get_pandoc_formats()[0]) or [
|
@@ -42,24 +24,24 @@ output_supported_formats = [data.upper() for data in sorted([
|
|
42 |
"RST", "RTF", "S5", "SLIDEOUS", "SLIDY", "TEI", "TEXINFO", "TEXTILE", "TYPST", "XWIKI", "ZIMWIKI"
|
43 |
]) if data not in ['PDF']]
|
44 |
|
45 |
-
def convert_pdf_to_docx(pdf_file):
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
|
52 |
def convert_document(doc_file, target_format):
|
53 |
try:
|
54 |
target_format = target_format.lower()
|
55 |
|
56 |
-
# If the file is a PDF, convert it to DOCX first
|
57 |
-
if isinstance(doc_file, str) and doc_file.lower().endswith('.pdf'):
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
elif hasattr(doc_file, 'name'): # If it's a file-like object
|
62 |
-
|
63 |
|
64 |
# Get the base name of the file (without extension)
|
65 |
base_name = os.path.splitext(os.path.basename(doc_file))[0]
|
|
|
1 |
import gradio as gr
|
2 |
import pypandoc
|
3 |
import os
|
4 |
+
# from pdf2docx import Converter
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
|
6 |
# Daftar format yang didukung
|
7 |
input_supported_formats = [data.upper() for data in sorted(list(pypandoc.get_pandoc_formats()[0]) or [
|
|
|
24 |
"RST", "RTF", "S5", "SLIDEOUS", "SLIDY", "TEI", "TEXINFO", "TEXTILE", "TYPST", "XWIKI", "ZIMWIKI"
|
25 |
]) if data not in ['PDF']]
|
26 |
|
27 |
+
# def convert_pdf_to_docx(pdf_file):
|
28 |
+
# """Konversi PDF ke DOCX menggunakan pdf2docx"""
|
29 |
+
# output_docx = f"{os.path.splitext(pdf_file.name)[0]}.docx"
|
30 |
+
# cv = Converter(pdf_file.name)
|
31 |
+
# cv.convert(output_docx, start=0, end=None)
|
32 |
+
# return output_docx
|
33 |
|
34 |
def convert_document(doc_file, target_format):
|
35 |
try:
|
36 |
target_format = target_format.lower()
|
37 |
|
38 |
+
# # If the file is a PDF, convert it to DOCX first
|
39 |
+
# if isinstance(doc_file, str) and doc_file.lower().endswith('.pdf'):
|
40 |
+
# print("Converting PDF to DOCX...")
|
41 |
+
# doc_file = convert_pdf_to_docx(doc_file) # Pass the file path directly
|
42 |
+
# print("PDF converted to DOCX.")
|
43 |
+
# elif hasattr(doc_file, 'name'): # If it's a file-like object
|
44 |
+
doc_file = doc_file.name # Get the file path from the file-like object
|
45 |
|
46 |
# Get the base name of the file (without extension)
|
47 |
base_name = os.path.splitext(os.path.basename(doc_file))[0]
|