Adityadn committed on
Commit
5f9d940
·
verified ·
1 Parent(s): 6a5a660

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -32
app.py CHANGED
@@ -1,25 +1,7 @@
1
  import gradio as gr
2
  import pypandoc
3
  import os
4
- from pdf2docx import Converter
5
-
6
- def ensure_pandoc_installed():
7
- try:
8
- # Try to access pandoc's version
9
- pypandoc.get_pandoc_version()
10
- print("Pandoc is already installed and accessible.")
11
- except OSError:
12
- # Attempt to download pandoc
13
- print("Pandoc not found, downloading...")
14
- pypandoc.download_pandoc()
15
- print("Pandoc downloaded successfully.")
16
-
17
- # Explicitly set the path to the downloaded pandoc if needed
18
- os.environ['PATH'] += os.pathsep + '/home/user/bin'
19
- print("Pandoc path added to system environment.")
20
-
21
- # Pastikan Pandoc terpasang
22
- # ensure_pandoc_installed()
23
 
24
  # Daftar format yang didukung
25
  input_supported_formats = [data.upper() for data in sorted(list(pypandoc.get_pandoc_formats()[0]) or [
@@ -42,24 +24,24 @@ output_supported_formats = [data.upper() for data in sorted([
42
  "RST", "RTF", "S5", "SLIDEOUS", "SLIDY", "TEI", "TEXINFO", "TEXTILE", "TYPST", "XWIKI", "ZIMWIKI"
43
  ]) if data not in ['PDF']]
44
 
45
- def convert_pdf_to_docx(pdf_file):
46
- """Konversi PDF ke DOCX menggunakan pdf2docx"""
47
- output_docx = f"{os.path.splitext(pdf_file.name)[0]}.docx"
48
- cv = Converter(pdf_file.name)
49
- cv.convert(output_docx, start=0, end=None)
50
- return output_docx
51
 
52
  def convert_document(doc_file, target_format):
53
  try:
54
  target_format = target_format.lower()
55
 
56
- # If the file is a PDF, convert it to DOCX first
57
- if isinstance(doc_file, str) and doc_file.lower().endswith('.pdf'):
58
- print("Converting PDF to DOCX...")
59
- doc_file = convert_pdf_to_docx(doc_file) # Pass the file path directly
60
- print("PDF converted to DOCX.")
61
- elif hasattr(doc_file, 'name'): # If it's a file-like object
62
- doc_file = doc_file.name # Get the file path from the file-like object
63
 
64
  # Get the base name of the file (without extension)
65
  base_name = os.path.splitext(os.path.basename(doc_file))[0]
 
1
  import gradio as gr
2
  import pypandoc
3
  import os
4
+ # from pdf2docx import Converter
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
  # Daftar format yang didukung
7
  input_supported_formats = [data.upper() for data in sorted(list(pypandoc.get_pandoc_formats()[0]) or [
 
24
  "RST", "RTF", "S5", "SLIDEOUS", "SLIDY", "TEI", "TEXINFO", "TEXTILE", "TYPST", "XWIKI", "ZIMWIKI"
25
  ]) if data not in ['PDF']]
26
 
27
+ # def convert_pdf_to_docx(pdf_file):
28
+ # """Konversi PDF ke DOCX menggunakan pdf2docx"""
29
+ # output_docx = f"{os.path.splitext(pdf_file.name)[0]}.docx"
30
+ # cv = Converter(pdf_file.name)
31
+ # cv.convert(output_docx, start=0, end=None)
32
+ # return output_docx
33
 
34
  def convert_document(doc_file, target_format):
35
  try:
36
  target_format = target_format.lower()
37
 
38
+ # # If the file is a PDF, convert it to DOCX first
39
+ # if isinstance(doc_file, str) and doc_file.lower().endswith('.pdf'):
40
+ # print("Converting PDF to DOCX...")
41
+ # doc_file = convert_pdf_to_docx(doc_file) # Pass the file path directly
42
+ # print("PDF converted to DOCX.")
43
+ # elif hasattr(doc_file, 'name'): # If it's a file-like object
44
+ doc_file = doc_file.name # Get the file path from the file-like object
45
 
46
  # Get the base name of the file (without extension)
47
  base_name = os.path.splitext(os.path.basename(doc_file))[0]