richardprobe committed on
Commit
97442a6
·
verified ·
1 Parent(s): 7e3d531

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -6
app.py CHANGED
@@ -1,9 +1,21 @@
 
 
1
  import gradio as gr
2
- import time
3
 
4
- def replace(text):
5
- return text.replace('World', 'Databricks')
 
 
 
 
 
 
 
6
 
7
- gr.Interface(fn=replace,
8
- inputs='textbox',
9
- outputs='textbox').launch(share=True);
 
 
 
 
1
+ import fitz # PyMuPDF
2
+ import pytesseract
3
  import gradio as gr
4
+ from PIL import Image
5
 
6
def pdf_to_text(pdf_file):
    """Extract text from a PDF by running OCR on a rendering of each page.

    Each page is rasterised with PyMuPDF, wrapped in a PIL image and handed
    to Tesseract; the per-page results are concatenated in page order.

    Args:
        pdf_file: Path to the PDF (``str`` or ``os.PathLike``), or an upload
            object that exposes the on-disk path as ``.name``.

    Returns:
        The recognised text of all pages as a single string.
    """
    # NOTE(review): some Gradio versions pass a temp-file wrapper to a "file"
    # input instead of a path -- confirm against the installed version. Real
    # paths are passed through untouched (Path.name would drop the directory).
    is_path = isinstance(pdf_file, str) or hasattr(pdf_file, "__fspath__")
    pdf_path = pdf_file if is_path else getattr(pdf_file, "name", pdf_file)

    page_texts = []
    # The context manager closes the document even if rendering or OCR raises.
    with fitz.open(pdf_path) as doc:
        for page in doc:
            pix = page.get_pixmap()
            # "RGB" relies on get_pixmap()'s default of RGB without alpha.
            img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
            page_texts.append(pytesseract.image_to_string(img))
    return "".join(page_texts)
15
 
16
def pdf_to_text_interface(pdf_file):
    """Gradio callback: return the text extracted from the uploaded PDF.

    Delegates directly to ``pdf_to_text`` and returns its result unchanged.
    """
    return pdf_to_text(pdf_file)
19
+
20
# Web front end: a file upload goes in, the extracted text comes out.
iface = gr.Interface(
    fn=pdf_to_text_interface,
    inputs="file",
    outputs="text",
    title="PDF to Text Converter",
)
iface.launch()