File size: 576 Bytes
97442a6
 
cb206a1
97442a6
cb206a1
97442a6
 
 
 
 
 
 
 
 
cb206a1
97442a6
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
import fitz  # PyMuPDF
import pytesseract
import gradio as gr
from PIL import Image

def pdf_to_text(pdf_file, *, zoom: float = 1.0) -> str:
    """Extract text from every page of a PDF by rasterising it and running OCR.

    Each page is rendered to a bitmap with PyMuPDF, wrapped in a Pillow
    image, and passed to Tesseract. The per-page results are concatenated
    in page order with no extra separator.

    Args:
        pdf_file: Whatever ``fitz.open`` accepts as a document source
            (typically a filesystem path).
        zoom: Scale factor applied to both axes when rendering a page.
            ``1.0`` (the default) renders at the page's native size, as
            before; larger values produce a bigger bitmap for the OCR step.

    Returns:
        The OCR output of all pages joined into one string; an empty
        string when the document has no pages.

    Raises:
        ValueError: If ``zoom`` is not strictly positive.
    """
    if zoom <= 0:
        raise ValueError(f"zoom must be positive, got {zoom!r}")

    render_matrix = fitz.Matrix(zoom, zoom)
    page_texts = []
    # The context manager closes the document even when rendering or OCR
    # raises part-way through, so the file handle is never leaked.
    with fitz.open(pdf_file) as doc:
        for page in doc:
            # Ask for RGB without an alpha channel explicitly so the sample
            # buffer layout matches the "RGB" mode handed to Pillow below.
            pix = page.get_pixmap(
                matrix=render_matrix,
                colorspace=fitz.csRGB,
                alpha=False,
            )
            img = Image.frombytes("RGB", (pix.width, pix.height), pix.samples)
            page_texts.append(pytesseract.image_to_string(img))
    # Join once instead of growing a string with += on every page.
    return "".join(page_texts)

def pdf_to_text_interface(pdf_file):
    """Gradio callback: OCR the uploaded PDF and return the recognised text.

    Args:
        pdf_file: Value delivered by the ``"file"`` input component. May be
            a filesystem path, an object that exposes the path through a
            ``name`` attribute (e.g. a temp-file wrapper), or ``None`` when
            nothing was uploaded.

    Returns:
        The text produced by ``pdf_to_text`` for the uploaded file, or an
        empty string when ``pdf_file`` is ``None``.
    """
    import os

    if pdf_file is None:
        # Nothing uploaded: there is no document to read.
        return ""

    try:
        # Plain paths (str / os.PathLike) are used as they are.
        pdf_path = os.fspath(pdf_file)
    except TypeError:
        # Not path-like: presumably an upload wrapper whose ``name`` holds
        # the temp-file path; fall back to the object itself otherwise.
        pdf_path = getattr(pdf_file, "name", pdf_file)

    return pdf_to_text(pdf_path)

# Build the web UI: a single file-upload input wired to the OCR callback,
# with the recognised text shown in a plain text output.
iface = gr.Interface(
    fn=pdf_to_text_interface,
    inputs="file",
    outputs="text",
    title="PDF to Text Converter",
)

# Start serving the interface when this module is executed.
iface.launch()