# Source: Hugging Face Space file view of app.py (930 bytes).
# Last commit: "Update app.py" by Raj-Master, e00cdff (verified).
import os
# Runtime dependency installation; this must run before the paddleocr import
# further down in the file.
# NOTE(review): 'python-poppler' is executed here as a shell command, it is not
# installed. Presumably an install step was intended -- confirm and fix or remove.
os.system('python-poppler')
# The requirement is quoted so the shell does not treat '>' as an output
# redirection, which would drop the version constraint and write pip's output
# to a file named '=2.0.1'.
os.system('pip install "paddlepaddle>=2.0.1"')
# os.system('pip install paddlepaddle==0.0.0 -f https://www.paddlepaddle.org.cn/whl/linux/cpu-mkl/develop.html')
os.system('pip install paddleocr')
import gradio as gr
from numpy import asarray
from pdf2image import convert_from_path
from paddleocr import PaddleOCR
from gradio_pdf import PDF
# Single OCR engine built once at import time and reused by every call to p().
ocr = PaddleOCR(
    use_angle_cls=True,
    lang="ch",
    ocr_version="PP-OCRv3",
)
def p(image, question):
    """Run OCR on one page image and return its text as a single string.

    Args:
        image: Page image; converted with ``asarray`` before being handed to
            the module-level ``ocr`` engine.
        question: Unused here; kept so existing callers keep working.

    Returns:
        The recognized text fragments joined by single spaces, or ``""`` when
        the engine reports no text lines for the image.
    """
    result = ocr.ocr(asarray(image), cls=True)
    # Some PaddleOCR releases return None / [None] for an image with no
    # detected text; iterating that would raise TypeError.
    lines = result[0] if result else None
    if not lines:
        return ""
    # Each entry is indexed as line[1][0] to obtain the recognized text.
    return " ".join(line[1][0] for line in lines)
def qa(question: str, doc: str) -> str:
    """OCR every page of a PDF and return the concatenated page texts.

    Args:
        question: Forwarded unchanged to ``p`` for each page.
        doc: Path of the PDF file, passed to ``convert_from_path``. May be
            empty or ``None`` when the UI submits without a document.

    Returns:
        The text of all pages, each page's text preceded by a newline;
        ``""`` when no document was supplied.
    """
    # Without a document there is nothing to convert; return early instead of
    # failing inside convert_from_path.
    if not doc:
        return ""
    pages = convert_from_path(doc)
    # One join instead of repeated string +=; the leading "\n" per page keeps
    # the original output format.
    return "".join("\n" + p(page, question) for page in pages)
# Web UI: a question textbox and a PDF upload go in, qa()'s text comes out.
demo = gr.Interface(
    fn=qa,
    inputs=[gr.Textbox(label="Question"), PDF(label="Document")],
    outputs=gr.Textbox(),
)
demo.launch()