"""Gradio demo that runs PaddleOCR over every page of an uploaded PDF."""
import os

# Runtime dependency bootstrap: the packages imported further down are
# installed here, at start-up, before those imports execute.
# NOTE(review): 'python-poppler' is not an install command, so this call
# presumably just fails with "command not found". Kept as-is; confirm what
# was intended (pdf2image needs the poppler utilities to be available).
os.system('python-poppler')
# The requirement is quoted so the shell does not interpret '>' as an output
# redirection (which would write to a file named '=2.0.1' and silently drop
# the version constraint).
os.system('pip install "paddlepaddle>=2.0.1"')
# os.system('pip install paddlepaddle==0.0.0 -f https://www.paddlepaddle.org.cn/whl/linux/cpu-mkl/develop.html')
os.system('pip install paddleocr')

import gradio as gr
from numpy import asarray
from pdf2image import convert_from_path
from paddleocr import PaddleOCR
from gradio_pdf import PDF

# Single shared OCR engine, built once at import time and reused per request.
ocr = PaddleOCR(use_angle_cls=True, lang="ch", ocr_version="PP-OCRv3")


def p(image, question):
    """Run OCR on one page image and return its text as a single string.

    Args:
        image: Page image; converted with ``numpy.asarray`` before being
            handed to the OCR engine.
        question: Unused by the OCR step; accepted only so existing callers
            keep working.

    Returns:
        The ``line[1][0]`` entry of every detection on the page, joined with
        single spaces (presumably the recognised text of each line -- see
        the PaddleOCR result format). Empty string when nothing is detected.
    """
    result = ocr.ocr(asarray(image), cls=True)
    # The engine may report a page without any text as None instead of an
    # empty list; treat both cases as "no text".
    detections = result[0] if result else None
    if not detections:
        return ""
    return " ".join(line[1][0] for line in detections)


def qa(question: str, doc: str) -> str:
    """OCR every page of a PDF and return the concatenated page texts.

    Args:
        question: Text from the "Question" box; forwarded to ``p`` unchanged.
        doc: Path of the PDF file to process.

    Returns:
        The OCR text of each page in order, each page preceded by a newline.
        Empty string when no document was supplied.
    """
    if not doc:
        # Nothing uploaded: return early instead of failing inside pdf2image.
        return ""
    pages = convert_from_path(doc)
    return "".join("\n" + p(page, question) for page in pages)


demo = gr.Interface(
    qa,
    [gr.Textbox(label="Question"), PDF(label="Document")],
    gr.Textbox(),
)

demo.launch()