File size: 930 Bytes
23460f1
42908a1
23460f1
 
f7a5ebf
 
e00cdff
5a3dc06
 
 
23460f1
 
5a3dc06
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
import os
# Runtime dependency bootstrap: these commands run in a shell every time the
# module is imported, before the third-party imports below are attempted.
#
# NOTE(review): 'python-poppler' is not an install command, so the shell will
# most likely just report "command not found". pdf2image relies on the poppler
# utilities being available on the system -- confirm how they are provisioned
# and replace this line accordingly.
os.system('python-poppler')
# The requirement is quoted so the shell does not parse ">=2.0.1" as an output
# redirection; unquoted, pip would receive only "paddlepaddle" and its output
# would be written to a file named "=2.0.1".
os.system('pip install "paddlepaddle>=2.0.1"')
# os.system('pip install paddlepaddle==0.0.0 -f https://www.paddlepaddle.org.cn/whl/linux/cpu-mkl/develop.html')
os.system('pip install paddleocr')
import gradio as gr
from numpy import asarray
from pdf2image import convert_from_path
from paddleocr import PaddleOCR
from gradio_pdf import PDF



# Single OCR engine, constructed once at import time and reused by p() for
# every page instead of being rebuilt per request.
ocr = PaddleOCR(
    ocr_version="PP-OCRv3",
    lang="ch",
    use_angle_cls=True,
)

def p(image, question):
    """Run OCR on one page image and return its text as a single string.

    Args:
        image: Page image; it is converted with ``numpy.asarray`` before being
            passed to the module-level ``ocr`` engine.
        question: Unused here; accepted so existing callers keep working.

    Returns:
        The recognised text fragments joined by single spaces, or ``""`` when
        the engine reports no text for the image.
    """
    result = ocr.ocr(asarray(image), cls=True)

    # For an image with no detected text PaddleOCR may hand back ``[None]``
    # (or an empty list); iterating that directly would raise, so treat both
    # as "no text on this page".
    lines = result[0] if result else None
    if not lines:
        return ""

    # line[1][0] is the text element of each recognised line.
    return " ".join(line[1][0] for line in lines)


def qa(question: str, doc: str) -> str:
    """Extract the text of every page of a PDF via OCR.

    Args:
        question: Forwarded unchanged to ``p`` for each page.
        doc: Path of the PDF to read; may be empty or ``None`` when no
            document was supplied.

    Returns:
        The text of each page, every page preceded by a newline and
        concatenated in page order. Returns ``""`` when ``doc`` is empty or
        ``None``.
    """
    if not doc:
        # Nothing to convert; bail out before convert_from_path is handed a
        # missing path.
        return ""

    pages = convert_from_path(doc)
    # Each page contributes a newline followed by its text, which keeps the
    # original output layout (including the leading newline).
    return "".join("\n" + p(page, question) for page in pages)

 
# Web UI: a question box and a PDF upload go in, the text returned by qa()
# comes out in a plain textbox.
demo = gr.Interface(
    fn=qa,
    inputs=[
        gr.Textbox(label="Question"),
        PDF(label="Document"),
    ],
    outputs=gr.Textbox(),
)

# Start serving the interface as soon as the module is executed.
demo.launch()