# Spaces: Runtime error (page-header residue captured with this file; not part of the program)
import gradio as gr | |
from pdf2image import convert_from_path | |
from paddleocr import PaddleOCR | |
from numpy import asarray | |
import gradio as gr | |
from gradio_pdf import PDF | |
from pdf2image import convert_from_path | |
from transformers import pipeline | |
from pathlib import Path | |
# Shared OCR engine, created once when the module is executed and reused by
# every call to ``p``. Configured with the angle classifier enabled,
# language "ch", and the PP-OCRv3 model version.
ocr = PaddleOCR(use_angle_cls=True, lang="ch", ocr_version="PP-OCRv3")
def p(image, question):
    """Run OCR on one page image and return its text as a single string.

    Args:
        image: Page image; converted with ``numpy.asarray`` before OCR.
        question: Accepted for call compatibility; not used by the OCR step.

    Returns:
        The recognised text fragments joined by single spaces, or ``""``
        when OCR yields no text lines for the image.
    """
    result = ocr.ocr(asarray(image), cls=True)
    # The first element holds the lines of the (single) image passed in.
    # NOTE(review): PaddleOCR appears to report a page without detections as
    # ``[None]`` (or an empty list) in some versions; iterating that blindly
    # raised TypeError/IndexError, so guard before joining.
    page_lines = result[0] if result else None
    if not page_lines:
        return ""
    # Each line is (box, (text, confidence)); keep only the text.
    return " ".join(line[1][0] for line in page_lines)
# up_file="/home/raj/Downloads/ICBC Aviation Leasing Company Limited_ND2A_220808.pdf" | |
# images = convert_from_path(up_file, fmt="jpeg") | |
# output = "" | |
# for idx, image in enumerate(images, start=1): | |
# # result = reader.readtext(image, detail = 0) | |
# # ocr_text = " ".join(result) | |
# result = ocr.ocr(asarray(image), cls=True) | |
# ocr_text = " ".join([line[1][0] for line in result[0]]) | |
# new_prompt = f""" | |
# {ocr_text} | |
# Above is OCR'ed text from a form PDF file. | |
# List out all the form key value which have data. Don't include fields that are empty. | |
# """ | |
# # llm_output = llm(prompt=new_prompt) | |
# # output += f"Page {idx}\n" | |
# # output += llm_output | |
# output += "\n\n " | |
# output=new_prompt | |
# print(output) | |
# Directory that contains this script file.
dir_ = Path(__file__).parent
# p = pipeline( | |
# "document-question-answering", | |
# model="impira/layoutlm-document-qa", | |
# ) | |
def qa(question: str, doc: str) -> str:
    """OCR every page of a PDF and return the concatenated page text.

    Args:
        question: Text from the "Question" input; forwarded to ``p``
            unchanged.
        doc: Filesystem path of the PDF to process.

    Returns:
        The OCR text of each page in page order, with a newline written
        before each page's text (so a non-empty result starts with a
        newline).

    Raises:
        gr.Error: If no document path was supplied.
    """
    # Fail early with a readable message instead of an opaque error from
    # the PDF converter when there is nothing to convert.
    if not doc:
        raise gr.Error("Please upload a PDF document.")

    # One image per PDF page.
    pages = convert_from_path(doc)
    return "".join("\n" + p(page, question) for page in pages)
# Web UI: a question text box and a PDF upload feed ``qa``; the returned
# string is shown in a plain text box.
demo = gr.Interface(
    fn=qa,
    inputs=[gr.Textbox(label="Question"), PDF(label="Document")],
    outputs=gr.Textbox(),
)

# Start the Gradio app as soon as this module is executed.
demo.launch()
# def greet(name): | |
# return "Hello " + name + "!!" | |
# iface = gr.Interface(fn=greet, inputs="text", outputs="text") | |
# iface.launch() | |