# NOTE: the lines below were leftover Hugging Face Spaces page text
# ("Spaces: Sleeping") captured by the scrape — they are not part of the app.
# Runtime setup for the Hugging Face Space: install the system/python
# dependencies, then stage the bundled example images under the filenames
# referenced by the Gradio `examples` list below.
# NOTE(review): installing packages via os.system at import time is a common
# Spaces shortcut; prefer requirements.txt / packages.txt where possible.
import os

os.system('pip install torch torchvision')
os.system('pip install git+https://github.com/facebookresearch/detectron2.git')
os.system('sudo apt-get install tesseract-ocr')
os.system('pip install -q pytesseract')

import gradio as gr
import torch
from transformers import AutoProcessor, AutoModelForDocumentQuestionAnswering, pipeline
from PIL import Image

# Model repo used by demo_process below.
model_checkpoint = "pacman2223/test-mod"

# Copy the sample images to the names used in the Interface examples.
image = Image.open("./sample_cv.png")
image.save("cv.png")
image = Image.open("./sample_hack.png")
image.save("hack.png")
def demo_process(img, question):
    """Answer a natural-language question about a document image.

    Runs extractive document QA: the model scores every token position for
    being the start/end of the answer, and the highest-scoring span is decoded
    back to text.

    Args:
        img: document image (PIL image or any input the processor accepts).
        question: question string about the document.

    Returns:
        The decoded answer string (may be empty if the predicted span is
        degenerate, e.g. end index before start index).
    """
    # NOTE(review): loading the processor and model on every call is slow;
    # consider hoisting these to module level once behavior is confirmed.
    processor = AutoProcessor.from_pretrained(model_checkpoint)
    model = AutoModelForDocumentQuestionAnswering.from_pretrained(model_checkpoint)

    with torch.no_grad():
        encoding = processor(img, question, return_tensors="pt")
        outputs = model(**encoding)

    # Most likely start/end token positions of the answer span.
    predicted_start_idx = outputs.start_logits.argmax(-1).item()
    predicted_end_idx = outputs.end_logits.argmax(-1).item()

    # Decode the inclusive token span [start, end] back into text.
    predicted_answer_tokens = encoding.input_ids.squeeze()[predicted_start_idx : predicted_end_idx + 1]
    return processor.tokenizer.decode(predicted_answer_tokens)
# Gradio UI: (image, question) in, predicted answer out (rendered as JSON).
demo = gr.Interface(
    fn=demo_process,
    inputs=["image", "text"],
    outputs="json",
    # Plain string: the original used an f-string with no placeholders.
    title="BIP demonstration for `layoutlmv2` task",
    description="""This model is trained with 1200 receipt images of Docqva dataset. <br>""",
    examples=[["cv.png", "What are the relevant courses?"], ["hack.png", "When does the hackathon end?"]],
    # Examples are not pre-computed at startup (model load per call is slow).
    cache_examples=False,
)

demo.launch()