# BIPv1 — app.py (Hugging Face Space entry point)
import os

# Install runtime dependencies at startup (common Hugging Face Spaces pattern).
# NOTE(review): installing via os.system at import time is fragile; listing these
# in requirements.txt / packages.txt would be the conventional fix.
os.system('pip install torch torchvision')
os.system('pip install git+https://github.com/facebookresearch/detectron2.git')
# -y keeps apt-get non-interactive so the install cannot hang waiting for a
# confirmation prompt in a headless container.
os.system('sudo apt-get install -y tesseract-ocr')
os.system('pip install -q pytesseract')
import gradio as gr
import torch
from transformers import AutoProcessor, AutoModelForDocumentQuestionAnswering, pipeline
from PIL import Image
# Hub checkpoint the demo loads its processor and QA model from.
model_checkpoint = "pacman2223/test-mod"

# Materialize the bundled sample images under the short names that the
# Gradio `examples` list below refers to.
for _src, _dst in (("./sample_cv.png", "cv.png"), ("./sample_hack.png", "hack.png")):
    Image.open(_src).save(_dst)
def demo_process(img, question):
    """Answer a natural-language question about a document image.

    Runs extractive document QA: the model scores every token position as a
    possible answer start/end, and the span between the two argmax positions
    is decoded back to text.

    Parameters
    ----------
    img : PIL.Image.Image (or any image input the processor accepts)
        The document image to query.
    question : str
        The question to ask about the document.

    Returns
    -------
    str
        The decoded answer span (may include special tokens, matching the
        original behavior).
    """
    # Lazy-load the processor/model once per process and reuse them across
    # calls; the original re-downloaded and re-instantiated both from the
    # hub on every single invocation.
    if not hasattr(demo_process, "_cached"):
        processor = AutoProcessor.from_pretrained(model_checkpoint)
        model = AutoModelForDocumentQuestionAnswering.from_pretrained(model_checkpoint)
        demo_process._cached = (processor, model)
    processor, model = demo_process._cached

    # Inference only — no gradients needed.
    with torch.no_grad():
        encoding = processor(img, question, return_tensors="pt")
        outputs = model(**encoding)

    # Most likely start/end token positions of the answer span.
    predicted_start_idx = outputs.start_logits.argmax(-1).item()
    predicted_end_idx = outputs.end_logits.argmax(-1).item()

    # Decode the (inclusive) token span between start and end.
    predicted_answer_tokens = encoding.input_ids.squeeze()[predicted_start_idx : predicted_end_idx + 1]
    return processor.tokenizer.decode(predicted_answer_tokens)
# Gradio UI: document image + question in, predicted answer out.
demo = gr.Interface(
    fn=demo_process,
    inputs=["image", "text"],
    outputs="json",
    # Plain string literal — the original used an f-string with no placeholders.
    title="BIP demonstration for `layoutlmv2` task",
    description="""This model is trained with 1200 receipt images of Docqva dataset. <br>""",
    examples=[["cv.png", "What are the relevant courses?"], ["hack.png", "When does the hackathon end?"]],
    cache_examples=False,  # skip pre-running examples at startup (inference is slow)
)
demo.launch()