ocr-app / app.py
Arch10's picture
Create app.py
28fd050 verified
raw
history blame
1.48 kB
# Streamlit app for extracting text from an image using the General OCR Theory (GOT) 2.0 model
import os
import tempfile

import requests
import streamlit as st
import torch
from PIL import Image
from transformers import AutoTokenizer, AutoModel
# Load the pre-trained GOT OCR 2.0 model and tokenizer
@st.cache_resource(show_spinner=True)
def load_model():
    """Build the GOT-OCR 2.0 tokenizer and model used for text extraction.

    Both objects are fetched with ``from_pretrained`` from the
    ``ucaslcl/GOT-OCR2_0`` checkpoint with ``trust_remote_code=True``.
    The model is requested on CUDA (``device_map='cuda'``), switched to
    evaluation mode and moved to the GPU before being returned.

    The function is wrapped in ``st.cache_resource``; presumably this lets
    Streamlit reuse the loaded objects across script reruns -- see the
    Streamlit caching documentation.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    checkpoint = 'ucaslcl/GOT-OCR2_0'

    tokenizer = AutoTokenizer.from_pretrained(checkpoint, trust_remote_code=True)

    # The tokenizer's end-of-sequence id doubles as the model's padding id.
    network = AutoModel.from_pretrained(
        checkpoint,
        trust_remote_code=True,
        low_cpu_mem_usage=True,
        device_map='cuda',
        use_safetensors=True,
        pad_token_id=tokenizer.eos_token_id,
    )

    network = network.eval()
    return tokenizer, network.cuda()
# Streamlit interface: page title, short instructions, and an image uploader.
# Once an image is uploaded it is shown back to the user, passed to the
# model's ``chat`` method in OCR mode, and the returned text is displayed.
st.title("OCR Application using General OCR Theory (GOT) 2.0")
st.write("Upload an image to extract text using the GOT OCR 2.0 model.")

# File upload handler
uploaded_file = st.file_uploader("Choose an image...", type=["png", "jpg", "jpeg"])

if uploaded_file is not None:
    # Display the uploaded image
    st.image(uploaded_file, caption="Uploaded Image", use_column_width=True)

    # Load model
    tokenizer, model = load_model()

    # model.chat is handed a file path, so the upload has to be written to
    # disk first. A private temporary file is used instead of the
    # client-supplied file name: writing to that name in the working
    # directory lets concurrent uploads with the same name overwrite each
    # other (or overwrite files that live next to the app) and leaves every
    # upload behind forever. Only the extension of the original name is kept.
    suffix = os.path.splitext(uploaded_file.name)[1]
    with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
        tmp.write(uploaded_file.getbuffer())
        image_file = tmp.name

    try:
        # Perform OCR (the temp file is closed before the model reads it)
        with st.spinner("Extracting text..."):
            res = model.chat(tokenizer, image_file, ocr_type='ocr')
    finally:
        # Always remove the temporary copy, even when OCR raises.
        os.remove(image_file)

    # Display the result
    st.write("Extracted Text:")
    st.text(res)