ocr-app / app.py
Arch10's picture
Update app.py
aca06b8 verified
raw
history blame
1.49 kB
import os
import tempfile

import streamlit as st
import torch
from PIL import Image
from transformers import AutoTokenizer, AutoModel
# Load the pre-trained GOT OCR 2.0 model and tokenizer
@st.cache_resource(show_spinner=True)
def load_model():
    """Load the GOT OCR 2.0 tokenizer and model and place the model on a device.

    The function is wrapped in ``st.cache_resource`` so that the expensive
    load is shared instead of being repeated on every script rerun.

    Returns:
        A ``(tokenizer, model, device)`` tuple, where ``model`` is in eval
        mode and already moved to ``device`` (CUDA when available, else CPU).
    """
    repo_id = 'ucaslcl/GOT-OCR2_0'

    # Prefer the GPU when one is visible to torch; otherwise run on the CPU.
    if torch.cuda.is_available():
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")

    # NOTE: trust_remote_code=True lets transformers run the Python code
    # shipped in the model repository.
    tokenizer = AutoTokenizer.from_pretrained(repo_id, trust_remote_code=True)
    model = AutoModel.from_pretrained(
        repo_id,
        trust_remote_code=True,
        low_cpu_mem_usage=True,
        use_safetensors=True,
    )

    # Switch to inference mode, then move the weights to the chosen device.
    model = model.eval()
    model = model.to(device)

    return tokenizer, model, device
# Streamlit interface
st.title("OCR Application using General OCR Theory (GOT) 2.0")
st.write("Upload an image to extract text using the GOT OCR 2.0 model.")

# File upload handler
uploaded_file = st.file_uploader("Choose an image...", type=["png", "jpg", "jpeg"])

if uploaded_file is not None:
    # Display the uploaded image
    st.image(uploaded_file, caption="Uploaded Image", use_column_width=True)

    # Load model
    tokenizer, model, device = load_model()

    # model.chat is handed a file path, so the upload has to be written to
    # disk first. A private temporary directory gives each run its own file
    # (a fixed name in the working directory would be overwritten by other
    # concurrent sessions) and removes it again once OCR has finished.
    with tempfile.TemporaryDirectory() as tmp_dir:
        image_path = os.path.join(tmp_dir, "temp_image.png")

        # Load the image
        image = Image.open(uploaded_file)
        # PNG can only store 1, L, LA, I, P, RGB and RGBA data; other modes
        # (e.g. a CMYK JPEG) would make save() fail, so normalise them to RGB.
        if image.mode not in ("1", "L", "LA", "I", "P", "RGB", "RGBA"):
            image = image.convert("RGB")
        image.save(image_path)

        # Perform OCR while the temporary file still exists
        with st.spinner("Extracting text..."):
            res = model.chat(tokenizer, image_path, ocr_type='ocr')

    # Display the result
    st.write("Extracted Text:")
    st.text(res)