Spaces:

Arch10
/

ocr-app

Sleeping

File size: 1,490 Bytes

28fd050
 
 
 
 
 
 
 
 
aca06b8
 
 
 
28fd050
 
 
 
 
 
 
 
 
 
 
 
 
aca06b8
28fd050
aca06b8
 
 
28fd050
 
 
aca06b8
28fd050

import os
import tempfile

import streamlit as st
import torch
from PIL import Image
from transformers import AutoTokenizer, AutoModel

# Load the pre-trained GOT OCR 2.0 model and tokenizer
@st.cache_resource(show_spinner=True)
def load_model():
    """Load the GOT OCR 2.0 tokenizer and model and pick the compute device.

    Wrapped in ``st.cache_resource`` so Streamlit reruns reuse the loaded
    objects rather than loading the weights again.

    Returns:
        A ``(tokenizer, model, device)`` tuple. The model is in eval mode and
        has been moved to ``device``, which is CUDA when available and the
        CPU otherwise.
    """
    model_id = 'ucaslcl/GOT-OCR2_0'

    # Prefer the GPU; fall back to the CPU when CUDA is not available.
    if torch.cuda.is_available():
        target = torch.device("cuda")
    else:
        target = torch.device("cpu")

    # NOTE: trust_remote_code=True runs the modelling code shipped in the
    # model repository.
    tok = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
    net = AutoModel.from_pretrained(
        model_id,
        trust_remote_code=True,
        low_cpu_mem_usage=True,
        use_safetensors=True,
    )

    # Switch to inference mode first, then move the weights to the device.
    net = net.eval()
    net = net.to(target)
    return tok, net, target

# Streamlit interface: page header and short usage hint.
st.title("OCR Application using General OCR Theory (GOT) 2.0")
st.write("Upload an image to extract text using the GOT OCR 2.0 model.")

# File upload handler (restricted to PNG/JPEG uploads).
uploaded_file = st.file_uploader("Choose an image...", type=["png", "jpg", "jpeg"])

if uploaded_file is not None:
    # Display the uploaded image
    st.image(uploaded_file, caption="Uploaded Image", use_column_width=True)

    # Load model (cached by load_model, so only the first run pays the cost)
    tokenizer, model, device = load_model()

    # Load the image
    image = Image.open(uploaded_file)
    # JPEG uploads can be CMYK, which Pillow's PNG writer rejects; normalise
    # those to RGB so saving below does not raise.
    if image.mode == "CMYK":
        image = image.convert("RGB")

    # model.chat is handed a file path, so the upload has to be written to
    # disk. Use a private temporary directory per run: a fixed file name in
    # the working directory would be shared by every concurrent session
    # (one user's image could be OCR'd for another) and would never be
    # cleaned up.
    with tempfile.TemporaryDirectory() as tmp_dir:
        image_path = os.path.join(tmp_dir, "temp_image.png")
        image.save(image_path)

        # Perform OCR while the temporary file still exists
        with st.spinner("Extracting text..."):
            res = model.chat(tokenizer, image_path, ocr_type='ocr')

    # Display the result
    st.write("Extracted Text:")
    st.text(res)