# Streamlit app for extracting text from an image using the General OCR Theory (GOT) 2.0 model
import os
import tempfile

import requests
import streamlit as st
import torch
from PIL import Image
from transformers import AutoTokenizer, AutoModel

# Load the pre-trained GOT OCR 2.0 model and tokenizer
@st.cache_resource(show_spinner=True)
def load_model():
    """Load the GOT-OCR 2.0 tokenizer and model from the Hugging Face Hub.

    Wrapped in ``st.cache_resource`` so the load is not repeated on every
    Streamlit rerun.

    Returns:
        A ``(tokenizer, model)`` tuple. The model has been switched to eval
        mode and moved to the CUDA device.
    """
    checkpoint = 'ucaslcl/GOT-OCR2_0'

    # NOTE: trust_remote_code=True lets the checkpoint's own Python code run.
    tok = AutoTokenizer.from_pretrained(checkpoint, trust_remote_code=True)
    net = AutoModel.from_pretrained(
        checkpoint,
        trust_remote_code=True,
        low_cpu_mem_usage=True,
        device_map='cuda',
        use_safetensors=True,
        pad_token_id=tok.eos_token_id,
    )

    net = net.eval()
    return tok, net.cuda()

# Streamlit interface: page title, short instructions, and the upload widget.
st.title("OCR Application using General OCR Theory (GOT) 2.0")
st.write("Upload an image to extract text using the GOT OCR 2.0 model.")

# File upload handler (restricted to common raster image extensions)
uploaded_file = st.file_uploader("Choose an image...", type=["png", "jpg", "jpeg"])

if uploaded_file is not None:
    # Display the uploaded image
    st.image(uploaded_file, caption="Uploaded Image", use_column_width=True)

    # Load model (cached by load_model, so reruns reuse the same instance)
    tokenizer, model = load_model()

    # model.chat is given a file path, so the upload has to be written to
    # disk first. Use a uniquely named temporary file instead of the
    # client-supplied filename in the working directory: that avoids
    # overwriting existing files, avoids collisions between sessions that
    # upload the same filename, and lets us clean up afterwards. Only the
    # original extension is kept.
    suffix = os.path.splitext(uploaded_file.name)[1]
    tmp = tempfile.NamedTemporaryFile(suffix=suffix, delete=False)
    try:
        # Close the handle before the model reads the path so the data is
        # flushed and the file can be reopened on every platform.
        with tmp:
            tmp.write(uploaded_file.getbuffer())

        # Perform OCR
        with st.spinner("Extracting text..."):
            res = model.chat(tokenizer, tmp.name, ocr_type='ocr')
    finally:
        # Always remove the temporary copy, even if OCR raised.
        os.unlink(tmp.name)

    # Display the result
    st.write("Extracted Text:")
    st.text(res)