# Streamlit app for extracting text from an image using the General OCR Theory (GOT) 2.0 model
import os
import tempfile
from pathlib import Path

import streamlit as st
from transformers import AutoTokenizer, AutoModel
import torch
from PIL import Image
import requests

# Hugging Face repository that hosts the GOT OCR 2.0 weights and remote code.
MODEL_ID = 'ucaslcl/GOT-OCR2_0'

# Extensions accepted by the uploader; also used to pick a temp-file suffix.
ALLOWED_EXTENSIONS = ["png", "jpg", "jpeg"]


# Load the pre-trained GOT OCR 2.0 model and tokenizer
@st.cache_resource(show_spinner=True)
def load_model():
    """Load the GOT OCR 2.0 tokenizer and model.

    The result is cached by ``st.cache_resource`` so the load happens once
    rather than on every Streamlit rerun.

    Returns:
        A ``(tokenizer, model)`` tuple; the model is in eval mode and has
        been moved to CUDA.
    """
    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
    model = AutoModel.from_pretrained(
        MODEL_ID,
        trust_remote_code=True,
        low_cpu_mem_usage=True,
        device_map='cuda',
        use_safetensors=True,
        pad_token_id=tokenizer.eos_token_id,
    )
    return tokenizer, model.eval().cuda()


def _save_upload_to_temp_file(uploaded_file):
    """Write the uploaded bytes to a fresh temporary file and return its path.

    The client-supplied file name is never used as a path: only its
    extension is kept (and only if it is one of ``ALLOWED_EXTENSIONS``), so
    an upload cannot overwrite files in the working directory or escape it.
    The caller is responsible for deleting the returned file.
    """
    suffix = Path(uploaded_file.name).suffix.lower()
    if suffix.lstrip(".") not in ALLOWED_EXTENSIONS:
        suffix = ""
    # delete=False: the file must outlive this handle so it can be reopened
    # by path during OCR.
    with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
        tmp.write(uploaded_file.getbuffer())
    return tmp.name


# Streamlit interface
st.title("OCR Application using General OCR Theory (GOT) 2.0")
st.write("Upload an image to extract text using the GOT OCR 2.0 model.")

# File upload handler
uploaded_file = st.file_uploader("Choose an image...", type=ALLOWED_EXTENSIONS)

if uploaded_file is not None:
    # Display the uploaded image
    st.image(uploaded_file, caption="Uploaded Image", use_column_width=True)

    # Load model
    tokenizer, model = load_model()

    # model.chat is given a file path, so persist the upload to disk first.
    image_file = _save_upload_to_temp_file(uploaded_file)
    try:
        # Perform OCR
        with st.spinner("Extracting text..."):
            res = model.chat(tokenizer, image_file, ocr_type='ocr')
    finally:
        # Always remove the temporary copy, even if OCR raised.
        os.remove(image_file)

    # Display the result
    st.write("Extracted Text:")
    st.text(res)