|
import os
import tempfile

import streamlit as st
import torch
from PIL import Image
from transformers import AutoTokenizer, AutoModel
|
|
|
|
|
@st.cache_resource(show_spinner=True)
def load_model():
    """Build the GOT-OCR2_0 tokenizer and model and choose the compute device.

    The function is wrapped in ``st.cache_resource`` so the objects it
    returns are reused instead of being rebuilt on every call.

    Returns:
        tuple: ``(tokenizer, model, device)`` where ``model`` has been put in
        eval mode and moved to ``device`` (CUDA when available, else CPU).
    """
    checkpoint = 'ucaslcl/GOT-OCR2_0'

    # Prefer the GPU when torch reports one; otherwise stay on the CPU.
    if torch.cuda.is_available():
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")

    tokenizer = AutoTokenizer.from_pretrained(checkpoint, trust_remote_code=True)
    net = AutoModel.from_pretrained(
        checkpoint,
        trust_remote_code=True,
        low_cpu_mem_usage=True,
        use_safetensors=True,
    )

    # Inference only: switch to eval mode, then place the model on the device.
    net = net.eval()
    net = net.to(device)

    return tokenizer, net, device
|
|
|
|
|
# --- Page header -----------------------------------------------------------
st.title("OCR Application using General OCR Theory (GOT) 2.0")
st.write("Upload an image to extract text using the GOT OCR 2.0 model.")

# --- Image upload ----------------------------------------------------------
uploaded_file = st.file_uploader("Choose an image...", type=["png", "jpg", "jpeg"])

if uploaded_file is not None:
    # Echo the upload back to the user before running OCR on it.
    st.image(uploaded_file, caption="Uploaded Image", use_column_width=True)

    tokenizer, model, device = load_model()

    image = Image.open(uploaded_file)
    # PNG cannot store CMYK data, so a CMYK JPEG upload would make
    # image.save() raise OSError; convert it to RGB first.
    if image.mode == "CMYK":
        image = image.convert("RGB")

    # model.chat is handed a file path, so the upload is written to disk.
    # A private temporary directory per run keeps concurrent sessions from
    # overwriting each other's image and removes the file when OCR is done.
    with tempfile.TemporaryDirectory() as tmp_dir:
        image_path = os.path.join(tmp_dir, "temp_image.png")
        image.save(image_path)

        with st.spinner("Extracting text..."):
            res = model.chat(tokenizer, image_path, ocr_type='ocr')

    # --- Result ------------------------------------------------------------
    st.write("Extracted Text:")
    st.text(res)
|
|