|
|
|
import os
import tempfile

import requests
import streamlit as st
import torch
from PIL import Image
from transformers import AutoTokenizer, AutoModel
|
|
|
|
|
@st.cache_resource(show_spinner=True)
def load_model():
    """Load the GOT-OCR 2.0 tokenizer and model and return them as a pair.

    Both objects are loaded from the ``ucaslcl/GOT-OCR2_0`` checkpoint with
    ``trust_remote_code=True``. The model is requested on CUDA, switched to
    eval mode and moved to the GPU before being returned. The function is
    wrapped in ``st.cache_resource`` so the loaded objects can be reused
    instead of being rebuilt on each call.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    checkpoint = 'ucaslcl/GOT-OCR2_0'

    ocr_tokenizer = AutoTokenizer.from_pretrained(checkpoint, trust_remote_code=True)
    ocr_model = AutoModel.from_pretrained(
        checkpoint,
        trust_remote_code=True,
        low_cpu_mem_usage=True,
        device_map='cuda',
        use_safetensors=True,
        # The end-of-sequence token doubles as the padding token.
        pad_token_id=ocr_tokenizer.eos_token_id,
    )

    # Inference only: eval mode, then make sure the weights live on the GPU.
    ocr_model = ocr_model.eval().cuda()
    return ocr_tokenizer, ocr_model
|
|
|
|
|
# ---- Page header ---------------------------------------------------------
st.title("OCR Application using General OCR Theory (GOT) 2.0")
st.write("Upload an image to extract text using the GOT OCR 2.0 model.")

# ---- Image upload --------------------------------------------------------
uploaded_file = st.file_uploader("Choose an image...", type=["png", "jpg", "jpeg"])

if uploaded_file is not None:
    # Echo the upload back so the user can confirm the right image was chosen.
    st.image(uploaded_file, caption="Uploaded Image", use_column_width=True)

    tokenizer, model = load_model()

    # model.chat() is given a file path, so the upload has to be written to
    # disk. Write it to a uniquely named temporary file rather than to
    # `uploaded_file.name` in the working directory: the client-supplied name
    # could overwrite files next to the app, and two sessions uploading the
    # same filename would clobber each other's image. Only the extension of
    # the original name is kept.
    # delete=False because the file is reopened by path after this handle is
    # closed; it is removed explicitly in the `finally` below.
    suffix = os.path.splitext(uploaded_file.name)[1]
    with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
        tmp.write(uploaded_file.getbuffer())
        image_file = tmp.name

    try:
        with st.spinner("Extracting text..."):
            res = model.chat(tokenizer, image_file, ocr_type='ocr')
    finally:
        # Always clean up the temporary image, even if OCR raised.
        os.remove(image_file)

    # ---- Result ----------------------------------------------------------
    st.write("Extracted Text:")
    st.text(res)
|
|