Spaces:

Arch10
/

ocr-app

Sleeping

ocr-app / app.py

Create app.py

28fd050 verified 7 months ago

1.48 kB

	# Streamlit app for extracting text from an image using the General OCR Theory (GOT) 2.0 model
	import os
	import tempfile

	import streamlit as st
	from transformers import AutoTokenizer, AutoModel
	import torch
	from PIL import Image
	import requests

	# Load the pre-trained GOT OCR 2.0 model and tokenizer
	@st.cache_resource(show_spinner=True)
	def load_model():
	    """Load the GOT-OCR 2.0 tokenizer and model, placing the model on CUDA.

	    Decorated with ``st.cache_resource`` so the returned objects are cached
	    by Streamlit instead of being rebuilt on each call.

	    Returns:
	        A ``(tokenizer, model)`` tuple; the model has been put in eval mode
	        and moved to the CUDA device.
	    """
	    repo_id = 'ucaslcl/GOT-OCR2_0'

	    # trust_remote_code=True: the tokenizer/model classes are loaded from
	    # code shipped with the checkpoint repository.
	    tokenizer = AutoTokenizer.from_pretrained(repo_id, trust_remote_code=True)
	    ocr_model = AutoModel.from_pretrained(
	        repo_id,
	        trust_remote_code=True,
	        low_cpu_mem_usage=True,
	        device_map='cuda',
	        use_safetensors=True,
	        pad_token_id=tokenizer.eos_token_id,
	    )

	    # Inference only: switch to eval mode, then make sure it is on the GPU.
	    ocr_model = ocr_model.eval()
	    return tokenizer, ocr_model.cuda()

	# Streamlit interface: title, short instructions, then an image uploader.
	st.title("OCR Application using General OCR Theory (GOT) 2.0")
	st.write("Upload an image to extract text using the GOT OCR 2.0 model.")

	# File upload handler (restricted to common raster image extensions)
	uploaded_file = st.file_uploader("Choose an image...", type=["png", "jpg", "jpeg"])

	if uploaded_file is not None:
	    # Display the uploaded image
	    st.image(uploaded_file, caption="Uploaded Image", use_column_width=True)

	    # Load model (load_model is wrapped in st.cache_resource)
	    tokenizer, model = load_model()

	    # model.chat() is handed a file path below, so the upload has to exist
	    # on disk. Write it to a uniquely named temporary file instead of
	    # uploaded_file.name in the working directory: the client-supplied name
	    # must not decide where we write, two sessions uploading the same name
	    # must not overwrite each other, and the file must not be left behind.
	    # The original extension is kept in case the loader inspects it.
	    suffix = os.path.splitext(uploaded_file.name)[1]
	    with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
	        tmp.write(uploaded_file.getbuffer())
	        image_file = tmp.name

	    try:
	        # Perform OCR
	        with st.spinner("Extracting text..."):
	            res = model.chat(tokenizer, image_file, ocr_type='ocr')
	    finally:
	        # Always remove the temporary image, even if OCR raised.
	        os.remove(image_file)

	    # Display the result
	    st.write("Extracted Text:")
	    st.text(res)