Spaces:

Arch10
/

ocr-app

Sleeping

ocr-app / app.py

Update app.py

aca06b8 verified 9 months ago

1.49 kB

	import os
	import tempfile

	import streamlit as st
	import torch
	from PIL import Image
	from transformers import AutoTokenizer, AutoModel

	# Load the pre-trained GOT OCR 2.0 model and tokenizer
	@st.cache_resource(show_spinner=True)
	def load_model(model_name: str = 'ucaslcl/GOT-OCR2_0'):
	    """Load the OCR tokenizer and model and place the model on a device.

	    The function is wrapped in ``st.cache_resource`` so the load is done
	    once per distinct ``model_name`` instead of on every script rerun.

	    Args:
	        model_name: Identifier passed to ``from_pretrained`` for both the
	            tokenizer and the model, so the two always come from the same
	            checkpoint. Defaults to the GOT OCR 2.0 checkpoint.

	    Returns:
	        A ``(tokenizer, model, device)`` tuple: the tokenizer, the model in
	        eval mode already moved to ``device``, and the ``torch.device``
	        that was selected.
	    """
	    # NOTE: trust_remote_code=True lets the checkpoint repository run its
	    # own Python code in this process; only pass model names you trust.
	    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

	    # Check for GPU, fallback to CPU
	    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

	    model = AutoModel.from_pretrained(
	        model_name,
	        trust_remote_code=True,
	        low_cpu_mem_usage=True,
	        use_safetensors=True,
	    )
	    # Inference only: switch to eval mode, then move to the chosen device.
	    model = model.eval().to(device)
	    return tokenizer, model, device

	# Streamlit interface
	st.title("OCR Application using General OCR Theory (GOT) 2.0")
	st.write("Upload an image to extract text using the GOT OCR 2.0 model.")

	# File upload handler
	uploaded_file = st.file_uploader("Choose an image...", type=["png", "jpg", "jpeg"])

	if uploaded_file is not None:
	    # Display the uploaded image
	    # NOTE(review): use_column_width is reported as deprecated in newer
	    # Streamlit releases in favour of use_container_width - confirm the
	    # installed version before switching.
	    st.image(uploaded_file, caption="Uploaded Image", use_column_width=True)

	    # Load model (load_model is cached, so this is cheap after the first run).
	    # The returned device is not needed here; the model is already placed on it.
	    tokenizer, model, _device = load_model()

	    # Load the image and normalise it to RGB: PNG cannot store every mode
	    # (for example CMYK JPEG uploads), which would make the save below fail.
	    image = Image.open(uploaded_file).convert("RGB")

	    # model.chat is given a file path, so the upload has to be written to
	    # disk. A private temporary directory per run keeps concurrent sessions
	    # from overwriting each other's image and is removed automatically.
	    with tempfile.TemporaryDirectory() as tmp_dir:
	        image_path = os.path.join(tmp_dir, "temp_image.png")
	        image.save(image_path)

	        # Perform OCR
	        with st.spinner("Extracting text..."):
	            res = model.chat(tokenizer, image_path, ocr_type='ocr')

	    # Display the result
	    st.write("Extracted Text:")
	    st.text(res)