Arch10 committed
Commit aca06b8 · verified · 1 Parent(s): a7b5cf9

Update app.py

Files changed (1)
  1. app.py +9 -11
app.py CHANGED
@@ -1,16 +1,16 @@
-# Streamlit app for extracting text from an image using the General OCR Theory (GOT) 2.0 model
 import streamlit as st
 from transformers import AutoTokenizer, AutoModel
 import torch
 from PIL import Image
-import requests
 
 # Load the pre-trained GOT OCR 2.0 model and tokenizer
 @st.cache_resource(show_spinner=True)
 def load_model():
     tokenizer = AutoTokenizer.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True)
-    model = AutoModel.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True, low_cpu_mem_usage=True, device_map='cuda', use_safetensors=True, pad_token_id=tokenizer.eos_token_id)
-    return tokenizer, model.eval().cuda()
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # Check for GPU, fallback to CPU
+    model = AutoModel.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True, low_cpu_mem_usage=True, use_safetensors=True)
+    model = model.eval().to(device)  # Move the model to the appropriate device
+    return tokenizer, model, device
 
 # Streamlit interface
 st.title("OCR Application using General OCR Theory (GOT) 2.0")
@@ -24,17 +24,15 @@ if uploaded_file is not None:
     st.image(uploaded_file, caption="Uploaded Image", use_column_width=True)
 
     # Load model
-    tokenizer, model = load_model()
+    tokenizer, model, device = load_model()
 
-    # Load the image into the model
-    with open(uploaded_file.name, 'wb') as f:
-        f.write(uploaded_file.getbuffer())
+    # Load the image
+    image = Image.open(uploaded_file)
+    image.save("temp_image.png")  # Save the uploaded image to a temporary file
 
-    image_file = uploaded_file.name
-
     # Perform OCR
     with st.spinner("Extracting text..."):
-        res = model.chat(tokenizer, image_file, ocr_type='ocr')
+        res = model.chat(tokenizer, "temp_image.png", ocr_type='ocr')
 
     # Display the result
     st.write("Extracted Text:")
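
For reference, a minimal sketch (not part of the commit) of the updated load path run outside Streamlit. It assumes the same ucaslcl/GOT-OCR2_0 repo and the model.chat(tokenizer, image_path, ocr_type='ocr') call that app.py uses; "sample.png" is a placeholder path for any local image.

# Minimal sketch: exercise the updated load path without Streamlit.
# Assumes the same model repo and chat() call as app.py; "sample.png" is a placeholder image path.
import torch
from transformers import AutoTokenizer, AutoModel

tokenizer = AutoTokenizer.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # same GPU/CPU fallback as load_model()
model = AutoModel.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True,
                                  low_cpu_mem_usage=True, use_safetensors=True)
model = model.eval().to(device)

text = model.chat(tokenizer, "sample.png", ocr_type='ocr')  # ocr_type='ocr' requests plain-text extraction
print(text)

The app itself is still launched with streamlit run app.py; whether inference actually succeeds without a GPU depends on the model's trust_remote_code implementation, which the commit does not change.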