DeepDiveDev committed
Commit df3681e · verified · 1 Parent(s): 4a2ef6d

Update app.py

Files changed (1)
  1. app.py +10 -17
app.py CHANGED
@@ -1,3 +1,4 @@
+
 import streamlit as st
 from transformers import AutoModel, AutoTokenizer, MarianMTModel, MarianTokenizer
 from PIL import Image
@@ -8,8 +9,8 @@ import re
 import torch
 
 # Check if GPU is available, else default to CPU
-device = 'cuda' if torch.cuda.is_available() else 'cpu'
-st.write(f"Using device: {device.upper()}")
+device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+st.write(f"Using device: {device}")
 
 # Load EasyOCR reader with English and Hindi language support
 reader = easyocr.Reader(['en', 'hi']) # 'en' for English, 'hi' for Hindi
@@ -17,17 +18,14 @@ reader = easyocr.Reader(['en', 'hi']) # 'en' for English, 'hi' for Hindi
 # Load the GOT-OCR2 model and tokenizer
 tokenizer = AutoTokenizer.from_pretrained('stepfun-ai/GOT-OCR2_0', trust_remote_code=True)
 
-# Load the model with low memory usage on CPU or auto-map for GPU if available
+# Load the model and move it to the correct device (GPU if available, else CPU)
 model = AutoModel.from_pretrained(
     'stepfun-ai/GOT-OCR2_0',
     trust_remote_code=True,
     low_cpu_mem_usage=True,
-    device_map='auto' if device == 'cuda' else None, # Use GPU if available, else None
     use_safetensors=True,
     pad_token_id=tokenizer.eos_token_id
 )
-
-# Move model to appropriate device (GPU or CPU)
 model = model.to(device)
 model = model.eval()
 
@@ -61,14 +59,12 @@ if image_file is not None:
     # Button to run OCR
     if st.button("Run OCR"):
         # Ensure model runs on CPU if GPU isn't available
-        if device == 'cuda':
-            res_plain = model.chat(tokenizer, temp_file_path, ocr_type='ocr')
-        else:
-            with torch.no_grad(): # Disable gradient calculations to save memory on CPU
-                res_plain = model.chat(tokenizer, temp_file_path, ocr_type='ocr')
+        with torch.no_grad(): # Disable gradient calculations to save memory on CPU
+            # Replace .cuda() with device handling based on CPU/GPU availability
+            res_plain = model.chat(tokenizer, temp_file_path, ocr_type='ocr', device=device)
 
         # Perform formatted text OCR
-        res_format = model.chat(tokenizer, temp_file_path, ocr_type='format')
+        res_format = model.chat(tokenizer, temp_file_path, ocr_type='format', device=device)
 
         # Use EasyOCR for both English and Hindi text recognition
         result_easyocr = reader.readtext(temp_file_path, detail=0)
@@ -98,12 +94,12 @@ if image_file is not None:
         st.write(" ".join(translated_text))
 
         # Additional OCR types using GOT-OCR2
-        res_fine_grained = model.chat(tokenizer, temp_file_path, ocr_type='ocr', ocr_box='')
+        res_fine_grained = model.chat(tokenizer, temp_file_path, ocr_type='ocr', ocr_box='', device=device)
         st.subheader("Fine-Grained OCR Results:")
         st.write(res_fine_grained)
 
         # Render formatted OCR to HTML
-        res_render = model.chat(tokenizer, temp_file_path, ocr_type='format', render=True, save_render_file='./demo.html')
+        res_render = model.chat(tokenizer, temp_file_path, ocr_type='format', render=True, save_render_file='./demo.html', device=device)
         st.subheader("Rendered OCR Results (HTML):")
         st.write(res_render)
 
@@ -118,6 +114,3 @@ if image_file is not None:
 
         # Clean up the temporary file after use
         os.remove(temp_file_path)
-
-        # Note: No need for if __name__ == "__main__": st.run()
-
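For reference, a minimal standalone sketch of the device-handling pattern this commit settles on: pick the device once with torch.device, load the model without device_map, move it with a single .to(device), and run every inference call inside torch.no_grad(). The chat() method and its device= keyword are GOT-OCR2's trust_remote_code API as invoked in the diff (not a standard transformers method), and 'sample.png' is a placeholder image path used only for illustration.

import torch
from transformers import AutoModel, AutoTokenizer

# Select the device once; everything below follows from this choice.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

tokenizer = AutoTokenizer.from_pretrained('stepfun-ai/GOT-OCR2_0', trust_remote_code=True)
model = AutoModel.from_pretrained(
    'stepfun-ai/GOT-OCR2_0',
    trust_remote_code=True,
    low_cpu_mem_usage=True,
    use_safetensors=True,
    pad_token_id=tokenizer.eos_token_id,
)
model = model.to(device).eval()  # one explicit move replaces the old device_map branching

with torch.no_grad():  # inference only; skips autograd bookkeeping on CPU and GPU alike
    # GOT-OCR2's custom chat() as used in the updated app.py; 'sample.png' is a placeholder.
    plain_text = model.chat(tokenizer, 'sample.png', ocr_type='ocr', device=device)
    formatted = model.chat(tokenizer, 'sample.png', ocr_type='format', device=device)

print(plain_text)
print(formatted)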