DeepDiveDev committed on
Commit
8551568
·
verified ·
1 Parent(s): 650c4d6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -4
app.py CHANGED
@@ -5,14 +5,31 @@ import tempfile
5
  import os
6
  import easyocr
7
  import re
 
 
 
 
 
8
 
9
  # Load EasyOCR reader with English and Hindi language support
10
  reader = easyocr.Reader(['en', 'hi']) # 'en' for English, 'hi' for Hindi
11
 
12
  # Load the GOT-OCR2 model and tokenizer
13
  tokenizer = AutoTokenizer.from_pretrained('stepfun-ai/GOT-OCR2_0', trust_remote_code=True)
14
- model = AutoModel.from_pretrained('stepfun-ai/GOT-OCR2_0', trust_remote_code=True, low_cpu_mem_usage=True, device_map='cuda', use_safetensors=True, pad_token_id=tokenizer.eos_token_id)
15
- model = model.eval().cuda()
 
 
 
 
 
 
 
 
 
 
 
 
16
 
17
  # Load MarianMT translation model for Hindi to English translation
18
  translation_tokenizer = MarianTokenizer.from_pretrained('Helsinki-NLP/opus-mt-hi-en')
@@ -43,8 +60,12 @@ if image_file is not None:
43
 
44
  # Button to run OCR
45
  if st.button("Run OCR"):
46
- # Use GOT-OCR2 model for plain text OCR (structured documents)
47
- res_plain = model.chat(tokenizer, temp_file_path, ocr_type='ocr')
 
 
 
 
48
 
49
  # Perform formatted text OCR
50
  res_format = model.chat(tokenizer, temp_file_path, ocr_type='format')
@@ -99,3 +120,4 @@ if image_file is not None:
99
  os.remove(temp_file_path)
100
 
101
  # Note: No need for if __name__ == "__main__": st.run()
 
 
5
  import os
6
  import easyocr
7
  import re
8
+ import torch
9
+
10
+ # Check if GPU is available, else default to CPU
11
+ device = 'cuda' if torch.cuda.is_available() else 'cpu'
12
+ st.write(f"Using device: {device.upper()}")
13
 
14
  # Load EasyOCR reader with English and Hindi language support
15
  reader = easyocr.Reader(['en', 'hi']) # 'en' for English, 'hi' for Hindi
16
 
17
  # Load the GOT-OCR2 model and tokenizer
18
  tokenizer = AutoTokenizer.from_pretrained('stepfun-ai/GOT-OCR2_0', trust_remote_code=True)
19
+
20
+ # Load the model with low memory usage on CPU or auto-map for GPU if available
21
+ model = AutoModel.from_pretrained(
22
+ 'stepfun-ai/GOT-OCR2_0',
23
+ trust_remote_code=True,
24
+ low_cpu_mem_usage=True,
25
+ device_map='auto' if device == 'cuda' else None, # Use GPU if available, else None
26
+ use_safetensors=True,
27
+ pad_token_id=tokenizer.eos_token_id
28
+ )
29
+
30
+ # Move model to appropriate device (GPU or CPU)
31
+ model = model.to(device)
32
+ model = model.eval()
33
 
34
  # Load MarianMT translation model for Hindi to English translation
35
  translation_tokenizer = MarianTokenizer.from_pretrained('Helsinki-NLP/opus-mt-hi-en')
 
60
 
61
  # Button to run OCR
62
  if st.button("Run OCR"):
63
+ # Ensure model runs on CPU if GPU isn't available
64
+ if device == 'cuda':
65
+ res_plain = model.chat(tokenizer, temp_file_path, ocr_type='ocr')
66
+ else:
67
+ with torch.no_grad(): # Disable gradient calculations to save memory on CPU
68
+ res_plain = model.chat(tokenizer, temp_file_path, ocr_type='ocr')
69
 
70
  # Perform formatted text OCR
71
  res_format = model.chat(tokenizer, temp_file_path, ocr_type='format')
 
120
  os.remove(temp_file_path)
121
 
122
  # Note: No need for if __name__ == "__main__": st.run()
123
+