Vinay15 commited on
Commit
d04c2a3
·
verified ·
1 Parent(s): 8434495

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -24
app.py CHANGED
@@ -1,37 +1,51 @@
1
  import gradio as gr
2
- import torch
3
  from transformers import AutoModel, AutoTokenizer
4
  from PIL import Image
 
5
 
6
- # Check GPU availability
7
- if torch.cuda.is_available():
8
- print("CUDA is available! GPU is present.")
9
- print(f"Number of GPUs: {torch.cuda.device_count()}")
10
- print(f"GPU Name: {torch.cuda.get_device_name(0)}")
11
- else:
12
- print("CUDA is not available. Running on CPU.")
13
-
14
- # Load the tokenizer and model
15
  tokenizer = AutoTokenizer.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True)
16
 
17
- # Initialize the model
18
- if torch.cuda.is_available():
19
- model = AutoModel.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True, low_cpu_mem_usage=True, device_map='cuda', use_safetensors=True, pad_token_id=tokenizer.eos_token_id)
20
- model = model.eval().cuda()
21
- else:
22
- model = AutoModel.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True, low_cpu_mem_usage=True, pad_token_id=tokenizer.eos_token_id)
23
- model = model.eval() # Keep model on CPU
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
 
25
  # Define the OCR function
26
  def perform_ocr(image):
27
- # Convert PIL image to RGB format (if necessary)
28
- if image.mode != "RGB":
29
- image = image.convert("RGB")
 
 
 
 
 
30
 
31
- # Perform OCR using the model
32
- res = model.chat(tokenizer, image, ocr_type='ocr')
33
 
34
- return res
 
 
 
35
 
36
  # Define the Gradio interface
37
  interface = gr.Interface(
@@ -43,4 +57,4 @@ interface = gr.Interface(
43
  )
44
 
45
  # Launch the Gradio app
46
- interface.launch()
 
1
import gradio as gr
from transformers import AutoModel, AutoTokenizer
from PIL import Image
import torch

# Load the tokenizer (the GOT-OCR2_0 repo ships custom code, hence trust_remote_code).
tokenizer = AutoTokenizer.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True)

# Sentinel: if loading fails below, `model` stays None instead of being
# undefined — otherwise every later use would raise a confusing NameError.
model = None

# Try loading the model with error handling.
try:
    model = AutoModel.from_pretrained(
        'ucaslcl/GOT-OCR2_0',
        trust_remote_code=True,
        low_cpu_mem_usage=True,
        device_map='auto',  # let accelerate decide CPU vs GPU placement
        use_safetensors=True,
        pad_token_id=tokenizer.eos_token_id,
    )

    # Check if CUDA (GPU) is available, else fall back to CPU.
    # NOTE(review): with device_map='auto' the weights may already be on GPU;
    # the explicit .cuda()/.cpu() move mirrors the original behavior — confirm
    # it does not fight accelerate's placement.
    if torch.cuda.is_available():
        model = model.eval().cuda()
        print("Model loaded on GPU.")
    else:
        model = model.eval().cpu()
        print("CUDA not available, model loaded on CPU.")

except Exception as e:
    # Keep the process alive so the Gradio UI can still start and the
    # failure is visible in the logs.
    print(f"Error loading model: {e}")
30
 
31
  # Define the OCR function
32
def perform_ocr(image):
    """Run OCR on an uploaded image and return the recognized text.

    Args:
        image: PIL.Image.Image supplied by the Gradio interface.

    Returns:
        str: the OCR result from the model, or the error message as a
        string if anything went wrong (shown to the user in the UI).
    """
    import os
    import tempfile

    try:
        # Convert PIL image to RGB format (if necessary) — e.g. RGBA/P
        # images cannot be saved as JPEG directly.
        if image.mode != "RGB":
            image = image.convert("RGB")

        # model.chat expects a file path, so the image must be written to
        # disk first.  Use a unique temp file rather than a fixed name:
        # a hard-coded 'temp_image.jpg' is clobbered when Gradio serves
        # concurrent requests, and was never cleaned up.
        fd, image_file_path = tempfile.mkstemp(suffix=".jpg")
        os.close(fd)
        try:
            image.save(image_file_path)
            # Perform OCR using the model.
            res = model.chat(tokenizer, image_file_path, ocr_type='ocr')
        finally:
            # Always remove the temp file, even if OCR raised.
            os.remove(image_file_path)

        return res

    except Exception as e:
        # Surface the failure in the UI instead of crashing the app.
        return str(e)
49
 
50
  # Define the Gradio interface
51
  interface = gr.Interface(
 
57
  )
58
 
59
  # Launch the Gradio app
60
+ interface.launch()