Vinay15 committed on
Commit
66ae2fc
·
verified ·
1 Parent(s): f7acb22

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -9
app.py CHANGED
@@ -1,18 +1,26 @@
1
  import gradio as gr
 
2
  from transformers import AutoModel, AutoTokenizer
3
  from PIL import Image
4
- import torch
5
 
6
- # Check if CUDA is available
7
  if torch.cuda.is_available():
8
  print("CUDA is available! GPU is present.")
 
 
9
  else:
10
  print("CUDA is not available. Running on CPU.")
11
 
12
  # Load the tokenizer and model
13
  tokenizer = AutoTokenizer.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True)
14
- model = AutoModel.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True, low_cpu_mem_usage=True, device_map='cuda', use_safetensors=True, pad_token_id=tokenizer.eos_token_id)
15
- model = model.eval().cuda() if torch.cuda.is_available() else model.eval()
 
 
 
 
 
 
16
 
17
  # Define the OCR function
18
  def perform_ocr(image):
@@ -20,12 +28,8 @@ def perform_ocr(image):
20
  if image.mode != "RGB":
21
  image = image.convert("RGB")
22
 
23
- # Save the image to a temporary file to pass to the model
24
- temp_image_path = "temp_image.png"
25
- image.save(temp_image_path)
26
-
27
  # Perform OCR using the model
28
- res = model.chat(tokenizer, temp_image_path, ocr_type='ocr')
29
 
30
  return res
31
 
 
1
import gradio as gr
import torch
from transformers import AutoModel, AutoTokenizer
from PIL import Image

# Report device availability at startup so the Space logs show whether
# inference will run on GPU or CPU.
if torch.cuda.is_available():
    print("CUDA is available! GPU is present.")
    print(f"Number of GPUs: {torch.cuda.device_count()}")
    print(f"GPU Name: {torch.cuda.get_device_name(0)}")
else:
    print("CUDA is not available. Running on CPU.")

# Load the tokenizer and model
tokenizer = AutoTokenizer.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True)

# Shared load options so the CPU and GPU paths stay consistent.
# (Previously the CPU branch silently dropped low_cpu_mem_usage and
# use_safetensors, which the GPU branch passed.)
_model_kwargs = dict(
    trust_remote_code=True,
    low_cpu_mem_usage=True,
    use_safetensors=True,
    pad_token_id=tokenizer.eos_token_id,
)

# Initialize the model on the best available device.
if torch.cuda.is_available():
    model = AutoModel.from_pretrained('ucaslcl/GOT-OCR2_0', device_map='cuda', **_model_kwargs)
    model = model.eval().cuda()
else:
    model = AutoModel.from_pretrained('ucaslcl/GOT-OCR2_0', **_model_kwargs)
    model = model.eval()  # Keep model on CPU
24
 
25
  # Define the OCR function
26
  def perform_ocr(image):
 
28
  if image.mode != "RGB":
29
  image = image.convert("RGB")
30
 
 
 
 
 
31
  # Perform OCR using the model
32
+ res = model.chat(tokenizer, image, ocr_type='ocr')
33
 
34
  return res
35