Spaces:

Vinay15
/

OCR_and_Document_Search_Web_Application

Sleeping

Vinay15 committed on Sep 29, 2024

Commit

c96e388

verified ·

1 Parent(s): 802d2bc

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,26 +1,18 @@
 import gradio as gr
-import torch
 from transformers import AutoModel, AutoTokenizer
 from PIL import Image
-# Check GPU availability
 if torch.cuda.is_available():
     print("CUDA is available! GPU is present.")
-    print(f"Number of GPUs: {torch.cuda.device_count()}")
-    print(f"GPU Name: {torch.cuda.get_device_name(0)}")
 else:
     print("CUDA is not available. Running on CPU.")
 # Load the tokenizer and model
 tokenizer = AutoTokenizer.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True)
-# Initialize the model
-if torch.cuda.is_available():
-    model = AutoModel.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True, low_cpu_mem_usage=True, device_map='cuda', use_safetensors=True, pad_token_id=tokenizer.eos_token_id)
-    model = model.eval().cuda()
-else:
-    model = AutoModel.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True, low_cpu_mem_usage=True, pad_token_id=tokenizer.eos_token_id)
-    model = model.eval()  # Keep model on CPU
 # Define the OCR function
 def perform_ocr(image):
@@ -28,8 +20,12 @@ def perform_ocr(image):
     if image.mode != "RGB":
         image = image.convert("RGB")
     # Perform OCR using the model
-    res = model.chat(tokenizer, image, ocr_type='ocr')
     return res

 import gradio as gr
 from transformers import AutoModel, AutoTokenizer
 from PIL import Image
+import torch
+# Check if CUDA is available
 # Decide once whether a GPU can be used; the same flag drives both the log
 # message and the way the model is loaded below.
 use_cuda = torch.cuda.is_available()
 if use_cuda:
     print("CUDA is available! GPU is present.")
 else:
     print("CUDA is not available. Running on CPU.")
 # Load the tokenizer and model
 tokenizer = AutoTokenizer.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True)
 # Keyword arguments shared by the GPU and CPU load paths.
 model_kwargs = dict(
     trust_remote_code=True,
     low_cpu_mem_usage=True,
     use_safetensors=True,
     pad_token_id=tokenizer.eos_token_id,
 )
 # device_map='cuda' asks from_pretrained to place the weights on a CUDA
 # device, so it must only be passed when one exists; passing it
 # unconditionally defeats the CPU fallback announced above.
 if use_cuda:
     model_kwargs['device_map'] = 'cuda'
 model = AutoModel.from_pretrained('ucaslcl/GOT-OCR2_0', **model_kwargs)
 model = model.eval().cuda() if use_cuda else model.eval()
 # Define the OCR function
 def perform_ocr(image):
     """Run OCR on an image and return the model's result.
 
     Args:
         image: Image object exposing ``mode``, ``convert`` and ``save``
             (presumably a ``PIL.Image.Image`` -- confirm against the caller).
 
     Returns:
         Whatever ``model.chat(..., ocr_type='ocr')`` returns (presumably the
         recognized text).
     """
     # Local imports keep this function self-contained.
     import os
     import tempfile
 
     if image.mode != "RGB":
         image = image.convert("RGB")
     # The model is given a file path, so the image is written to disk first.
     # A per-call temporary directory replaces the fixed "temp_image.png" in
     # the working directory: overlapping calls no longer overwrite each
     # other's input, and the file is removed when the block exits.
     with tempfile.TemporaryDirectory() as tmp_dir:
         temp_image_path = os.path.join(tmp_dir, "temp_image.png")
         image.save(temp_image_path)
         # Perform OCR using the model
         res = model.chat(tokenizer, temp_image_path, ocr_type='ocr')
     return res