Spaces:

Vinay15
/

OCR_and_Document_Search_Web_Application

Sleeping

App Files Community

Vinay15 committed on Sep 29, 2024

Commit

de0d96a

verified ·

1 Parent(s): c35e395

Update app.py

Browse files

Files changed (1) hide show

app.py +13 -38

app.py CHANGED Viewed

@@ -1,51 +1,26 @@
 import gradio as gr
 from transformers import AutoModel, AutoTokenizer
 from PIL import Image
-import torch
-# Load the tokenizer
 tokenizer = AutoTokenizer.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True)
-# Try loading the model with error handling
-try:
-    model = AutoModel.from_pretrained(
-        'ucaslcl/GOT-OCR2_0',
-        trust_remote_code=True,
-        low_cpu_mem_usage=True,
-        device_map='auto',  # Use 'auto' to decide whether to use CPU or GPU
-        use_safetensors=True,
-        pad_token_id=tokenizer.eos_token_id
-    )
-    # Check if CUDA (GPU) is available, else fall back to CPU
-    if torch.cuda.is_available():
-        model = model.eval().cuda()
-        print("Model loaded on GPU.")
-    else:
-        model = model.eval().cpu()
-        print("CUDA not available, model loaded on CPU.")
-except Exception as e:
-    print(f"Error loading model: {e}")
 # Define the OCR function
 def perform_ocr(image):
-    try:
-        # Convert PIL image to RGB format (if necessary)
-        if image.mode != "RGB":
-            image = image.convert("RGB")
-        # Save the image to a temporary path
-        image_file_path = 'temp_image.jpg'
-        image.save(image_file_path)
-        # Perform OCR using the model
-        res = model.chat(tokenizer, image_file_path, ocr_type='ocr')
-        return res
-    except Exception as e:
-        return str(e)
 # Define the Gradio interface
 interface = gr.Interface(
@@ -57,4 +32,4 @@ interface = gr.Interface(
 )
 # Launch the Gradio app
-interface.launch()

 import gradio as gr
 from transformers import AutoModel, AutoTokenizer
 from PIL import Image
+# Load the tokenizer and model
 tokenizer = AutoTokenizer.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True)
+model = AutoModel.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True, low_cpu_mem_usage=True, use_safetensors=True, pad_token_id=tokenizer.eos_token_id)
+model = model.eval()  # Remove .cuda() to run on CPU
 # Define the OCR function
 def perform_ocr(image):
+    # Convert PIL image to RGB format (if necessary)
+    if image.mode != "RGB":
+        image = image.convert("RGB")
+    # Save the image to a temporary path
+    image_file_path = 'temp_image.jpg'
+    image.save(image_file_path)
+    # Perform OCR using the model
+    res = model.chat(tokenizer, image_file_path, ocr_type='ocr')
+    return res
 # Define the Gradio interface
 interface = gr.Interface(
 )
 # Launch the Gradio app
+interface.launch()