Spaces:

Vinay15
/

OCR_and_Document_Search_Web_Application

Running

Vinay15 committed on Sep 30, 2024

Commit

4f6d4b8

verified ·

1 Parent(s): f627171

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -4,17 +4,17 @@ from PIL import Image
 import gradio as gr
 import os
-# Load the OCR model and tokenizer
 tokenizer = AutoTokenizer.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True)
 model = AutoModel.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True,
                                   low_cpu_mem_usage=True,
                                   pad_token_id=tokenizer.eos_token_id).eval()
-# Ensure everything is on CPU
 device = torch.device('cpu')
 model = model.to(device)
-# Function to perform OCR on the image file
 def perform_ocr(image_file_path):
     # Open the image using PIL
     image = Image.open(image_file_path)
@@ -25,7 +25,7 @@ def perform_ocr(image_file_path):
     # Use torch.no_grad() to avoid unnecessary memory usage
     with torch.no_grad():
-        # Perform OCR using the model (pass the file path of the saved image)
         result = model.chat(tokenizer, temp_image_path, ocr_type='ocr')
     # Clean up the temporary image file
@@ -34,7 +34,7 @@ def perform_ocr(image_file_path):
     # Return the extracted text
     return result
-# Create the Gradio interface for file upload and OCR
 iface = gr.Interface(fn=perform_ocr, inputs="file", outputs="text",
                      title="OCR Application", description="Upload an image to extract text.")

 import gradio as gr
 import os
+# Load the OCR model and tokenizer, trust_remote_code=True allows custom model logic
 tokenizer = AutoTokenizer.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True)
 model = AutoModel.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True,
                                   low_cpu_mem_usage=True,
                                   pad_token_id=tokenizer.eos_token_id).eval()
+# Move model to CPU
 device = torch.device('cpu')
 model = model.to(device)
+# Function to perform OCR on an image file
 def perform_ocr(image_file_path):
     # Open the image using PIL
     image = Image.open(image_file_path)
     # Use torch.no_grad() to avoid unnecessary memory usage
     with torch.no_grad():
+        # Perform OCR using the model on CPU (pass the file path of the saved image)
         result = model.chat(tokenizer, temp_image_path, ocr_type='ocr')
     # Clean up the temporary image file
     # Return the extracted text
     return result
+# Gradio interface for file upload and OCR
 iface = gr.Interface(fn=perform_ocr, inputs="file", outputs="text",
                      title="OCR Application", description="Upload an image to extract text.")