File size: 1,219 Bytes
b42b1aa
8434495
f2460f7
c920662
9184993
dba283c
8434495
a8781ff
 
 
8434495
dba283c
 
a8781ff
3534c83
a8781ff
c920662
dba283c
 
9fca578
dba283c
 
 
 
 
 
a8781ff
8434495
dba283c
 
 
2b3d2ae
dba283c
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
import torch
from transformers import AutoModel, AutoTokenizer
from PIL import Image
import gradio as gr

# Load the OCR model and tokenizer with low memory usage in mind.
# NOTE(review): trust_remote_code=True executes Python shipped with the
# 'ucaslcl/GOT-OCR2_0' repo at load time — acceptable only if that repo is
# trusted. low_cpu_mem_usage=True streams weights to reduce peak RAM, and
# pad_token_id is pinned to the EOS token so generation does not warn/fail
# on a tokenizer without an explicit pad token.
tokenizer = AutoTokenizer.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True)
model = AutoModel.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True, 
                                  low_cpu_mem_usage=True, 
                                  pad_token_id=tokenizer.eos_token_id).eval()  # eval(): inference mode (disables dropout etc.)

# Ensure we are using CPU
# (explicit .to('cpu') keeps behavior deterministic even on machines with GPUs)
device = torch.device('cpu')
model = model.to(device)

# Function to perform OCR on the image
def perform_ocr(image):
    """Run the GOT-OCR2 model on an uploaded image and return the extracted text.

    Parameters
    ----------
    image : str or file-like
        Path or file object of the uploaded image, as supplied by the
        Gradio "file" input component.

    Returns
    -------
    str
        The text extracted by the model.
    """
    # Fix: Image.open() is lazy and keeps the underlying file handle open;
    # the original never closed it, leaking a handle per request. The
    # context manager guarantees closure even if inference raises.
    with Image.open(image) as pil_image:
        # torch.no_grad() skips autograd bookkeeping to keep memory low.
        with torch.no_grad():
            # NOTE(review): GOT-OCR2's chat() is commonly documented as taking
            # an image *path*; passing a PIL image assumes the remote code
            # accepts it — confirm against the model card.
            result = model.chat(tokenizer, pil_image, ocr_type='ocr')

    # Return the extracted text
    return result

# Create the Gradio interface for file upload and OCR.
# The "file" / "text" strings are Gradio component shortcuts (gr.File input,
# gr.Textbox output); the fn receives the uploaded file's path-like object.
# NOTE(review): string shortcuts are version-sensitive in Gradio — confirm
# they resolve as expected on the installed Gradio version.
iface = gr.Interface(fn=perform_ocr, inputs="file", outputs="text", 
                     title="OCR Application", description="Upload an image to extract text.")

# Launch the Gradio app (blocking call; starts a local web server).
iface.launch()