Spaces:

Vinay15
/

OCR_and_Document_Search_Web_Application

Sleeping

File size: 1,411 Bytes

b42b1aa
8434495
f2460f7
c920662
d67cd1e
9184993
76581dc
8434495
a8781ff
 
 
8434495
dba283c
 
a8781ff
3534c83
76581dc
 
 
 
 
 
d67cd1e
 
9fca578
dba283c
 
d67cd1e
 
76581dc
d67cd1e
 
76581dc
dba283c
a8781ff
8434495
dba283c
 
 
2b3d2ae
dba283c

import os
import tempfile

import gradio as gr
import torch
from PIL import Image
from transformers import AutoModel, AutoTokenizer

# Hugging Face Hub id of the OCR checkpoint; shared by the tokenizer and the
# model so the two cannot drift apart.
_GOT_OCR_CHECKPOINT = 'ucaslcl/GOT-OCR2_0'

# The tokenizer is loaded first because the model's pad token id is taken
# from the tokenizer's EOS token id.
tokenizer = AutoTokenizer.from_pretrained(
    _GOT_OCR_CHECKPOINT,
    trust_remote_code=True,
)

# NOTE(review): trust_remote_code=True runs Python code shipped with the
# checkpoint repository -- consider pinning a revision.
model = AutoModel.from_pretrained(
    _GOT_OCR_CHECKPOINT,
    trust_remote_code=True,
    low_cpu_mem_usage=True,
    pad_token_id=tokenizer.eos_token_id,
)
model = model.eval()

# Inference is pinned to the CPU.
device = torch.device('cpu')
model = model.to(device)

# Function to perform OCR on the image file
def perform_ocr(image_file_path):
    """Extract text from an uploaded image with the GOT-OCR model.

    The image is re-encoded as a PNG scratch file because ``model.chat`` is
    called with a file path rather than an in-memory image.

    Args:
        image_file_path: Anything ``PIL.Image.open`` accepts. Presumably the
            path Gradio hands over for the uploaded file -- confirm against
            the Gradio version in use.

    Returns:
        Whatever ``model.chat`` returns for ``ocr_type='ocr'`` (shown by the
        interface as text).

    Raises:
        Any exception raised by PIL while opening/saving the image or by the
        model during inference; the scratch file is removed either way.
    """
    # A unique file per call: a fixed name in the working directory would be
    # overwritten or deleted by overlapping requests.
    fd, temp_image_path = tempfile.mkstemp(suffix=".png")
    # Only the path is needed; PIL reopens the file itself when saving.
    os.close(fd)

    try:
        # Context manager releases the uploaded file's handle once re-saved.
        with Image.open(image_file_path) as image:
            image.save(temp_image_path)

        # Use torch.no_grad() to avoid unnecessary memory usage
        with torch.no_grad():
            # Perform OCR using the model (pass the file path of the saved image)
            result = model.chat(tokenizer, temp_image_path, ocr_type='ocr')
    finally:
        # Clean up the scratch file even when saving or inference fails.
        os.remove(temp_image_path)

    # Return the extracted text
    return result

# Build the web UI: a single file-upload input wired to perform_ocr, with the
# function's return value rendered as text.
iface = gr.Interface(
    fn=perform_ocr,
    inputs="file",
    outputs="text",
    title="OCR Application",
    description="Upload an image to extract text.",
)

# Start serving the app.
iface.launch()