import torch
from transformers import AutoModel, AutoTokenizer
from PIL import Image
import gradio as gr
import os

# Specify the revision ID you want to pin to
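# (the revision argument accepts a branch name, tag, or commit hash from the model repository on the Hugging Face Hub)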
revision_id = "your_revision_id_here"  # Replace with the actual revision ID

# Load the OCR model and tokenizer with pinned revision
tokenizer = AutoTokenizer.from_pretrained('ucaslcl/GOT-OCR2_0', revision=revision_id, trust_remote_code=True)
model = AutoModel.from_pretrained('ucaslcl/GOT-OCR2_0', revision=revision_id, trust_remote_code=True, 
                                  low_cpu_mem_usage=True, 
                                  pad_token_id=tokenizer.eos_token_id).eval()

# Move model to CPU
device = torch.device('cpu')
model = model.to(device)

# Function to perform OCR on an image file
def perform_ocr(image_file_path):
    # Open the image using PIL
    image = Image.open(image_file_path)

    # Save the image temporarily
    temp_image_path = "temp_image.png"
    image.save(temp_image_path)

    # Disable gradient tracking during inference to reduce memory usage
    with torch.no_grad():
        # Perform OCR using the model on CPU (pass the file path of the saved image)
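        # (per the GOT-OCR2_0 model card, ocr_type='format' is also available for formatted output)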
        result = model.chat(tokenizer, temp_image_path, ocr_type='ocr')

    # Clean up the temporary image file
    os.remove(temp_image_path)

    # Return the extracted text
    return result

# Gradio interface for file upload and OCR
iface = gr.Interface(fn=perform_ocr, inputs="file", outputs="text", 
                     title="OCR Application", description="Upload an image to extract text.")

# Launch the Gradio app
iface.launch()
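# Note: launch() starts a local server; passing share=True would additionally create a temporary public URL.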