File size: 1,588 Bytes
b42b1aa 8434495 f2460f7 c920662 d67cd1e 9184993 b94f0a2 a8781ff 8434495 4f6d4b8 dba283c a8781ff 3534c83 4f6d4b8 76581dc d67cd1e 9fca578 dba283c 4f6d4b8 d67cd1e 76581dc d67cd1e 76581dc dba283c a8781ff 8434495 4f6d4b8 dba283c 2b3d2ae dba283c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 |
import os
import tempfile

import torch
import gradio as gr
from PIL import Image
from transformers import AutoModel, AutoTokenizer
# Pin the model revision for reproducibility / supply-chain safety.
# Set the GOT_OCR_REVISION env var to a commit SHA or tag; when unset,
# None falls back to the repo's default branch ("main").
# (The previous hard-coded placeholder string would always fail to resolve.)
revision_id = os.environ.get("GOT_OCR_REVISION")

# Load the OCR tokenizer and model.
# NOTE: trust_remote_code=True executes code from the model repo — pinning
# a revision above is what makes this acceptable.
tokenizer = AutoTokenizer.from_pretrained(
    'ucaslcl/GOT-OCR2_0', revision=revision_id, trust_remote_code=True
)
model = AutoModel.from_pretrained(
    'ucaslcl/GOT-OCR2_0',
    revision=revision_id,
    trust_remote_code=True,
    low_cpu_mem_usage=True,          # stream weights to limit peak RAM
    pad_token_id=tokenizer.eos_token_id,
).eval()                              # inference mode: disable dropout etc.

# Run inference on CPU explicitly.
device = torch.device('cpu')
model = model.to(device)
def perform_ocr(image_file_path):
    """Run OCR on the image at *image_file_path* and return the extracted text.

    The image is re-saved as a temporary PNG because ``model.chat`` expects a
    file path rather than a PIL image object.

    Args:
        image_file_path: Path to the uploaded image file.

    Returns:
        The text string produced by the model.
    """
    # Context manager ensures the PIL file handle is closed even on error.
    with Image.open(image_file_path) as image:
        # Unique temp file per call: the previous fixed "temp_image.png"
        # name collided when Gradio served concurrent requests.
        fd, temp_image_path = tempfile.mkstemp(suffix=".png")
        os.close(fd)  # PIL reopens the path itself; release the raw fd
        try:
            image.save(temp_image_path)
            # no_grad: inference only, skip autograd bookkeeping to save memory
            with torch.no_grad():
                result = model.chat(tokenizer, temp_image_path, ocr_type='ocr')
        finally:
            # Always remove the temp file, even if OCR raises.
            os.remove(temp_image_path)
    return result
# Wire the OCR function into a minimal Gradio UI: one file-upload input,
# one text output.
iface = gr.Interface(
    fn=perform_ocr,
    inputs="file",
    outputs="text",
    title="OCR Application",
    description="Upload an image to extract text.",
)

# Start the web server (blocks until shut down).
iface.launch()
|