"""Gradio demo that runs GOT-OCR2.0 on an uploaded image and shows the text."""

import os
import tempfile

import gradio as gr
import torch
from PIL import Image
# AutoModelForImageClassification is no longer used below; the import is
# retained so that nothing previously importable from this module disappears.
from transformers import AutoModel, AutoModelForImageClassification, AutoTokenizer

MODEL_ID = 'ucaslcl/GOT-OCR2_0'

# Load the model and tokenizer.
# GOT-OCR2.0 ships its own modelling code on the Hub, so it has to be loaded
# through AutoModel with trust_remote_code=True rather than through an
# image-classification head.
# NOTE(security): trust_remote_code executes Python downloaded from the Hub;
# consider pinning a `revision=` once a known-good commit has been reviewed.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
model = AutoModel.from_pretrained(
    MODEL_ID,
    trust_remote_code=True,
    use_safetensors=True,
    pad_token_id=tokenizer.eos_token_id,
)
# NOTE(review): the model card's usage example moves the model to CUDA, and
# the remote `chat` implementation appears to assume a GPU -- confirm that the
# deployment target has one.
model = model.eval().cuda()


def perform_ocr(image: Image.Image) -> str:
    """Run plain-text OCR on *image* and return the recognised text.

    Args:
        image: The uploaded picture, as delivered by the Gradio image
            component configured with ``type="pil"``.

    Returns:
        The text produced by the model's ``chat`` method in ``'ocr'`` mode.

    Raises:
        ValueError: If *image* is not a PIL image (for example ``None`` when
            nothing was uploaded).
    """
    # Ensure the image is in the right format.
    if not isinstance(image, Image.Image):
        raise ValueError("Input must be a PIL Image")
    rgb_image = image.convert("RGB")

    # The model card documents `chat` as taking an image *file path*, so the
    # in-memory image is written to a throw-away file first. The directory
    # (and the file inside it) is removed as soon as the block exits.
    with tempfile.TemporaryDirectory() as tmp_dir:
        image_path = os.path.join(tmp_dir, "input.png")
        rgb_image.save(image_path)
        with torch.no_grad():
            text = model.chat(tokenizer, image_path, ocr_type="ocr")

    return text


# Create the Gradio interface.
iface = gr.Interface(
    fn=perform_ocr,
    # gr.inputs.Image was the deprecated namespace (removed in Gradio 4);
    # gr.Image is the supported component.
    inputs=gr.Image(type="pil"),
    outputs="text",
    title="OCR with GOT-OCR2.0",
    description="Upload an image for Optical Character Recognition.",
)

# Launch the interface.
iface.launch()