import gradio as gr
import numpy as np
import torch
from PIL import Image
from transformers import AutoProcessor, AutoModelForVision2Seq

# Load the model & processor once at startup (downloads weights on first run).
model_name = "Murasajo/Llama-3.2-VL-Finetuned-on-HandwrittenText"
processor = AutoProcessor.from_pretrained(model_name)
model = AutoModelForVision2Seq.from_pretrained(model_name)
model.eval()  # inference only — disable dropout etc.


def extract_text(image):
    """Extract handwritten text from an image using the fine-tuned VL model.

    Args:
        image: Either a ``numpy.ndarray`` (as delivered by the Gradio image
            component — grayscale (H, W), RGB (H, W, 3), or RGBA (H, W, 4))
            or a path-like/file object that ``PIL.Image.open`` accepts.

    Returns:
        The decoded text string, or an ``"Error: ..."`` message on failure
        (returned rather than raised so Gradio displays it in the UI).
    """
    try:
        # Normalize every input form to a 3-channel RGB PIL image.
        if isinstance(image, np.ndarray):
            if image.ndim == 2:
                # Grayscale (H, W): replicate into 3 channels.
                image = np.stack([image] * 3, axis=-1)
            # .convert("RGB") also drops an alpha channel from RGBA arrays,
            # which Gradio can produce for PNG uploads.
            image = Image.fromarray(image).convert("RGB")
        else:
            image = Image.open(image).convert("RGB")

        # Preprocess and generate without autograd bookkeeping.
        pixel_values = processor(images=image, return_tensors="pt").pixel_values
        with torch.inference_mode():
            # Explicit budget: the default generation length (~20 tokens)
            # would silently truncate longer documents.
            generated_ids = model.generate(pixel_values, max_new_tokens=512)
        extracted_text = processor.batch_decode(
            generated_ids, skip_special_tokens=True
        )[0]
        return extracted_text
    except Exception as e:
        # Top-level UI boundary: report the error in the output box
        # instead of crashing the Gradio worker.
        return f"Error: {str(e)}"


# Gradio Interface
iface = gr.Interface(
    fn=extract_text,
    inputs="image",
    outputs="text",
    title="Handwritten Text OCR",
    description="Upload a handwritten document and extract text using AI.",
)

# Run the app only when executed as a script, not on import.
if __name__ == "__main__":
    iface.launch()