DeepDiveDev's picture
Update app.py
429d160 verified
raw
history blame
1.34 kB
import gradio as gr
import torch
import numpy as np
from PIL import Image
from transformers import AutoProcessor, AutoModelForVision2Seq
# Load the model & processor
# NOTE(review): from_pretrained downloads the checkpoint from the Hugging Face
# Hub on first run (network required); this happens at import time.
model_name = "Murasajo/Llama-3.2-VL-Finetuned-on-HandwrittenText"
processor = AutoProcessor.from_pretrained(model_name)
model = AutoModelForVision2Seq.from_pretrained(model_name)
# Function to extract handwritten text
def extract_text(image):
    """Run the vision-to-text model on a handwritten-document image.

    Args:
        image: Either a numpy array (what the Gradio image widget passes),
            or a filepath/file-like object that PIL can open, or None
            (Gradio passes None when the widget is empty/cleared).

    Returns:
        The decoded text string, or an "Error: ..." message on failure —
        Gradio renders the returned string either way, so the UI never crashes.
    """
    try:
        # Guard: Gradio sends None when no image was uploaded.
        if image is None:
            return "Error: No image provided"
        # Normalize the input to a PIL Image.
        if isinstance(image, np.ndarray):
            if image.ndim == 2:  # grayscale (H, W) -> replicate to 3 channels
                image = np.stack([image] * 3, axis=-1)
            image = Image.fromarray(image)
        else:
            image = Image.open(image).convert("RGB")
        # Preprocess, generate token ids, and decode them back to text.
        pixel_values = processor(images=image, return_tensors="pt").pixel_values
        with torch.no_grad():  # inference only — skip autograd bookkeeping
            generated_ids = model.generate(pixel_values)
        extracted_text = processor.batch_decode(
            generated_ids, skip_special_tokens=True
        )[0]
        return extracted_text
    except Exception as e:
        # Surface the failure in the UI instead of raising.
        return f"Error: {str(e)}"
# Gradio Interface
# Wires extract_text into a simple web UI: one image input, one text output.
iface = gr.Interface(
    fn=extract_text,
    inputs="image",  # string shorthand for a Gradio image component; extract_text accepts an ndarray or a path
    outputs="text",
    title="Handwritten Text OCR",
    description="Upload a handwritten document and extract text using AI.",
)
# Run the app
# launch() starts the local web server (blocking call).
iface.launch()