import gradio as gr
import torch
import numpy as np
from PIL import Image
from transformers import AutoProcessor, AutoModelForVision2Seq

# Load the model & processor
model_name = "Murasajo/Llama-3.2-VL-Finetuned-on-HandwrittenText"
processor = AutoProcessor.from_pretrained(model_name)
model = AutoModelForVision2Seq.from_pretrained(model_name)
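
# Optional: run on GPU when one is available (a minimal sketch; assumes the
# default fp32 weights fit in memory -- pass torch_dtype=torch.float16 to
# from_pretrained if they do not)
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)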

# Function to extract handwritten text
def extract_text(image):
    try:
        # Normalize the input to an RGB PIL Image
        if isinstance(image, np.ndarray):
            if image.ndim == 2:  # grayscale (H, W): replicate into 3 channels
                image = np.stack([image] * 3, axis=-1)
            image = Image.fromarray(image).convert("RGB")
        else:
            image = Image.open(image).convert("RGB")

        # Run the image through the model; cap generation length explicitly so
        # longer transcriptions are not cut off at the short default limit
        pixel_values = processor(images=image, return_tensors="pt").pixel_values
        pixel_values = pixel_values.to(model.device)
        generated_ids = model.generate(pixel_values, max_new_tokens=256)
        extracted_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]

        return extracted_text

    except Exception as e:
        return f"Error: {e}"

# Gradio Interface
iface = gr.Interface(
    fn=extract_text,
    inputs="image",
    outputs="text",
    title="Handwritten Text OCR",
    description="Upload a handwritten document and extract text using AI.",
)

# Run the app
iface.launch()
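
# Quick smoke test without the UI, assuming this file is saved as `app.py`
# (hypothetical name) and a local sample image exists:
#
#   from app import extract_text
#   print(extract_text("sample.png"))  # hypothetical sample file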