Spaces:

DeepDiveDev
/

TransformoDocs-Demo

Sleeping

TransformoDocs-Demo / app.py

Update app.py

429d160 verified 5 months ago

1.34 kB

	import gradio as gr
	import torch
	import numpy as np
	from PIL import Image
	from transformers import AutoProcessor, AutoModelForVision2Seq

	# Load the processor and the vision-to-sequence model once, at import time.
	# Both are resolved from the same checkpoint identifier, processor first.
	model_name = "Murasajo/Llama-3.2-VL-Finetuned-on-HandwrittenText"
	processor, model = (
	    AutoProcessor.from_pretrained(model_name),
	    AutoModelForVision2Seq.from_pretrained(model_name),
	)

	def extract_text(image):
	    """Run the OCR model on one image and return the decoded text.

	    Args:
	        image: The picture to read. May be a NumPy array (2-D grayscale or
	            with a trailing channel axis), a ``PIL.Image.Image``, or
	            anything ``PIL.Image.open`` accepts (a path or a binary file
	            object). ``None`` is reported as a missing input.

	    Returns:
	        The first decoded sequence produced by the model, or a string
	        beginning with ``"Error: "`` when the input is missing or any
	        processing step raises.
	    """
	    # Without this guard None falls through to Image.open and the caller
	    # gets a confusing low-level attribute error instead of a clear message.
	    if image is None:
	        return "Error: No image provided."

	    try:
	        if isinstance(image, np.ndarray):
	            if image.ndim == 2:
	                # Grayscale (H, W): replicate into three channels.
	                image = np.stack([image] * 3, axis=-1)
	            # convert("RGB") drops an alpha channel if the array had one,
	            # so this branch yields the same mode as the file-based branch.
	            image = Image.fromarray(image).convert("RGB")
	        elif isinstance(image, Image.Image):
	            # Already decoded; only the mode needs normalising.
	            image = image.convert("RGB")
	        else:
	            # Path or file object: close the underlying file once the
	            # pixel data has been copied into the converted image.
	            with Image.open(image) as opened:
	                image = opened.convert("RGB")

	        # NOTE(review): only pixel_values is forwarded to generate();
	        # confirm this checkpoint needs no other processor outputs.
	        pixel_values = processor(images=image, return_tensors="pt").pixel_values
	        generated_ids = model.generate(pixel_values)
	        decoded = processor.batch_decode(generated_ids, skip_special_tokens=True)
	        return decoded[0]
	    except Exception as e:
	        # UI boundary: report the failure as text rather than raising.
	        return f"Error: {e}"

	# Web UI: one image input wired to extract_text, one text output.
	iface = gr.Interface(
	    extract_text,
	    "image",
	    "text",
	    title="Handwritten Text OCR",
	    description="Upload a handwritten document and extract text using AI.",
	)

	# Start the server only when this file is executed as a script, so that
	# importing the module elsewhere does not launch the app as a side effect.
	if __name__ == "__main__":
	    iface.launch()