# Hugging Face Space app (scraped page header said: "Spaces: Sleeping").
import gradio as gr
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
from PIL import Image
import numpy as np
import torch

# Primary OCR model: document-transcription TrOCR checkpoint.
processor1 = TrOCRProcessor.from_pretrained("DeepDiveDev/transformodocs-ocr")
model1 = VisionEncoderDecoderModel.from_pretrained("DeepDiveDev/transformodocs-ocr")

# Fallback model: Microsoft's handwritten-text TrOCR base, used when the
# primary model produces (near-)empty output.
processor2 = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")
model2 = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-handwritten")
# Function to extract text from handwritten images | |
def extract_text(image):
    """Extract text from a handwritten image using TrOCR.

    Runs the primary model first; if its decoded output has fewer than
    2 non-whitespace characters, retries with the
    microsoft/trocr-base-handwritten fallback model.

    Args:
        image: A PIL.Image, a numpy array (grayscale ``(H, W)`` or RGB
            ``(H, W, 3)``), or a file-path string.

    Returns:
        str: The decoded text, ``"No text detected."`` when both models
        return empty output, or an ``"Error: ..."`` message on failure
        (returned as a string so the Gradio UI can display it).
    """
    try:
        # Normalize the input to a PIL image.
        if isinstance(image, np.ndarray):
            if len(image.shape) == 2:  # Grayscale (H, W) -> replicate to 3 channels
                image = np.stack([image] * 3, axis=-1)
            image = Image.fromarray(image)
        elif isinstance(image, str):  # File path given -> open the image
            image = Image.open(image).convert("RGB")

        # Gradio may hand over RGBA/L/P images; TrOCR processors expect
        # 3-channel RGB, so convert anything else.
        if image.mode != "RGB":
            image = image.convert("RGB")

        # Downscale in place, preserving aspect ratio (better for OCR
        # than a hard resize).
        image.thumbnail((800, 800))

        # Primary model pass.
        pixel_values = processor1(images=image, return_tensors="pt").pixel_values.to(torch.float32)
        generated_ids = model1.generate(pixel_values)
        extracted_text = processor1.batch_decode(generated_ids, skip_special_tokens=True)[0]

        # Output looks empty/garbled -> fall back to the handwritten-text model.
        if len(extracted_text.strip()) < 2:
            inputs = processor2(images=image, return_tensors="pt").pixel_values.to(torch.float32)
            generated_ids = model2.generate(inputs)
            extracted_text = processor2.batch_decode(generated_ids, skip_special_tokens=True)[0]

        return extracted_text if extracted_text else "No text detected."
    except Exception as e:
        # Surface failures in the UI instead of crashing the app.
        return f"Error: {str(e)}"
# Gradio UI for OCR Extraction | |
# Gradio UI: single image upload in, extracted plain text out.
iface = gr.Interface(
    fn=extract_text,
    inputs=gr.Image(type="pil"),  # Deliver the upload as a PIL image
    outputs="text",
    title="Handwritten OCR Extraction",
    description="Upload a handwritten image to extract text using AI OCR.",
)

iface.launch()