Spaces:
Sleeping
Sleeping
File size: 1,537 Bytes
d010bf6 d1bb7e2 c6111b8 3a8de33 e6b9318 c6111b8 398e23b e6b9318 a3df3f5 c6111b8 e6b9318 398e23b d1bb7e2 e6b9318 398e23b d010bf6 e6b9318 398e23b e6b9318 398e23b 3a8de33 398e23b e6b9318 c6111b8 398e23b c6111b8 9164d6d 398e23b 9164d6d 398e23b c6111b8 398e23b 2653a83 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 |
import gradio as gr
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
from PIL import Image
import numpy as np
import torch
# Load the TrOCR handwriting model and its paired processor once at startup.
# Both artifacts come from the same checkpoint so tokenizer/feature-extractor
# settings always match the model weights.
_CHECKPOINT = "microsoft/trocr-large-handwritten"
processor = TrOCRProcessor.from_pretrained(_CHECKPOINT)
model = VisionEncoderDecoderModel.from_pretrained(_CHECKPOINT)
# Function to extract text from handwritten images
def extract_text(image):
    """Run TrOCR on a handwritten image and return the decoded text.

    Args:
        image: Either a numpy array (H, W) grayscale or (H, W, 3) RGB as
            delivered by Gradio's "image" input, or a path/file-like object
            that PIL can open.

    Returns:
        str: The recognized text; "No text detected." when the model emits
        only whitespace; or an "Error: ..." message if processing fails
        (surfaced in the UI rather than crashing the app).
    """
    try:
        # Normalize the input to a 3-channel RGB PIL image.
        if isinstance(image, np.ndarray):
            if image.ndim == 2:  # grayscale (H, W): replicate to 3 channels
                image = np.stack([image] * 3, axis=-1)
            image = Image.fromarray(image).convert("RGB")
        else:
            image = Image.open(image).convert("RGB")
        # NOTE: do NOT convert to single-channel ("L") mode here — the TrOCR
        # image processor expects 3 channels and normalizes per channel, so a
        # grayscale image makes preprocessing fail. It also resizes to the
        # model's input size itself, so no manual resize is needed (a fixed
        # 640x640 resize would only distort the aspect ratio).
        pixel_values = processor(images=image, return_tensors="pt").pixel_values
        # Inference only: disable autograd bookkeeping to save memory/time.
        with torch.no_grad():
            generated_ids = model.generate(pixel_values)
        extracted_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
        return extracted_text if extracted_text.strip() else "No text detected."
    except Exception as e:
        # Report the failure as the function's output so Gradio displays it.
        return f"Error: {str(e)}"
# Wire the OCR function into a simple Gradio UI: one image in, one text out.
iface = gr.Interface(
    extract_text,
    inputs="image",
    outputs="text",
    title="Handwritten OCR Extractor",
    description="Upload a handwritten image to extract text.",
)

# Start the web server (blocks until the app is stopped).
iface.launch()
|