Spaces:

DeepDiveDev
/

TransformoDocs-Demo

Sleeping

App Files Files Community

TransformoDocs-Demo / app.py

DeepDiveDev

Update app.py

6ec889f verified 4 months ago

raw

history blame

3.77 kB

	import gradio as gr
	import easyocr
	from PIL import Image
	import pdf2image
	import tempfile
	import os
	import cv2
	import numpy as np
	import torch

	# Build one shared English EasyOCR reader at import time (models are
	# downloaded on first run); the GPU is used only when torch reports CUDA.
	reader = easyocr.Reader(
	    ["en"],
	    gpu=torch.cuda.is_available(),
	)

	def preprocess_image(img):
	    """Binarize an image to improve OCR accuracy for handwritten text.

	    Args:
	        img: A PIL image, or anything ``np.array`` turns into a 2-D
	            (single-channel) or 3-D (RGB channel order) pixel array.

	    Returns:
	        The 2-D array produced by ``cv2.adaptiveThreshold`` with a maximum
	        value of 255.
	    """
	    # PIL modes other than 8-bit grayscale ("L") and "RGB" -- palette,
	    # bilevel, CMYK, LA, RGBA, ... -- do not turn into a plain gray/RGB uint8
	    # array under np.array(), so normalize them through PIL first.
	    # Duck-typed on ``mode`` so raw arrays pass through untouched.
	    mode = getattr(img, "mode", None)
	    if mode is not None and mode not in ("L", "RGB"):
	        img = img.convert("RGB")

	    pixels = np.array(img)

	    # A 2-D array has no channel axis, so it is already single-channel.
	    if pixels.ndim == 2:
	        gray = pixels
	    else:
	        gray = cv2.cvtColor(pixels, cv2.COLOR_RGB2GRAY)

	    # Adaptive (Gaussian-weighted) thresholding copes with uneven lighting
	    # better than one global cut-off; block size 11, constant 2.
	    binary = cv2.adaptiveThreshold(
	        gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2
	    )

	    # NOTE(review): with a 1x1 kernel these close/open passes should leave
	    # the image unchanged; they are kept as-is so the OCR input does not
	    # change. Use a larger kernel if real noise removal is wanted.
	    kernel = np.ones((1, 1), np.uint8)
	    binary = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel)
	    binary = cv2.morphologyEx(binary, cv2.MORPH_OPEN, kernel)

	    return binary

	def extract_text_from_image(img):
	    """Run EasyOCR on a single image and return the recognized text.

	    The image goes through ``preprocess_image`` first. The text entry
	    (index 1) of every result from ``reader.readtext`` is joined with
	    newlines, and surrounding whitespace is stripped.
	    """
	    detections = reader.readtext(preprocess_image(img))
	    recognized_lines = [detection[1] for detection in detections]
	    return "\n".join(recognized_lines).strip()

	def extract_text_from_pdf(pdf_path):
	    """Extract text from all pages of a PDF file.

	    Args:
	        pdf_path: Path of the PDF to read.

	    Returns:
	        The OCR text of every page, in page order, separated by a
	        ``--- Page Break ---`` marker line. Empty string for zero pages.
	    """
	    with tempfile.TemporaryDirectory() as work_dir:
	        # pdf2image renders the pages into work_dir, so the page images are
	        # consumed while the directory still exists.
	        pages = pdf2image.convert_from_path(pdf_path, output_folder=work_dir)
	        try:
	            page_texts = [extract_text_from_image(page) for page in pages]
	        finally:
	            # Release the file handles before the directory is removed,
	            # even when OCR fails part-way through.
	            for page in pages:
	                page.close()

	    return "\n\n--- Page Break ---\n\n".join(page_texts)

	def process_file(file):
	    """Process the uploaded file (PDF or image) and return its text.

	    Args:
	        file: The upload from the Gradio ``File`` component: either a
	            filesystem path (``str`` / ``os.PathLike``) or an object whose
	            ``name`` attribute holds the path. ``None`` means nothing was
	            uploaded.

	    Returns:
	        The extracted text, or a human-readable message when no file was
	        given, the extension is unsupported, or processing failed. Failures
	        are returned as text rather than raised so the UI can show them.
	    """
	    if file is None:
	        return "No file uploaded. Please upload an image or PDF file."

	    image_extensions = (".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".tif")

	    try:
	        # Depending on the Gradio version / File(type=...) setting, the
	        # upload arrives as a plain path or as a wrapper exposing ``.name``.
	        if isinstance(file, (str, os.PathLike)):
	            file_path = os.fspath(file)
	        else:
	            file_path = file.name

	        file_extension = os.path.splitext(file_path)[1].lower()

	        if file_extension == ".pdf":
	            return extract_text_from_pdf(file_path)

	        if file_extension in image_extensions:
	            # Context manager closes the underlying file once OCR is done.
	            with Image.open(file_path) as img:
	                return extract_text_from_image(img)

	        return "Unsupported file format. Please upload a PDF or image file (JPG, PNG, BMP, TIFF)."
	    except Exception as e:
	        # UI boundary: report the failure to the user instead of crashing.
	        return f"Error processing file: {e}"

	# Assemble the Gradio UI: heading, upload/result columns, usage notes.
	with gr.Blocks(title="Handwritten Text OCR Extractor") as app:
	    gr.Markdown("# Handwritten Text OCR Extraction Tool")
	    gr.Markdown("Upload an image or PDF containing handwritten text to extract the content.")

	    # Upload controls and the OCR result sit in two columns of one row.
	    with gr.Row():
	        with gr.Column():
	            file_input = gr.File(label="Upload Image or PDF")
	            extract_button = gr.Button("Extract Text")

	        with gr.Column():
	            text_output = gr.Textbox(
	                label="Extracted Text",
	                lines=10,
	                placeholder="Extracted text will appear here...",
	            )

	    # Clicking the button feeds the upload to process_file and shows the
	    # returned string in the textbox.
	    extract_button.click(fn=process_file, inputs=[file_input], outputs=[text_output])

	    # One Markdown component per line, in this order.
	    for note in (
	        "### Notes:",
	        "- For best results, ensure the handwriting is clear and the image is well-lit",
	        "- The system works best with dark text on light background",
	        "- The first run may take longer as it downloads the OCR models",
	        "- Multiple page PDFs will show page breaks in the output",
	    ):
	        gr.Markdown(note)

	# Start the server only when this file is run as a script.
	if __name__ == "__main__":
	    app.launch()