# File: app.py (3.77 kB)
# Last commit: "Update app.py" by DeepDiveDev (6ec889f, verified)
import gradio as gr
import easyocr
from PIL import Image
import pdf2image
import tempfile
import os
import cv2
import numpy as np
import torch
# Shared EasyOCR reader for English text, created once at import time and
# reused by every OCR call (models are downloaded on the first run).
# The GPU is used only when torch reports that CUDA is available.
reader = easyocr.Reader(['en'], gpu=torch.cuda.is_available())
def preprocess_image(img, *, block_size=11, c=2, kernel_size=1):
    """Binarize an image to improve OCR accuracy for handwritten text.

    Args:
        img: A PIL image, or anything ``np.array`` turns into a 2-D
            (grayscale) or 3-D (RGB/RGBA) array.
        block_size: Neighbourhood size passed to the adaptive threshold
            (OpenCV requires an odd value greater than 1).
        c: Constant subtracted from the weighted neighbourhood mean.
        kernel_size: Side length of the square kernel used for the
            close/open passes. With the default of 1 a single-pixel kernel
            is used, so those passes leave the image unchanged; pass a
            larger value to actually remove speckle noise.

    Returns:
        The thresholded image as a 2-D numpy array whose pixels are 0 or 255.
    """
    # PIL modes other than L/RGB/RGBA (e.g. "1", "P", "LA", "CMYK", "I;16")
    # turn into boolean, palette-index, 2-channel or 16-bit arrays that the
    # OpenCV calls below either reject or misinterpret, so normalise them to
    # 8-bit grayscale first. Supported modes keep the original code path.
    if isinstance(img, Image.Image) and img.mode not in ("L", "RGB", "RGBA"):
        img = img.convert("L")

    img_array = np.array(img)

    # A 2-D array is already single-channel; otherwise collapse the colour
    # channels to grayscale.
    if img_array.ndim == 2:
        gray = img_array
    else:
        gray = cv2.cvtColor(img_array, cv2.COLOR_RGB2GRAY)

    # Adaptive thresholding copes with uneven lighting across the page.
    binary = cv2.adaptiveThreshold(
        gray,
        255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY,
        block_size,
        c,
    )

    # Noise removal: close, then open, with a square kernel.
    kernel = np.ones((kernel_size, kernel_size), np.uint8)
    binary = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel)
    binary = cv2.morphologyEx(binary, cv2.MORPH_OPEN, kernel)
    return binary
def extract_text_from_image(img):
    """Run OCR on a single image and return the recognised text.

    The image is preprocessed first, then passed to the shared EasyOCR
    reader. The text of each detection is placed on its own line and the
    combined string is stripped of surrounding whitespace.
    """
    cleaned = preprocess_image(img)
    detections = reader.readtext(cleaned)
    # The recognised string is the second item of each detection entry.
    return '\n'.join(entry[1] for entry in detections).strip()
def extract_text_from_pdf(pdf_path):
    """Run OCR on every page of a PDF and return the combined text.

    Each page is converted to an image and processed with
    ``extract_text_from_image``; the per-page results are joined with a
    "--- Page Break ---" separator.
    """
    page_separator = "\n\n--- Page Break ---\n\n"

    # Pages are rendered into a scratch directory that is removed on exit.
    with tempfile.TemporaryDirectory() as scratch_dir:
        pages = pdf2image.convert_from_path(pdf_path, output_folder=scratch_dir)
        # OCR happens inside the ``with`` block: the page images are
        # presumably backed by files in scratch_dir -- confirm against
        # pdf2image before moving this out.
        page_texts = [extract_text_from_image(page) for page in pages]

    return page_separator.join(page_texts)
def process_file(file):
    """Extract text from an uploaded PDF or image file.

    Args:
        file: The upload to process. May be ``None`` (nothing uploaded), a
            path given as ``str`` / ``os.PathLike``, or an object exposing
            the path through a ``name`` attribute.

    Returns:
        The extracted text, or a human-readable message when no file was
        given, the extension is unsupported, or processing failed. Errors
        are reported as text rather than raised because the result is shown
        directly in the UI.
    """
    if file is None:
        return "No file uploaded. Please upload an image or PDF file."

    try:
        # Accept a plain path as well as an upload object carrying ``.name``;
        # previously a bare string ended up in the generic error branch.
        if isinstance(file, (str, os.PathLike)):
            file_path = os.fspath(file)
        else:
            file_path = file.name

        file_extension = os.path.splitext(file_path)[1].lower()

        if file_extension == ".pdf":
            return extract_text_from_pdf(file_path)

        if file_extension in (".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".tif"):
            # Close the image file as soon as OCR is finished.
            with Image.open(file_path) as img:
                return extract_text_from_image(img)

        return "Unsupported file format. Please upload a PDF or image file (JPG, PNG, BMP, TIFF)."
    except Exception as e:
        # Top-level UI boundary: surface the failure to the user as text.
        return f"Error processing file: {e}"
# Build the Gradio UI: an upload column next to a result column, followed by
# usage notes.
with gr.Blocks(title="Handwritten Text OCR Extractor") as app:
    gr.Markdown("# Handwritten Text OCR Extraction Tool")
    gr.Markdown("Upload an image or PDF containing handwritten text to extract the content.")

    with gr.Row():
        with gr.Column():
            file_input = gr.File(label="Upload Image or PDF")
            extract_button = gr.Button("Extract Text")
        with gr.Column():
            text_output = gr.Textbox(
                label="Extracted Text",
                lines=10,
                placeholder="Extracted text will appear here...",
            )

    # Clicking the button runs OCR on the upload and fills the text box.
    extract_button.click(
        fn=process_file,
        inputs=[file_input],
        outputs=[text_output],
    )

    gr.Markdown("### Notes:")
    for _note in (
        "- For best results, ensure the handwriting is clear and the image is well-lit",
        "- The system works best with dark text on light background",
        "- The first run may take longer as it downloads the OCR models",
        "- Multiple page PDFs will show page breaks in the output",
    ):
        gr.Markdown(_note)

# Start the web server only when executed as a script.
if __name__ == "__main__":
    app.launch()