"""Gradio app that extracts handwritten text from images and PDFs with EasyOCR."""

import os
import tempfile

import cv2
import easyocr
import gradio as gr
import numpy as np
import pdf2image
import torch
from PIL import Image

# Initialize the OCR reader (this will download models on first run)
reader = easyocr.Reader(['en'], gpu=torch.cuda.is_available())

# File extensions routed to the image OCR path by process_file.
_IMAGE_EXTENSIONS = frozenset({".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".tif"})
_PAGE_SEPARATOR = "\n\n--- Page Break ---\n\n"


def preprocess_image(img):
    """Binarize an image to improve OCR accuracy for handwritten text.

    Args:
        img: A PIL image (or anything ``np.array`` can turn into a 2-D
            grayscale or 3-D RGB array).

    Returns:
        A single-channel numpy array produced by adaptive thresholding.
    """
    # Palette, bilevel, CMYK, etc. do not map onto the grayscale/RGB layouts
    # the OpenCV calls below expect, so normalize those modes to RGB first.
    if isinstance(img, Image.Image) and img.mode not in ("L", "RGB"):
        img = img.convert("RGB")

    # Convert PIL Image to numpy array
    img_array = np.array(img)

    # A 2-D array is already grayscale; otherwise collapse the color channels.
    if img_array.ndim == 2:
        gray = img_array
    else:
        gray = cv2.cvtColor(img_array, cv2.COLOR_RGB2GRAY)

    # Adaptive thresholding copes better with uneven lighting than a single
    # global threshold.
    binary = cv2.adaptiveThreshold(
        gray,
        255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY,
        11,
        2,
    )

    # Noise removal.
    # NOTE(review): with a 1x1 kernel these morphological passes appear to be
    # no-ops; a larger kernel may have been intended -- confirm before changing.
    kernel = np.ones((1, 1), np.uint8)
    binary = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel)
    binary = cv2.morphologyEx(binary, cv2.MORPH_OPEN, kernel)

    return binary


def extract_text_from_image(img):
    """Extract text from an image using EasyOCR.

    Args:
        img: The image to read; it is passed through ``preprocess_image``.

    Returns:
        The detected text fragments joined with newlines, with surrounding
        whitespace stripped.
    """
    # Preprocess for better handwriting recognition
    processed_img = preprocess_image(img)

    # Each result is indexed as [1] for its recognized text.
    results = reader.readtext(processed_img)

    return "\n".join(entry[1] for entry in results).strip()


def extract_text_from_pdf(pdf_path):
    """Extract text from all pages of a PDF file.

    Args:
        pdf_path: Path to the PDF file on disk.

    Returns:
        The text of every page, separated by a page-break marker.
    """
    page_texts = []
    with tempfile.TemporaryDirectory() as scratch_dir:
        pages = pdf2image.convert_from_path(pdf_path, output_folder=scratch_dir)

        # Run OCR while the scratch directory still exists: the page images
        # may be backed by files inside it. Closing each page releases its
        # file handle so the directory can be removed cleanly afterwards.
        for page in pages:
            try:
                page_texts.append(extract_text_from_image(page))
            finally:
                page.close()

    return _PAGE_SEPARATOR.join(page_texts)


def process_file(file):
    """Process the uploaded file (PDF or image) and return its text.

    Args:
        file: The upload from the Gradio file input. Either an object with a
            ``name`` attribute holding the path, or a plain path
            (``str`` / ``os.PathLike``). ``None`` means nothing was uploaded.

    Returns:
        The extracted text, or a human-readable message when no file was
        given, the format is unsupported, or processing failed.
    """
    if file is None:
        return "No file uploaded. Please upload an image or PDF file."

    # UI boundary: any failure is reported to the user as text, not raised.
    try:
        if isinstance(file, (str, os.PathLike)):
            file_path = os.fspath(file)
        else:
            file_path = file.name

        file_extension = os.path.splitext(file_path)[1].lower()

        if file_extension == ".pdf":
            return extract_text_from_pdf(file_path)

        if file_extension in _IMAGE_EXTENSIONS:
            # Context manager closes the underlying file once OCR is done.
            with Image.open(file_path) as img:
                return extract_text_from_image(img)

        return "Unsupported file format. Please upload a PDF or image file (JPG, PNG, BMP, TIFF)."
    except Exception as e:
        return f"Error processing file: {e}"


# Create Gradio interface
with gr.Blocks(title="Handwritten Text OCR Extractor") as app:
    gr.Markdown("# Handwritten Text OCR Extraction Tool")
    gr.Markdown("Upload an image or PDF containing handwritten text to extract the content.")

    with gr.Row():
        with gr.Column():
            file_input = gr.File(label="Upload Image or PDF")
            extract_button = gr.Button("Extract Text")

        with gr.Column():
            text_output = gr.Textbox(
                label="Extracted Text",
                lines=10,
                placeholder="Extracted text will appear here...",
            )

    extract_button.click(fn=process_file, inputs=[file_input], outputs=[text_output])

    gr.Markdown("### Notes:")
    gr.Markdown("- For best results, ensure the handwriting is clear and the image is well-lit")
    gr.Markdown("- The system works best with dark text on light background")
    gr.Markdown("- The first run may take longer as it downloads the OCR models")
    gr.Markdown("- Multiple page PDFs will show page breaks in the output")

# Launch the app
if __name__ == "__main__":
    app.launch()