import io

import cv2
import gradio as gr
import matplotlib
import numpy as np
import pytesseract
from PIL import Image

# Use a non-interactive backend so figures can be rendered in server threads
matplotlib.use("Agg")
import matplotlib.pyplot as plt

# Configure the pytesseract path (adjust this based on your installation)
# pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'  # Uncomment and modify for Windows


def preprocess_image(image):
    """Preprocess the image to improve OCR accuracy for handwritten text."""
    # Convert to grayscale if it's a color image
    if len(image.shape) == 3:
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    else:
        gray = image.copy()

    # Apply adaptive thresholding to handle uneven lighting
    thresh = cv2.adaptiveThreshold(
        gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY_INV, 11, 2
    )

    # Remove small specks of noise with a morphological opening
    kernel = np.ones((1, 1), np.uint8)
    opening = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel)

    # Dilate to connect broken strokes into coherent components
    kernel = np.ones((2, 2), np.uint8)
    dilated = cv2.dilate(opening, kernel, iterations=1)

    return dilated


def perform_ocr(input_image):
    """Preprocess the image and perform OCR."""
    if input_image is None:
        return "No image provided", None

    # Convert from RGB (PIL) to BGR (OpenCV convention)
    image_np = np.array(input_image)
    if len(image_np.shape) == 3:
        image_np = cv2.cvtColor(image_np, cv2.COLOR_RGB2BGR)

    # Preprocess the image
    preprocessed = preprocess_image(image_np)

    # Convert back to PIL for Tesseract and for visualization
    pil_preprocessed = Image.fromarray(preprocessed)

    # Tesseract configuration for dense, handwriting-like text: --oem 3 selects
    # the default (LSTM) engine and --psm 6 assumes a single uniform block of
    # text. Each variable needs its own -c flag. Note that some Tesseract 4.x
    # builds ignore tessedit_char_whitelist when the LSTM engine is used.
    custom_config = r'--oem 3 --psm 6 -l eng -c preserve_interword_spaces=1 -c tessedit_char_whitelist="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789.,;:\'\"()[]{}!?-+*/=><_%$#@&|~^`\\ "'

    # Perform OCR
    extracted_text = pytesseract.image_to_string(pil_preprocessed, config=custom_config)

    # Return the extracted text and the preprocessed image for visualization
    return extracted_text, pil_preprocessed


def ocr_pipeline(input_image):
    """Complete OCR pipeline with a side-by-side visualization."""
    extracted_text, preprocessed_image = perform_ocr(input_image)

    # Create a before/after visualization
    if input_image is not None and preprocessed_image is not None:
        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 6))

        ax1.imshow(input_image)
        ax1.set_title("Original Image")
        ax1.axis("off")

        ax2.imshow(preprocessed_image, cmap='gray')
        ax2.set_title("Preprocessed Image")
        ax2.axis("off")

        plt.tight_layout()

        # Render the figure to an in-memory PNG
        buf = io.BytesIO()
        plt.savefig(buf, format='png')
        buf.seek(0)
        viz_img = Image.open(buf)
        plt.close(fig)

        return extracted_text, viz_img

    return extracted_text, None


# Create the Gradio interface
with gr.Blocks(title="Handwritten OCR App") as app:
    gr.Markdown("# Handwritten Text OCR Extraction")
    gr.Markdown(
        """
        This app extracts text from handwritten notes. Upload an image containing
        handwritten text and the app will convert it to digital text.
        """
    )

    with gr.Row():
        with gr.Column():
            input_image = gr.Image(type="pil", label="Upload Handwritten Image")
            run_button = gr.Button("Extract Text")
        with gr.Column():
            output_text = gr.Textbox(label="Extracted Text", lines=15)
            processed_image = gr.Image(label="Preprocessing Visualization")

    run_button.click(
        fn=ocr_pipeline,
        inputs=input_image,
        outputs=[output_text, processed_image],
    )

    gr.Markdown(
        """
        ## Tips for better results:
        - Ensure good lighting and contrast in the image
        - Keep the text as horizontal as possible
        - Clear handwriting works best
        - Cropping the image to focus on specific sections often improves accuracy
        """
    )

    # Add example images
    gr.Examples(
        examples=[
            "handwritten_sample.jpg",  # Replace with your example image paths
        ],
        inputs=input_image,
    )

# Launch the app
if __name__ == "__main__":
    app.launch()