import gradio as gr
import cv2
import numpy as np
import pytesseract
from PIL import Image
import io
import matplotlib.pyplot as plt

# Configure pytesseract path (adjust this based on your installation)
# pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'  # Uncomment and modify for Windows
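
# Optional sketch: auto-detect the tesseract binary on PATH instead of
# hardcoding a platform-specific location (falls back to pytesseract's
# default lookup if nothing is found).
import shutil

_tesseract_path = shutil.which("tesseract")
if _tesseract_path:
    pytesseract.pytesseract.tesseract_cmd = _tesseract_path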

def preprocess_image(image):
    """Preprocess the image to improve OCR accuracy for handwritten text"""
    # Convert to grayscale if it's a color image
    if len(image.shape) == 3:
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    else:
        gray = image.copy()
    
    # Apply adaptive thresholding
    thresh = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, 
                                   cv2.THRESH_BINARY_INV, 11, 2)
    
    # Morphological opening for noise removal. Note: with a 1x1 kernel this
    # is effectively a pass-through; a (2, 2) kernel would actually remove
    # isolated specks, at the risk of thinning very fine strokes.
    kernel = np.ones((1, 1), np.uint8)
    opening = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel)
    
    # Dilate to connect components
    kernel = np.ones((2, 2), np.uint8)
    dilated = cv2.dilate(opening, kernel, iterations=1)
    
    return dilated
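
# An alternative preprocessing sketch (not wired into the app): on evenly
# lit scans, Otsu's global threshold can produce a cleaner binarization
# than adaptive thresholding. Swap it into perform_ocr to compare.
def preprocess_image_otsu(image):
    """Variant of preprocess_image() using Otsu's global threshold."""
    if len(image.shape) == 3:
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    else:
        gray = image.copy()
    # A light blur suppresses sensor noise before thresholding
    blurred = cv2.GaussianBlur(gray, (3, 3), 0)
    _, thresh = cv2.threshold(blurred, 0, 255,
                              cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
    return thresh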

def perform_ocr(input_image):
    """Process the image and perform OCR"""
    if input_image is None:
        return "No image provided", None
    
    # Convert from RGB to BGR (OpenCV format)
    image_np = np.array(input_image)
    if len(image_np.shape) == 3:
        image_np = cv2.cvtColor(image_np, cv2.COLOR_RGB2BGR)
    
    # Preprocess the image
    preprocessed = preprocess_image(image_np)
    
    # Convert back to PIL for visualization
    pil_preprocessed = Image.fromarray(preprocessed)
    
    # Tesseract options: --oem 3 selects the default engine, --psm 6 assumes
    # a single uniform block of text. Each variable needs its own -c flag;
    # without it, Tesseract treats the option as a config-file name. Quote
    # characters are left out of the whitelist to keep the quoting simple.
    custom_config = (r'--oem 3 --psm 6 -l eng -c preserve_interword_spaces=1 '
                     r'-c tessedit_char_whitelist="ABCDEFGHIJKLMNOPQRSTUVWXYZ'
                     r'abcdefghijklmnopqrstuvwxyz0123456789.,;:()[]{}!?-+*/=><_%$#@&|~^ "')
    
    # Perform OCR
    extracted_text = pytesseract.image_to_string(pil_preprocessed, config=custom_config)
    
    # Return the extracted text and the preprocessed image for visualization
    return extracted_text, pil_preprocessed
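
# Diagnostic sketch (not used by the UI): per-word confidence scores from
# pytesseract.image_to_data help judge whether a preprocessing change
# actually improved recognition.
def ocr_confidences(pil_image):
    """Return (word, confidence) pairs for each word Tesseract detected."""
    data = pytesseract.image_to_data(pil_image,
                                     output_type=pytesseract.Output.DICT)
    return [(word, float(conf))
            for word, conf in zip(data["text"], data["conf"])
            if str(word).strip() and float(conf) >= 0]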

def ocr_pipeline(input_image):
    """Complete OCR pipeline with visualization"""
    
    extracted_text, preprocessed_image = perform_ocr(input_image)
    
    # Create visualization
    if input_image is not None and preprocessed_image is not None:
        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 6))
        ax1.imshow(input_image)
        ax1.set_title("Original Image")
        ax1.axis("off")
        
        ax2.imshow(preprocessed_image, cmap='gray')
        ax2.set_title("Preprocessed Image")
        ax2.axis("off")
        
        plt.tight_layout()
        
        # Convert plot to image
        buf = io.BytesIO()
        plt.savefig(buf, format='png')
        buf.seek(0)
        viz_img = Image.open(buf)
        plt.close(fig)
        
        return extracted_text, viz_img
    
    return extracted_text, None

# Create the Gradio interface
with gr.Blocks(title="Handwritten OCR App") as app:
    gr.Markdown("# Handwritten Text OCR Extraction")
    gr.Markdown("""
    This app extracts text from handwritten notes. 
    Upload an image containing handwritten text and the app will convert it to digital text.
    """)
    
    with gr.Row():
        with gr.Column():
            input_image = gr.Image(type="pil", label="Upload Handwritten Image")
            run_button = gr.Button("Extract Text")
        
        with gr.Column():
            output_text = gr.Textbox(label="Extracted Text", lines=15)
            processed_image = gr.Image(label="Preprocessing Visualization")
    
    run_button.click(
        fn=ocr_pipeline,
        inputs=input_image,
        outputs=[output_text, processed_image]
    )
    
    gr.Markdown("""
    ## Tips for better results:
    - Ensure good lighting and contrast in the image
    - Try to keep the text as horizontal as possible
    - Clear handwriting works best
    - Cropping the image to focus on a specific section can also help
    """)
    
    # Add example images
    gr.Examples(
        examples=[
            "handwritten_sample.jpg",  # Replace with your example image paths
        ],
        inputs=input_image,
    )

# Launch the app
if __name__ == "__main__":
    app.launch()
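
# To run locally (assuming this file is saved as app.py):
#   pip install gradio opencv-python numpy pytesseract pillow matplotlib
#   python app.py
# pytesseract is only a wrapper: the Tesseract engine itself must be
# installed separately (e.g. `apt-get install tesseract-ocr` on
# Debian/Ubuntu, or the UB Mannheim installer on Windows).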