import gradio as gr
import cv2
import numpy as np
import pytesseract
from PIL import Image
import io
import matplotlib.pyplot as plt
# Configure pytesseract path (adjust this based on your installation)
# pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe' # Uncomment and modify for Windows
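# On Linux/macOS the tesseract binary is usually picked up from PATH, so no
# explicit path is typically needed. On a hosted environment such as Hugging
# Face Spaces, the usual approach is to install the tesseract-ocr system
# package (e.g. via a packages.txt file).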
def preprocess_image(image):
    """Preprocess the image to improve OCR accuracy for handwritten text."""
    # Convert to grayscale if it's a color image
    if len(image.shape) == 3:
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    else:
        gray = image.copy()

    # Apply adaptive thresholding (inverted binary: ink becomes white
    # foreground, which is what the morphological steps below expect)
    thresh = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                   cv2.THRESH_BINARY_INV, 11, 2)

    # Noise removal using a morphological opening (note: a 1x1 kernel is very
    # mild, effectively a no-op; a larger kernel removes more speckle)
    kernel = np.ones((1, 1), np.uint8)
    opening = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel)

    # Dilate slightly to connect broken pen strokes
    kernel = np.ones((2, 2), np.uint8)
    dilated = cv2.dilate(opening, kernel, iterations=1)

    return dilated
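# Standalone usage sketch (assumes a local file "sample.png" exists; the
# filename is illustrative only):
#   img = cv2.imread("sample.png")
#   cv2.imwrite("sample_preprocessed.png", preprocess_image(img))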
def perform_ocr(input_image):
    """Preprocess the image and run Tesseract OCR on it."""
    if input_image is None:
        return "No image provided", None

    # Convert from RGB (PIL) to BGR (OpenCV's channel order)
    image_np = np.array(input_image)
    if len(image_np.shape) == 3:
        image_np = cv2.cvtColor(image_np, cv2.COLOR_RGB2BGR)

    # Preprocess the image
    preprocessed = preprocess_image(image_np)

    # Convert back to PIL for OCR and visualization
    pil_preprocessed = Image.fromarray(preprocessed)

    # Tesseract configuration: default engine (--oem 3), assume a single
    # uniform block of text (--psm 6), keep interword spacing, and restrict
    # output to common characters. Each option needs its own -c flag; note
    # that the LSTM engine may ignore the whitelist on some Tesseract versions.
    custom_config = (
        '--oem 3 --psm 6 -l eng '
        '-c preserve_interword_spaces=1 '
        '-c tessedit_char_whitelist="ABCDEFGHIJKLMNOPQRSTUVWXYZ'
        'abcdefghijklmnopqrstuvwxyz0123456789'
        ".,;:'()[]{}!?-+*/=><_%$#@&|~^` \""
    )

    # Perform OCR
    extracted_text = pytesseract.image_to_string(pil_preprocessed, config=custom_config)

    # Return the extracted text and the preprocessed image for visualization
    return extracted_text, pil_preprocessed
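# If word-level confidences are ever needed, pytesseract.image_to_data could
# be used in place of image_to_string (a sketch, reusing the names above):
#   data = pytesseract.image_to_data(pil_preprocessed, config=custom_config,
#                                    output_type=pytesseract.Output.DICT)
#   words = [w for w, c in zip(data["text"], data["conf"]) if float(c) > 0]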
def ocr_pipeline(input_image):
    """Complete OCR pipeline with a before/after visualization."""
    extracted_text, preprocessed_image = perform_ocr(input_image)

    # Create a side-by-side figure of the original and preprocessed images
    if input_image is not None and preprocessed_image is not None:
        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 6))
        ax1.imshow(input_image)
        ax1.set_title("Original Image")
        ax1.axis("off")
        ax2.imshow(preprocessed_image, cmap='gray')
        ax2.set_title("Preprocessed Image")
        ax2.axis("off")
        plt.tight_layout()

        # Render the figure to an in-memory PNG and reload it as a PIL image
        buf = io.BytesIO()
        fig.savefig(buf, format='png')
        buf.seek(0)
        viz_img = Image.open(buf)
        plt.close(fig)

        return extracted_text, viz_img

    return extracted_text, None
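# Quick smoke test without the web UI (assumes handwritten_sample.jpg, the
# example image referenced below, exists next to this script):
#   text, viz = ocr_pipeline(Image.open("handwritten_sample.jpg"))
#   print(text)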
# Create the Gradio interface
with gr.Blocks(title="Handwritten OCR App") as app:
    gr.Markdown("# Handwritten Text OCR Extraction")
    gr.Markdown("""
    This app extracts text from handwritten notes.
    Upload an image containing handwritten text and the app will convert it to digital text.
    """)

    with gr.Row():
        with gr.Column():
            input_image = gr.Image(type="pil", label="Upload Handwritten Image")
            run_button = gr.Button("Extract Text")
        with gr.Column():
            output_text = gr.Textbox(label="Extracted Text", lines=15)
            processed_image = gr.Image(label="Preprocessing Visualization")

    run_button.click(
        fn=ocr_pipeline,
        inputs=input_image,
        outputs=[output_text, processed_image]
    )

    gr.Markdown("""
    ## Tips for better results:
    - Ensure good lighting and contrast in the image
    - Keep the text as horizontal as possible
    - Clear handwriting works best
    - Crop the image to focus on specific sections if needed
    """)

    # Add example images
    gr.Examples(
        examples=[
            "handwritten_sample.jpg",  # Replace with your example image paths
        ],
        inputs=input_image,
    )
# Launch the app
if __name__ == "__main__":
    app.launch()