# Spaces: Sleeping
import gradio as gr | |
import easyocr | |
from PIL import Image | |
import pdf2image | |
import tempfile | |
import os | |
import cv2 | |
import numpy as np | |
import torch | |
# Shared EasyOCR reader for English text, created once at import time.
# The first run downloads the recognition models; the GPU is used only
# when torch reports that CUDA is available.
reader = easyocr.Reader(
    lang_list=['en'],
    gpu=torch.cuda.is_available(),
)
def preprocess_image(img):
    """Binarize an image to improve OCR accuracy on handwritten text.

    Args:
        img: A PIL image, or any object ``np.array`` can turn into an 8-bit
            grayscale (2-D) or RGB/RGBA (3-D) array.

    Returns:
        The thresholded single-channel image produced by
        ``cv2.adaptiveThreshold`` (pixel values are 0 or 255).
    """
    # Pillow modes such as 'P' (palette indices), '1' (bool), 'LA', 'CMYK'
    # or 'I' do not map onto the 8-bit gray/RGB layout the OpenCV calls
    # below expect, so normalise them to RGB first. Objects without a
    # ``mode`` attribute (e.g. plain numpy arrays) pass through untouched.
    mode = getattr(img, "mode", None)
    if mode is not None and mode not in ("L", "RGB", "RGBA"):
        img = img.convert("RGB")

    # Convert PIL Image to numpy array
    img_array = np.array(img)

    # A 2-D array is already grayscale; anything else is treated as RGB(A)
    if img_array.ndim == 2:
        gray = img_array
    else:
        gray = cv2.cvtColor(img_array, cv2.COLOR_RGB2GRAY)

    # Adaptive thresholding copes with uneven lighting better than a single
    # global threshold (Gaussian-weighted 11x11 neighbourhood, offset 2)
    binary = cv2.adaptiveThreshold(
        gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2
    )

    # Noise removal via morphological close then open.
    # NOTE(review): with a 1x1 kernel these passes are effectively the
    # identity; the kernel size is kept as-is to preserve existing output.
    kernel = np.ones((1, 1), np.uint8)
    binary = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel)
    binary = cv2.morphologyEx(binary, cv2.MORPH_OPEN, kernel)
    return binary
def extract_text_from_image(img):
    """Run EasyOCR on one image and return the recognized text.

    The image is passed through ``preprocess_image`` before recognition.
    The text of each detection returned by ``reader.readtext`` is joined
    with newlines, and surrounding whitespace is stripped from the result.
    """
    # Clean up the image first to help with handwriting
    binarized = preprocess_image(img)

    # Each detection carries its recognized text at index 1
    detections = reader.readtext(binarized)
    joined = '\n'.join(detection[1] for detection in detections)
    return joined.strip()
def extract_text_from_pdf(pdf_path):
    """Extract text from all pages of a PDF file.

    Args:
        pdf_path: Filesystem path of the PDF to read.

    Returns:
        The OCR text of every page, in the order pdf2image returns the
        pages, joined by a ``--- Page Break ---`` separator. An empty
        string is returned when no pages are produced.
    """
    # pdf2image renders the pages into ``output_folder`` and returns images
    # backed by those files, so OCR has to finish before the temporary
    # directory (and the rendered pages in it) is removed.
    with tempfile.TemporaryDirectory() as path:
        pages = pdf2image.convert_from_path(pdf_path, output_folder=path)
        page_texts = [extract_text_from_image(page) for page in pages]

    return "\n\n--- Page Break ---\n\n".join(page_texts)
def process_file(file):
    """Extract text from an uploaded PDF or image file.

    Args:
        file: The value delivered by the ``gr.File`` input. May be ``None``
            (nothing uploaded), a filesystem path (``str`` or
            ``os.PathLike``), or an object exposing the path as ``.name``.

    Returns:
        The extracted text, or a human-readable message when nothing was
        uploaded, the file extension is unsupported, or processing failed.
        Exceptions are reported in the returned string rather than raised,
        so the UI always has something to display.
    """
    if file is None:
        return "No file uploaded. Please upload an image or PDF file."

    try:
        # Depending on the Gradio version/configuration the upload arrives
        # either as a plain path or as a file wrapper with a ``.name`` path.
        if isinstance(file, (str, os.PathLike)):
            file_path = os.fspath(file)
        else:
            file_path = file.name

        file_extension = os.path.splitext(file_path)[1].lower()

        if file_extension == ".pdf":
            # Process PDF
            return extract_text_from_pdf(file_path)

        if file_extension in (".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".tif"):
            # Process image; the context manager releases the file handle
            # even if OCR raises.
            with Image.open(file_path) as img:
                return extract_text_from_image(img)

        return "Unsupported file format. Please upload a PDF or image file (JPG, PNG, BMP, TIFF)."
    except Exception as e:
        # Top-level UI boundary: surface the failure as text
        return f"Error processing file: {e}"
# Gradio UI: an upload column on the left, the extracted text on the right,
# and a short list of usage notes underneath.
with gr.Blocks(title="Handwritten Text OCR Extractor") as app:
    gr.Markdown("# Handwritten Text OCR Extraction Tool")
    gr.Markdown("Upload an image or PDF containing handwritten text to extract the content.")

    with gr.Row():
        # Left column: file picker and trigger button
        with gr.Column():
            file_input = gr.File(label="Upload Image or PDF")
            extract_button = gr.Button("Extract Text")

        # Right column: OCR result
        with gr.Column():
            text_output = gr.Textbox(
                label="Extracted Text",
                lines=10,
                placeholder="Extracted text will appear here...",
            )

    # Clicking the button runs process_file on the upload and shows the result
    extract_button.click(
        fn=process_file,
        inputs=[file_input],
        outputs=[text_output],
    )

    gr.Markdown("### Notes:")
    # One Markdown component per note, in display order
    for note in (
        "- For best results, ensure the handwriting is clear and the image is well-lit",
        "- The system works best with dark text on light background",
        "- The first run may take longer as it downloads the OCR models",
        "- Multiple page PDFs will show page breaks in the output",
    ):
        gr.Markdown(note)

# Start the web server only when executed as a script
if __name__ == "__main__":
    app.launch()