Spaces:

sitammeur
/

TextSnap

Running on Zero

File size: 5,347 Bytes

from PIL import ImageDraw
import numpy as np
import re


# Use a color map for bounding boxes
colormap = [
    "#0000FF",
    "#FFA500",
    "#008000",
    "#800080",
    "#A52A2A",
    "#FFC0CB",
    "#808080",
    "#808000",
    "#00FFFF",
    "#FF0000",
    "#00FF00",
    "#4B0082",
    "#4B0082",
    "#EE82EE",
    "#00FFFF",
    "#FF00FF",
    "#FF7F50",
    "#FFD700",
    "#87CEEB",
]


# Text cleaning function
def clean_text(text):
    """
    Cleans the given text by removing unwanted tokens, extra spaces,
    and ensures proper spacing between words and after periods.

    Args:
        text (str): The input text to be cleaned.

    Returns:
        str: The cleaned and properly formatted text.
    """

    # Remove unwanted tokens
    text = text.replace("<pad>", "").replace("</s>", "").strip()

    # Split the text into lines and clean each line
    lines = text.split("\n")
    cleaned_lines = [line.strip() for line in lines if line.strip()]

    # Join the cleaned lines into a single string with a space between each line
    cleaned_text = " ".join(cleaned_lines)

    # Ensure proper spacing between words and after periods using regex
    cleaned_text = re.sub(
        r"\s+", " ", cleaned_text
    )  # Replace multiple spaces with a single space
    cleaned_text = re.sub(
        r"(?<=[.])(?=[^\s])", r" ", cleaned_text
    )  # Add space after a period if not followed by a space

    # Return the cleaned text
    return cleaned_text


# Convert hex color to RGBA with the given alpha
def hex_to_rgba(hex_color, alpha):
    """
    Convert a hexadecimal color code to RGBA format.

    Args:
        hex_color (str): The hexadecimal color code (e.g., "#FF0000").
        alpha (int): The alpha value for the RGBA color (0-255).

    Returns:
        tuple: A tuple representing the RGBA color values (red, green, blue, alpha).
    """
    hex_color = hex_color.lstrip("#")
    r, g, b = int(hex_color[0:2], 16), int(hex_color[2:4], 16), int(hex_color[4:6], 16)
    return (r, g, b, alpha)


# Draw OCR bounding boxes with enhanced visual elements
def draw_ocr_bboxes(image, prediction):
    """
    Draw bounding boxes with enhanced visual elements on the given image based on the OCR prediction.

    Args:
        image (PIL.Image.Image): The input image on which the bounding boxes will be drawn.
        prediction (dict): The OCR prediction containing 'quad_boxes' and 'labels'.

    Returns:
        PIL.Image.Image: The image with the bounding boxes drawn.
    """

    # Create a drawing object for the image with RGBA mode
    draw = ImageDraw.Draw(image, "RGBA")

    # Extract bounding boxes and labels from the prediction
    bboxes, labels = prediction["quad_boxes"], prediction["labels"]

    for i, (box, label) in enumerate(zip(bboxes, labels)):
        # Select color for the bounding box and label
        color = colormap[i % len(colormap)]
        new_box = (np.array(box)).tolist()

        # Define the outline width and corner radius for the bounding box
        box_outline_width = 3
        corner_radius = 10

        # Draw rounded corners for the bounding box
        for j in range(4):
            start_x, start_y = new_box[j * 2], new_box[j * 2 + 1]
            end_x, end_y = new_box[(j * 2 + 2) % 8], new_box[(j * 2 + 3) % 8]

            # Draw the arcs for the rounded corners
            draw.arc(
                [
                    (start_x - corner_radius, start_y - corner_radius),
                    (start_x + corner_radius, start_y + corner_radius),
                ],
                90 + j * 90,
                180 + j * 90,
                fill=color,
                width=box_outline_width,
            )
            draw.arc(
                [
                    (end_x - corner_radius, end_y - corner_radius),
                    (end_x + corner_radius, end_y + corner_radius),
                ],
                j * 90,
                90 + j * 90,
                fill=color,
                width=box_outline_width,
            )

            # Draw the lines connecting the arcs
            if j in [0, 1, 2]:
                draw.line(
                    [
                        (start_x + corner_radius if j != 1 else start_x, start_y),
                        (end_x - corner_radius if j != 1 else end_x, end_y),
                    ],
                    fill=color,
                    width=box_outline_width,
                )
            else:
                draw.line(
                    [
                        (start_x, start_y + corner_radius),
                        (end_x, end_y - corner_radius),
                    ],
                    fill=color,
                    width=box_outline_width,
                )

        # Calculate the position for the text label
        text_x, text_y = min(new_box[0::2]), min(new_box[1::2]) - 20
        text_w, text_h = draw.textsize(label)
        rgba_color = hex_to_rgba(color, 200)  # Semi-transparent background for text

        # Draw the background rectangle for the text
        draw.rectangle(
            [text_x, text_y, text_x + text_w + 10, text_y + text_h + 10],
            fill=rgba_color,
        )

        # Draw the text label
        draw.text((text_x + 5, text_y + 5), label, fill=(0, 0, 0, 255))

    # Return the image with the OCR boxes drawn
    return image