File size: 4,770 Bytes
153628e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
# Copyright (C) 2021-2024, Mindee.

# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
from typing import Any, Dict, Optional

import numpy as np
from anyascii import anyascii
from PIL import Image, ImageDraw

from .fonts import get_font

__all__ = ["synthesize_page", "synthesize_kie_page"]


def synthesize_page(
    page: Dict[str, Any],
    draw_proba: bool = False,
    font_family: Optional[str] = None,
) -> np.ndarray:
    """Draw the content of an exported page (OCR response) on a blank page.

    Each word is rendered in its own white patch sized like the word's bounding box,
    then pasted at the box location on a white canvas. Boxes are clipped to the page
    and boxes with no drawable area are skipped.

    Args:
    ----
        page: exported Page object to represent. The keys read here are "dimensions"
            (height, width), and "blocks" -> "lines" -> "words", each word providing
            "geometry" as ((xmin, ymin), (xmax, ymax)) in relative coordinates, "value",
            and "confidence" (only read when `draw_proba` is True)
        draw_proba: if True, draw words in colors to represent confidence. Blue: p=1, red: p=0
        font_family: family of the font

    Returns:
    -------
        the synthesized page, as an int32 array of shape (height, width, 3)
    """
    # Draw template
    h, w = page["dimensions"]
    response = 255 * np.ones((h, w, 3), dtype=np.int32)

    # Draw each word
    for block in page["blocks"]:
        for line in block["lines"]:
            for word in line["words"]:
                # Get absolute word geometry
                (xmin, ymin), (xmax, ymax) = word["geometry"]
                xmin, xmax = int(round(w * xmin)), int(round(w * xmax))
                ymin, ymax = int(round(h * ymin)), int(round(h * ymax))

                # Clip the box to the page: the target slice below is silently truncated by
                # numpy, so an unclipped patch would no longer match its shape
                xmin, xmax = min(max(xmin, 0), w), min(max(xmax, 0), w)
                ymin, ymax = min(max(ymin, 0), h), min(max(ymax, 0), h)

                # Nothing can be drawn in an empty (or inverted) box
                if xmax <= xmin or ymax <= ymin:
                    continue

                # White drawing context adapted to font size, 0.75 factor to convert pts --> pix
                # The size is kept >= 1: a 1px-high box would otherwise request a font size of 0
                font = get_font(font_family, max(1, int(0.75 * (ymax - ymin))))
                img = Image.new("RGB", (xmax - xmin, ymax - ymin), color=(255, 255, 255))
                d = ImageDraw.Draw(img)
                # Draw in black the value of the word
                try:
                    d.text((0, 0), word["value"], font=font, fill=(0, 0, 0))
                except UnicodeEncodeError:
                    # When character cannot be encoded, use its anyascii version
                    d.text((0, 0), anyascii(word["value"]), font=font, fill=(0, 0, 0))

                # Colorize if draw_proba
                if draw_proba:
                    p = int(255 * word["confidence"])
                    # 1 where a channel value is exactly 0 (text), 0 elsewhere
                    mask = np.where(np.array(img) == 0, 1, 0)
                    proba: np.ndarray = np.array([255 - p, 0, p])
                    color = mask * proba[np.newaxis, np.newaxis, :]
                    white_mask = 255 * (1 - mask)
                    img = color + white_mask

                # Write to response page
                response[ymin:ymax, xmin:xmax, :] = np.array(img)

    return response


def synthesize_kie_page(
    page: Dict[str, Any],
    draw_proba: bool = False,
    font_family: Optional[str] = None,
) -> np.ndarray:
    """Draw the content of an exported KIE page (OCR response) on a blank page.

    Each prediction is rendered in its own white patch sized like the prediction's
    bounding box, then pasted at the box location on a white canvas. Boxes are clipped
    to the page and boxes with no drawable area are skipped.

    Args:
    ----
        page: exported Page object to represent. The keys read here are "dimensions"
            (height, width), and "predictions", a mapping whose values are lists of
            predictions, each providing "geometry" as ((xmin, ymin), (xmax, ymax)) in
            relative coordinates, "value", and "confidence" (only read when `draw_proba`
            is True)
        draw_proba: if True, draw words in colors to represent confidence. Blue: p=1, red: p=0
        font_family: family of the font

    Returns:
    -------
        the synthesized page, as an int32 array of shape (height, width, 3)
    """
    # Draw template
    h, w = page["dimensions"]
    response = 255 * np.ones((h, w, 3), dtype=np.int32)

    # Draw each word
    for predictions in page["predictions"].values():
        for prediction in predictions:
            # Get absolute word geometry
            (xmin, ymin), (xmax, ymax) = prediction["geometry"]
            xmin, xmax = int(round(w * xmin)), int(round(w * xmax))
            ymin, ymax = int(round(h * ymin)), int(round(h * ymax))

            # Clip the box to the page: the target slice below is silently truncated by
            # numpy, so an unclipped patch would no longer match its shape
            xmin, xmax = min(max(xmin, 0), w), min(max(xmax, 0), w)
            ymin, ymax = min(max(ymin, 0), h), min(max(ymax, 0), h)

            # Nothing can be drawn in an empty (or inverted) box
            if xmax <= xmin or ymax <= ymin:
                continue

            # White drawing context adapted to font size, 0.75 factor to convert pts --> pix
            # The size is kept >= 1: a 1px-high box would otherwise request a font size of 0
            font = get_font(font_family, max(1, int(0.75 * (ymax - ymin))))
            img = Image.new("RGB", (xmax - xmin, ymax - ymin), color=(255, 255, 255))
            d = ImageDraw.Draw(img)
            # Draw in black the value of the word
            try:
                d.text((0, 0), prediction["value"], font=font, fill=(0, 0, 0))
            except UnicodeEncodeError:
                # When character cannot be encoded, use its anyascii version
                d.text((0, 0), anyascii(prediction["value"]), font=font, fill=(0, 0, 0))

            # Colorize if draw_proba
            if draw_proba:
                p = int(255 * prediction["confidence"])
                # 1 where a channel value is exactly 0 (text), 0 elsewhere
                mask = np.where(np.array(img) == 0, 1, 0)
                proba: np.ndarray = np.array([255 - p, 0, p])
                color = mask * proba[np.newaxis, np.newaxis, :]
                white_mask = 255 * (1 - mask)
                img = color + white_mask

            # Write to response page
            response[ymin:ymax, xmin:xmax, :] = np.array(img)

    return response