Spaces:
Runtime error
Runtime error
File size: 4,770 Bytes
153628e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 |
# Copyright (C) 2021-2024, Mindee.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
from typing import Any, Dict, Optional
import numpy as np
from anyascii import anyascii
from PIL import Image, ImageDraw
from .fonts import get_font
__all__ = ["synthesize_page", "synthesize_kie_page"]
def _render_element(
    response: np.ndarray,
    element: Dict[str, Any],
    draw_proba: bool,
    font_family: Optional[str],
) -> None:
    """Render one text element (word or KIE prediction) in place on the page canvas.

    Args:
    ----
        response: page canvas of shape (H, W, 3), modified in place
        element: exported element exposing "value" and "geometry" (relative
            ((xmin, ymin), (xmax, ymax)) box), plus "confidence" when draw_proba is True
        draw_proba: if True, colorize the text according to the element confidence
        font_family: family of the font
    """
    page_h, page_w = response.shape[:2]

    # Convert the relative box to absolute pixel coordinates
    (xmin, ymin), (xmax, ymax) = element["geometry"]
    xmin, xmax = int(round(page_w * xmin)), int(round(page_w * xmax))
    ymin, ymax = int(round(page_h * ymin)), int(round(page_h * ymax))

    # Part of the box that actually lies on the page. Without clipping, a box that
    # spills over the page edge makes the slice assignment below fail on a shape mismatch.
    left, right = max(xmin, 0), min(xmax, page_w)
    top, bottom = max(ymin, 0), min(ymax, page_h)
    if right <= left or bottom <= top:
        # Degenerate or fully off-page box: nothing to draw
        return

    box_w, box_h = xmax - xmin, ymax - ymin
    # White drawing context adapted to font size, 0.75 factor to convert pts --> pix.
    # The size is floored at 1 since a 1-pixel-high box would otherwise request a
    # zero-sized font, which recent Pillow releases reject.
    font = get_font(font_family, max(1, int(0.75 * box_h)))
    img = Image.new("RGB", (box_w, box_h), color=(255, 255, 255))
    drawer = ImageDraw.Draw(img)

    # Draw in black the value of the element
    try:
        drawer.text((0, 0), element["value"], font=font, fill=(0, 0, 0))
    except UnicodeEncodeError:
        # When a character cannot be encoded, use its anyascii version
        drawer.text((0, 0), anyascii(element["value"]), font=font, fill=(0, 0, 0))

    patch = np.array(img)
    if draw_proba:
        # Channel values equal to 0 (text) take the confidence color, everything else
        # becomes white. Red fades to blue as the confidence goes from 0 to 1.
        p = int(255 * element["confidence"])
        proba: np.ndarray = np.array([255 - p, 0, p])
        patch = np.where(patch == 0, proba[np.newaxis, np.newaxis, :], 255)

    # Write the visible part of the patch to the page
    response[top:bottom, left:right, :] = patch[top - ymin : bottom - ymin, left - xmin : right - xmin, :]


def synthesize_page(
    page: Dict[str, Any],
    draw_proba: bool = False,
    font_family: Optional[str] = None,
) -> np.ndarray:
    """Draw the content of the element page (OCR response) on a blank page.

    Args:
    ----
        page: exported Page object to represent
        draw_proba: if True, draw words in colors to represent confidence. Blue: p=1, red: p=0
        font_family: family of the font

    Returns:
    -------
        the synthesized page, as an int32 array of shape (H, W, 3)
    """
    # Draw template: blank white page
    h, w = page["dimensions"]
    response = np.full((h, w, 3), 255, dtype=np.int32)

    # Draw each word
    for block in page["blocks"]:
        for line in block["lines"]:
            for word in line["words"]:
                _render_element(response, word, draw_proba, font_family)

    return response


def synthesize_kie_page(
    page: Dict[str, Any],
    draw_proba: bool = False,
    font_family: Optional[str] = None,
) -> np.ndarray:
    """Draw the content of the element page (KIE response) on a blank page.

    Args:
    ----
        page: exported Page object to represent, with its "predictions" grouped by key
        draw_proba: if True, draw words in colors to represent confidence. Blue: p=1, red: p=0
        font_family: family of the font

    Returns:
    -------
        the synthesized page, as an int32 array of shape (H, W, 3)
    """
    # Draw template: blank white page
    h, w = page["dimensions"]
    response = np.full((h, w, 3), 255, dtype=np.int32)

    # Draw each prediction, whatever the key it is stored under
    for predictions in page["predictions"].values():
        for prediction in predictions:
            _render_element(response, prediction, draw_proba, font_family)

    return response
|