# Copyright (C) 2021-2024, Mindee. # This program is licensed under the Apache License 2.0. # See LICENSE or go to for full license details. from typing import Any, Dict, Optional import numpy as np from anyascii import anyascii from PIL import Image, ImageDraw from .fonts import get_font __all__ = ["synthesize_page", "synthesize_kie_page"] def synthesize_page( page: Dict[str, Any], draw_proba: bool = False, font_family: Optional[str] = None, ) -> np.ndarray: """Draw a the content of the element page (OCR response) on a blank page. Args: ---- page: exported Page object to represent draw_proba: if True, draw words in colors to represent confidence. Blue: p=1, red: p=0 font_size: size of the font, default font = 13 font_family: family of the font Returns: ------- the synthesized page """ # Draw template h, w = page["dimensions"] response = 255 * np.ones((h, w, 3), dtype=np.int32) # Draw each word for block in page["blocks"]: for line in block["lines"]: for word in line["words"]: # Get absolute word geometry (xmin, ymin), (xmax, ymax) = word["geometry"] xmin, xmax = int(round(w * xmin)), int(round(w * xmax)) ymin, ymax = int(round(h * ymin)), int(round(h * ymax)) # White drawing context adapted to font size, 0.75 factor to convert pts --> pix font = get_font(font_family, int(0.75 * (ymax - ymin))) img = Image.new("RGB", (xmax - xmin, ymax - ymin), color=(255, 255, 255)) d = ImageDraw.Draw(img) # Draw in black the value of the word try: d.text((0, 0), word["value"], font=font, fill=(0, 0, 0)) except UnicodeEncodeError: # When character cannot be encoded, use its anyascii version d.text((0, 0), anyascii(word["value"]), font=font, fill=(0, 0, 0)) # Colorize if draw_proba if draw_proba: p = int(255 * word["confidence"]) mask = np.where(np.array(img) == 0, 1, 0) proba: np.ndarray = np.array([255 - p, 0, p]) color = mask * proba[np.newaxis, np.newaxis, :] white_mask = 255 * (1 - mask) img = color + white_mask # Write to response page response[ymin:ymax, xmin:xmax, :] = np.array(img) return response def synthesize_kie_page( page: Dict[str, Any], draw_proba: bool = False, font_family: Optional[str] = None, ) -> np.ndarray: """Draw a the content of the element page (OCR response) on a blank page. Args: ---- page: exported Page object to represent draw_proba: if True, draw words in colors to represent confidence. Blue: p=1, red: p=0 font_size: size of the font, default font = 13 font_family: family of the font Returns: ------- the synthesized page """ # Draw template h, w = page["dimensions"] response = 255 * np.ones((h, w, 3), dtype=np.int32) # Draw each word for predictions in page["predictions"].values(): for prediction in predictions: # Get aboslute word geometry (xmin, ymin), (xmax, ymax) = prediction["geometry"] xmin, xmax = int(round(w * xmin)), int(round(w * xmax)) ymin, ymax = int(round(h * ymin)), int(round(h * ymax)) # White drawing context adapted to font size, 0.75 factor to convert pts --> pix font = get_font(font_family, int(0.75 * (ymax - ymin))) img = Image.new("RGB", (xmax - xmin, ymax - ymin), color=(255, 255, 255)) d = ImageDraw.Draw(img) # Draw in black the value of the word try: d.text((0, 0), prediction["value"], font=font, fill=(0, 0, 0)) except UnicodeEncodeError: # When character cannot be encoded, use its anyascii version d.text((0, 0), anyascii(prediction["value"]), font=font, fill=(0, 0, 0)) # Colorize if draw_proba if draw_proba: p = int(255 * prediction["confidence"]) mask = np.where(np.array(img) == 0, 1, 0) proba: np.ndarray = np.array([255 - p, 0, p]) color = mask * proba[np.newaxis, np.newaxis, :] white_mask = 255 * (1 - mask) img = color + white_mask # Write to response page response[ymin:ymax, xmin:xmax, :] = np.array(img) return response