File size: 5,583 Bytes
317211f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
"""Responsible for (pre)processing images and PDFs before they are passed to the OCR
engine and other miscellaneous actions concerning processing.
"""
import os
import re
import subprocess
import tempfile
from pathlib import Path
from typing import List

# import cv2
# import numpy as np
import pyocr
from pdf2image import pdf2image
from PIL import Image  #, ImageOps

# Resolution used when rasterising PDF pages: passed as ``dpi`` to pdf2image and as
# ``-density`` to ImageMagick.
PDF_CONVERSION_DPI = 300
# An orientation detection is applied only when its reported confidence is strictly
# greater than this value (see correct_orientation).
ROTATION_CONFIDENCE_THRESHOLD = 2.0

# def rotate_image(image: Image, angle: float):
#     """Rotates the given image by the given angle.

#     Args:
#         image(PIL.Image.Image): The image to be rotated.
#         angle(float): The angle to rotate the image by.

#     Returns: The rotated image.
#     """
#     image = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
#     height, width, _ = image.shape  # Get the image height, width, and channels
#     # Compute the rotation matrix
#     rotation_matrix = cv2.getRotationMatrix2D((width / 2, height / 2), angle, 1)
#     # Apply the rotation to the image
#     rotated_image = cv2.warpAffine(image, rotation_matrix, (width, height))
#     rotated_image = Image.fromarray(cv2.cvtColor(rotated_image, cv2.COLOR_BGR2RGB))
#     return rotated_image


# class PDF_CONVERTER(enum.Enum):
#     PDF2IMAGE = 1
#     IMAGEMAGICK = 2


def correct_orientation(image: Image.Image) -> Image.Image:
    """Returns an upright version of the given image, based on Tesseract's
    orientation detection.

    If detection fails, the failure is printed and the image is treated as having
    an angle of 0 with a confidence of 100. A detected angle is only applied when
    its confidence exceeds ROTATION_CONFIDENCE_THRESHOLD.

    Args:
        image(PIL.Image.Image): The pillow image to be corrected.

    Returns: A new pillow image (rotated, or a plain copy). The original image is
             left open.

    Raises:
        Exception: If Tesseract is not available.
    """
    tesseract = pyocr.tesseract
    if not tesseract.is_available():
        raise Exception("Tesseract is not available.")

    try:
        detected = tesseract.detect_orientation(image)
    except pyocr.PyocrException as e:
        print("Orientation detection failed: {}".format(e))
        # Fall back to the defaults below when nothing could be detected.
        detected = {}

    angle = detected.get("angle", 0)
    confidence = detected.get("confidence", 100)

    if confidence > ROTATION_CONFIDENCE_THRESHOLD:
        # expand=True grows the canvas so that no corner is cropped by the rotation.
        return image.rotate(angle, expand=True)
    # Detection is not trusted: hand back an untouched copy.
    return image.copy()


def convert_pdf_to_image_pdf2image(pdf_bytes: bytes) -> List[Image.Image]:
    """Renders every page of an in-memory PDF with pdf2image.

    Pages are rendered at PDF_CONVERSION_DPI.

    Args:
        pdf_bytes(bytes): The raw contents of the PDF to be converted.

    Returns: A list of pillow images, one for each page of the PDF.
    """
    return pdf2image.convert_from_bytes(pdf_bytes, dpi=PDF_CONVERSION_DPI)


def convert_pdf_to_image_ImageMagick(filename: Path, dest_folder: Path) -> Path:
    """Converts a PDF to JPEG images using ImageMagick.

    The command is passed to ImageMagick as an argument list (no shell), so paths
    containing spaces or shell metacharacters are handled safely.

    Args:
        filename(pathlib.Path): The path to the PDF to be converted.
        dest_folder(pathlib.Path): The destination folder for the converted pages. It
                                   is created if it does not exist. The output target
                                   is ``page.jpg``; for multi-page PDFs ImageMagick
                                   writes numbered files instead (e.g. page-0.jpg,
                                   page-1.jpg - exact numbering is decided by
                                   ImageMagick).

    Returns: dest_folder

    Raises:
        FileNotFoundError: If the ``magick`` executable cannot be found.
        subprocess.CalledProcessError: If ImageMagick exits with a non-zero status.
    """
    dest_folder = Path(dest_folder)
    dest_folder.mkdir(parents=True, exist_ok=True)

    # Each option and its value must be a separate argument; the previous
    # implementation glued them together without spaces, producing an invalid command.
    command = [
        "magick",
        "convert",
        "-density", str(PDF_CONVERSION_DPI),
        str(filename),
        "-quality", "100",
        str(dest_folder / "page.jpg"),
    ]
    # check=True surfaces conversion failures instead of silently returning a
    # folder with no pages in it.
    subprocess.run(command, check=True)
    return dest_folder


def preprocess_image(image: Image.Image) -> Image.Image:
    """Prepares a single image for OCR.
    Steps applied:
      1. Orientation correction

    Note that the image passed in is closed once the corrected copy has been made.

    Args:
        image(PIL.Image.Image): The image to be preprocessed. It is closed by this
                                function.

    Returns: The preprocessed pillow image.
    """
    upright = correct_orientation(image)
    # The corrected image is a separate object, so the input can be released here.
    image.close()
    return upright

def preprocess_pdf_pdf2image(pdf_bytes: bytes) -> List[Image.Image]:
    """Prepares an in-memory PDF for OCR.
    Steps applied:
      1. PDF to image conversion (via pdf2image)
      2. Orientation correction of every page

    Args:
        pdf_bytes(bytes): The raw contents of the PDF to be preprocessed.

    Returns: A list of preprocessed pillow images, one for each page of the PDF.
    """
    processed_pages = []
    for page in convert_pdf_to_image_pdf2image(pdf_bytes):
        processed_pages.append(preprocess_image(page))
        # Release the raw rendered page; only the preprocessed image is kept.
        page.close()
    return processed_pages

def _imagemagick_page_index(page_path: Path) -> int:
    """Returns the trailing page number in a converted page's file name (0 if none)."""
    match = re.search(r"(\d+)$", page_path.stem)
    return int(match.group(1)) if match else 0


def preprocess_pdf_ImageMagick(filename: Path) -> List[Image.Image]:
    """Preprocesses a PDF for future use with OCR.
    The following operations are performed:
      1. PDF to image conversion (via ImageMagick, into a temporary folder)
      2. Orientation correction

    Args:
        filename(pathlib.Path): The path to the PDF to be preprocessed.

    Returns: A list of pillow images corresponding to each page from the PDF, in
             page order. The list is empty if the conversion produced no pages.
    """
    result = []
    # The converted pages are only intermediate files, so they live in a temporary
    # folder that is removed once every page has been loaded and preprocessed.
    with tempfile.TemporaryDirectory() as temp_dir:
        dest_folder = convert_pdf_to_image_ImageMagick(filename, Path(temp_dir))
        # glob() gives no ordering guarantee and a plain string sort would put
        # page-10 before page-2, so order by the numeric suffix instead.
        page_paths = sorted(dest_folder.glob("*.jpg"), key=_imagemagick_page_index)
        for page_path in page_paths:
            # preprocess_image returns a separate in-memory image, so the result
            # stays valid after the source file is closed and deleted.
            with Image.open(page_path) as page:
                result.append(preprocess_image(page))
    return result

if __name__ == '__main__':
    # Manual smoke test 1: preprocess a single image and display the result.
    source_image = Image.open('examples/upright.jpeg')
    upright_image = preprocess_image(source_image)
    source_image.close()
    upright_image.show()
    upright_image.close()

    # Manual smoke test 2: preprocess every page of a PDF and display each page.
    with open('examples/rotated.pdf', 'rb') as pdf_file:
        pdf_contents = pdf_file.read()
    for page in preprocess_pdf_pdf2image(pdf_contents):
        page.show()
        page.close()