File size: 4,610 Bytes
fa0f216
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
import os
import pickle
import random
import shutil

import cv2
import matplotlib.pyplot as plt
import numpy as np

from data.dataset import get_transform


def summarize_dataset(data: dict):
    """Print author and image counts for the train and test splits.

    Args:
        data: Mapping with ``'train'`` and ``'test'`` entries. Each entry maps
            an author key to a sized collection of that author's samples.
    """
    train_split = data['train']
    test_split = data['test']

    train_author_count = len(train_split.keys())
    test_author_count = len(test_split.keys())
    print(f"Training authors: {train_author_count} \t Testing authors: {test_author_count}")

    # Total samples per split = sum of the per-author collection sizes.
    training_images = 0
    for author_key in train_split.keys():
        training_images += len(train_split[author_key])

    testing_images = 0
    for author_key in test_split.keys():
        testing_images += len(test_split[author_key])

    print(f"Training images: {training_images} \t Testing images: {testing_images}")


def compare_data(path_a: str, path_b: str):
    """Interactively compare the training images of two pickled datasets.

    Both pickles are loaded and summarized. Random authors are then drawn from
    the first dataset; when the same author exists in the second dataset, up
    to ten images from each are stacked horizontally and shown in two windows.
    Any key advances to another random author; ``q`` or ``Esc`` ends the
    session and closes the windows.

    Args:
        path_a: Path to the first pickled dataset.
        path_b: Path to the second pickled dataset.
    """
    # NOTE(security): pickle.load can execute arbitrary code from the file;
    # only point this at trusted dataset files.
    with open(path_a, 'rb') as f:
        data_a = pickle.load(f)
        summarize_dataset(data_a)

    with open(path_b, 'rb') as f:
        data_b = pickle.load(f)
        summarize_dataset(data_b)

    # Author keys are normalized to int so both datasets are matched on the
    # same key type.
    training_a = {int(k): v for k, v in data_a['train'].items()}
    training_b = {int(k): v for k, v in data_b['train'].items()}

    # Built once; the candidate list does not change while browsing.
    authors_a = list(training_a)

    # Without any shared author the loop below could never display anything
    # and would print "not found" forever (or crash on an empty split).
    if not any(author in training_b for author in authors_a):
        print("No common authors between the two datasets")
        return

    while True:
        author = random.choice(authors_a)

        if author not in training_b:
            print(f"Author: {author} not found in second dataset")
            continue

        author_images_a = [np.array(im_dict["img"]) for im_dict in training_a[author]]
        author_images_b = [np.array(im_dict["img"]) for im_dict in training_b[author]]

        # np.hstack needs images of equal height — assumes all samples share
        # one height; TODO confirm against the dataset builder.
        vis_a = np.hstack(author_images_a[:10])
        vis_b = np.hstack(author_images_b[:10])

        cv2.imshow("Author a", vis_a)
        cv2.imshow("Author b", vis_b)

        # Mask to the low byte so the key code compares reliably across
        # platforms; 27 is Esc.
        key = cv2.waitKey(0) & 0xFF
        if key in (27, ord('q')):
            break

    cv2.destroyAllWindows()


def show_dataset(path: str, samples: int = 10, preview_author: str = '013'):
    """Browse the training images of a pickled dataset author by author.

    The dataset is loaded and summarized. Every image of ``preview_author`` is
    first shown one at a time (if that author exists), then for each training
    author the first ``samples`` images are stacked horizontally and shown.
    Each window waits for a key press before continuing.

    Args:
        path: Path to the pickled dataset.
        samples: Maximum number of images per author in the stacked view.
        preview_author: Key of the author whose images are shown individually
            before the per-author overview. Skipped with a message when the
            key is not present in the training split.
    """
    # NOTE(security): pickle.load can execute arbitrary code from the file;
    # only point this at trusted dataset files.
    with open(path, 'rb') as f:
        data = pickle.load(f)
        summarize_dataset(data)

    training = data['train']

    # Guarded lookup: indexing a fixed author key unconditionally raises
    # KeyError on datasets that do not contain that author.
    if preview_author in training:
        preview_images = [
            np.array(im_dict["img"]).astype(np.uint8) for im_dict in training[preview_author]
        ]
        for img in preview_images:
            cv2.imshow('image', img)
            cv2.waitKey(0)
    else:
        print(f"Author: {preview_author} not found in dataset, skipping preview")

    for author in list(training.keys()):
        author_images = [np.array(im_dict["img"]).astype(np.uint8) for im_dict in training[author]]

        selected = author_images[:samples]
        if not selected:
            # np.hstack raises ValueError on an empty sequence.
            print(f"Author: {author} has no images to show")
            continue

        vis = np.hstack(selected)
        print(f"Author: {author}")
        # Close the previous author's window before opening the next one.
        cv2.destroyAllWindows()
        cv2.imshow("vis", vis)
        cv2.waitKey(0)


def test_transform(path: str):
    """Visually inspect how training images survive the dataset transform.

    For every training image the grayscale version is passed through
    ``get_transform(grayscale=True)``, mapped back to 8-bit pixels, and shown
    next to the original together with a mask of differing pixels and a pair
    of intensity histograms. A key press advances to the next image.

    Args:
        path: Path to the pickled dataset.
    """
    with open(path, 'rb') as f:
        data = pickle.load(f)
        summarize_dataset(data)

    training = data['train']
    transform = get_transform(grayscale=True)

    for author_samples in training.values():
        for sample in author_samples:
            source = sample['img'].convert('L')
            transformed = transform(source).detach().numpy()

            # Maps values back with (x + 1) / 2 * 255 — presumably the
            # transform normalizes to [-1, 1]; TODO confirm in get_transform.
            # NOTE(review): astype(np.uint8) truncates instead of rounding,
            # which may report off-by-one mismatches — confirm this is intended.
            rescaled = ((transformed + 1) / 2) * 255
            restored = np.squeeze(rescaled.astype(np.uint8))
            source_pixels = np.array(source)

            # White where the restored pixel differs from the original.
            mismatch_mask = (source_pixels != restored).astype(np.uint8) * 255
            side_by_side = np.hstack((restored, source_pixels, mismatch_mask))

            windows = (
                ("original", source_pixels),
                ("restored", restored),
                ("combined", side_by_side),
            )
            for title, picture in windows:
                cv2.imshow(title, picture)

            _, axes = plt.subplots(1, 2)
            axes[0].hist(source_pixels.flatten())
            axes[1].hist(restored.flatten())
            plt.show()

            cv2.waitKey(0)

def dump_words(data_path: str = "../files/IAM-32.pickle", p_mark: str = 'point', p: str = '.'):
    """Save every training image whose label contains a given substring.

    Images are written as ``<index>.png`` into ``../saved_images/debug/<p_mark>``.
    An existing folder of that name is removed first so the output only holds
    images from the current run.

    Args:
        data_path: Path to the pickled dataset. Written with forward slashes so
            it resolves on both POSIX and Windows.
        p_mark: Name of the output sub-folder.
        p: Substring to look for in each image label.
    """
    # NOTE(security): pickle.load can execute arbitrary code from the file;
    # only point this at trusted dataset files.
    with open(data_path, 'rb') as f:
        data = pickle.load(f)

    training = data['train']

    target_folder = f"../saved_images/debug/{p_mark}"

    if os.path.exists(target_folder):
        shutil.rmtree(target_folder)

    # makedirs also creates missing parent directories, which os.mkdir does not.
    os.makedirs(target_folder)

    count = 0

    for author in list(training.keys()):
        for im_dict in training[author]:
            if p not in str(im_dict["label"]):
                continue
            # Convert only the images that are actually written out.
            img = np.array(im_dict["img"]).astype(np.uint8)
            cv2.imwrite(os.path.join(target_folder, f"{count}.png"), img)
            count += 1


if __name__ == "__main__":
    # Manual debugging entry point: runs the transform round-trip viewer on the
    # IAM-32 pickle. The commented-out calls are the other viewers defined in
    # this file; swap them in as needed.
    test_transform("../files/IAM-32.pickle")
    #show_dataset("../files/IAM-32.pickle")
    #compare_data(r"../files/IAM-32.pickle", r"../files/_IAM-32.pickle")