File size: 4,610 Bytes
fa0f216 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 |
import os
import pickle
import random
import shutil
import cv2
import matplotlib.pyplot as plt
import numpy as np
from data.dataset import get_transform
def summarize_dataset(data: dict):
    """Print author and image counts for the train and test splits.

    ``data`` is indexed with the keys ``'train'`` and ``'test'``. Each split
    maps an author key to a sized collection of that author's images.
    Two lines are printed: the number of authors per split, then the total
    number of images per split.
    """
    train_split = data['train']
    test_split = data['test']

    n_train_authors = len(train_split.keys())
    n_test_authors = len(test_split.keys())
    print(f"Training authors: {n_train_authors} \t Testing authors: {n_test_authors}")

    # Total images = sum of the per-author collection sizes.
    training_images = 0
    for author_key in train_split.keys():
        training_images += len(train_split[author_key])

    testing_images = 0
    for author_key in test_split.keys():
        testing_images += len(test_split[author_key])

    print(f"Training images: {training_images} \t Testing images: {testing_images}")
def compare_data(path_a: str, path_b: str):
    """Interactively compare the training images of two pickled datasets.

    Both pickles are loaded and summarized. Then, repeatedly, a random author
    that exists in the training split of both datasets is chosen and up to
    ten of that author's images from each dataset are shown side by side in
    two OpenCV windows. Pressing any key advances to another random author;
    pressing ``q`` or ``Esc`` ends the session.

    Args:
        path_a: Path to the first pickled dataset.
        path_b: Path to the second pickled dataset.
    """
    # NOTE: pickle.load can execute arbitrary code from the file; only open
    # dataset files that come from a trusted source.
    with open(path_a, 'rb') as f:
        data_a = pickle.load(f)
    summarize_dataset(data_a)

    with open(path_b, 'rb') as f:
        data_b = pickle.load(f)
    summarize_dataset(data_b)

    # Author keys are normalised to int so that the two datasets can be
    # matched even when their keys differ in representation (e.g. '013' vs 13).
    training_a = {int(k): v for k, v in data_a['train'].items()}
    training_b = {int(k): v for k, v in data_b['train'].items()}

    # Sample only from authors present in both datasets. Picking from all of
    # dataset A and retrying on a miss would spin forever (without ever
    # waiting for a key) when the datasets share no author.
    shared_authors = [author for author in training_a if author in training_b]
    missing = len(training_a) - len(shared_authors)
    if missing:
        print(f"{missing} author(s) of the first dataset not found in second dataset")
    if not shared_authors:
        print("No author is present in both datasets")
        return

    while True:
        author = random.choice(shared_authors)
        author_images_a = [np.array(im_dict["img"]) for im_dict in training_a[author]]
        author_images_b = [np.array(im_dict["img"]) for im_dict in training_b[author]]

        # np.hstack needs all images of a strip to agree on every axis except
        # the horizontal one -- presumably all samples share one height.
        vis_a = np.hstack(author_images_a[:10])
        vis_b = np.hstack(author_images_b[:10])

        cv2.imshow("Author a", vis_a)
        cv2.imshow("Author b", vis_b)

        # Mask to the low byte so the key code compares reliably.
        key = cv2.waitKey(0) & 0xFF
        if key in (ord('q'), 27):  # 27 == Esc
            break

    cv2.destroyAllWindows()
def show_dataset(path: str, samples: int = 10, preview_author: str = '013'):
    """Browse the training images of a pickled dataset in OpenCV windows.

    First every image of ``preview_author`` is shown one at a time (a key
    press advances). Afterwards, for each author of the training split, the
    first ``samples`` images are stacked horizontally and shown as one strip,
    again waiting for a key press between authors.

    Args:
        path: Path to the pickled dataset.
        samples: Maximum number of images per author in the overview strip.
        preview_author: Key of the author whose images are shown individually
            first. If the dataset has no such author the preview is skipped
            instead of raising ``KeyError``.
    """
    # NOTE: pickle.load can execute arbitrary code from the file; only open
    # dataset files that come from a trusted source.
    with open(path, 'rb') as f:
        data = pickle.load(f)
    summarize_dataset(data)

    training = data['train']

    if preview_author in training:
        for im_dict in training[preview_author]:
            img = np.array(im_dict["img"]).astype(np.uint8)
            cv2.imshow('image', img)
            cv2.waitKey(0)
    else:
        print(f"Author: {preview_author} not found in dataset")

    for author in list(training.keys()):
        author_images = [np.array(im_dict["img"]).astype(np.uint8) for im_dict in training[author]]
        strip = author_images[:samples]
        if not strip:
            # np.hstack raises on an empty sequence; nothing to display.
            print(f"Author: {author} has no images to show")
            continue

        vis = np.hstack(strip)
        print(f"Author: {author}")
        # Close the previous window(s) so only the current strip is visible.
        cv2.destroyAllWindows()
        cv2.imshow("vis", vis)
        cv2.waitKey(0)
def test_transform(path: str):
    """Visually check that the dataset transform can be inverted losslessly.

    For every training image of the pickled dataset, the image is converted
    to 8-bit grayscale, passed through ``get_transform(grayscale=True)``,
    mapped back to the 0..255 range and compared pixel by pixel with the
    original. The original, the restored image, a combined strip (restored,
    original, mismatch mask) and intensity histograms of both images are
    displayed; a key press in the OpenCV window advances to the next image.

    Args:
        path: Path to the pickled dataset.
    """
    # NOTE: pickle.load can execute arbitrary code from the file; only open
    # dataset files that come from a trusted source.
    with open(path, 'rb') as f:
        data = pickle.load(f)
    summarize_dataset(data)

    training = data['train']
    transform = get_transform(grayscale=True)

    for author in training.values():
        for image_dict in author:
            # presumably a PIL image ('convert' API) -- confirm against the
            # code that builds the pickle.
            original_image = image_dict['img'].convert('L')
            transformed_image = transform(original_image).detach().numpy()

            # Invert the scaling, assuming the transform maps pixel values to
            # [-1, 1] (TODO confirm in get_transform). Round to the nearest
            # integer before casting: a plain astype(np.uint8) truncates, so
            # float round-off such as 199.99998 would become 199 and be
            # reported as a wrong pixel. Clip to avoid uint8 wrap-around.
            restored = np.rint(((transformed_image + 1) / 2) * 255)
            restored_image = np.clip(restored, 0, 255).astype(np.uint8)
            restored_image = np.squeeze(restored_image)

            original_image = np.array(original_image)

            # White where the round trip changed a pixel, black elsewhere.
            wrong_pixels = (original_image != restored_image).astype(np.uint8) * 255
            combined = np.hstack((restored_image, original_image, wrong_pixels))

            cv2.imshow("original", original_image)
            cv2.imshow("restored", restored_image)
            cv2.imshow("combined", combined)

            fig, ax = plt.subplots(1, 2)
            ax[0].hist(original_image.flatten())
            ax[1].hist(restored_image.flatten())
            plt.show()
            # Release the figure so they do not pile up across iterations.
            plt.close(fig)

            cv2.waitKey(0)
def dump_words(data_path: str = "../files/IAM-32.pickle", p_mark: str = 'point', p: str = '.',
               target_root: str = "../saved_images/debug"):
    """Write every training image whose label contains ``p`` to a debug folder.

    The images are saved as ``0.png``, ``1.png``, ... inside
    ``<target_root>/<p_mark>``. That folder is deleted first if it already
    exists, so each run starts from an empty directory.

    Args:
        data_path: Path to the pickled dataset. Uses forward slashes so the
            default works on POSIX as well as Windows.
        p_mark: Name of the output sub-folder (a filesystem-safe name for ``p``).
        p: Substring to look for in the image labels.
        target_root: Directory under which the ``p_mark`` folder is created.
    """
    # NOTE: pickle.load can execute arbitrary code from the file; only open
    # dataset files that come from a trusted source.
    with open(data_path, 'rb') as f:
        data = pickle.load(f)

    training = data['train']

    target_folder = f"{target_root}/{p_mark}"
    if os.path.exists(target_folder):
        shutil.rmtree(target_folder)
    # makedirs (not mkdir) so that missing parent folders are created too.
    os.makedirs(target_folder)

    count = 0
    for author in list(training.keys()):
        for im_dict in training[author]:
            if p not in str(im_dict["label"]):
                continue
            img = np.array(im_dict["img"]).astype(np.uint8)
            cv2.imwrite(os.path.join(target_folder, f"{count}.png"), img)
            count += 1
if __name__ == "__main__":
    # Script entry point: run the transform round-trip check on the IAM-32
    # pickle. The calls below are alternative entry points for manual use.
    test_transform(path="../files/IAM-32.pickle")
    #show_dataset("../files/IAM-32.pickle")
    #compare_data(r"../files/IAM-32.pickle", r"../files/_IAM-32.pickle")
|