File size: 9,818 Bytes
9ba80f9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
# Added options to retrain all clusters or to process only images from the "new" subfolder.
import os
import cv2
import numpy as np
from sklearn.cluster import KMeans
from tensorflow.keras.models import load_model
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from joblib import dump, load
from sklearn.cluster import KMeans
from keras.models import Sequential
from keras.layers import Dense, Conv2D, MaxPooling2D, Flatten
import tensorflow as tf

# Size passed to cv2.resize for every loaded image. The same two values are
# used for the CNN input shape and for reshaping image batches before
# training and prediction, so all three stay in sync through this constant.
img_size = (1000, 1000)

def load_images_from_folder(folder):
    """
    Recursively load grayscale images from the specified folder and resize them to ``img_size``.

    Subfolders are traversed depth-first. Files are selected by extension
    (.png, .jpg, .jpeg, matched case-insensitively). A file that OpenCV cannot
    decode is skipped with a message instead of aborting the whole load, so the
    two returned lists always stay index-aligned.

    :param folder: The path to the folder containing the images to load.
    :return: A tuple containing a list of loaded and resized images and a list of their corresponding file paths.
    """
    images = []
    image_paths = []
    # os.listdir returns entries in arbitrary order; sorting makes the result
    # reproducible across runs and platforms.
    for filename in sorted(os.listdir(folder)):
        file_path = os.path.join(folder, filename)
        if os.path.isdir(file_path):
            subfolder_images, subfolder_image_paths = load_images_from_folder(file_path)
            images.extend(subfolder_images)
            image_paths.extend(subfolder_image_paths)
        elif filename.lower().endswith(('.png', '.jpg', '.jpeg')):
            img = cv2.imread(file_path, cv2.IMREAD_GRAYSCALE)
            if img is None:
                # imread signals an unreadable/corrupt file by returning None;
                # passing that to cv2.resize would raise.
                print(f'Skipping unreadable image: {file_path}')
                continue
            images.append(cv2.resize(img, img_size))
            image_paths.append(file_path)
    return images, image_paths

def train_model(folder, model_file):
    """
    Train a small CNN on the images found under the specified folder and save it to the specified file.

    Every image is given the label 1 (``np.ones``), so the network is fitted
    on a single class. If the folder contains no loadable images, nothing is
    trained and no file is written. Errors raised while fitting or saving are
    printed rather than propagated, so one failing folder does not stop a
    larger run.

    :param folder: The path to the folder containing the training data.
    :param model_file: The path to the file where the trained model will be saved.
    """
    # Load and resize training data (file paths are not needed for training)
    images, _ = load_images_from_folder(folder)

    # Nothing to train on
    if not images:
        return

    # Build the float32 batch directly and normalize pixel values. Going
    # through a dtype=object array first would box every pixel as a Python
    # object before converting, which is very slow and memory-hungry.
    batch = np.asarray(images, dtype=np.float32) / 255.0
    batch = batch.reshape(len(images), img_size[0], img_size[1], 1)
    labels = np.ones(len(images))

    # Create CNN model
    model = Sequential()
    model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(img_size[0], img_size[1], 1)))
    model.add(MaxPooling2D((2, 2)))
    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(MaxPooling2D((2, 2)))
    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(Flatten())
    model.add(Dense(64, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))

    # Compile CNN model using SGD optimizer from tf.keras.optimizers.legacy
    opt = tf.keras.optimizers.legacy.SGD()
    model.compile(optimizer=opt, loss='binary_crossentropy', metrics=['accuracy'])

    # Train CNN model; best-effort: report the error and carry on
    try:
        model.fit(batch, labels, epochs=2, batch_size=150)
        # Save trained model to file
        print(model_file, 'here')
        model.save(model_file)
    except Exception as e:
        print(e)




def classify_images(folder, model_folder, n_clusters=5, new_only=False):
    """
    Classify images in the specified folder using its trained model and a k-means algorithm.

    The model is loaded from ``<folder>/<basename of folder>.h5``. Each image
    is scored by the model and the scores are clustered with k-means.

    :param folder: The path to the folder containing the images to classify.
    :param model_folder: Accepted for interface compatibility; this function does not read it
        (the model path is derived from ``folder``).
    :param n_clusters: The number of clusters to form using the k-means algorithm.
    :param new_only: Whether to classify only images in a subfolder named "new".
    :return: A 2D list of image file paths, where each inner list corresponds to a cluster and
        contains the file paths of the images assigned to that cluster. When there are fewer
        images than ``n_clusters``, a single cluster holding all of them is returned. When there
        are no images at all, an empty list (no clusters) is returned.
    """
    # The model path is based on the original folder, even when only "new" is classified
    model_file = os.path.join(folder, os.path.basename(folder) + '.h5')

    # Load and resize images from specified folder
    if new_only:
        folder = os.path.join(folder, 'new')
    images, image_paths = load_images_from_folder(folder)

    # Nothing to classify: return before touching the model. Predicting on an
    # empty batch fails, and a folder without images has no model file to load.
    if not images:
        return []

    # Load trained model from file
    model = load_model(model_file)

    # Build the float32 batch directly (no dtype=object detour) and normalize pixel values
    batch = np.asarray(images, dtype=np.float32) / 255.0
    batch = batch.reshape(len(images), img_size[0], img_size[1], 1)

    # Obtain classification scores for each image
    scores = model.predict(batch, batch_size=200)

    # k-means needs at least n_clusters samples; otherwise keep everything together
    if len(scores) < n_clusters:
        return [image_paths]

    # Use k-means algorithm to cluster images based on their classification scores
    kmeans = KMeans(n_clusters=n_clusters, n_init=20)
    kmeans.fit(scores)

    # Create 2D list of image file paths, where each inner list corresponds to a cluster
    clusters = [[] for _ in range(n_clusters)]
    for image_path, label in zip(image_paths, kmeans.labels_):
        clusters[label].append(image_path)
    return clusters




def remove_empty_folders_recursively(directory):
    """
    Delete every empty folder found below the specified directory.

    The tree is processed depth-first, so a folder that only contained empty
    folders is itself removed once its children are gone. The top-level
    ``directory`` is never deleted, and regular files are left untouched.

    :param directory: The path to the directory to remove empty folders from.
    """
    for entry_name in os.listdir(directory):
        candidate = os.path.join(directory, entry_name)
        if not os.path.isdir(candidate):
            continue
        # Clean out the children first; this may leave the candidate empty
        remove_empty_folders_recursively(candidate)
        if len(os.listdir(candidate)) == 0:
            os.rmdir(candidate)

def train_model_recursively(folder, model_folder, max_depth=None, depth=0):
    """
    Train one model for the specified folder, then one for each of its subdirectories.

    The model for ``folder`` is written to ``<model_folder>/<basename of folder>.h5``.
    For every subdirectory the recursion passes the subdirectory itself as the
    model folder, so those models are saved inside the subdirectory they were
    trained on.

    :param folder: The path to the folder containing the training data.
    :param model_folder: The path to the folder where the model for ``folder`` will be saved.
    :param max_depth: The maximum depth of recursion. If None, recursion will continue until all subdirectories have been processed.
    :param depth: The current depth of recursion.
    """
    # Model for the current level
    train_model(folder, os.path.join(model_folder, os.path.basename(folder) + '.h5'))

    # Stop here once the depth limit has been reached
    may_descend = max_depth is None or depth < max_depth
    if not may_descend:
        return

    for entry_name in os.listdir(folder):
        child_path = os.path.join(folder, entry_name)
        if not os.path.isdir(child_path):
            continue
        # The child doubles as its own model folder
        print(child_path, child_path)
        train_model_recursively(child_path, child_path, max_depth, depth + 1)


def classify_images_recursively(folder, model_folder, n_clusters=5, max_depth=None, depth=0):
    """
    Cluster the images of the specified folder and, recursively, of each of its subdirectories.

    :param folder: The path to the folder containing the images to classify.
    :param model_folder: The path to the folder containing the trained models; forwarded unchanged to every call.
    :param n_clusters: The number of clusters to form using the k-means algorithm.
    :param max_depth: The maximum depth of recursion. If None, recursion will continue until all subdirectories have been processed.
    :param depth: The current depth of recursion.
    :return: A dictionary mapping each visited folder path to its 2D list of image file paths, where each inner list holds the file paths assigned to one cluster.
    """
    # Entry for the current folder comes first
    clusters_by_folder = {folder: classify_images(folder, model_folder, n_clusters)}

    may_descend = max_depth is None or depth < max_depth
    if may_descend:
        child_paths = (os.path.join(folder, name) for name in os.listdir(folder))
        for child_path in child_paths:
            if os.path.isdir(child_path):
                # Merge the whole subtree's results into this level's mapping
                child_result = classify_images_recursively(
                    child_path, model_folder, n_clusters, max_depth, depth + 1
                )
                clusters_by_folder.update(child_result)

    return clusters_by_folder



def _move_clusters_to_folders(clusters_by_folder):
    """Move each clustered image into a ``cluster_<i>`` subfolder of the folder it was clustered for."""
    for folder, clusters in clusters_by_folder.items():
        for i, cluster in enumerate(clusters):
            cluster_folder = os.path.join(folder, f'cluster_{i}')
            try:
                os.makedirs(cluster_folder, exist_ok=True)
            except OSError as e:
                print(e)
                continue
            for image_path in cluster:
                new_image_path = os.path.join(cluster_folder, os.path.basename(image_path))
                # os.rename silently replaces an existing file on POSIX; never
                # destroy another image that happens to share the same name.
                if os.path.exists(new_image_path):
                    print(f'Skipping {image_path}: {new_image_path} already exists')
                    continue
                # Handle failures per file: the same image is listed for a
                # folder and for its parents, so once it has been moved the
                # later entries no longer exist. One such miss must not stop
                # the remaining moves.
                try:
                    os.rename(image_path, new_image_path)
                except OSError as e:
                    print(e)


def main():
    """
    Run the whole pipeline for the ``textcv`` and ``buttoncv`` folders.

    Steps: train a model per folder and subfolder, delete empty subfolders,
    cluster the images of every folder, then move each image into a
    ``cluster_<i>`` subfolder according to its cluster.
    """
    # Train models for textcv and buttoncv folders and their subdirectories
    train_model_recursively('textcv', 'textcv')
    train_model_recursively('buttoncv', 'buttoncv')

    # Check for and remove empty subfolders
    remove_empty_folders_recursively('textcv')
    remove_empty_folders_recursively('buttoncv')

    # Classify images in textcv and buttoncv folders and their subdirectories
    text_clusters = classify_images_recursively('textcv', 'models')
    button_clusters = classify_images_recursively('buttoncv', 'models')

    # Move images in textcv clusters, then buttoncv clusters, to new folders
    _move_clusters_to_folders(text_clusters)
    _move_clusters_to_folders(button_clusters)




# Run the pipeline only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
    main()