File size: 9,818 Bytes
9ba80f9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 |
#Added Retrain all clusters or only from new folder options
import os
import cv2
import numpy as np
from sklearn.cluster import KMeans
from tensorflow.keras.models import load_model
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from joblib import dump, load
from sklearn.cluster import KMeans
from keras.models import Sequential
from keras.layers import Dense, Conv2D, MaxPooling2D, Flatten
import tensorflow as tf
# Target size handed to cv2.resize for every loaded image; the same two values
# are also used for the CNN input_shape and for the reshape before fit/predict.
# NOTE(review): cv2.resize takes (width, height) while the reshape calls treat
# the pair as (rows, cols) - these agree only because the size is square;
# confirm before switching to a non-square size.
img_size = (1000, 1000)
def load_images_from_folder(folder):
    """
    Recursively load and resize all images found under the specified folder.

    Sub-directories are descended into, so the result contains the images of
    the whole tree rooted at ``folder``. Files are selected by extension
    (``.png``, ``.jpg``, ``.jpeg``, compared case-insensitively) and read as
    single-channel images (imread flag ``0``), then resized to ``img_size``.
    Files that OpenCV cannot decode are reported and skipped instead of
    aborting the whole load.

    :param folder: The path to the folder containing the images to load.
    :return: A tuple containing a list of loaded and resized images and a list of their corresponding file paths (same order, same length).
    """
    images = []
    image_paths = []
    for filename in os.listdir(folder):
        file_path = os.path.join(folder, filename)
        if os.path.isdir(file_path):
            subfolder_images, subfolder_image_paths = load_images_from_folder(file_path)
            images.extend(subfolder_images)
            image_paths.extend(subfolder_image_paths)
        elif filename.lower().endswith(('.png', '.jpg', '.jpeg')):
            img = cv2.imread(file_path, 0)
            if img is None:
                # cv2.imread reports an unreadable/corrupt file by returning
                # None rather than raising; resizing None would crash.
                print(f'Skipping unreadable image: {file_path}')
                continue
            images.append(cv2.resize(img, img_size))
            image_paths.append(file_path)
    return images, image_paths
def train_model(folder, model_file):
    """
    Train a small CNN on the images under the specified folder and save it.

    Every image is given the label 1, the network is fitted for 2 epochs with
    a batch size of 150 and the result is written with ``model.save``. When the
    folder tree contains no images nothing is trained and no file is written.
    Errors raised while fitting or saving are printed, not propagated, so a
    failure in one folder does not stop the caller from training the others.

    :param folder: The path to the folder containing the training data.
    :param model_file: The path to the file where the trained model will be saved.
    """
    images, _ = load_images_from_folder(folder)
    if not images:
        # Nothing to train on.
        return

    # Stack straight into float32 (an object-dtype array would box every
    # pixel as a Python object) and scale pixel values to [0, 1].
    samples = np.asarray(images, dtype=np.float32) / 255.0
    samples = samples.reshape(len(samples), img_size[0], img_size[1], 1)
    labels = np.ones(len(samples))

    # Create CNN model
    model = Sequential()
    model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(img_size[0], img_size[1], 1)))
    model.add(MaxPooling2D((2, 2)))
    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(MaxPooling2D((2, 2)))
    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(Flatten())
    model.add(Dense(64, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))

    # Compile CNN model using SGD optimizer from tf.keras.optimizers.legacy
    opt = tf.keras.optimizers.legacy.SGD()
    model.compile(optimizer=opt, loss='binary_crossentropy', metrics=['accuracy'])

    # Train and save; best-effort by design, so report the error and return.
    try:
        model.fit(samples, labels, epochs=2, batch_size=150)
        print(model_file, 'here')
        model.save(model_file)
    except Exception as e:
        print(e)
def classify_images(folder, model_folder, n_clusters=5, new_only=False):
    """
    Cluster the images under the specified folder by their model scores.

    The model is read from ``<folder>/<basename(folder)>.h5``, i.e. from inside
    the image folder itself (computed before ``new_only`` narrows the folder).
    Each image is scored by the model and the scores are grouped with k-means.

    When there are fewer images than ``n_clusters`` (including none at all) the
    scores could not be clustered anyway, so all paths are returned as a single
    cluster without loading the model or running inference. This also avoids a
    failure for folders for which no model was ever trained because they hold
    no images.

    :param folder: The path to the folder containing the images to classify.
    :param model_folder: Accepted for interface compatibility; not read by this function (the model path is derived from ``folder``).
    :param n_clusters: The number of clusters to form using the k-means algorithm.
    :param new_only: Whether to classify only images in a subfolder named "new".
    :return: A 2D list of image file paths, where each inner list corresponds to a cluster and contains the file paths of the images assigned to that cluster.
    """
    # Resolve the model path first: it is based on the original folder even
    # when only the "new" subfolder is classified.
    model_file = os.path.join(folder, os.path.basename(folder) + '.h5')

    if new_only:
        folder = os.path.join(folder, 'new')
    images, image_paths = load_images_from_folder(folder)

    # Too few images to form n_clusters groups: single-cluster fallback.
    if not images or len(images) < n_clusters:
        return [image_paths]

    model = load_model(model_file)

    # Stack straight into float32 (avoids an object-dtype intermediate) and
    # scale pixel values to [0, 1].
    batch = np.asarray(images, dtype=np.float32) / 255.0
    batch = batch.reshape(len(batch), img_size[0], img_size[1], 1)

    # Obtain classification scores for each image
    scores = model.predict(batch, batch_size=200)
    if len(scores) < n_clusters:
        return [image_paths]

    # Use k-means algorithm to cluster images based on their classification scores
    kmeans = KMeans(n_clusters=n_clusters, n_init=20)
    kmeans.fit(scores)

    # One inner list per cluster label, filled in image order.
    clusters = [[] for _ in range(n_clusters)]
    for image_path, label in zip(image_paths, kmeans.labels_):
        clusters[label].append(image_path)
    return clusters
def remove_empty_folders_recursively(directory):
    """
    Delete every empty folder found below the specified directory.

    The tree is walked depth-first, so a folder that only contained empty
    folders is itself removed once its children are gone. The specified
    directory itself is never removed.

    :param directory: The path to the directory to remove empty folders from.
    """
    candidates = (os.path.join(directory, name) for name in os.listdir(directory))
    for path in candidates:
        if not os.path.isdir(path):
            continue
        # Clean the children first so this folder may become empty.
        remove_empty_folders_recursively(path)
        remaining = os.listdir(path)
        if len(remaining) == 0:
            os.rmdir(path)
def train_model_recursively(folder, model_folder, max_depth=None, depth=0):
    """
    Train one model for the specified folder, then one for each subdirectory.

    The model of the starting folder is written to
    ``<model_folder>/<basename(folder)>.h5``. For every subdirectory the
    subdirectory itself is used as the model folder, so those models end up
    inside the directory they were trained on.

    :param folder: The path to the folder containing the training data.
    :param model_folder: The path to the folder where the model of ``folder`` will be saved.
    :param max_depth: The maximum depth of recursion. If None, recursion will continue until all subdirectories have been processed.
    :param depth: The current depth of recursion.
    """
    target_file = os.path.join(model_folder, os.path.basename(folder) + '.h5')
    train_model(folder, target_file)

    # Stop descending once the depth limit is reached.
    if max_depth is not None and not (depth < max_depth):
        return

    for entry in os.listdir(folder):
        child = os.path.join(folder, entry)
        if not os.path.isdir(child):
            continue
        # The child doubles as its own model folder.
        print(child, child)
        train_model_recursively(child, child, max_depth, depth + 1)
def classify_images_recursively(folder, model_folder, n_clusters=5, max_depth=None, depth=0):
    """
    Cluster the images of the specified folder and of each of its subdirectories.

    ``classify_images`` is called once per visited folder; the per-folder
    results are merged into a single dictionary.

    :param folder: The path to the folder containing the images to classify.
    :param model_folder: The path passed through unchanged to ``classify_images`` for every visited folder.
    :param n_clusters: The number of clusters to form using the k-means algorithm.
    :param max_depth: The maximum depth of recursion. If None, recursion will continue until all subdirectories have been processed.
    :param depth: The current depth of recursion.
    :return: A dictionary where the keys are folder paths and the values are 2D lists of image file paths, where each inner list corresponds to a cluster and contains the file paths of the images assigned to that cluster.
    """
    # Start with the current folder's own clusters.
    result = {folder: classify_images(folder, model_folder, n_clusters)}

    may_descend = max_depth is None or depth < max_depth
    if may_descend:
        for entry in os.listdir(folder):
            child = os.path.join(folder, entry)
            if os.path.isdir(child):
                nested = classify_images_recursively(
                    child, model_folder, n_clusters, max_depth, depth + 1
                )
                result.update(nested)

    return result
def main():
    """
    Train, clean up, classify and re-file the images of ``textcv`` and ``buttoncv``.

    Steps, in order: train a model for each folder tree, delete empty
    subfolders, cluster the images of every folder, then move each image into a
    ``cluster_<i>`` subfolder of the folder it was clustered for.
    """
    # Train models for textcv and buttoncv folders and their subdirectories
    train_model_recursively('textcv', 'textcv')
    train_model_recursively('buttoncv', 'buttoncv')

    # Check for and remove empty subfolders
    remove_empty_folders_recursively('textcv')
    remove_empty_folders_recursively('buttoncv')

    # Classify images in textcv and buttoncv folders and their subdirectories
    text_clusters = classify_images_recursively('textcv', 'models')
    button_clusters = classify_images_recursively('buttoncv', 'models')

    # Move images into their cluster folders
    _move_clusters_into_folders(text_clusters)
    _move_clusters_into_folders(button_clusters)


def _move_clusters_into_folders(clusters_by_folder):
    """
    Move every clustered image into ``<folder>/cluster_<i>``.

    Failures are reported per file instead of aborting the whole run: image
    loading is recursive, so a parent folder and its subfolder list the same
    image paths, and once the parent's move has happened the subfolder's move
    of the same file fails. That must not stop the remaining files.

    :param clusters_by_folder: Dictionary mapping a folder path to its 2D list of image paths (one inner list per cluster).
    """
    for folder, clusters in clusters_by_folder.items():
        for i, cluster in enumerate(clusters):
            cluster_folder = os.path.join(folder, f'cluster_{i}')
            try:
                os.makedirs(cluster_folder, exist_ok=True)
            except OSError as e:
                print(e)
                continue
            for image_path in cluster:
                new_image_path = os.path.join(cluster_folder, os.path.basename(image_path))
                # NOTE: images are filed by base name only; two images with the
                # same file name from different subfolders target the same path.
                try:
                    os.rename(image_path, new_image_path)
                except OSError as e:
                    print(e)


if __name__ == '__main__':
    main()
|