import os |
import cv2 |
import numpy as np |
from sklearn.cluster import KMeans |
from tensorflow.keras.models import load_model |
from sklearn.svm import SVC |
from sklearn.model_selection import train_test_split |
from joblib import dump, load |
from sklearn.cluster import KMeans |
from keras.models import Sequential |
from keras.layers import Dense, Conv2D, MaxPooling2D, Flatten |
import tensorflow as tf |
# Size every loaded image is resized to (passed to cv2.resize); the same two
# values define the CNN input shape (img_size[0], img_size[1], 1).
img_size = (1000, 1000)
def load_images_from_folder(folder):
    """
    Recursively load every image under ``folder`` as a resized grayscale array.

    Sub-directories are traversed depth-first. Files are selected by extension
    (``.png``, ``.jpg``, ``.jpeg``; matched case-insensitively) and files that
    OpenCV cannot decode are skipped instead of aborting the whole load.

    :param folder: The path to the folder containing the images to load.
    :return: A tuple ``(images, image_paths)``: a list of grayscale images
        resized to ``img_size`` and a parallel list of their file paths.
    """
    images = []
    image_paths = []
    for filename in os.listdir(folder):
        file_path = os.path.join(folder, filename)

        if os.path.isdir(file_path):
            subfolder_images, subfolder_image_paths = load_images_from_folder(file_path)
            images.extend(subfolder_images)
            image_paths.extend(subfolder_image_paths)
            continue

        # Lower-case first so that "PHOTO.PNG" is treated like "photo.png".
        if not filename.lower().endswith(('.png', '.jpg', '.jpeg')):
            continue

        img = cv2.imread(file_path, cv2.IMREAD_GRAYSCALE)
        if img is None:
            # cv2.imread signals an unreadable/corrupt file by returning None
            # rather than raising; passing None to cv2.resize would crash.
            print(f'Skipping unreadable image: {file_path}')
            continue

        images.append(cv2.resize(img, img_size))
        image_paths.append(file_path)
    return images, image_paths
def train_model(folder, model_file):
    """
    Train a small CNN on every image found under ``folder`` and save it.

    Every sample is given the label 1 (``np.ones``), and the network ends in a
    single sigmoid unit. If no images are found nothing is trained or saved.
    Errors raised while fitting or saving are printed, not propagated, so a
    failure in one folder does not stop a recursive training run.

    :param folder: The path to the folder containing the training data.
    :param model_file: The path to the file where the trained model will be saved.
    """
    images, _ = load_images_from_folder(folder)
    if not images:
        # Nothing to fit on; avoid building the network only to have fit() fail.
        print(f'No images found in {folder}; skipping training.')
        return

    # Single conversion to float32 in [0, 1], shaped (N, H, W, 1) for Conv2D.
    samples = np.asarray(images, dtype=np.float32) / 255.0
    samples = samples.reshape(len(images), img_size[0], img_size[1], 1)
    labels = np.ones(len(images))

    model = Sequential()
    model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(img_size[0], img_size[1], 1)))
    model.add(MaxPooling2D((2, 2)))
    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(MaxPooling2D((2, 2)))
    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(Flatten())
    model.add(Dense(64, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))

    opt = tf.keras.optimizers.legacy.SGD()
    model.compile(optimizer=opt, loss='binary_crossentropy', metrics=['accuracy'])

    try:
        model.fit(samples, labels, epochs=2, batch_size=150)
        print(model_file, 'here')
        model.save(model_file)
    except Exception as e:
        # Best-effort by design: report the failure and let the caller continue.
        print(e)
def classify_images(folder, model_folder, n_clusters=5, new_only=False):
    """
    Classify images in the specified folder using its trained model and a k-means algorithm.

    The model is looked up as ``<folder>/<basename(folder)>.h5``. If that file
    does not exist, ``<model_folder>/<basename(folder)>.h5`` is used instead.
    Each image is scored by the model and the scores are clustered with k-means.

    :param folder: The path to the folder containing the images to classify.
    :param model_folder: Fallback folder searched for the model when it is not
        stored inside ``folder`` itself.
    :param n_clusters: The number of clusters to form using the k-means algorithm.
    :param new_only: Whether to classify only images in a subfolder named "new".
    :return: A 2D list of image file paths, one inner list per cluster. When
        there are fewer images than ``n_clusters`` a single cluster holding all
        paths is returned; when there are no images at all the result is ``[]``.
    """
    model_name = os.path.basename(folder) + '.h5'
    model_file = os.path.join(folder, model_name)
    if not os.path.isfile(model_file):
        fallback_file = os.path.join(model_folder, model_name)
        if os.path.isfile(fallback_file):
            model_file = fallback_file

    if new_only:
        folder = os.path.join(folder, 'new')
        if not os.path.isdir(folder):
            # No "new" subfolder means there is nothing new to classify.
            return []

    images, image_paths = load_images_from_folder(folder)
    if not images:
        # Skip loading the model: predicting on an empty batch would fail, and
        # a folder without images may not have a trained model at all.
        return []

    model = load_model(model_file)
    samples = np.asarray(images, dtype=np.float32) / 255.0
    samples = samples.reshape(len(images), img_size[0], img_size[1], 1)
    scores = model.predict(samples, batch_size=200)

    if len(scores) < n_clusters:
        # k-means cannot form more clusters than there are samples.
        return [image_paths]

    kmeans = KMeans(n_clusters=n_clusters, n_init=20)
    kmeans.fit(scores)
    clusters = [[] for _ in range(n_clusters)]
    for image_path, label in zip(image_paths, kmeans.labels_):
        clusters[label].append(image_path)
    return clusters
def remove_empty_folders_recursively(directory):
    """
    Remove and delete empty folders in the specified directory and all of its subdirectories.

    Folders are processed bottom-up, so a folder that only contains empty
    folders is removed as well. ``directory`` itself is never removed, and
    symbolic links are left untouched.

    :param directory: The path to the directory to remove empty folders from.
    """
    for entry in os.listdir(directory):
        entry_path = os.path.join(directory, entry)

        # os.path.isdir follows symlinks: descending into a link could delete
        # folders outside this tree, and os.rmdir on a link raises.
        if os.path.islink(entry_path) or not os.path.isdir(entry_path):
            continue

        remove_empty_folders_recursively(entry_path)
        # Re-list after recursing: the folder may have just become empty.
        if not os.listdir(entry_path):
            os.rmdir(entry_path)
def train_model_recursively(folder, model_folder, max_depth=None, depth=0):
    """
    Train one model for ``folder`` and one for each of its subdirectories.

    The model for ``folder`` is written to ``<model_folder>/<basename(folder)>.h5``.
    Each subdirectory is then processed with itself as the model folder, so
    its model is saved inside that subdirectory.

    :param folder: The path to the folder containing the training data.
    :param model_folder: The path to the folder where the model for ``folder`` will be saved.
    :param max_depth: The maximum depth of recursion. If None, recursion will continue until all subdirectories have been processed.
    :param depth: The current depth of recursion.
    """
    target_file = os.path.join(model_folder, os.path.basename(folder) + '.h5')
    train_model(folder, target_file)

    # Stop descending once the depth limit (if any) has been reached.
    if max_depth is not None and depth >= max_depth:
        return

    for entry in os.listdir(folder):
        child_path = os.path.join(folder, entry)
        if not os.path.isdir(child_path):
            continue
        # The child folder doubles as its own model folder.
        print(child_path, child_path)
        train_model_recursively(child_path, child_path, max_depth, depth + 1)
def classify_images_recursively(folder, model_folder, n_clusters=5, max_depth=None, depth=0):
    """
    Run ``classify_images`` on ``folder`` and on each of its subdirectories.

    :param folder: The path to the folder containing the images to classify.
    :param model_folder: The model folder passed unchanged to every ``classify_images`` call.
    :param n_clusters: The number of clusters to form using the k-means algorithm.
    :param max_depth: The maximum depth of recursion. If None, recursion will continue until all subdirectories have been processed.
    :param depth: The current depth of recursion.
    :return: A dictionary mapping each visited folder path to the 2D list of
        image file paths returned by ``classify_images`` for that folder.
    """
    clusters_by_folder = {folder: classify_images(folder, model_folder, n_clusters)}

    # Stop descending once the depth limit (if any) has been reached.
    if max_depth is not None and depth >= max_depth:
        return clusters_by_folder

    for entry in os.listdir(folder):
        child_path = os.path.join(folder, entry)
        if not os.path.isdir(child_path):
            continue
        child_result = classify_images_recursively(
            child_path, model_folder, n_clusters, max_depth, depth + 1
        )
        clusters_by_folder.update(child_result)
    return clusters_by_folder
def _move_into_cluster_folders(clusters_by_folder):
    """Move each clustered image into a ``cluster_<i>`` subfolder of its folder key."""
    for folder, clusters in clusters_by_folder.items():
        for i, cluster in enumerate(clusters):
            cluster_folder = os.path.join(folder, f'cluster_{i}')
            try:
                os.makedirs(cluster_folder, exist_ok=True)
            except OSError as e:
                print(e)
                continue

            for image_path in cluster:
                # The same image is listed under its own folder and under every
                # ancestor folder; once moved for one of them it is gone.
                if not os.path.exists(image_path):
                    continue
                new_image_path = os.path.join(cluster_folder, os.path.basename(image_path))
                try:
                    os.rename(image_path, new_image_path)
                except OSError as e:
                    # Report and keep going so one bad file does not abort the
                    # remaining moves.
                    print(e)


def main():
    """
    Train, clean up, classify and sort the ``textcv`` and ``buttoncv`` image trees.

    Models are trained for both trees, empty folders are removed, the images
    are clustered per folder, and each image is moved into a ``cluster_<i>``
    subfolder of the folder it was clustered under.
    """
    train_model_recursively('textcv', 'textcv')
    train_model_recursively('buttoncv', 'buttoncv')

    remove_empty_folders_recursively('textcv')
    remove_empty_folders_recursively('buttoncv')

    text_clusters = classify_images_recursively('textcv', 'models')
    button_clusters = classify_images_recursively('buttoncv', 'models')

    _move_into_cluster_folders(text_clusters)
    _move_into_cluster_folders(button_clusters)
# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()