|
|
|
import os |
|
import cv2 |
|
import numpy as np |
|
from sklearn.cluster import KMeans |
|
from tensorflow.keras.models import load_model |
|
from sklearn.svm import SVC |
|
from sklearn.model_selection import train_test_split |
|
from joblib import dump, load |
|
from sklearn.cluster import KMeans |
|
from keras.models import Sequential |
|
from keras.layers import Dense, Conv2D, MaxPooling2D, Flatten |
|
import tensorflow as tf |
|
|
|
|
|
# Size in pixels that every loaded image is resized to. The same two values
# are used as the spatial input shape of the network built in train_model.
img_size = (1000, 1000)


def load_images_from_folder(folder):
    """
    Load and resize images from the specified folder and all of its subfolders.

    Files are selected by extension (.png, .jpg, .jpeg, matched
    case-insensitively), read as single-channel grayscale and resized to
    ``img_size``. Files that OpenCV cannot decode are skipped, so the two
    returned lists always have the same length and line up index by index.
    Directory entries are visited in sorted order so the result does not
    depend on the order the filesystem happens to report them in.

    :param folder: The path to the folder containing the images to load.
    :return: A tuple containing a list of loaded and resized images and a list of their corresponding file paths.
    """
    images = []
    image_paths = []
    for filename in sorted(os.listdir(folder)):
        file_path = os.path.join(folder, filename)
        if os.path.isdir(file_path):
            subfolder_images, subfolder_image_paths = load_images_from_folder(file_path)
            images.extend(subfolder_images)
            image_paths.extend(subfolder_image_paths)
        elif filename.lower().endswith(('.png', '.jpg', '.jpeg')):
            img = cv2.imread(file_path, cv2.IMREAD_GRAYSCALE)
            if img is None:
                # cv2.imread reports an unreadable/corrupt file by returning
                # None instead of raising; resizing None would crash.
                continue
            images.append(cv2.resize(img, img_size))
            image_paths.append(file_path)
    return images, image_paths
|
|
|
def train_model(folder, model_file):
    """
    Train a model for the specified folder and save it to the specified file.

    All images found under ``folder`` (recursively) are scaled to the range
    [0, 1] and fed to a small convolutional network with a single sigmoid
    output. Every image is given the label 1. If the folder contains no
    images nothing is trained and no file is written. Errors raised while
    fitting or saving are printed and swallowed so that a failure in one
    folder does not abort the caller's traversal of other folders.

    :param folder: The path to the folder containing the training data.
    :param model_file: The path to the file where the trained model will be saved.
    """
    images, _ = load_images_from_folder(folder)
    if not images:
        # Nothing to fit on; skip building the network altogether.
        return

    # Build the float32 batch directly (no object-dtype intermediate), scale
    # pixel values to [0, 1] and add the trailing channel axis Conv2D expects.
    samples = np.asarray(images, dtype=np.float32) / 255.0
    samples = samples.reshape(len(images), img_size[0], img_size[1], 1)
    labels = np.ones(len(images))

    model = Sequential()
    model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(img_size[0], img_size[1], 1)))
    model.add(MaxPooling2D((2, 2)))
    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(MaxPooling2D((2, 2)))
    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(Flatten())
    model.add(Dense(64, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))

    opt = tf.keras.optimizers.legacy.SGD()
    model.compile(optimizer=opt, loss='binary_crossentropy', metrics=['accuracy'])

    try:
        model.fit(samples, labels, epochs=2, batch_size=150)

        print(model_file, 'here')
        model.save(model_file)
    except Exception as e:
        # Deliberately best-effort: report the problem and let the caller
        # carry on with the remaining folders.
        print(e)
|
|
|
|
|
|
|
|
|
def classify_images(folder, model_folder, n_clusters=5, new_only=False):
    """
    Classify images in the specified folder using the specified model and a k-means algorithm.

    The model is read from ``<folder>/<basename of folder>.h5``. Each image is
    passed through the model and k-means is run on the model's predictions.
    When there are fewer images than ``n_clusters`` (including none at all)
    every image is returned in one single group; in that case the model is
    not loaded, because its predictions would not be used.

    :param folder: The path to the folder containing the images to classify.
    :param model_folder: The path to the folder containing the trained model.
        NOTE: accepted for interface compatibility but currently not used;
        the model is always looked up inside ``folder``.
    :param n_clusters: The number of clusters to form using the k-means algorithm.
    :param new_only: Whether to classify only images in a subfolder named "new".
    :return: A 2D list of image file paths, where each inner list corresponds to a cluster and contains the file paths of the images assigned to that cluster.
    """
    # The model path is derived from the original folder, before the optional
    # switch to the "new" subfolder below.
    model_file = os.path.join(folder, os.path.basename(folder) + '.h5')

    image_folder = os.path.join(folder, 'new') if new_only else folder
    images, image_paths = load_images_from_folder(image_folder)

    if not image_paths or len(image_paths) < n_clusters:
        # Too few samples for k-means; also avoids predicting on an empty batch.
        return [image_paths]

    model = load_model(model_file)

    # Scale to [0, 1] and add the channel axis, mirroring the training input.
    batch = np.asarray(images, dtype=np.float32) / 255.0
    batch = batch.reshape(len(images), img_size[0], img_size[1], 1)
    scores = model.predict(batch, batch_size=200)

    kmeans = KMeans(n_clusters=n_clusters, n_init=20)
    kmeans.fit(scores)

    clusters = [[] for _ in range(n_clusters)]
    for image_path, label in zip(image_paths, kmeans.labels_):
        clusters[label].append(image_path)
    return clusters
|
|
|
|
|
|
|
|
|
def remove_empty_folders_recursively(directory):
    """
    Delete every empty folder found beneath the specified directory.

    Subfolders are processed depth-first, so a folder that only contained
    empty folders is itself removed once its children are gone. The
    ``directory`` passed in is never removed, and files are left untouched.

    :param directory: The path to the directory to remove empty folders from.
    """
    for entry in os.listdir(directory):
        candidate = os.path.join(directory, entry)
        if not os.path.isdir(candidate):
            continue

        # Clean out the children first; that may leave this folder empty.
        remove_empty_folders_recursively(candidate)

        if len(os.listdir(candidate)) == 0:
            os.rmdir(candidate)
|
|
|
def train_model_recursively(folder, model_folder, max_depth=None, depth=0):
    """
    Train one model for the specified folder, then one for each of its subdirectories.

    The model for ``folder`` is written to ``<model_folder>/<basename of folder>.h5``.
    For every subdirectory visited afterwards, the subdirectory itself is
    used as the model folder, so nested models are stored inside the
    folders they were trained on.

    :param folder: The path to the folder containing the training data.
    :param model_folder: The path to the folder where the model for ``folder`` will be saved.
    :param max_depth: The maximum depth of recursion. If None, recursion will continue until all subdirectories have been processed.
    :param depth: The current depth of recursion.
    """
    target_file = os.path.join(model_folder, os.path.basename(folder) + '.h5')
    train_model(folder, target_file)

    # Stop descending once the depth limit (if any) has been reached.
    if max_depth is not None and depth >= max_depth:
        return

    for entry in os.listdir(folder):
        child = os.path.join(folder, entry)
        if not os.path.isdir(child):
            continue

        # The child doubles as its own model folder, hence the same path twice.
        print(child, child)
        train_model_recursively(child, child, max_depth, depth + 1)
|
|
|
|
|
def classify_images_recursively(folder, model_folder, n_clusters=5, max_depth=None, depth=0):
    """
    Cluster the images of the specified folder and of each of its subdirectories.

    ``classify_images`` is called for ``folder`` first and then, depth
    permitting, for every subdirectory in turn; the per-folder results are
    merged into one dictionary.

    :param folder: The path to the folder containing the images to classify.
    :param model_folder: The path to the folder containing the trained models; passed through unchanged to ``classify_images``.
    :param n_clusters: The number of clusters to form using the k-means algorithm.
    :param max_depth: The maximum depth of recursion. If None, recursion will continue until all subdirectories have been processed.
    :param depth: The current depth of recursion.
    :return: A dictionary where the keys are folder paths and the values are 2D lists of image file paths, where each inner list corresponds to a cluster and contains the file paths of the images assigned to that cluster.
    """
    result = {folder: classify_images(folder, model_folder, n_clusters)}

    # Depth limit reached: report this folder only.
    if max_depth is not None and depth >= max_depth:
        return result

    children = (os.path.join(folder, name) for name in os.listdir(folder))
    for child in children:
        if os.path.isdir(child):
            nested = classify_images_recursively(child, model_folder, n_clusters, max_depth, depth + 1)
            result.update(nested)

    return result
|
|
|
|
|
|
|
def _move_clusters_into_folders(clusters_by_folder):
    """
    Move every clustered image into a ``cluster_<index>`` subfolder of its folder.

    :param clusters_by_folder: A dictionary mapping a folder path to a 2D list of image file paths, one inner list per cluster.
    """
    for folder, clusters in clusters_by_folder.items():
        for i, cluster in enumerate(clusters):
            cluster_folder = os.path.join(folder, f'cluster_{i}')
            os.makedirs(cluster_folder, exist_ok=True)
            for image_path in cluster:
                new_image_path = os.path.join(cluster_folder, os.path.basename(image_path))
                os.rename(image_path, new_image_path)


def main():
    """
    Train, clean up, classify and reorganise the 'textcv' and 'buttoncv' image trees.

    Models are trained for both trees, empty folders are removed, both trees
    are clustered, and finally the images are moved into per-cluster folders.
    Moving is best-effort per tree: the first error is printed and the rest
    of that tree is left as it is, after which the next tree is processed.
    """
    train_model_recursively('textcv', 'textcv')
    train_model_recursively('buttoncv', 'buttoncv')

    remove_empty_folders_recursively('textcv')
    remove_empty_folders_recursively('buttoncv')

    # Both trees are classified before any file is moved.
    text_clusters = classify_images_recursively('textcv', 'models')
    button_clusters = classify_images_recursively('buttoncv', 'models')

    for clusters_by_folder in (text_clusters, button_clusters):
        try:
            _move_clusters_into_folders(clusters_by_folder)
        except Exception as e:
            print(e)


if __name__ == '__main__':
    main()
|
|
|
|