import os
import shutil
import pickle

import numpy as np
import matplotlib.pyplot as plt
import cv2
from PIL import Image

import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Conv2D, Activation, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.optimizers import Adam

def clean_directory(directory, cache_file="cache.pkl"):
    """Delete corrupted images and empty class folders, returning the class count.

    The count is cached in ``cache_file``; delete that file to force a re-scan
    after the dataset changes.
    """
    if os.path.exists(cache_file):
        with open(cache_file, "rb") as f:
            num_classes = pickle.load(f)
        print("Loaded cached results.")
        return num_classes

    num_classes = 0
    for subdir, dirs, files in os.walk(directory):
        # Leaf directories (no subdirectories) are treated as class folders.
        if not dirs:
            num_classes += 1
            valid_files = []
            for file in files:
                file_path = os.path.join(subdir, file)
                try:
                    # verify() raises if the image data is truncated or corrupt.
                    with Image.open(file_path) as img:
                        img.verify()
                    valid_files.append(file)
                except (IOError, SyntaxError):
                    print(f"Removing corrupted file: {file_path}")
                    os.remove(file_path)

            # A class folder with no valid images left is removed entirely.
            if not valid_files:
                print(f"Removing empty directory: {subdir}")
                shutil.rmtree(subdir)
                num_classes -= 1

    with open(cache_file, "wb") as f:
        pickle.dump(num_classes, f)
    print("Saved results to cache.")

    return num_classes

data_dir = 'Malign/extract'

# Drop unreadable images and empty class folders, and get the remaining class count.
num_classes = clean_directory(data_dir)

# Training hyperparameters.
batch_size = 32
epochs = 50
image_size = (200, 200)

# A single ImageDataGenerator provides both subsets via an 80/20 validation split.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=0.2
)

train_generator = train_datagen.flow_from_directory(
    data_dir,
    target_size=image_size,
    batch_size=batch_size,
    class_mode='categorical',
    subset='training'
)

validation_generator = train_datagen.flow_from_directory(
    data_dir,
    target_size=image_size,
    batch_size=batch_size,
    class_mode='categorical',
    subset='validation'
)
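
# Both generators draw from the same folders; train_generator.class_indices gives the
# mapping from class-folder name to output index. If later scripts need to interpret
# predictions, one option (illustrative filename, not part of the original pipeline)
# is to persist it:
# with open("class_indices.pkl", "wb") as f:
#     pickle.dump(train_generator.class_indices, f)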

# Three convolutional blocks followed by a fully connected classifier head.
model = Sequential()

model.add(Conv2D(64, (3, 3), input_shape=(*image_size, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(64, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(64, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Flatten())
model.add(Dense(128))
model.add(Dropout(0.5))
model.add(Activation('relu'))

# Output layer sized to the class count returned by clean_directory()
# (instead of a hard-coded value), with softmax for multi-class output.
model.add(Dense(num_classes))
model.add(Activation('softmax'))

model.summary()

model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

history = model.fit(
    train_generator,
    epochs=epochs,
    validation_data=validation_generator
)
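
# matplotlib is imported above, so the curves recorded in `history` can optionally be
# plotted once training finishes (sketch only, not required for the pipeline):
# plt.plot(history.history['accuracy'], label='train')
# plt.plot(history.history['val_accuracy'], label='validation')
# plt.xlabel('epoch')
# plt.ylabel('accuracy')
# plt.legend()
# plt.show()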

model.save("malware_classifier_lime.h5")
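
# Hedged usage sketch: one way the saved model might be reloaded for a single-image
# prediction, using the load_model / load_img / img_to_array helpers imported above.
# "sample.png" is only a placeholder path, not a file provided by this project.
# clf = load_model("malware_classifier_lime.h5")
# img = load_img("sample.png", target_size=image_size)
# x = img_to_array(img) / 255.0          # apply the same 1/255 rescaling as training
# x = np.expand_dims(x, axis=0)          # add the batch dimension
# probs = clf.predict(x)
# print("Predicted class index:", int(np.argmax(probs, axis=1)[0]))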