File size: 3,559 Bytes
b743670 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 |
# The goal of this script is to train the model and then save it.
import os
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, Activation, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import load_img, img_to_array
import shutil
from PIL import Image
from tensorflow.keras.preprocessing.image import load_img, img_to_array
import matplotlib.pyplot as plt
import cv2
#import seaborn as sns
import numpy as np
import pickle
def clean_directory(directory, cache_file="cache.pkl"):
    """Delete unreadable images and empty class folders; return the class count.

    Walks ``directory`` and treats every folder that has no sub-folders as
    one class. Each file in such a folder is opened with PIL and verified;
    files that fail verification are deleted. A class folder left without a
    single valid image is removed and not counted.

    The resulting count is pickled to ``cache_file``. If that file already
    exists, the stored value is returned immediately and nothing is scanned
    or deleted.

    Args:
        directory: Root folder of the dataset.
        cache_file: Path of the pickle file that stores the class count.

    Returns:
        The number of leaf folders that still hold at least one valid image,
        or whatever value was previously stored in ``cache_file``.
    """
    if os.path.exists(cache_file):
        # NOTE(review): pickle can execute arbitrary code while loading, so
        # the cache file must come from a trusted location.
        # NOTE(review): the cache is not keyed by `directory`; a cache written
        # for another dataset is returned as-is.
        with open(cache_file, "rb") as f:
            num_classes = pickle.load(f)
        print("Loaded cached results.")
        return num_classes

    num_classes = 0
    for subdir, dirs, files in os.walk(directory):
        if dirs:
            # Only leaf folders (no sub-folders) are counted as classes.
            continue

        num_classes += 1
        valid_files = []
        for file in files:
            file_path = os.path.join(subdir, file)
            try:
                # The context manager closes the file handle even when
                # verification fails, so the os.remove below is not blocked
                # by a still-open handle.
                with Image.open(file_path) as img:
                    img.verify()  # Verify if the image is not corrupted
            except (IOError, SyntaxError):
                print(f"Removing corrupted file: {file_path}")
                os.remove(file_path)
            else:
                valid_files.append(file)

        # Remove empty directories
        if not valid_files:
            print(f"Removing empty directory: {subdir}")
            shutil.rmtree(subdir)
            num_classes -= 1

    # Save the results in cache
    with open(cache_file, "wb") as f:
        pickle.dump(num_classes, f)
    print("Saved results to cache.")
    return num_classes
# Dataset root. clean_directory either validates the images under it or
# returns the class count it cached on an earlier run.
data_dir = 'Malign/extract'
num_classes = clean_directory(data_dir)

# Parameters
batch_size, epochs = 32, 50
# Passed as target_size to the data generators and reused for the model's
# input shape.
image_size = (200, 200)
# Data preprocessing: one generator multiplies pixel values by 1/255 and
# reserves 20% of the images for validation.
train_datagen = ImageDataGenerator(rescale=1./255, validation_split=0.2)

# Options shared by the training and validation iterators; only `subset`
# differs between the two calls.
_flow_options = {
    'target_size': image_size,
    'batch_size': batch_size,
    'class_mode': 'categorical',
}
train_generator = train_datagen.flow_from_directory(
    data_dir, subset='training', **_flow_options
)
validation_generator = train_datagen.flow_from_directory(
    data_dir, subset='validation', **_flow_options
)
# Model creation: three Conv2D -> ReLU -> MaxPooling stages followed by a
# dense classifier head.
model = Sequential()

# First convolution layer (also declares the input shape: image_size + 3 channels)
model.add(Conv2D(64, (3, 3), input_shape=(*image_size, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

# Second convolution layer
model.add(Conv2D(64, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

# Third convolution layer
model.add(Conv2D(64, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

# Fully connected layers
model.add(Flatten())
model.add(Dense(128))
model.add(Dropout(0.5))
model.add(Activation('relu'))

# Output layer: one unit per class. The width is taken from the training
# generator instead of a hard-coded 119 so that it always matches the width
# of the one-hot labels the generator yields for the folders found on disk.
model.add(Dense(train_generator.num_classes))
model.add(Activation('softmax'))
# Print the layer-by-layer architecture before training starts.
model.summary()

# Training configuration: Adam optimizer, categorical cross-entropy loss,
# accuracy reported as the metric.
model.compile(optimizer=Adam(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])

# Model training
history = model.fit(train_generator, validation_data=validation_generator, epochs=epochs)

# Save the trained model
model.save("malware_classifier_lime.h5")
|