import os
import shutil
import cv2
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing import image
import random
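# Optional (not part of the original run): seed the RNG so the subset sampled
# by prepare_limited_dataset() below is reproducible across runs.
# random.seed(42)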
# Paths to dataset directories
data_dir = "C:/Users/ramya/OneDrive - iiit-b/Desktop/data_deepfake/Dataset/"
train_dir = os.path.join(data_dir, "Train")
val_dir = os.path.join(data_dir, "Validation")
temp_train_dir = os.path.join(data_dir, "Temp_Train")
temp_val_dir = os.path.join(data_dir, "Temp_Validation")
# Image dimensions
img_height, img_width = 128, 128
# Limit the number of images for training and validation
max_images_per_class = 12000 # Adjust as needed
def count_images(directory):
    """Count the number of real and fake images in a directory."""
    real_count = len(os.listdir(os.path.join(directory, 'Real')))
    fake_count = len(os.listdir(os.path.join(directory, 'Fake')))
    return real_count, fake_count
def prepare_limited_dataset(source_dir, target_dir, max_images):
    """Create a temporary dataset with a limited number of images per class."""
    if os.path.exists(target_dir):
        shutil.rmtree(target_dir)
    os.makedirs(os.path.join(target_dir, 'Real'), exist_ok=True)
    os.makedirs(os.path.join(target_dir, 'Fake'), exist_ok=True)
    for class_dir in ['Real', 'Fake']:
        class_path = os.path.join(source_dir, class_dir)
        target_class_path = os.path.join(target_dir, class_dir)
        all_images = os.listdir(class_path)
        random.shuffle(all_images)
        selected_images = all_images[:max_images]
        for image_name in selected_images:
            shutil.copy(os.path.join(class_path, image_name), target_class_path)
def get_processed_images_info(generator):
    """Calculate batching and class-distribution info for a data generator."""
    n_samples = generator.n
    batch_size = generator.batch_size
    steps_per_epoch = int(np.ceil(n_samples / batch_size))
    class_distribution = {}
    for class_name, class_index in generator.class_indices.items():
        class_count = int(np.sum(generator.classes == class_index))
        class_distribution[class_name] = class_count
    return {
        'total_samples': n_samples,
        'batch_size': batch_size,
        'steps_per_epoch': steps_per_epoch,
        'class_distribution': class_distribution
    }
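# Worked example: with the 12,000-per-class cap and both classes filled, the
# training generator holds 24,000 samples, so steps_per_epoch is
# ceil(24000 / 32) = 750.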
# Print initial image counts
print("\nInitial image counts:")
train_real, train_fake = count_images(train_dir)
val_real, val_fake = count_images(val_dir)
print(f"Training - Real: {train_real}, Fake: {train_fake}")
print(f"Validation - Real: {val_real}, Fake: {val_fake}")
# Prepare temporary directories with limited images
prepare_limited_dataset(train_dir, temp_train_dir, max_images_per_class)
prepare_limited_dataset(val_dir, temp_val_dir, max_images_per_class)
# Print filtered image counts
print("\nAfter filtering:")
train_real, train_fake = count_images(temp_train_dir)
val_real, val_fake = count_images(temp_val_dir)
print(f"Training - Real: {train_real}, Fake: {train_fake}")
print(f"Validation - Real: {val_real}, Fake: {val_fake}")
# Data generators for training and validation
datagen = ImageDataGenerator(rescale=1./255)
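# rescale=1./255 maps raw pixel values into [0, 1], matching the manual
# /255.0 scaling applied in the prediction helpers at the end of the file.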
train_gen = datagen.flow_from_directory(
    temp_train_dir,
    target_size=(img_height, img_width),
    batch_size=32,
    class_mode='binary',
    classes=['Real', 'Fake']
)
val_gen = datagen.flow_from_directory(
    temp_val_dir,
    target_size=(img_height, img_width),
    batch_size=32,
    class_mode='binary',
    classes=['Real', 'Fake']
)
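# Because classes=['Real', 'Fake'] is passed explicitly with
# class_mode='binary', Keras maps Real -> 0 and Fake -> 1; the prediction
# helpers below rely on this when treating outputs > 0.5 as "Fake".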
# Get training and validation information
train_info = get_processed_images_info(train_gen)
val_info = get_processed_images_info(val_gen)
print("\nTraining Data Processing Info:")
print(f"Total training samples: {train_info['total_samples']}")
print(f"Batch size: {train_info['batch_size']}")
print(f"Steps per epoch: {train_info['steps_per_epoch']}")
print("\nClass distribution in training:")
for class_name, count in train_info['class_distribution'].items():
print(f"{class_name}: {count} images")
print("\nValidation Data Processing Info:")
print(f"Total validation samples: {val_info['total_samples']}")
print(f"Batch size: {val_info['batch_size']}")
print(f"Steps per epoch: {val_info['steps_per_epoch']}")
print("\nClass distribution in validation:")
for class_name, count in val_info['class_distribution'].items():
print(f"{class_name}: {count} images")
# Define the CNN model
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(img_height, img_width, 3)),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid')
])
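# Print a layer-by-layer summary of output shapes and parameter counts.
model.summary()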
# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
# Train the model
history = model.fit(
    train_gen,
    steps_per_epoch=train_info['steps_per_epoch'],
    validation_data=val_gen,
    validation_steps=val_info['steps_per_epoch'],
    epochs=10
)
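# A minimal sketch of plotting the learning curves, assuming matplotlib is
# installed (not part of the original script):
# import matplotlib.pyplot as plt
# plt.plot(history.history['accuracy'], label='train acc')
# plt.plot(history.history['val_accuracy'], label='val acc')
# plt.xlabel('epoch')
# plt.legend()
# plt.savefig('training_curves.png')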
# Calculate total images processed
total_training_images_processed = train_info['total_samples'] * 10 # 10 epochs
total_validation_images_processed = val_info['total_samples'] * 10 # 10 epochs
print(f"\nTotal images processed during training: {total_training_images_processed}")
print(f"Total images processed during validation: {total_validation_images_processed}")
print(f"Combined total processed: {total_training_images_processed + total_validation_images_processed}")
# Save the model
model.save("deepfake_detector.h5")
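# The saved detector can be restored later without retraining, using the
# load_model import above:
# model = load_model("deepfake_detector.h5")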
# Functions for prediction
def predict_image(img_path):
    """Predict whether a single image is real or fake."""
    img = image.load_img(img_path, target_size=(img_height, img_width))
    img_array = image.img_to_array(img) / 255.0
    img_array = np.expand_dims(img_array, axis=0)
    prediction = model.predict(img_array)
    return "Fake" if prediction[0][0] > 0.5 else "Real"
def predict_video(video_path):
    """Predict whether a video is real or fake by majority vote over frames."""
    cap = cv2.VideoCapture(video_path)
    fake_count, real_count = 0, 0
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        # Preprocess: OpenCV decodes frames as BGR, but the model was trained
        # on RGB images, so convert before resizing and scaling.
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frame_resized = cv2.resize(frame_rgb, (img_width, img_height))  # cv2.resize takes (width, height)
        frame_array = frame_resized.astype(np.float32) / 255.0
        frame_array = np.expand_dims(frame_array, axis=0)
        # Predict
        prediction = model.predict(frame_array)
        if prediction[0][0] > 0.5:
            fake_count += 1
        else:
            real_count += 1
    cap.release()
    return "Fake" if fake_count > real_count else "Real"
# Example usage
if __name__ == "__main__":
    # Test an image
    test_image_path = "C:/Users/ramya/OneDrive - iiit-b/Desktop/test1.jpg"
    if os.path.exists(test_image_path):
        image_result = predict_image(test_image_path)
        print(f"\nTest image prediction: {image_result}")

    # Test a video (uncomment and modify the path as needed)
    # test_video_path = "example_video.mp4"
    # if os.path.exists(test_video_path):
    #     video_result = predict_video(test_video_path)
    #     print(f"Test video prediction: {video_result}")