|
import os |
|
import shutil |
|
import cv2 |
|
import numpy as np |
|
from tensorflow.keras.models import Sequential |
|
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout |
|
from tensorflow.keras.preprocessing.image import ImageDataGenerator |
|
from tensorflow.keras.models import load_model |
|
from tensorflow.keras.preprocessing import image |
|
import random |
|
|
|
|
|
data_dir = "C:/Users/ramya/OneDrive - iiit-b/Desktop/data_deepfake/Dataset/" |
|
train_dir = os.path.join(data_dir, "Train") |
|
val_dir = os.path.join(data_dir, "Validation") |
|
temp_train_dir = os.path.join(data_dir, "Temp_Train") |
|
temp_val_dir = os.path.join(data_dir, "Temp_Validation") |
|
|
|
|
|
img_height, img_width = 128, 128 |
|
|
|
|
|
max_images_per_class = 12000 |
|
|
|
def count_images(directory): |
|
"""Count the number of real and fake images in a directory.""" |
|
real_count = len(os.listdir(os.path.join(directory, 'Real'))) |
|
fake_count = len(os.listdir(os.path.join(directory, 'Fake'))) |
|
return real_count, fake_count |
|
|
|
def prepare_limited_dataset(source_dir, target_dir, max_images): |
|
"""Create a temporary dataset with a limited number of images per class.""" |
|
if os.path.exists(target_dir): |
|
shutil.rmtree(target_dir) |
|
os.makedirs(os.path.join(target_dir, 'Real'), exist_ok=True) |
|
os.makedirs(os.path.join(target_dir, 'Fake'), exist_ok=True) |
|
|
|
for class_dir in ['Real', 'Fake']: |
|
class_path = os.path.join(source_dir, class_dir) |
|
target_class_path = os.path.join(target_dir, class_dir) |
|
all_images = os.listdir(class_path) |
|
random.shuffle(all_images) |
|
selected_images = all_images[:max_images] |
|
|
|
for image_name in selected_images: |
|
shutil.copy(os.path.join(class_path, image_name), target_class_path) |
|
|
|
def get_processed_images_info(generator): |
|
"""Calculate information about processed images from a generator.""" |
|
n_samples = generator.n |
|
n_classes = len(generator.class_indices) |
|
batch_size = generator.batch_size |
|
steps_per_epoch = int(np.ceil(n_samples / batch_size)) |
|
|
|
class_distribution = {} |
|
for class_name, class_index in generator.class_indices.items(): |
|
class_count = len(generator.classes[generator.classes == class_index]) |
|
class_distribution[class_name] = class_count |
|
|
|
return { |
|
'total_samples': n_samples, |
|
'batch_size': batch_size, |
|
'steps_per_epoch': steps_per_epoch, |
|
'class_distribution': class_distribution |
|
} |
|
|
|
|
|
print("\nInitial image counts:") |
|
train_real, train_fake = count_images(train_dir) |
|
val_real, val_fake = count_images(val_dir) |
|
print(f"Training - Real: {train_real}, Fake: {train_fake}") |
|
print(f"Validation - Real: {val_real}, Fake: {val_fake}") |
|
|
|
|
|
prepare_limited_dataset(train_dir, temp_train_dir, max_images_per_class) |
|
prepare_limited_dataset(val_dir, temp_val_dir, max_images_per_class) |
|
|
|
|
|
print("\nAfter filtering:") |
|
train_real, train_fake = count_images(temp_train_dir) |
|
val_real, val_fake = count_images(temp_val_dir) |
|
print(f"Training - Real: {train_real}, Fake: {train_fake}") |
|
print(f"Validation - Real: {val_real}, Fake: {val_fake}") |
|
|
|
|
|
datagen = ImageDataGenerator(rescale=1./255) |
|
|
|
train_gen = datagen.flow_from_directory( |
|
temp_train_dir, |
|
target_size=(img_height, img_width), |
|
batch_size=32, |
|
class_mode='binary', |
|
classes=['Real', 'Fake'] |
|
) |
|
|
|
val_gen = datagen.flow_from_directory( |
|
temp_val_dir, |
|
target_size=(img_height, img_width), |
|
batch_size=32, |
|
class_mode='binary', |
|
classes=['Real', 'Fake'] |
|
) |
|
|
|
|
|
train_info = get_processed_images_info(train_gen) |
|
val_info = get_processed_images_info(val_gen) |
|
|
|
print("\nTraining Data Processing Info:") |
|
print(f"Total training samples: {train_info['total_samples']}") |
|
print(f"Batch size: {train_info['batch_size']}") |
|
print(f"Steps per epoch: {train_info['steps_per_epoch']}") |
|
print("\nClass distribution in training:") |
|
for class_name, count in train_info['class_distribution'].items(): |
|
print(f"{class_name}: {count} images") |
|
|
|
print("\nValidation Data Processing Info:") |
|
print(f"Total validation samples: {val_info['total_samples']}") |
|
print(f"Batch size: {val_info['batch_size']}") |
|
print(f"Steps per epoch: {val_info['steps_per_epoch']}") |
|
print("\nClass distribution in validation:") |
|
for class_name, count in val_info['class_distribution'].items(): |
|
print(f"{class_name}: {count} images") |
|
|
|
|
|
model = Sequential([ |
|
Conv2D(32, (3, 3), activation='relu', input_shape=(img_height, img_width, 3)), |
|
MaxPooling2D((2, 2)), |
|
Conv2D(64, (3, 3), activation='relu'), |
|
MaxPooling2D((2, 2)), |
|
Conv2D(128, (3, 3), activation='relu'), |
|
MaxPooling2D((2, 2)), |
|
Flatten(), |
|
Dense(128, activation='relu'), |
|
Dropout(0.5), |
|
Dense(1, activation='sigmoid') |
|
]) |
|
|
|
|
|
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy']) |
|
|
|
|
|
history = model.fit( |
|
train_gen, |
|
steps_per_epoch=train_info['steps_per_epoch'], |
|
validation_data=val_gen, |
|
validation_steps=val_info['steps_per_epoch'], |
|
epochs=10 |
|
) |
|
|
|
|
|
total_training_images_processed = train_info['total_samples'] * 10 |
|
total_validation_images_processed = val_info['total_samples'] * 10 |
|
|
|
print(f"\nTotal images processed during training: {total_training_images_processed}") |
|
print(f"Total images processed during validation: {total_validation_images_processed}") |
|
print(f"Combined total processed: {total_training_images_processed + total_validation_images_processed}") |
|
|
|
|
|
model.save("deepfake_detector.h5") |
|
|
|
|
|
def predict_image(img_path): |
|
"""Predict whether a single image is real or fake.""" |
|
img = image.load_img(img_path, target_size=(img_height, img_width)) |
|
img_array = image.img_to_array(img) / 255.0 |
|
img_array = np.expand_dims(img_array, axis=0) |
|
prediction = model.predict(img_array) |
|
return "Fake" if prediction[0][0] > 0.5 else "Real" |
|
|
|
def predict_video(video_path): |
|
"""Predict whether a video is real or fake by analyzing frames.""" |
|
cap = cv2.VideoCapture(video_path) |
|
fake_count, real_count = 0, 0 |
|
|
|
while cap.isOpened(): |
|
ret, frame = cap.read() |
|
if not ret: |
|
break |
|
|
|
|
|
frame_resized = cv2.resize(frame, (img_height, img_width)) |
|
frame_array = np.array(frame_resized) / 255.0 |
|
frame_array = np.expand_dims(frame_array, axis=0) |
|
|
|
|
|
prediction = model.predict(frame_array) |
|
if prediction[0][0] > 0.5: |
|
fake_count += 1 |
|
else: |
|
real_count += 1 |
|
|
|
cap.release() |
|
return "Fake" if fake_count > real_count else "Real" |
|
|
|
|
|
if __name__ == "__main__": |
|
|
|
test_image_path = "C:/Users/ramya/OneDrive - iiit-b/Desktop/test1.jpg" |
|
if os.path.exists(test_image_path): |
|
image_result = predict_image(test_image_path) |
|
print(f"\nTest image prediction: {image_result}") |
|
|
|
|
|
|
|
|
|
|
|
|
|
|