import os
import shutil
import random

import cv2
import numpy as np
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.preprocessing import image

# Paths to dataset directories
data_dir = "C:/Users/ramya/OneDrive - iiit-b/Desktop/data_deepfake/Dataset/"
train_dir = os.path.join(data_dir, "Train")
val_dir = os.path.join(data_dir, "Validation")
temp_train_dir = os.path.join(data_dir, "Temp_Train")
temp_val_dir = os.path.join(data_dir, "Temp_Validation")

# Image dimensions
img_height, img_width = 128, 128

# Limit the number of images per class for training and validation
max_images_per_class = 12000  # Adjust as needed


def count_images(directory):
    """Count the number of real and fake images in a directory."""
    real_count = len(os.listdir(os.path.join(directory, 'Real')))
    fake_count = len(os.listdir(os.path.join(directory, 'Fake')))
    return real_count, fake_count


def prepare_limited_dataset(source_dir, target_dir, max_images):
    """Create a temporary dataset with a limited number of images per class."""
    if os.path.exists(target_dir):
        shutil.rmtree(target_dir)
    os.makedirs(os.path.join(target_dir, 'Real'), exist_ok=True)
    os.makedirs(os.path.join(target_dir, 'Fake'), exist_ok=True)

    for class_dir in ['Real', 'Fake']:
        class_path = os.path.join(source_dir, class_dir)
        target_class_path = os.path.join(target_dir, class_dir)
        all_images = os.listdir(class_path)
        random.shuffle(all_images)
        selected_images = all_images[:max_images]
        for image_name in selected_images:
            shutil.copy(os.path.join(class_path, image_name), target_class_path)


def get_processed_images_info(generator):
    """Calculate information about processed images from a generator."""
    n_samples = generator.n
    batch_size = generator.batch_size
    steps_per_epoch = int(np.ceil(n_samples / batch_size))

    class_distribution = {}
    for class_name, class_index in generator.class_indices.items():
        class_count = int(np.sum(generator.classes == class_index))
        class_distribution[class_name] = class_count

    return {
        'total_samples': n_samples,
        'batch_size': batch_size,
        'steps_per_epoch': steps_per_epoch,
        'class_distribution': class_distribution,
    }


# Print initial image counts
print("\nInitial image counts:")
train_real, train_fake = count_images(train_dir)
val_real, val_fake = count_images(val_dir)
print(f"Training - Real: {train_real}, Fake: {train_fake}")
print(f"Validation - Real: {val_real}, Fake: {val_fake}")

# Prepare temporary directories with limited images
prepare_limited_dataset(train_dir, temp_train_dir, max_images_per_class)
prepare_limited_dataset(val_dir, temp_val_dir, max_images_per_class)

# Print filtered image counts
print("\nAfter filtering:")
train_real, train_fake = count_images(temp_train_dir)
val_real, val_fake = count_images(temp_val_dir)
print(f"Training - Real: {train_real}, Fake: {train_fake}")
print(f"Validation - Real: {val_real}, Fake: {val_fake}")

# Data generators for training and validation (pixel values scaled to [0, 1])
datagen = ImageDataGenerator(rescale=1./255)
train_gen = datagen.flow_from_directory(
    temp_train_dir,
    target_size=(img_height, img_width),
    batch_size=32,
    class_mode='binary',
    classes=['Real', 'Fake']  # Real -> 0, Fake -> 1
)
val_gen = datagen.flow_from_directory(
    temp_val_dir,
    target_size=(img_height, img_width),
    batch_size=32,
    class_mode='binary',
    classes=['Real', 'Fake']
)
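# Optional sanity check (an addition, not in the original script): the sigmoid
# threshold used later assumes the label mapping Real -> 0, Fake -> 1 that the
# `classes` argument above establishes. Verifying it once here catches a silent
# mismatch if the class list is ever reordered.
assert train_gen.class_indices == {'Real': 0, 'Fake': 1}, train_gen.class_indices
assert val_gen.class_indices == {'Real': 0, 'Fake': 1}, val_gen.class_indices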
# Get training and validation information
train_info = get_processed_images_info(train_gen)
val_info = get_processed_images_info(val_gen)

print("\nTraining Data Processing Info:")
print(f"Total training samples: {train_info['total_samples']}")
print(f"Batch size: {train_info['batch_size']}")
print(f"Steps per epoch: {train_info['steps_per_epoch']}")
print("\nClass distribution in training:")
for class_name, count in train_info['class_distribution'].items():
    print(f"{class_name}: {count} images")

print("\nValidation Data Processing Info:")
print(f"Total validation samples: {val_info['total_samples']}")
print(f"Batch size: {val_info['batch_size']}")
print(f"Steps per epoch: {val_info['steps_per_epoch']}")
print("\nClass distribution in validation:")
for class_name, count in val_info['class_distribution'].items():
    print(f"{class_name}: {count} images")

# Define the CNN model
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(img_height, img_width, 3)),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid')  # Sigmoid output: probability of 'Fake'
])

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
epochs = 10
history = model.fit(
    train_gen,
    steps_per_epoch=train_info['steps_per_epoch'],
    validation_data=val_gen,
    validation_steps=val_info['steps_per_epoch'],
    epochs=epochs
)

# Calculate total images processed across all epochs
total_training_images_processed = train_info['total_samples'] * epochs
total_validation_images_processed = val_info['total_samples'] * epochs
print(f"\nTotal images processed during training: {total_training_images_processed}")
print(f"Total images processed during validation: {total_validation_images_processed}")
print(f"Combined total processed: {total_training_images_processed + total_validation_images_processed}")

# Save the model
model.save("deepfake_detector.h5")


# Functions for prediction
def predict_image(img_path):
    """Predict whether a single image is real or fake."""
    img = image.load_img(img_path, target_size=(img_height, img_width))
    img_array = image.img_to_array(img) / 255.0
    img_array = np.expand_dims(img_array, axis=0)
    prediction = model.predict(img_array)
    return "Fake" if prediction[0][0] > 0.5 else "Real"


def predict_video(video_path):
    """Predict whether a video is real or fake by analyzing frames."""
    cap = cv2.VideoCapture(video_path)
    fake_count, real_count = 0, 0

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # Preprocess the frame. OpenCV decodes frames as BGR while the model
        # was trained on RGB images, so convert first. Note cv2.resize takes
        # (width, height), not (height, width).
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frame_resized = cv2.resize(frame_rgb, (img_width, img_height))
        frame_array = frame_resized / 255.0
        frame_array = np.expand_dims(frame_array, axis=0)

        # Predict
        prediction = model.predict(frame_array)
        if prediction[0][0] > 0.5:
            fake_count += 1
        else:
            real_count += 1

    cap.release()
    return "Fake" if fake_count > real_count else "Real"


# Example usage
if __name__ == "__main__":
    # Test an image
    test_image_path = "C:/Users/ramya/OneDrive - iiit-b/Desktop/test1.jpg"
    if os.path.exists(test_image_path):
        image_result = predict_image(test_image_path)
        print(f"\nTest image prediction: {image_result}")

    # Test a video (uncomment and modify path as needed)
    # test_video_path = "example_video.mp4"
    # if os.path.exists(test_video_path):
    #     video_result = predict_video(test_video_path)
    #     print(f"Test video prediction: {video_result}")
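# Reloading the saved model in a separate session (a minimal sketch using the
# `load_model` import above; it assumes "deepfake_detector.h5" exists in the
# working directory and that "some_image.jpg" is a placeholder path you replace):
#
#     model = load_model("deepfake_detector.h5")
#     print(predict_image("some_image.jpg"))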