File size: 4,664 Bytes
caa56d6
 
 
 
 
 
 
 
 
 
44efd89
caa56d6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d336d1e
 
 
 
 
 
 
caa56d6
 
 
 
 
 
 
 
d336d1e
caa56d6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44efd89
caa56d6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44efd89
caa56d6
44efd89
 
 
 
caa56d6
6552965
44efd89
 
caa56d6
6552965
caa56d6
 
6552965
caa56d6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
import os
import cv2
import torch
import numpy as np
from torchvision import transforms
from PIL import Image
from tqdm import tqdm
from training.detectors import DETECTOR
import yaml
import gradio as gr
from huggingface_hub import hf_hub_download

# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Detector names offered in the UI dropdown; each name is also used to build
# the config/weights filenames fetched from the model repository.
AVAILABLE_MODELS = ["xception", "ucf"]

def load_model(model_name, config_path, weights_path):
    """Build the detector registered as *model_name* and load its weights.

    Args:
        model_name: Key into ``DETECTOR``; also stored in the config under
            ``'model_name'`` and used to build the pretrained filename.
        config_path: Path to the YAML config file for the detector.
        weights_path: Path to the checkpoint passed to ``torch.load``.

    Returns:
        The detector instance, moved to ``device`` and switched to eval mode.
    """
    with open(config_path, 'r') as config_file:
        detector_config = yaml.safe_load(config_file)

    detector_config['model_name'] = model_name

    # When the config declares a 'pretrained' entry, point it at the
    # checkpoint fetched from the Hugging Face repository instead of whatever
    # path the YAML file carried.
    if 'pretrained' in detector_config:
        detector_config['pretrained'] = hf_hub_download(
            repo_id="ArissBandoss/deepfake-video-classifier",
            filename=f"{model_name}_best.pth",
        )

    detector = DETECTOR[model_name](detector_config)
    detector = detector.to(device)

    # The loaded object is handed straight to load_state_dict, so the file is
    # presumably a bare state dict -- TODO confirm against the training code.
    state_dict = torch.load(weights_path, map_location=device)
    detector.load_state_dict(state_dict, strict=True)
    detector.eval()
    return detector


# preprocess a single video
def preprocess_video(video_path, output_dir, frame_num=32):
    """Sample evenly spaced frames from a video and save them as PNG files.

    Frames are written to ``<output_dir>/frames/frame_<index>.png`` where
    ``<index>`` is the zero-padded frame position requested from OpenCV.

    Args:
        video_path: Path of the video handed to ``cv2.VideoCapture``.
        output_dir: Directory (created if missing) that receives the
            ``frames`` sub-directory.
        frame_num: Number of sample positions spread between the first and
            last frame. Videos with fewer frames yield fewer images because
            repeated positions are collapsed.

    Returns:
        List of paths of the frames that were decoded and written, in
        ascending frame order. Empty when the video cannot be opened or no
        frame could be decoded.
    """
    frames_dir = os.path.join(output_dir, "frames")
    # makedirs creates output_dir as well when it does not exist yet.
    os.makedirs(frames_dir, exist_ok=True)

    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            return []

        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        # Some containers report 0 (or a negative count); clamp so we never
        # seek to a negative index and still try the first frame.
        last_index = max(total_frames - 1, 0)

        # For short videos the integer-truncated linspace repeats positions;
        # np.unique drops the repeats (and keeps ascending order) so each
        # frame is decoded, written and returned at most once.
        frame_indices = np.unique(
            np.linspace(0, last_index, frame_num, dtype=int)
        )

        # extract frames
        frames = []
        for idx in frame_indices:
            position = int(idx)
            cap.set(cv2.CAP_PROP_POS_FRAMES, position)
            ret, frame = cap.read()
            if not ret:
                continue
            frame_path = os.path.join(frames_dir, f"frame_{position:04d}.png")
            # Only report frames that actually reached the disk.
            if cv2.imwrite(frame_path, frame):
                frames.append(frame_path)
        return frames
    finally:
        # Release the capture even if decoding or writing raised.
        cap.release()

# inference on a single video
def infer_video(video_path, model, device):
    """Classify a video as "Fake" or "Real" from a sample of its frames.

    Frames are extracted with ``preprocess_video`` into a private temporary
    directory, scored one by one by *model*, and the per-frame probabilities
    of class index 1 (treated as "fake") are averaged.

    Args:
        video_path: Path of the video to analyse.
        model: Detector called as ``model(data_dict, inference=True)`` and
            expected to return a mapping whose ``"cls"`` entry holds the
            class logits.
        device: Torch device the input tensors are moved to.

    Returns:
        Tuple ``(prediction, avg_prob)`` where ``prediction`` is ``"Fake"``
        when the average fake probability exceeds 0.5 and ``"Real"``
        otherwise, and ``avg_prob`` is that average as a float.

    Raises:
        ValueError: If no frame could be extracted from the video. Without
            this check the average of an empty list is NaN and the video
            would be reported as "Real".
    """
    # Function-scope import keeps this block self-contained.
    import tempfile

    transform = transforms.Compose([
        transforms.Resize((256, 256)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
    ])

    # The detector's input dict carries label entries; they are placeholders
    # here and identical for every frame, so build them once.
    dummy_label = torch.tensor([0]).to(device)
    dummy_label_spe = torch.tensor([0]).to(device)

    probs = []
    # A per-call temporary directory avoids clashes between concurrent
    # requests and is removed automatically, so frames do not pile up on disk.
    with tempfile.TemporaryDirectory(prefix="video_frames_") as output_dir:
        frames = preprocess_video(video_path, output_dir)
        if not frames:
            raise ValueError(
                f"No frames could be extracted from video: {video_path!r}"
            )

        with torch.no_grad():
            for frame_path in frames:
                # Close the image file as soon as it has been converted.
                with Image.open(frame_path) as image:
                    frame = transform(image.convert("RGB")).unsqueeze(0).to(device)

                data_dict = {
                    "image": frame,
                    "label": dummy_label,  # Dummy label
                    "label_spe": dummy_label_spe,  # Dummy specific label
                }

                pred_dict = model(data_dict, inference=True)

                logits = pred_dict["cls"]  # Shape: [batch_size, num_classes]
                prob = torch.softmax(logits, dim=1)[:, 1].item()  # Probability of being "fake"
                probs.append(prob)

    # aggregate predictions (average probability over the sampled frames)
    avg_prob = float(np.mean(probs))
    prediction = "Fake" if avg_prob > 0.5 else "Real"
    return prediction, avg_prob

# Gradio inference function
def gradio_inference(video, model_name):
    """Run deepfake detection for the Gradio UI and format the result.

    Downloads ``<model_name>.yaml`` and ``<model_name>_best.pth`` from the
    Hugging Face repository, builds the detector, and scores the video.

    Args:
        video: Path of the uploaded video as delivered by the ``gr.Video``
            input; empty/None when nothing was uploaded.
        model_name: Detector name; must be one of ``AVAILABLE_MODELS``.

    Returns:
        A two-line string with the model name, the predicted label, and the
        confidence in that label.

    Raises:
        gr.Error: If the model name is unknown, no video was provided, or no
            frame of the video could be scored.
    """
    # model_name is interpolated into repository filenames below, so only
    # accept the names this app actually offers.
    if model_name not in AVAILABLE_MODELS:
        raise gr.Error(f"Unknown model: {model_name!r}")
    if not video:
        raise gr.Error("Please upload a video before running inference.")

    # Download config and weights from Hugging Face Model Registry
    repo_id = "ArissBandoss/deepfake-video-classifier"
    config_path = hf_hub_download(repo_id=repo_id, filename=f"{model_name}.yaml")
    weights_path = hf_hub_download(repo_id=repo_id, filename=f"{model_name}_best.pth")

    # load the model
    model = load_model(model_name, config_path, weights_path)

    # inference: the second value is the average probability of "fake"
    prediction, fake_prob = infer_video(video, model, device)

    # An average over zero frames is NaN; report that instead of a verdict.
    if np.isnan(fake_prob):
        raise gr.Error("No frames could be read from the uploaded video.")

    # Show confidence in the predicted label: for a "Real" verdict that is
    # the complement of the fake probability, not the fake probability itself.
    confidence = fake_prob if prediction == "Fake" else 1.0 - fake_prob
    return f"Model: {model_name}\nPrediction: {prediction} (Confidence: {confidence:.4f})"

# Gradio App
def create_gradio_app():
    """Assemble the Gradio Blocks interface for the demo.

    The layout is a title and short instructions, a row holding the video
    upload and the model dropdown, a textbox for the result, and a button
    that sends the video and model choice to ``gradio_inference``.

    Returns:
        The constructed ``gr.Blocks`` application (not yet launched).
    """
    with gr.Blocks() as app:
        gr.Markdown("# Deepfake Detection Demo")
        gr.Markdown("Upload a video and select a model to detect if it's real or fake.")

        with gr.Row():
            uploaded_video = gr.Video(label="Upload Video")
            model_choice = gr.Dropdown(
                choices=AVAILABLE_MODELS,
                label="Select Model",
                value="xception",
            )

        result_box = gr.Textbox(label="Prediction Result")

        # Wire the button directly: a click runs inference and fills the textbox.
        gr.Button("Run Inference").click(
            fn=gradio_inference,
            inputs=[uploaded_video, model_choice],
            outputs=result_box,
        )

    return app


# Script entry point: build the UI and start the Gradio server.
if __name__ == "__main__":
    demo = create_gradio_app()
    # share=True is passed to launch(); presumably this requests a public
    # share link in addition to the local server -- confirm in the Gradio docs.
    demo.launch(share=True)