Spaces:

varma123
/

deepfake_video_2

Running

App Files Files Community

varma123 committed on Feb 22, 2024

Commit

64a53bd

verified ·

1 Parent(s): 97b144d

Update app.py

Browse files

Files changed (1) hide show

app.py +33 -29

app.py CHANGED Viewed

@@ -2,19 +2,18 @@ import gradio as gr
 import torch
 import torch.nn.functional as F
 from facenet_pytorch import MTCNN, InceptionResnetV1
-import numpy as np
 import cv2
 from pytorch_grad_cam import GradCAM
 from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget
 from pytorch_grad_cam.utils.image import show_cam_on_image
-from torchvision import transforms
 from PIL import Image
 import warnings
 warnings.filterwarnings("ignore")
 # Download and Load Model
-DEVICE = 'cuda:0' if torch.cuda.is_available() else 'cpu'
 mtcnn = MTCNN(
     select_largest=False,
@@ -25,49 +24,58 @@ mtcnn = MTCNN(
 model = InceptionResnetV1(
     pretrained="vggface2",
     classify=True,
-    num_classes=1,
     device=DEVICE
-)
 checkpoint = torch.load("resnetinceptionv1_epoch_32.pth", map_location=torch.device('cpu'))
 model.load_state_dict(checkpoint['model_state_dict'])
 model.to(DEVICE)
-model.eval()
-# Model Inference
 def predict_frame(frame):
-    """Predict whether the input frame contains real or fake faces"""
     frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
     frame_pil = Image.fromarray(frame)
     face = mtcnn(frame_pil)
     if face is None:
-        raise Exception('No face detected')
-    face = face.unsqueeze(0)  # add the batch dimension
-    face = F.interpolate(face, size=(256, 256), mode='bilinear', align_corners=False)
     face = face.to(DEVICE, dtype=torch.float32) / 255.0
     target_layers = [model.block8.branch1[-1]]
     use_cuda = True if torch.cuda.is_available() else False
     cam = GradCAM(model=model, target_layers=target_layers, use_cuda=use_cuda)
     targets = [ClassifierOutputTarget(0)]
     grayscale_cam = cam(input_tensor=face, targets=targets, eigen_smooth=True)
     grayscale_cam = grayscale_cam[0, :]
-    visualization = show_cam_on_image(frame, grayscale_cam, use_rgb=True)
-    face_with_mask = cv2.addWeighted(frame, 1, visualization, 0.5, 0)
-    with torch.no_grad():
-        output = torch.sigmoid(model(face).squeeze(0))
-        prediction = "real" if output.item() < 0.5 else "fake"
     return prediction, face_with_mask
-# Function to process video
 def predict_video(input_video):
     cap = cv2.VideoCapture(input_video)
     frames = []
     confidences = []
@@ -82,18 +90,12 @@ def predict_video(input_video):
         confidences.append(prediction)
     cap.release()
-    list=[]
-    list.append(set(confidences))
-    if( 'fake' in list):
-        final_prediction='fake'
-    else:
-        final_prediction='real'
     # Determine the final prediction based on the maximum occurrence of predictions
-    # final_prediction = max(set(confidences), key=confidences.count)
     return final_prediction, frames
 # Gradio Interface
 interface = gr.Interface(
     fn=predict_video,
@@ -102,8 +104,10 @@ interface = gr.Interface(
     ],
     outputs=[
         gr.Label(label="Class"),
-        gr.Video(label="Face with Explainability")
     ],
 )
 interface.launch()

 import torch
 import torch.nn.functional as F
 from facenet_pytorch import MTCNN, InceptionResnetV1
 import cv2
 from pytorch_grad_cam import GradCAM
 from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget
 from pytorch_grad_cam.utils.image import show_cam_on_image
 from PIL import Image
+import numpy as np
 import warnings
 warnings.filterwarnings("ignore")
 # Download and Load Model
+DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
 mtcnn = MTCNN(
     select_largest=False,
 model = InceptionResnetV1(
     pretrained="vggface2",
     classify=True,
+    num_classes=2,  # Change to 2 classes (real or fake)
     device=DEVICE
+).eval()
 checkpoint = torch.load("resnetinceptionv1_epoch_32.pth", map_location=torch.device('cpu'))
 model.load_state_dict(checkpoint['model_state_dict'])
 model.to(DEVICE)
+# Model Inference
 def predict_frame(frame):
     """Predict whether the input frame contains a real or fake face.

     Args:
         frame: Image array in OpenCV BGR channel order (it is converted
             to RGB before face detection).

     Returns:
         A ``(prediction, face_with_mask)`` tuple. ``prediction`` is the
         string ``"real"`` or ``"fake"``; ``face_with_mask`` is a uint8 RGB
         array of the 256x256 face crop blended with its Grad-CAM heatmap.
         Returns ``(None, None)`` when MTCNN detects no face.
     """
     frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
     face = mtcnn(Image.fromarray(frame))
     if face is None:
         return None, None  # No face detected
     # Preprocess the face: add the batch dimension and resize to 256x256.
     face = F.interpolate(
         face.unsqueeze(0),
         size=(256, 256),
         mode='bilinear',
         align_corners=False,
     )
     face = face.to(DEVICE, dtype=torch.float32) / 255.0
     # Predict. Gradients are not needed for the classification pass itself.
     with torch.no_grad():
         # NOTE(review): .item() requires a single-element output, while the
         # model is constructed with num_classes=2 at module level -- confirm
         # the checkpoint really yields one logit per face.
         fake_score = torch.sigmoid(model(face).squeeze(0)).item()
     prediction = "real" if fake_score < 0.5 else "fake"
     # Visualize. Grad-CAM needs gradients, so it runs outside no_grad().
     # The context manager releases the hooks GradCAM registers on the model,
     # so they do not pile up across the frames of a video.
     with GradCAM(
         model=model,
         target_layers=[model.block8.branch1[-1]],
         use_cuda=torch.cuda.is_available(),
     ) as cam:
         grayscale_cam = cam(
             input_tensor=face,
             targets=[ClassifierOutputTarget(0)],
             eigen_smooth=True,
         )
     grayscale_cam = grayscale_cam[0, :]
     # CHW tensor -> HWC numpy array, as expected by show_cam_on_image.
     face_np = face.squeeze(0).permute(1, 2, 0).detach().cpu().numpy()
     # show_cam_on_image already returns a uint8 image in [0, 255]; scaling it
     # by 255 again would wrap around when cast to uint8 and garble the overlay.
     visualization = show_cam_on_image(face_np, grayscale_cam, use_rgb=True)
     face_with_mask = cv2.addWeighted(
         (face_np * 255).astype(np.uint8), 1, visualization, 0.5, 0
     )
     return prediction, face_with_mask
 def predict_video(input_video):
     cap = cv2.VideoCapture(input_video)
     frames = []
     confidences = []
         confidences.append(prediction)
     cap.release()
     # Determine the final prediction based on the maximum occurrence of predictions
+    final_prediction = 'fake' if confidences.count('fake') > confidences.count('real') else 'real'
     return final_prediction, frames
 # Gradio Interface
 interface = gr.Interface(
     fn=predict_video,
     ],
     outputs=[
         gr.Label(label="Class"),
+        gr.Image(label="Face with Explainability", type="numpy")
     ],
+    title="Video Face Authentication",
+    description="Detect whether the faces in the video are real or fake."
 )
 interface.launch()