github-actions[bot] committed · Commit 9718960 · 1 Parent: c907f5b
Sync with https://github.com/OpenAAIGC/Deepfake-Detection-Blueprint
app.py
ADDED
@@ -0,0 +1,224 @@
import streamlit as st
import warnings
import cv2
import dlib
from pytorch_grad_cam.utils.image import show_cam_on_image
from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget
import numpy as np
import torch
from retinaface.pre_trained_models import get_model
from blueprint.model import create_model, create_cam
from blueprint.preprocess import crop_face, extract_face, extract_frames
from pathlib import Path
import tempfile
import os
import io

warnings.filterwarnings('ignore')
ROOT_DIR = Path(__file__).parent.parent

+
def aca(img):
|
21 |
+
if len(img.shape) == 3 and img.shape[2] == 3:
|
22 |
+
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
|
23 |
+
img_float = img.astype(np.float32) / 255.0
|
24 |
+
channels = np.moveaxis(img_float, -1, 0)
|
25 |
+
sorted_idx = np.argsort(channels, axis=0)
|
26 |
+
sorted_values = np.take_along_axis(channels, sorted_idx, axis=0)
|
27 |
+
L = sorted_values[0]
|
28 |
+
M = sorted_values[1]
|
29 |
+
U = sorted_values[2]
|
30 |
+
eps = 1e-10
|
31 |
+
L_U = L / (U + eps)
|
32 |
+
L_M = L / (M + eps)
|
33 |
+
M_U = M / (U + eps)
|
34 |
+
kernel = np.array([[1, 0, 1], [0, -4, 0], [1, 0, 1]], dtype=np.float32)
|
35 |
+
L_U_filtered = cv2.filter2D(np.log(L_U + eps), -1, kernel)
|
36 |
+
L_M_filtered = cv2.filter2D(np.log(L_M + eps), -1, kernel)
|
37 |
+
M_U_filtered = cv2.filter2D(np.log(M_U + eps), -1, kernel)
|
38 |
+
residuals = np.abs(L_U_filtered) + np.abs(L_M_filtered) + np.abs(M_U_filtered)
|
39 |
+
p1, p99 = np.percentile(residuals[residuals > 0], (1, 99))
|
40 |
+
normalized = np.clip((residuals - p1) / (p99 - p1), 0, 1)
|
41 |
+
significant = normalized > 0.1
|
42 |
+
result = np.zeros((*residuals.shape, 3), dtype=np.float32)
|
43 |
+
result[significant, 0] = 255
|
44 |
+
intensity = np.expand_dims(normalized, -1)
|
45 |
+
result = result * intensity
|
46 |
+
return result.astype(np.uint8)
|
47 |
+
|
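# Usage sketch for aca() (hypothetical variable names, not part of the app).
# Because the channels are sorted per pixel, BGR and RGB inputs give the same map:
#     frame = cv2.imread("photo.jpg")   # uint8, H x W x 3
#     heat = aca(frame)                 # uint8 heat map; red marks strong residuals
#     cv2.imwrite("aca_heat.png", cv2.cvtColor(heat, cv2.COLOR_RGB2BGR))
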
def perform_ela(img, quality=95, scale=15):
    """Error Level Analysis: re-save as JPEG at the given quality and
    amplify the pixel-wise difference by `scale`."""
    buffer = io.BytesIO()
    if len(img.shape) == 3 and img.shape[2] == 3:
        working_img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)  # cv2's JPEG codec expects BGR
    else:
        working_img = img.copy()
    # Round-trip through an in-memory JPEG at the chosen quality.
    img_bytes = cv2.imencode('.jpg', working_img, [cv2.IMWRITE_JPEG_QUALITY, quality])[1].tobytes()
    buffer.write(img_bytes)
    buffer.seek(0)
    compressed_img = cv2.imdecode(np.frombuffer(buffer.read(), np.uint8), cv2.IMREAD_COLOR)
    difference = np.abs(working_img.astype(np.float32) - compressed_img.astype(np.float32)) * scale
    difference = np.clip(difference, 0, 255).astype(np.uint8)
    difference_rgb = cv2.cvtColor(difference, cv2.COLOR_BGR2RGB)
    # Suppress low-luminance noise, double the rest, then gamma-lift for visibility.
    luminance = np.sum(difference_rgb * np.array([0.299, 0.587, 0.114]), axis=2)
    enhanced = np.zeros_like(difference_rgb)
    for i in range(3):
        # Widen before doubling so uint8 arithmetic cannot wrap around.
        enhanced[:, :, i] = np.minimum(difference_rgb[:, :, i].astype(np.uint16) * 2, 255)
    mask = luminance < np.mean(luminance) * 0.5
    enhanced[mask] = [0, 0, 0]
    gamma = 1.4
    enhanced = (((enhanced / 255.0) ** (1 / gamma)) * 255).astype(np.uint8)
    return difference, enhanced

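# Usage sketch for perform_ela() (hypothetical variable names). The app shows
# only the enhanced map; the raw amplified difference is also returned:
#     raw, vis = perform_ela(rgb_image, quality=95, scale=15)
#     st.image(vis)   # uneven bright regions hint at a mixed compression history
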
@st.cache_resource
def load_models():
    # Cached across Streamlit reruns so the weights load only once per process.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    sbcl = create_model(str(ROOT_DIR / "Weights/weights.tar"), device)
    face_detector = get_model("resnet50_2020-07-20", max_size=1024, device=device)
    face_detector.eval()
    cam_sbcl = create_cam(sbcl)
    dlib_face_detector = dlib.get_frontal_face_detector()
    dlib_face_predictor = dlib.shape_predictor(str(ROOT_DIR / "Weights/shape_predictor_81_face_landmarks.dat"))
    return device, sbcl, face_detector, cam_sbcl, dlib_face_detector, dlib_face_predictor

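# Note: the dlib detector/predictor pair is returned but never unpacked in this
# file (predict_image and predict_video read only models[:4]); it is presumably
# consumed by the preprocessing helpers in blueprint.preprocess.
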
def predict_image(inp, models):
    device, sbcl, face_detector, cam_sbcl = models[:4]
    targets = [ClassifierOutputTarget(1)]  # explain the "Fake" logit (class 1)
    if inp is None:
        return None, None
    face_list = extract_face(inp, face_detector)
    if len(face_list) == 0:
        return None, None
    try:
        img = torch.tensor(face_list).to(device)
        if device.type == 'cuda':
            img = img.half()
        img = img / 255
        with torch.no_grad():
            pred = sbcl(img).float().softmax(1)[:, 1].cpu().numpy().tolist()[0]
        confidences = {'Real': 1 - pred, 'Fake': pred}
        # Grad-CAM needs gradients, so re-enable them for the CAM pass only.
        img.requires_grad = True
        grayscale_cam = cam_sbcl(input_tensor=img, targets=targets, aug_smooth=True)
        grayscale_cam = grayscale_cam[0, :]
        cam_image = show_cam_on_image(face_list[0].transpose(1, 2, 0) / 255, grayscale_cam, use_rgb=True)
        return confidences, cam_image
    except Exception as e:
        st.error(f"Error during prediction: {str(e)}")
        return None, None

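# Usage sketch for predict_image() (hypothetical variable names); it expects an
# RGB numpy array plus the tuple returned by load_models():
#     confidences, cam = predict_image(rgb_image, load_models())
#     if confidences is not None:
#         print(f"Fake probability: {confidences['Fake']:.3f}")
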
def predict_video(inp, models):
    device, sbcl, face_detector, cam_sbcl = models[:4]
    targets = [ClassifierOutputTarget(1)]
    if inp is None:
        return None, None
    try:
        face_list, idx_list = extract_frames(inp, 10, face_detector)
        if not face_list:
            return None, None
        img = torch.tensor(face_list).to(device)
        if device.type == 'cuda':
            img = img.half()
        img = img / 255
        with torch.no_grad():
            pred = sbcl(img).float().softmax(1)[:, 1]
        # Group per-face fake scores by source frame (idx_list maps face -> frame).
        pred_list = []
        idx_img = -1
        for i in range(len(pred)):
            if idx_list[i] != idx_img:
                pred_list.append([])
                idx_img = idx_list[i]
            pred_list[-1].append(pred[i].item())
        # Keep the most suspicious face per frame, then average over frames.
        pred_res = np.array([max(p) for p in pred_list])
        # pred_res indexes frames, not faces, so locate the CAM face via the
        # per-face argmax (the globally top face realizes the top frame max);
        # indexing face_list with np.argmax(pred_res) would misalign whenever a
        # frame contains more than one face.
        most_fake = int(torch.argmax(pred).item())
        pred = float(pred_res.mean())
        img_for_cam = img[most_fake].unsqueeze(0)
        img_for_cam.requires_grad = True
        grayscale_cam = cam_sbcl(input_tensor=img_for_cam, targets=targets, aug_smooth=True)
        grayscale_cam = grayscale_cam[0, :]
        cam_image = show_cam_on_image(face_list[most_fake].transpose(1, 2, 0) / 255, grayscale_cam, use_rgb=True)
        return {'Real': 1 - pred, 'Fake': pred}, cam_image
    except Exception as e:
        st.error(f"Error during video prediction: {str(e)}")
        return None, None

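# Worked example of the aggregation above: if the per-face fake scores grouped
# by frame are [[0.2], [0.9, 0.4], [0.3]], the per-frame maxima are
# [0.2, 0.9, 0.3] and the video-level fake score is their mean, ~0.467.
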
def main():
    with st.sidebar:
        st.title("Deepfake Detection")
        tab = st.radio("Select Input Type:", ["Image", "Video"])
        if tab == "Image":
            st.subheader("Analysis Visualization Options")
            show_gradcam = st.checkbox("GradCAM", value=True)
            show_aca = st.checkbox("ACA", value=False)
            show_ela = st.checkbox("ELA", value=False)
            if show_ela:
                quality = st.slider("JPEG Quality", 0, 100, 95)
                scale = st.slider("ELA Scale", 1, 50, 15)

    models = load_models()

    if tab == "Image":
        st.header("Image Deepfake Detection")
        # One column for the input plus one per enabled visualization.
        num_cols = 1 + sum([show_gradcam, show_aca, show_ela])
        cols = st.columns(num_cols)
        col_idx = 0

        with cols[col_idx]:
            st.subheader("Input")
            image = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])
            if image is not None:
                image = cv2.imdecode(np.frombuffer(image.read(), np.uint8), cv2.IMREAD_COLOR)
                image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
                st.image(image, caption="Input", use_container_width=True)

        if st.button("Analyze") and image is not None:  # no-op until an image is uploaded
            with st.spinner("Processing..."):
                confidences, cam_image = predict_image(image, models)
                if show_gradcam:
                    col_idx += 1
                    with cols[col_idx]:
                        st.subheader("GradCAM")
                        if confidences and cam_image is not None:
                            st.image(cam_image, caption="Model Focus", use_container_width=True)
                            for label, conf in confidences.items():
                                st.progress(conf, text=f"{label}: {conf*100:.1f}%")
                        else:
                            st.warning("No face detected!")
                if show_aca:
                    col_idx += 1
                    with cols[col_idx]:
                        st.subheader("ACA")
                        color_map = aca(image)
                        st.image(color_map, use_container_width=True)
                if show_ela:
                    col_idx += 1
                    with cols[col_idx]:
                        st.subheader("ELA")
                        _, ela_map = perform_ela(image, quality=quality, scale=scale)
                        st.image(ela_map, use_container_width=True)
    else:
        st.header("Video Deepfake Detection")
        col1, col2 = st.columns(2)
        with col1:
            st.subheader("Input")
            video = st.file_uploader("Choose a video...", type=["mp4", "avi", "mov"])
            if video is not None:
                # Persist the upload to disk so the frame extractor can read it by path.
                with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4', dir='/home/appuser') as tmp_file:
                    tmp_file.write(video.read())
                    video_path = tmp_file.name
                st.video(video)
                if st.button("Analyze"):
                    with st.spinner("Processing..."):
                        try:
                            confidences, cam_image = predict_video(video_path, models)
                            with col2:
                                st.subheader("Results")
                                if confidences and cam_image is not None:
                                    st.image(cam_image, caption="GradCAM", use_container_width=True)
                                    for label, conf in confidences.items():
                                        st.progress(conf, text=f"{label}: {conf*100:.1f}%")
                                else:
                                    st.warning("No faces detected!")
                        finally:
                            # Always remove the temporary copy of the uploaded video.
                            if os.path.exists(video_path):
                                os.unlink(video_path)

if __name__ == "__main__":
    main()
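
# Standard Streamlit invocation to serve this app locally:
#     streamlit run app.py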