Duplicate from thecho7/deepfake
Co-authored-by: Suho Cho <[email protected]>
- .gitattributes +36 -0
- Dockerfile +54 -0
- LICENSE +21 -0
- README.md +14 -0
- __pycache__/kernel_utils.cpython-310.pyc +0 -0
- app.py +86 -0
- configs/b5.json +28 -0
- configs/b7.json +29 -0
- download_weights.sh +9 -0
- examples/liuujwwgpr.mp4 +3 -0
- examples/nlurbvsozt.mp4 +3 -0
- examples/rfjuhbnlro.mp4 +3 -0
- kernel_utils.py +366 -0
- libs/shape_predictor_68_face_landmarks.dat +3 -0
- requirements.txt +131 -0
- training/__init__.py +0 -0
- training/__pycache__/__init__.cpython-310.pyc +0 -0
- training/__pycache__/__init__.cpython-39.pyc +0 -0
- training/__pycache__/losses.cpython-310.pyc +0 -0
- training/__pycache__/losses.cpython-39.pyc +0 -0
- training/datasets/__init__.py +0 -0
- training/datasets/__pycache__/__init__.cpython-310.pyc +0 -0
- training/datasets/__pycache__/classifier_dataset.cpython-310.pyc +0 -0
- training/datasets/__pycache__/validation_set.cpython-310.pyc +0 -0
- training/datasets/classifier_dataset.py +384 -0
- training/datasets/validation_set.py +60 -0
- training/losses.py +28 -0
- training/pipelines/__init__.py +0 -0
- training/pipelines/train_classifier.py +364 -0
- training/tools/__init__.py +0 -0
- training/tools/__pycache__/__init__.cpython-310.pyc +0 -0
- training/tools/__pycache__/config.cpython-310.pyc +0 -0
- training/tools/__pycache__/schedulers.cpython-310.pyc +0 -0
- training/tools/__pycache__/utils.cpython-310.pyc +0 -0
- training/tools/config.py +43 -0
- training/tools/schedulers.py +46 -0
- training/tools/utils.py +121 -0
- training/transforms/__init__.py +0 -0
- training/transforms/__pycache__/__init__.cpython-310.pyc +0 -0
- training/transforms/__pycache__/albu.cpython-310.pyc +0 -0
- training/transforms/albu.py +100 -0
- training/zoo/__init__.py +0 -0
- training/zoo/__pycache__/__init__.cpython-310.pyc +0 -0
- training/zoo/__pycache__/classifiers.cpython-310.pyc +0 -0
- training/zoo/classifiers.py +172 -0
- training/zoo/unet.py +151 -0
- weights/.gitkeep +0 -0
- weights/b7_ns_best.pth +3 -0
.gitattributes
ADDED
@@ -0,0 +1,36 @@
*.7z filter=lfs diff=lfs merge=lfs -text
*.arrow filter=lfs diff=lfs merge=lfs -text
*.bin filter=lfs diff=lfs merge=lfs -text
*.bz2 filter=lfs diff=lfs merge=lfs -text
*.ckpt filter=lfs diff=lfs merge=lfs -text
*.ftz filter=lfs diff=lfs merge=lfs -text
*.gz filter=lfs diff=lfs merge=lfs -text
*.h5 filter=lfs diff=lfs merge=lfs -text
*.joblib filter=lfs diff=lfs merge=lfs -text
*.lfs.* filter=lfs diff=lfs merge=lfs -text
*.mlmodel filter=lfs diff=lfs merge=lfs -text
*.model filter=lfs diff=lfs merge=lfs -text
*.msgpack filter=lfs diff=lfs merge=lfs -text
*.npy filter=lfs diff=lfs merge=lfs -text
*.npz filter=lfs diff=lfs merge=lfs -text
*.onnx filter=lfs diff=lfs merge=lfs -text
*.ot filter=lfs diff=lfs merge=lfs -text
*.parquet filter=lfs diff=lfs merge=lfs -text
*.pb filter=lfs diff=lfs merge=lfs -text
*.pickle filter=lfs diff=lfs merge=lfs -text
*.pkl filter=lfs diff=lfs merge=lfs -text
*.pt filter=lfs diff=lfs merge=lfs -text
*.pth filter=lfs diff=lfs merge=lfs -text
*.rar filter=lfs diff=lfs merge=lfs -text
*.safetensors filter=lfs diff=lfs merge=lfs -text
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.tar.* filter=lfs diff=lfs merge=lfs -text
*.tflite filter=lfs diff=lfs merge=lfs -text
*.tgz filter=lfs diff=lfs merge=lfs -text
*.wasm filter=lfs diff=lfs merge=lfs -text
*.xz filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text
*.mp4 filter=lfs diff=lfs merge=lfs -text
*.dat filter=lfs diff=lfs merge=lfs -text
Dockerfile
ADDED
@@ -0,0 +1,54 @@
ARG PYTORCH="1.10.0"
ARG CUDA="11.3"
ARG CUDNN="8"

FROM pytorch/pytorch:${PYTORCH}-cuda${CUDA}-cudnn${CUDNN}-devel

ENV TORCH_NVCC_FLAGS="-Xfatbin -compress-all"
ENV CMAKE_PREFIX_PATH="$(dirname $(which conda))/../"

# Setting noninteractive build, setting up tzdata and configuring timezones
ENV DEBIAN_FRONTEND=noninteractive
ENV TZ=Europe/Berlin
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone

RUN apt-get update && apt-get install -y libglib2.0-0 libsm6 libxrender-dev libxext6 nano mc glances vim git \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Install cython
RUN conda install cython -y && conda clean --all

# Installing APEX
RUN pip install -U pip
RUN git clone https://github.com/NVIDIA/apex
RUN sed -i 's/check_cuda_torch_binary_vs_bare_metal(torch.utils.cpp_extension.CUDA_HOME)/pass/g' apex/setup.py
RUN pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" ./apex
RUN apt-get update -y
RUN apt-get install build-essential cmake -y
RUN apt-get install libopenblas-dev liblapack-dev -y
RUN apt-get install libx11-dev libgtk-3-dev -y
RUN pip install dlib
RUN pip install facenet-pytorch
RUN pip install albumentations==1.0.0 timm==0.4.12 pytorch_toolbelt tensorboardx
RUN pip install cython jupyter jupyterlab ipykernel matplotlib tqdm pandas

# Download pretrained ImageNet models
RUN apt install wget
RUN wget https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b7_ns-1dbc32de.pth -P /root/.cache/torch/hub/checkpoints/
RUN wget https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b5_ns-6f26d0cf.pth -P /root/.cache/torch/hub/checkpoints/

# Setting the working directory
WORKDIR /workspace

# Copying the required codebase
COPY . /workspace

RUN chmod 777 preprocess_data.sh
RUN chmod 777 train.sh
RUN chmod 777 predict_submission.sh

ENV PYTHONPATH=.

CMD ["/bin/bash"]
LICENSE
ADDED
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2020 Selim Seferbekov

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
README.md
ADDED
@@ -0,0 +1,14 @@
---
title: Deepfake
emoji: 🔥
colorFrom: indigo
colorTo: purple
sdk: gradio
sdk_version: 3.29.0
app_file: app.py
pinned: false
license: unlicense
duplicated_from: thecho7/deepfake
---

Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
__pycache__/kernel_utils.cpython-310.pyc
ADDED
Binary file (11.8 kB)
app.py
ADDED
@@ -0,0 +1,86 @@
import argparse
import os
import re
import time

import torch
from kernel_utils import VideoReader, FaceExtractor, confident_strategy, predict_on_video
from training.zoo.classifiers import DeepFakeClassifier

import gradio as gr

def model_fn(model_dir):
    model_path = os.path.join(model_dir, 'b7_ns_best.pth')
    model = DeepFakeClassifier(encoder="tf_efficientnet_b7_ns")  # default: CPU
    checkpoint = torch.load(model_path, map_location="cpu")
    state_dict = checkpoint.get("state_dict", checkpoint)
    model.load_state_dict({re.sub("^module.", "", k): v for k, v in state_dict.items()}, strict=True)
    model.eval()
    del checkpoint
    #models.append(model.half())

    return model

def convert_result(pred, class_names=["Real", "Fake"]):
    preds = [pred, 1 - pred]
    assert len(class_names) == len(preds), "Class / Prediction should have the same length"
    return {n: float(p) for n, p in zip(class_names, preds)}

def predict_fn(video):
    start = time.time()
    prediction = predict_on_video(face_extractor=meta["face_extractor"],
                                  video_path=video,
                                  batch_size=meta["fps"],
                                  input_size=meta["input_size"],
                                  models=model,
                                  strategy=meta["strategy"],
                                  apply_compression=False,
                                  device='cpu')

    elapsed_time = round(time.time() - start, 2)

    prediction = convert_result(prediction)

    return prediction, elapsed_time

# Create title, description and article strings
title = "Deepfake Detector (private)"
description = "A video Deepfake Classifier (code: https://github.com/selimsef/dfdc_deepfake_challenge)"

example_list = ["examples/" + str(p) for p in os.listdir("examples/")]

# Environments
model_dir = 'weights'
frames_per_video = 32
video_reader = VideoReader()
video_read_fn = lambda x: video_reader.read_frames(x, num_frames=frames_per_video)
face_extractor = FaceExtractor(video_read_fn)
input_size = 380
strategy = confident_strategy
class_names = ["Real", "Fake"]

meta = {"fps": 32,
        "face_extractor": face_extractor,
        "input_size": input_size,
        "strategy": strategy}

model = model_fn(model_dir)

"""
if __name__ == '__main__':
    video_path = "examples/nlurbvsozt.mp4"
    model = model_fn(model_dir)
    a, b = predict_fn(video_path)
    print(a, b)
"""
# Create the Gradio demo
demo = gr.Interface(fn=predict_fn,  # mapping function from input to output
                    inputs=gr.Video(),
                    outputs=[gr.Label(num_top_classes=2, label="Predictions"),
                             gr.Number(label="Prediction time (s)")],  # our fn has two outputs, therefore we declare two outputs
                    examples=example_list,
                    title=title,
                    description=description)

# Launch the demo!
demo.launch(debug=False,)  # Hugging Face Spaces don't need shareable links
configs/b5.json
ADDED
@@ -0,0 +1,28 @@
{
  "network": "DeepFakeClassifier",
  "encoder": "tf_efficientnet_b5_ns",
  "batches_per_epoch": 2500,
  "size": 380,
  "fp16": true,
  "optimizer": {
    "batch_size": 20,
    "type": "SGD",
    "momentum": 0.9,
    "weight_decay": 1e-4,
    "learning_rate": 0.01,
    "nesterov": true,
    "schedule": {
      "type": "poly",
      "mode": "step",
      "epochs": 30,
      "params": {"max_iter": 75100}
    }
  },
  "normalize": {
    "mean": [0.485, 0.456, 0.406],
    "std": [0.229, 0.224, 0.225]
  },
  "losses": {
    "BinaryCrossentropy": 1
  }
}
configs/b7.json
ADDED
@@ -0,0 +1,29 @@
{
  "network": "DeepFakeClassifier",
  "encoder": "tf_efficientnet_b7_ns",
  "batches_per_epoch": 2500,
  "size": 380,
  "fp16": true,
  "optimizer": {
    "batch_size": 4,
    "type": "SGD",
    "momentum": 0.9,
    "weight_decay": 1e-4,
    "learning_rate": 1e-4,
    "nesterov": true,
    "schedule": {
      "type": "poly",
      "mode": "step",
      "epochs": 20,
      "params": {"max_iter": 100500}
    }
  },
  "normalize": {
    "mean": [0.485, 0.456, 0.406],
    "std": [0.229, 0.224, 0.225]
  },
  "losses": {
    "BinaryCrossentropy": 1
  }
}
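For reference only, and not part of this commit: the configs above are plain JSON, and the loader that consumes them lives in training/tools/config.py, which is listed in this commit but not shown. Below is a minimal sketch of how configs/b7.json could be read and its "optimizer" block mapped onto torch.optim.SGD; load_config and build_sgd are hypothetical helpers, not the project's actual API.

import json

import torch


def load_config(path):
    # Parse a config such as configs/b7.json into a plain dict.
    with open(path) as f:
        return json.load(f)


def build_sgd(model, cfg):
    # Illustrative mapping of the "optimizer" block onto torch.optim.SGD.
    opt = cfg["optimizer"]
    return torch.optim.SGD(model.parameters(),
                           lr=opt["learning_rate"],
                           momentum=opt["momentum"],
                           weight_decay=opt["weight_decay"],
                           nesterov=opt["nesterov"])


if __name__ == "__main__":
    cfg = load_config("configs/b7.json")
    print(cfg["encoder"], cfg["optimizer"]["batch_size"])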
download_weights.sh
ADDED
@@ -0,0 +1,9 @@
tag=0.0.1

wget -O weights/final_111_DeepFakeClassifier_tf_efficientnet_b7_ns_0_36 https://github.com/selimsef/dfdc_deepfake_challenge/releases/download/$tag/final_111_DeepFakeClassifier_tf_efficientnet_b7_ns_0_36
wget -O weights/final_555_DeepFakeClassifier_tf_efficientnet_b7_ns_0_19 https://github.com/selimsef/dfdc_deepfake_challenge/releases/download/$tag/final_555_DeepFakeClassifier_tf_efficientnet_b7_ns_0_19
wget -O weights/final_777_DeepFakeClassifier_tf_efficientnet_b7_ns_0_29 https://github.com/selimsef/dfdc_deepfake_challenge/releases/download/$tag/final_777_DeepFakeClassifier_tf_efficientnet_b7_ns_0_29
wget -O weights/final_777_DeepFakeClassifier_tf_efficientnet_b7_ns_0_31 https://github.com/selimsef/dfdc_deepfake_challenge/releases/download/$tag/final_777_DeepFakeClassifier_tf_efficientnet_b7_ns_0_31
wget -O weights/final_888_DeepFakeClassifier_tf_efficientnet_b7_ns_0_37 https://github.com/selimsef/dfdc_deepfake_challenge/releases/download/$tag/final_888_DeepFakeClassifier_tf_efficientnet_b7_ns_0_37
wget -O weights/final_888_DeepFakeClassifier_tf_efficientnet_b7_ns_0_40 https://github.com/selimsef/dfdc_deepfake_challenge/releases/download/$tag/final_888_DeepFakeClassifier_tf_efficientnet_b7_ns_0_40
wget -O weights/final_999_DeepFakeClassifier_tf_efficientnet_b7_ns_0_23 https://github.com/selimsef/dfdc_deepfake_challenge/releases/download/$tag/final_999_DeepFakeClassifier_tf_efficientnet_b7_ns_0_23
examples/liuujwwgpr.mp4
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:b3aaefb51aa5720cdabcc68d93da5c6a22573d8da06bdaf5e009c7a370943e85
size 12852441
examples/nlurbvsozt.mp4
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:300b7dea93132b512f35de76572e7fcde666c812b91aec6b189dafa6f100c9b5
size 4486723
examples/rfjuhbnlro.mp4
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:b6d0bb841ebe6a8e20cf265b45356a1ea3fed9837025e8d549b2437290d79273
size 16218775
kernel_utils.py
ADDED
@@ -0,0 +1,366 @@
import os

import cv2
import numpy as np
import torch
from PIL import Image
from albumentations.augmentations.functional import image_compression
from facenet_pytorch.models.mtcnn import MTCNN
from concurrent.futures import ThreadPoolExecutor

from torchvision.transforms import Normalize

mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]
normalize_transform = Normalize(mean, std)


class VideoReader:
    """Helper class for reading one or more frames from a video file."""

    def __init__(self, verbose=True, insets=(0, 0)):
        """Creates a new VideoReader.

        Arguments:
            verbose: whether to print warnings and error messages
            insets: amount to inset the image by, as a percentage of
                (width, height). This lets you "zoom in" to an image
                to remove unimportant content around the borders.
                Useful for face detection, which may not work if the
                faces are too small.
        """
        self.verbose = verbose
        self.insets = insets

    def read_frames(self, path, num_frames, jitter=0, seed=None):
        """Reads frames that are always evenly spaced throughout the video.

        Arguments:
            path: the video file
            num_frames: how many frames to read, -1 means the entire video
                (warning: this will take up a lot of memory!)
            jitter: if not 0, adds small random offsets to the frame indices;
                this is useful so we don't always land on even or odd frames
            seed: random seed for jittering; if you set this to a fixed value,
                you probably want to set it only on the first video
        """
        assert num_frames > 0

        capture = cv2.VideoCapture(path)
        frame_count = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))
        if frame_count <= 0: return None

        frame_idxs = np.linspace(0, frame_count - 1, num_frames, endpoint=True, dtype=np.int32)
        if jitter > 0:
            np.random.seed(seed)
            jitter_offsets = np.random.randint(-jitter, jitter, len(frame_idxs))
            frame_idxs = np.clip(frame_idxs + jitter_offsets, 0, frame_count - 1)

        result = self._read_frames_at_indices(path, capture, frame_idxs)
        capture.release()
        return result

    def read_random_frames(self, path, num_frames, seed=None):
        """Picks the frame indices at random.

        Arguments:
            path: the video file
            num_frames: how many frames to read, -1 means the entire video
                (warning: this will take up a lot of memory!)
        """
        assert num_frames > 0
        np.random.seed(seed)

        capture = cv2.VideoCapture(path)
        frame_count = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))
        if frame_count <= 0: return None

        frame_idxs = sorted(np.random.choice(np.arange(0, frame_count), num_frames))
        result = self._read_frames_at_indices(path, capture, frame_idxs)

        capture.release()
        return result

    def read_frames_at_indices(self, path, frame_idxs):
        """Reads frames from a video and puts them into a NumPy array.

        Arguments:
            path: the video file
            frame_idxs: a list of frame indices. Important: should be
                sorted from low-to-high! If an index appears multiple
                times, the frame is still read only once.

        Returns:
            - a NumPy array of shape (num_frames, height, width, 3)
            - a list of the frame indices that were read

        Reading stops if loading a frame fails, in which case the first
        dimension returned may actually be less than num_frames.

        Returns None if an exception is thrown for any reason, or if no
        frames were read.
        """
        assert len(frame_idxs) > 0
        capture = cv2.VideoCapture(path)
        result = self._read_frames_at_indices(path, capture, frame_idxs)
        capture.release()
        return result

    def _read_frames_at_indices(self, path, capture, frame_idxs):
        try:
            frames = []
            idxs_read = []
            for frame_idx in range(frame_idxs[0], frame_idxs[-1] + 1):
                # Get the next frame, but don't decode if we're not using it.
                ret = capture.grab()
                if not ret:
                    if self.verbose:
                        print("Error grabbing frame %d from movie %s" % (frame_idx, path))
                    break

                # Need to look at this frame?
                current = len(idxs_read)
                if frame_idx == frame_idxs[current]:
                    ret, frame = capture.retrieve()
                    if not ret or frame is None:
                        if self.verbose:
                            print("Error retrieving frame %d from movie %s" % (frame_idx, path))
                        break

                    frame = self._postprocess_frame(frame)
                    frames.append(frame)
                    idxs_read.append(frame_idx)

            if len(frames) > 0:
                return np.stack(frames), idxs_read
            if self.verbose:
                print("No frames read from movie %s" % path)
            return None
        except:
            if self.verbose:
                print("Exception while reading movie %s" % path)
            return None

    def read_middle_frame(self, path):
        """Reads the frame from the middle of the video."""
        capture = cv2.VideoCapture(path)
        frame_count = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))
        result = self._read_frame_at_index(path, capture, frame_count // 2)
        capture.release()
        return result

    def read_frame_at_index(self, path, frame_idx):
        """Reads a single frame from a video.

        If you just want to read a single frame from the video, this is more
        efficient than scanning through the video to find the frame. However,
        for reading multiple frames it's not efficient.

        My guess is that a "streaming" approach is more efficient than a
        "random access" approach because, unless you happen to grab a keyframe,
        the decoder still needs to read all the previous frames in order to
        reconstruct the one you're asking for.

        Returns a NumPy array of shape (1, H, W, 3) and the index of the frame,
        or None if reading failed.
        """
        capture = cv2.VideoCapture(path)
        result = self._read_frame_at_index(path, capture, frame_idx)
        capture.release()
        return result

    def _read_frame_at_index(self, path, capture, frame_idx):
        capture.set(cv2.CAP_PROP_POS_FRAMES, frame_idx)
        ret, frame = capture.read()
        if not ret or frame is None:
            if self.verbose:
                print("Error retrieving frame %d from movie %s" % (frame_idx, path))
            return None
        else:
            frame = self._postprocess_frame(frame)
            return np.expand_dims(frame, axis=0), [frame_idx]

    def _postprocess_frame(self, frame):
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        if self.insets[0] > 0:
            W = frame.shape[1]
            p = int(W * self.insets[0])
            frame = frame[:, p:-p, :]

        if self.insets[1] > 0:
            H = frame.shape[1]
            q = int(H * self.insets[1])
            frame = frame[q:-q, :, :]

        return frame


class FaceExtractor:
    def __init__(self, video_read_fn):
        self.video_read_fn = video_read_fn
        self.detector = MTCNN(margin=0, thresholds=[0.7, 0.8, 0.8], device="cpu")

    def process_videos(self, input_dir, filenames, video_idxs):
        videos_read = []
        frames_read = []
        frames = []
        results = []
        for video_idx in video_idxs:
            # Read the full-size frames from this video.
            filename = filenames[video_idx]
            video_path = os.path.join(input_dir, filename)
            result = self.video_read_fn(video_path)
            # Error? Then skip this video.
            if result is None: continue

            videos_read.append(video_idx)

            # Keep track of the original frames (need them later).
            my_frames, my_idxs = result

            frames.append(my_frames)
            frames_read.append(my_idxs)
            for i, frame in enumerate(my_frames):
                h, w = frame.shape[:2]
                img = Image.fromarray(frame.astype(np.uint8))
                img = img.resize(size=[s // 2 for s in img.size])

                batch_boxes, probs = self.detector.detect(img, landmarks=False)

                faces = []
                scores = []
                if batch_boxes is None:
                    continue
                for bbox, score in zip(batch_boxes, probs):
                    if bbox is not None:
                        xmin, ymin, xmax, ymax = [int(b * 2) for b in bbox]
                        w = xmax - xmin
                        h = ymax - ymin
                        p_h = h // 3
                        p_w = w // 3
                        crop = frame[max(ymin - p_h, 0):ymax + p_h, max(xmin - p_w, 0):xmax + p_w]
                        faces.append(crop)
                        scores.append(score)

                frame_dict = {"video_idx": video_idx,
                              "frame_idx": my_idxs[i],
                              "frame_w": w,
                              "frame_h": h,
                              "faces": faces,
                              "scores": scores}
                results.append(frame_dict)

        return results

    def process_video(self, video_path):
        """Convenience method for doing face extraction on a single video."""
        input_dir = os.path.dirname(video_path)
        filenames = [os.path.basename(video_path)]
        return self.process_videos(input_dir, filenames, [0])


def confident_strategy(pred, t=0.8):
    pred = np.array(pred)
    sz = len(pred)
    fakes = np.count_nonzero(pred > t)
    # 11 frames are detected as fakes with high probability
    if fakes > sz // 2.5 and fakes > 11:
        return np.mean(pred[pred > t])
    elif np.count_nonzero(pred < 0.2) > 0.9 * sz:
        return np.mean(pred[pred < 0.2])
    else:
        return np.mean(pred)

strategy = confident_strategy


def put_to_center(img, input_size):
    img = img[:input_size, :input_size]
    image = np.zeros((input_size, input_size, 3), dtype=np.uint8)
    start_w = (input_size - img.shape[1]) // 2
    start_h = (input_size - img.shape[0]) // 2
    image[start_h:start_h + img.shape[0], start_w: start_w + img.shape[1], :] = img
    return image


def isotropically_resize_image(img, size, interpolation_down=cv2.INTER_AREA, interpolation_up=cv2.INTER_CUBIC):
    h, w = img.shape[:2]
    if max(w, h) == size:
        return img
    if w > h:
        scale = size / w
        h = h * scale
        w = size
    else:
        scale = size / h
        w = w * scale
        h = size
    interpolation = interpolation_up if scale > 1 else interpolation_down
    resized = cv2.resize(img, (int(w), int(h)), interpolation=interpolation)
    return resized


def predict_on_video(face_extractor, video_path, batch_size, input_size, models, strategy=np.mean,
                     apply_compression=False, device='cpu'):
    batch_size *= 4
    try:
        faces = face_extractor.process_video(video_path)
        if len(faces) > 0:
            x = np.zeros((batch_size, input_size, input_size, 3), dtype=np.uint8)
            n = 0
            for frame_data in faces:
                for face in frame_data["faces"]:
                    resized_face = isotropically_resize_image(face, input_size)
                    resized_face = put_to_center(resized_face, input_size)
                    if apply_compression:
                        resized_face = image_compression(resized_face, quality=90, image_type=".jpg")
                    if n + 1 < batch_size:
                        x[n] = resized_face
                        n += 1
                    else:
                        pass
            if n > 0:
                if device == 'cpu':
                    x = torch.tensor(x, device='cpu').float()
                else:
                    x = torch.tensor(x, device="cuda").float()
                # Preprocess the images.
                x = x.permute((0, 3, 1, 2))
                for i in range(len(x)):
                    x[i] = normalize_transform(x[i] / 255.)
                # Make a prediction, then take the average.
                with torch.no_grad():
                    preds = []
                    models_ = [models]
                    for model in models_:
                        if device == 'cpu':
                            y_pred = model(x[:n])
                        else:
                            y_pred = model(x[:n].half())
                        y_pred = torch.sigmoid(y_pred.squeeze())
                        bpred = y_pred[:n].cpu().numpy()
                        preds.append(strategy(bpred))
                    return np.mean(preds)
    except Exception as e:
        print("Prediction error on video %s: %s" % (video_path, str(e)))

    return 0.5


def predict_on_video_set(face_extractor, videos, input_size, num_workers, test_dir, frames_per_video, models,
                         strategy=np.mean,
                         apply_compression=False):
    def process_file(i):
        filename = videos[i]
        y_pred = predict_on_video(face_extractor=face_extractor, video_path=os.path.join(test_dir, filename),
                                  input_size=input_size,
                                  batch_size=frames_per_video,
                                  models=models, strategy=strategy, apply_compression=apply_compression)
        return y_pred

    with ThreadPoolExecutor(max_workers=num_workers) as ex:
        predictions = ex.map(process_file, range(len(videos)))
    return list(predictions)
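As an aside, not part of the commit: a minimal sketch of how the face-extraction stage above is driven, using one of the example clips shipped with the Space; the frame count of 32 matches frames_per_video in app.py.

from kernel_utils import VideoReader, FaceExtractor

reader = VideoReader()
read_fn = lambda path: reader.read_frames(path, num_frames=32)
extractor = FaceExtractor(read_fn)

# process_video() returns one dict per sampled frame with the frame index,
# the cropped face images and the MTCNN confidence scores.
for frame_info in extractor.process_video("examples/nlurbvsozt.mp4"):
    print(frame_info["frame_idx"], len(frame_info["faces"]), frame_info["scores"])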
libs/shape_predictor_68_face_landmarks.dat
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:fbdc2cb80eb9aa7a758672cbfdda32ba6300efe9b6e6c7a299ff7e736b11b92f
size 99693937
requirements.txt
ADDED
@@ -0,0 +1,131 @@
aiofiles==23.1.0
aiohttp==3.8.4
aiosignal==1.3.1
albumentations==1.3.0
altair==5.0.0
anyio==3.6.2
anykeystore==0.2
apex
appdirs==1.4.4
async-timeout==4.0.2
attrs==23.1.0
certifi==2022.12.7
charset-normalizer==2.1.1
click==8.1.3
cmake==3.25.0
contourpy==1.0.7
cryptacular==1.6.2
cycler==0.11.0
defusedxml==0.7.1
dlib==19.24.1
docker-pycreds==0.4.0
facenet-pytorch==2.5.3
fastapi==0.95.1
ffmpy==0.3.0
filelock==3.9.0
fonttools==4.39.4
frozenlist==1.3.3
fsspec==2023.5.0
gitdb==4.0.10
GitPython==3.1.31
gradio==3.30.0
gradio_client==0.2.4
greenlet==2.0.2
h11==0.14.0
httpcore==0.17.0
httpx==0.24.0
huggingface-hub==0.14.1
hupper==1.12
idna==3.4
imageio==2.28.1
Jinja2==3.1.2
joblib==1.2.0
jsonschema==4.17.3
kiwisolver==1.4.4
lazy_loader==0.2
linkify-it-py==2.0.2
lit==15.0.7
markdown-it-py==2.2.0
MarkupSafe==2.1.2
matplotlib==3.7.1
mdit-py-plugins==0.3.3
mdurl==0.1.2
mpmath==1.2.1
multidict==6.0.4
networkx==3.0
numpy==1.24.3
oauthlib==3.2.2
opencv-python==4.7.0.72
opencv-python-headless==4.7.0.72
orjson==3.8.12
packaging==23.0
pandas==2.0.1
PasteDeploy==3.0.1
pathtools==0.1.2
pbkdf2==1.3
pep517==0.13.0
Pillow==9.3.0
plaster==1.1.2
plaster-pastedeploy==1.0.1
protobuf==3.20.3
psutil==5.9.5
pydantic==1.10.7
pydub==0.25.1
Pygments==2.15.1
pyparsing==3.0.9
pyramid==2.0.1
pyramid-mailer==0.15.1
pyrsistent==0.19.3
python-dateutil==2.8.2
python-multipart==0.0.6
python3-openid==3.2.0
pytorch-toolbelt==0.6.3
pytz==2023.3
PyWavelets==1.4.1
PyYAML==6.0
qudida==0.0.4
repoze.sendmail==4.4.1
requests==2.28.1
requests-oauthlib==1.3.1
scikit-image==0.20.0
scikit-learn==1.2.2
scipy==1.9.0
semantic-version==2.10.0
sentry-sdk==1.22.2
setproctitle==1.3.2
sh==1.14.3
six==1.16.0
smmap==5.0.0
sniffio==1.3.0
SQLAlchemy==1.4.48
starlette==0.26.1
sympy==1.11.1
tensorboardX==2.6
threadpoolctl==3.1.0
tifffile==2023.4.12
timm==0.6.13
toml==0.10.2
tomli==2.0.1
toolz==0.12.0
torch==2.0.1
torchvision==0.15.2
tqdm==4.65.0
transaction==3.1.0
translationstring==1.4
triton==2.0.0
typing_extensions==4.4.0
tzdata==2023.3
uc-micro-py==1.0.2
urllib3==1.26.13
uvicorn==0.22.0
velruse==1.1.1
venusian==3.0.0
wandb==0.15.2
WebOb==1.8.7
websockets==11.0.3
WTForms==3.0.1
wtforms-recaptcha==0.3.2
yarl==1.9.2
zope.deprecation==5.0
zope.interface==6.0
zope.sqlalchemy==2.0
training/__init__.py
ADDED
File without changes
training/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (148 Bytes)
training/__pycache__/__init__.cpython-39.pyc
ADDED
Binary file (146 Bytes)
training/__pycache__/losses.cpython-310.pyc
ADDED
Binary file (1.54 kB)
training/__pycache__/losses.cpython-39.pyc
ADDED
Binary file (1.53 kB)
training/datasets/__init__.py
ADDED
File without changes
training/datasets/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (157 Bytes)
training/datasets/__pycache__/classifier_dataset.cpython-310.pyc
ADDED
Binary file (10.8 kB)
training/datasets/__pycache__/validation_set.cpython-310.pyc
ADDED
Binary file (4.99 kB)
training/datasets/classifier_dataset.py
ADDED
@@ -0,0 +1,384 @@
import math
import os
import random
import sys
import traceback

import cv2
import numpy as np
import pandas as pd
import skimage.draw
from albumentations import ImageCompression, OneOf, GaussianBlur, Blur
from albumentations.augmentations.functional import image_compression
from albumentations.augmentations.geometric.functional import rot90
from albumentations.pytorch.functional import img_to_tensor
from scipy.ndimage import binary_erosion, binary_dilation
from skimage import measure
from torch.utils.data import Dataset
import dlib

from training.datasets.validation_set import PUBLIC_SET


def prepare_bit_masks(mask):
    h, w = mask.shape
    mid_w = w // 2
    mid_h = w // 2
    masks = []
    ones = np.ones_like(mask)
    ones[:mid_h] = 0
    masks.append(ones)
    ones = np.ones_like(mask)
    ones[mid_h:] = 0
    masks.append(ones)
    ones = np.ones_like(mask)
    ones[:, :mid_w] = 0
    masks.append(ones)
    ones = np.ones_like(mask)
    ones[:, mid_w:] = 0
    masks.append(ones)
    ones = np.ones_like(mask)
    ones[:mid_h, :mid_w] = 0
    ones[mid_h:, mid_w:] = 0
    masks.append(ones)
    ones = np.ones_like(mask)
    ones[:mid_h, mid_w:] = 0
    ones[mid_h:, :mid_w] = 0
    masks.append(ones)
    return masks


detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor('libs/shape_predictor_68_face_landmarks.dat')


def blackout_convex_hull(img):
    try:
        rect = detector(img)[0]
        sp = predictor(img, rect)
        landmarks = np.array([[p.x, p.y] for p in sp.parts()])
        outline = landmarks[[*range(17), *range(26, 16, -1)]]
        Y, X = skimage.draw.polygon(outline[:, 1], outline[:, 0])
        cropped_img = np.zeros(img.shape[:2], dtype=np.uint8)
        cropped_img[Y, X] = 1
        # if random.random() > 0.5:
        #     img[cropped_img == 0] = 0
        #     #leave only face
        #     return img

        y, x = measure.centroid(cropped_img)
        y = int(y)
        x = int(x)
        first = random.random() > 0.5
        if random.random() > 0.5:
            if first:
                cropped_img[:y, :] = 0
            else:
                cropped_img[y:, :] = 0
        else:
            if first:
                cropped_img[:, :x] = 0
            else:
                cropped_img[:, x:] = 0

        img[cropped_img > 0] = 0
    except Exception as e:
        pass


def dist(p1, p2):
    return math.sqrt((p1[0] - p2[0]) ** 2 + (p1[1] - p2[1]) ** 2)


def remove_eyes(image, landmarks):
    image = image.copy()
    (x1, y1), (x2, y2) = landmarks[:2]
    mask = np.zeros_like(image[..., 0])
    line = cv2.line(mask, (x1, y1), (x2, y2), color=(1), thickness=2)
    w = dist((x1, y1), (x2, y2))
    dilation = int(w // 4)
    line = binary_dilation(line, iterations=dilation)
    image[line, :] = 0
    return image


def remove_nose(image, landmarks):
    image = image.copy()
    (x1, y1), (x2, y2) = landmarks[:2]
    x3, y3 = landmarks[2]
    mask = np.zeros_like(image[..., 0])
    x4 = int((x1 + x2) / 2)
    y4 = int((y1 + y2) / 2)
    line = cv2.line(mask, (x3, y3), (x4, y4), color=(1), thickness=2)
    w = dist((x1, y1), (x2, y2))
    dilation = int(w // 4)
    line = binary_dilation(line, iterations=dilation)
    image[line, :] = 0
    return image


def remove_mouth(image, landmarks):
    image = image.copy()
    (x1, y1), (x2, y2) = landmarks[-2:]
    mask = np.zeros_like(image[..., 0])
    line = cv2.line(mask, (x1, y1), (x2, y2), color=(1), thickness=2)
    w = dist((x1, y1), (x2, y2))
    dilation = int(w // 3)
    line = binary_dilation(line, iterations=dilation)
    image[line, :] = 0
    return image


def remove_landmark(image, landmarks):
    if random.random() > 0.5:
        image = remove_eyes(image, landmarks)
    elif random.random() > 0.5:
        image = remove_mouth(image, landmarks)
    elif random.random() > 0.5:
        image = remove_nose(image, landmarks)
    return image


def change_padding(image, part=5):
    h, w = image.shape[:2]
    # original padding was done with 1/3 from each side, too much
    pad_h = int(((3 / 5) * h) / part)
    pad_w = int(((3 / 5) * w) / part)
    image = image[h // 5 - pad_h:-h // 5 + pad_h, w // 5 - pad_w:-w // 5 + pad_w]
    return image


def blackout_random(image, mask, label):
    binary_mask = mask > 0.4 * 255
    h, w = binary_mask.shape[:2]

    tries = 50
    current_try = 1
    while current_try < tries:
        first = random.random() < 0.5
        if random.random() < 0.5:
            pivot = random.randint(h // 2 - h // 5, h // 2 + h // 5)
            bitmap_msk = np.ones_like(binary_mask)
            if first:
                bitmap_msk[:pivot, :] = 0
            else:
                bitmap_msk[pivot:, :] = 0
        else:
            pivot = random.randint(w // 2 - w // 5, w // 2 + w // 5)
            bitmap_msk = np.ones_like(binary_mask)
            if first:
                bitmap_msk[:, :pivot] = 0
            else:
                bitmap_msk[:, pivot:] = 0

        if label < 0.5 and np.count_nonzero(image * np.expand_dims(bitmap_msk, axis=-1)) / 3 > (h * w) / 5 \
                or np.count_nonzero(binary_mask * bitmap_msk) > 40:
            mask *= bitmap_msk
            image *= np.expand_dims(bitmap_msk, axis=-1)
            break
        current_try += 1
    return image


def blend_original(img):
    img = img.copy()
    h, w = img.shape[:2]
    rect = detector(img)
    if len(rect) == 0:
        return img
    else:
        rect = rect[0]
    sp = predictor(img, rect)
    landmarks = np.array([[p.x, p.y] for p in sp.parts()])
    outline = landmarks[[*range(17), *range(26, 16, -1)]]
    Y, X = skimage.draw.polygon(outline[:, 1], outline[:, 0])
    raw_mask = np.zeros(img.shape[:2], dtype=np.uint8)
    raw_mask[Y, X] = 1
    face = img * np.expand_dims(raw_mask, -1)

    # add warping
    h1 = random.randint(h - h // 2, h + h // 2)
    w1 = random.randint(w - w // 2, w + w // 2)
    while abs(h1 - h) < h // 3 and abs(w1 - w) < w // 3:
        h1 = random.randint(h - h // 2, h + h // 2)
        w1 = random.randint(w - w // 2, w + w // 2)
    face = cv2.resize(face, (w1, h1), interpolation=random.choice([cv2.INTER_LINEAR, cv2.INTER_AREA, cv2.INTER_CUBIC]))
    face = cv2.resize(face, (w, h), interpolation=random.choice([cv2.INTER_LINEAR, cv2.INTER_AREA, cv2.INTER_CUBIC]))

    raw_mask = binary_erosion(raw_mask, iterations=random.randint(4, 10))
    img[raw_mask, :] = face[raw_mask, :]
    if random.random() < 0.2:
        img = OneOf([GaussianBlur(), Blur()], p=0.5)(image=img)["image"]
    # image compression
    if random.random() < 0.5:
        img = ImageCompression(quality_lower=40, quality_upper=95)(image=img)["image"]
    return img


class DeepFakeClassifierDataset(Dataset):

    def __init__(self,
                 data_path="/mnt/sota/datasets/deepfake",
                 fold=0,
                 label_smoothing=0.01,
                 padding_part=3,
                 hardcore=True,
                 crops_dir="crops",
                 folds_csv="folds.csv",
                 normalize={"mean": [0.485, 0.456, 0.406],
                            "std": [0.229, 0.224, 0.225]},
                 rotation=False,
                 mode="train",
                 reduce_val=True,
                 oversample_real=True,
                 transforms=None
                 ):
        super().__init__()
        self.data_root = data_path
        self.fold = fold
        self.folds_csv = folds_csv
        self.mode = mode
        self.rotation = rotation
        self.padding_part = padding_part
        self.hardcore = hardcore
        self.crops_dir = crops_dir
        self.label_smoothing = label_smoothing
        self.normalize = normalize
        self.transforms = transforms
        self.df = pd.read_csv(self.folds_csv)
        self.oversample_real = oversample_real
        self.reduce_val = reduce_val

    def __getitem__(self, index: int):

        while True:
            video, img_file, label, ori_video, frame, fold = self.data[index]
            try:
                if self.mode == "train":
                    label = np.clip(label, self.label_smoothing, 1 - self.label_smoothing)
                img_path = os.path.join(self.data_root, self.crops_dir, video, img_file)
                image = cv2.imread(img_path, cv2.IMREAD_COLOR)
                image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
                mask = np.zeros(image.shape[:2], dtype=np.uint8)
                diff_path = os.path.join(self.data_root, "diffs", video, img_file[:-4] + "_diff.png")
                try:
                    msk = cv2.imread(diff_path, cv2.IMREAD_GRAYSCALE)
                    if msk is not None:
                        mask = msk
                except:
                    print("not found mask", diff_path)
                    pass
                if self.mode == "train" and self.hardcore and not self.rotation:
                    landmark_path = os.path.join(self.data_root, "landmarks", ori_video, img_file[:-4] + ".npy")
                    if os.path.exists(landmark_path) and random.random() < 0.7:
                        landmarks = np.load(landmark_path)
                        image = remove_landmark(image, landmarks)
                    elif random.random() < 0.2:
                        blackout_convex_hull(image)
                    elif random.random() < 0.1:
                        binary_mask = mask > 0.4 * 255
                        masks = prepare_bit_masks((binary_mask * 1).astype(np.uint8))
                        tries = 6
                        current_try = 1
                        while current_try < tries:
                            bitmap_msk = random.choice(masks)
                            if label < 0.5 or np.count_nonzero(mask * bitmap_msk) > 20:
                                mask *= bitmap_msk
                                image *= np.expand_dims(bitmap_msk, axis=-1)
                                break
                            current_try += 1
                if self.mode == "train" and self.padding_part > 3:
                    image = change_padding(image, self.padding_part)
                valid_label = np.count_nonzero(mask[mask > 20]) > 32 or label < 0.5
                valid_label = 1 if valid_label else 0
                rotation = 0
                if self.transforms:
                    data = self.transforms(image=image, mask=mask)
                    image = data["image"]
                    mask = data["mask"]
                if self.mode == "train" and self.hardcore and self.rotation:
                    # landmark_path = os.path.join(self.data_root, "landmarks", ori_video, img_file[:-4] + ".npy")
                    dropout = 0.8 if label > 0.5 else 0.6
                    if self.rotation:
                        dropout *= 0.7
                    elif random.random() < dropout:
                        blackout_random(image, mask, label)

                #
                # os.makedirs("../images", exist_ok=True)
                # cv2.imwrite(os.path.join("../images", video+ "_" + str(1 if label > 0.5 else 0) + "_"+img_file), image[...,::-1])

                if self.mode == "train" and self.rotation:
                    rotation = random.randint(0, 3)
                    image = rot90(image, rotation)

                image = img_to_tensor(image, self.normalize)
                return {"image": image, "labels": np.array((label,)), "img_name": os.path.join(video, img_file),
                        "valid": valid_label, "rotations": rotation}
            except Exception as e:
                traceback.print_exc(file=sys.stdout)
                print("Broken image", os.path.join(self.data_root, self.crops_dir, video, img_file))
                index = random.randint(0, len(self.data) - 1)

    def random_blackout_landmark(self, image, mask, landmarks):
        x, y = random.choice(landmarks)
        first = random.random() > 0.5
        # crop half face either vertically or horizontally
        if random.random() > 0.5:
            # width
            if first:
                image[:, :x] = 0
                mask[:, :x] = 0
            else:
                image[:, x:] = 0
                mask[:, x:] = 0
        else:
            # height
            if first:
                image[:y, :] = 0
                mask[:y, :] = 0
            else:
                image[y:, :] = 0
                mask[y:, :] = 0

    def reset(self, epoch, seed):
        self.data = self._prepare_data(epoch, seed)

    def __len__(self) -> int:
        return len(self.data)

    def get_distribution(self):
        return self.n_real, self.n_fake

    def _prepare_data(self, epoch, seed):
        df = self.df
        if self.mode == "train":
            rows = df[df["fold"] != self.fold]
        else:
            rows = df[df["fold"] == self.fold]
        seed = (epoch + 1) * seed
        if self.oversample_real:
            rows = self._oversample(rows, seed)
        if self.mode == "val" and self.reduce_val:
            # every 2nd frame, to speed up validation
            rows = rows[rows["frame"] % 20 == 0]
            # another option is to use public validation set
            #rows = rows[rows["video"].isin(PUBLIC_SET)]

        print(
            "real {} fakes {} mode {}".format(len(rows[rows["label"] == 0]), len(rows[rows["label"] == 1]), self.mode))
        data = rows.values

        self.n_real = len(rows[rows["label"] == 0])
        self.n_fake = len(rows[rows["label"] == 1])
        np.random.seed(seed)
        np.random.shuffle(data)
        return data

    def _oversample(self, rows: pd.DataFrame, seed):
        real = rows[rows["label"] == 0]
        fakes = rows[rows["label"] == 1]
        num_real = real["video"].count()
        if self.mode == "train":
            fakes = fakes.sample(n=num_real, replace=False, random_state=seed)
        return pd.concat([real, fakes])
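For context, and not part of the commit: this dataset is driven by training/pipelines/train_classifier.py, which is listed in this commit but not shown, so the wiring below is only an assumed sketch; the data path and CSV are the defaults from __init__, and the loader settings are illustrative.

from torch.utils.data import DataLoader

from training.datasets.classifier_dataset import DeepFakeClassifierDataset

dataset = DeepFakeClassifierDataset(data_path="/mnt/sota/datasets/deepfake",
                                    fold=0,
                                    crops_dir="crops",
                                    folds_csv="folds.csv",
                                    mode="train")
# reset() samples and shuffles the rows for the coming epoch (oversampling real
# frames to balance the classes); it must be called before iterating.
dataset.reset(epoch=0, seed=111)

loader = DataLoader(dataset, batch_size=4, shuffle=True, num_workers=2)
for batch in loader:
    images, labels = batch["image"], batch["labels"]
    break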
training/datasets/validation_set.py
ADDED
@@ -0,0 +1,60 @@
PUBLIC_SET = {'tjuihawuqm', 'prwsfljdjo', 'scrbqgpvzz', 'ziipxxchai', 'uubgqnvfdl', 'wclvkepakb', 'xjvxtuakyd',
              'qlvsqdroqo', 'bcbqxhziqz', 'yzuestxcbq', 'hxwtsaydal', 'kqlvggiqee', 'vtunvalyji', 'mohiqoogpb',
              'siebfpwuhu', 'cekwtyxdoo', 'hszwwswewp', 'orekjthsef', 'huvlwkxoxm', 'fmhiujydwo', 'lhvjzhjxdp',
              'ibxfxggtqh', 'bofrwgeyjo', 'rmufsuogzn', 'zbgssotnjm', 'dpevefkefv', 'sufvvwmbha', 'ncoeewrdlo',
              'qhsehzgxqj', 'yxadevzohx', 'aomqqjipcp', 'pcyswtgick', 'wfzjxzhdkj', 'rcjfxxhcal', 'lnjkpdviqb',
              'xmkwsnuzyq', 'ouaowjmigq', 'bkuzquigyt', 'vwxednhlwz', 'mszblrdprw', 'blnmxntbey', 'gccnvdoknm',
              'mkzaekkvej', 'hclsparpth', 'eryjktdexi', 'hfsvqabzfq', 'acazlolrpz', 'yoyhmxtrys', 'rerpivllud',
              'elackxuccp', 'zgbhzkditd', 'vjljdfopjg', 'famlupsgqm', 'nymodlmxni', 'qcbkztamqc', 'qclpbcbgeq',
              'lpkgabskbw', 'mnowxangqx', 'czfqlbcfpa', 'qyyhuvqmyf', 'toinozytsp', 'ztyvglkcsf', 'nplviymzlg',
              'opvqdabdap', 'uxuvkrjhws', 'mxahsihabr', 'cqxxumarvp', 'ptbfnkajyi', 'njzshtfmcw', 'dcqodpzomd',
              'ajiyrjfyzp', 'ywauoonmlr', 'gochxzemmq', 'lpgxwdgnio', 'hnfwagcxdf', 'gfcycflhbo', 'gunamloolc',
              'yhjlnisfel', 'srfefmyjvt', 'evysmtpnrf', 'aktnlyqpah', 'gpsxfxrjrr', 'zfobicuigx', 'mnzabbkpmt',
              'rfjuhbnlro', 'zuwwbbusgl', 'csnkohqxdv', 'bzvzpwrabw', 'yietrwuncf', 'wynotylpnm', 'ekboxwrwuv',
              'rcecrgeotc', 'rklawjhbpv', 'ilqwcbprqa', 'jsysgmycsx', 'sqixhnilfm', 'wnlubukrki', 'nikynwcvuh',
              'sjkfxrlxxs', 'btdxnajogv', 'wjhpisoeaj', 'dyjklprkoc', 'qlqhjcshpk', 'jyfvaequfg', 'dozjwhnedd',
              'owaogcehvc', 'oyqgwjdwaj', 'vvfszaosiv', 'kmcdjxmnoa', 'jiswxuqzyz', 'ddtbarpcgo', 'wqysrieiqu',
              'xcruhaccxc', 'honxqdilvv', 'nxgzmgzkfv', 'cxsvvnxpyz', 'demuhxssgl', 'hzoiotcykp', 'fwykevubzy',
              'tejfudfgpq', 'kvmpmhdxly', 'oojxonbgow', 'vurjckblge', 'oysopgovhu', 'khpipxnsvx', 'pqthmvwonf',
              'fddmkqjwsh', 'pcoxcmtroa', 'cnxccbjlct', 'ggzjfrirjh', 'jquevmhdvc', 'ecumyiowzs', 'esmqxszybs',
              'mllzkpgatp', 'ryxaqpfubf', 'hbufmvbium', 'vdtsbqidjb', 'sjwywglgym', 'qxyrtwozyw', 'upmgtackuf',
              'ucthmsajay', 'zgjosltkie', 'snlyjbnpgw', 'nswtvttxre', 'iznnzjvaxc', 'jhczqfefgw', 'htzbnroagi',
              'pdswwyyntw', 'uvrzaczrbx', 'vbcgoyxsvn', 'hzssdinxec', 'novarhxpbj', 'vizerpsvbz', 'jawgcggquk',
              'iorbtaarte', 'yarpxfqejd', 'vhbbwdflyh', 'rrrfjhugvb', 'fneqiqpqvs', 'jytrvwlewz', 'bfjsthfhbd',
              'rxdoimqble', 'ekelfsnqof', 'uqvxjfpwdo', 'cjkctqqakb', 'tynfsthodx', 'yllztsrwjw', 'bktkwbcawi',
              'wcqvzujamg', 'bcvheslzrq', 'aqrsylrzgi', 'sktpeppbkc', 'mkmgcxaztt', 'etdliwticv', 'hqzwudvhih',
              'swsaoktwgi', 'temjefwaas', 'papagllumt', 'xrtvqhdibb', 'oelqpetgwj', 'ggdpclfcgk', 'imdmhwkkni',
              'lebzjtusnr', 'xhtppuyqdr', 'nxzgekegsp', 'waucvvmtkq', 'rnfcjxynfa', 'adohdulfwb', 'tjywwgftmv',
              'fjrueenjyp', 'oaguiggjyv', 'ytopzxrswu', 'yxvmusxvcz', 'rukyxomwcx', 'qdqdsaiitt', 'mxlipjhmqk',
              'voawxrmqyl', 'kezwvsxxzj', 'oocincvedt', 'qooxnxqqjb', 'mwwploizlj', 'yaxgpxhavq', 'uhakqelqri',
              'bvpeerislp', 'bkcyglmfci', 'jyoxdvxpza', 'gkutjglghz', 'knxltsvzyu', 'ybbrkacebd', 'apvzjkvnwn',
              'ahjnxtiamx', 'hsbljbsgxr', 'fnxgqcvlsd', 'xphdfgmfmz', 'scbdenmaed', 'ywxpquomgt', 'yljecirelf',
              'wcvsqnplsk', 'vmxfwxgdei', 'icbsahlivv', 'yhylappzid', 'irqzdokcws', 'petmyhjclt', 'rmlzgerevr',
              'qarqtkvgby', 'nkhzxomani', 'viteugozpv', 'qhkzlnzruj', 'eisofhptvk', 'gqnaxievjx', 'heiyoojifp',
              'zcxcmneefk', 'wvgviwnwob', 'gcdtglsoqj', 'yqhouqakbx', 'fopjiyxiqd', 'hierggamuo', 'ypbtpunjvm',
              'sjinmmbipg', 'kmqkiihrmj', 'wmoqzxddkb', 'lnhkjhyhvw', 'wixbuuzygv', 'fsdrwikhge', 'sfsayjgzrh',
              'pqdeutauqc', 'frqfsucgao', 'pdufsewrec', 'bfdopzvxbi', 'shnsajrsow', 'rvvpazsffd', 'pxcfrszlgi',
              'itfsvvmslp', 'ayipraspbn', 'prhmixykhr', 'doniqevxeg', 'dvtpwatuja', 'jiavqbrkyk', 'ipkpxvwroe',
              'syxobtuucp', 'syuxttuyhm', 'nwvsbmyndn', 'eqslzbqfea', 'ytddugrwph', 'vokrpfjpeb', 'bdshuoldwx',
              'fmvvmcbdrw', 'bnuwxhfahw', 'gbnzicjyhz', 'txnmkabufs', 'gfdjzwnpyp', 'hweshqpfwe', 'dxgnpnowgk',
              'xugmhbetrw', 'rktrpsdlci', 'nthpnwylxo', 'ihglzxzroo', 'ocgdbrgmtq', 'ruhtnngrqv', 'xljemofssi',
              'zxacihctqp', 'ghnpsltzyn', 'lbigytrrtr', 'ndikguxzek', 'mdfndlljvt', 'lyoslorecs', 'oefukgnvel',
              'zmxeiipnqb', 'cosghhimnd', 'alrtntfxtd', 'eywdmustbb', 'ooafcxxfrs', 'fqgypsunzr', 'hevcclcklc',
              'uhrqlmlclw', 'ipvwtgdlre', 'wcssbghcpc', 'didzujjhtg', 'fjxovgmwnm', 'dmmvuaikkv', 'hitfycdavv',
              'zyufpqvpyu', 'coujjnypba', 'temeqbmzxu', 'apedduehoy', 'iksxzpqxzi', 'kwfdyqofzw', 'aassnaulhq',
              'eyguqfmgzh', 'yiykshcbaz', 'sngjsueuhs', 'okgelildpc', 'ztyuiqrhdk', 'tvhjcfnqtg', 'gfgcwxkbjd',
              'lbfqksftuo', 'kowiwvrjht', 'dkuqbduxev', 'mwnibuujwz', 'sodvtfqbpf', 'hsbwhlolsn', 'qsjiypnjwi',
              'blszgmxkvu', 'ystdtnetgj', 'rfwxcinshk', 'vnlzxqwthl', 'ljouzjaqqe', 'gahgyuwzbu', 'xxzefxwyku',
              'xitgdpzbxv', 'sylnrepacf', 'igpvrfjdzc', 'nxnmkytwze', 'psesikjaxx', 'dvwpvqdflx', 'bjyaxvggle',
              'dpmgoiwhuf', 'wadvzjhwtw', 'kcjvhgvhpt', 'eppyqpgewp', 'tyjpjpglgx', 'cekarydqba', 'dvkdfhrpph',
              'cnpanmywno', 'ljauauuyka', 'hicjuubiau', 'cqhwesrciw', 'dnmowthjcj', 'lujvyveojc', 'wndursivcx',
              'espkiocpxq', 'jsbpkpxwew', 'dsnxgrfdmd', 'hyjqolupxn', 'xdezcezszc', 'axfhbpkdlc', 'qqnlrngaft',
              'coqwgzpbhx', 'ncmpqwmnzb', 'sznkemeqro', 'omphqltjdd', 'uoccaiathd', 'jzmzdispyo', 'pxjkzvqomp',
              'udxqbhgvvx', 'dzkyxbbqkr', 'dtozwcapoa', 'qswlzfgcgj', 'tgawasvbbr', 'lmdyicksrv', 'fzvpbrzssi',
              'dxfdovivlw', 'zzmgnglanj', 'vssmlqoiti', 'vajkicalux', 'ekvwecwltj', 'ylxwcwhjjd', 'keioymnobc',
              'usqqvxcjmg', 'phjvutxpoi', 'nycmyuzpml', 'bwdmzwhdnw', 'fxuxxtryjn', 'orixbcfvdz', 'hefisnapds',
              'fpevfidstw', 'halvwiltfs', 'dzojiwfvba', 'ojsxxkalat', 'esjdyghhog', 'ptbnewtvon', 'hcanfkwivl',
              'yronlutbgm', 'llplvmcvbl', 'yxirnfyijn', 'nwvloufjty', 'rtpbawlmxr', 'aayfryxljh', 'zfrrixsimm',
              'txmnoyiyte'}
training/losses.py
ADDED
@@ -0,0 +1,28 @@
from typing import Any

from pytorch_toolbelt.losses import BinaryFocalLoss
from torch import nn
from torch.nn.modules.loss import BCEWithLogitsLoss


class WeightedLosses(nn.Module):
    def __init__(self, losses, weights):
        super().__init__()
        self.losses = losses
        self.weights = weights

    def forward(self, *input: Any, **kwargs: Any):
        cum_loss = 0
        for loss, w in zip(self.losses, self.weights):
            cum_loss += w * loss.forward(*input, **kwargs)
        return cum_loss


class BinaryCrossentropy(BCEWithLogitsLoss):
    pass


class FocalLoss(BinaryFocalLoss):
    def __init__(self, alpha=None, gamma=3, ignore_index=None, reduction="mean", normalized=False,
                 reduced_threshold=None):
        super().__init__(alpha, gamma, ignore_index, reduction, normalized, reduced_threshold)
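For orientation only, a minimal sketch (not part of the diff) of how WeightedLosses combines the classes above; the 0.7/0.3 weights and the batch shapes are made-up illustration values, mirroring the "losses" dict that the training configs pass in.

import torch
from training.losses import BinaryCrossentropy, FocalLoss, WeightedLosses

# Hypothetical weighting: 70% BCE with logits, 30% binary focal loss.
criterion = WeightedLosses([BinaryCrossentropy(), FocalLoss()], [0.7, 0.3])
logits = torch.randn(8, 1)        # raw classifier outputs
labels = torch.rand(8, 1)         # soft labels in [0, 1] (label smoothing happens upstream)
print(criterion(logits, labels))  # weighted sum of the two losses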
training/pipelines/__init__.py
ADDED
File without changes
|
training/pipelines/train_classifier.py
ADDED
@@ -0,0 +1,364 @@
import argparse
import json
import os
from collections import defaultdict

from sklearn.metrics import log_loss
from torch import topk

import sys
print('@@@@@@@@@@@@@@@@@@')
sys.path.append('..')

from training import losses
from training.datasets.classifier_dataset import DeepFakeClassifierDataset
from training.losses import WeightedLosses
from training.tools.config import load_config
from training.tools.utils import create_optimizer, AverageMeter
from training.transforms.albu import IsotropicResize
from training.zoo import classifiers

os.environ["MKL_NUM_THREADS"] = "1"
os.environ["NUMEXPR_NUM_THREADS"] = "1"
os.environ["OMP_NUM_THREADS"] = "1"

import cv2

cv2.ocl.setUseOpenCL(False)
cv2.setNumThreads(0)
import numpy as np
from albumentations import Compose, RandomBrightnessContrast, \
    HorizontalFlip, FancyPCA, HueSaturationValue, OneOf, ToGray, \
    ShiftScaleRotate, ImageCompression, PadIfNeeded, GaussNoise, GaussianBlur

from apex.parallel import DistributedDataParallel, convert_syncbn_model
from tensorboardX import SummaryWriter

from apex import amp

import torch
from torch.backends import cudnn
from torch.nn import DataParallel
from torch.utils.data import DataLoader
from tqdm import tqdm
import torch.distributed as dist

torch.backends.cudnn.benchmark = True

def create_train_transforms(size=300):
    return Compose([
        ImageCompression(quality_lower=60, quality_upper=100, p=0.5),
        GaussNoise(p=0.1),
        GaussianBlur(blur_limit=3, p=0.05),
        HorizontalFlip(),
        OneOf([
            IsotropicResize(max_side=size, interpolation_down=cv2.INTER_AREA, interpolation_up=cv2.INTER_CUBIC),
            IsotropicResize(max_side=size, interpolation_down=cv2.INTER_AREA, interpolation_up=cv2.INTER_LINEAR),
            IsotropicResize(max_side=size, interpolation_down=cv2.INTER_LINEAR, interpolation_up=cv2.INTER_LINEAR),
        ], p=1),
        PadIfNeeded(min_height=size, min_width=size, border_mode=cv2.BORDER_CONSTANT),
        OneOf([RandomBrightnessContrast(), FancyPCA(), HueSaturationValue()], p=0.7),
        ToGray(p=0.2),
        ShiftScaleRotate(shift_limit=0.1, scale_limit=0.2, rotate_limit=10, border_mode=cv2.BORDER_CONSTANT, p=0.5),
    ]
    )


def create_val_transforms(size=300):
    return Compose([
        IsotropicResize(max_side=size, interpolation_down=cv2.INTER_AREA, interpolation_up=cv2.INTER_CUBIC),
        PadIfNeeded(min_height=size, min_width=size, border_mode=cv2.BORDER_CONSTANT),
    ])


def main():
    parser = argparse.ArgumentParser("PyTorch Xview Pipeline")
    arg = parser.add_argument
    arg('--config', metavar='CONFIG_FILE', help='path to configuration file')
    arg('--workers', type=int, default=6, help='number of cpu threads to use')
    arg('--gpu', type=str, default='0', help='List of GPUs for parallel training, e.g. 0,1,2,3')
    arg('--output-dir', type=str, default='weights/')
    arg('--resume', type=str, default='')
    arg('--fold', type=int, default=0)
    arg('--prefix', type=str, default='classifier_')
    arg('--data-dir', type=str, default="/mnt/sota/datasets/deepfake")
    arg('--folds-csv', type=str, default='folds.csv')
    arg('--crops-dir', type=str, default='crops')
    arg('--label-smoothing', type=float, default=0.01)
    arg('--logdir', type=str, default='logs')
    arg('--zero-score', action='store_true', default=False)
    arg('--from-zero', action='store_true', default=False)
    arg('--distributed', action='store_true', default=False)
    arg('--freeze-epochs', type=int, default=0)
    arg("--local_rank", default=0, type=int)
    arg("--seed", default=777, type=int)
    arg("--padding-part", default=3, type=int)
    arg("--opt-level", default='O1', type=str)
    arg("--test_every", type=int, default=1)
    arg("--no-oversample", action="store_true")
    arg("--no-hardcore", action="store_true")
    arg("--only-changed-frames", action="store_true")

    args = parser.parse_args()
    os.makedirs(args.output_dir, exist_ok=True)
    if args.distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend='nccl', init_method='env://')
    else:
        os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'
        os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu

    cudnn.benchmark = True

    conf = load_config(args.config)
    model = classifiers.__dict__[conf['network']](encoder=conf['encoder'])

    model = model.cuda()
    if args.distributed:
        model = convert_syncbn_model(model)
    ohem = conf.get("ohem_samples", None)
    reduction = "mean"
    if ohem:
        reduction = "none"
    loss_fn = []
    weights = []
    for loss_name, weight in conf["losses"].items():
        loss_fn.append(losses.__dict__[loss_name](reduction=reduction).cuda())
        weights.append(weight)
    loss = WeightedLosses(loss_fn, weights)
    loss_functions = {"classifier_loss": loss}
    optimizer, scheduler = create_optimizer(conf['optimizer'], model)
    bce_best = 100
    start_epoch = 0
    batch_size = conf['optimizer']['batch_size']

    data_train = DeepFakeClassifierDataset(mode="train",
                                           oversample_real=not args.no_oversample,
                                           fold=args.fold,
                                           padding_part=args.padding_part,
                                           hardcore=not args.no_hardcore,
                                           crops_dir=args.crops_dir,
                                           data_path=args.data_dir,
                                           label_smoothing=args.label_smoothing,
                                           folds_csv=args.folds_csv,
                                           transforms=create_train_transforms(conf["size"]),
                                           normalize=conf.get("normalize", None))
    data_val = DeepFakeClassifierDataset(mode="val",
                                         fold=args.fold,
                                         padding_part=args.padding_part,
                                         crops_dir=args.crops_dir,
                                         data_path=args.data_dir,
                                         folds_csv=args.folds_csv,
                                         transforms=create_val_transforms(conf["size"]),
                                         normalize=conf.get("normalize", None))
    val_data_loader = DataLoader(data_val, batch_size=batch_size * 2, num_workers=args.workers, shuffle=False,
                                 pin_memory=False)
    os.makedirs(args.logdir, exist_ok=True)
    summary_writer = SummaryWriter(args.logdir + '/' + conf.get("prefix", args.prefix) + conf['encoder'] + "_" + str(args.fold))
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume, map_location='cpu')
            state_dict = checkpoint['state_dict']
            state_dict = {k[7:]: w for k, w in state_dict.items()}
            model.load_state_dict(state_dict, strict=False)
            if not args.from_zero:
                start_epoch = checkpoint['epoch']
                if not args.zero_score:
                    bce_best = checkpoint.get('bce_best', 0)
            print("=> loaded checkpoint '{}' (epoch {}, bce_best {})"
                  .format(args.resume, checkpoint['epoch'], checkpoint['bce_best']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))
    if args.from_zero:
        start_epoch = 0
    current_epoch = start_epoch

    if conf['fp16']:
        model, optimizer = amp.initialize(model, optimizer,
                                          opt_level=args.opt_level,
                                          loss_scale='dynamic')

    snapshot_name = "{}{}_{}_{}".format(conf.get("prefix", args.prefix), conf['network'], conf['encoder'], args.fold)

    if args.distributed:
        model = DistributedDataParallel(model, delay_allreduce=True)
    else:
        model = DataParallel(model).cuda()
    data_val.reset(1, args.seed)
    max_epochs = conf['optimizer']['schedule']['epochs']
    for epoch in range(start_epoch, max_epochs):
        data_train.reset(epoch, args.seed)
        train_sampler = None
        if args.distributed:
            train_sampler = torch.utils.data.distributed.DistributedSampler(data_train)
            train_sampler.set_epoch(epoch)
        if epoch < args.freeze_epochs:
            print("Freezing encoder!!!")
            model.module.encoder.eval()
            for p in model.module.encoder.parameters():
                p.requires_grad = False
        else:
            model.module.encoder.train()
            for p in model.module.encoder.parameters():
                p.requires_grad = True

        train_data_loader = DataLoader(data_train, batch_size=batch_size, num_workers=args.workers,
                                       shuffle=train_sampler is None, sampler=train_sampler, pin_memory=False,
                                       drop_last=True)

        train_epoch(current_epoch, loss_functions, model, optimizer, scheduler, train_data_loader, summary_writer, conf,
                    args.local_rank, args.only_changed_frames)
        model = model.eval()

        if args.local_rank == 0:
            torch.save({
                'epoch': current_epoch + 1,
                'state_dict': model.state_dict(),
                'bce_best': bce_best,
            }, args.output_dir + '/' + snapshot_name + "_last")
            torch.save({
                'epoch': current_epoch + 1,
                'state_dict': model.state_dict(),
                'bce_best': bce_best,
            }, args.output_dir + snapshot_name + "_{}".format(current_epoch))
            if (epoch + 1) % args.test_every == 0:
                bce_best = evaluate_val(args, val_data_loader, bce_best, model,
                                        snapshot_name=snapshot_name,
                                        current_epoch=current_epoch,
                                        summary_writer=summary_writer)
        current_epoch += 1


def evaluate_val(args, data_val, bce_best, model, snapshot_name, current_epoch, summary_writer):
    print("Test phase")
    model = model.eval()

    bce, probs, targets = validate(model, data_loader=data_val)
    if args.local_rank == 0:
        summary_writer.add_scalar('val/bce', float(bce), global_step=current_epoch)
        if bce < bce_best:
            print("Epoch {} improved from {} to {}".format(current_epoch, bce_best, bce))
            if args.output_dir is not None:
                torch.save({
                    'epoch': current_epoch + 1,
                    'state_dict': model.state_dict(),
                    'bce_best': bce,
                }, args.output_dir + snapshot_name + "_best_dice")
            bce_best = bce
            with open("predictions_{}.json".format(args.fold), "w") as f:
                json.dump({"probs": probs, "targets": targets}, f)
        torch.save({
            'epoch': current_epoch + 1,
            'state_dict': model.state_dict(),
            'bce_best': bce_best,
        }, args.output_dir + snapshot_name + "_last")
        print("Epoch: {} bce: {}, bce_best: {}".format(current_epoch, bce, bce_best))
    return bce_best


def validate(net, data_loader, prefix=""):
    probs = defaultdict(list)
    targets = defaultdict(list)

    with torch.no_grad():
        for sample in tqdm(data_loader):
            imgs = sample["image"].cuda()
            img_names = sample["img_name"]
            labels = sample["labels"].cuda().float()
            out = net(imgs)
            labels = labels.cpu().numpy()
            preds = torch.sigmoid(out).cpu().numpy()
            for i in range(out.shape[0]):
                video, img_id = img_names[i].split("/")
                probs[video].append(preds[i].tolist())
                targets[video].append(labels[i].tolist())
    data_x = []
    data_y = []
    for vid, score in probs.items():
        score = np.array(score)
        lbl = targets[vid]

        score = np.mean(score)
        lbl = np.mean(lbl)
        data_x.append(score)
        data_y.append(lbl)
    y = np.array(data_y)
    x = np.array(data_x)
    fake_idx = y > 0.1
    real_idx = y < 0.1
    fake_loss = log_loss(y[fake_idx], x[fake_idx], labels=[0, 1])
    real_loss = log_loss(y[real_idx], x[real_idx], labels=[0, 1])
    print("{}fake_loss".format(prefix), fake_loss)
    print("{}real_loss".format(prefix), real_loss)

    return (fake_loss + real_loss) / 2, probs, targets


def train_epoch(current_epoch, loss_functions, model, optimizer, scheduler, train_data_loader, summary_writer, conf,
                local_rank, only_valid):
    losses = AverageMeter()
    fake_losses = AverageMeter()
    real_losses = AverageMeter()
    max_iters = conf["batches_per_epoch"]
    print("training epoch {}".format(current_epoch))
    model.train()
    pbar = tqdm(enumerate(train_data_loader), total=max_iters, desc="Epoch {}".format(current_epoch), ncols=0)
    if conf["optimizer"]["schedule"]["mode"] == "epoch":
        scheduler.step(current_epoch)
    for i, sample in pbar:
        imgs = sample["image"].cuda()
        labels = sample["labels"].cuda().float()
        out_labels = model(imgs)
        if only_valid:
            valid_idx = sample["valid"].cuda().float() > 0
            out_labels = out_labels[valid_idx]
            labels = labels[valid_idx]
            if labels.size(0) == 0:
                continue

        fake_loss = 0
        real_loss = 0
        fake_idx = labels > 0.5
        real_idx = labels <= 0.5

        ohem = conf.get("ohem_samples", None)
        if torch.sum(fake_idx * 1) > 0:
            fake_loss = loss_functions["classifier_loss"](out_labels[fake_idx], labels[fake_idx])
        if torch.sum(real_idx * 1) > 0:
            real_loss = loss_functions["classifier_loss"](out_labels[real_idx], labels[real_idx])
        if ohem:
            fake_loss = topk(fake_loss, k=min(ohem, fake_loss.size(0)), sorted=False)[0].mean()
            real_loss = topk(real_loss, k=min(ohem, real_loss.size(0)), sorted=False)[0].mean()

        loss = (fake_loss + real_loss) / 2
        losses.update(loss.item(), imgs.size(0))
        fake_losses.update(0 if fake_loss == 0 else fake_loss.item(), imgs.size(0))
        real_losses.update(0 if real_loss == 0 else real_loss.item(), imgs.size(0))

        optimizer.zero_grad()
        pbar.set_postfix({"lr": float(scheduler.get_lr()[-1]), "epoch": current_epoch, "loss": losses.avg,
                          "fake_loss": fake_losses.avg, "real_loss": real_losses.avg})

        if conf['fp16']:
            with amp.scale_loss(loss, optimizer) as scaled_loss:
                scaled_loss.backward()
        else:
            loss.backward()
        torch.nn.utils.clip_grad_norm_(amp.master_params(optimizer), 1)
        optimizer.step()
        torch.cuda.synchronize()
        if conf["optimizer"]["schedule"]["mode"] in ("step", "poly"):
            scheduler.step(i + current_epoch * max_iters)
        if i == max_iters - 1:
            break
    pbar.close()
    if local_rank == 0:
        for idx, param_group in enumerate(optimizer.param_groups):
            lr = param_group['lr']
            summary_writer.add_scalar('group{}/lr'.format(idx), float(lr), global_step=current_epoch)
        summary_writer.add_scalar('train/loss', float(losses.avg), global_step=current_epoch)


if __name__ == '__main__':
    main()
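One detail of train_epoch worth calling out is the optional online hard example mining: when the config sets "ohem_samples", the losses are built with reduction="none" and only the hardest per-crop losses are averaged. A minimal standalone sketch of that step (the random tensor and the value 8 are illustrative, not taken from the shipped configs):

import torch
from torch import topk

per_sample_loss = torch.rand(16)   # reduction="none" -> one loss value per crop in the batch
ohem_samples = 8                   # stands in for conf["ohem_samples"]
hard_loss = topk(per_sample_loss, k=min(ohem_samples, per_sample_loss.size(0)), sorted=False)[0].mean()
print(hard_loss)                   # only the hardest crops contribute to the gradient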
training/tools/__init__.py
ADDED
File without changes
|
training/tools/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (154 Bytes).
|
|
training/tools/__pycache__/config.cpython-310.pyc
ADDED
Binary file (1.06 kB).
|
|
training/tools/__pycache__/schedulers.cpython-310.pyc
ADDED
Binary file (3.01 kB).
|
|
training/tools/__pycache__/utils.cpython-310.pyc
ADDED
Binary file (3.65 kB).
|
|
training/tools/config.py
ADDED
@@ -0,0 +1,43 @@
import json

DEFAULTS = {
    "network": "dpn",
    "encoder": "dpn92",
    "model_params": {},
    "optimizer": {
        "batch_size": 32,
        "type": "SGD",  # supported: SGD, Adam
        "momentum": 0.9,
        "weight_decay": 0,
        "clip": 1.,
        "learning_rate": 0.1,
        "classifier_lr": -1,
        "nesterov": True,
        "schedule": {
            "type": "constant",  # supported: constant, step, multistep, exponential, linear, poly
            "mode": "epoch",  # supported: epoch, step
            "epochs": 10,
            "params": {}
        }
    },
    "normalize": {
        "mean": [0.485, 0.456, 0.406],
        "std": [0.229, 0.224, 0.225]
    }
}


def _merge(src, dst):
    for k, v in src.items():
        if k in dst:
            if isinstance(v, dict):
                _merge(src[k], dst[k])
        else:
            dst[k] = v


def load_config(config_file, defaults=DEFAULTS):
    with open(config_file, "r") as fd:
        config = json.load(fd)
    _merge(defaults, config)
    return config
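As a sanity check of the merge semantics above: load_config reads a JSON file and fills in any key missing from it with the value from DEFAULTS, leaving keys that are present untouched. A small sketch with a made-up config file (the shipped configs/b5.json and configs/b7.json follow the same shape):

import json
from training.tools.config import load_config

# Hypothetical config written to a temporary path purely for illustration.
with open("/tmp/example_config.json", "w") as f:
    json.dump({"encoder": "tf_efficientnet_b7_ns", "optimizer": {"learning_rate": 0.01}}, f)

conf = load_config("/tmp/example_config.json")
print(conf["encoder"])                # kept from the file
print(conf["optimizer"]["momentum"])  # 0.9, filled in from DEFAULTS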
training/tools/schedulers.py
ADDED
@@ -0,0 +1,46 @@
from bisect import bisect_right

from torch.optim.lr_scheduler import _LRScheduler


class LRStepScheduler(_LRScheduler):
    def __init__(self, optimizer, steps, last_epoch=-1):
        self.lr_steps = steps
        super().__init__(optimizer, last_epoch)

    def get_lr(self):
        pos = max(bisect_right([x for x, y in self.lr_steps], self.last_epoch) - 1, 0)
        return [self.lr_steps[pos][1] if self.lr_steps[pos][0] <= self.last_epoch else base_lr for base_lr in self.base_lrs]


class PolyLR(_LRScheduler):
    """Sets the learning rate of each parameter group according to poly learning rate policy
    """
    def __init__(self, optimizer, max_iter=90000, power=0.9, last_epoch=-1):
        self.max_iter = max_iter
        self.power = power
        super(PolyLR, self).__init__(optimizer, last_epoch)

    def get_lr(self):
        self.last_epoch = (self.last_epoch + 1) % self.max_iter
        return [base_lr * ((1 - float(self.last_epoch) / self.max_iter) ** (self.power)) for base_lr in self.base_lrs]


class ExponentialLRScheduler(_LRScheduler):
    """Decays the learning rate of each parameter group by gamma every epoch.
    When last_epoch=-1, sets initial lr as lr.

    Args:
        optimizer (Optimizer): Wrapped optimizer.
        gamma (float): Multiplicative factor of learning rate decay.
        last_epoch (int): The index of last epoch. Default: -1.
    """

    def __init__(self, optimizer, gamma, last_epoch=-1):
        self.gamma = gamma
        super(ExponentialLRScheduler, self).__init__(optimizer, last_epoch)

    def get_lr(self):
        if self.last_epoch <= 0:
            return self.base_lrs
        return [base_lr * self.gamma**self.last_epoch for base_lr in self.base_lrs]
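For reference, a hedged sketch of driving one of these schedulers directly (all numbers are illustrative); in the pipeline they are normally constructed through create_optimizer from the "schedule" section of the config:

import torch
from torch import nn
from training.tools.schedulers import PolyLR

model = nn.Linear(10, 1)                      # stand-in module, just to have parameters
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
scheduler = PolyLR(optimizer, max_iter=1000, power=0.9)
for step in range(3):
    optimizer.step()
    scheduler.step()                          # lr decays as (1 - step / max_iter) ** 0.9
    print(optimizer.param_groups[0]["lr"])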
training/tools/utils.py
ADDED
@@ -0,0 +1,121 @@
import cv2
from apex.optimizers import FusedAdam, FusedSGD
from timm.optim import AdamW
from torch import optim
from torch.optim import lr_scheduler
from torch.optim.rmsprop import RMSprop
from torch.optim.adamw import AdamW
from torch.optim.lr_scheduler import MultiStepLR, CyclicLR

from training.tools.schedulers import ExponentialLRScheduler, PolyLR, LRStepScheduler

cv2.ocl.setUseOpenCL(False)
cv2.setNumThreads(0)


class AverageMeter(object):
    """Computes and stores the average and current value"""

    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

def create_optimizer(optimizer_config, model, master_params=None):
    """Creates optimizer and schedule from configuration

    Parameters
    ----------
    optimizer_config : dict
        Dictionary containing the configuration options for the optimizer.
    model : Model
        The network model.

    Returns
    -------
    optimizer : Optimizer
        The optimizer.
    scheduler : LRScheduler
        The learning rate scheduler.
    """
    if optimizer_config.get("classifier_lr", -1) != -1:
        # Separate classifier parameters from all others
        net_params = []
        classifier_params = []
        for k, v in model.named_parameters():
            if not v.requires_grad:
                continue
            if k.find("encoder") != -1:
                net_params.append(v)
            else:
                classifier_params.append(v)
        params = [
            {"params": net_params},
            {"params": classifier_params, "lr": optimizer_config["classifier_lr"]},
        ]
    else:
        if master_params:
            params = master_params
        else:
            params = model.parameters()

    if optimizer_config["type"] == "SGD":
        optimizer = optim.SGD(params,
                              lr=optimizer_config["learning_rate"],
                              momentum=optimizer_config["momentum"],
                              weight_decay=optimizer_config["weight_decay"],
                              nesterov=optimizer_config["nesterov"])
    elif optimizer_config["type"] == "FusedSGD":
        optimizer = FusedSGD(params,
                             lr=optimizer_config["learning_rate"],
                             momentum=optimizer_config["momentum"],
                             weight_decay=optimizer_config["weight_decay"],
                             nesterov=optimizer_config["nesterov"])
    elif optimizer_config["type"] == "Adam":
        optimizer = optim.Adam(params,
                               lr=optimizer_config["learning_rate"],
                               weight_decay=optimizer_config["weight_decay"])
    elif optimizer_config["type"] == "FusedAdam":
        optimizer = FusedAdam(params,
                              lr=optimizer_config["learning_rate"],
                              weight_decay=optimizer_config["weight_decay"])
    elif optimizer_config["type"] == "AdamW":
        optimizer = AdamW(params,
                          lr=optimizer_config["learning_rate"],
                          weight_decay=optimizer_config["weight_decay"])
    elif optimizer_config["type"] == "RmsProp":
        optimizer = RMSprop(params,
                            lr=optimizer_config["learning_rate"],
                            weight_decay=optimizer_config["weight_decay"])
    else:
        raise KeyError("unrecognized optimizer {}".format(optimizer_config["type"]))

    if optimizer_config["schedule"]["type"] == "step":
        scheduler = LRStepScheduler(optimizer, **optimizer_config["schedule"]["params"])
    elif optimizer_config["schedule"]["type"] == "clr":
        scheduler = CyclicLR(optimizer, **optimizer_config["schedule"]["params"])
    elif optimizer_config["schedule"]["type"] == "multistep":
        scheduler = MultiStepLR(optimizer, **optimizer_config["schedule"]["params"])
    elif optimizer_config["schedule"]["type"] == "exponential":
        scheduler = ExponentialLRScheduler(optimizer, **optimizer_config["schedule"]["params"])
    elif optimizer_config["schedule"]["type"] == "poly":
        scheduler = PolyLR(optimizer, **optimizer_config["schedule"]["params"])
    elif optimizer_config["schedule"]["type"] == "constant":
        scheduler = lr_scheduler.LambdaLR(optimizer, lambda epoch: 1.0)
    elif optimizer_config["schedule"]["type"] == "linear":
        def linear_lr(it):
            return it * optimizer_config["schedule"]["params"]["alpha"] + optimizer_config["schedule"]["params"]["beta"]

        scheduler = lr_scheduler.LambdaLR(optimizer, linear_lr)

    return optimizer, scheduler
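A minimal sketch of feeding an "optimizer" config section into create_optimizer; the dict below is invented for illustration (the real values live in configs/*.json) and the import assumes apex and timm are installed, as in the Dockerfile:

from torch import nn
from training.tools.utils import create_optimizer

# Hypothetical optimizer section: SGD with a poly learning-rate schedule stepped per batch.
optimizer_config = {
    "type": "SGD", "learning_rate": 0.01, "momentum": 0.9, "weight_decay": 1e-4,
    "nesterov": True, "classifier_lr": -1,
    "schedule": {"type": "poly", "mode": "step", "epochs": 20, "params": {"max_iter": 100500}},
}
model = nn.Linear(10, 1)  # stand-in for a real classifier
optimizer, scheduler = create_optimizer(optimizer_config, model)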
training/transforms/__init__.py
ADDED
File without changes
|
training/transforms/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (159 Bytes).
|
|
training/transforms/__pycache__/albu.cpython-310.pyc
ADDED
Binary file (4.36 kB).
|
|
training/transforms/albu.py
ADDED
@@ -0,0 +1,100 @@
import random

import cv2
import numpy as np
from albumentations import DualTransform, ImageOnlyTransform
from albumentations.augmentations.crops.functional import crop
#from albumentations.augmentations.functional import crop


def isotropically_resize_image(img, size, interpolation_down=cv2.INTER_AREA, interpolation_up=cv2.INTER_CUBIC):
    h, w = img.shape[:2]
    if max(w, h) == size:
        return img
    if w > h:
        scale = size / w
        h = h * scale
        w = size
    else:
        scale = size / h
        w = w * scale
        h = size
    interpolation = interpolation_up if scale > 1 else interpolation_down
    resized = cv2.resize(img, (int(w), int(h)), interpolation=interpolation)
    return resized


class IsotropicResize(DualTransform):
    def __init__(self, max_side, interpolation_down=cv2.INTER_AREA, interpolation_up=cv2.INTER_CUBIC,
                 always_apply=False, p=1):
        super(IsotropicResize, self).__init__(always_apply, p)
        self.max_side = max_side
        self.interpolation_down = interpolation_down
        self.interpolation_up = interpolation_up

    def apply(self, img, interpolation_down=cv2.INTER_AREA, interpolation_up=cv2.INTER_CUBIC, **params):
        return isotropically_resize_image(img, size=self.max_side, interpolation_down=interpolation_down,
                                          interpolation_up=interpolation_up)

    def apply_to_mask(self, img, **params):
        return self.apply(img, interpolation_down=cv2.INTER_NEAREST, interpolation_up=cv2.INTER_NEAREST, **params)

    def get_transform_init_args_names(self):
        return ("max_side", "interpolation_down", "interpolation_up")


class Resize4xAndBack(ImageOnlyTransform):
    def __init__(self, always_apply=False, p=0.5):
        super(Resize4xAndBack, self).__init__(always_apply, p)

    def apply(self, img, **params):
        h, w = img.shape[:2]
        scale = random.choice([2, 4])
        img = cv2.resize(img, (w // scale, h // scale), interpolation=cv2.INTER_AREA)
        img = cv2.resize(img, (w, h),
                         interpolation=random.choice([cv2.INTER_CUBIC, cv2.INTER_LINEAR, cv2.INTER_NEAREST]))
        return img


class RandomSizedCropNonEmptyMaskIfExists(DualTransform):

    def __init__(self, min_max_height, w2h_ratio=[0.7, 1.3], always_apply=False, p=0.5):
        super(RandomSizedCropNonEmptyMaskIfExists, self).__init__(always_apply, p)

        self.min_max_height = min_max_height
        self.w2h_ratio = w2h_ratio

    def apply(self, img, x_min=0, x_max=0, y_min=0, y_max=0, **params):
        cropped = crop(img, x_min, y_min, x_max, y_max)
        return cropped

    @property
    def targets_as_params(self):
        return ["mask"]

    def get_params_dependent_on_targets(self, params):
        mask = params["mask"]
        mask_height, mask_width = mask.shape[:2]
        crop_height = int(mask_height * random.uniform(self.min_max_height[0], self.min_max_height[1]))
        w2h_ratio = random.uniform(*self.w2h_ratio)
        crop_width = min(int(crop_height * w2h_ratio), mask_width - 1)
        if mask.sum() == 0:
            x_min = random.randint(0, mask_width - crop_width + 1)
            y_min = random.randint(0, mask_height - crop_height + 1)
        else:
            mask = mask.sum(axis=-1) if mask.ndim == 3 else mask
            non_zero_yx = np.argwhere(mask)
            y, x = random.choice(non_zero_yx)
            x_min = x - random.randint(0, crop_width - 1)
            y_min = y - random.randint(0, crop_height - 1)
            x_min = np.clip(x_min, 0, mask_width - crop_width)
            y_min = np.clip(y_min, 0, mask_height - crop_height)

        x_max = x_min + crop_height
        y_max = y_min + crop_width
        y_max = min(mask_height, y_max)
        x_max = min(mask_width, x_max)
        return {"x_min": x_min, "x_max": x_max, "y_min": y_min, "y_max": y_max}

    def get_transform_init_args_names(self):
        return "min_max_height", "height", "width", "w2h_ratio"
training/zoo/__init__.py
ADDED
File without changes
|
training/zoo/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (152 Bytes).
|
|
training/zoo/__pycache__/classifiers.cpython-310.pyc
ADDED
Binary file (5.55 kB).
|
|
training/zoo/classifiers.py
ADDED
@@ -0,0 +1,172 @@
from functools import partial

import numpy as np
import torch
from timm.models.efficientnet import tf_efficientnet_b4_ns, tf_efficientnet_b3_ns, \
    tf_efficientnet_b5_ns, tf_efficientnet_b2_ns, tf_efficientnet_b6_ns, tf_efficientnet_b7_ns
from torch import nn
from torch.nn.modules.dropout import Dropout
from torch.nn.modules.linear import Linear
from torch.nn.modules.pooling import AdaptiveAvgPool2d

encoder_params = {
    "tf_efficientnet_b3_ns": {
        "features": 1536,
        "init_op": partial(tf_efficientnet_b3_ns, pretrained=True, drop_path_rate=0.2)
    },
    "tf_efficientnet_b2_ns": {
        "features": 1408,
        "init_op": partial(tf_efficientnet_b2_ns, pretrained=False, drop_path_rate=0.2)
    },
    "tf_efficientnet_b4_ns": {
        "features": 1792,
        "init_op": partial(tf_efficientnet_b4_ns, pretrained=True, drop_path_rate=0.5)
    },
    "tf_efficientnet_b5_ns": {
        "features": 2048,
        "init_op": partial(tf_efficientnet_b5_ns, pretrained=True, drop_path_rate=0.2)
    },
    "tf_efficientnet_b4_ns_03d": {
        "features": 1792,
        "init_op": partial(tf_efficientnet_b4_ns, pretrained=True, drop_path_rate=0.3)
    },
    "tf_efficientnet_b5_ns_03d": {
        "features": 2048,
        "init_op": partial(tf_efficientnet_b5_ns, pretrained=True, drop_path_rate=0.3)
    },
    "tf_efficientnet_b5_ns_04d": {
        "features": 2048,
        "init_op": partial(tf_efficientnet_b5_ns, pretrained=True, drop_path_rate=0.4)
    },
    "tf_efficientnet_b6_ns": {
        "features": 2304,
        "init_op": partial(tf_efficientnet_b6_ns, pretrained=True, drop_path_rate=0.2)
    },
    "tf_efficientnet_b7_ns": {
        "features": 2560,
        "init_op": partial(tf_efficientnet_b7_ns, pretrained=True, drop_path_rate=0.2)
    },
    "tf_efficientnet_b6_ns_04d": {
        "features": 2304,
        "init_op": partial(tf_efficientnet_b6_ns, pretrained=True, drop_path_rate=0.4)
    },
}


def setup_srm_weights(input_channels: int = 3) -> torch.Tensor:
    """Creates the SRM kernels for noise analysis."""
    # note: values taken from Zhou et al., "Learning Rich Features for Image Manipulation Detection", CVPR2018
    srm_kernel = torch.from_numpy(np.array([
        [  # srm 1/2 horiz
            [0., 0., 0., 0., 0.],  # noqa: E241,E201
            [0., 0., 0., 0., 0.],  # noqa: E241,E201
            [0., 1., -2., 1., 0.],  # noqa: E241,E201
            [0., 0., 0., 0., 0.],  # noqa: E241,E201
            [0., 0., 0., 0., 0.],  # noqa: E241,E201
        ], [  # srm 1/4
            [0., 0., 0., 0., 0.],  # noqa: E241,E201
            [0., -1., 2., -1., 0.],  # noqa: E241,E201
            [0., 2., -4., 2., 0.],  # noqa: E241,E201
            [0., -1., 2., -1., 0.],  # noqa: E241,E201
            [0., 0., 0., 0., 0.],  # noqa: E241,E201
        ], [  # srm 1/12
            [-1., 2., -2., 2., -1.],  # noqa: E241,E201
            [2., -6., 8., -6., 2.],  # noqa: E241,E201
            [-2., 8., -12., 8., -2.],  # noqa: E241,E201
            [2., -6., 8., -6., 2.],  # noqa: E241,E201
            [-1., 2., -2., 2., -1.],  # noqa: E241,E201
        ]
    ])).float()
    srm_kernel[0] /= 2
    srm_kernel[1] /= 4
    srm_kernel[2] /= 12
    return srm_kernel.view(3, 1, 5, 5).repeat(1, input_channels, 1, 1)


def setup_srm_layer(input_channels: int = 3) -> torch.nn.Module:
    """Creates a SRM convolution layer for noise analysis."""
    weights = setup_srm_weights(input_channels)
    conv = torch.nn.Conv2d(input_channels, out_channels=3, kernel_size=5, stride=1, padding=2, bias=False)
    with torch.no_grad():
        conv.weight = torch.nn.Parameter(weights, requires_grad=False)
    return conv


class DeepFakeClassifierSRM(nn.Module):
    def __init__(self, encoder, dropout_rate=0.5) -> None:
        super().__init__()
        self.encoder = encoder_params[encoder]["init_op"]()
        self.avg_pool = AdaptiveAvgPool2d((1, 1))
        self.srm_conv = setup_srm_layer(3)
        self.dropout = Dropout(dropout_rate)
        self.fc = Linear(encoder_params[encoder]["features"], 1)

    def forward(self, x):
        noise = self.srm_conv(x)
        x = self.encoder.forward_features(noise)
        x = self.avg_pool(x).flatten(1)
        x = self.dropout(x)
        x = self.fc(x)
        return x


class GlobalWeightedAvgPool2d(nn.Module):
    """
    Global Weighted Average Pooling from paper "Global Weighted Average
    Pooling Bridges Pixel-level Localization and Image-level Classification"
    """

    def __init__(self, features: int, flatten=False):
        super().__init__()
        self.conv = nn.Conv2d(features, 1, kernel_size=1, bias=True)
        self.flatten = flatten

    def fscore(self, x):
        m = self.conv(x)
        m = m.sigmoid().exp()
        return m

    def norm(self, x: torch.Tensor):
        return x / x.sum(dim=[2, 3], keepdim=True)

    def forward(self, x):
        input_x = x
        x = self.fscore(x)
        x = self.norm(x)
        x = x * input_x
        x = x.sum(dim=[2, 3], keepdim=not self.flatten)
        return x


class DeepFakeClassifier(nn.Module):
    def __init__(self, encoder, dropout_rate=0.0) -> None:
        super().__init__()
        self.encoder = encoder_params[encoder]["init_op"]()
        self.avg_pool = AdaptiveAvgPool2d((1, 1))
        self.dropout = Dropout(dropout_rate)
        self.fc = Linear(encoder_params[encoder]["features"], 1)

    def forward(self, x):
        x = self.encoder.forward_features(x)
        x = self.avg_pool(x).flatten(1)
        x = self.dropout(x)
        x = self.fc(x)
        return x


class DeepFakeClassifierGWAP(nn.Module):
    def __init__(self, encoder, dropout_rate=0.5) -> None:
        super().__init__()
        self.encoder = encoder_params[encoder]["init_op"]()
        self.avg_pool = GlobalWeightedAvgPool2d(encoder_params[encoder]["features"])
        self.dropout = Dropout(dropout_rate)
        self.fc = Linear(encoder_params[encoder]["features"], 1)

    def forward(self, x):
        x = self.encoder.forward_features(x)
        x = self.avg_pool(x).flatten(1)
        x = self.dropout(x)
        x = self.fc(x)
        return x
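A small inference sketch for the classifiers defined above, assuming the EfficientNet-B7 encoder that the bundled b7 config and checkpoint name refer to; the crop size and batch are illustrative, and pretrained=True means timm will try to download encoder weights on first use:

import torch
from training.zoo.classifiers import DeepFakeClassifier

model = DeepFakeClassifier(encoder="tf_efficientnet_b7_ns").eval()
with torch.no_grad():
    crops = torch.rand(2, 3, 380, 380)       # face crops; the adaptive pooling tolerates other sizes
    fake_prob = torch.sigmoid(model(crops))  # one logit per crop -> probability of "fake"
print(fake_prob.squeeze(1))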
training/zoo/unet.py
ADDED
@@ -0,0 +1,151 @@
from functools import partial

import torch
from timm.models.efficientnet import tf_efficientnet_b3_ns, tf_efficientnet_b5_ns
from torch import nn
from torch.nn import Dropout2d, Conv2d
from torch.nn.modules.dropout import Dropout
from torch.nn.modules.linear import Linear
from torch.nn.modules.pooling import AdaptiveAvgPool2d
from torch.nn.modules.upsampling import UpsamplingBilinear2d

encoder_params = {
    "tf_efficientnet_b3_ns": {
        "features": 1536,
        "filters": [40, 32, 48, 136, 1536],
        "decoder_filters": [64, 128, 256, 256],
        "init_op": partial(tf_efficientnet_b3_ns, pretrained=True, drop_path_rate=0.2)
    },
    "tf_efficientnet_b5_ns": {
        "features": 2048,
        "filters": [48, 40, 64, 176, 2048],
        "decoder_filters": [64, 128, 256, 256],
        "init_op": partial(tf_efficientnet_b5_ns, pretrained=True, drop_path_rate=0.2)
    },
}


class DecoderBlock(nn.Module):
    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.layer = nn.Sequential(
            nn.Upsample(scale_factor=2),
            nn.Conv2d(in_channels, out_channels, 3, padding=1),
            nn.ReLU(inplace=True)
        )

    def forward(self, x):
        return self.layer(x)


class ConcatBottleneck(nn.Module):
    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.seq = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, 3, padding=1),
            nn.ReLU(inplace=True)
        )

    def forward(self, dec, enc):
        x = torch.cat([dec, enc], dim=1)
        return self.seq(x)


class Decoder(nn.Module):
    def __init__(self, decoder_filters, filters, upsample_filters=None,
                 decoder_block=DecoderBlock, bottleneck=ConcatBottleneck, dropout=0):
        super().__init__()
        self.decoder_filters = decoder_filters
        self.filters = filters
        self.decoder_block = decoder_block
        self.decoder_stages = nn.ModuleList([self._get_decoder(idx) for idx in range(0, len(decoder_filters))])
        self.bottlenecks = nn.ModuleList([bottleneck(self.filters[-i - 2] + f, f)
                                          for i, f in enumerate(reversed(decoder_filters))])
        self.dropout = Dropout2d(dropout) if dropout > 0 else None
        self.last_block = None
        if upsample_filters:
            self.last_block = decoder_block(decoder_filters[0], out_channels=upsample_filters)
        else:
            self.last_block = UpsamplingBilinear2d(scale_factor=2)

    def forward(self, encoder_results: list):
        x = encoder_results[0]
        bottlenecks = self.bottlenecks
        for idx, bottleneck in enumerate(bottlenecks):
            rev_idx = - (idx + 1)
            x = self.decoder_stages[rev_idx](x)
            x = bottleneck(x, encoder_results[-rev_idx])
        if self.last_block:
            x = self.last_block(x)
        if self.dropout:
            x = self.dropout(x)
        return x

    def _get_decoder(self, layer):
        idx = layer + 1
        if idx == len(self.decoder_filters):
            in_channels = self.filters[idx]
        else:
            in_channels = self.decoder_filters[idx]
        return self.decoder_block(in_channels, self.decoder_filters[max(layer, 0)])


def _initialize_weights(module):
    for m in module.modules():
        if isinstance(m, nn.Conv2d) or isinstance(m, nn.ConvTranspose2d) or isinstance(m, nn.Linear):
            m.weight.data = nn.init.kaiming_normal_(m.weight.data)
            if m.bias is not None:
                m.bias.data.zero_()
        elif isinstance(m, nn.BatchNorm2d):
            m.weight.data.fill_(1)
            m.bias.data.zero_()


class EfficientUnetClassifier(nn.Module):
    def __init__(self, encoder, dropout_rate=0.5) -> None:
        super().__init__()
        self.decoder = Decoder(decoder_filters=encoder_params[encoder]["decoder_filters"],
                               filters=encoder_params[encoder]["filters"])
        self.avg_pool = AdaptiveAvgPool2d((1, 1))
        self.dropout = Dropout(dropout_rate)
        self.fc = Linear(encoder_params[encoder]["features"], 1)
        self.final = Conv2d(encoder_params[encoder]["decoder_filters"][0], out_channels=1, kernel_size=1, bias=False)
        _initialize_weights(self)
        self.encoder = encoder_params[encoder]["init_op"]()

    def get_encoder_features(self, x):
        encoder_results = []
        x = self.encoder.conv_stem(x)
        x = self.encoder.bn1(x)
        x = self.encoder.act1(x)
        encoder_results.append(x)
        x = self.encoder.blocks[:2](x)
        encoder_results.append(x)
        x = self.encoder.blocks[2:3](x)
        encoder_results.append(x)
        x = self.encoder.blocks[3:5](x)
        encoder_results.append(x)
        x = self.encoder.blocks[5:](x)
        x = self.encoder.conv_head(x)
        x = self.encoder.bn2(x)
        x = self.encoder.act2(x)
        encoder_results.append(x)
        encoder_results = list(reversed(encoder_results))
        return encoder_results

    def forward(self, x):
        encoder_results = self.get_encoder_features(x)
        seg = self.final(self.decoder(encoder_results))
        x = encoder_results[0]
        x = self.avg_pool(x).flatten(1)
        x = self.dropout(x)
        x = self.fc(x)
        return x, seg


if __name__ == '__main__':
    model = EfficientUnetClassifier("tf_efficientnet_b5_ns")
    model.eval()
    with torch.no_grad():
        input = torch.rand(4, 3, 224, 224)
        print(model(input))
weights/.gitkeep
ADDED
File without changes
|
weights/b7_ns_best.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:9db77ab9318863e2f8ab287c8eb83c2232584b82dc2fb41f1d614ddd7900cccb
size 266910617