Upload 108 files
This view is limited to 50 files because it contains too many changes.
- .github/ISSUE_TEMPLATE/bug_report.yml +56 -0
- .github/ISSUE_TEMPLATE/config.yml +5 -0
- .github/ISSUE_TEMPLATE/feature_request.yml +16 -0
- .github/workflows/publish_action.yml +20 -0
- modules/__init__.py +0 -0
- modules/__pycache__/__init__.cpython-311.pyc +0 -0
- modules/__pycache__/processing.cpython-311.pyc +0 -0
- modules/__pycache__/scripts.cpython-311.pyc +0 -0
- modules/__pycache__/scripts_postprocessing.cpython-311.pyc +0 -0
- modules/__pycache__/shared.cpython-311.pyc +0 -0
- modules/images.py +0 -0
- modules/processing.py +13 -0
- modules/scripts.py +13 -0
- modules/scripts_postprocessing.py +0 -0
- modules/shared.py +19 -0
- r_chainner/__pycache__/model_loading.cpython-311.pyc +0 -0
- r_chainner/__pycache__/types.cpython-311.pyc +0 -0
- r_chainner/archs/face/__pycache__/gfpganv1_clean_arch.cpython-311.pyc +0 -0
- r_chainner/archs/face/__pycache__/stylegan2_clean_arch.cpython-311.pyc +0 -0
- r_chainner/archs/face/gfpganv1_clean_arch.py +370 -0
- r_chainner/archs/face/stylegan2_clean_arch.py +453 -0
- r_chainner/model_loading.py +28 -0
- r_chainner/types.py +18 -0
- r_facelib/__init__.py +0 -0
- r_facelib/__pycache__/__init__.cpython-311.pyc +0 -0
- r_facelib/detection/__init__.py +102 -0
- r_facelib/detection/__pycache__/__init__.cpython-311.pyc +0 -0
- r_facelib/detection/__pycache__/align_trans.cpython-311.pyc +0 -0
- r_facelib/detection/__pycache__/matlab_cp2tform.cpython-311.pyc +0 -0
- r_facelib/detection/align_trans.py +219 -0
- r_facelib/detection/matlab_cp2tform.py +317 -0
- r_facelib/detection/retinaface/__pycache__/retinaface.cpython-311.pyc +0 -0
- r_facelib/detection/retinaface/__pycache__/retinaface_net.cpython-311.pyc +0 -0
- r_facelib/detection/retinaface/__pycache__/retinaface_utils.cpython-311.pyc +0 -0
- r_facelib/detection/retinaface/retinaface.py +389 -0
- r_facelib/detection/retinaface/retinaface_net.py +196 -0
- r_facelib/detection/retinaface/retinaface_utils.py +421 -0
- r_facelib/detection/yolov5face/__init__.py +0 -0
- r_facelib/detection/yolov5face/__pycache__/__init__.cpython-311.pyc +0 -0
- r_facelib/detection/yolov5face/__pycache__/face_detector.cpython-311.pyc +0 -0
- r_facelib/detection/yolov5face/face_detector.py +141 -0
- r_facelib/detection/yolov5face/models/__init__.py +0 -0
- r_facelib/detection/yolov5face/models/__pycache__/__init__.cpython-311.pyc +0 -0
- r_facelib/detection/yolov5face/models/__pycache__/common.cpython-311.pyc +0 -0
- r_facelib/detection/yolov5face/models/__pycache__/experimental.cpython-311.pyc +0 -0
- r_facelib/detection/yolov5face/models/__pycache__/yolo.cpython-311.pyc +0 -0
- r_facelib/detection/yolov5face/models/common.py +299 -0
- r_facelib/detection/yolov5face/models/experimental.py +45 -0
- r_facelib/detection/yolov5face/models/yolo.py +235 -0
- r_facelib/detection/yolov5face/models/yolov5l.yaml +47 -0
.github/ISSUE_TEMPLATE/bug_report.yml
ADDED
@@ -0,0 +1,56 @@
name: Bug Report
description: You think something is broken
labels: ["bug", "new"]

body:
  - type: checkboxes
    attributes:
      label: First, confirm
      description: Make sure you use the latest version of the ReActor extension and you have already searched to see if an issue already exists for the bug you encountered before you create a new Issue.
      options:
        - label: I have read the [instruction](https://github.com/Gourieff/comfyui-reactor-node/blob/main/README.md) carefully
          required: true
        - label: I have searched the existing issues
          required: true
        - label: I have updated the extension to the latest version
          required: true
  - type: markdown
    attributes:
      value: |
        *Please fill this form with as much information as possible and provide screenshots if possible*
  - type: textarea
    id: what-did
    attributes:
      label: What happened?
      description: Tell what happened in a very clear and simple way
    validations:
      required: true
  - type: textarea
    id: steps
    attributes:
      label: Steps to reproduce the problem
      description: Please provide precise step-by-step instructions on how to reproduce the bug
      value: |
        Your workflow
    validations:
      required: true
  - type: textarea
    id: sysinfo
    attributes:
      label: Sysinfo
      description: Describe your platform. OS, browser, GPU, what other nodes are also enabled.
    validations:
      required: true
  - type: textarea
    id: logs
    attributes:
      label: Relevant console log
      description: Please provide cmd/terminal logs from the moment you started the UI to the moment you got an error. This will be automatically formatted into code, so no need for backticks.
      render: Shell
    validations:
      required: true
  - type: textarea
    id: misc
    attributes:
      label: Additional information
      description: Please provide any relevant additional info or context.
.github/ISSUE_TEMPLATE/config.yml
ADDED
@@ -0,0 +1,5 @@
blank_issues_enabled: false
contact_links:
  - name: ReActor Node Community Support
    url: https://github.com/Gourieff/comfyui-reactor-node/discussions
    about: Please ask and answer questions here.
.github/ISSUE_TEMPLATE/feature_request.yml
ADDED
@@ -0,0 +1,16 @@
name: Feature request
description: Suggest an idea for this project
title: "[Feature]: "
labels: ["enhancement", "new"]

body:
  - type: textarea
    id: description
    attributes:
      label: Feature description
      description: Describe the feature in a clear and simple way
      value:
  - type: markdown
    attributes:
      value: |
        The best way to propose an idea is to start a new discussion via the [Discussions](https://github.com/Gourieff/comfyui-reactor-node/discussions) section (choose the "Idea" category)
.github/workflows/publish_action.yml
ADDED
@@ -0,0 +1,20 @@
name: Publish to Comfy registry
on:
  workflow_dispatch:
  push:
    branches:
      - main
    paths:
      - "pyproject.toml"

jobs:
  publish-node:
    name: Publish Custom Node to registry
    runs-on: ubuntu-latest
    steps:
      - name: Check out code
        uses: actions/checkout@v4
      - name: Publish Custom Node
        uses: Comfy-Org/publish-node-action@main
        with:
          personal_access_token: ${{ secrets.REGISTRY_ACCESS_TOKEN }} ## Add your own personal access token to your Github secrets and reference it here.
modules/__init__.py
ADDED
File without changes
modules/__pycache__/__init__.cpython-311.pyc
ADDED
Binary file (266 Bytes).
modules/__pycache__/processing.cpython-311.pyc
ADDED
Binary file (1.43 kB).
modules/__pycache__/scripts.cpython-311.pyc
ADDED
Binary file (1.07 kB).
modules/__pycache__/scripts_postprocessing.cpython-311.pyc
ADDED
Binary file (280 Bytes).
modules/__pycache__/shared.cpython-311.pyc
ADDED
Binary file (1.13 kB).
modules/images.py
ADDED
File without changes
modules/processing.py
ADDED
@@ -0,0 +1,13 @@
class StableDiffusionProcessing:

    def __init__(self, init_imgs):
        self.init_images = init_imgs
        self.width = init_imgs[0].width
        self.height = init_imgs[0].height
        self.extra_generation_params = {}


class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):

    def __init__(self, init_img):
        super().__init__(init_img)
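Editor's note: these classes are thin shims of the A1111 `StableDiffusionProcessing` API, just enough for the ReActor scripts to run inside ComfyUI. A minimal sketch of how they are meant to be constructed, assuming a PIL image is available (the file name is hypothetical):

```python
from PIL import Image

from modules.processing import StableDiffusionProcessingImg2Img

# The shim expects a *list* of init images; width/height are read from the first one.
img = Image.open("face.png")  # hypothetical input file
p = StableDiffusionProcessingImg2Img([img])
print(p.width, p.height, p.extra_generation_params)
```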
modules/scripts.py
ADDED
@@ -0,0 +1,13 @@
import os


class Script:
    pass


def basedir():
    return os.path.dirname(os.path.dirname(os.path.abspath(__file__)))


class PostprocessImageArgs:
    pass
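Editor's note: a quick sketch of what `basedir()` resolves to — two `os.path.dirname` calls above `modules/scripts.py`, i.e. the root of the node package:

```python
from modules import scripts

# Prints the directory that contains the `modules` package (the node root).
print(scripts.basedir())
```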
modules/scripts_postprocessing.py
ADDED
File without changes
modules/shared.py
ADDED
@@ -0,0 +1,19 @@
class Options:
    img2img_background_color = "#ffffff"  # Set to white for now


class State:
    interrupted = False

    def begin(self):
        pass

    def end(self):
        pass


opts = Options()
state = State()
cmd_opts = None
sd_upscalers = []
face_restorers = []
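Editor's note: a short sketch of how code written against the A1111 `modules.shared` interface consumes these globals; the point of the stub is that attribute access keeps working even though no WebUI is attached:

```python
from modules import shared

print(shared.opts.img2img_background_color)  # "#ffffff"
shared.state.begin()
# ... do work, optionally checking shared.state.interrupted ...
shared.state.end()
```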
r_chainner/__pycache__/model_loading.cpython-311.pyc
ADDED
Binary file (1.37 kB).
r_chainner/__pycache__/types.cpython-311.pyc
ADDED
Binary file (1.01 kB).
r_chainner/archs/face/__pycache__/gfpganv1_clean_arch.cpython-311.pyc
ADDED
Binary file (17.7 kB).
r_chainner/archs/face/__pycache__/stylegan2_clean_arch.cpython-311.pyc
ADDED
Binary file (22.1 kB).
r_chainner/archs/face/gfpganv1_clean_arch.py
ADDED
@@ -0,0 +1,370 @@
# pylint: skip-file
# type: ignore
import math
import random

import torch
from torch import nn
from torch.nn import functional as F

from r_chainner.archs.face.stylegan2_clean_arch import StyleGAN2GeneratorClean


class StyleGAN2GeneratorCSFT(StyleGAN2GeneratorClean):
    """StyleGAN2 Generator with SFT modulation (Spatial Feature Transform).
    It is the clean version without custom compiled CUDA extensions used in StyleGAN2.
    Args:
        out_size (int): The spatial size of outputs.
        num_style_feat (int): Channel number of style features. Default: 512.
        num_mlp (int): Layer number of MLP style layers. Default: 8.
        channel_multiplier (int): Channel multiplier for large networks of StyleGAN2. Default: 2.
        narrow (float): The narrow ratio for channels. Default: 1.
        sft_half (bool): Whether to apply SFT on half of the input channels. Default: False.
    """

    def __init__(
        self,
        out_size,
        num_style_feat=512,
        num_mlp=8,
        channel_multiplier=2,
        narrow=1,
        sft_half=False,
    ):
        super(StyleGAN2GeneratorCSFT, self).__init__(
            out_size,
            num_style_feat=num_style_feat,
            num_mlp=num_mlp,
            channel_multiplier=channel_multiplier,
            narrow=narrow,
        )
        self.sft_half = sft_half

    def forward(
        self,
        styles,
        conditions,
        input_is_latent=False,
        noise=None,
        randomize_noise=True,
        truncation=1,
        truncation_latent=None,
        inject_index=None,
        return_latents=False,
    ):
        """Forward function for StyleGAN2GeneratorCSFT.
        Args:
            styles (list[Tensor]): Sample codes of styles.
            conditions (list[Tensor]): SFT conditions to generators.
            input_is_latent (bool): Whether input is latent style. Default: False.
            noise (Tensor | None): Input noise or None. Default: None.
            randomize_noise (bool): Randomize noise, used when 'noise' is False. Default: True.
            truncation (float): The truncation ratio. Default: 1.
            truncation_latent (Tensor | None): The truncation latent tensor. Default: None.
            inject_index (int | None): The injection index for mixing noise. Default: None.
            return_latents (bool): Whether to return style latents. Default: False.
        """
        # style codes -> latents with Style MLP layer
        if not input_is_latent:
            styles = [self.style_mlp(s) for s in styles]
        # noises
        if noise is None:
            if randomize_noise:
                noise = [None] * self.num_layers  # for each style conv layer
            else:  # use the stored noise
                noise = [
                    getattr(self.noises, f"noise{i}") for i in range(self.num_layers)
                ]
        # style truncation
        if truncation < 1:
            style_truncation = []
            for style in styles:
                style_truncation.append(
                    truncation_latent + truncation * (style - truncation_latent)
                )
            styles = style_truncation
        # get style latents with injection
        if len(styles) == 1:
            inject_index = self.num_latent

            if styles[0].ndim < 3:
                # repeat latent code for all the layers
                latent = styles[0].unsqueeze(1).repeat(1, inject_index, 1)
            else:  # used for encoder with different latent code for each layer
                latent = styles[0]
        elif len(styles) == 2:  # mixing noises
            if inject_index is None:
                inject_index = random.randint(1, self.num_latent - 1)
            latent1 = styles[0].unsqueeze(1).repeat(1, inject_index, 1)
            latent2 = (
                styles[1].unsqueeze(1).repeat(1, self.num_latent - inject_index, 1)
            )
            latent = torch.cat([latent1, latent2], 1)

        # main generation
        out = self.constant_input(latent.shape[0])
        out = self.style_conv1(out, latent[:, 0], noise=noise[0])
        skip = self.to_rgb1(out, latent[:, 1])

        i = 1
        for conv1, conv2, noise1, noise2, to_rgb in zip(
            self.style_convs[::2],
            self.style_convs[1::2],
            noise[1::2],
            noise[2::2],
            self.to_rgbs,
        ):
            out = conv1(out, latent[:, i], noise=noise1)

            # the conditions may have fewer levels
            if i < len(conditions):
                # SFT part to combine the conditions
                if self.sft_half:  # only apply SFT to half of the channels
                    out_same, out_sft = torch.split(out, int(out.size(1) // 2), dim=1)
                    out_sft = out_sft * conditions[i - 1] + conditions[i]
                    out = torch.cat([out_same, out_sft], dim=1)
                else:  # apply SFT to all the channels
                    out = out * conditions[i - 1] + conditions[i]

            out = conv2(out, latent[:, i + 1], noise=noise2)
            skip = to_rgb(out, latent[:, i + 2], skip)  # feature back to the rgb space
            i += 2

        image = skip

        if return_latents:
            return image, latent
        else:
            return image, None


class ResBlock(nn.Module):
    """Residual block with bilinear upsampling/downsampling.
    Args:
        in_channels (int): Channel number of the input.
        out_channels (int): Channel number of the output.
        mode (str): Upsampling/downsampling mode. Options: down | up. Default: down.
    """

    def __init__(self, in_channels, out_channels, mode="down"):
        super(ResBlock, self).__init__()

        self.conv1 = nn.Conv2d(in_channels, in_channels, 3, 1, 1)
        self.conv2 = nn.Conv2d(in_channels, out_channels, 3, 1, 1)
        self.skip = nn.Conv2d(in_channels, out_channels, 1, bias=False)
        if mode == "down":
            self.scale_factor = 0.5
        elif mode == "up":
            self.scale_factor = 2

    def forward(self, x):
        out = F.leaky_relu_(self.conv1(x), negative_slope=0.2)
        # upsample/downsample
        out = F.interpolate(
            out, scale_factor=self.scale_factor, mode="bilinear", align_corners=False
        )
        out = F.leaky_relu_(self.conv2(out), negative_slope=0.2)
        # skip
        x = F.interpolate(
            x, scale_factor=self.scale_factor, mode="bilinear", align_corners=False
        )
        skip = self.skip(x)
        out = out + skip
        return out


class GFPGANv1Clean(nn.Module):
    """The GFPGAN architecture: Unet + StyleGAN2 decoder with SFT.
    It is the clean version without custom compiled CUDA extensions used in StyleGAN2.
    Ref: GFP-GAN: Towards Real-World Blind Face Restoration with Generative Facial Prior.
    Args:
        out_size (int): The spatial size of outputs.
        num_style_feat (int): Channel number of style features. Default: 512.
        channel_multiplier (int): Channel multiplier for large networks of StyleGAN2. Default: 2.
        decoder_load_path (str): The path to the pre-trained decoder model (usually, the StyleGAN2). Default: None.
        fix_decoder (bool): Whether to fix the decoder. Default: True.
        num_mlp (int): Layer number of MLP style layers. Default: 8.
        input_is_latent (bool): Whether input is latent style. Default: False.
        different_w (bool): Whether to use different latent w for different layers. Default: False.
        narrow (float): The narrow ratio for channels. Default: 1.
        sft_half (bool): Whether to apply SFT on half of the input channels. Default: False.
    """

    def __init__(
        self,
        state_dict,
    ):
        super(GFPGANv1Clean, self).__init__()

        out_size = 512
        num_style_feat = 512
        channel_multiplier = 2
        decoder_load_path = None
        fix_decoder = False
        num_mlp = 8
        input_is_latent = True
        different_w = True
        narrow = 1
        sft_half = True

        self.model_arch = "GFPGAN"
        self.sub_type = "Face SR"
        self.scale = 8
        self.in_nc = 3
        self.out_nc = 3
        self.state = state_dict

        self.supports_fp16 = False
        self.supports_bf16 = True
        self.min_size_restriction = 512

        self.input_is_latent = input_is_latent
        self.different_w = different_w
        self.num_style_feat = num_style_feat

        unet_narrow = narrow * 0.5  # by default, use a half of input channels
        channels = {
            "4": int(512 * unet_narrow),
            "8": int(512 * unet_narrow),
            "16": int(512 * unet_narrow),
            "32": int(512 * unet_narrow),
            "64": int(256 * channel_multiplier * unet_narrow),
            "128": int(128 * channel_multiplier * unet_narrow),
            "256": int(64 * channel_multiplier * unet_narrow),
            "512": int(32 * channel_multiplier * unet_narrow),
            "1024": int(16 * channel_multiplier * unet_narrow),
        }

        self.log_size = int(math.log(out_size, 2))
        first_out_size = 2 ** (int(math.log(out_size, 2)))

        self.conv_body_first = nn.Conv2d(3, channels[f"{first_out_size}"], 1)

        # downsample
        in_channels = channels[f"{first_out_size}"]
        self.conv_body_down = nn.ModuleList()
        for i in range(self.log_size, 2, -1):
            out_channels = channels[f"{2**(i - 1)}"]
            self.conv_body_down.append(ResBlock(in_channels, out_channels, mode="down"))
            in_channels = out_channels

        self.final_conv = nn.Conv2d(in_channels, channels["4"], 3, 1, 1)

        # upsample
        in_channels = channels["4"]
        self.conv_body_up = nn.ModuleList()
        for i in range(3, self.log_size + 1):
            out_channels = channels[f"{2**i}"]
            self.conv_body_up.append(ResBlock(in_channels, out_channels, mode="up"))
            in_channels = out_channels

        # to RGB
        self.toRGB = nn.ModuleList()
        for i in range(3, self.log_size + 1):
            self.toRGB.append(nn.Conv2d(channels[f"{2**i}"], 3, 1))

        if different_w:
            linear_out_channel = (int(math.log(out_size, 2)) * 2 - 2) * num_style_feat
        else:
            linear_out_channel = num_style_feat

        self.final_linear = nn.Linear(channels["4"] * 4 * 4, linear_out_channel)

        # the decoder: stylegan2 generator with SFT modulations
        self.stylegan_decoder = StyleGAN2GeneratorCSFT(
            out_size=out_size,
            num_style_feat=num_style_feat,
            num_mlp=num_mlp,
            channel_multiplier=channel_multiplier,
            narrow=narrow,
            sft_half=sft_half,
        )

        # load pre-trained stylegan2 model if necessary
        if decoder_load_path:
            self.stylegan_decoder.load_state_dict(
                torch.load(
                    decoder_load_path, map_location=lambda storage, loc: storage
                )["params_ema"]
            )
        # fix decoder without updating params
        if fix_decoder:
            for _, param in self.stylegan_decoder.named_parameters():
                param.requires_grad = False

        # for SFT modulations (scale and shift)
        self.condition_scale = nn.ModuleList()
        self.condition_shift = nn.ModuleList()
        for i in range(3, self.log_size + 1):
            out_channels = channels[f"{2**i}"]
            if sft_half:
                sft_out_channels = out_channels
            else:
                sft_out_channels = out_channels * 2
            self.condition_scale.append(
                nn.Sequential(
                    nn.Conv2d(out_channels, out_channels, 3, 1, 1),
                    nn.LeakyReLU(0.2, True),
                    nn.Conv2d(out_channels, sft_out_channels, 3, 1, 1),
                )
            )
            self.condition_shift.append(
                nn.Sequential(
                    nn.Conv2d(out_channels, out_channels, 3, 1, 1),
                    nn.LeakyReLU(0.2, True),
                    nn.Conv2d(out_channels, sft_out_channels, 3, 1, 1),
                )
            )
        self.load_state_dict(state_dict)

    def forward(
        self, x, return_latents=False, return_rgb=True, randomize_noise=True, **kwargs
    ):
        """Forward function for GFPGANv1Clean.
        Args:
            x (Tensor): Input images.
            return_latents (bool): Whether to return style latents. Default: False.
            return_rgb (bool): Whether return intermediate rgb images. Default: True.
            randomize_noise (bool): Randomize noise, used when 'noise' is False. Default: True.
        """
        conditions = []
        unet_skips = []
        out_rgbs = []

        # encoder
        feat = F.leaky_relu_(self.conv_body_first(x), negative_slope=0.2)
        for i in range(self.log_size - 2):
            feat = self.conv_body_down[i](feat)
            unet_skips.insert(0, feat)
        feat = F.leaky_relu_(self.final_conv(feat), negative_slope=0.2)

        # style code
        style_code = self.final_linear(feat.view(feat.size(0), -1))
        if self.different_w:
            style_code = style_code.view(style_code.size(0), -1, self.num_style_feat)

        # decode
        for i in range(self.log_size - 2):
            # add unet skip
            feat = feat + unet_skips[i]
            # ResUpLayer
            feat = self.conv_body_up[i](feat)
            # generate scale and shift for SFT layers
            scale = self.condition_scale[i](feat)
            conditions.append(scale.clone())
            shift = self.condition_shift[i](feat)
            conditions.append(shift.clone())
            # generate rgb images
            if return_rgb:
                out_rgbs.append(self.toRGB[i](feat))

        # decoder
        image, _ = self.stylegan_decoder(
            [style_code],
            conditions,
            return_latents=return_latents,
            input_is_latent=self.input_is_latent,
            randomize_noise=randomize_noise,
        )

        return image, out_rgbs
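Editor's note: a minimal inference sketch for the `GFPGANv1Clean` class above. The constructor takes a flat state dict (with keys such as `toRGB.0.weight`) and `forward` expects a 512x512 face crop; the checkpoint path below is an assumption, not a file shipped in this diff:

```python
import torch

from r_chainner.archs.face.gfpganv1_clean_arch import GFPGANv1Clean

state_dict = torch.load("GFPGANv1.4.pth", map_location="cpu")  # hypothetical local checkpoint
if "params_ema" in state_dict:
    state_dict = state_dict["params_ema"]

model = GFPGANv1Clean(state_dict).eval()

# 1x3x512x512 face crop, roughly normalized to [-1, 1] as GFPGAN expects
face = torch.randn(1, 3, 512, 512)
with torch.no_grad():
    restored, _ = model(face, return_rgb=False)
print(restored.shape)  # torch.Size([1, 3, 512, 512])
```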
r_chainner/archs/face/stylegan2_clean_arch.py
ADDED
@@ -0,0 +1,453 @@
# pylint: skip-file
# type: ignore
import math
import random

import torch
from torch import nn
from torch.nn import functional as F
from torch.nn import init
from torch.nn.modules.batchnorm import _BatchNorm


@torch.no_grad()
def default_init_weights(module_list, scale=1, bias_fill=0, **kwargs):
    """Initialize network weights.
    Args:
        module_list (list[nn.Module] | nn.Module): Modules to be initialized.
        scale (float): Scale initialized weights, especially for residual
            blocks. Default: 1.
        bias_fill (float): The value to fill bias. Default: 0
        kwargs (dict): Other arguments for initialization function.
    """
    if not isinstance(module_list, list):
        module_list = [module_list]
    for module in module_list:
        for m in module.modules():
            if isinstance(m, nn.Conv2d):
                init.kaiming_normal_(m.weight, **kwargs)
                m.weight.data *= scale
                if m.bias is not None:
                    m.bias.data.fill_(bias_fill)
            elif isinstance(m, nn.Linear):
                init.kaiming_normal_(m.weight, **kwargs)
                m.weight.data *= scale
                if m.bias is not None:
                    m.bias.data.fill_(bias_fill)
            elif isinstance(m, _BatchNorm):
                init.constant_(m.weight, 1)
                if m.bias is not None:
                    m.bias.data.fill_(bias_fill)


class NormStyleCode(nn.Module):
    def forward(self, x):
        """Normalize the style codes.
        Args:
            x (Tensor): Style codes with shape (b, c).
        Returns:
            Tensor: Normalized tensor.
        """
        return x * torch.rsqrt(torch.mean(x**2, dim=1, keepdim=True) + 1e-8)


class ModulatedConv2d(nn.Module):
    """Modulated Conv2d used in StyleGAN2.
    There is no bias in ModulatedConv2d.
    Args:
        in_channels (int): Channel number of the input.
        out_channels (int): Channel number of the output.
        kernel_size (int): Size of the convolving kernel.
        num_style_feat (int): Channel number of style features.
        demodulate (bool): Whether to demodulate in the conv layer. Default: True.
        sample_mode (str | None): Indicating 'upsample', 'downsample' or None. Default: None.
        eps (float): A value added to the denominator for numerical stability. Default: 1e-8.
    """

    def __init__(
        self,
        in_channels,
        out_channels,
        kernel_size,
        num_style_feat,
        demodulate=True,
        sample_mode=None,
        eps=1e-8,
    ):
        super(ModulatedConv2d, self).__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.demodulate = demodulate
        self.sample_mode = sample_mode
        self.eps = eps

        # modulation inside each modulated conv
        self.modulation = nn.Linear(num_style_feat, in_channels, bias=True)
        # initialization
        default_init_weights(
            self.modulation,
            scale=1,
            bias_fill=1,
            a=0,
            mode="fan_in",
            nonlinearity="linear",
        )

        self.weight = nn.Parameter(
            torch.randn(1, out_channels, in_channels, kernel_size, kernel_size)
            / math.sqrt(in_channels * kernel_size**2)
        )
        self.padding = kernel_size // 2

    def forward(self, x, style):
        """Forward function.
        Args:
            x (Tensor): Tensor with shape (b, c, h, w).
            style (Tensor): Tensor with shape (b, num_style_feat).
        Returns:
            Tensor: Modulated tensor after convolution.
        """
        b, c, h, w = x.shape  # c = c_in
        # weight modulation
        style = self.modulation(style).view(b, 1, c, 1, 1)
        # self.weight: (1, c_out, c_in, k, k); style: (b, 1, c, 1, 1)
        weight = self.weight * style  # (b, c_out, c_in, k, k)

        if self.demodulate:
            demod = torch.rsqrt(weight.pow(2).sum([2, 3, 4]) + self.eps)
            weight = weight * demod.view(b, self.out_channels, 1, 1, 1)

        weight = weight.view(
            b * self.out_channels, c, self.kernel_size, self.kernel_size
        )

        # upsample or downsample if necessary
        if self.sample_mode == "upsample":
            x = F.interpolate(x, scale_factor=2, mode="bilinear", align_corners=False)
        elif self.sample_mode == "downsample":
            x = F.interpolate(x, scale_factor=0.5, mode="bilinear", align_corners=False)

        b, c, h, w = x.shape
        x = x.view(1, b * c, h, w)
        # weight: (b*c_out, c_in, k, k), groups=b
        out = F.conv2d(x, weight, padding=self.padding, groups=b)
        out = out.view(b, self.out_channels, *out.shape[2:4])

        return out

    def __repr__(self):
        return (
            f"{self.__class__.__name__}(in_channels={self.in_channels}, out_channels={self.out_channels}, "
            f"kernel_size={self.kernel_size}, demodulate={self.demodulate}, sample_mode={self.sample_mode})"
        )


class StyleConv(nn.Module):
    """Style conv used in StyleGAN2.
    Args:
        in_channels (int): Channel number of the input.
        out_channels (int): Channel number of the output.
        kernel_size (int): Size of the convolving kernel.
        num_style_feat (int): Channel number of style features.
        demodulate (bool): Whether demodulate in the conv layer. Default: True.
        sample_mode (str | None): Indicating 'upsample', 'downsample' or None. Default: None.
    """

    def __init__(
        self,
        in_channels,
        out_channels,
        kernel_size,
        num_style_feat,
        demodulate=True,
        sample_mode=None,
    ):
        super(StyleConv, self).__init__()
        self.modulated_conv = ModulatedConv2d(
            in_channels,
            out_channels,
            kernel_size,
            num_style_feat,
            demodulate=demodulate,
            sample_mode=sample_mode,
        )
        self.weight = nn.Parameter(torch.zeros(1))  # for noise injection
        self.bias = nn.Parameter(torch.zeros(1, out_channels, 1, 1))
        self.activate = nn.LeakyReLU(negative_slope=0.2, inplace=True)

    def forward(self, x, style, noise=None):
        # modulate
        out = self.modulated_conv(x, style) * 2**0.5  # for conversion
        # noise injection
        if noise is None:
            b, _, h, w = out.shape
            noise = out.new_empty(b, 1, h, w).normal_()
        out = out + self.weight * noise
        # add bias
        out = out + self.bias
        # activation
        out = self.activate(out)
        return out


class ToRGB(nn.Module):
    """To RGB (image space) from features.
    Args:
        in_channels (int): Channel number of input.
        num_style_feat (int): Channel number of style features.
        upsample (bool): Whether to upsample. Default: True.
    """

    def __init__(self, in_channels, num_style_feat, upsample=True):
        super(ToRGB, self).__init__()
        self.upsample = upsample
        self.modulated_conv = ModulatedConv2d(
            in_channels,
            3,
            kernel_size=1,
            num_style_feat=num_style_feat,
            demodulate=False,
            sample_mode=None,
        )
        self.bias = nn.Parameter(torch.zeros(1, 3, 1, 1))

    def forward(self, x, style, skip=None):
        """Forward function.
        Args:
            x (Tensor): Feature tensor with shape (b, c, h, w).
            style (Tensor): Tensor with shape (b, num_style_feat).
            skip (Tensor): Base/skip tensor. Default: None.
        Returns:
            Tensor: RGB images.
        """
        out = self.modulated_conv(x, style)
        out = out + self.bias
        if skip is not None:
            if self.upsample:
                skip = F.interpolate(
                    skip, scale_factor=2, mode="bilinear", align_corners=False
                )
            out = out + skip
        return out


class ConstantInput(nn.Module):
    """Constant input.
    Args:
        num_channel (int): Channel number of constant input.
        size (int): Spatial size of constant input.
    """

    def __init__(self, num_channel, size):
        super(ConstantInput, self).__init__()
        self.weight = nn.Parameter(torch.randn(1, num_channel, size, size))

    def forward(self, batch):
        out = self.weight.repeat(batch, 1, 1, 1)
        return out


class StyleGAN2GeneratorClean(nn.Module):
    """Clean version of StyleGAN2 Generator.
    Args:
        out_size (int): The spatial size of outputs.
        num_style_feat (int): Channel number of style features. Default: 512.
        num_mlp (int): Layer number of MLP style layers. Default: 8.
        channel_multiplier (int): Channel multiplier for large networks of StyleGAN2. Default: 2.
        narrow (float): Narrow ratio for channels. Default: 1.0.
    """

    def __init__(
        self, out_size, num_style_feat=512, num_mlp=8, channel_multiplier=2, narrow=1
    ):
        super(StyleGAN2GeneratorClean, self).__init__()
        # Style MLP layers
        self.num_style_feat = num_style_feat
        style_mlp_layers = [NormStyleCode()]
        for i in range(num_mlp):
            style_mlp_layers.extend(
                [
                    nn.Linear(num_style_feat, num_style_feat, bias=True),
                    nn.LeakyReLU(negative_slope=0.2, inplace=True),
                ]
            )
        self.style_mlp = nn.Sequential(*style_mlp_layers)
        # initialization
        default_init_weights(
            self.style_mlp,
            scale=1,
            bias_fill=0,
            a=0.2,
            mode="fan_in",
            nonlinearity="leaky_relu",
        )

        # channel list
        channels = {
            "4": int(512 * narrow),
            "8": int(512 * narrow),
            "16": int(512 * narrow),
            "32": int(512 * narrow),
            "64": int(256 * channel_multiplier * narrow),
            "128": int(128 * channel_multiplier * narrow),
            "256": int(64 * channel_multiplier * narrow),
            "512": int(32 * channel_multiplier * narrow),
            "1024": int(16 * channel_multiplier * narrow),
        }
        self.channels = channels

        self.constant_input = ConstantInput(channels["4"], size=4)
        self.style_conv1 = StyleConv(
            channels["4"],
            channels["4"],
            kernel_size=3,
            num_style_feat=num_style_feat,
            demodulate=True,
            sample_mode=None,
        )
        self.to_rgb1 = ToRGB(channels["4"], num_style_feat, upsample=False)

        self.log_size = int(math.log(out_size, 2))
        self.num_layers = (self.log_size - 2) * 2 + 1
        self.num_latent = self.log_size * 2 - 2

        self.style_convs = nn.ModuleList()
        self.to_rgbs = nn.ModuleList()
        self.noises = nn.Module()

        in_channels = channels["4"]
        # noise
        for layer_idx in range(self.num_layers):
            resolution = 2 ** ((layer_idx + 5) // 2)
            shape = [1, 1, resolution, resolution]
            self.noises.register_buffer(f"noise{layer_idx}", torch.randn(*shape))
        # style convs and to_rgbs
        for i in range(3, self.log_size + 1):
            out_channels = channels[f"{2**i}"]
            self.style_convs.append(
                StyleConv(
                    in_channels,
                    out_channels,
                    kernel_size=3,
                    num_style_feat=num_style_feat,
                    demodulate=True,
                    sample_mode="upsample",
                )
            )
            self.style_convs.append(
                StyleConv(
                    out_channels,
                    out_channels,
                    kernel_size=3,
                    num_style_feat=num_style_feat,
                    demodulate=True,
                    sample_mode=None,
                )
            )
            self.to_rgbs.append(ToRGB(out_channels, num_style_feat, upsample=True))
            in_channels = out_channels

    def make_noise(self):
        """Make noise for noise injection."""
        device = self.constant_input.weight.device
        noises = [torch.randn(1, 1, 4, 4, device=device)]

        for i in range(3, self.log_size + 1):
            for _ in range(2):
                noises.append(torch.randn(1, 1, 2**i, 2**i, device=device))

        return noises

    def get_latent(self, x):
        return self.style_mlp(x)

    def mean_latent(self, num_latent):
        latent_in = torch.randn(
            num_latent, self.num_style_feat, device=self.constant_input.weight.device
        )
        latent = self.style_mlp(latent_in).mean(0, keepdim=True)
        return latent

    def forward(
        self,
        styles,
        input_is_latent=False,
        noise=None,
        randomize_noise=True,
        truncation=1,
        truncation_latent=None,
        inject_index=None,
        return_latents=False,
    ):
        """Forward function for StyleGAN2GeneratorClean.
        Args:
            styles (list[Tensor]): Sample codes of styles.
            input_is_latent (bool): Whether input is latent style. Default: False.
            noise (Tensor | None): Input noise or None. Default: None.
            randomize_noise (bool): Randomize noise, used when 'noise' is False. Default: True.
            truncation (float): The truncation ratio. Default: 1.
            truncation_latent (Tensor | None): The truncation latent tensor. Default: None.
            inject_index (int | None): The injection index for mixing noise. Default: None.
            return_latents (bool): Whether to return style latents. Default: False.
        """
        # style codes -> latents with Style MLP layer
        if not input_is_latent:
            styles = [self.style_mlp(s) for s in styles]
        # noises
        if noise is None:
            if randomize_noise:
                noise = [None] * self.num_layers  # for each style conv layer
            else:  # use the stored noise
                noise = [
                    getattr(self.noises, f"noise{i}") for i in range(self.num_layers)
                ]
        # style truncation
        if truncation < 1:
            style_truncation = []
            for style in styles:
                style_truncation.append(
                    truncation_latent + truncation * (style - truncation_latent)
                )
            styles = style_truncation
        # get style latents with injection
        if len(styles) == 1:
            inject_index = self.num_latent

            if styles[0].ndim < 3:
                # repeat latent code for all the layers
                latent = styles[0].unsqueeze(1).repeat(1, inject_index, 1)
            else:  # used for encoder with different latent code for each layer
                latent = styles[0]
        elif len(styles) == 2:  # mixing noises
            if inject_index is None:
                inject_index = random.randint(1, self.num_latent - 1)
            latent1 = styles[0].unsqueeze(1).repeat(1, inject_index, 1)
            latent2 = (
                styles[1].unsqueeze(1).repeat(1, self.num_latent - inject_index, 1)
            )
            latent = torch.cat([latent1, latent2], 1)

        # main generation
        out = self.constant_input(latent.shape[0])
        out = self.style_conv1(out, latent[:, 0], noise=noise[0])
        skip = self.to_rgb1(out, latent[:, 1])

        i = 1
        for conv1, conv2, noise1, noise2, to_rgb in zip(
            self.style_convs[::2],
            self.style_convs[1::2],
            noise[1::2],
            noise[2::2],
            self.to_rgbs,
        ):
            out = conv1(out, latent[:, i], noise=noise1)
            out = conv2(out, latent[:, i + 1], noise=noise2)
            skip = to_rgb(out, latent[:, i + 2], skip)  # feature back to the rgb space
            i += 2

        image = skip

        if return_latents:
            return image, latent
        else:
            return image, None
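Editor's note: a small sketch of driving `StyleGAN2GeneratorClean` on its own — pass a list containing one batch of 512-dim style codes and it returns a synthesized image plus, optionally, the latents. Shapes follow the constructor defaults above; the 256 output size is an arbitrary choice for the example:

```python
import torch

from r_chainner.archs.face.stylegan2_clean_arch import StyleGAN2GeneratorClean

gen = StyleGAN2GeneratorClean(out_size=256, num_style_feat=512, num_mlp=8).eval()

styles = torch.randn(2, 512)  # batch of 2 random style codes
with torch.no_grad():
    image, latent = gen([styles], return_latents=True, truncation=0.7,
                        truncation_latent=gen.mean_latent(1024))
print(image.shape, latent.shape)  # (2, 3, 256, 256), (2, gen.num_latent, 512)
```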
r_chainner/model_loading.py
ADDED
@@ -0,0 +1,28 @@
from r_chainner.archs.face.gfpganv1_clean_arch import GFPGANv1Clean
from r_chainner.types import PyTorchModel


class UnsupportedModel(Exception):
    pass


def load_state_dict(state_dict) -> PyTorchModel:

    state_dict_keys = list(state_dict.keys())

    if "params_ema" in state_dict_keys:
        state_dict = state_dict["params_ema"]
    elif "params-ema" in state_dict_keys:
        state_dict = state_dict["params-ema"]
    elif "params" in state_dict_keys:
        state_dict = state_dict["params"]

    state_dict_keys = list(state_dict.keys())

    # GFPGAN
    if (
        "toRGB.0.weight" in state_dict_keys
        and "stylegan_decoder.style_mlp.1.weight" in state_dict_keys
    ):
        model = GFPGANv1Clean(state_dict)
        return model
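Editor's note: a sketch of the intended entry point — `load_state_dict` sniffs the checkpoint keys and returns a wrapped architecture (currently only GFPGAN is recognized; anything else falls through and returns `None`). The checkpoint path is a placeholder for whatever face-restoration model the user supplies:

```python
import torch

from r_chainner.model_loading import load_state_dict
from r_chainner.types import is_pytorch_face_model

raw = torch.load("models/facerestore_models/GFPGANv1.4.pth", map_location="cpu")  # hypothetical path
model = load_state_dict(raw)

assert model is not None and is_pytorch_face_model(model)
print(model.model_arch, model.scale)  # "GFPGAN", 8
```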
r_chainner/types.py
ADDED
@@ -0,0 +1,18 @@
from typing import Union

from r_chainner.archs.face.gfpganv1_clean_arch import GFPGANv1Clean


PyTorchFaceModels = (GFPGANv1Clean,)
PyTorchFaceModel = Union[GFPGANv1Clean]


def is_pytorch_face_model(model: object):
    return isinstance(model, PyTorchFaceModels)


PyTorchModels = (*PyTorchFaceModels, )
PyTorchModel = Union[PyTorchFaceModel]


def is_pytorch_model(model: object):
    return isinstance(model, PyTorchModels)
r_facelib/__init__.py
ADDED
File without changes
r_facelib/__pycache__/__init__.cpython-311.pyc
ADDED
Binary file (268 Bytes).
r_facelib/detection/__init__.py
ADDED
@@ -0,0 +1,102 @@
import os
import torch
from torch import nn
from copy import deepcopy
import pathlib

from r_facelib.utils import load_file_from_url
from r_facelib.utils import download_pretrained_models
from r_facelib.detection.yolov5face.models.common import Conv

from .retinaface.retinaface import RetinaFace
from .yolov5face.face_detector import YoloDetector


def init_detection_model(model_name, half=False, device='cuda'):
    if 'retinaface' in model_name:
        model = init_retinaface_model(model_name, half, device)
    elif 'YOLOv5' in model_name:
        model = init_yolov5face_model(model_name, device)
    else:
        raise NotImplementedError(f'{model_name} is not implemented.')

    return model


def init_retinaface_model(model_name, half=False, device='cuda'):
    if model_name == 'retinaface_resnet50':
        model = RetinaFace(network_name='resnet50', half=half)
        model_url = 'https://github.com/xinntao/facexlib/releases/download/v0.1.0/detection_Resnet50_Final.pth'
    elif model_name == 'retinaface_mobile0.25':
        model = RetinaFace(network_name='mobile0.25', half=half)
        model_url = 'https://github.com/xinntao/facexlib/releases/download/v0.1.0/detection_mobilenet0.25_Final.pth'
    else:
        raise NotImplementedError(f'{model_name} is not implemented.')

    model_path = load_file_from_url(url=model_url, model_dir='../../models/facedetection', progress=True, file_name=None)
    load_net = torch.load(model_path, map_location=lambda storage, loc: storage)
    # remove unnecessary 'module.'
    for k, v in deepcopy(load_net).items():
        if k.startswith('module.'):
            load_net[k[7:]] = v
            load_net.pop(k)
    model.load_state_dict(load_net, strict=True)
    model.eval()
    model = model.to(device)

    return model


def init_yolov5face_model(model_name, device='cuda'):
    current_dir = str(pathlib.Path(__file__).parent.resolve())
    if model_name == 'YOLOv5l':
        model = YoloDetector(config_name=current_dir+'/yolov5face/models/yolov5l.yaml', device=device)
        model_url = 'https://github.com/sczhou/CodeFormer/releases/download/v0.1.0/yolov5l-face.pth'
    elif model_name == 'YOLOv5n':
        model = YoloDetector(config_name=current_dir+'/yolov5face/models/yolov5n.yaml', device=device)
        model_url = 'https://github.com/sczhou/CodeFormer/releases/download/v0.1.0/yolov5n-face.pth'
    else:
        raise NotImplementedError(f'{model_name} is not implemented.')

    model_path = load_file_from_url(url=model_url, model_dir='../../models/facedetection', progress=True, file_name=None)
    load_net = torch.load(model_path, map_location=lambda storage, loc: storage)
    model.detector.load_state_dict(load_net, strict=True)
    model.detector.eval()
    model.detector = model.detector.to(device).float()

    for m in model.detector.modules():
        if type(m) in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU]:
            m.inplace = True  # pytorch 1.7.0 compatibility
        elif isinstance(m, Conv):
            m._non_persistent_buffers_set = set()  # pytorch 1.6.0 compatibility

    return model


# Download from Google Drive
# def init_yolov5face_model(model_name, device='cuda'):
#     if model_name == 'YOLOv5l':
#         model = YoloDetector(config_name='facelib/detection/yolov5face/models/yolov5l.yaml', device=device)
#         f_id = {'yolov5l-face.pth': '131578zMA6B2x8VQHyHfa6GEPtulMCNzV'}
#     elif model_name == 'YOLOv5n':
#         model = YoloDetector(config_name='facelib/detection/yolov5face/models/yolov5n.yaml', device=device)
#         f_id = {'yolov5n-face.pth': '1fhcpFvWZqghpGXjYPIne2sw1Fy4yhw6o'}
#     else:
#         raise NotImplementedError(f'{model_name} is not implemented.')

#     model_path = os.path.join('../../models/facedetection', list(f_id.keys())[0])
#     if not os.path.exists(model_path):
#         download_pretrained_models(file_ids=f_id, save_path_root='../../models/facedetection')

#     load_net = torch.load(model_path, map_location=lambda storage, loc: storage)
#     model.detector.load_state_dict(load_net, strict=True)
#     model.detector.eval()
#     model.detector = model.detector.to(device).float()

#     for m in model.detector.modules():
#         if type(m) in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU]:
#             m.inplace = True  # pytorch 1.7.0 compatibility
#         elif isinstance(m, Conv):
#             m._non_persistent_buffers_set = set()  # pytorch 1.6.0 compatibility

#     return model
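Editor's note: a usage sketch for the detector factory defined above. Weights are downloaded on first use via `load_file_from_url`; the `detect_faces` call is an assumption based on the facexlib-style `RetinaFace` class bundled in `retinaface.py` (its contents are not shown in this truncated view):

```python
import cv2

from r_facelib.detection import init_detection_model

det = init_detection_model('retinaface_resnet50', half=False, device='cpu')

img = cv2.imread("photo.jpg")  # hypothetical input, BGR as read by OpenCV
bboxes = det.detect_faces(img, 0.97)  # assumed facexlib-style API: boxes + 5 landmarks per face
print(len(bboxes), "face(s) detected")
```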
r_facelib/detection/__pycache__/__init__.cpython-311.pyc
ADDED
Binary file (4.99 kB).
r_facelib/detection/__pycache__/align_trans.cpython-311.pyc
ADDED
Binary file (9.78 kB).
r_facelib/detection/__pycache__/matlab_cp2tform.cpython-311.pyc
ADDED
Binary file (10.9 kB).
r_facelib/detection/align_trans.py
ADDED
@@ -0,0 +1,219 @@
import cv2
import numpy as np

from .matlab_cp2tform import get_similarity_transform_for_cv2

# reference facial points, a list of coordinates (x,y)
REFERENCE_FACIAL_POINTS = [[30.29459953, 51.69630051], [65.53179932, 51.50139999], [48.02519989, 71.73660278],
                           [33.54930115, 92.3655014], [62.72990036, 92.20410156]]

DEFAULT_CROP_SIZE = (96, 112)


class FaceWarpException(Exception):

    def __str__(self):
        return 'In File {}:{}'.format(__file__, super.__str__(self))


def get_reference_facial_points(output_size=None, inner_padding_factor=0.0, outer_padding=(0, 0), default_square=False):
    """
    Function:
    ----------
        get reference 5 key points according to crop settings:
        0. Set default crop_size:
            if default_square:
                crop_size = (112, 112)
            else:
                crop_size = (96, 112)
        1. Pad the crop_size by inner_padding_factor in each side;
        2. Resize crop_size into (output_size - outer_padding*2),
            pad into output_size with outer_padding;
        3. Output reference_5point;
    Parameters:
    ----------
        @output_size: (w, h) or None
            size of aligned face image
        @inner_padding_factor: (w_factor, h_factor)
            padding factor for inner (w, h)
        @outer_padding: (w_pad, h_pad)
            each row is a pair of coordinates (x, y)
        @default_square: True or False
            if True:
                default crop_size = (112, 112)
            else:
                default crop_size = (96, 112);
        !!! make sure, if output_size is not None:
                (output_size - outer_padding)
                = some_scale * (default crop_size * (1.0 +
                inner_padding_factor))
    Returns:
    ----------
        @reference_5point: 5x2 np.array
            each row is a pair of transformed coordinates (x, y)
    """

    tmp_5pts = np.array(REFERENCE_FACIAL_POINTS)
    tmp_crop_size = np.array(DEFAULT_CROP_SIZE)

    # 0) make the inner region a square
    if default_square:
        size_diff = max(tmp_crop_size) - tmp_crop_size
        tmp_5pts += size_diff / 2
        tmp_crop_size += size_diff

    if (output_size and output_size[0] == tmp_crop_size[0] and output_size[1] == tmp_crop_size[1]):

        return tmp_5pts

    if (inner_padding_factor == 0 and outer_padding == (0, 0)):
        if output_size is None:
            return tmp_5pts
        else:
            raise FaceWarpException('No paddings to do, output_size must be None or {}'.format(tmp_crop_size))

    # check output size
    if not (0 <= inner_padding_factor <= 1.0):
        raise FaceWarpException('Not (0 <= inner_padding_factor <= 1.0)')

    if ((inner_padding_factor > 0 or outer_padding[0] > 0 or outer_padding[1] > 0) and output_size is None):
        output_size = tmp_crop_size * \
            (1 + inner_padding_factor * 2).astype(np.int32)
        output_size += np.array(outer_padding)
    if not (outer_padding[0] < output_size[0] and outer_padding[1] < output_size[1]):
        raise FaceWarpException('Not (outer_padding[0] < output_size[0] and outer_padding[1] < output_size[1])')

    # 1) pad the inner region according inner_padding_factor
    if inner_padding_factor > 0:
        size_diff = tmp_crop_size * inner_padding_factor * 2
        tmp_5pts += size_diff / 2
        tmp_crop_size += np.round(size_diff).astype(np.int32)

    # 2) resize the padded inner region
    size_bf_outer_pad = np.array(output_size) - np.array(outer_padding) * 2

    if size_bf_outer_pad[0] * tmp_crop_size[1] != size_bf_outer_pad[1] * tmp_crop_size[0]:
        raise FaceWarpException('Must have (output_size - outer_padding)'
                                '= some_scale * (crop_size * (1.0 + inner_padding_factor)')

    scale_factor = size_bf_outer_pad[0].astype(np.float32) / tmp_crop_size[0]
    tmp_5pts = tmp_5pts * scale_factor
    # size_diff = tmp_crop_size * (scale_factor - min(scale_factor))
    # tmp_5pts = tmp_5pts + size_diff / 2
    tmp_crop_size = size_bf_outer_pad

    # 3) add outer_padding to make output_size
    reference_5point = tmp_5pts + np.array(outer_padding)
    tmp_crop_size = output_size

    return reference_5point


def get_affine_transform_matrix(src_pts, dst_pts):
    """
    Function:
    ----------
        get affine transform matrix 'tfm' from src_pts to dst_pts
    Parameters:
    ----------
        @src_pts: Kx2 np.array
            source points matrix, each row is a pair of coordinates (x, y)
        @dst_pts: Kx2 np.array
            destination points matrix, each row is a pair of coordinates (x, y)
    Returns:
    ----------
        @tfm: 2x3 np.array
            transform matrix from src_pts to dst_pts
    """

    tfm = np.float32([[1, 0, 0], [0, 1, 0]])
    n_pts = src_pts.shape[0]
    ones = np.ones((n_pts, 1), src_pts.dtype)
    src_pts_ = np.hstack([src_pts, ones])
    dst_pts_ = np.hstack([dst_pts, ones])

    A, res, rank, s = np.linalg.lstsq(src_pts_, dst_pts_)

    if rank == 3:
        tfm = np.float32([[A[0, 0], A[1, 0], A[2, 0]], [A[0, 1], A[1, 1], A[2, 1]]])
    elif rank == 2:
        tfm = np.float32([[A[0, 0], A[1, 0], 0], [A[0, 1], A[1, 1], 0]])

    return tfm


def warp_and_crop_face(src_img, facial_pts, reference_pts=None, crop_size=(96, 112), align_type='smilarity'):
    """
    Function:
    ----------
        apply affine transform 'trans' to uv
    Parameters:
    ----------
        @src_img: 3x3 np.array
            input image
        @facial_pts: could be
            1)a list of K coordinates (x,y)
            or
            2) Kx2 or 2xK np.array
            each row or col is a pair of coordinates (x, y)
        @reference_pts: could be
            1) a list of K coordinates (x,y)
            or
            2) Kx2 or 2xK np.array
            each row or col is a pair of coordinates (x, y)
            or
            3) None
            if None, use default reference facial points
        @crop_size: (w, h)
            output face image size
        @align_type: transform type, could be one of
            1) 'similarity': use similarity transform
            2) 'cv2_affine': use the first 3 points to do affine transform,
            by calling cv2.getAffineTransform()
            3) 'affine': use all points to do affine transform
    Returns:
    ----------
        @face_img: output face image with size (w, h) = @crop_size
    """

    if reference_pts is None:
        if crop_size[0] == 96 and crop_size[1] == 112:
            reference_pts = REFERENCE_FACIAL_POINTS
        else:
            default_square = False
            inner_padding_factor = 0
            outer_padding = (0, 0)
            output_size = crop_size

            reference_pts = get_reference_facial_points(output_size, inner_padding_factor, outer_padding,
                                                        default_square)

    ref_pts = np.float32(reference_pts)
|
192 |
+
ref_pts_shp = ref_pts.shape
|
193 |
+
if max(ref_pts_shp) < 3 or min(ref_pts_shp) != 2:
|
194 |
+
raise FaceWarpException('reference_pts.shape must be (K,2) or (2,K) and K>2')
|
195 |
+
|
196 |
+
if ref_pts_shp[0] == 2:
|
197 |
+
ref_pts = ref_pts.T
|
198 |
+
|
199 |
+
src_pts = np.float32(facial_pts)
|
200 |
+
src_pts_shp = src_pts.shape
|
201 |
+
if max(src_pts_shp) < 3 or min(src_pts_shp) != 2:
|
202 |
+
raise FaceWarpException('facial_pts.shape must be (K,2) or (2,K) and K>2')
|
203 |
+
|
204 |
+
if src_pts_shp[0] == 2:
|
205 |
+
src_pts = src_pts.T
|
206 |
+
|
207 |
+
if src_pts.shape != ref_pts.shape:
|
208 |
+
raise FaceWarpException('facial_pts and reference_pts must have the same shape')
|
209 |
+
|
210 |
+
if align_type == 'cv2_affine':
|
211 |
+
tfm = cv2.getAffineTransform(src_pts[0:3], ref_pts[0:3])
|
212 |
+
elif align_type == 'affine':
|
213 |
+
tfm = get_affine_transform_matrix(src_pts, ref_pts)
|
214 |
+
else:
|
215 |
+
tfm = get_similarity_transform_for_cv2(src_pts, ref_pts)
|
216 |
+
|
217 |
+
face_img = cv2.warpAffine(src_img, tfm, (crop_size[0], crop_size[1]))
|
218 |
+
|
219 |
+
return face_img
|
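
A minimal usage sketch for the alignment helpers above (not part of the uploaded diff; the image path and the 5-point landmark values are made-up assumptions):

# Illustrative sketch only -- align one detected face to the 112x112 template.
import cv2
import numpy as np
from r_facelib.detection.align_trans import get_reference_facial_points, warp_and_crop_face

img = cv2.imread('face.jpg')  # hypothetical BGR input image
landmarks5 = np.array([[38.3, 51.7], [73.5, 51.5], [56.0, 71.7],
                       [41.5, 92.4], [70.7, 92.2]])  # hypothetical 5-point landmarks (x, y)
reference = get_reference_facial_points(default_square=True)  # reference points for a 112x112 crop
aligned = warp_and_crop_face(img, landmarks5, reference_pts=reference, crop_size=(112, 112))
cv2.imwrite('aligned.jpg', aligned)

This mirrors how retinaface.py, further down in this upload, calls warp_and_crop_face inside __align_multi.
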
r_facelib/detection/matlab_cp2tform.py
ADDED
@@ -0,0 +1,317 @@
import numpy as np
from numpy.linalg import inv, lstsq
from numpy.linalg import matrix_rank as rank
from numpy.linalg import norm


class MatlabCp2tormException(Exception):

    def __str__(self):
        return 'In File {}:{}'.format(__file__, super.__str__(self))


def tformfwd(trans, uv):
    """
    Function:
    ----------
        apply affine transform 'trans' to uv

    Parameters:
    ----------
        @trans: 3x3 np.array
            transform matrix
        @uv: Kx2 np.array
            each row is a pair of coordinates (x, y)

    Returns:
    ----------
        @xy: Kx2 np.array
            each row is a pair of transformed coordinates (x, y)
    """
    uv = np.hstack((uv, np.ones((uv.shape[0], 1))))
    xy = np.dot(uv, trans)
    xy = xy[:, 0:-1]
    return xy


def tforminv(trans, uv):
    """
    Function:
    ----------
        apply the inverse of affine transform 'trans' to uv

    Parameters:
    ----------
        @trans: 3x3 np.array
            transform matrix
        @uv: Kx2 np.array
            each row is a pair of coordinates (x, y)

    Returns:
    ----------
        @xy: Kx2 np.array
            each row is a pair of inverse-transformed coordinates (x, y)
    """
    Tinv = inv(trans)
    xy = tformfwd(Tinv, uv)
    return xy


def findNonreflectiveSimilarity(uv, xy, options=None):
    options = {'K': 2}

    K = options['K']
    M = xy.shape[0]
    x = xy[:, 0].reshape((-1, 1))  # use reshape to keep a column vector
    y = xy[:, 1].reshape((-1, 1))  # use reshape to keep a column vector

    tmp1 = np.hstack((x, y, np.ones((M, 1)), np.zeros((M, 1))))
    tmp2 = np.hstack((y, -x, np.zeros((M, 1)), np.ones((M, 1))))
    X = np.vstack((tmp1, tmp2))

    u = uv[:, 0].reshape((-1, 1))  # use reshape to keep a column vector
    v = uv[:, 1].reshape((-1, 1))  # use reshape to keep a column vector
    U = np.vstack((u, v))

    # We know that X * r = U
    if rank(X) >= 2 * K:
        r, _, _, _ = lstsq(X, U, rcond=-1)
        r = np.squeeze(r)
    else:
        raise Exception('cp2tform:twoUniquePointsReq')
    sc = r[0]
    ss = r[1]
    tx = r[2]
    ty = r[3]

    Tinv = np.array([[sc, -ss, 0], [ss, sc, 0], [tx, ty, 1]])
    T = inv(Tinv)
    T[:, 2] = np.array([0, 0, 1])

    return T, Tinv


def findSimilarity(uv, xy, options=None):
    options = {'K': 2}

    # uv = np.array(uv)
    # xy = np.array(xy)

    # Solve for trans1
    trans1, trans1_inv = findNonreflectiveSimilarity(uv, xy, options)

    # Solve for trans2

    # manually reflect the xy data across the Y-axis
    xyR = xy
    xyR[:, 0] = -1 * xyR[:, 0]

    trans2r, trans2r_inv = findNonreflectiveSimilarity(uv, xyR, options)

    # manually reflect the tform to undo the reflection done on xyR
    TreflectY = np.array([[-1, 0, 0], [0, 1, 0], [0, 0, 1]])

    trans2 = np.dot(trans2r, TreflectY)

    # Figure out if trans1 or trans2 is better
    xy1 = tformfwd(trans1, uv)
    norm1 = norm(xy1 - xy)

    xy2 = tformfwd(trans2, uv)
    norm2 = norm(xy2 - xy)

    if norm1 <= norm2:
        return trans1, trans1_inv
    else:
        trans2_inv = inv(trans2)
        return trans2, trans2_inv


def get_similarity_transform(src_pts, dst_pts, reflective=True):
    """
    Function:
    ----------
        Find Similarity Transform Matrix 'trans':
            u = src_pts[:, 0]
            v = src_pts[:, 1]
            x = dst_pts[:, 0]
            y = dst_pts[:, 1]
            [x, y, 1] = [u, v, 1] * trans

    Parameters:
    ----------
        @src_pts: Kx2 np.array
            source points, each row is a pair of coordinates (x, y)
        @dst_pts: Kx2 np.array
            destination points, each row is a pair of transformed
            coordinates (x, y)
        @reflective: True or False
            if True:
                use reflective similarity transform
            else:
                use non-reflective similarity transform

    Returns:
    ----------
        @trans: 3x3 np.array
            transform matrix from uv to xy
        trans_inv: 3x3 np.array
            inverse of trans, transform matrix from xy to uv
    """

    if reflective:
        trans, trans_inv = findSimilarity(src_pts, dst_pts)
    else:
        trans, trans_inv = findNonreflectiveSimilarity(src_pts, dst_pts)

    return trans, trans_inv


def cvt_tform_mat_for_cv2(trans):
    """
    Function:
    ----------
        Convert Transform Matrix 'trans' into 'cv2_trans' which could be
        directly used by cv2.warpAffine():
            u = src_pts[:, 0]
            v = src_pts[:, 1]
            x = dst_pts[:, 0]
            y = dst_pts[:, 1]
            [x, y].T = cv_trans * [u, v, 1].T

    Parameters:
    ----------
        @trans: 3x3 np.array
            transform matrix from uv to xy

    Returns:
    ----------
        @cv2_trans: 2x3 np.array
            transform matrix from src_pts to dst_pts, could be directly used
            for cv2.warpAffine()
    """
    cv2_trans = trans[:, 0:2].T

    return cv2_trans


def get_similarity_transform_for_cv2(src_pts, dst_pts, reflective=True):
    """
    Function:
    ----------
        Find Similarity Transform Matrix 'cv2_trans' which could be
        directly used by cv2.warpAffine():
            u = src_pts[:, 0]
            v = src_pts[:, 1]
            x = dst_pts[:, 0]
            y = dst_pts[:, 1]
            [x, y].T = cv_trans * [u, v, 1].T

    Parameters:
    ----------
        @src_pts: Kx2 np.array
            source points, each row is a pair of coordinates (x, y)
        @dst_pts: Kx2 np.array
            destination points, each row is a pair of transformed
            coordinates (x, y)
        reflective: True or False
            if True:
                use reflective similarity transform
            else:
                use non-reflective similarity transform

    Returns:
    ----------
        @cv2_trans: 2x3 np.array
            transform matrix from src_pts to dst_pts, could be directly used
            for cv2.warpAffine()
    """
    trans, trans_inv = get_similarity_transform(src_pts, dst_pts, reflective)
    cv2_trans = cvt_tform_mat_for_cv2(trans)

    return cv2_trans


if __name__ == '__main__':
    """
    u = [0, 6, -2]
    v = [0, 3, 5]
    x = [-1, 0, 4]
    y = [-1, -10, 4]

    # In Matlab, run:
    #
    #   uv = [u'; v'];
    #   xy = [x'; y'];
    #   tform_sim=cp2tform(uv,xy,'similarity');
    #
    #   trans = tform_sim.tdata.T
    #   ans =
    #       -0.0764   -1.6190         0
    #        1.6190   -0.0764         0
    #       -3.2156    0.0290    1.0000
    #   trans_inv = tform_sim.tdata.Tinv
    #   ans =
    #
    #       -0.0291    0.6163         0
    #       -0.6163   -0.0291         0
    #       -0.0756    1.9826    1.0000
    #   xy_m=tformfwd(tform_sim, u,v)
    #
    #   xy_m =
    #
    #       -3.2156    0.0290
    #        1.1833   -9.9143
    #        5.0323    2.8853
    #   uv_m=tforminv(tform_sim, x,y)
    #
    #   uv_m =
    #
    #        0.5698    1.3953
    #        6.0872    2.2733
    #       -2.6570    4.3314
    """
    u = [0, 6, -2]
    v = [0, 3, 5]
    x = [-1, 0, 4]
    y = [-1, -10, 4]

    uv = np.array((u, v)).T
    xy = np.array((x, y)).T

    print('\n--->uv:')
    print(uv)
    print('\n--->xy:')
    print(xy)

    trans, trans_inv = get_similarity_transform(uv, xy)

    print('\n--->trans matrix:')
    print(trans)

    print('\n--->trans_inv matrix:')
    print(trans_inv)

    print('\n---> apply transform to uv')
    print('\nxy_m = uv_augmented * trans')
    uv_aug = np.hstack((uv, np.ones((uv.shape[0], 1))))
    xy_m = np.dot(uv_aug, trans)
    print(xy_m)

    print('\nxy_m = tformfwd(trans, uv)')
    xy_m = tformfwd(trans, uv)
    print(xy_m)

    print('\n---> apply inverse transform to xy')
    print('\nuv_m = xy_augmented * trans_inv')
    xy_aug = np.hstack((xy, np.ones((xy.shape[0], 1))))
    uv_m = np.dot(xy_aug, trans_inv)
    print(uv_m)

    print('\nuv_m = tformfwd(trans_inv, xy)')
    uv_m = tformfwd(trans_inv, xy)
    print(uv_m)

    uv_m = tforminv(trans, xy)
    print('\nuv_m = tforminv(trans, xy)')
    print(uv_m)
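
A short sketch of how the cp2tform port above is typically consumed (illustrative only; the point coordinates and image path are arbitrary examples):

# Illustrative sketch only -- estimate a similarity transform between two 5-point sets
# and apply it with OpenCV, the same way align_trans.warp_and_crop_face does.
import cv2
import numpy as np
from r_facelib.detection.matlab_cp2tform import get_similarity_transform_for_cv2

src_pts = np.float32([[30.3, 51.7], [65.5, 51.5], [48.0, 71.7], [33.5, 92.4], [62.7, 92.2]])
dst_pts = np.float32([[38.3, 51.7], [73.5, 51.5], [56.0, 71.7], [41.5, 92.4], [70.7, 92.2]])
tfm = get_similarity_transform_for_cv2(src_pts, dst_pts)  # 2x3 matrix, ready for cv2.warpAffine
warped = cv2.warpAffine(cv2.imread('face.jpg'), tfm, (112, 112))  # 'face.jpg' is a placeholder
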
r_facelib/detection/retinaface/__pycache__/retinaface.cpython-311.pyc
ADDED
Binary file (20.8 kB)
r_facelib/detection/retinaface/__pycache__/retinaface_net.cpython-311.pyc
ADDED
Binary file (13 kB)
r_facelib/detection/retinaface/__pycache__/retinaface_utils.cpython-311.pyc
ADDED
Binary file (26.8 kB)
r_facelib/detection/retinaface/retinaface.py
ADDED
@@ -0,0 +1,389 @@
import cv2
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from PIL import Image
from torchvision.models._utils import IntermediateLayerGetter as IntermediateLayerGetter

from modules import shared

from r_facelib.detection.align_trans import get_reference_facial_points, warp_and_crop_face
from r_facelib.detection.retinaface.retinaface_net import FPN, SSH, MobileNetV1, make_bbox_head, make_class_head, make_landmark_head
from r_facelib.detection.retinaface.retinaface_utils import (PriorBox, batched_decode, batched_decode_landm, decode, decode_landm,
                                                             py_cpu_nms)

# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
if torch.cuda.is_available():
    device = torch.device('cuda')
elif torch.backends.mps.is_available():
    device = torch.device('mps')
# elif hasattr(torch,'dml'):
#     device = torch.device('dml')
elif hasattr(torch, 'dml') or hasattr(torch, 'privateuseone'):  # AMD
    if shared.cmd_opts is not None:  # A1111
        if shared.cmd_opts.device_id is not None:
            device = torch.device(f'privateuseone:{shared.cmd_opts.device_id}')
        else:
            device = torch.device('privateuseone:0')
    else:
        device = torch.device('privateuseone:0')
else:
    device = torch.device('cpu')


def generate_config(network_name):

    cfg_mnet = {
        'name': 'mobilenet0.25',
        'min_sizes': [[16, 32], [64, 128], [256, 512]],
        'steps': [8, 16, 32],
        'variance': [0.1, 0.2],
        'clip': False,
        'loc_weight': 2.0,
        'gpu_train': True,
        'batch_size': 32,
        'ngpu': 1,
        'epoch': 250,
        'decay1': 190,
        'decay2': 220,
        'image_size': 640,
        'return_layers': {
            'stage1': 1,
            'stage2': 2,
            'stage3': 3
        },
        'in_channel': 32,
        'out_channel': 64
    }

    cfg_re50 = {
        'name': 'Resnet50',
        'min_sizes': [[16, 32], [64, 128], [256, 512]],
        'steps': [8, 16, 32],
        'variance': [0.1, 0.2],
        'clip': False,
        'loc_weight': 2.0,
        'gpu_train': True,
        'batch_size': 24,
        'ngpu': 4,
        'epoch': 100,
        'decay1': 70,
        'decay2': 90,
        'image_size': 840,
        'return_layers': {
            'layer2': 1,
            'layer3': 2,
            'layer4': 3
        },
        'in_channel': 256,
        'out_channel': 256
    }

    if network_name == 'mobile0.25':
        return cfg_mnet
    elif network_name == 'resnet50':
        return cfg_re50
    else:
        raise NotImplementedError(f'network_name={network_name}')


class RetinaFace(nn.Module):

    def __init__(self, network_name='resnet50', half=False, phase='test'):
        super(RetinaFace, self).__init__()
        self.half_inference = half
        cfg = generate_config(network_name)
        self.backbone = cfg['name']

        self.model_name = f'retinaface_{network_name}'
        self.cfg = cfg
        self.phase = phase
        self.target_size, self.max_size = 1600, 2150
        self.resize, self.scale, self.scale1 = 1., None, None
        self.mean_tensor = torch.tensor([[[[104.]], [[117.]], [[123.]]]]).to(device)
        self.reference = get_reference_facial_points(default_square=True)
        # Build network.
        backbone = None
        if cfg['name'] == 'mobilenet0.25':
            backbone = MobileNetV1()
            self.body = IntermediateLayerGetter(backbone, cfg['return_layers'])
        elif cfg['name'] == 'Resnet50':
            import torchvision.models as models
            backbone = models.resnet50(pretrained=False)
            self.body = IntermediateLayerGetter(backbone, cfg['return_layers'])

        in_channels_stage2 = cfg['in_channel']
        in_channels_list = [
            in_channels_stage2 * 2,
            in_channels_stage2 * 4,
            in_channels_stage2 * 8,
        ]

        out_channels = cfg['out_channel']
        self.fpn = FPN(in_channels_list, out_channels)
        self.ssh1 = SSH(out_channels, out_channels)
        self.ssh2 = SSH(out_channels, out_channels)
        self.ssh3 = SSH(out_channels, out_channels)

        self.ClassHead = make_class_head(fpn_num=3, inchannels=cfg['out_channel'])
        self.BboxHead = make_bbox_head(fpn_num=3, inchannels=cfg['out_channel'])
        self.LandmarkHead = make_landmark_head(fpn_num=3, inchannels=cfg['out_channel'])

        self.to(device)
        self.eval()
        if self.half_inference:
            self.half()

    def forward(self, inputs):
        self.to(device)
        out = self.body(inputs)

        if self.backbone == 'mobilenet0.25' or self.backbone == 'Resnet50':
            out = list(out.values())
        # FPN
        fpn = self.fpn(out)

        # SSH
        feature1 = self.ssh1(fpn[0])
        feature2 = self.ssh2(fpn[1])
        feature3 = self.ssh3(fpn[2])
        features = [feature1, feature2, feature3]

        bbox_regressions = torch.cat([self.BboxHead[i](feature) for i, feature in enumerate(features)], dim=1)
        classifications = torch.cat([self.ClassHead[i](feature) for i, feature in enumerate(features)], dim=1)
        tmp = [self.LandmarkHead[i](feature) for i, feature in enumerate(features)]
        ldm_regressions = (torch.cat(tmp, dim=1))

        if self.phase == 'train':
            output = (bbox_regressions, classifications, ldm_regressions)
        else:
            output = (bbox_regressions, F.softmax(classifications, dim=-1), ldm_regressions)
        return output

    def __detect_faces(self, inputs):
        # get scale
        height, width = inputs.shape[2:]
        self.scale = torch.tensor([width, height, width, height], dtype=torch.float32).to(device)
        tmp = [width, height, width, height, width, height, width, height, width, height]
        self.scale1 = torch.tensor(tmp, dtype=torch.float32).to(device)

        # forward
        inputs = inputs.to(device)
        if self.half_inference:
            inputs = inputs.half()
        loc, conf, landmarks = self(inputs)

        # get priorbox
        priorbox = PriorBox(self.cfg, image_size=inputs.shape[2:])
        priors = priorbox.forward().to(device)

        return loc, conf, landmarks, priors

    # single image detection
    def transform(self, image, use_origin_size):
        # convert to opencv format
        if isinstance(image, Image.Image):
            image = cv2.cvtColor(np.asarray(image), cv2.COLOR_RGB2BGR)
        image = image.astype(np.float32)

        # testing scale
        im_size_min = np.min(image.shape[0:2])
        im_size_max = np.max(image.shape[0:2])
        resize = float(self.target_size) / float(im_size_min)

        # prevent bigger axis from being more than max_size
        if np.round(resize * im_size_max) > self.max_size:
            resize = float(self.max_size) / float(im_size_max)
        resize = 1 if use_origin_size else resize

        # resize
        if resize != 1:
            image = cv2.resize(image, None, None, fx=resize, fy=resize, interpolation=cv2.INTER_LINEAR)

        # convert to torch.tensor format
        # image -= (104, 117, 123)
        image = image.transpose(2, 0, 1)
        image = torch.from_numpy(image).unsqueeze(0)

        return image, resize

    def detect_faces(
        self,
        image,
        conf_threshold=0.8,
        nms_threshold=0.4,
        use_origin_size=True,
    ):
        """
        Params:
            imgs: BGR image
        """
        image, self.resize = self.transform(image, use_origin_size)
        image = image.to(device)
        if self.half_inference:
            image = image.half()
        image = image - self.mean_tensor

        loc, conf, landmarks, priors = self.__detect_faces(image)

        boxes = decode(loc.data.squeeze(0), priors.data, self.cfg['variance'])
        boxes = boxes * self.scale / self.resize
        boxes = boxes.cpu().numpy()

        scores = conf.squeeze(0).data.cpu().numpy()[:, 1]

        landmarks = decode_landm(landmarks.squeeze(0), priors, self.cfg['variance'])
        landmarks = landmarks * self.scale1 / self.resize
        landmarks = landmarks.cpu().numpy()

        # ignore low scores
        inds = np.where(scores > conf_threshold)[0]
        boxes, landmarks, scores = boxes[inds], landmarks[inds], scores[inds]

        # sort
        order = scores.argsort()[::-1]
        boxes, landmarks, scores = boxes[order], landmarks[order], scores[order]

        # do NMS
        bounding_boxes = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
        keep = py_cpu_nms(bounding_boxes, nms_threshold)
        bounding_boxes, landmarks = bounding_boxes[keep, :], landmarks[keep]
        # self.t['forward_pass'].toc()
        # print(self.t['forward_pass'].average_time)
        # import sys
        # sys.stdout.flush()
        return np.concatenate((bounding_boxes, landmarks), axis=1)

    def __align_multi(self, image, boxes, landmarks, limit=None):

        if len(boxes) < 1:
            return [], []

        if limit:
            boxes = boxes[:limit]
            landmarks = landmarks[:limit]

        faces = []
        for landmark in landmarks:
            facial5points = [[landmark[2 * j], landmark[2 * j + 1]] for j in range(5)]

            warped_face = warp_and_crop_face(np.array(image), facial5points, self.reference, crop_size=(112, 112))
            faces.append(warped_face)

        return np.concatenate((boxes, landmarks), axis=1), faces

    def align_multi(self, img, conf_threshold=0.8, limit=None):

        rlt = self.detect_faces(img, conf_threshold=conf_threshold)
        boxes, landmarks = rlt[:, 0:5], rlt[:, 5:]

        return self.__align_multi(img, boxes, landmarks, limit)

    # batched detection
    def batched_transform(self, frames, use_origin_size):
        """
        Arguments:
            frames: a list of PIL.Image, or torch.Tensor(shape=[n, h, w, c],
                type=np.float32, BGR format).
            use_origin_size: whether to use origin size.
        """
        from_PIL = True if isinstance(frames[0], Image.Image) else False

        # convert to opencv format
        if from_PIL:
            frames = [cv2.cvtColor(np.asarray(frame), cv2.COLOR_RGB2BGR) for frame in frames]
            frames = np.asarray(frames, dtype=np.float32)

        # testing scale
        im_size_min = np.min(frames[0].shape[0:2])
        im_size_max = np.max(frames[0].shape[0:2])
        resize = float(self.target_size) / float(im_size_min)

        # prevent bigger axis from being more than max_size
        if np.round(resize * im_size_max) > self.max_size:
            resize = float(self.max_size) / float(im_size_max)
        resize = 1 if use_origin_size else resize

        # resize
        if resize != 1:
            if not from_PIL:
                frames = F.interpolate(frames, scale_factor=resize)
            else:
                frames = [
                    cv2.resize(frame, None, None, fx=resize, fy=resize, interpolation=cv2.INTER_LINEAR)
                    for frame in frames
                ]

        # convert to torch.tensor format
        if not from_PIL:
            frames = frames.transpose(1, 2).transpose(1, 3).contiguous()
        else:
            frames = frames.transpose((0, 3, 1, 2))
            frames = torch.from_numpy(frames)

        return frames, resize

    def batched_detect_faces(self, frames, conf_threshold=0.8, nms_threshold=0.4, use_origin_size=True):
        """
        Arguments:
            frames: a list of PIL.Image, or np.array(shape=[n, h, w, c],
                type=np.uint8, BGR format).
            conf_threshold: confidence threshold.
            nms_threshold: nms threshold.
            use_origin_size: whether to use origin size.
        Returns:
            final_bounding_boxes: list of np.array ([n_boxes, 5],
                type=np.float32).
            final_landmarks: list of np.array ([n_boxes, 10], type=np.float32).
        """
        # self.t['forward_pass'].tic()
        frames, self.resize = self.batched_transform(frames, use_origin_size)
        frames = frames.to(device)
        frames = frames - self.mean_tensor

        b_loc, b_conf, b_landmarks, priors = self.__detect_faces(frames)

        final_bounding_boxes, final_landmarks = [], []

        # decode
        priors = priors.unsqueeze(0)
        b_loc = batched_decode(b_loc, priors, self.cfg['variance']) * self.scale / self.resize
        b_landmarks = batched_decode_landm(b_landmarks, priors, self.cfg['variance']) * self.scale1 / self.resize
        b_conf = b_conf[:, :, 1]

        # index for selection
        b_indice = b_conf > conf_threshold

        # concat
        b_loc_and_conf = torch.cat((b_loc, b_conf.unsqueeze(-1)), dim=2).float()

        for pred, landm, inds in zip(b_loc_and_conf, b_landmarks, b_indice):

            # ignore low scores
            pred, landm = pred[inds, :], landm[inds, :]
            if pred.shape[0] == 0:
                final_bounding_boxes.append(np.array([], dtype=np.float32))
                final_landmarks.append(np.array([], dtype=np.float32))
                continue

            # sort
            # order = score.argsort(descending=True)
            # box, landm, score = box[order], landm[order], score[order]

            # to CPU
            bounding_boxes, landm = pred.cpu().numpy(), landm.cpu().numpy()

            # NMS
            keep = py_cpu_nms(bounding_boxes, nms_threshold)
            bounding_boxes, landmarks = bounding_boxes[keep, :], landm[keep]

            # append
            final_bounding_boxes.append(bounding_boxes)
            final_landmarks.append(landmarks)
        # self.t['forward_pass'].toc(average=True)
        # self.batch_time += self.t['forward_pass'].diff
        # self.total_frame += len(frames)
        # print(self.batch_time / self.total_frame)

        return final_bounding_boxes, final_landmarks
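
A hedged usage sketch for the RetinaFace class above (weight loading is omitted; the checkpoint name and image path are placeholder assumptions, since this part of the upload does not show where weights are fetched):

# Illustrative sketch only -- run the detector on one BGR image.
import cv2
import torch
from r_facelib.detection.retinaface.retinaface import RetinaFace

net = RetinaFace(network_name='resnet50', half=False)
# net.load_state_dict(torch.load('detection_Resnet50_Final.pth', map_location='cpu'))  # hypothetical checkpoint
img = cv2.imread('group.jpg')  # BGR, as expected by detect_faces()
with torch.no_grad():
    dets = net.detect_faces(img, conf_threshold=0.8, nms_threshold=0.4)
# dets has shape (n_faces, 15): x1, y1, x2, y2, score, then 10 landmark coordinates
print(dets.shape)
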
r_facelib/detection/retinaface/retinaface_net.py
ADDED
@@ -0,0 +1,196 @@
import torch
import torch.nn as nn
import torch.nn.functional as F


def conv_bn(inp, oup, stride=1, leaky=0):
    return nn.Sequential(
        nn.Conv2d(inp, oup, 3, stride, 1, bias=False), nn.BatchNorm2d(oup),
        nn.LeakyReLU(negative_slope=leaky, inplace=True))


def conv_bn_no_relu(inp, oup, stride):
    return nn.Sequential(
        nn.Conv2d(inp, oup, 3, stride, 1, bias=False),
        nn.BatchNorm2d(oup),
    )


def conv_bn1X1(inp, oup, stride, leaky=0):
    return nn.Sequential(
        nn.Conv2d(inp, oup, 1, stride, padding=0, bias=False), nn.BatchNorm2d(oup),
        nn.LeakyReLU(negative_slope=leaky, inplace=True))


def conv_dw(inp, oup, stride, leaky=0.1):
    return nn.Sequential(
        nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False),
        nn.BatchNorm2d(inp),
        nn.LeakyReLU(negative_slope=leaky, inplace=True),
        nn.Conv2d(inp, oup, 1, 1, 0, bias=False),
        nn.BatchNorm2d(oup),
        nn.LeakyReLU(negative_slope=leaky, inplace=True),
    )


class SSH(nn.Module):

    def __init__(self, in_channel, out_channel):
        super(SSH, self).__init__()
        assert out_channel % 4 == 0
        leaky = 0
        if (out_channel <= 64):
            leaky = 0.1
        self.conv3X3 = conv_bn_no_relu(in_channel, out_channel // 2, stride=1)

        self.conv5X5_1 = conv_bn(in_channel, out_channel // 4, stride=1, leaky=leaky)
        self.conv5X5_2 = conv_bn_no_relu(out_channel // 4, out_channel // 4, stride=1)

        self.conv7X7_2 = conv_bn(out_channel // 4, out_channel // 4, stride=1, leaky=leaky)
        self.conv7x7_3 = conv_bn_no_relu(out_channel // 4, out_channel // 4, stride=1)

    def forward(self, input):
        conv3X3 = self.conv3X3(input)

        conv5X5_1 = self.conv5X5_1(input)
        conv5X5 = self.conv5X5_2(conv5X5_1)

        conv7X7_2 = self.conv7X7_2(conv5X5_1)
        conv7X7 = self.conv7x7_3(conv7X7_2)

        out = torch.cat([conv3X3, conv5X5, conv7X7], dim=1)
        out = F.relu(out)
        return out


class FPN(nn.Module):

    def __init__(self, in_channels_list, out_channels):
        super(FPN, self).__init__()
        leaky = 0
        if (out_channels <= 64):
            leaky = 0.1
        self.output1 = conv_bn1X1(in_channels_list[0], out_channels, stride=1, leaky=leaky)
        self.output2 = conv_bn1X1(in_channels_list[1], out_channels, stride=1, leaky=leaky)
        self.output3 = conv_bn1X1(in_channels_list[2], out_channels, stride=1, leaky=leaky)

        self.merge1 = conv_bn(out_channels, out_channels, leaky=leaky)
        self.merge2 = conv_bn(out_channels, out_channels, leaky=leaky)

    def forward(self, input):
        # names = list(input.keys())
        # input = list(input.values())

        output1 = self.output1(input[0])
        output2 = self.output2(input[1])
        output3 = self.output3(input[2])

        up3 = F.interpolate(output3, size=[output2.size(2), output2.size(3)], mode='nearest')
        output2 = output2 + up3
        output2 = self.merge2(output2)

        up2 = F.interpolate(output2, size=[output1.size(2), output1.size(3)], mode='nearest')
        output1 = output1 + up2
        output1 = self.merge1(output1)

        out = [output1, output2, output3]
        return out


class MobileNetV1(nn.Module):

    def __init__(self):
        super(MobileNetV1, self).__init__()
        self.stage1 = nn.Sequential(
            conv_bn(3, 8, 2, leaky=0.1),  # 3
            conv_dw(8, 16, 1),  # 7
            conv_dw(16, 32, 2),  # 11
            conv_dw(32, 32, 1),  # 19
            conv_dw(32, 64, 2),  # 27
            conv_dw(64, 64, 1),  # 43
        )
        self.stage2 = nn.Sequential(
            conv_dw(64, 128, 2),  # 43 + 16 = 59
            conv_dw(128, 128, 1),  # 59 + 32 = 91
            conv_dw(128, 128, 1),  # 91 + 32 = 123
            conv_dw(128, 128, 1),  # 123 + 32 = 155
            conv_dw(128, 128, 1),  # 155 + 32 = 187
            conv_dw(128, 128, 1),  # 187 + 32 = 219
        )
        self.stage3 = nn.Sequential(
            conv_dw(128, 256, 2),  # 219 +3 2 = 241
            conv_dw(256, 256, 1),  # 241 + 64 = 301
        )
        self.avg = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(256, 1000)

    def forward(self, x):
        x = self.stage1(x)
        x = self.stage2(x)
        x = self.stage3(x)
        x = self.avg(x)
        # x = self.model(x)
        x = x.view(-1, 256)
        x = self.fc(x)
        return x


class ClassHead(nn.Module):

    def __init__(self, inchannels=512, num_anchors=3):
        super(ClassHead, self).__init__()
        self.num_anchors = num_anchors
        self.conv1x1 = nn.Conv2d(inchannels, self.num_anchors * 2, kernel_size=(1, 1), stride=1, padding=0)

    def forward(self, x):
        out = self.conv1x1(x)
        out = out.permute(0, 2, 3, 1).contiguous()

        return out.view(out.shape[0], -1, 2)


class BboxHead(nn.Module):

    def __init__(self, inchannels=512, num_anchors=3):
        super(BboxHead, self).__init__()
        self.conv1x1 = nn.Conv2d(inchannels, num_anchors * 4, kernel_size=(1, 1), stride=1, padding=0)

    def forward(self, x):
        out = self.conv1x1(x)
        out = out.permute(0, 2, 3, 1).contiguous()

        return out.view(out.shape[0], -1, 4)


class LandmarkHead(nn.Module):

    def __init__(self, inchannels=512, num_anchors=3):
        super(LandmarkHead, self).__init__()
        self.conv1x1 = nn.Conv2d(inchannels, num_anchors * 10, kernel_size=(1, 1), stride=1, padding=0)

    def forward(self, x):
        out = self.conv1x1(x)
        out = out.permute(0, 2, 3, 1).contiguous()

        return out.view(out.shape[0], -1, 10)


def make_class_head(fpn_num=3, inchannels=64, anchor_num=2):
    classhead = nn.ModuleList()
    for i in range(fpn_num):
        classhead.append(ClassHead(inchannels, anchor_num))
    return classhead


def make_bbox_head(fpn_num=3, inchannels=64, anchor_num=2):
    bboxhead = nn.ModuleList()
    for i in range(fpn_num):
        bboxhead.append(BboxHead(inchannels, anchor_num))
    return bboxhead


def make_landmark_head(fpn_num=3, inchannels=64, anchor_num=2):
    landmarkhead = nn.ModuleList()
    for i in range(fpn_num):
        landmarkhead.append(LandmarkHead(inchannels, anchor_num))
    return landmarkhead
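
A quick shape check of the building blocks above (illustrative only; the dummy tensor sizes are arbitrary assumptions):

# Illustrative sketch only -- sanity-check output shapes of the blocks defined above.
import torch
from r_facelib.detection.retinaface.retinaface_net import MobileNetV1, SSH, make_class_head

backbone = MobileNetV1()
print(backbone(torch.randn(1, 3, 224, 224)).shape)   # torch.Size([1, 1000])

ssh = SSH(in_channel=64, out_channel=64)
feat = torch.randn(1, 64, 80, 80)
print(ssh(feat).shape)                               # torch.Size([1, 64, 80, 80])

class_heads = make_class_head(fpn_num=3, inchannels=64, anchor_num=2)
print(class_heads[0](feat).shape)                    # torch.Size([1, 12800, 2]) -> 80*80*2 anchors
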
r_facelib/detection/retinaface/retinaface_utils.py
ADDED
@@ -0,0 +1,421 @@
1 |
+
import numpy as np
|
2 |
+
import torch
|
3 |
+
import torchvision
|
4 |
+
from itertools import product as product
|
5 |
+
from math import ceil
|
6 |
+
|
7 |
+
|
8 |
+
class PriorBox(object):
|
9 |
+
|
10 |
+
def __init__(self, cfg, image_size=None, phase='train'):
|
11 |
+
super(PriorBox, self).__init__()
|
12 |
+
self.min_sizes = cfg['min_sizes']
|
13 |
+
self.steps = cfg['steps']
|
14 |
+
self.clip = cfg['clip']
|
15 |
+
self.image_size = image_size
|
16 |
+
self.feature_maps = [[ceil(self.image_size[0] / step), ceil(self.image_size[1] / step)] for step in self.steps]
|
17 |
+
self.name = 's'
|
18 |
+
|
19 |
+
def forward(self):
|
20 |
+
anchors = []
|
21 |
+
for k, f in enumerate(self.feature_maps):
|
22 |
+
min_sizes = self.min_sizes[k]
|
23 |
+
for i, j in product(range(f[0]), range(f[1])):
|
24 |
+
for min_size in min_sizes:
|
25 |
+
s_kx = min_size / self.image_size[1]
|
26 |
+
s_ky = min_size / self.image_size[0]
|
27 |
+
dense_cx = [x * self.steps[k] / self.image_size[1] for x in [j + 0.5]]
|
28 |
+
dense_cy = [y * self.steps[k] / self.image_size[0] for y in [i + 0.5]]
|
29 |
+
for cy, cx in product(dense_cy, dense_cx):
|
30 |
+
anchors += [cx, cy, s_kx, s_ky]
|
31 |
+
|
32 |
+
# back to torch land
|
33 |
+
output = torch.Tensor(anchors).view(-1, 4)
|
34 |
+
if self.clip:
|
35 |
+
output.clamp_(max=1, min=0)
|
36 |
+
return output
|
37 |
+
|
38 |
+
|
39 |
+
def py_cpu_nms(dets, thresh):
|
40 |
+
"""Pure Python NMS baseline."""
|
41 |
+
keep = torchvision.ops.nms(
|
42 |
+
boxes=torch.Tensor(dets[:, :4]),
|
43 |
+
scores=torch.Tensor(dets[:, 4]),
|
44 |
+
iou_threshold=thresh,
|
45 |
+
)
|
46 |
+
|
47 |
+
return list(keep)
|
48 |
+
|
49 |
+
|
50 |
+
def point_form(boxes):
|
51 |
+
""" Convert prior_boxes to (xmin, ymin, xmax, ymax)
|
52 |
+
representation for comparison to point form ground truth data.
|
53 |
+
Args:
|
54 |
+
boxes: (tensor) center-size default boxes from priorbox layers.
|
55 |
+
Return:
|
56 |
+
boxes: (tensor) Converted xmin, ymin, xmax, ymax form of boxes.
|
57 |
+
"""
|
58 |
+
return torch.cat(
|
59 |
+
(
|
60 |
+
boxes[:, :2] - boxes[:, 2:] / 2, # xmin, ymin
|
61 |
+
boxes[:, :2] + boxes[:, 2:] / 2),
|
62 |
+
1) # xmax, ymax
|
63 |
+
|
64 |
+
|
65 |
+
def center_size(boxes):
|
66 |
+
""" Convert prior_boxes to (cx, cy, w, h)
|
67 |
+
representation for comparison to center-size form ground truth data.
|
68 |
+
Args:
|
69 |
+
boxes: (tensor) point_form boxes
|
70 |
+
Return:
|
71 |
+
boxes: (tensor) Converted xmin, ymin, xmax, ymax form of boxes.
|
72 |
+
"""
|
73 |
+
return torch.cat(
|
74 |
+
(boxes[:, 2:] + boxes[:, :2]) / 2, # cx, cy
|
75 |
+
boxes[:, 2:] - boxes[:, :2],
|
76 |
+
1) # w, h
|
77 |
+
|
78 |
+
|
79 |
+
def intersect(box_a, box_b):
|
80 |
+
""" We resize both tensors to [A,B,2] without new malloc:
|
81 |
+
[A,2] -> [A,1,2] -> [A,B,2]
|
82 |
+
[B,2] -> [1,B,2] -> [A,B,2]
|
83 |
+
Then we compute the area of intersect between box_a and box_b.
|
84 |
+
Args:
|
85 |
+
box_a: (tensor) bounding boxes, Shape: [A,4].
|
86 |
+
box_b: (tensor) bounding boxes, Shape: [B,4].
|
87 |
+
Return:
|
88 |
+
(tensor) intersection area, Shape: [A,B].
|
89 |
+
"""
|
90 |
+
A = box_a.size(0)
|
91 |
+
B = box_b.size(0)
|
92 |
+
max_xy = torch.min(box_a[:, 2:].unsqueeze(1).expand(A, B, 2), box_b[:, 2:].unsqueeze(0).expand(A, B, 2))
|
93 |
+
min_xy = torch.max(box_a[:, :2].unsqueeze(1).expand(A, B, 2), box_b[:, :2].unsqueeze(0).expand(A, B, 2))
|
94 |
+
inter = torch.clamp((max_xy - min_xy), min=0)
|
95 |
+
return inter[:, :, 0] * inter[:, :, 1]
|
96 |
+
|
97 |
+
|
98 |
+
def jaccard(box_a, box_b):
|
99 |
+
"""Compute the jaccard overlap of two sets of boxes. The jaccard overlap
|
100 |
+
is simply the intersection over union of two boxes. Here we operate on
|
101 |
+
ground truth boxes and default boxes.
|
102 |
+
E.g.:
|
103 |
+
A ∩ B / A ∪ B = A ∩ B / (area(A) + area(B) - A ∩ B)
|
104 |
+
Args:
|
105 |
+
box_a: (tensor) Ground truth bounding boxes, Shape: [num_objects,4]
|
106 |
+
box_b: (tensor) Prior boxes from priorbox layers, Shape: [num_priors,4]
|
107 |
+
Return:
|
108 |
+
jaccard overlap: (tensor) Shape: [box_a.size(0), box_b.size(0)]
|
109 |
+
"""
|
110 |
+
inter = intersect(box_a, box_b)
|
111 |
+
area_a = ((box_a[:, 2] - box_a[:, 0]) * (box_a[:, 3] - box_a[:, 1])).unsqueeze(1).expand_as(inter) # [A,B]
|
112 |
+
area_b = ((box_b[:, 2] - box_b[:, 0]) * (box_b[:, 3] - box_b[:, 1])).unsqueeze(0).expand_as(inter) # [A,B]
|
113 |
+
union = area_a + area_b - inter
|
114 |
+
return inter / union # [A,B]
|
115 |
+
|
116 |
+
|
117 |
+
def matrix_iou(a, b):
|
118 |
+
"""
|
119 |
+
return iou of a and b, numpy version for data augmentation
|
120 |
+
"""
|
121 |
+
lt = np.maximum(a[:, np.newaxis, :2], b[:, :2])
|
122 |
+
rb = np.minimum(a[:, np.newaxis, 2:], b[:, 2:])
|
123 |
+
|
124 |
+
area_i = np.prod(rb - lt, axis=2) * (lt < rb).all(axis=2)
|
125 |
+
area_a = np.prod(a[:, 2:] - a[:, :2], axis=1)
|
126 |
+
area_b = np.prod(b[:, 2:] - b[:, :2], axis=1)
|
127 |
+
return area_i / (area_a[:, np.newaxis] + area_b - area_i)
|
128 |
+
|
129 |
+
|
130 |
+
def matrix_iof(a, b):
|
131 |
+
"""
|
132 |
+
return iof of a and b, numpy version for data augmentation
|
133 |
+
"""
|
134 |
+
lt = np.maximum(a[:, np.newaxis, :2], b[:, :2])
|
135 |
+
rb = np.minimum(a[:, np.newaxis, 2:], b[:, 2:])
|
136 |
+
|
137 |
+
area_i = np.prod(rb - lt, axis=2) * (lt < rb).all(axis=2)
|
138 |
+
area_a = np.prod(a[:, 2:] - a[:, :2], axis=1)
|
139 |
+
return area_i / np.maximum(area_a[:, np.newaxis], 1)
|
140 |
+
|
141 |
+
|
142 |
+
def match(threshold, truths, priors, variances, labels, landms, loc_t, conf_t, landm_t, idx):
|
143 |
+
"""Match each prior box with the ground truth box of the highest jaccard
|
144 |
+
overlap, encode the bounding boxes, then return the matched indices
|
145 |
+
corresponding to both confidence and location preds.
|
146 |
+
Args:
|
147 |
+
threshold: (float) The overlap threshold used when matching boxes.
|
148 |
+
truths: (tensor) Ground truth boxes, Shape: [num_obj, 4].
|
149 |
+
priors: (tensor) Prior boxes from priorbox layers, Shape: [n_priors,4].
|
150 |
+
variances: (tensor) Variances corresponding to each prior coord,
|
151 |
+
Shape: [num_priors, 4].
|
152 |
+
labels: (tensor) All the class labels for the image, Shape: [num_obj].
|
153 |
+
landms: (tensor) Ground truth landms, Shape [num_obj, 10].
|
154 |
+
loc_t: (tensor) Tensor to be filled w/ encoded location targets.
|
155 |
+
conf_t: (tensor) Tensor to be filled w/ matched indices for conf preds.
|
156 |
+
landm_t: (tensor) Tensor to be filled w/ encoded landm targets.
|
157 |
+
idx: (int) current batch index
|
158 |
+
Return:
|
159 |
+
The matched indices corresponding to 1)location 2)confidence
|
160 |
+
3)landm preds.
|
161 |
+
"""
|
162 |
+
# jaccard index
|
163 |
+
overlaps = jaccard(truths, point_form(priors))
|
164 |
+
# (Bipartite Matching)
|
165 |
+
# [1,num_objects] best prior for each ground truth
|
166 |
+
best_prior_overlap, best_prior_idx = overlaps.max(1, keepdim=True)
|
167 |
+
|
168 |
+
# ignore hard gt
|
169 |
+
valid_gt_idx = best_prior_overlap[:, 0] >= 0.2
|
170 |
+
best_prior_idx_filter = best_prior_idx[valid_gt_idx, :]
|
171 |
+
if best_prior_idx_filter.shape[0] <= 0:
|
172 |
+
loc_t[idx] = 0
|
173 |
+
conf_t[idx] = 0
|
174 |
+
return
|
175 |
+
|
176 |
+
# [1,num_priors] best ground truth for each prior
|
177 |
+
best_truth_overlap, best_truth_idx = overlaps.max(0, keepdim=True)
|
178 |
+
best_truth_idx.squeeze_(0)
|
179 |
+
best_truth_overlap.squeeze_(0)
|
180 |
+
best_prior_idx.squeeze_(1)
|
181 |
+
best_prior_idx_filter.squeeze_(1)
|
182 |
+
best_prior_overlap.squeeze_(1)
|
183 |
+
best_truth_overlap.index_fill_(0, best_prior_idx_filter, 2) # ensure best prior
|
184 |
+
# TODO refactor: index best_prior_idx with long tensor
|
185 |
+
# ensure every gt matches with its prior of max overlap
|
186 |
+
for j in range(best_prior_idx.size(0)):  # decide which ground-truth box this anchor predicts
|
187 |
+
best_truth_idx[best_prior_idx[j]] = j
|
188 |
+
matches = truths[best_truth_idx]  # Shape: [num_priors,4] -- the matched ground-truth box for each anchor
|
189 |
+
conf = labels[best_truth_idx]  # Shape: [num_priors] -- the matched label for each anchor
|
190 |
+
conf[best_truth_overlap < threshold] = 0  # label as background: anchors with overlap < 0.35 are treated as negatives
|
191 |
+
loc = encode(matches, priors, variances)
|
192 |
+
|
193 |
+
matches_landm = landms[best_truth_idx]
|
194 |
+
landm = encode_landm(matches_landm, priors, variances)
|
195 |
+
loc_t[idx] = loc # [num_priors,4] encoded offsets to learn
|
196 |
+
conf_t[idx] = conf # [num_priors] top class label for each prior
|
197 |
+
landm_t[idx] = landm
|
198 |
+
|
199 |
+
|
200 |
+
def encode(matched, priors, variances):
|
201 |
+
"""Encode the variances from the priorbox layers into the ground truth boxes
|
202 |
+
we have matched (based on jaccard overlap) with the prior boxes.
|
203 |
+
Args:
|
204 |
+
matched: (tensor) Coords of ground truth for each prior in point-form
|
205 |
+
Shape: [num_priors, 4].
|
206 |
+
priors: (tensor) Prior boxes in center-offset form
|
207 |
+
Shape: [num_priors,4].
|
208 |
+
variances: (list[float]) Variances of priorboxes
|
209 |
+
Return:
|
210 |
+
encoded boxes (tensor), Shape: [num_priors, 4]
|
211 |
+
"""
|
212 |
+
|
213 |
+
# dist b/t match center and prior's center
|
214 |
+
g_cxcy = (matched[:, :2] + matched[:, 2:]) / 2 - priors[:, :2]
|
215 |
+
# encode variance
|
216 |
+
g_cxcy /= (variances[0] * priors[:, 2:])
|
217 |
+
# match wh / prior wh
|
218 |
+
g_wh = (matched[:, 2:] - matched[:, :2]) / priors[:, 2:]
|
219 |
+
g_wh = torch.log(g_wh) / variances[1]
|
220 |
+
# return target for smooth_l1_loss
|
221 |
+
return torch.cat([g_cxcy, g_wh], 1) # [num_priors,4]
|
222 |
+
|
223 |
+
|
224 |
+
def encode_landm(matched, priors, variances):
|
225 |
+
"""Encode the variances from the priorbox layers into the ground truth boxes
|
226 |
+
we have matched (based on jaccard overlap) with the prior boxes.
|
227 |
+
Args:
|
228 |
+
matched: (tensor) Coords of ground truth for each prior in point-form
|
229 |
+
Shape: [num_priors, 10].
|
230 |
+
priors: (tensor) Prior boxes in center-offset form
|
231 |
+
Shape: [num_priors,4].
|
232 |
+
variances: (list[float]) Variances of priorboxes
|
233 |
+
Return:
|
234 |
+
encoded landm (tensor), Shape: [num_priors, 10]
|
235 |
+
"""
|
236 |
+
|
237 |
+
# dist b/t match center and prior's center
|
238 |
+
matched = torch.reshape(matched, (matched.size(0), 5, 2))
|
239 |
+
priors_cx = priors[:, 0].unsqueeze(1).expand(matched.size(0), 5).unsqueeze(2)
|
240 |
+
priors_cy = priors[:, 1].unsqueeze(1).expand(matched.size(0), 5).unsqueeze(2)
|
241 |
+
priors_w = priors[:, 2].unsqueeze(1).expand(matched.size(0), 5).unsqueeze(2)
|
242 |
+
priors_h = priors[:, 3].unsqueeze(1).expand(matched.size(0), 5).unsqueeze(2)
|
243 |
+
priors = torch.cat([priors_cx, priors_cy, priors_w, priors_h], dim=2)
|
244 |
+
g_cxcy = matched[:, :, :2] - priors[:, :, :2]
|
245 |
+
# encode variance
|
246 |
+
g_cxcy /= (variances[0] * priors[:, :, 2:])
|
247 |
+
# g_cxcy /= priors[:, :, 2:]
|
248 |
+
g_cxcy = g_cxcy.reshape(g_cxcy.size(0), -1)
|
249 |
+
    # return target for smooth_l1_loss
    return g_cxcy


# Adapted from https://github.com/Hakuyume/chainer-ssd
def decode(loc, priors, variances):
    """Decode locations from predictions using priors to undo
    the encoding we did for offset regression at train time.
    Args:
        loc (tensor): location predictions for loc layers,
            Shape: [num_priors,4]
        priors (tensor): Prior boxes in center-offset form.
            Shape: [num_priors,4].
        variances: (list[float]) Variances of priorboxes
    Return:
        decoded bounding box predictions
    """

    boxes = torch.cat((priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:],
                       priors[:, 2:] * torch.exp(loc[:, 2:] * variances[1])), 1)
    boxes[:, :2] -= boxes[:, 2:] / 2
    boxes[:, 2:] += boxes[:, :2]
    return boxes


def decode_landm(pre, priors, variances):
    """Decode landm from predictions using priors to undo
    the encoding we did for offset regression at train time.
    Args:
        pre (tensor): landm predictions for loc layers,
            Shape: [num_priors,10]
        priors (tensor): Prior boxes in center-offset form.
            Shape: [num_priors,4].
        variances: (list[float]) Variances of priorboxes
    Return:
        decoded landm predictions
    """
    tmp = (
        priors[:, :2] + pre[:, :2] * variances[0] * priors[:, 2:],
        priors[:, :2] + pre[:, 2:4] * variances[0] * priors[:, 2:],
        priors[:, :2] + pre[:, 4:6] * variances[0] * priors[:, 2:],
        priors[:, :2] + pre[:, 6:8] * variances[0] * priors[:, 2:],
        priors[:, :2] + pre[:, 8:10] * variances[0] * priors[:, 2:],
    )
    landms = torch.cat(tmp, dim=1)
    return landms


def batched_decode(b_loc, priors, variances):
    """Decode locations from predictions using priors to undo
    the encoding we did for offset regression at train time.
    Args:
        b_loc (tensor): location predictions for loc layers,
            Shape: [num_batches,num_priors,4]
        priors (tensor): Prior boxes in center-offset form.
            Shape: [1,num_priors,4].
        variances: (list[float]) Variances of priorboxes
    Return:
        decoded bounding box predictions
    """
    boxes = (
        priors[:, :, :2] + b_loc[:, :, :2] * variances[0] * priors[:, :, 2:],
        priors[:, :, 2:] * torch.exp(b_loc[:, :, 2:] * variances[1]),
    )
    boxes = torch.cat(boxes, dim=2)

    boxes[:, :, :2] -= boxes[:, :, 2:] / 2
    boxes[:, :, 2:] += boxes[:, :, :2]
    return boxes


def batched_decode_landm(pre, priors, variances):
    """Decode landm from predictions using priors to undo
    the encoding we did for offset regression at train time.
    Args:
        pre (tensor): landm predictions for loc layers,
            Shape: [num_batches,num_priors,10]
        priors (tensor): Prior boxes in center-offset form.
            Shape: [1,num_priors,4].
        variances: (list[float]) Variances of priorboxes
    Return:
        decoded landm predictions
    """
    landms = (
        priors[:, :, :2] + pre[:, :, :2] * variances[0] * priors[:, :, 2:],
        priors[:, :, :2] + pre[:, :, 2:4] * variances[0] * priors[:, :, 2:],
        priors[:, :, :2] + pre[:, :, 4:6] * variances[0] * priors[:, :, 2:],
        priors[:, :, :2] + pre[:, :, 6:8] * variances[0] * priors[:, :, 2:],
        priors[:, :, :2] + pre[:, :, 8:10] * variances[0] * priors[:, :, 2:],
    )
    landms = torch.cat(landms, dim=2)
    return landms


def log_sum_exp(x):
    """Utility function for computing log_sum_exp while determining
    This will be used to determine unaveraged confidence loss across
    all examples in a batch.
    Args:
        x (Variable(tensor)): conf_preds from conf layers
    """
    x_max = x.data.max()
    return torch.log(torch.sum(torch.exp(x - x_max), 1, keepdim=True)) + x_max


# Original author: Francisco Massa:
# https://github.com/fmassa/object-detection.torch
# Ported to PyTorch by Max deGroot (02/01/2017)
def nms(boxes, scores, overlap=0.5, top_k=200):
    """Apply non-maximum suppression at test time to avoid detecting too many
    overlapping bounding boxes for a given object.
    Args:
        boxes: (tensor) The location preds for the img, Shape: [num_priors,4].
        scores: (tensor) The class predscores for the img, Shape:[num_priors].
        overlap: (float) The overlap thresh for suppressing unnecessary boxes.
        top_k: (int) The maximum number of box preds to consider.
    Return:
        The indices of the kept boxes with respect to num_priors.
    """

    keep = torch.Tensor(scores.size(0)).fill_(0).long()
    if boxes.numel() == 0:
        return keep
    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]
    area = torch.mul(x2 - x1, y2 - y1)
    v, idx = scores.sort(0)  # sort in ascending order
    # I = I[v >= 0.01]
    idx = idx[-top_k:]  # indices of the top-k largest vals
    xx1 = boxes.new()
    yy1 = boxes.new()
    xx2 = boxes.new()
    yy2 = boxes.new()
    w = boxes.new()
    h = boxes.new()

    # keep = torch.Tensor()
    count = 0
    while idx.numel() > 0:
        i = idx[-1]  # index of current largest val
        # keep.append(i)
        keep[count] = i
        count += 1
        if idx.size(0) == 1:
            break
        idx = idx[:-1]  # remove kept element from view
        # load bboxes of next highest vals
        torch.index_select(x1, 0, idx, out=xx1)
        torch.index_select(y1, 0, idx, out=yy1)
        torch.index_select(x2, 0, idx, out=xx2)
        torch.index_select(y2, 0, idx, out=yy2)
        # store element-wise max with next highest score
        xx1 = torch.clamp(xx1, min=x1[i])
        yy1 = torch.clamp(yy1, min=y1[i])
        xx2 = torch.clamp(xx2, max=x2[i])
        yy2 = torch.clamp(yy2, max=y2[i])
        w.resize_as_(xx2)
        h.resize_as_(yy2)
        w = xx2 - xx1
        h = yy2 - yy1
        # check sizes of xx1 and xx2.. after each iteration
        w = torch.clamp(w, min=0.0)
        h = torch.clamp(h, min=0.0)
        inter = w * h
        # IoU = i / (area(a) + area(b) - i)
        rem_areas = torch.index_select(area, 0, idx)  # load remaining areas
        union = (rem_areas - inter) + area[i]
        IoU = inter / union  # store result in iou
        # keep only elements with an IoU <= overlap
        idx = idx[IoU.le(overlap)]
    return keep, count
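As a quick reference for how the helpers above fit together, here is a minimal sketch that decodes one dummy prior and runs it through nms(); the prior box, offsets, and variance values are made up for illustration and are not taken from this upload.

# Illustrative only: dummy prior, zero offsets, made-up variances.
import torch

priors = torch.tensor([[0.5, 0.5, 0.2, 0.2]])   # one prior box in (cx, cy, w, h) form
loc = torch.zeros(1, 4)                         # zero offsets decode back to the prior itself
variances = [0.1, 0.2]

boxes = decode(loc, priors, variances)          # -> tensor([[0.4, 0.4, 0.6, 0.6]]) in (x1, y1, x2, y2)
scores = torch.tensor([0.9])
keep, count = nms(boxes, scores, overlap=0.4, top_k=10)
print(keep[:count])                             # indices of the boxes that survive suppression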
r_facelib/detection/yolov5face/__init__.py
ADDED
File without changes
|
r_facelib/detection/yolov5face/__pycache__/__init__.cpython-311.pyc
ADDED
Binary file (289 Bytes).

r_facelib/detection/yolov5face/__pycache__/face_detector.cpython-311.pyc
ADDED
Binary file (10.9 kB).
r_facelib/detection/yolov5face/face_detector.py
ADDED
@@ -0,0 +1,141 @@
import copy
from pathlib import Path

import cv2
import numpy as np
import torch
from torch import torch_version

from r_facelib.detection.yolov5face.models.common import Conv
from r_facelib.detection.yolov5face.models.yolo import Model
from r_facelib.detection.yolov5face.utils.datasets import letterbox
from r_facelib.detection.yolov5face.utils.general import (
    check_img_size,
    non_max_suppression_face,
    scale_coords,
    scale_coords_landmarks,
)

print(f"Torch version: {torch.__version__}")
IS_HIGH_VERSION = torch_version.__version__ >= "1.9.0"

def isListempty(inList):
    if isinstance(inList, list):  # Is a list
        return all(map(isListempty, inList))
    return False  # Not a list

class YoloDetector:
    def __init__(
        self,
        config_name,
        min_face=10,
        target_size=None,
        device='cuda',
    ):
        """
        config_name: name of .yaml config with network configuration from models/ folder.
        min_face : minimal face size in pixels.
        target_size : target size of smaller image axis (choose lower for faster work). e.g. 480, 720, 1080.
                      None for original resolution.
        """
        self._class_path = Path(__file__).parent.absolute()
        self.target_size = target_size
        self.min_face = min_face
        self.detector = Model(cfg=config_name)
        self.device = device

    def _preprocess(self, imgs):
        """
        Preprocessing image before passing through the network. Resize and conversion to torch tensor.
        """
        pp_imgs = []
        for img in imgs:
            h0, w0 = img.shape[:2]  # orig hw
            if self.target_size:
                r = self.target_size / min(h0, w0)  # resize image to img_size
                if r < 1:
                    img = cv2.resize(img, (int(w0 * r), int(h0 * r)), interpolation=cv2.INTER_LINEAR)

            imgsz = check_img_size(max(img.shape[:2]), s=self.detector.stride.max())  # check img_size
            img = letterbox(img, new_shape=imgsz)[0]
            pp_imgs.append(img)
        pp_imgs = np.array(pp_imgs)
        pp_imgs = pp_imgs.transpose(0, 3, 1, 2)
        pp_imgs = torch.from_numpy(pp_imgs).to(self.device)
        pp_imgs = pp_imgs.float()  # uint8 to fp16/32
        return pp_imgs / 255.0  # 0 - 255 to 0.0 - 1.0

    def _postprocess(self, imgs, origimgs, pred, conf_thres, iou_thres):
        """
        Postprocessing of raw pytorch model output.
        Returns:
            bboxes: list of arrays with 4 coordinates of bounding boxes with format x1,y1,x2,y2.
            points: list of arrays with coordinates of 5 facial keypoints (eyes, nose, lips corners).
        """
        bboxes = [[] for _ in range(len(origimgs))]
        landmarks = [[] for _ in range(len(origimgs))]

        pred = non_max_suppression_face(pred, conf_thres, iou_thres)

        for image_id, origimg in enumerate(origimgs):
            img_shape = origimg.shape
            image_height, image_width = img_shape[:2]
            gn = torch.tensor(img_shape)[[1, 0, 1, 0]]  # normalization gain whwh
            gn_lks = torch.tensor(img_shape)[[1, 0, 1, 0, 1, 0, 1, 0, 1, 0]]  # normalization gain landmarks
            det = pred[image_id].cpu()
            scale_coords(imgs[image_id].shape[1:], det[:, :4], img_shape).round()
            scale_coords_landmarks(imgs[image_id].shape[1:], det[:, 5:15], img_shape).round()

            for j in range(det.size()[0]):
                box = (det[j, :4].view(1, 4) / gn).view(-1).tolist()
                box = list(
                    map(int, [box[0] * image_width, box[1] * image_height, box[2] * image_width, box[3] * image_height])
                )
                if box[3] - box[1] < self.min_face:
                    continue
                lm = (det[j, 5:15].view(1, 10) / gn_lks).view(-1).tolist()
                lm = list(map(int, [i * image_width if j % 2 == 0 else i * image_height for j, i in enumerate(lm)]))
                lm = [lm[i : i + 2] for i in range(0, len(lm), 2)]
                bboxes[image_id].append(box)
                landmarks[image_id].append(lm)
        return bboxes, landmarks

    def detect_faces(self, imgs, conf_thres=0.7, iou_thres=0.5):
        """
        Get bbox coordinates and keypoints of faces on original image.
        Params:
            imgs: image or list of images to detect faces on with BGR order (convert to RGB order for inference)
            conf_thres: confidence threshold for each prediction
            iou_thres: threshold for NMS (filter of intersecting bboxes)
        Returns:
            bboxes: list of arrays with 4 coordinates of bounding boxes with format x1,y1,x2,y2.
            points: list of arrays with coordinates of 5 facial keypoints (eyes, nose, lips corners).
        """
        # Pass input images through face detector
        images = imgs if isinstance(imgs, list) else [imgs]
        images = [cv2.cvtColor(img, cv2.COLOR_BGR2RGB) for img in images]
        origimgs = copy.deepcopy(images)

        images = self._preprocess(images)

        if IS_HIGH_VERSION:
            with torch.inference_mode():  # for pytorch>=1.9
                pred = self.detector(images)[0]
        else:
            with torch.no_grad():  # for pytorch<1.9
                pred = self.detector(images)[0]

        bboxes, points = self._postprocess(images, origimgs, pred, conf_thres, iou_thres)

        # return bboxes, points
        if not isListempty(points):
            bboxes = np.array(bboxes).reshape(-1, 4)
            points = np.array(points).reshape(-1, 10)
            padding = bboxes[:, 0].reshape(-1, 1)
            return np.concatenate((bboxes, padding, points), axis=1)
        else:
            return None

    def __call__(self, *args):
        return self.predict(*args)
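A rough usage sketch for YoloDetector follows; the config path, checkpoint name, and image path are placeholders (the ReActor code wires these up elsewhere), so treat it as illustrative rather than the extension's actual call path.

# Placeholders throughout: "models/yolov5l.yaml", "yolov5l-face.pth", "photo.jpg".
import cv2
import torch

detector = YoloDetector(config_name="models/yolov5l.yaml", min_face=10, target_size=640, device="cuda")
state = torch.load("yolov5l-face.pth", map_location="cuda")   # hypothetical checkpoint file
detector.detector.load_state_dict(state)
detector.detector.to("cuda").eval()

img = cv2.imread("photo.jpg")                                 # BGR, as detect_faces() expects
result = detector.detect_faces(img, conf_thres=0.7, iou_thres=0.5)
if result is not None:
    # each row: x1, y1, x2, y2, a padding column, then 10 landmark values (5 x/y pairs)
    print(result.shape)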
r_facelib/detection/yolov5face/models/__init__.py
ADDED
File without changes
|
r_facelib/detection/yolov5face/models/__pycache__/__init__.cpython-311.pyc
ADDED
Binary file (296 Bytes).

r_facelib/detection/yolov5face/models/__pycache__/common.cpython-311.pyc
ADDED
Binary file (25.3 kB).

r_facelib/detection/yolov5face/models/__pycache__/experimental.cpython-311.pyc
ADDED
Binary file (4.83 kB).

r_facelib/detection/yolov5face/models/__pycache__/yolo.cpython-311.pyc
ADDED
Binary file (19.8 kB).
r_facelib/detection/yolov5face/models/common.py
ADDED
@@ -0,0 +1,299 @@
# This file contains modules common to various models

import math

import numpy as np
import torch
from torch import nn

from r_facelib.detection.yolov5face.utils.datasets import letterbox
from r_facelib.detection.yolov5face.utils.general import (
    make_divisible,
    non_max_suppression,
    scale_coords,
    xyxy2xywh,
)


def autopad(k, p=None):  # kernel, padding
    # Pad to 'same'
    if p is None:
        p = k // 2 if isinstance(k, int) else [x // 2 for x in k]  # auto-pad
    return p


def channel_shuffle(x, groups):
    batchsize, num_channels, height, width = x.data.size()
    channels_per_group = torch.div(num_channels, groups, rounding_mode="trunc")

    # reshape
    x = x.view(batchsize, groups, channels_per_group, height, width)
    x = torch.transpose(x, 1, 2).contiguous()

    # flatten
    return x.view(batchsize, -1, height, width)


def DWConv(c1, c2, k=1, s=1, act=True):
    # Depthwise convolution
    return Conv(c1, c2, k, s, g=math.gcd(c1, c2), act=act)


class Conv(nn.Module):
    # Standard convolution
    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):  # ch_in, ch_out, kernel, stride, padding, groups
        super().__init__()
        self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False)
        self.bn = nn.BatchNorm2d(c2)
        self.act = nn.SiLU() if act is True else (act if isinstance(act, nn.Module) else nn.Identity())

    def forward(self, x):
        return self.act(self.bn(self.conv(x)))

    def fuseforward(self, x):
        return self.act(self.conv(x))


class StemBlock(nn.Module):
    def __init__(self, c1, c2, k=3, s=2, p=None, g=1, act=True):
        super().__init__()
        self.stem_1 = Conv(c1, c2, k, s, p, g, act)
        self.stem_2a = Conv(c2, c2 // 2, 1, 1, 0)
        self.stem_2b = Conv(c2 // 2, c2, 3, 2, 1)
        self.stem_2p = nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True)
        self.stem_3 = Conv(c2 * 2, c2, 1, 1, 0)

    def forward(self, x):
        stem_1_out = self.stem_1(x)
        stem_2a_out = self.stem_2a(stem_1_out)
        stem_2b_out = self.stem_2b(stem_2a_out)
        stem_2p_out = self.stem_2p(stem_1_out)
        return self.stem_3(torch.cat((stem_2b_out, stem_2p_out), 1))


class Bottleneck(nn.Module):
    # Standard bottleneck
    def __init__(self, c1, c2, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, shortcut, groups, expansion
        super().__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = Conv(c_, c2, 3, 1, g=g)
        self.add = shortcut and c1 == c2

    def forward(self, x):
        return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))


class BottleneckCSP(nn.Module):
    # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks
    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, number, shortcut, groups, expansion
        super().__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False)
        self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False)
        self.cv4 = Conv(2 * c_, c2, 1, 1)
        self.bn = nn.BatchNorm2d(2 * c_)  # applied to cat(cv2, cv3)
        self.act = nn.LeakyReLU(0.1, inplace=True)
        self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)))

    def forward(self, x):
        y1 = self.cv3(self.m(self.cv1(x)))
        y2 = self.cv2(x)
        return self.cv4(self.act(self.bn(torch.cat((y1, y2), dim=1))))


class C3(nn.Module):
    # CSP Bottleneck with 3 convolutions
    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, number, shortcut, groups, expansion
        super().__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = Conv(c1, c_, 1, 1)
        self.cv3 = Conv(2 * c_, c2, 1)  # act=FReLU(c2)
        self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)))

    def forward(self, x):
        return self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), dim=1))


class ShuffleV2Block(nn.Module):
    def __init__(self, inp, oup, stride):
        super().__init__()

        if not 1 <= stride <= 3:
            raise ValueError("illegal stride value")
        self.stride = stride

        branch_features = oup // 2

        if self.stride > 1:
            self.branch1 = nn.Sequential(
                self.depthwise_conv(inp, inp, kernel_size=3, stride=self.stride, padding=1),
                nn.BatchNorm2d(inp),
                nn.Conv2d(inp, branch_features, kernel_size=1, stride=1, padding=0, bias=False),
                nn.BatchNorm2d(branch_features),
                nn.SiLU(),
            )
        else:
            self.branch1 = nn.Sequential()

        self.branch2 = nn.Sequential(
            nn.Conv2d(
                inp if (self.stride > 1) else branch_features,
                branch_features,
                kernel_size=1,
                stride=1,
                padding=0,
                bias=False,
            ),
            nn.BatchNorm2d(branch_features),
            nn.SiLU(),
            self.depthwise_conv(branch_features, branch_features, kernel_size=3, stride=self.stride, padding=1),
            nn.BatchNorm2d(branch_features),
            nn.Conv2d(branch_features, branch_features, kernel_size=1, stride=1, padding=0, bias=False),
            nn.BatchNorm2d(branch_features),
            nn.SiLU(),
        )

    @staticmethod
    def depthwise_conv(i, o, kernel_size, stride=1, padding=0, bias=False):
        return nn.Conv2d(i, o, kernel_size, stride, padding, bias=bias, groups=i)

    def forward(self, x):
        if self.stride == 1:
            x1, x2 = x.chunk(2, dim=1)
            out = torch.cat((x1, self.branch2(x2)), dim=1)
        else:
            out = torch.cat((self.branch1(x), self.branch2(x)), dim=1)
        out = channel_shuffle(out, 2)
        return out


class SPP(nn.Module):
    # Spatial pyramid pooling layer used in YOLOv3-SPP
    def __init__(self, c1, c2, k=(5, 9, 13)):
        super().__init__()
        c_ = c1 // 2  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = Conv(c_ * (len(k) + 1), c2, 1, 1)
        self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k])

    def forward(self, x):
        x = self.cv1(x)
        return self.cv2(torch.cat([x] + [m(x) for m in self.m], 1))


class Focus(nn.Module):
    # Focus wh information into c-space
    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):  # ch_in, ch_out, kernel, stride, padding, groups
        super().__init__()
        self.conv = Conv(c1 * 4, c2, k, s, p, g, act)

    def forward(self, x):  # x(b,c,w,h) -> y(b,4c,w/2,h/2)
        return self.conv(torch.cat([x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]], 1))


class Concat(nn.Module):
    # Concatenate a list of tensors along dimension
    def __init__(self, dimension=1):
        super().__init__()
        self.d = dimension

    def forward(self, x):
        return torch.cat(x, self.d)


class NMS(nn.Module):
    # Non-Maximum Suppression (NMS) module
    conf = 0.25  # confidence threshold
    iou = 0.45  # IoU threshold
    classes = None  # (optional list) filter by class

    def forward(self, x):
        return non_max_suppression(x[0], conf_thres=self.conf, iou_thres=self.iou, classes=self.classes)


class AutoShape(nn.Module):
    # input-robust model wrapper for passing cv2/np/PIL/torch inputs. Includes preprocessing, inference and NMS
    img_size = 640  # inference size (pixels)
    conf = 0.25  # NMS confidence threshold
    iou = 0.45  # NMS IoU threshold
    classes = None  # (optional list) filter by class

    def __init__(self, model):
        super().__init__()
        self.model = model.eval()

    def autoshape(self):
        print("autoShape already enabled, skipping... ")  # model already converted to model.autoshape()
        return self

    def forward(self, imgs, size=640, augment=False, profile=False):
        # Inference from various sources. For height=720, width=1280, RGB images example inputs are:
        #   OpenCV:   = cv2.imread('image.jpg')[:,:,::-1]  # HWC BGR to RGB x(720,1280,3)
        #   PIL:      = Image.open('image.jpg')  # HWC x(720,1280,3)
        #   numpy:    = np.zeros((720,1280,3))  # HWC
        #   torch:    = torch.zeros(16,3,720,1280)  # BCHW
        #   multiple: = [Image.open('image1.jpg'), Image.open('image2.jpg'), ...]  # list of images

        p = next(self.model.parameters())  # for device and type
        if isinstance(imgs, torch.Tensor):  # torch
            return self.model(imgs.to(p.device).type_as(p), augment, profile)  # inference

        # Pre-process
        n, imgs = (len(imgs), imgs) if isinstance(imgs, list) else (1, [imgs])  # number of images, list of images
        shape0, shape1 = [], []  # image and inference shapes
        for i, im in enumerate(imgs):
            im = np.array(im)  # to numpy
            if im.shape[0] < 5:  # image in CHW
                im = im.transpose((1, 2, 0))  # reverse dataloader .transpose(2, 0, 1)
            im = im[:, :, :3] if im.ndim == 3 else np.tile(im[:, :, None], 3)  # enforce 3ch input
            s = im.shape[:2]  # HWC
            shape0.append(s)  # image shape
            g = size / max(s)  # gain
            shape1.append([y * g for y in s])
            imgs[i] = im  # update
        shape1 = [make_divisible(x, int(self.stride.max())) for x in np.stack(shape1, 0).max(0)]  # inference shape
        x = [letterbox(im, new_shape=shape1, auto=False)[0] for im in imgs]  # pad
        x = np.stack(x, 0) if n > 1 else x[0][None]  # stack
        x = np.ascontiguousarray(x.transpose((0, 3, 1, 2)))  # BHWC to BCHW
        x = torch.from_numpy(x).to(p.device).type_as(p) / 255.0  # uint8 to fp16/32

        # Inference
        with torch.no_grad():
            y = self.model(x, augment, profile)[0]  # forward
        y = non_max_suppression(y, conf_thres=self.conf, iou_thres=self.iou, classes=self.classes)  # NMS

        # Post-process
        for i in range(n):
            scale_coords(shape1, y[i][:, :4], shape0[i])

        return Detections(imgs, y, self.names)


class Detections:
    # detections class for YOLOv5 inference results
    def __init__(self, imgs, pred, names=None):
        super().__init__()
        d = pred[0].device  # device
        gn = [torch.tensor([*(im.shape[i] for i in [1, 0, 1, 0]), 1.0, 1.0], device=d) for im in imgs]  # normalizations
        self.imgs = imgs  # list of images as numpy arrays
        self.pred = pred  # list of tensors pred[0] = (xyxy, conf, cls)
        self.names = names  # class names
        self.xyxy = pred  # xyxy pixels
        self.xywh = [xyxy2xywh(x) for x in pred]  # xywh pixels
        self.xyxyn = [x / g for x, g in zip(self.xyxy, gn)]  # xyxy normalized
        self.xywhn = [x / g for x, g in zip(self.xywh, gn)]  # xywh normalized
        self.n = len(self.pred)

    def __len__(self):
        return self.n

    def tolist(self):
        # return a list of Detections objects, i.e. 'for result in results.tolist():'
        x = [Detections([self.imgs[i]], [self.pred[i]], self.names) for i in range(self.n)]
        for d in x:
            for k in ["imgs", "pred", "xyxy", "xyxyn", "xywh", "xywhn"]:
                setattr(d, k, getattr(d, k)[0])  # pop out of list
        return x
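A small shape-check sketch for the building blocks above, using random CPU tensors; the channel counts are arbitrary and only meant to show how Focus, C3, and SPP compose.

# Dummy input; all values and channel counts chosen only for the shape check.
import torch

x = torch.randn(1, 3, 64, 64)
focus = Focus(3, 32, k=3)        # space-to-depth (halves H and W) followed by a 3x3 Conv
y = focus(x)
print(y.shape)                   # torch.Size([1, 32, 32, 32])

c3 = C3(32, 64, n=2)
spp = SPP(64, 64)
print(spp(c3(y)).shape)          # torch.Size([1, 64, 32, 32])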
r_facelib/detection/yolov5face/models/experimental.py
ADDED
@@ -0,0 +1,45 @@
# # This file contains experimental modules

import numpy as np
import torch
from torch import nn

from r_facelib.detection.yolov5face.models.common import Conv


class CrossConv(nn.Module):
    # Cross Convolution Downsample
    def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False):
        # ch_in, ch_out, kernel, stride, groups, expansion, shortcut
        super().__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, c_, (1, k), (1, s))
        self.cv2 = Conv(c_, c2, (k, 1), (s, 1), g=g)
        self.add = shortcut and c1 == c2

    def forward(self, x):
        return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))


class MixConv2d(nn.Module):
    # Mixed Depthwise Conv https://arxiv.org/abs/1907.09595
    def __init__(self, c1, c2, k=(1, 3), s=1, equal_ch=True):
        super().__init__()
        groups = len(k)
        if equal_ch:  # equal c_ per group
            i = torch.linspace(0, groups - 1e-6, c2).floor()  # c2 indices
            c_ = [(i == g).sum() for g in range(groups)]  # intermediate channels
        else:  # equal weight.numel() per group
            b = [c2] + [0] * groups
            a = np.eye(groups + 1, groups, k=-1)
            a -= np.roll(a, 1, axis=1)
            a *= np.array(k) ** 2
            a[0] = 1
            c_ = np.linalg.lstsq(a, b, rcond=None)[0].round()  # solve for equal weight indices, ax = b

        self.m = nn.ModuleList([nn.Conv2d(c1, int(c_[g]), k[g], s, k[g] // 2, bias=False) for g in range(groups)])
        self.bn = nn.BatchNorm2d(c2)
        self.act = nn.LeakyReLU(0.1, inplace=True)

    def forward(self, x):
        return x + self.act(self.bn(torch.cat([m(x) for m in self.m], 1)))
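A similar sanity sketch for the experimental blocks; the channel counts are arbitrary but kept equal because MixConv2d always adds a residual and CrossConv is given shortcut=True here.

# Dummy tensors only; c1 == c2 so both residual paths are valid.
import torch

x = torch.randn(1, 16, 32, 32)
mix = MixConv2d(16, 16, k=(1, 3), s=1)
cross = CrossConv(16, 16, k=3, s=1, shortcut=True)
print(mix(x).shape, cross(x).shape)   # both torch.Size([1, 16, 32, 32])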
r_facelib/detection/yolov5face/models/yolo.py
ADDED
@@ -0,0 +1,235 @@
import math
from copy import deepcopy
from pathlib import Path

import torch
import yaml  # for torch hub
from torch import nn

from r_facelib.detection.yolov5face.models.common import (
    C3,
    NMS,
    SPP,
    AutoShape,
    Bottleneck,
    BottleneckCSP,
    Concat,
    Conv,
    DWConv,
    Focus,
    ShuffleV2Block,
    StemBlock,
)
from r_facelib.detection.yolov5face.models.experimental import CrossConv, MixConv2d
from r_facelib.detection.yolov5face.utils.autoanchor import check_anchor_order
from r_facelib.detection.yolov5face.utils.general import make_divisible
from r_facelib.detection.yolov5face.utils.torch_utils import copy_attr, fuse_conv_and_bn


class Detect(nn.Module):
    stride = None  # strides computed during build
    export = False  # onnx export

    def __init__(self, nc=80, anchors=(), ch=()):  # detection layer
        super().__init__()
        self.nc = nc  # number of classes
        self.no = nc + 5 + 10  # number of outputs per anchor

        self.nl = len(anchors)  # number of detection layers
        self.na = len(anchors[0]) // 2  # number of anchors
        self.grid = [torch.zeros(1)] * self.nl  # init grid
        a = torch.tensor(anchors).float().view(self.nl, -1, 2)
        self.register_buffer("anchors", a)  # shape(nl,na,2)
        self.register_buffer("anchor_grid", a.clone().view(self.nl, 1, -1, 1, 1, 2))  # shape(nl,1,na,1,1,2)
        self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch)  # output conv

    def forward(self, x):
        z = []  # inference output
        if self.export:
            for i in range(self.nl):
                x[i] = self.m[i](x[i])
            return x
        for i in range(self.nl):
            x[i] = self.m[i](x[i])  # conv
            bs, _, ny, nx = x[i].shape  # x(bs,255,20,20) to x(bs,3,20,20,85)
            x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()

            if not self.training:  # inference
                if self.grid[i].shape[2:4] != x[i].shape[2:4]:
                    self.grid[i] = self._make_grid(nx, ny).to(x[i].device)

                y = torch.full_like(x[i], 0)
                y[..., [0, 1, 2, 3, 4, 15]] = x[i][..., [0, 1, 2, 3, 4, 15]].sigmoid()
                y[..., 5:15] = x[i][..., 5:15]

                y[..., 0:2] = (y[..., 0:2] * 2.0 - 0.5 + self.grid[i].to(x[i].device)) * self.stride[i]  # xy
                y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i]  # wh

                y[..., 5:7] = (
                    y[..., 5:7] * self.anchor_grid[i] + self.grid[i].to(x[i].device) * self.stride[i]
                )  # landmark x1 y1
                y[..., 7:9] = (
                    y[..., 7:9] * self.anchor_grid[i] + self.grid[i].to(x[i].device) * self.stride[i]
                )  # landmark x2 y2
                y[..., 9:11] = (
                    y[..., 9:11] * self.anchor_grid[i] + self.grid[i].to(x[i].device) * self.stride[i]
                )  # landmark x3 y3
                y[..., 11:13] = (
                    y[..., 11:13] * self.anchor_grid[i] + self.grid[i].to(x[i].device) * self.stride[i]
                )  # landmark x4 y4
                y[..., 13:15] = (
                    y[..., 13:15] * self.anchor_grid[i] + self.grid[i].to(x[i].device) * self.stride[i]
                )  # landmark x5 y5

                z.append(y.view(bs, -1, self.no))

        return x if self.training else (torch.cat(z, 1), x)

    @staticmethod
    def _make_grid(nx=20, ny=20):
        # yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)], indexing="ij")  # for pytorch>=1.10
        yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)])
        return torch.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).float()


class Model(nn.Module):
    def __init__(self, cfg="yolov5s.yaml", ch=3, nc=None):  # model, input channels, number of classes
        super().__init__()
        self.yaml_file = Path(cfg).name
        with Path(cfg).open(encoding="utf8") as f:
            self.yaml = yaml.safe_load(f)  # model dict

        # Define model
        ch = self.yaml["ch"] = self.yaml.get("ch", ch)  # input channels
        if nc and nc != self.yaml["nc"]:
            self.yaml["nc"] = nc  # override yaml value

        self.model, self.save = parse_model(deepcopy(self.yaml), ch=[ch])  # model, savelist
        self.names = [str(i) for i in range(self.yaml["nc"])]  # default names

        # Build strides, anchors
        m = self.model[-1]  # Detect()
        if isinstance(m, Detect):
            s = 128  # 2x min stride
            m.stride = torch.tensor([s / x.shape[-2] for x in self.forward(torch.zeros(1, ch, s, s))])  # forward
            m.anchors /= m.stride.view(-1, 1, 1)
            check_anchor_order(m)
            self.stride = m.stride
            self._initialize_biases()  # only run once

    def forward(self, x):
        return self.forward_once(x)  # single-scale inference, train

    def forward_once(self, x):
        y = []  # outputs
        for m in self.model:
            if m.f != -1:  # if not from previous layer
                x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f]  # from earlier layers

            x = m(x)  # run
            y.append(x if m.i in self.save else None)  # save output

        return x

    def _initialize_biases(self, cf=None):  # initialize biases into Detect(), cf is class frequency
        # https://arxiv.org/abs/1708.02002 section 3.3
        m = self.model[-1]  # Detect() module
        for mi, s in zip(m.m, m.stride):  # from
            b = mi.bias.view(m.na, -1)  # conv.bias(255) to (3,85)
            b.data[:, 4] += math.log(8 / (640 / s) ** 2)  # obj (8 objects per 640 image)
            b.data[:, 5:] += math.log(0.6 / (m.nc - 0.99)) if cf is None else torch.log(cf / cf.sum())  # cls
            mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)

    def _print_biases(self):
        m = self.model[-1]  # Detect() module
        for mi in m.m:  # from
            b = mi.bias.detach().view(m.na, -1).T  # conv.bias(255) to (3,85)
            print(("%6g Conv2d.bias:" + "%10.3g" * 6) % (mi.weight.shape[1], *b[:5].mean(1).tolist(), b[5:].mean()))

    def fuse(self):  # fuse model Conv2d() + BatchNorm2d() layers
        print("Fusing layers... ")
        for m in self.model.modules():
            if isinstance(m, Conv) and hasattr(m, "bn"):
                m.conv = fuse_conv_and_bn(m.conv, m.bn)  # update conv
                delattr(m, "bn")  # remove batchnorm
                m.forward = m.fuseforward  # update forward
            elif type(m) is nn.Upsample:
                m.recompute_scale_factor = None  # torch 1.11.0 compatibility
        return self

    def nms(self, mode=True):  # add or remove NMS module
        present = isinstance(self.model[-1], NMS)  # last layer is NMS
        if mode and not present:
            print("Adding NMS... ")
            m = NMS()  # module
            m.f = -1  # from
            m.i = self.model[-1].i + 1  # index
            self.model.add_module(name=str(m.i), module=m)  # add
            self.eval()
        elif not mode and present:
            print("Removing NMS... ")
            self.model = self.model[:-1]  # remove
        return self

    def autoshape(self):  # add autoShape module
        print("Adding autoShape... ")
        m = AutoShape(self)  # wrap model
        copy_attr(m, self, include=("yaml", "nc", "hyp", "names", "stride"), exclude=())  # copy attributes
        return m


def parse_model(d, ch):  # model_dict, input_channels(3)
    anchors, nc, gd, gw = d["anchors"], d["nc"], d["depth_multiple"], d["width_multiple"]
    na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors  # number of anchors
    no = na * (nc + 5)  # number of outputs = anchors * (classes + 5)

    layers, save, c2 = [], [], ch[-1]  # layers, savelist, ch out
    for i, (f, n, m, args) in enumerate(d["backbone"] + d["head"]):  # from, number, module, args
        m = eval(m) if isinstance(m, str) else m  # eval strings
        for j, a in enumerate(args):
            try:
                args[j] = eval(a) if isinstance(a, str) else a  # eval strings
            except:
                pass

        n = max(round(n * gd), 1) if n > 1 else n  # depth gain
        if m in [
            Conv,
            Bottleneck,
            SPP,
            DWConv,
            MixConv2d,
            Focus,
            CrossConv,
            BottleneckCSP,
            C3,
            ShuffleV2Block,
            StemBlock,
        ]:
            c1, c2 = ch[f], args[0]

            c2 = make_divisible(c2 * gw, 8) if c2 != no else c2

            args = [c1, c2, *args[1:]]
            if m in [BottleneckCSP, C3]:
                args.insert(2, n)
                n = 1
        elif m is nn.BatchNorm2d:
            args = [ch[f]]
        elif m is Concat:
            c2 = sum(ch[-1 if x == -1 else x + 1] for x in f)
        elif m is Detect:
            args.append([ch[x + 1] for x in f])
            if isinstance(args[1], int):  # number of anchors
                args[1] = [list(range(args[1] * 2))] * len(f)
        else:
            c2 = ch[f]

        m_ = nn.Sequential(*(m(*args) for _ in range(n))) if n > 1 else m(*args)  # module
        t = str(m)[8:-2].replace("__main__.", "")  # module type
        np = sum(x.numel() for x in m_.parameters())  # number params
        m_.i, m_.f, m_.type, m_.np = i, f, t, np  # attach index, 'from' index, type, number params
        save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1)  # append to savelist
        layers.append(m_)
        ch.append(c2)
    return nn.Sequential(*layers), sorted(save)
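For context, building the detector from one of the bundled configs looks roughly like the sketch below; the cfg path is an assumed repo-relative location and the weights are randomly initialized, so only the output shape is meaningful.

# Shape-check sketch; adjust the cfg path to wherever the yaml lives at runtime.
import torch

model = Model(cfg="r_facelib/detection/yolov5face/models/yolov5l.yaml", ch=3, nc=1)
model.eval()
with torch.no_grad():
    pred, feature_maps = model(torch.zeros(1, 3, 256, 256))
print(pred.shape)   # (1, N, 16): 4 box + 1 objectness + 10 landmark + 1 class values per prediction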
r_facelib/detection/yolov5face/models/yolov5l.yaml
ADDED
@@ -0,0 +1,47 @@
# parameters
nc: 1  # number of classes
depth_multiple: 1.0  # model depth multiple
width_multiple: 1.0  # layer channel multiple

# anchors
anchors:
  - [4,5, 8,10, 13,16]  # P3/8
  - [23,29, 43,55, 73,105]  # P4/16
  - [146,217, 231,300, 335,433]  # P5/32

# YOLOv5 backbone
backbone:
  # [from, number, module, args]
  [[-1, 1, StemBlock, [64, 3, 2]],  # 0-P1/2
   [-1, 3, C3, [128]],
   [-1, 1, Conv, [256, 3, 2]],  # 2-P3/8
   [-1, 9, C3, [256]],
   [-1, 1, Conv, [512, 3, 2]],  # 4-P4/16
   [-1, 9, C3, [512]],
   [-1, 1, Conv, [1024, 3, 2]],  # 6-P5/32
   [-1, 1, SPP, [1024, [3,5,7]]],
   [-1, 3, C3, [1024, False]],  # 8
  ]

# YOLOv5 head
head:
  [[-1, 1, Conv, [512, 1, 1]],
   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
   [[-1, 5], 1, Concat, [1]],  # cat backbone P4
   [-1, 3, C3, [512, False]],  # 12

   [-1, 1, Conv, [256, 1, 1]],
   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
   [[-1, 3], 1, Concat, [1]],  # cat backbone P3
   [-1, 3, C3, [256, False]],  # 16 (P3/8-small)

   [-1, 1, Conv, [256, 3, 2]],
   [[-1, 13], 1, Concat, [1]],  # cat head P4
   [-1, 3, C3, [512, False]],  # 19 (P4/16-medium)

   [-1, 1, Conv, [512, 3, 2]],
   [[-1, 9], 1, Concat, [1]],  # cat head P5
   [-1, 3, C3, [1024, False]],  # 22 (P5/32-large)

   [[16, 19, 22], 1, Detect, [nc, anchors]],  # Detect(P3, P4, P5)
  ]
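Since parse_model() in yolo.py consumes this dict directly, a quick way to inspect the config is to load it with PyYAML; the path below is an assumed repo-relative location.

# Adjust the path to the actual install directory before running.
from pathlib import Path
import yaml

cfg = yaml.safe_load(Path("r_facelib/detection/yolov5face/models/yolov5l.yaml").read_text(encoding="utf8"))
print(cfg["nc"], cfg["depth_multiple"], cfg["width_multiple"])   # 1 1.0 1.0
print(cfg["anchors"][0])                                         # [4, 5, 8, 10, 13, 16] -> P3/8 anchors
print(len(cfg["backbone"]), "backbone entries,", len(cfg["head"]), "head entries")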