ajithradnus committed
Commit 09cc77a · verified · 1 Parent(s): 4e3a10f

Upload 108 files

This view is limited to 50 files because it contains too many changes. See the raw diff for the full list.
Files changed (50)
  1. .github/ISSUE_TEMPLATE/bug_report.yml +56 -0
  2. .github/ISSUE_TEMPLATE/config.yml +5 -0
  3. .github/ISSUE_TEMPLATE/feature_request.yml +16 -0
  4. .github/workflows/publish_action.yml +20 -0
  5. modules/__init__.py +0 -0
  6. modules/__pycache__/__init__.cpython-311.pyc +0 -0
  7. modules/__pycache__/processing.cpython-311.pyc +0 -0
  8. modules/__pycache__/scripts.cpython-311.pyc +0 -0
  9. modules/__pycache__/scripts_postprocessing.cpython-311.pyc +0 -0
  10. modules/__pycache__/shared.cpython-311.pyc +0 -0
  11. modules/images.py +0 -0
  12. modules/processing.py +13 -0
  13. modules/scripts.py +13 -0
  14. modules/scripts_postprocessing.py +0 -0
  15. modules/shared.py +19 -0
  16. r_chainner/__pycache__/model_loading.cpython-311.pyc +0 -0
  17. r_chainner/__pycache__/types.cpython-311.pyc +0 -0
  18. r_chainner/archs/face/__pycache__/gfpganv1_clean_arch.cpython-311.pyc +0 -0
  19. r_chainner/archs/face/__pycache__/stylegan2_clean_arch.cpython-311.pyc +0 -0
  20. r_chainner/archs/face/gfpganv1_clean_arch.py +370 -0
  21. r_chainner/archs/face/stylegan2_clean_arch.py +453 -0
  22. r_chainner/model_loading.py +28 -0
  23. r_chainner/types.py +18 -0
  24. r_facelib/__init__.py +0 -0
  25. r_facelib/__pycache__/__init__.cpython-311.pyc +0 -0
  26. r_facelib/detection/__init__.py +102 -0
  27. r_facelib/detection/__pycache__/__init__.cpython-311.pyc +0 -0
  28. r_facelib/detection/__pycache__/align_trans.cpython-311.pyc +0 -0
  29. r_facelib/detection/__pycache__/matlab_cp2tform.cpython-311.pyc +0 -0
  30. r_facelib/detection/align_trans.py +219 -0
  31. r_facelib/detection/matlab_cp2tform.py +317 -0
  32. r_facelib/detection/retinaface/__pycache__/retinaface.cpython-311.pyc +0 -0
  33. r_facelib/detection/retinaface/__pycache__/retinaface_net.cpython-311.pyc +0 -0
  34. r_facelib/detection/retinaface/__pycache__/retinaface_utils.cpython-311.pyc +0 -0
  35. r_facelib/detection/retinaface/retinaface.py +389 -0
  36. r_facelib/detection/retinaface/retinaface_net.py +196 -0
  37. r_facelib/detection/retinaface/retinaface_utils.py +421 -0
  38. r_facelib/detection/yolov5face/__init__.py +0 -0
  39. r_facelib/detection/yolov5face/__pycache__/__init__.cpython-311.pyc +0 -0
  40. r_facelib/detection/yolov5face/__pycache__/face_detector.cpython-311.pyc +0 -0
  41. r_facelib/detection/yolov5face/face_detector.py +141 -0
  42. r_facelib/detection/yolov5face/models/__init__.py +0 -0
  43. r_facelib/detection/yolov5face/models/__pycache__/__init__.cpython-311.pyc +0 -0
  44. r_facelib/detection/yolov5face/models/__pycache__/common.cpython-311.pyc +0 -0
  45. r_facelib/detection/yolov5face/models/__pycache__/experimental.cpython-311.pyc +0 -0
  46. r_facelib/detection/yolov5face/models/__pycache__/yolo.cpython-311.pyc +0 -0
  47. r_facelib/detection/yolov5face/models/common.py +299 -0
  48. r_facelib/detection/yolov5face/models/experimental.py +45 -0
  49. r_facelib/detection/yolov5face/models/yolo.py +235 -0
  50. r_facelib/detection/yolov5face/models/yolov5l.yaml +47 -0
.github/ISSUE_TEMPLATE/bug_report.yml ADDED
@@ -0,0 +1,56 @@
+ name: Bug Report
+ description: You think something is broken
+ labels: ["bug", "new"]
+
+ body:
+   - type: checkboxes
+     attributes:
+       label: First, confirm
+       description: Make sure you are using the latest version of the ReActor extension and that you have searched the existing issues for the bug you encountered before creating a new one.
+       options:
+         - label: I have read the [instruction](https://github.com/Gourieff/comfyui-reactor-node/blob/main/README.md) carefully
+           required: true
+         - label: I have searched the existing issues
+           required: true
+         - label: I have updated the extension to the latest version
+           required: true
+   - type: markdown
+     attributes:
+       value: |
+         *Please fill this form with as much information as possible, and provide screenshots if possible*
+   - type: textarea
+     id: what-did
+     attributes:
+       label: What happened?
+       description: Tell what happened in a very clear and simple way
+     validations:
+       required: true
+   - type: textarea
+     id: steps
+     attributes:
+       label: Steps to reproduce the problem
+       description: Please provide precise step-by-step instructions on how to reproduce the bug
+       value: |
+         Your workflow
+     validations:
+       required: true
+   - type: textarea
+     id: sysinfo
+     attributes:
+       label: Sysinfo
+       description: Describe your platform. OS, browser, GPU, what other nodes are also enabled.
+     validations:
+       required: true
+   - type: textarea
+     id: logs
+     attributes:
+       label: Relevant console log
+       description: Please provide cmd/terminal logs from the moment you started the UI to the moment you got an error. This will be automatically formatted into code, so no need for backticks.
+       render: Shell
+     validations:
+       required: true
+   - type: textarea
+     id: misc
+     attributes:
+       label: Additional information
+       description: Please provide any relevant additional info or context.
.github/ISSUE_TEMPLATE/config.yml ADDED
@@ -0,0 +1,5 @@
+ blank_issues_enabled: false
+ contact_links:
+   - name: ReActor Node Community Support
+     url: https://github.com/Gourieff/comfyui-reactor-node/discussions
+     about: Please ask and answer questions here.
.github/ISSUE_TEMPLATE/feature_request.yml ADDED
@@ -0,0 +1,16 @@
+ name: Feature request
+ description: Suggest an idea for this project
+ title: "[Feature]: "
+ labels: ["enhancement", "new"]
+
+ body:
+   - type: textarea
+     id: description
+     attributes:
+       label: Feature description
+       description: Describe the feature in a clear and simple way
+       value:
+   - type: markdown
+     attributes:
+       value: |
+         The best way to propose an idea is to start a new discussion via the [Discussions](https://github.com/Gourieff/comfyui-reactor-node/discussions) section (choose the "Idea" category)
.github/workflows/publish_action.yml ADDED
@@ -0,0 +1,20 @@
+ name: Publish to Comfy registry
+ on:
+   workflow_dispatch:
+   push:
+     branches:
+       - main
+     paths:
+       - "pyproject.toml"
+
+ jobs:
+   publish-node:
+     name: Publish Custom Node to registry
+     runs-on: ubuntu-latest
+     steps:
+       - name: Check out code
+         uses: actions/checkout@v4
+       - name: Publish Custom Node
+         uses: Comfy-Org/publish-node-action@main
+         with:
+           personal_access_token: ${{ secrets.REGISTRY_ACCESS_TOKEN }} ## Add your own personal access token to your GitHub secrets and reference it here.
modules/__init__.py ADDED
File without changes
modules/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (266 Bytes). View file
 
modules/__pycache__/processing.cpython-311.pyc ADDED
Binary file (1.43 kB). View file
 
modules/__pycache__/scripts.cpython-311.pyc ADDED
Binary file (1.07 kB). View file
 
modules/__pycache__/scripts_postprocessing.cpython-311.pyc ADDED
Binary file (280 Bytes). View file
 
modules/__pycache__/shared.cpython-311.pyc ADDED
Binary file (1.13 kB). View file
 
modules/images.py ADDED
File without changes
modules/processing.py ADDED
@@ -0,0 +1,13 @@
+ class StableDiffusionProcessing:
+
+     def __init__(self, init_imgs):
+         self.init_images = init_imgs
+         self.width = init_imgs[0].width
+         self.height = init_imgs[0].height
+         self.extra_generation_params = {}
+
+
+ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
+
+     def __init__(self, init_img):
+         super().__init__(init_img)
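The classes above are a thin stand-in for the Automatic1111 StableDiffusionProcessing API: they only record the init images and their dimensions. A minimal usage sketch (the image filename is hypothetical; any list of PIL images works, since the shim reads width/height from the first element):

import torch  # not required by the shim itself; shown only for context
from PIL import Image

from modules.processing import StableDiffusionProcessingImg2Img

img = Image.open("face.png")            # hypothetical input image
p = StableDiffusionProcessingImg2Img([img])
print(p.width, p.height, p.extra_generation_params)  # e.g. 512 512 {}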
modules/scripts.py ADDED
@@ -0,0 +1,13 @@
+ import os
+
+
+ class Script:
+     pass
+
+
+ def basedir():
+     return os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+
+
+ class PostprocessImageArgs:
+     pass
modules/scripts_postprocessing.py ADDED
File without changes
modules/shared.py ADDED
@@ -0,0 +1,19 @@
+ class Options:
+     img2img_background_color = "#ffffff"  # Set to white for now
+
+
+ class State:
+     interrupted = False
+
+     def begin(self):
+         pass
+
+     def end(self):
+         pass
+
+
+ opts = Options()
+ state = State()
+ cmd_opts = None
+ sd_upscalers = []
+ face_restorers = []
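modules/shared.py likewise stubs out the WebUI globals (opts, state, cmd_opts, and the upscaler/face-restorer registries) so code written against the WebUI can import them without a running WebUI. A minimal sketch of the expected access pattern, assuming nothing beyond what the stub defines:

from modules import shared

# The stub exposes module-level singletons, so callers read them directly.
print(shared.opts.img2img_background_color)  # "#ffffff"
if not shared.state.interrupted:
    shared.state.begin()
    # ... do work ...
    shared.state.end()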
r_chainner/__pycache__/model_loading.cpython-311.pyc ADDED
Binary file (1.37 kB). View file
 
r_chainner/__pycache__/types.cpython-311.pyc ADDED
Binary file (1.01 kB). View file
 
r_chainner/archs/face/__pycache__/gfpganv1_clean_arch.cpython-311.pyc ADDED
Binary file (17.7 kB). View file
 
r_chainner/archs/face/__pycache__/stylegan2_clean_arch.cpython-311.pyc ADDED
Binary file (22.1 kB). View file
 
r_chainner/archs/face/gfpganv1_clean_arch.py ADDED
@@ -0,0 +1,370 @@
1
+ # pylint: skip-file
2
+ # type: ignore
3
+ import math
4
+ import random
5
+
6
+ import torch
7
+ from torch import nn
8
+ from torch.nn import functional as F
9
+
10
+ from r_chainner.archs.face.stylegan2_clean_arch import StyleGAN2GeneratorClean
11
+
12
+
13
+ class StyleGAN2GeneratorCSFT(StyleGAN2GeneratorClean):
14
+ """StyleGAN2 Generator with SFT modulation (Spatial Feature Transform).
15
+ It is the clean version without custom compiled CUDA extensions used in StyleGAN2.
16
+ Args:
17
+ out_size (int): The spatial size of outputs.
18
+ num_style_feat (int): Channel number of style features. Default: 512.
19
+ num_mlp (int): Layer number of MLP style layers. Default: 8.
20
+ channel_multiplier (int): Channel multiplier for large networks of StyleGAN2. Default: 2.
21
+ narrow (float): The narrow ratio for channels. Default: 1.
22
+ sft_half (bool): Whether to apply SFT on half of the input channels. Default: False.
23
+ """
24
+
25
+ def __init__(
26
+ self,
27
+ out_size,
28
+ num_style_feat=512,
29
+ num_mlp=8,
30
+ channel_multiplier=2,
31
+ narrow=1,
32
+ sft_half=False,
33
+ ):
34
+ super(StyleGAN2GeneratorCSFT, self).__init__(
35
+ out_size,
36
+ num_style_feat=num_style_feat,
37
+ num_mlp=num_mlp,
38
+ channel_multiplier=channel_multiplier,
39
+ narrow=narrow,
40
+ )
41
+ self.sft_half = sft_half
42
+
43
+ def forward(
44
+ self,
45
+ styles,
46
+ conditions,
47
+ input_is_latent=False,
48
+ noise=None,
49
+ randomize_noise=True,
50
+ truncation=1,
51
+ truncation_latent=None,
52
+ inject_index=None,
53
+ return_latents=False,
54
+ ):
55
+ """Forward function for StyleGAN2GeneratorCSFT.
56
+ Args:
57
+ styles (list[Tensor]): Sample codes of styles.
58
+ conditions (list[Tensor]): SFT conditions to generators.
59
+ input_is_latent (bool): Whether input is latent style. Default: False.
60
+ noise (Tensor | None): Input noise or None. Default: None.
61
+ randomize_noise (bool): Randomize noise, used when 'noise' is False. Default: True.
62
+ truncation (float): The truncation ratio. Default: 1.
63
+ truncation_latent (Tensor | None): The truncation latent tensor. Default: None.
64
+ inject_index (int | None): The injection index for mixing noise. Default: None.
65
+ return_latents (bool): Whether to return style latents. Default: False.
66
+ """
67
+ # style codes -> latents with Style MLP layer
68
+ if not input_is_latent:
69
+ styles = [self.style_mlp(s) for s in styles]
70
+ # noises
71
+ if noise is None:
72
+ if randomize_noise:
73
+ noise = [None] * self.num_layers # for each style conv layer
74
+ else: # use the stored noise
75
+ noise = [
76
+ getattr(self.noises, f"noise{i}") for i in range(self.num_layers)
77
+ ]
78
+ # style truncation
79
+ if truncation < 1:
80
+ style_truncation = []
81
+ for style in styles:
82
+ style_truncation.append(
83
+ truncation_latent + truncation * (style - truncation_latent)
84
+ )
85
+ styles = style_truncation
86
+ # get style latents with injection
87
+ if len(styles) == 1:
88
+ inject_index = self.num_latent
89
+
90
+ if styles[0].ndim < 3:
91
+ # repeat latent code for all the layers
92
+ latent = styles[0].unsqueeze(1).repeat(1, inject_index, 1)
93
+ else: # used for encoder with different latent code for each layer
94
+ latent = styles[0]
95
+ elif len(styles) == 2: # mixing noises
96
+ if inject_index is None:
97
+ inject_index = random.randint(1, self.num_latent - 1)
98
+ latent1 = styles[0].unsqueeze(1).repeat(1, inject_index, 1)
99
+ latent2 = (
100
+ styles[1].unsqueeze(1).repeat(1, self.num_latent - inject_index, 1)
101
+ )
102
+ latent = torch.cat([latent1, latent2], 1)
103
+
104
+ # main generation
105
+ out = self.constant_input(latent.shape[0])
106
+ out = self.style_conv1(out, latent[:, 0], noise=noise[0])
107
+ skip = self.to_rgb1(out, latent[:, 1])
108
+
109
+ i = 1
110
+ for conv1, conv2, noise1, noise2, to_rgb in zip(
111
+ self.style_convs[::2],
112
+ self.style_convs[1::2],
113
+ noise[1::2],
114
+ noise[2::2],
115
+ self.to_rgbs,
116
+ ):
117
+ out = conv1(out, latent[:, i], noise=noise1)
118
+
119
+ # the conditions may have fewer levels
120
+ if i < len(conditions):
121
+ # SFT part to combine the conditions
122
+ if self.sft_half: # only apply SFT to half of the channels
123
+ out_same, out_sft = torch.split(out, int(out.size(1) // 2), dim=1)
124
+ out_sft = out_sft * conditions[i - 1] + conditions[i]
125
+ out = torch.cat([out_same, out_sft], dim=1)
126
+ else: # apply SFT to all the channels
127
+ out = out * conditions[i - 1] + conditions[i]
128
+
129
+ out = conv2(out, latent[:, i + 1], noise=noise2)
130
+ skip = to_rgb(out, latent[:, i + 2], skip) # feature back to the rgb space
131
+ i += 2
132
+
133
+ image = skip
134
+
135
+ if return_latents:
136
+ return image, latent
137
+ else:
138
+ return image, None
139
+
140
+
141
+ class ResBlock(nn.Module):
142
+ """Residual block with bilinear upsampling/downsampling.
143
+ Args:
144
+ in_channels (int): Channel number of the input.
145
+ out_channels (int): Channel number of the output.
146
+ mode (str): Upsampling/downsampling mode. Options: down | up. Default: down.
147
+ """
148
+
149
+ def __init__(self, in_channels, out_channels, mode="down"):
150
+ super(ResBlock, self).__init__()
151
+
152
+ self.conv1 = nn.Conv2d(in_channels, in_channels, 3, 1, 1)
153
+ self.conv2 = nn.Conv2d(in_channels, out_channels, 3, 1, 1)
154
+ self.skip = nn.Conv2d(in_channels, out_channels, 1, bias=False)
155
+ if mode == "down":
156
+ self.scale_factor = 0.5
157
+ elif mode == "up":
158
+ self.scale_factor = 2
159
+
160
+ def forward(self, x):
161
+ out = F.leaky_relu_(self.conv1(x), negative_slope=0.2)
162
+ # upsample/downsample
163
+ out = F.interpolate(
164
+ out, scale_factor=self.scale_factor, mode="bilinear", align_corners=False
165
+ )
166
+ out = F.leaky_relu_(self.conv2(out), negative_slope=0.2)
167
+ # skip
168
+ x = F.interpolate(
169
+ x, scale_factor=self.scale_factor, mode="bilinear", align_corners=False
170
+ )
171
+ skip = self.skip(x)
172
+ out = out + skip
173
+ return out
174
+
175
+
176
+ class GFPGANv1Clean(nn.Module):
177
+ """The GFPGAN architecture: Unet + StyleGAN2 decoder with SFT.
178
+ It is the clean version without custom compiled CUDA extensions used in StyleGAN2.
179
+ Ref: GFP-GAN: Towards Real-World Blind Face Restoration with Generative Facial Prior.
180
+ Args:
181
+ out_size (int): The spatial size of outputs.
182
+ num_style_feat (int): Channel number of style features. Default: 512.
183
+ channel_multiplier (int): Channel multiplier for large networks of StyleGAN2. Default: 2.
184
+ decoder_load_path (str): The path to the pre-trained decoder model (usually, the StyleGAN2). Default: None.
185
+ fix_decoder (bool): Whether to fix the decoder. Default: True.
186
+ num_mlp (int): Layer number of MLP style layers. Default: 8.
187
+ input_is_latent (bool): Whether input is latent style. Default: False.
188
+ different_w (bool): Whether to use different latent w for different layers. Default: False.
189
+ narrow (float): The narrow ratio for channels. Default: 1.
190
+ sft_half (bool): Whether to apply SFT on half of the input channels. Default: False.
191
+ """
192
+
193
+ def __init__(
194
+ self,
195
+ state_dict,
196
+ ):
197
+ super(GFPGANv1Clean, self).__init__()
198
+
199
+ out_size = 512
200
+ num_style_feat = 512
201
+ channel_multiplier = 2
202
+ decoder_load_path = None
203
+ fix_decoder = False
204
+ num_mlp = 8
205
+ input_is_latent = True
206
+ different_w = True
207
+ narrow = 1
208
+ sft_half = True
209
+
210
+ self.model_arch = "GFPGAN"
211
+ self.sub_type = "Face SR"
212
+ self.scale = 8
213
+ self.in_nc = 3
214
+ self.out_nc = 3
215
+ self.state = state_dict
216
+
217
+ self.supports_fp16 = False
218
+ self.supports_bf16 = True
219
+ self.min_size_restriction = 512
220
+
221
+ self.input_is_latent = input_is_latent
222
+ self.different_w = different_w
223
+ self.num_style_feat = num_style_feat
224
+
225
+ unet_narrow = narrow * 0.5 # by default, use a half of input channels
226
+ channels = {
227
+ "4": int(512 * unet_narrow),
228
+ "8": int(512 * unet_narrow),
229
+ "16": int(512 * unet_narrow),
230
+ "32": int(512 * unet_narrow),
231
+ "64": int(256 * channel_multiplier * unet_narrow),
232
+ "128": int(128 * channel_multiplier * unet_narrow),
233
+ "256": int(64 * channel_multiplier * unet_narrow),
234
+ "512": int(32 * channel_multiplier * unet_narrow),
235
+ "1024": int(16 * channel_multiplier * unet_narrow),
236
+ }
237
+
238
+ self.log_size = int(math.log(out_size, 2))
239
+ first_out_size = 2 ** (int(math.log(out_size, 2)))
240
+
241
+ self.conv_body_first = nn.Conv2d(3, channels[f"{first_out_size}"], 1)
242
+
243
+ # downsample
244
+ in_channels = channels[f"{first_out_size}"]
245
+ self.conv_body_down = nn.ModuleList()
246
+ for i in range(self.log_size, 2, -1):
247
+ out_channels = channels[f"{2**(i - 1)}"]
248
+ self.conv_body_down.append(ResBlock(in_channels, out_channels, mode="down"))
249
+ in_channels = out_channels
250
+
251
+ self.final_conv = nn.Conv2d(in_channels, channels["4"], 3, 1, 1)
252
+
253
+ # upsample
254
+ in_channels = channels["4"]
255
+ self.conv_body_up = nn.ModuleList()
256
+ for i in range(3, self.log_size + 1):
257
+ out_channels = channels[f"{2**i}"]
258
+ self.conv_body_up.append(ResBlock(in_channels, out_channels, mode="up"))
259
+ in_channels = out_channels
260
+
261
+ # to RGB
262
+ self.toRGB = nn.ModuleList()
263
+ for i in range(3, self.log_size + 1):
264
+ self.toRGB.append(nn.Conv2d(channels[f"{2**i}"], 3, 1))
265
+
266
+ if different_w:
267
+ linear_out_channel = (int(math.log(out_size, 2)) * 2 - 2) * num_style_feat
268
+ else:
269
+ linear_out_channel = num_style_feat
270
+
271
+ self.final_linear = nn.Linear(channels["4"] * 4 * 4, linear_out_channel)
272
+
273
+ # the decoder: stylegan2 generator with SFT modulations
274
+ self.stylegan_decoder = StyleGAN2GeneratorCSFT(
275
+ out_size=out_size,
276
+ num_style_feat=num_style_feat,
277
+ num_mlp=num_mlp,
278
+ channel_multiplier=channel_multiplier,
279
+ narrow=narrow,
280
+ sft_half=sft_half,
281
+ )
282
+
283
+ # load pre-trained stylegan2 model if necessary
284
+ if decoder_load_path:
285
+ self.stylegan_decoder.load_state_dict(
286
+ torch.load(
287
+ decoder_load_path, map_location=lambda storage, loc: storage
288
+ )["params_ema"]
289
+ )
290
+ # fix decoder without updating params
291
+ if fix_decoder:
292
+ for _, param in self.stylegan_decoder.named_parameters():
293
+ param.requires_grad = False
294
+
295
+ # for SFT modulations (scale and shift)
296
+ self.condition_scale = nn.ModuleList()
297
+ self.condition_shift = nn.ModuleList()
298
+ for i in range(3, self.log_size + 1):
299
+ out_channels = channels[f"{2**i}"]
300
+ if sft_half:
301
+ sft_out_channels = out_channels
302
+ else:
303
+ sft_out_channels = out_channels * 2
304
+ self.condition_scale.append(
305
+ nn.Sequential(
306
+ nn.Conv2d(out_channels, out_channels, 3, 1, 1),
307
+ nn.LeakyReLU(0.2, True),
308
+ nn.Conv2d(out_channels, sft_out_channels, 3, 1, 1),
309
+ )
310
+ )
311
+ self.condition_shift.append(
312
+ nn.Sequential(
313
+ nn.Conv2d(out_channels, out_channels, 3, 1, 1),
314
+ nn.LeakyReLU(0.2, True),
315
+ nn.Conv2d(out_channels, sft_out_channels, 3, 1, 1),
316
+ )
317
+ )
318
+ self.load_state_dict(state_dict)
319
+
320
+ def forward(
321
+ self, x, return_latents=False, return_rgb=True, randomize_noise=True, **kwargs
322
+ ):
323
+ """Forward function for GFPGANv1Clean.
324
+ Args:
325
+ x (Tensor): Input images.
326
+ return_latents (bool): Whether to return style latents. Default: False.
327
+ return_rgb (bool): Whether return intermediate rgb images. Default: True.
328
+ randomize_noise (bool): Randomize noise, used when 'noise' is False. Default: True.
329
+ """
330
+ conditions = []
331
+ unet_skips = []
332
+ out_rgbs = []
333
+
334
+ # encoder
335
+ feat = F.leaky_relu_(self.conv_body_first(x), negative_slope=0.2)
336
+ for i in range(self.log_size - 2):
337
+ feat = self.conv_body_down[i](feat)
338
+ unet_skips.insert(0, feat)
339
+ feat = F.leaky_relu_(self.final_conv(feat), negative_slope=0.2)
340
+
341
+ # style code
342
+ style_code = self.final_linear(feat.view(feat.size(0), -1))
343
+ if self.different_w:
344
+ style_code = style_code.view(style_code.size(0), -1, self.num_style_feat)
345
+
346
+ # decode
347
+ for i in range(self.log_size - 2):
348
+ # add unet skip
349
+ feat = feat + unet_skips[i]
350
+ # ResUpLayer
351
+ feat = self.conv_body_up[i](feat)
352
+ # generate scale and shift for SFT layers
353
+ scale = self.condition_scale[i](feat)
354
+ conditions.append(scale.clone())
355
+ shift = self.condition_shift[i](feat)
356
+ conditions.append(shift.clone())
357
+ # generate rgb images
358
+ if return_rgb:
359
+ out_rgbs.append(self.toRGB[i](feat))
360
+
361
+ # decoder
362
+ image, _ = self.stylegan_decoder(
363
+ [style_code],
364
+ conditions,
365
+ return_latents=return_latents,
366
+ input_is_latent=self.input_is_latent,
367
+ randomize_noise=randomize_noise,
368
+ )
369
+
370
+ return image, out_rgbs
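As a usage sketch: GFPGANv1Clean takes the checkpoint's state dict directly in its constructor (the v1-clean hyperparameters are hard-coded and load_state_dict is called at the end of __init__), and its forward pass returns the restored face plus optional intermediate RGBs. The checkpoint path below is hypothetical; any GFPGAN "clean" checkpoint with matching keys is assumed.

import torch

from r_chainner.archs.face.gfpganv1_clean_arch import GFPGANv1Clean

ckpt = torch.load("GFPGAN-clean.pth", map_location="cpu")  # hypothetical path
state_dict = ckpt.get("params_ema", ckpt)  # some checkpoints wrap the weights

model = GFPGANv1Clean(state_dict).eval()

# 512x512 RGB face crop, normalized however the surrounding pipeline expects.
face = torch.randn(1, 3, 512, 512)
with torch.no_grad():
    restored, out_rgbs = model(face, return_rgb=False)
print(restored.shape)  # torch.Size([1, 3, 512, 512])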
r_chainner/archs/face/stylegan2_clean_arch.py ADDED
@@ -0,0 +1,453 @@
1
+ # pylint: skip-file
2
+ # type: ignore
3
+ import math
4
+
5
+ import torch
6
+ from torch import nn
7
+ from torch.nn import functional as F
8
+ from torch.nn import init
9
+ from torch.nn.modules.batchnorm import _BatchNorm
10
+
11
+
12
+ @torch.no_grad()
13
+ def default_init_weights(module_list, scale=1, bias_fill=0, **kwargs):
14
+ """Initialize network weights.
15
+ Args:
16
+ module_list (list[nn.Module] | nn.Module): Modules to be initialized.
17
+ scale (float): Scale initialized weights, especially for residual
18
+ blocks. Default: 1.
19
+ bias_fill (float): The value to fill bias. Default: 0
20
+ kwargs (dict): Other arguments for initialization function.
21
+ """
22
+ if not isinstance(module_list, list):
23
+ module_list = [module_list]
24
+ for module in module_list:
25
+ for m in module.modules():
26
+ if isinstance(m, nn.Conv2d):
27
+ init.kaiming_normal_(m.weight, **kwargs)
28
+ m.weight.data *= scale
29
+ if m.bias is not None:
30
+ m.bias.data.fill_(bias_fill)
31
+ elif isinstance(m, nn.Linear):
32
+ init.kaiming_normal_(m.weight, **kwargs)
33
+ m.weight.data *= scale
34
+ if m.bias is not None:
35
+ m.bias.data.fill_(bias_fill)
36
+ elif isinstance(m, _BatchNorm):
37
+ init.constant_(m.weight, 1)
38
+ if m.bias is not None:
39
+ m.bias.data.fill_(bias_fill)
40
+
41
+
42
+ class NormStyleCode(nn.Module):
43
+ def forward(self, x):
44
+ """Normalize the style codes.
45
+ Args:
46
+ x (Tensor): Style codes with shape (b, c).
47
+ Returns:
48
+ Tensor: Normalized tensor.
49
+ """
50
+ return x * torch.rsqrt(torch.mean(x**2, dim=1, keepdim=True) + 1e-8)
51
+
52
+
53
+ class ModulatedConv2d(nn.Module):
54
+ """Modulated Conv2d used in StyleGAN2.
55
+ There is no bias in ModulatedConv2d.
56
+ Args:
57
+ in_channels (int): Channel number of the input.
58
+ out_channels (int): Channel number of the output.
59
+ kernel_size (int): Size of the convolving kernel.
60
+ num_style_feat (int): Channel number of style features.
61
+ demodulate (bool): Whether to demodulate in the conv layer. Default: True.
62
+ sample_mode (str | None): Indicating 'upsample', 'downsample' or None. Default: None.
63
+ eps (float): A value added to the denominator for numerical stability. Default: 1e-8.
64
+ """
65
+
66
+ def __init__(
67
+ self,
68
+ in_channels,
69
+ out_channels,
70
+ kernel_size,
71
+ num_style_feat,
72
+ demodulate=True,
73
+ sample_mode=None,
74
+ eps=1e-8,
75
+ ):
76
+ super(ModulatedConv2d, self).__init__()
77
+ self.in_channels = in_channels
78
+ self.out_channels = out_channels
79
+ self.kernel_size = kernel_size
80
+ self.demodulate = demodulate
81
+ self.sample_mode = sample_mode
82
+ self.eps = eps
83
+
84
+ # modulation inside each modulated conv
85
+ self.modulation = nn.Linear(num_style_feat, in_channels, bias=True)
86
+ # initialization
87
+ default_init_weights(
88
+ self.modulation,
89
+ scale=1,
90
+ bias_fill=1,
91
+ a=0,
92
+ mode="fan_in",
93
+ nonlinearity="linear",
94
+ )
95
+
96
+ self.weight = nn.Parameter(
97
+ torch.randn(1, out_channels, in_channels, kernel_size, kernel_size)
98
+ / math.sqrt(in_channels * kernel_size**2)
99
+ )
100
+ self.padding = kernel_size // 2
101
+
102
+ def forward(self, x, style):
103
+ """Forward function.
104
+ Args:
105
+ x (Tensor): Tensor with shape (b, c, h, w).
106
+ style (Tensor): Tensor with shape (b, num_style_feat).
107
+ Returns:
108
+ Tensor: Modulated tensor after convolution.
109
+ """
110
+ b, c, h, w = x.shape # c = c_in
111
+ # weight modulation
112
+ style = self.modulation(style).view(b, 1, c, 1, 1)
113
+ # self.weight: (1, c_out, c_in, k, k); style: (b, 1, c, 1, 1)
114
+ weight = self.weight * style # (b, c_out, c_in, k, k)
115
+
116
+ if self.demodulate:
117
+ demod = torch.rsqrt(weight.pow(2).sum([2, 3, 4]) + self.eps)
118
+ weight = weight * demod.view(b, self.out_channels, 1, 1, 1)
119
+
120
+ weight = weight.view(
121
+ b * self.out_channels, c, self.kernel_size, self.kernel_size
122
+ )
123
+
124
+ # upsample or downsample if necessary
125
+ if self.sample_mode == "upsample":
126
+ x = F.interpolate(x, scale_factor=2, mode="bilinear", align_corners=False)
127
+ elif self.sample_mode == "downsample":
128
+ x = F.interpolate(x, scale_factor=0.5, mode="bilinear", align_corners=False)
129
+
130
+ b, c, h, w = x.shape
131
+ x = x.view(1, b * c, h, w)
132
+ # weight: (b*c_out, c_in, k, k), groups=b
133
+ out = F.conv2d(x, weight, padding=self.padding, groups=b)
134
+ out = out.view(b, self.out_channels, *out.shape[2:4])
135
+
136
+ return out
137
+
138
+ def __repr__(self):
139
+ return (
140
+ f"{self.__class__.__name__}(in_channels={self.in_channels}, out_channels={self.out_channels}, "
141
+ f"kernel_size={self.kernel_size}, demodulate={self.demodulate}, sample_mode={self.sample_mode})"
142
+ )
143
+
144
+
145
+ class StyleConv(nn.Module):
146
+ """Style conv used in StyleGAN2.
147
+ Args:
148
+ in_channels (int): Channel number of the input.
149
+ out_channels (int): Channel number of the output.
150
+ kernel_size (int): Size of the convolving kernel.
151
+ num_style_feat (int): Channel number of style features.
152
+ demodulate (bool): Whether demodulate in the conv layer. Default: True.
153
+ sample_mode (str | None): Indicating 'upsample', 'downsample' or None. Default: None.
154
+ """
155
+
156
+ def __init__(
157
+ self,
158
+ in_channels,
159
+ out_channels,
160
+ kernel_size,
161
+ num_style_feat,
162
+ demodulate=True,
163
+ sample_mode=None,
164
+ ):
165
+ super(StyleConv, self).__init__()
166
+ self.modulated_conv = ModulatedConv2d(
167
+ in_channels,
168
+ out_channels,
169
+ kernel_size,
170
+ num_style_feat,
171
+ demodulate=demodulate,
172
+ sample_mode=sample_mode,
173
+ )
174
+ self.weight = nn.Parameter(torch.zeros(1)) # for noise injection
175
+ self.bias = nn.Parameter(torch.zeros(1, out_channels, 1, 1))
176
+ self.activate = nn.LeakyReLU(negative_slope=0.2, inplace=True)
177
+
178
+ def forward(self, x, style, noise=None):
179
+ # modulate
180
+ out = self.modulated_conv(x, style) * 2**0.5 # for conversion
181
+ # noise injection
182
+ if noise is None:
183
+ b, _, h, w = out.shape
184
+ noise = out.new_empty(b, 1, h, w).normal_()
185
+ out = out + self.weight * noise
186
+ # add bias
187
+ out = out + self.bias
188
+ # activation
189
+ out = self.activate(out)
190
+ return out
191
+
192
+
193
+ class ToRGB(nn.Module):
194
+ """To RGB (image space) from features.
195
+ Args:
196
+ in_channels (int): Channel number of input.
197
+ num_style_feat (int): Channel number of style features.
198
+ upsample (bool): Whether to upsample. Default: True.
199
+ """
200
+
201
+ def __init__(self, in_channels, num_style_feat, upsample=True):
202
+ super(ToRGB, self).__init__()
203
+ self.upsample = upsample
204
+ self.modulated_conv = ModulatedConv2d(
205
+ in_channels,
206
+ 3,
207
+ kernel_size=1,
208
+ num_style_feat=num_style_feat,
209
+ demodulate=False,
210
+ sample_mode=None,
211
+ )
212
+ self.bias = nn.Parameter(torch.zeros(1, 3, 1, 1))
213
+
214
+ def forward(self, x, style, skip=None):
215
+ """Forward function.
216
+ Args:
217
+ x (Tensor): Feature tensor with shape (b, c, h, w).
218
+ style (Tensor): Tensor with shape (b, num_style_feat).
219
+ skip (Tensor): Base/skip tensor. Default: None.
220
+ Returns:
221
+ Tensor: RGB images.
222
+ """
223
+ out = self.modulated_conv(x, style)
224
+ out = out + self.bias
225
+ if skip is not None:
226
+ if self.upsample:
227
+ skip = F.interpolate(
228
+ skip, scale_factor=2, mode="bilinear", align_corners=False
229
+ )
230
+ out = out + skip
231
+ return out
232
+
233
+
234
+ class ConstantInput(nn.Module):
235
+ """Constant input.
236
+ Args:
237
+ num_channel (int): Channel number of constant input.
238
+ size (int): Spatial size of constant input.
239
+ """
240
+
241
+ def __init__(self, num_channel, size):
242
+ super(ConstantInput, self).__init__()
243
+ self.weight = nn.Parameter(torch.randn(1, num_channel, size, size))
244
+
245
+ def forward(self, batch):
246
+ out = self.weight.repeat(batch, 1, 1, 1)
247
+ return out
248
+
249
+
250
+ class StyleGAN2GeneratorClean(nn.Module):
251
+ """Clean version of StyleGAN2 Generator.
252
+ Args:
253
+ out_size (int): The spatial size of outputs.
254
+ num_style_feat (int): Channel number of style features. Default: 512.
255
+ num_mlp (int): Layer number of MLP style layers. Default: 8.
256
+ channel_multiplier (int): Channel multiplier for large networks of StyleGAN2. Default: 2.
257
+ narrow (float): Narrow ratio for channels. Default: 1.0.
258
+ """
259
+
260
+ def __init__(
261
+ self, out_size, num_style_feat=512, num_mlp=8, channel_multiplier=2, narrow=1
262
+ ):
263
+ super(StyleGAN2GeneratorClean, self).__init__()
264
+ # Style MLP layers
265
+ self.num_style_feat = num_style_feat
266
+ style_mlp_layers = [NormStyleCode()]
267
+ for i in range(num_mlp):
268
+ style_mlp_layers.extend(
269
+ [
270
+ nn.Linear(num_style_feat, num_style_feat, bias=True),
271
+ nn.LeakyReLU(negative_slope=0.2, inplace=True),
272
+ ]
273
+ )
274
+ self.style_mlp = nn.Sequential(*style_mlp_layers)
275
+ # initialization
276
+ default_init_weights(
277
+ self.style_mlp,
278
+ scale=1,
279
+ bias_fill=0,
280
+ a=0.2,
281
+ mode="fan_in",
282
+ nonlinearity="leaky_relu",
283
+ )
284
+
285
+ # channel list
286
+ channels = {
287
+ "4": int(512 * narrow),
288
+ "8": int(512 * narrow),
289
+ "16": int(512 * narrow),
290
+ "32": int(512 * narrow),
291
+ "64": int(256 * channel_multiplier * narrow),
292
+ "128": int(128 * channel_multiplier * narrow),
293
+ "256": int(64 * channel_multiplier * narrow),
294
+ "512": int(32 * channel_multiplier * narrow),
295
+ "1024": int(16 * channel_multiplier * narrow),
296
+ }
297
+ self.channels = channels
298
+
299
+ self.constant_input = ConstantInput(channels["4"], size=4)
300
+ self.style_conv1 = StyleConv(
301
+ channels["4"],
302
+ channels["4"],
303
+ kernel_size=3,
304
+ num_style_feat=num_style_feat,
305
+ demodulate=True,
306
+ sample_mode=None,
307
+ )
308
+ self.to_rgb1 = ToRGB(channels["4"], num_style_feat, upsample=False)
309
+
310
+ self.log_size = int(math.log(out_size, 2))
311
+ self.num_layers = (self.log_size - 2) * 2 + 1
312
+ self.num_latent = self.log_size * 2 - 2
313
+
314
+ self.style_convs = nn.ModuleList()
315
+ self.to_rgbs = nn.ModuleList()
316
+ self.noises = nn.Module()
317
+
318
+ in_channels = channels["4"]
319
+ # noise
320
+ for layer_idx in range(self.num_layers):
321
+ resolution = 2 ** ((layer_idx + 5) // 2)
322
+ shape = [1, 1, resolution, resolution]
323
+ self.noises.register_buffer(f"noise{layer_idx}", torch.randn(*shape))
324
+ # style convs and to_rgbs
325
+ for i in range(3, self.log_size + 1):
326
+ out_channels = channels[f"{2**i}"]
327
+ self.style_convs.append(
328
+ StyleConv(
329
+ in_channels,
330
+ out_channels,
331
+ kernel_size=3,
332
+ num_style_feat=num_style_feat,
333
+ demodulate=True,
334
+ sample_mode="upsample",
335
+ )
336
+ )
337
+ self.style_convs.append(
338
+ StyleConv(
339
+ out_channels,
340
+ out_channels,
341
+ kernel_size=3,
342
+ num_style_feat=num_style_feat,
343
+ demodulate=True,
344
+ sample_mode=None,
345
+ )
346
+ )
347
+ self.to_rgbs.append(ToRGB(out_channels, num_style_feat, upsample=True))
348
+ in_channels = out_channels
349
+
350
+ def make_noise(self):
351
+ """Make noise for noise injection."""
352
+ device = self.constant_input.weight.device
353
+ noises = [torch.randn(1, 1, 4, 4, device=device)]
354
+
355
+ for i in range(3, self.log_size + 1):
356
+ for _ in range(2):
357
+ noises.append(torch.randn(1, 1, 2**i, 2**i, device=device))
358
+
359
+ return noises
360
+
361
+ def get_latent(self, x):
362
+ return self.style_mlp(x)
363
+
364
+ def mean_latent(self, num_latent):
365
+ latent_in = torch.randn(
366
+ num_latent, self.num_style_feat, device=self.constant_input.weight.device
367
+ )
368
+ latent = self.style_mlp(latent_in).mean(0, keepdim=True)
369
+ return latent
370
+
371
+ def forward(
372
+ self,
373
+ styles,
374
+ input_is_latent=False,
375
+ noise=None,
376
+ randomize_noise=True,
377
+ truncation=1,
378
+ truncation_latent=None,
379
+ inject_index=None,
380
+ return_latents=False,
381
+ ):
382
+ """Forward function for StyleGAN2GeneratorClean.
383
+ Args:
384
+ styles (list[Tensor]): Sample codes of styles.
385
+ input_is_latent (bool): Whether input is latent style. Default: False.
386
+ noise (Tensor | None): Input noise or None. Default: None.
387
+ randomize_noise (bool): Randomize noise, used when 'noise' is False. Default: True.
388
+ truncation (float): The truncation ratio. Default: 1.
389
+ truncation_latent (Tensor | None): The truncation latent tensor. Default: None.
390
+ inject_index (int | None): The injection index for mixing noise. Default: None.
391
+ return_latents (bool): Whether to return style latents. Default: False.
392
+ """
393
+ # style codes -> latents with Style MLP layer
394
+ if not input_is_latent:
395
+ styles = [self.style_mlp(s) for s in styles]
396
+ # noises
397
+ if noise is None:
398
+ if randomize_noise:
399
+ noise = [None] * self.num_layers # for each style conv layer
400
+ else: # use the stored noise
401
+ noise = [
402
+ getattr(self.noises, f"noise{i}") for i in range(self.num_layers)
403
+ ]
404
+ # style truncation
405
+ if truncation < 1:
406
+ style_truncation = []
407
+ for style in styles:
408
+ style_truncation.append(
409
+ truncation_latent + truncation * (style - truncation_latent)
410
+ )
411
+ styles = style_truncation
412
+ # get style latents with injection
413
+ if len(styles) == 1:
414
+ inject_index = self.num_latent
415
+
416
+ if styles[0].ndim < 3:
417
+ # repeat latent code for all the layers
418
+ latent = styles[0].unsqueeze(1).repeat(1, inject_index, 1)
419
+ else: # used for encoder with different latent code for each layer
420
+ latent = styles[0]
421
+ elif len(styles) == 2: # mixing noises
422
+ if inject_index is None:
423
+ inject_index = random.randint(1, self.num_latent - 1)
424
+ latent1 = styles[0].unsqueeze(1).repeat(1, inject_index, 1)
425
+ latent2 = (
426
+ styles[1].unsqueeze(1).repeat(1, self.num_latent - inject_index, 1)
427
+ )
428
+ latent = torch.cat([latent1, latent2], 1)
429
+
430
+ # main generation
431
+ out = self.constant_input(latent.shape[0])
432
+ out = self.style_conv1(out, latent[:, 0], noise=noise[0])
433
+ skip = self.to_rgb1(out, latent[:, 1])
434
+
435
+ i = 1
436
+ for conv1, conv2, noise1, noise2, to_rgb in zip(
437
+ self.style_convs[::2],
438
+ self.style_convs[1::2],
439
+ noise[1::2],
440
+ noise[2::2],
441
+ self.to_rgbs,
442
+ ):
443
+ out = conv1(out, latent[:, i], noise=noise1)
444
+ out = conv2(out, latent[:, i + 1], noise=noise2)
445
+ skip = to_rgb(out, latent[:, i + 2], skip) # feature back to the rgb space
446
+ i += 2
447
+
448
+ image = skip
449
+
450
+ if return_latents:
451
+ return image, latent
452
+ else:
453
+ return image, None
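For orientation, StyleGAN2GeneratorClean can be exercised on its own: a list containing one style code of shape (batch, num_style_feat) is pushed through the style MLP, broadcast across all layers, and decoded into an out_size-resolution image. A minimal sketch with randomly initialized weights (so the output is noise, not a face):

import torch

from r_chainner.archs.face.stylegan2_clean_arch import StyleGAN2GeneratorClean

gen = StyleGAN2GeneratorClean(out_size=512, num_style_feat=512, num_mlp=8)
gen.eval()

styles = [torch.randn(2, 512)]  # one style code per sample
with torch.no_grad():
    image, _ = gen(styles, input_is_latent=False, randomize_noise=True)
print(image.shape)  # torch.Size([2, 3, 512, 512])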
r_chainner/model_loading.py ADDED
@@ -0,0 +1,28 @@
+ from r_chainner.archs.face.gfpganv1_clean_arch import GFPGANv1Clean
+ from r_chainner.types import PyTorchModel
+
+
+ class UnsupportedModel(Exception):
+     pass
+
+
+ def load_state_dict(state_dict) -> PyTorchModel:
+
+     state_dict_keys = list(state_dict.keys())
+
+     if "params_ema" in state_dict_keys:
+         state_dict = state_dict["params_ema"]
+     elif "params-ema" in state_dict_keys:
+         state_dict = state_dict["params-ema"]
+     elif "params" in state_dict_keys:
+         state_dict = state_dict["params"]
+
+     state_dict_keys = list(state_dict.keys())
+
+     # GFPGAN
+     if (
+         "toRGB.0.weight" in state_dict_keys
+         and "stylegan_decoder.style_mlp.1.weight" in state_dict_keys
+     ):
+         model = GFPGANv1Clean(state_dict)
+     return model
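load_state_dict sniffs the checkpoint by its key names: it first unwraps params_ema / params-ema / params, then dispatches to GFPGANv1Clean when the GFPGAN-specific keys are present (this trimmed loader only knows that one architecture). A hedged sketch of how it would be called, with a hypothetical checkpoint path:

import torch

from r_chainner.model_loading import load_state_dict

sd = torch.load("models/facerestore_models/GFPGANv1.4.pth", map_location="cpu")  # hypothetical path
model = load_state_dict(sd)   # returns a GFPGANv1Clean instance for GFPGAN checkpoints
print(model.model_arch, model.scale)  # "GFPGAN" 8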
r_chainner/types.py ADDED
@@ -0,0 +1,18 @@
+ from typing import Union
+
+ from r_chainner.archs.face.gfpganv1_clean_arch import GFPGANv1Clean
+
+
+ PyTorchFaceModels = (GFPGANv1Clean,)
+ PyTorchFaceModel = Union[GFPGANv1Clean]
+
+
+ def is_pytorch_face_model(model: object):
+     return isinstance(model, PyTorchFaceModels)
+
+
+ PyTorchModels = (*PyTorchFaceModels, )
+ PyTorchModel = Union[PyTorchFaceModel]
+
+
+ def is_pytorch_model(model: object):
+     return isinstance(model, PyTorchModels)
r_facelib/__init__.py ADDED
File without changes
r_facelib/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (268 Bytes). View file
 
r_facelib/detection/__init__.py ADDED
@@ -0,0 +1,102 @@
+ import os
+ import torch
+ from torch import nn
+ from copy import deepcopy
+ import pathlib
+
+ from r_facelib.utils import load_file_from_url
+ from r_facelib.utils import download_pretrained_models
+ from r_facelib.detection.yolov5face.models.common import Conv
+
+ from .retinaface.retinaface import RetinaFace
+ from .yolov5face.face_detector import YoloDetector
+
+
+ def init_detection_model(model_name, half=False, device='cuda'):
+     if 'retinaface' in model_name:
+         model = init_retinaface_model(model_name, half, device)
+     elif 'YOLOv5' in model_name:
+         model = init_yolov5face_model(model_name, device)
+     else:
+         raise NotImplementedError(f'{model_name} is not implemented.')
+
+     return model
+
+
+ def init_retinaface_model(model_name, half=False, device='cuda'):
+     if model_name == 'retinaface_resnet50':
+         model = RetinaFace(network_name='resnet50', half=half)
+         model_url = 'https://github.com/xinntao/facexlib/releases/download/v0.1.0/detection_Resnet50_Final.pth'
+     elif model_name == 'retinaface_mobile0.25':
+         model = RetinaFace(network_name='mobile0.25', half=half)
+         model_url = 'https://github.com/xinntao/facexlib/releases/download/v0.1.0/detection_mobilenet0.25_Final.pth'
+     else:
+         raise NotImplementedError(f'{model_name} is not implemented.')
+
+     model_path = load_file_from_url(url=model_url, model_dir='../../models/facedetection', progress=True, file_name=None)
+     load_net = torch.load(model_path, map_location=lambda storage, loc: storage)
+     # remove unnecessary 'module.'
+     for k, v in deepcopy(load_net).items():
+         if k.startswith('module.'):
+             load_net[k[7:]] = v
+             load_net.pop(k)
+     model.load_state_dict(load_net, strict=True)
+     model.eval()
+     model = model.to(device)
+
+     return model
+
+
+ def init_yolov5face_model(model_name, device='cuda'):
+     current_dir = str(pathlib.Path(__file__).parent.resolve())
+     if model_name == 'YOLOv5l':
+         model = YoloDetector(config_name=current_dir+'/yolov5face/models/yolov5l.yaml', device=device)
+         model_url = 'https://github.com/sczhou/CodeFormer/releases/download/v0.1.0/yolov5l-face.pth'
+     elif model_name == 'YOLOv5n':
+         model = YoloDetector(config_name=current_dir+'/yolov5face/models/yolov5n.yaml', device=device)
+         model_url = 'https://github.com/sczhou/CodeFormer/releases/download/v0.1.0/yolov5n-face.pth'
+     else:
+         raise NotImplementedError(f'{model_name} is not implemented.')
+
+     model_path = load_file_from_url(url=model_url, model_dir='../../models/facedetection', progress=True, file_name=None)
+     load_net = torch.load(model_path, map_location=lambda storage, loc: storage)
+     model.detector.load_state_dict(load_net, strict=True)
+     model.detector.eval()
+     model.detector = model.detector.to(device).float()
+
+     for m in model.detector.modules():
+         if type(m) in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU]:
+             m.inplace = True  # pytorch 1.7.0 compatibility
+         elif isinstance(m, Conv):
+             m._non_persistent_buffers_set = set()  # pytorch 1.6.0 compatibility
+
+     return model
+
+
+ # Download from Google Drive
+ # def init_yolov5face_model(model_name, device='cuda'):
+ #     if model_name == 'YOLOv5l':
+ #         model = YoloDetector(config_name='facelib/detection/yolov5face/models/yolov5l.yaml', device=device)
+ #         f_id = {'yolov5l-face.pth': '131578zMA6B2x8VQHyHfa6GEPtulMCNzV'}
+ #     elif model_name == 'YOLOv5n':
+ #         model = YoloDetector(config_name='facelib/detection/yolov5face/models/yolov5n.yaml', device=device)
+ #         f_id = {'yolov5n-face.pth': '1fhcpFvWZqghpGXjYPIne2sw1Fy4yhw6o'}
+ #     else:
+ #         raise NotImplementedError(f'{model_name} is not implemented.')
+
+ #     model_path = os.path.join('../../models/facedetection', list(f_id.keys())[0])
+ #     if not os.path.exists(model_path):
+ #         download_pretrained_models(file_ids=f_id, save_path_root='../../models/facedetection')
+
+ #     load_net = torch.load(model_path, map_location=lambda storage, loc: storage)
+ #     model.detector.load_state_dict(load_net, strict=True)
+ #     model.detector.eval()
+ #     model.detector = model.detector.to(device).float()
+
+ #     for m in model.detector.modules():
+ #         if type(m) in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU]:
+ #             m.inplace = True  # pytorch 1.7.0 compatibility
+ #         elif isinstance(m, Conv):
+ #             m._non_persistent_buffers_set = set()  # pytorch 1.6.0 compatibility
+
+ #     return model
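init_detection_model is the factory entry point: it builds either a RetinaFace or a YOLOv5-face detector, downloads the matching weights with load_file_from_url, strips any DataParallel 'module.' prefixes, and returns the model in eval mode on the requested device. A sketch of typical use; the downstream detect_faces call mirrors the upstream facexlib API and is an assumption here, since retinaface.py is not shown in this view:

import cv2

from r_facelib.detection import init_detection_model

# Downloads detection_Resnet50_Final.pth on first use.
detector = init_detection_model('retinaface_resnet50', half=False, device='cpu')

img = cv2.imread("photo.jpg")  # hypothetical input, BGR as loaded by OpenCV
# Assumed facexlib-style API: one row per face with bbox, score and 5 landmark pairs.
bboxes = detector.detect_faces(img, 0.97)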
r_facelib/detection/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (4.99 kB). View file
 
r_facelib/detection/__pycache__/align_trans.cpython-311.pyc ADDED
Binary file (9.78 kB). View file
 
r_facelib/detection/__pycache__/matlab_cp2tform.cpython-311.pyc ADDED
Binary file (10.9 kB). View file
 
r_facelib/detection/align_trans.py ADDED
@@ -0,0 +1,219 @@
1
+ import cv2
2
+ import numpy as np
3
+
4
+ from .matlab_cp2tform import get_similarity_transform_for_cv2
5
+
6
+ # reference facial points, a list of coordinates (x,y)
7
+ REFERENCE_FACIAL_POINTS = [[30.29459953, 51.69630051], [65.53179932, 51.50139999], [48.02519989, 71.73660278],
8
+ [33.54930115, 92.3655014], [62.72990036, 92.20410156]]
9
+
10
+ DEFAULT_CROP_SIZE = (96, 112)
11
+
12
+
13
+ class FaceWarpException(Exception):
14
+
15
+ def __str__(self):
16
+ return 'In File {}:{}'.format(__file__, super.__str__(self))
17
+
18
+
19
+ def get_reference_facial_points(output_size=None, inner_padding_factor=0.0, outer_padding=(0, 0), default_square=False):
20
+ """
21
+ Function:
22
+ ----------
23
+ get reference 5 key points according to crop settings:
24
+ 0. Set default crop_size:
25
+ if default_square:
26
+ crop_size = (112, 112)
27
+ else:
28
+ crop_size = (96, 112)
29
+ 1. Pad the crop_size by inner_padding_factor in each side;
30
+ 2. Resize crop_size into (output_size - outer_padding*2),
31
+ pad into output_size with outer_padding;
32
+ 3. Output reference_5point;
33
+ Parameters:
34
+ ----------
35
+ @output_size: (w, h) or None
36
+ size of aligned face image
37
+ @inner_padding_factor: (w_factor, h_factor)
38
+ padding factor for inner (w, h)
39
+ @outer_padding: (w_pad, h_pad)
40
+ each row is a pair of coordinates (x, y)
41
+ @default_square: True or False
42
+ if True:
43
+ default crop_size = (112, 112)
44
+ else:
45
+ default crop_size = (96, 112);
46
+ !!! make sure, if output_size is not None:
47
+ (output_size - outer_padding)
48
+ = some_scale * (default crop_size * (1.0 +
49
+ inner_padding_factor))
50
+ Returns:
51
+ ----------
52
+ @reference_5point: 5x2 np.array
53
+ each row is a pair of transformed coordinates (x, y)
54
+ """
55
+
56
+ tmp_5pts = np.array(REFERENCE_FACIAL_POINTS)
57
+ tmp_crop_size = np.array(DEFAULT_CROP_SIZE)
58
+
59
+ # 0) make the inner region a square
60
+ if default_square:
61
+ size_diff = max(tmp_crop_size) - tmp_crop_size
62
+ tmp_5pts += size_diff / 2
63
+ tmp_crop_size += size_diff
64
+
65
+ if (output_size and output_size[0] == tmp_crop_size[0] and output_size[1] == tmp_crop_size[1]):
66
+
67
+ return tmp_5pts
68
+
69
+ if (inner_padding_factor == 0 and outer_padding == (0, 0)):
70
+ if output_size is None:
71
+ return tmp_5pts
72
+ else:
73
+ raise FaceWarpException('No paddings to do, output_size must be None or {}'.format(tmp_crop_size))
74
+
75
+ # check output size
76
+ if not (0 <= inner_padding_factor <= 1.0):
77
+ raise FaceWarpException('Not (0 <= inner_padding_factor <= 1.0)')
78
+
79
+ if ((inner_padding_factor > 0 or outer_padding[0] > 0 or outer_padding[1] > 0) and output_size is None):
80
+ output_size = tmp_crop_size * \
81
+ (1 + inner_padding_factor * 2).astype(np.int32)
82
+ output_size += np.array(outer_padding)
83
+ if not (outer_padding[0] < output_size[0] and outer_padding[1] < output_size[1]):
84
+ raise FaceWarpException('Not (outer_padding[0] < output_size[0] and outer_padding[1] < output_size[1])')
85
+
86
+ # 1) pad the inner region according inner_padding_factor
87
+ if inner_padding_factor > 0:
88
+ size_diff = tmp_crop_size * inner_padding_factor * 2
89
+ tmp_5pts += size_diff / 2
90
+ tmp_crop_size += np.round(size_diff).astype(np.int32)
91
+
92
+ # 2) resize the padded inner region
93
+ size_bf_outer_pad = np.array(output_size) - np.array(outer_padding) * 2
94
+
95
+ if size_bf_outer_pad[0] * tmp_crop_size[1] != size_bf_outer_pad[1] * tmp_crop_size[0]:
96
+ raise FaceWarpException('Must have (output_size - outer_padding)'
97
+ '= some_scale * (crop_size * (1.0 + inner_padding_factor)')
98
+
99
+ scale_factor = size_bf_outer_pad[0].astype(np.float32) / tmp_crop_size[0]
100
+ tmp_5pts = tmp_5pts * scale_factor
101
+ # size_diff = tmp_crop_size * (scale_factor - min(scale_factor))
102
+ # tmp_5pts = tmp_5pts + size_diff / 2
103
+ tmp_crop_size = size_bf_outer_pad
104
+
105
+ # 3) add outer_padding to make output_size
106
+ reference_5point = tmp_5pts + np.array(outer_padding)
107
+ tmp_crop_size = output_size
108
+
109
+ return reference_5point
110
+
111
+
112
+ def get_affine_transform_matrix(src_pts, dst_pts):
113
+ """
114
+ Function:
115
+ ----------
116
+ get affine transform matrix 'tfm' from src_pts to dst_pts
117
+ Parameters:
118
+ ----------
119
+ @src_pts: Kx2 np.array
120
+ source points matrix, each row is a pair of coordinates (x, y)
121
+ @dst_pts: Kx2 np.array
122
+ destination points matrix, each row is a pair of coordinates (x, y)
123
+ Returns:
124
+ ----------
125
+ @tfm: 2x3 np.array
126
+ transform matrix from src_pts to dst_pts
127
+ """
128
+
129
+ tfm = np.float32([[1, 0, 0], [0, 1, 0]])
130
+ n_pts = src_pts.shape[0]
131
+ ones = np.ones((n_pts, 1), src_pts.dtype)
132
+ src_pts_ = np.hstack([src_pts, ones])
133
+ dst_pts_ = np.hstack([dst_pts, ones])
134
+
135
+ A, res, rank, s = np.linalg.lstsq(src_pts_, dst_pts_)
136
+
137
+ if rank == 3:
138
+ tfm = np.float32([[A[0, 0], A[1, 0], A[2, 0]], [A[0, 1], A[1, 1], A[2, 1]]])
139
+ elif rank == 2:
140
+ tfm = np.float32([[A[0, 0], A[1, 0], 0], [A[0, 1], A[1, 1], 0]])
141
+
142
+ return tfm
143
+
144
+
145
+ def warp_and_crop_face(src_img, facial_pts, reference_pts=None, crop_size=(96, 112), align_type='smilarity'):
146
+ """
147
+ Function:
148
+ ----------
149
+ apply affine transform 'trans' to uv
150
+ Parameters:
151
+ ----------
152
+ @src_img: 3x3 np.array
153
+ input image
154
+ @facial_pts: could be
155
+ 1)a list of K coordinates (x,y)
156
+ or
157
+ 2) Kx2 or 2xK np.array
158
+ each row or col is a pair of coordinates (x, y)
159
+ @reference_pts: could be
160
+ 1) a list of K coordinates (x,y)
161
+ or
162
+ 2) Kx2 or 2xK np.array
163
+ each row or col is a pair of coordinates (x, y)
164
+ or
165
+ 3) None
166
+ if None, use default reference facial points
167
+ @crop_size: (w, h)
168
+ output face image size
169
+ @align_type: transform type, could be one of
170
+ 1) 'similarity': use similarity transform
171
+ 2) 'cv2_affine': use the first 3 points to do affine transform,
172
+ by calling cv2.getAffineTransform()
173
+ 3) 'affine': use all points to do affine transform
174
+ Returns:
175
+ ----------
176
+ @face_img: output face image with size (w, h) = @crop_size
177
+ """
178
+
179
+ if reference_pts is None:
180
+ if crop_size[0] == 96 and crop_size[1] == 112:
181
+ reference_pts = REFERENCE_FACIAL_POINTS
182
+ else:
183
+ default_square = False
184
+ inner_padding_factor = 0
185
+ outer_padding = (0, 0)
186
+ output_size = crop_size
187
+
188
+ reference_pts = get_reference_facial_points(output_size, inner_padding_factor, outer_padding,
189
+ default_square)
190
+
191
+ ref_pts = np.float32(reference_pts)
192
+ ref_pts_shp = ref_pts.shape
193
+ if max(ref_pts_shp) < 3 or min(ref_pts_shp) != 2:
194
+ raise FaceWarpException('reference_pts.shape must be (K,2) or (2,K) and K>2')
195
+
196
+ if ref_pts_shp[0] == 2:
197
+ ref_pts = ref_pts.T
198
+
199
+ src_pts = np.float32(facial_pts)
200
+ src_pts_shp = src_pts.shape
201
+ if max(src_pts_shp) < 3 or min(src_pts_shp) != 2:
202
+ raise FaceWarpException('facial_pts.shape must be (K,2) or (2,K) and K>2')
203
+
204
+ if src_pts_shp[0] == 2:
205
+ src_pts = src_pts.T
206
+
207
+ if src_pts.shape != ref_pts.shape:
208
+ raise FaceWarpException('facial_pts and reference_pts must have the same shape')
209
+
210
+ if align_type == 'cv2_affine':
211
+ tfm = cv2.getAffineTransform(src_pts[0:3], ref_pts[0:3])
212
+ elif align_type == 'affine':
213
+ tfm = get_affine_transform_matrix(src_pts, ref_pts)
214
+ else:
215
+ tfm = get_similarity_transform_for_cv2(src_pts, ref_pts)
216
+
217
+ face_img = cv2.warpAffine(src_img, tfm, (crop_size[0], crop_size[1]))
218
+
219
+ return face_img
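warp_and_crop_face ties this module to matlab_cp2tform below: given five facial landmarks it estimates a similarity (or affine) transform onto the reference template and warps the crop with cv2.warpAffine. A minimal sketch, assuming a BGR image from OpenCV and landmarks in (x, y) order:

import cv2
import numpy as np

from r_facelib.detection.align_trans import warp_and_crop_face

img = cv2.imread("photo.jpg")  # hypothetical input image

# Five landmarks from a detector: left eye, right eye, nose, left/right mouth corner.
landmarks = np.array([[230.0, 210.0], [310.0, 208.0], [270.0, 260.0],
                      [238.0, 310.0], [302.0, 308.0]], dtype=np.float32)

# The default 96x112 crop uses the built-in REFERENCE_FACIAL_POINTS template.
aligned = warp_and_crop_face(img, landmarks, crop_size=(96, 112))
print(aligned.shape)  # (112, 96, 3)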
r_facelib/detection/matlab_cp2tform.py ADDED
@@ -0,0 +1,317 @@
1
+ import numpy as np
2
+ from numpy.linalg import inv, lstsq
3
+ from numpy.linalg import matrix_rank as rank
4
+ from numpy.linalg import norm
5
+
6
+
7
+ class MatlabCp2tormException(Exception):
8
+
9
+ def __str__(self):
10
+ return 'In File {}:{}'.format(__file__, super.__str__(self))
11
+
12
+
13
+ def tformfwd(trans, uv):
14
+ """
15
+ Function:
16
+ ----------
17
+ apply affine transform 'trans' to uv
18
+
19
+ Parameters:
20
+ ----------
21
+ @trans: 3x3 np.array
22
+ transform matrix
23
+ @uv: Kx2 np.array
24
+ each row is a pair of coordinates (x, y)
25
+
26
+ Returns:
27
+ ----------
28
+ @xy: Kx2 np.array
29
+ each row is a pair of transformed coordinates (x, y)
30
+ """
31
+ uv = np.hstack((uv, np.ones((uv.shape[0], 1))))
32
+ xy = np.dot(uv, trans)
33
+ xy = xy[:, 0:-1]
34
+ return xy
35
+
36
+
37
+ def tforminv(trans, uv):
38
+ """
39
+ Function:
40
+ ----------
41
+ apply the inverse of affine transform 'trans' to uv
42
+
43
+ Parameters:
44
+ ----------
45
+ @trans: 3x3 np.array
46
+ transform matrix
47
+ @uv: Kx2 np.array
48
+ each row is a pair of coordinates (x, y)
49
+
50
+ Returns:
51
+ ----------
52
+ @xy: Kx2 np.array
53
+ each row is a pair of inverse-transformed coordinates (x, y)
54
+ """
55
+ Tinv = inv(trans)
56
+ xy = tformfwd(Tinv, uv)
57
+ return xy
58
+
59
+
60
+ def findNonreflectiveSimilarity(uv, xy, options=None):
61
+ options = {'K': 2}
62
+
63
+ K = options['K']
64
+ M = xy.shape[0]
65
+ x = xy[:, 0].reshape((-1, 1)) # use reshape to keep a column vector
66
+ y = xy[:, 1].reshape((-1, 1)) # use reshape to keep a column vector
67
+
68
+ tmp1 = np.hstack((x, y, np.ones((M, 1)), np.zeros((M, 1))))
69
+ tmp2 = np.hstack((y, -x, np.zeros((M, 1)), np.ones((M, 1))))
70
+ X = np.vstack((tmp1, tmp2))
71
+
72
+ u = uv[:, 0].reshape((-1, 1)) # use reshape to keep a column vector
73
+ v = uv[:, 1].reshape((-1, 1)) # use reshape to keep a column vector
74
+ U = np.vstack((u, v))
75
+
76
+ # We know that X * r = U
77
+ if rank(X) >= 2 * K:
78
+ r, _, _, _ = lstsq(X, U, rcond=-1)
79
+ r = np.squeeze(r)
80
+ else:
81
+ raise Exception('cp2tform:twoUniquePointsReq')
82
+ sc = r[0]
83
+ ss = r[1]
84
+ tx = r[2]
85
+ ty = r[3]
86
+
87
+ Tinv = np.array([[sc, -ss, 0], [ss, sc, 0], [tx, ty, 1]])
88
+ T = inv(Tinv)
89
+ T[:, 2] = np.array([0, 0, 1])
90
+
91
+ return T, Tinv
92
+
93
+
94
+ def findSimilarity(uv, xy, options=None):
95
+ options = {'K': 2}
96
+
97
+ # uv = np.array(uv)
98
+ # xy = np.array(xy)
99
+
100
+ # Solve for trans1
101
+ trans1, trans1_inv = findNonreflectiveSimilarity(uv, xy, options)
102
+
103
+ # Solve for trans2
104
+
105
+ # manually reflect the xy data across the Y-axis
106
+ xyR = xy.copy() # copy so the reflection below does not modify the caller's xy
107
+ xyR[:, 0] = -1 * xyR[:, 0]
108
+
109
+ trans2r, trans2r_inv = findNonreflectiveSimilarity(uv, xyR, options)
110
+
111
+ # manually reflect the tform to undo the reflection done on xyR
112
+ TreflectY = np.array([[-1, 0, 0], [0, 1, 0], [0, 0, 1]])
113
+
114
+ trans2 = np.dot(trans2r, TreflectY)
115
+
116
+ # Figure out if trans1 or trans2 is better
117
+ xy1 = tformfwd(trans1, uv)
118
+ norm1 = norm(xy1 - xy)
119
+
120
+ xy2 = tformfwd(trans2, uv)
121
+ norm2 = norm(xy2 - xy)
122
+
123
+ if norm1 <= norm2:
124
+ return trans1, trans1_inv
125
+ else:
126
+ trans2_inv = inv(trans2)
127
+ return trans2, trans2_inv
128
+
129
+
130
+ def get_similarity_transform(src_pts, dst_pts, reflective=True):
131
+ """
132
+ Function:
133
+ ----------
134
+ Find Similarity Transform Matrix 'trans':
135
+ u = src_pts[:, 0]
136
+ v = src_pts[:, 1]
137
+ x = dst_pts[:, 0]
138
+ y = dst_pts[:, 1]
139
+ [x, y, 1] = [u, v, 1] * trans
140
+
141
+ Parameters:
142
+ ----------
143
+ @src_pts: Kx2 np.array
144
+ source points, each row is a pair of coordinates (x, y)
145
+ @dst_pts: Kx2 np.array
146
+ destination points, each row is a pair of transformed
147
+ coordinates (x, y)
148
+ @reflective: True or False
149
+ if True:
150
+ use reflective similarity transform
151
+ else:
152
+ use non-reflective similarity transform
153
+
154
+ Returns:
155
+ ----------
156
+ @trans: 3x3 np.array
157
+ transform matrix from uv to xy
158
+ trans_inv: 3x3 np.array
159
+ inverse of trans, transform matrix from xy to uv
160
+ """
161
+
162
+ if reflective:
163
+ trans, trans_inv = findSimilarity(src_pts, dst_pts)
164
+ else:
165
+ trans, trans_inv = findNonreflectiveSimilarity(src_pts, dst_pts)
166
+
167
+ return trans, trans_inv
168
+
169
+
170
+ def cvt_tform_mat_for_cv2(trans):
171
+ """
172
+ Function:
173
+ ----------
174
+ Convert Transform Matrix 'trans' into 'cv2_trans' which could be
175
+ directly used by cv2.warpAffine():
176
+ u = src_pts[:, 0]
177
+ v = src_pts[:, 1]
178
+ x = dst_pts[:, 0]
179
+ y = dst_pts[:, 1]
180
+ [x, y].T = cv_trans * [u, v, 1].T
181
+
182
+ Parameters:
183
+ ----------
184
+ @trans: 3x3 np.array
185
+ transform matrix from uv to xy
186
+
187
+ Returns:
188
+ ----------
189
+ @cv2_trans: 2x3 np.array
190
+ transform matrix from src_pts to dst_pts, could be directly used
191
+ for cv2.warpAffine()
192
+ """
193
+ cv2_trans = trans[:, 0:2].T
194
+
195
+ return cv2_trans
196
+
197
+
198
+ def get_similarity_transform_for_cv2(src_pts, dst_pts, reflective=True):
199
+ """
200
+ Function:
201
+ ----------
202
+ Find Similarity Transform Matrix 'cv2_trans' which could be
203
+ directly used by cv2.warpAffine():
204
+ u = src_pts[:, 0]
205
+ v = src_pts[:, 1]
206
+ x = dst_pts[:, 0]
207
+ y = dst_pts[:, 1]
208
+ [x, y].T = cv_trans * [u, v, 1].T
209
+
210
+ Parameters:
211
+ ----------
212
+ @src_pts: Kx2 np.array
213
+ source points, each row is a pair of coordinates (x, y)
214
+ @dst_pts: Kx2 np.array
215
+ destination points, each row is a pair of transformed
216
+ coordinates (x, y)
217
+ reflective: True or False
218
+ if True:
219
+ use reflective similarity transform
220
+ else:
221
+ use non-reflective similarity transform
222
+
223
+ Returns:
224
+ ----------
225
+ @cv2_trans: 2x3 np.array
226
+ transform matrix from src_pts to dst_pts, could be directly used
227
+ for cv2.warpAffine()
228
+ """
229
+ trans, trans_inv = get_similarity_transform(src_pts, dst_pts, reflective)
230
+ cv2_trans = cvt_tform_mat_for_cv2(trans)
231
+
232
+ return cv2_trans
233
+
234
+
235
+ if __name__ == '__main__':
236
+ """
237
+ u = [0, 6, -2]
238
+ v = [0, 3, 5]
239
+ x = [-1, 0, 4]
240
+ y = [-1, -10, 4]
241
+
242
+ # In Matlab, run:
243
+ #
244
+ # uv = [u'; v'];
245
+ # xy = [x'; y'];
246
+ # tform_sim=cp2tform(uv,xy,'similarity');
247
+ #
248
+ # trans = tform_sim.tdata.T
249
+ # ans =
250
+ # -0.0764 -1.6190 0
251
+ # 1.6190 -0.0764 0
252
+ # -3.2156 0.0290 1.0000
253
+ # trans_inv = tform_sim.tdata.Tinv
254
+ # ans =
255
+ #
256
+ # -0.0291 0.6163 0
257
+ # -0.6163 -0.0291 0
258
+ # -0.0756 1.9826 1.0000
259
+ # xy_m=tformfwd(tform_sim, u,v)
260
+ #
261
+ # xy_m =
262
+ #
263
+ # -3.2156 0.0290
264
+ # 1.1833 -9.9143
265
+ # 5.0323 2.8853
266
+ # uv_m=tforminv(tform_sim, x,y)
267
+ #
268
+ # uv_m =
269
+ #
270
+ # 0.5698 1.3953
271
+ # 6.0872 2.2733
272
+ # -2.6570 4.3314
273
+ """
274
+ u = [0, 6, -2]
275
+ v = [0, 3, 5]
276
+ x = [-1, 0, 4]
277
+ y = [-1, -10, 4]
278
+
279
+ uv = np.array((u, v)).T
280
+ xy = np.array((x, y)).T
281
+
282
+ print('\n--->uv:')
283
+ print(uv)
284
+ print('\n--->xy:')
285
+ print(xy)
286
+
287
+ trans, trans_inv = get_similarity_transform(uv, xy)
288
+
289
+ print('\n--->trans matrix:')
290
+ print(trans)
291
+
292
+ print('\n--->trans_inv matrix:')
293
+ print(trans_inv)
294
+
295
+ print('\n---> apply transform to uv')
296
+ print('\nxy_m = uv_augmented * trans')
297
+ uv_aug = np.hstack((uv, np.ones((uv.shape[0], 1))))
298
+ xy_m = np.dot(uv_aug, trans)
299
+ print(xy_m)
300
+
301
+ print('\nxy_m = tformfwd(trans, uv)')
302
+ xy_m = tformfwd(trans, uv)
303
+ print(xy_m)
304
+
305
+ print('\n---> apply inverse transform to xy')
306
+ print('\nuv_m = xy_augmented * trans_inv')
307
+ xy_aug = np.hstack((xy, np.ones((xy.shape[0], 1))))
308
+ uv_m = np.dot(xy_aug, trans_inv)
309
+ print(uv_m)
310
+
311
+ print('\nuv_m = tformfwd(trans_inv, xy)')
312
+ uv_m = tformfwd(trans_inv, xy)
313
+ print(uv_m)
314
+
315
+ uv_m = tforminv(trans, xy)
316
+ print('\nuv_m = tforminv(trans, xy)')
317
+ print(uv_m)
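For orientation, the function most callers use from this module is get_similarity_transform_for_cv2, whose 2x3 result feeds cv2.warpAffine directly (this is exactly what align_trans.py does). A small sketch with made-up points; the blank 112x112 image and the point coordinates are assumptions, not values from the file.

import cv2
import numpy as np

from r_facelib.detection.matlab_cp2tform import get_similarity_transform_for_cv2

src_pts = np.array([[30.3, 51.7], [65.5, 51.5], [48.0, 71.7], [33.5, 92.4], [62.7, 92.2]])  # toy landmark values
dst_pts = src_pts + np.array([5.0, -3.0])  # where those landmarks should land (toy target)

cv2_trans = get_similarity_transform_for_cv2(src_pts, dst_pts)  # 2x3 matrix, ready for cv2.warpAffine
img = np.zeros((112, 112, 3), dtype=np.uint8)  # placeholder image
warped = cv2.warpAffine(img, cv2_trans, (112, 112))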
r_facelib/detection/retinaface/__pycache__/retinaface.cpython-311.pyc ADDED
Binary file (20.8 kB). View file
 
r_facelib/detection/retinaface/__pycache__/retinaface_net.cpython-311.pyc ADDED
Binary file (13 kB). View file
 
r_facelib/detection/retinaface/__pycache__/retinaface_utils.cpython-311.pyc ADDED
Binary file (26.8 kB). View file
 
r_facelib/detection/retinaface/retinaface.py ADDED
@@ -0,0 +1,389 @@
1
+ import cv2
2
+ import numpy as np
3
+ import torch
4
+ import torch.nn as nn
5
+ import torch.nn.functional as F
6
+ from PIL import Image
7
+ from torchvision.models._utils import IntermediateLayerGetter as IntermediateLayerGetter
8
+
9
+ from modules import shared
10
+
11
+ from r_facelib.detection.align_trans import get_reference_facial_points, warp_and_crop_face
12
+ from r_facelib.detection.retinaface.retinaface_net import FPN, SSH, MobileNetV1, make_bbox_head, make_class_head, make_landmark_head
13
+ from r_facelib.detection.retinaface.retinaface_utils import (PriorBox, batched_decode, batched_decode_landm, decode, decode_landm,
14
+ py_cpu_nms)
15
+
16
+ #device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
17
+ if torch.cuda.is_available():
18
+ device = torch.device('cuda')
19
+ elif torch.backends.mps.is_available():
20
+ device = torch.device('mps')
21
+ # elif hasattr(torch,'dml'):
22
+ # device = torch.device('dml')
23
+ elif hasattr(torch,'dml') or hasattr(torch,'privateuseone'): # AMD
24
+ if shared.cmd_opts is not None: # A1111
25
+ if shared.cmd_opts.device_id is not None:
26
+ device = torch.device(f'privateuseone:{shared.cmd_opts.device_id}')
27
+ else:
28
+ device = torch.device('privateuseone:0')
29
+ else:
30
+ device = torch.device('privateuseone:0')
31
+ else:
32
+ device = torch.device('cpu')
33
+
34
+
35
+ def generate_config(network_name):
36
+
37
+ cfg_mnet = {
38
+ 'name': 'mobilenet0.25',
39
+ 'min_sizes': [[16, 32], [64, 128], [256, 512]],
40
+ 'steps': [8, 16, 32],
41
+ 'variance': [0.1, 0.2],
42
+ 'clip': False,
43
+ 'loc_weight': 2.0,
44
+ 'gpu_train': True,
45
+ 'batch_size': 32,
46
+ 'ngpu': 1,
47
+ 'epoch': 250,
48
+ 'decay1': 190,
49
+ 'decay2': 220,
50
+ 'image_size': 640,
51
+ 'return_layers': {
52
+ 'stage1': 1,
53
+ 'stage2': 2,
54
+ 'stage3': 3
55
+ },
56
+ 'in_channel': 32,
57
+ 'out_channel': 64
58
+ }
59
+
60
+ cfg_re50 = {
61
+ 'name': 'Resnet50',
62
+ 'min_sizes': [[16, 32], [64, 128], [256, 512]],
63
+ 'steps': [8, 16, 32],
64
+ 'variance': [0.1, 0.2],
65
+ 'clip': False,
66
+ 'loc_weight': 2.0,
67
+ 'gpu_train': True,
68
+ 'batch_size': 24,
69
+ 'ngpu': 4,
70
+ 'epoch': 100,
71
+ 'decay1': 70,
72
+ 'decay2': 90,
73
+ 'image_size': 840,
74
+ 'return_layers': {
75
+ 'layer2': 1,
76
+ 'layer3': 2,
77
+ 'layer4': 3
78
+ },
79
+ 'in_channel': 256,
80
+ 'out_channel': 256
81
+ }
82
+
83
+ if network_name == 'mobile0.25':
84
+ return cfg_mnet
85
+ elif network_name == 'resnet50':
86
+ return cfg_re50
87
+ else:
88
+ raise NotImplementedError(f'network_name={network_name}')
89
+
90
+
91
+ class RetinaFace(nn.Module):
92
+
93
+ def __init__(self, network_name='resnet50', half=False, phase='test'):
94
+ super(RetinaFace, self).__init__()
95
+ self.half_inference = half
96
+ cfg = generate_config(network_name)
97
+ self.backbone = cfg['name']
98
+
99
+ self.model_name = f'retinaface_{network_name}'
100
+ self.cfg = cfg
101
+ self.phase = phase
102
+ self.target_size, self.max_size = 1600, 2150
103
+ self.resize, self.scale, self.scale1 = 1., None, None
104
+ self.mean_tensor = torch.tensor([[[[104.]], [[117.]], [[123.]]]]).to(device)
105
+ self.reference = get_reference_facial_points(default_square=True)
106
+ # Build network.
107
+ backbone = None
108
+ if cfg['name'] == 'mobilenet0.25':
109
+ backbone = MobileNetV1()
110
+ self.body = IntermediateLayerGetter(backbone, cfg['return_layers'])
111
+ elif cfg['name'] == 'Resnet50':
112
+ import torchvision.models as models
113
+ backbone = models.resnet50(pretrained=False)
114
+ self.body = IntermediateLayerGetter(backbone, cfg['return_layers'])
115
+
116
+ in_channels_stage2 = cfg['in_channel']
117
+ in_channels_list = [
118
+ in_channels_stage2 * 2,
119
+ in_channels_stage2 * 4,
120
+ in_channels_stage2 * 8,
121
+ ]
122
+
123
+ out_channels = cfg['out_channel']
124
+ self.fpn = FPN(in_channels_list, out_channels)
125
+ self.ssh1 = SSH(out_channels, out_channels)
126
+ self.ssh2 = SSH(out_channels, out_channels)
127
+ self.ssh3 = SSH(out_channels, out_channels)
128
+
129
+ self.ClassHead = make_class_head(fpn_num=3, inchannels=cfg['out_channel'])
130
+ self.BboxHead = make_bbox_head(fpn_num=3, inchannels=cfg['out_channel'])
131
+ self.LandmarkHead = make_landmark_head(fpn_num=3, inchannels=cfg['out_channel'])
132
+
133
+ self.to(device)
134
+ self.eval()
135
+ if self.half_inference:
136
+ self.half()
137
+
138
+ def forward(self, inputs):
139
+ self.to(device)
140
+ out = self.body(inputs)
141
+
142
+ if self.backbone == 'mobilenet0.25' or self.backbone == 'Resnet50':
143
+ out = list(out.values())
144
+ # FPN
145
+ fpn = self.fpn(out)
146
+
147
+ # SSH
148
+ feature1 = self.ssh1(fpn[0])
149
+ feature2 = self.ssh2(fpn[1])
150
+ feature3 = self.ssh3(fpn[2])
151
+ features = [feature1, feature2, feature3]
152
+
153
+ bbox_regressions = torch.cat([self.BboxHead[i](feature) for i, feature in enumerate(features)], dim=1)
154
+ classifications = torch.cat([self.ClassHead[i](feature) for i, feature in enumerate(features)], dim=1)
155
+ tmp = [self.LandmarkHead[i](feature) for i, feature in enumerate(features)]
156
+ ldm_regressions = (torch.cat(tmp, dim=1))
157
+
158
+ if self.phase == 'train':
159
+ output = (bbox_regressions, classifications, ldm_regressions)
160
+ else:
161
+ output = (bbox_regressions, F.softmax(classifications, dim=-1), ldm_regressions)
162
+ return output
163
+
164
+ def __detect_faces(self, inputs):
165
+ # get scale
166
+ height, width = inputs.shape[2:]
167
+ self.scale = torch.tensor([width, height, width, height], dtype=torch.float32).to(device)
168
+ tmp = [width, height, width, height, width, height, width, height, width, height]
169
+ self.scale1 = torch.tensor(tmp, dtype=torch.float32).to(device)
170
+
171
+ # forward
172
+ inputs = inputs.to(device)
173
+ if self.half_inference:
174
+ inputs = inputs.half()
175
+ loc, conf, landmarks = self(inputs)
176
+
177
+ # get priorbox
178
+ priorbox = PriorBox(self.cfg, image_size=inputs.shape[2:])
179
+ priors = priorbox.forward().to(device)
180
+
181
+ return loc, conf, landmarks, priors
182
+
183
+ # single image detection
184
+ def transform(self, image, use_origin_size):
185
+ # convert to opencv format
186
+ if isinstance(image, Image.Image):
187
+ image = cv2.cvtColor(np.asarray(image), cv2.COLOR_RGB2BGR)
188
+ image = image.astype(np.float32)
189
+
190
+ # testing scale
191
+ im_size_min = np.min(image.shape[0:2])
192
+ im_size_max = np.max(image.shape[0:2])
193
+ resize = float(self.target_size) / float(im_size_min)
194
+
195
+ # prevent bigger axis from being more than max_size
196
+ if np.round(resize * im_size_max) > self.max_size:
197
+ resize = float(self.max_size) / float(im_size_max)
198
+ resize = 1 if use_origin_size else resize
199
+
200
+ # resize
201
+ if resize != 1:
202
+ image = cv2.resize(image, None, None, fx=resize, fy=resize, interpolation=cv2.INTER_LINEAR)
203
+
204
+ # convert to torch.tensor format
205
+ # image -= (104, 117, 123)
206
+ image = image.transpose(2, 0, 1)
207
+ image = torch.from_numpy(image).unsqueeze(0)
208
+
209
+ return image, resize
210
+
211
+ def detect_faces(
212
+ self,
213
+ image,
214
+ conf_threshold=0.8,
215
+ nms_threshold=0.4,
216
+ use_origin_size=True,
217
+ ):
218
+ """
219
+ Params:
220
+ imgs: BGR image
221
+ """
222
+ image, self.resize = self.transform(image, use_origin_size)
223
+ image = image.to(device)
224
+ if self.half_inference:
225
+ image = image.half()
226
+ image = image - self.mean_tensor
227
+
228
+ loc, conf, landmarks, priors = self.__detect_faces(image)
229
+
230
+ boxes = decode(loc.data.squeeze(0), priors.data, self.cfg['variance'])
231
+ boxes = boxes * self.scale / self.resize
232
+ boxes = boxes.cpu().numpy()
233
+
234
+ scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
235
+
236
+ landmarks = decode_landm(landmarks.squeeze(0), priors, self.cfg['variance'])
237
+ landmarks = landmarks * self.scale1 / self.resize
238
+ landmarks = landmarks.cpu().numpy()
239
+
240
+ # ignore low scores
241
+ inds = np.where(scores > conf_threshold)[0]
242
+ boxes, landmarks, scores = boxes[inds], landmarks[inds], scores[inds]
243
+
244
+ # sort
245
+ order = scores.argsort()[::-1]
246
+ boxes, landmarks, scores = boxes[order], landmarks[order], scores[order]
247
+
248
+ # do NMS
249
+ bounding_boxes = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
250
+ keep = py_cpu_nms(bounding_boxes, nms_threshold)
251
+ bounding_boxes, landmarks = bounding_boxes[keep, :], landmarks[keep]
252
+ # self.t['forward_pass'].toc()
253
+ # print(self.t['forward_pass'].average_time)
254
+ # import sys
255
+ # sys.stdout.flush()
256
+ return np.concatenate((bounding_boxes, landmarks), axis=1)
257
+
258
+ def __align_multi(self, image, boxes, landmarks, limit=None):
259
+
260
+ if len(boxes) < 1:
261
+ return [], []
262
+
263
+ if limit:
264
+ boxes = boxes[:limit]
265
+ landmarks = landmarks[:limit]
266
+
267
+ faces = []
268
+ for landmark in landmarks:
269
+ facial5points = [[landmark[2 * j], landmark[2 * j + 1]] for j in range(5)]
270
+
271
+ warped_face = warp_and_crop_face(np.array(image), facial5points, self.reference, crop_size=(112, 112))
272
+ faces.append(warped_face)
273
+
274
+ return np.concatenate((boxes, landmarks), axis=1), faces
275
+
276
+ def align_multi(self, img, conf_threshold=0.8, limit=None):
277
+
278
+ rlt = self.detect_faces(img, conf_threshold=conf_threshold)
279
+ boxes, landmarks = rlt[:, 0:5], rlt[:, 5:]
280
+
281
+ return self.__align_multi(img, boxes, landmarks, limit)
282
+
283
+ # batched detection
284
+ def batched_transform(self, frames, use_origin_size):
285
+ """
286
+ Arguments:
287
+ frames: a list of PIL.Image, or torch.Tensor(shape=[n, h, w, c],
288
+ type=np.float32, BGR format).
289
+ use_origin_size: whether to use origin size.
290
+ """
291
+ from_PIL = True if isinstance(frames[0], Image.Image) else False
292
+
293
+ # convert to opencv format
294
+ if from_PIL:
295
+ frames = [cv2.cvtColor(np.asarray(frame), cv2.COLOR_RGB2BGR) for frame in frames]
296
+ frames = np.asarray(frames, dtype=np.float32)
297
+
298
+ # testing scale
299
+ im_size_min = np.min(frames[0].shape[0:2])
300
+ im_size_max = np.max(frames[0].shape[0:2])
301
+ resize = float(self.target_size) / float(im_size_min)
302
+
303
+ # prevent bigger axis from being more than max_size
304
+ if np.round(resize * im_size_max) > self.max_size:
305
+ resize = float(self.max_size) / float(im_size_max)
306
+ resize = 1 if use_origin_size else resize
307
+
308
+ # resize
309
+ if resize != 1:
310
+ if not from_PIL:
311
+ frames = F.interpolate(frames, scale_factor=resize)
312
+ else:
313
+ frames = [
314
+ cv2.resize(frame, None, None, fx=resize, fy=resize, interpolation=cv2.INTER_LINEAR)
315
+ for frame in frames
316
+ ]
317
+
318
+ # convert to torch.tensor format
319
+ if not from_PIL:
320
+ frames = frames.transpose(1, 2).transpose(1, 3).contiguous()
321
+ else:
322
+ frames = frames.transpose((0, 3, 1, 2))
323
+ frames = torch.from_numpy(frames)
324
+
325
+ return frames, resize
326
+
327
+ def batched_detect_faces(self, frames, conf_threshold=0.8, nms_threshold=0.4, use_origin_size=True):
328
+ """
329
+ Arguments:
330
+ frames: a list of PIL.Image, or np.array(shape=[n, h, w, c],
331
+ type=np.uint8, BGR format).
332
+ conf_threshold: confidence threshold.
333
+ nms_threshold: nms threshold.
334
+ use_origin_size: whether to use origin size.
335
+ Returns:
336
+ final_bounding_boxes: list of np.array ([n_boxes, 5],
337
+ type=np.float32).
338
+ final_landmarks: list of np.array ([n_boxes, 10], type=np.float32).
339
+ """
340
+ # self.t['forward_pass'].tic()
341
+ frames, self.resize = self.batched_transform(frames, use_origin_size)
342
+ frames = frames.to(device)
343
+ frames = frames - self.mean_tensor
344
+
345
+ b_loc, b_conf, b_landmarks, priors = self.__detect_faces(frames)
346
+
347
+ final_bounding_boxes, final_landmarks = [], []
348
+
349
+ # decode
350
+ priors = priors.unsqueeze(0)
351
+ b_loc = batched_decode(b_loc, priors, self.cfg['variance']) * self.scale / self.resize
352
+ b_landmarks = batched_decode_landm(b_landmarks, priors, self.cfg['variance']) * self.scale1 / self.resize
353
+ b_conf = b_conf[:, :, 1]
354
+
355
+ # index for selection
356
+ b_indice = b_conf > conf_threshold
357
+
358
+ # concat
359
+ b_loc_and_conf = torch.cat((b_loc, b_conf.unsqueeze(-1)), dim=2).float()
360
+
361
+ for pred, landm, inds in zip(b_loc_and_conf, b_landmarks, b_indice):
362
+
363
+ # ignore low scores
364
+ pred, landm = pred[inds, :], landm[inds, :]
365
+ if pred.shape[0] == 0:
366
+ final_bounding_boxes.append(np.array([], dtype=np.float32))
367
+ final_landmarks.append(np.array([], dtype=np.float32))
368
+ continue
369
+
370
+ # sort
371
+ # order = score.argsort(descending=True)
372
+ # box, landm, score = box[order], landm[order], score[order]
373
+
374
+ # to CPU
375
+ bounding_boxes, landm = pred.cpu().numpy(), landm.cpu().numpy()
376
+
377
+ # NMS
378
+ keep = py_cpu_nms(bounding_boxes, nms_threshold)
379
+ bounding_boxes, landmarks = bounding_boxes[keep, :], landm[keep]
380
+
381
+ # append
382
+ final_bounding_boxes.append(bounding_boxes)
383
+ final_landmarks.append(landmarks)
384
+ # self.t['forward_pass'].toc(average=True)
385
+ # self.batch_time += self.t['forward_pass'].diff
386
+ # self.total_frame += len(frames)
387
+ # print(self.batch_time / self.total_frame)
388
+
389
+ return final_bounding_boxes, final_landmarks
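Putting the class above to work takes three steps: build the model (construction already moves it to the selected device), load a checkpoint, and call detect_faces or align_multi on a BGR image. A hedged sketch follows; the checkpoint path and strict=False are assumptions, since weight loading is handled by the surrounding code base rather than by this file.

import cv2
import torch

from r_facelib.detection.retinaface.retinaface import RetinaFace, device

model = RetinaFace(network_name='resnet50', half=False)
state_dict = torch.load('weights/detection_Resnet50_Final.pth', map_location=device)  # assumed path
model.load_state_dict(state_dict, strict=False)

img = cv2.imread('group_photo.jpg')  # BGR, as detect_faces expects
with torch.no_grad():
    dets = model.detect_faces(img, conf_threshold=0.8, nms_threshold=0.4)
# One row per face: [x1, y1, x2, y2, score] followed by five (x, y) landmark pairs.
boxes, landmarks = dets[:, 0:5], dets[:, 5:]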
r_facelib/detection/retinaface/retinaface_net.py ADDED
@@ -0,0 +1,196 @@
1
+ import torch
2
+ import torch.nn as nn
3
+ import torch.nn.functional as F
4
+
5
+
6
+ def conv_bn(inp, oup, stride=1, leaky=0):
7
+ return nn.Sequential(
8
+ nn.Conv2d(inp, oup, 3, stride, 1, bias=False), nn.BatchNorm2d(oup),
9
+ nn.LeakyReLU(negative_slope=leaky, inplace=True))
10
+
11
+
12
+ def conv_bn_no_relu(inp, oup, stride):
13
+ return nn.Sequential(
14
+ nn.Conv2d(inp, oup, 3, stride, 1, bias=False),
15
+ nn.BatchNorm2d(oup),
16
+ )
17
+
18
+
19
+ def conv_bn1X1(inp, oup, stride, leaky=0):
20
+ return nn.Sequential(
21
+ nn.Conv2d(inp, oup, 1, stride, padding=0, bias=False), nn.BatchNorm2d(oup),
22
+ nn.LeakyReLU(negative_slope=leaky, inplace=True))
23
+
24
+
25
+ def conv_dw(inp, oup, stride, leaky=0.1):
26
+ return nn.Sequential(
27
+ nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False),
28
+ nn.BatchNorm2d(inp),
29
+ nn.LeakyReLU(negative_slope=leaky, inplace=True),
30
+ nn.Conv2d(inp, oup, 1, 1, 0, bias=False),
31
+ nn.BatchNorm2d(oup),
32
+ nn.LeakyReLU(negative_slope=leaky, inplace=True),
33
+ )
34
+
35
+
36
+ class SSH(nn.Module):
37
+
38
+ def __init__(self, in_channel, out_channel):
39
+ super(SSH, self).__init__()
40
+ assert out_channel % 4 == 0
41
+ leaky = 0
42
+ if (out_channel <= 64):
43
+ leaky = 0.1
44
+ self.conv3X3 = conv_bn_no_relu(in_channel, out_channel // 2, stride=1)
45
+
46
+ self.conv5X5_1 = conv_bn(in_channel, out_channel // 4, stride=1, leaky=leaky)
47
+ self.conv5X5_2 = conv_bn_no_relu(out_channel // 4, out_channel // 4, stride=1)
48
+
49
+ self.conv7X7_2 = conv_bn(out_channel // 4, out_channel // 4, stride=1, leaky=leaky)
50
+ self.conv7x7_3 = conv_bn_no_relu(out_channel // 4, out_channel // 4, stride=1)
51
+
52
+ def forward(self, input):
53
+ conv3X3 = self.conv3X3(input)
54
+
55
+ conv5X5_1 = self.conv5X5_1(input)
56
+ conv5X5 = self.conv5X5_2(conv5X5_1)
57
+
58
+ conv7X7_2 = self.conv7X7_2(conv5X5_1)
59
+ conv7X7 = self.conv7x7_3(conv7X7_2)
60
+
61
+ out = torch.cat([conv3X3, conv5X5, conv7X7], dim=1)
62
+ out = F.relu(out)
63
+ return out
64
+
65
+
66
+ class FPN(nn.Module):
67
+
68
+ def __init__(self, in_channels_list, out_channels):
69
+ super(FPN, self).__init__()
70
+ leaky = 0
71
+ if (out_channels <= 64):
72
+ leaky = 0.1
73
+ self.output1 = conv_bn1X1(in_channels_list[0], out_channels, stride=1, leaky=leaky)
74
+ self.output2 = conv_bn1X1(in_channels_list[1], out_channels, stride=1, leaky=leaky)
75
+ self.output3 = conv_bn1X1(in_channels_list[2], out_channels, stride=1, leaky=leaky)
76
+
77
+ self.merge1 = conv_bn(out_channels, out_channels, leaky=leaky)
78
+ self.merge2 = conv_bn(out_channels, out_channels, leaky=leaky)
79
+
80
+ def forward(self, input):
81
+ # names = list(input.keys())
82
+ # input = list(input.values())
83
+
84
+ output1 = self.output1(input[0])
85
+ output2 = self.output2(input[1])
86
+ output3 = self.output3(input[2])
87
+
88
+ up3 = F.interpolate(output3, size=[output2.size(2), output2.size(3)], mode='nearest')
89
+ output2 = output2 + up3
90
+ output2 = self.merge2(output2)
91
+
92
+ up2 = F.interpolate(output2, size=[output1.size(2), output1.size(3)], mode='nearest')
93
+ output1 = output1 + up2
94
+ output1 = self.merge1(output1)
95
+
96
+ out = [output1, output2, output3]
97
+ return out
98
+
99
+
100
+ class MobileNetV1(nn.Module):
101
+
102
+ def __init__(self):
103
+ super(MobileNetV1, self).__init__()
104
+ self.stage1 = nn.Sequential(
105
+ conv_bn(3, 8, 2, leaky=0.1), # 3
106
+ conv_dw(8, 16, 1), # 7
107
+ conv_dw(16, 32, 2), # 11
108
+ conv_dw(32, 32, 1), # 19
109
+ conv_dw(32, 64, 2), # 27
110
+ conv_dw(64, 64, 1), # 43
111
+ )
112
+ self.stage2 = nn.Sequential(
113
+ conv_dw(64, 128, 2), # 43 + 16 = 59
114
+ conv_dw(128, 128, 1), # 59 + 32 = 91
115
+ conv_dw(128, 128, 1), # 91 + 32 = 123
116
+ conv_dw(128, 128, 1), # 123 + 32 = 155
117
+ conv_dw(128, 128, 1), # 155 + 32 = 187
118
+ conv_dw(128, 128, 1), # 187 + 32 = 219
119
+ )
120
+ self.stage3 = nn.Sequential(
121
+ conv_dw(128, 256, 2), # 219 +3 2 = 241
122
+ conv_dw(256, 256, 1), # 241 + 64 = 301
123
+ )
124
+ self.avg = nn.AdaptiveAvgPool2d((1, 1))
125
+ self.fc = nn.Linear(256, 1000)
126
+
127
+ def forward(self, x):
128
+ x = self.stage1(x)
129
+ x = self.stage2(x)
130
+ x = self.stage3(x)
131
+ x = self.avg(x)
132
+ # x = self.model(x)
133
+ x = x.view(-1, 256)
134
+ x = self.fc(x)
135
+ return x
136
+
137
+
138
+ class ClassHead(nn.Module):
139
+
140
+ def __init__(self, inchannels=512, num_anchors=3):
141
+ super(ClassHead, self).__init__()
142
+ self.num_anchors = num_anchors
143
+ self.conv1x1 = nn.Conv2d(inchannels, self.num_anchors * 2, kernel_size=(1, 1), stride=1, padding=0)
144
+
145
+ def forward(self, x):
146
+ out = self.conv1x1(x)
147
+ out = out.permute(0, 2, 3, 1).contiguous()
148
+
149
+ return out.view(out.shape[0], -1, 2)
150
+
151
+
152
+ class BboxHead(nn.Module):
153
+
154
+ def __init__(self, inchannels=512, num_anchors=3):
155
+ super(BboxHead, self).__init__()
156
+ self.conv1x1 = nn.Conv2d(inchannels, num_anchors * 4, kernel_size=(1, 1), stride=1, padding=0)
157
+
158
+ def forward(self, x):
159
+ out = self.conv1x1(x)
160
+ out = out.permute(0, 2, 3, 1).contiguous()
161
+
162
+ return out.view(out.shape[0], -1, 4)
163
+
164
+
165
+ class LandmarkHead(nn.Module):
166
+
167
+ def __init__(self, inchannels=512, num_anchors=3):
168
+ super(LandmarkHead, self).__init__()
169
+ self.conv1x1 = nn.Conv2d(inchannels, num_anchors * 10, kernel_size=(1, 1), stride=1, padding=0)
170
+
171
+ def forward(self, x):
172
+ out = self.conv1x1(x)
173
+ out = out.permute(0, 2, 3, 1).contiguous()
174
+
175
+ return out.view(out.shape[0], -1, 10)
176
+
177
+
178
+ def make_class_head(fpn_num=3, inchannels=64, anchor_num=2):
179
+ classhead = nn.ModuleList()
180
+ for i in range(fpn_num):
181
+ classhead.append(ClassHead(inchannels, anchor_num))
182
+ return classhead
183
+
184
+
185
+ def make_bbox_head(fpn_num=3, inchannels=64, anchor_num=2):
186
+ bboxhead = nn.ModuleList()
187
+ for i in range(fpn_num):
188
+ bboxhead.append(BboxHead(inchannels, anchor_num))
189
+ return bboxhead
190
+
191
+
192
+ def make_landmark_head(fpn_num=3, inchannels=64, anchor_num=2):
193
+ landmarkhead = nn.ModuleList()
194
+ for i in range(fpn_num):
195
+ landmarkhead.append(LandmarkHead(inchannels, anchor_num))
196
+ return landmarkhead
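As a quick sanity check on the blocks above, the sketch below wires an FPN over three dummy ResNet-50 feature maps, passes the finest level through one SSH block, and shows how a ClassHead reshapes its output to (batch, anchors, 2). The channel widths mirror the cfg_re50 values in retinaface.py; the 640x640 input resolution and batch size are arbitrary assumptions.

import torch

from r_facelib.detection.retinaface.retinaface_net import FPN, SSH, make_class_head

in_channels_list = [512, 1024, 2048]  # in_channel=256 scaled by 2, 4, 8, as in retinaface.py
out_channels = 256

fpn = FPN(in_channels_list, out_channels)
ssh = SSH(out_channels, out_channels)
class_head = make_class_head(fpn_num=3, inchannels=out_channels)  # anchor_num defaults to 2

feats = [
    torch.randn(1, 512, 80, 80),   # stride 8
    torch.randn(1, 1024, 40, 40),  # stride 16
    torch.randn(1, 2048, 20, 20),  # stride 32
]
fpn_outs = fpn(feats)
cls0 = class_head[0](ssh(fpn_outs[0]))
print(cls0.shape)  # torch.Size([1, 12800, 2]): 80*80 positions x 2 anchors, 2 classes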
r_facelib/detection/retinaface/retinaface_utils.py ADDED
@@ -0,0 +1,421 @@
1
+ import numpy as np
2
+ import torch
3
+ import torchvision
4
+ from itertools import product as product
5
+ from math import ceil
6
+
7
+
8
+ class PriorBox(object):
9
+
10
+ def __init__(self, cfg, image_size=None, phase='train'):
11
+ super(PriorBox, self).__init__()
12
+ self.min_sizes = cfg['min_sizes']
13
+ self.steps = cfg['steps']
14
+ self.clip = cfg['clip']
15
+ self.image_size = image_size
16
+ self.feature_maps = [[ceil(self.image_size[0] / step), ceil(self.image_size[1] / step)] for step in self.steps]
17
+ self.name = 's'
18
+
19
+ def forward(self):
20
+ anchors = []
21
+ for k, f in enumerate(self.feature_maps):
22
+ min_sizes = self.min_sizes[k]
23
+ for i, j in product(range(f[0]), range(f[1])):
24
+ for min_size in min_sizes:
25
+ s_kx = min_size / self.image_size[1]
26
+ s_ky = min_size / self.image_size[0]
27
+ dense_cx = [x * self.steps[k] / self.image_size[1] for x in [j + 0.5]]
28
+ dense_cy = [y * self.steps[k] / self.image_size[0] for y in [i + 0.5]]
29
+ for cy, cx in product(dense_cy, dense_cx):
30
+ anchors += [cx, cy, s_kx, s_ky]
31
+
32
+ # back to torch land
33
+ output = torch.Tensor(anchors).view(-1, 4)
34
+ if self.clip:
35
+ output.clamp_(max=1, min=0)
36
+ return output
37
+
38
+
39
+ def py_cpu_nms(dets, thresh):
40
+ """Pure Python NMS baseline."""
41
+ keep = torchvision.ops.nms(
42
+ boxes=torch.Tensor(dets[:, :4]),
43
+ scores=torch.Tensor(dets[:, 4]),
44
+ iou_threshold=thresh,
45
+ )
46
+
47
+ return list(keep)
48
+
49
+
50
+ def point_form(boxes):
51
+ """ Convert prior_boxes to (xmin, ymin, xmax, ymax)
52
+ representation for comparison to point form ground truth data.
53
+ Args:
54
+ boxes: (tensor) center-size default boxes from priorbox layers.
55
+ Return:
56
+ boxes: (tensor) Converted xmin, ymin, xmax, ymax form of boxes.
57
+ """
58
+ return torch.cat(
59
+ (
60
+ boxes[:, :2] - boxes[:, 2:] / 2, # xmin, ymin
61
+ boxes[:, :2] + boxes[:, 2:] / 2),
62
+ 1) # xmax, ymax
63
+
64
+
65
+ def center_size(boxes):
66
+ """ Convert prior_boxes to (cx, cy, w, h)
67
+ representation for comparison to center-size form ground truth data.
68
+ Args:
69
+ boxes: (tensor) point_form boxes
70
+ Return:
71
+ boxes: (tensor) Converted (cx, cy, w, h) form of boxes.
72
+ """
73
+ return torch.cat(
74
+ ((boxes[:, 2:] + boxes[:, :2]) / 2, # cx, cy
75
+ boxes[:, 2:] - boxes[:, :2]), # w, h
76
+ 1)
77
+
78
+
79
+ def intersect(box_a, box_b):
80
+ """ We resize both tensors to [A,B,2] without new malloc:
81
+ [A,2] -> [A,1,2] -> [A,B,2]
82
+ [B,2] -> [1,B,2] -> [A,B,2]
83
+ Then we compute the area of intersect between box_a and box_b.
84
+ Args:
85
+ box_a: (tensor) bounding boxes, Shape: [A,4].
86
+ box_b: (tensor) bounding boxes, Shape: [B,4].
87
+ Return:
88
+ (tensor) intersection area, Shape: [A,B].
89
+ """
90
+ A = box_a.size(0)
91
+ B = box_b.size(0)
92
+ max_xy = torch.min(box_a[:, 2:].unsqueeze(1).expand(A, B, 2), box_b[:, 2:].unsqueeze(0).expand(A, B, 2))
93
+ min_xy = torch.max(box_a[:, :2].unsqueeze(1).expand(A, B, 2), box_b[:, :2].unsqueeze(0).expand(A, B, 2))
94
+ inter = torch.clamp((max_xy - min_xy), min=0)
95
+ return inter[:, :, 0] * inter[:, :, 1]
96
+
97
+
98
+ def jaccard(box_a, box_b):
99
+ """Compute the jaccard overlap of two sets of boxes. The jaccard overlap
100
+ is simply the intersection over union of two boxes. Here we operate on
101
+ ground truth boxes and default boxes.
102
+ E.g.:
103
+ A ∩ B / A ∪ B = A ∩ B / (area(A) + area(B) - A ∩ B)
104
+ Args:
105
+ box_a: (tensor) Ground truth bounding boxes, Shape: [num_objects,4]
106
+ box_b: (tensor) Prior boxes from priorbox layers, Shape: [num_priors,4]
107
+ Return:
108
+ jaccard overlap: (tensor) Shape: [box_a.size(0), box_b.size(0)]
109
+ """
110
+ inter = intersect(box_a, box_b)
111
+ area_a = ((box_a[:, 2] - box_a[:, 0]) * (box_a[:, 3] - box_a[:, 1])).unsqueeze(1).expand_as(inter) # [A,B]
112
+ area_b = ((box_b[:, 2] - box_b[:, 0]) * (box_b[:, 3] - box_b[:, 1])).unsqueeze(0).expand_as(inter) # [A,B]
113
+ union = area_a + area_b - inter
114
+ return inter / union # [A,B]
115
+
116
+
117
+ def matrix_iou(a, b):
118
+ """
119
+ return iou of a and b, numpy version for data augmentation
120
+ """
121
+ lt = np.maximum(a[:, np.newaxis, :2], b[:, :2])
122
+ rb = np.minimum(a[:, np.newaxis, 2:], b[:, 2:])
123
+
124
+ area_i = np.prod(rb - lt, axis=2) * (lt < rb).all(axis=2)
125
+ area_a = np.prod(a[:, 2:] - a[:, :2], axis=1)
126
+ area_b = np.prod(b[:, 2:] - b[:, :2], axis=1)
127
+ return area_i / (area_a[:, np.newaxis] + area_b - area_i)
128
+
129
+
130
+ def matrix_iof(a, b):
131
+ """
132
+ return iof of a and b, numpy version for data augmentation
133
+ """
134
+ lt = np.maximum(a[:, np.newaxis, :2], b[:, :2])
135
+ rb = np.minimum(a[:, np.newaxis, 2:], b[:, 2:])
136
+
137
+ area_i = np.prod(rb - lt, axis=2) * (lt < rb).all(axis=2)
138
+ area_a = np.prod(a[:, 2:] - a[:, :2], axis=1)
139
+ return area_i / np.maximum(area_a[:, np.newaxis], 1)
140
+
141
+
142
+ def match(threshold, truths, priors, variances, labels, landms, loc_t, conf_t, landm_t, idx):
143
+ """Match each prior box with the ground truth box of the highest jaccard
144
+ overlap, encode the bounding boxes, then return the matched indices
145
+ corresponding to both confidence and location preds.
146
+ Args:
147
+ threshold: (float) The overlap threshold used when matching boxes.
148
+ truths: (tensor) Ground truth boxes, Shape: [num_obj, 4].
149
+ priors: (tensor) Prior boxes from priorbox layers, Shape: [n_priors,4].
150
+ variances: (tensor) Variances corresponding to each prior coord,
151
+ Shape: [num_priors, 4].
152
+ labels: (tensor) All the class labels for the image, Shape: [num_obj].
153
+ landms: (tensor) Ground truth landms, Shape [num_obj, 10].
154
+ loc_t: (tensor) Tensor to be filled w/ encoded location targets.
155
+ conf_t: (tensor) Tensor to be filled w/ matched indices for conf preds.
156
+ landm_t: (tensor) Tensor to be filled w/ encoded landm targets.
157
+ idx: (int) current batch index
158
+ Return:
159
+ The matched indices corresponding to 1)location 2)confidence
160
+ 3)landm preds.
161
+ """
162
+ # jaccard index
163
+ overlaps = jaccard(truths, point_form(priors))
164
+ # (Bipartite Matching)
165
+ # [1,num_objects] best prior for each ground truth
166
+ best_prior_overlap, best_prior_idx = overlaps.max(1, keepdim=True)
167
+
168
+ # ignore hard gt
169
+ valid_gt_idx = best_prior_overlap[:, 0] >= 0.2
170
+ best_prior_idx_filter = best_prior_idx[valid_gt_idx, :]
171
+ if best_prior_idx_filter.shape[0] <= 0:
172
+ loc_t[idx] = 0
173
+ conf_t[idx] = 0
174
+ return
175
+
176
+ # [1,num_priors] best ground truth for each prior
177
+ best_truth_overlap, best_truth_idx = overlaps.max(0, keepdim=True)
178
+ best_truth_idx.squeeze_(0)
179
+ best_truth_overlap.squeeze_(0)
180
+ best_prior_idx.squeeze_(1)
181
+ best_prior_idx_filter.squeeze_(1)
182
+ best_prior_overlap.squeeze_(1)
183
+ best_truth_overlap.index_fill_(0, best_prior_idx_filter, 2) # ensure best prior
184
+ # TODO refactor: index best_prior_idx with long tensor
185
+ # ensure every gt matches with its prior of max overlap
186
+ for j in range(best_prior_idx.size(0)): # decide which ground-truth box this anchor should predict
187
+ best_truth_idx[best_prior_idx[j]] = j
188
+ matches = truths[best_truth_idx] # Shape: [num_priors,4] gather the matched bbox for every anchor
189
+ conf = labels[best_truth_idx] # Shape: [num_priors] gather the matched label for every anchor
190
+ conf[best_truth_overlap < threshold] = 0 # label as background: anchors with overlap below the threshold become negatives
191
+ loc = encode(matches, priors, variances)
192
+
193
+ matches_landm = landms[best_truth_idx]
194
+ landm = encode_landm(matches_landm, priors, variances)
195
+ loc_t[idx] = loc # [num_priors,4] encoded offsets to learn
196
+ conf_t[idx] = conf # [num_priors] top class label for each prior
197
+ landm_t[idx] = landm
198
+
199
+
200
+ def encode(matched, priors, variances):
201
+ """Encode the variances from the priorbox layers into the ground truth boxes
202
+ we have matched (based on jaccard overlap) with the prior boxes.
203
+ Args:
204
+ matched: (tensor) Coords of ground truth for each prior in point-form
205
+ Shape: [num_priors, 4].
206
+ priors: (tensor) Prior boxes in center-offset form
207
+ Shape: [num_priors,4].
208
+ variances: (list[float]) Variances of priorboxes
209
+ Return:
210
+ encoded boxes (tensor), Shape: [num_priors, 4]
211
+ """
212
+
213
+ # dist b/t match center and prior's center
214
+ g_cxcy = (matched[:, :2] + matched[:, 2:]) / 2 - priors[:, :2]
215
+ # encode variance
216
+ g_cxcy /= (variances[0] * priors[:, 2:])
217
+ # match wh / prior wh
218
+ g_wh = (matched[:, 2:] - matched[:, :2]) / priors[:, 2:]
219
+ g_wh = torch.log(g_wh) / variances[1]
220
+ # return target for smooth_l1_loss
221
+ return torch.cat([g_cxcy, g_wh], 1) # [num_priors,4]
222
+
223
+
224
+ def encode_landm(matched, priors, variances):
225
+ """Encode the variances from the priorbox layers into the ground truth boxes
226
+ we have matched (based on jaccard overlap) with the prior boxes.
227
+ Args:
228
+ matched: (tensor) Coords of ground truth for each prior in point-form
229
+ Shape: [num_priors, 10].
230
+ priors: (tensor) Prior boxes in center-offset form
231
+ Shape: [num_priors,4].
232
+ variances: (list[float]) Variances of priorboxes
233
+ Return:
234
+ encoded landm (tensor), Shape: [num_priors, 10]
235
+ """
236
+
237
+ # dist b/t match center and prior's center
238
+ matched = torch.reshape(matched, (matched.size(0), 5, 2))
239
+ priors_cx = priors[:, 0].unsqueeze(1).expand(matched.size(0), 5).unsqueeze(2)
240
+ priors_cy = priors[:, 1].unsqueeze(1).expand(matched.size(0), 5).unsqueeze(2)
241
+ priors_w = priors[:, 2].unsqueeze(1).expand(matched.size(0), 5).unsqueeze(2)
242
+ priors_h = priors[:, 3].unsqueeze(1).expand(matched.size(0), 5).unsqueeze(2)
243
+ priors = torch.cat([priors_cx, priors_cy, priors_w, priors_h], dim=2)
244
+ g_cxcy = matched[:, :, :2] - priors[:, :, :2]
245
+ # encode variance
246
+ g_cxcy /= (variances[0] * priors[:, :, 2:])
247
+ # g_cxcy /= priors[:, :, 2:]
248
+ g_cxcy = g_cxcy.reshape(g_cxcy.size(0), -1)
249
+ # return target for smooth_l1_loss
250
+ return g_cxcy
251
+
252
+
253
+ # Adapted from https://github.com/Hakuyume/chainer-ssd
254
+ def decode(loc, priors, variances):
255
+ """Decode locations from predictions using priors to undo
256
+ the encoding we did for offset regression at train time.
257
+ Args:
258
+ loc (tensor): location predictions for loc layers,
259
+ Shape: [num_priors,4]
260
+ priors (tensor): Prior boxes in center-offset form.
261
+ Shape: [num_priors,4].
262
+ variances: (list[float]) Variances of priorboxes
263
+ Return:
264
+ decoded bounding box predictions
265
+ """
266
+
267
+ boxes = torch.cat((priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:],
268
+ priors[:, 2:] * torch.exp(loc[:, 2:] * variances[1])), 1)
269
+ boxes[:, :2] -= boxes[:, 2:] / 2
270
+ boxes[:, 2:] += boxes[:, :2]
271
+ return boxes
272
+
273
+
274
+ def decode_landm(pre, priors, variances):
275
+ """Decode landm from predictions using priors to undo
276
+ the encoding we did for offset regression at train time.
277
+ Args:
278
+ pre (tensor): landm predictions for loc layers,
279
+ Shape: [num_priors,10]
280
+ priors (tensor): Prior boxes in center-offset form.
281
+ Shape: [num_priors,4].
282
+ variances: (list[float]) Variances of priorboxes
283
+ Return:
284
+ decoded landm predictions
285
+ """
286
+ tmp = (
287
+ priors[:, :2] + pre[:, :2] * variances[0] * priors[:, 2:],
288
+ priors[:, :2] + pre[:, 2:4] * variances[0] * priors[:, 2:],
289
+ priors[:, :2] + pre[:, 4:6] * variances[0] * priors[:, 2:],
290
+ priors[:, :2] + pre[:, 6:8] * variances[0] * priors[:, 2:],
291
+ priors[:, :2] + pre[:, 8:10] * variances[0] * priors[:, 2:],
292
+ )
293
+ landms = torch.cat(tmp, dim=1)
294
+ return landms
295
+
296
+
297
+ def batched_decode(b_loc, priors, variances):
298
+ """Decode locations from predictions using priors to undo
299
+ the encoding we did for offset regression at train time.
300
+ Args:
301
+ b_loc (tensor): location predictions for loc layers,
302
+ Shape: [num_batches,num_priors,4]
303
+ priors (tensor): Prior boxes in center-offset form.
304
+ Shape: [1,num_priors,4].
305
+ variances: (list[float]) Variances of priorboxes
306
+ Return:
307
+ decoded bounding box predictions
308
+ """
309
+ boxes = (
310
+ priors[:, :, :2] + b_loc[:, :, :2] * variances[0] * priors[:, :, 2:],
311
+ priors[:, :, 2:] * torch.exp(b_loc[:, :, 2:] * variances[1]),
312
+ )
313
+ boxes = torch.cat(boxes, dim=2)
314
+
315
+ boxes[:, :, :2] -= boxes[:, :, 2:] / 2
316
+ boxes[:, :, 2:] += boxes[:, :, :2]
317
+ return boxes
318
+
319
+
320
+ def batched_decode_landm(pre, priors, variances):
321
+ """Decode landm from predictions using priors to undo
322
+ the encoding we did for offset regression at train time.
323
+ Args:
324
+ pre (tensor): landm predictions for loc layers,
325
+ Shape: [num_batches,num_priors,10]
326
+ priors (tensor): Prior boxes in center-offset form.
327
+ Shape: [1,num_priors,4].
328
+ variances: (list[float]) Variances of priorboxes
329
+ Return:
330
+ decoded landm predictions
331
+ """
332
+ landms = (
333
+ priors[:, :, :2] + pre[:, :, :2] * variances[0] * priors[:, :, 2:],
334
+ priors[:, :, :2] + pre[:, :, 2:4] * variances[0] * priors[:, :, 2:],
335
+ priors[:, :, :2] + pre[:, :, 4:6] * variances[0] * priors[:, :, 2:],
336
+ priors[:, :, :2] + pre[:, :, 6:8] * variances[0] * priors[:, :, 2:],
337
+ priors[:, :, :2] + pre[:, :, 8:10] * variances[0] * priors[:, :, 2:],
338
+ )
339
+ landms = torch.cat(landms, dim=2)
340
+ return landms
341
+
342
+
343
+ def log_sum_exp(x):
344
+ """Utility function for computing log_sum_exp.
345
+ This will be used to determine unaveraged confidence loss across
346
+ all examples in a batch.
347
+ Args:
348
+ x (Variable(tensor)): conf_preds from conf layers
349
+ """
350
+ x_max = x.data.max()
351
+ return torch.log(torch.sum(torch.exp(x - x_max), 1, keepdim=True)) + x_max
352
+
353
+
354
+ # Original author: Francisco Massa:
355
+ # https://github.com/fmassa/object-detection.torch
356
+ # Ported to PyTorch by Max deGroot (02/01/2017)
357
+ def nms(boxes, scores, overlap=0.5, top_k=200):
358
+ """Apply non-maximum suppression at test time to avoid detecting too many
359
+ overlapping bounding boxes for a given object.
360
+ Args:
361
+ boxes: (tensor) The location preds for the img, Shape: [num_priors,4].
362
+ scores: (tensor) The class predscores for the img, Shape:[num_priors].
363
+ overlap: (float) The overlap thresh for suppressing unnecessary boxes.
364
+ top_k: (int) The Maximum number of box preds to consider.
365
+ Return:
366
+ The indices of the kept boxes with respect to num_priors.
367
+ """
368
+
369
+ keep = torch.Tensor(scores.size(0)).fill_(0).long()
370
+ if boxes.numel() == 0:
371
+ return keep
372
+ x1 = boxes[:, 0]
373
+ y1 = boxes[:, 1]
374
+ x2 = boxes[:, 2]
375
+ y2 = boxes[:, 3]
376
+ area = torch.mul(x2 - x1, y2 - y1)
377
+ v, idx = scores.sort(0) # sort in ascending order
378
+ # I = I[v >= 0.01]
379
+ idx = idx[-top_k:] # indices of the top-k largest vals
380
+ xx1 = boxes.new()
381
+ yy1 = boxes.new()
382
+ xx2 = boxes.new()
383
+ yy2 = boxes.new()
384
+ w = boxes.new()
385
+ h = boxes.new()
386
+
387
+ # keep = torch.Tensor()
388
+ count = 0
389
+ while idx.numel() > 0:
390
+ i = idx[-1] # index of current largest val
391
+ # keep.append(i)
392
+ keep[count] = i
393
+ count += 1
394
+ if idx.size(0) == 1:
395
+ break
396
+ idx = idx[:-1] # remove kept element from view
397
+ # load bboxes of next highest vals
398
+ torch.index_select(x1, 0, idx, out=xx1)
399
+ torch.index_select(y1, 0, idx, out=yy1)
400
+ torch.index_select(x2, 0, idx, out=xx2)
401
+ torch.index_select(y2, 0, idx, out=yy2)
402
+ # store element-wise max with next highest score
403
+ xx1 = torch.clamp(xx1, min=x1[i])
404
+ yy1 = torch.clamp(yy1, min=y1[i])
405
+ xx2 = torch.clamp(xx2, max=x2[i])
406
+ yy2 = torch.clamp(yy2, max=y2[i])
407
+ w.resize_as_(xx2)
408
+ h.resize_as_(yy2)
409
+ w = xx2 - xx1
410
+ h = yy2 - yy1
411
+ # check sizes of xx1 and xx2.. after each iteration
412
+ w = torch.clamp(w, min=0.0)
413
+ h = torch.clamp(h, min=0.0)
414
+ inter = w * h
415
+ # IoU = i / (area(a) + area(b) - i)
416
+ rem_areas = torch.index_select(area, 0, idx) # load remaining areas)
417
+ union = (rem_areas - inter) + area[i]
418
+ IoU = inter / union # store result in iou
419
+ # keep only elements with an IoU <= overlap
420
+ idx = idx[IoU.le(overlap)]
421
+ return keep, count
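The anchor and decode utilities above are easiest to see end to end: PriorBox emits one (cx, cy, w, h) prior per anchor per feature-map cell, and decode converts regressed offsets back into corner-form boxes. A small sketch using the cfg_mnet-style settings from retinaface.py; the 640x640 image size is an assumption.

import torch

from r_facelib.detection.retinaface.retinaface_utils import PriorBox, decode

cfg = {
    'min_sizes': [[16, 32], [64, 128], [256, 512]],
    'steps': [8, 16, 32],
    'variance': [0.1, 0.2],
    'clip': False,
}
priors = PriorBox(cfg, image_size=(640, 640)).forward()  # [num_priors, 4], center-size form
print(priors.shape)  # torch.Size([16800, 4]) for a 640x640 input (2 anchors per cell over 3 levels)

loc = torch.zeros_like(priors)                # zero offsets...
boxes = decode(loc, priors, cfg['variance'])  # ...decode to the priors themselves, in corner form (x1, y1, x2, y2)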
r_facelib/detection/yolov5face/__init__.py ADDED
File without changes
r_facelib/detection/yolov5face/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (289 Bytes). View file
 
r_facelib/detection/yolov5face/__pycache__/face_detector.cpython-311.pyc ADDED
Binary file (10.9 kB). View file
 
r_facelib/detection/yolov5face/face_detector.py ADDED
@@ -0,0 +1,141 @@
1
+ import copy
2
+ from pathlib import Path
3
+
4
+ import cv2
5
+ import numpy as np
6
+ import torch
7
+ from torch import torch_version
8
+
9
+ from r_facelib.detection.yolov5face.models.common import Conv
10
+ from r_facelib.detection.yolov5face.models.yolo import Model
11
+ from r_facelib.detection.yolov5face.utils.datasets import letterbox
12
+ from r_facelib.detection.yolov5face.utils.general import (
13
+ check_img_size,
14
+ non_max_suppression_face,
15
+ scale_coords,
16
+ scale_coords_landmarks,
17
+ )
18
+
19
+ print(f"Torch version: {torch.__version__}")
20
+ IS_HIGH_VERSION = torch_version.__version__ >= "1.9.0"
21
+
22
+ def isListempty(inList):
23
+ if isinstance(inList, list): # Is a list
24
+ return all(map(isListempty, inList))
25
+ return False # Not a list
26
+
27
+ class YoloDetector:
28
+ def __init__(
29
+ self,
30
+ config_name,
31
+ min_face=10,
32
+ target_size=None,
33
+ device='cuda',
34
+ ):
35
+ """
36
+ config_name: name of .yaml config with network configuration from models/ folder.
37
+ min_face : minimal face size in pixels.
38
+ target_size : target size of smaller image axis (choose lower for faster work). e.g. 480, 720, 1080.
39
+ None for original resolution.
40
+ """
41
+ self._class_path = Path(__file__).parent.absolute()
42
+ self.target_size = target_size
43
+ self.min_face = min_face
44
+ self.detector = Model(cfg=config_name)
45
+ self.device = device
46
+
47
+
48
+ def _preprocess(self, imgs):
49
+ """
50
+ Preprocessing image before passing through the network. Resize and conversion to torch tensor.
51
+ """
52
+ pp_imgs = []
53
+ for img in imgs:
54
+ h0, w0 = img.shape[:2] # orig hw
55
+ if self.target_size:
56
+ r = self.target_size / min(h0, w0) # resize image to img_size
57
+ if r < 1:
58
+ img = cv2.resize(img, (int(w0 * r), int(h0 * r)), interpolation=cv2.INTER_LINEAR)
59
+
60
+ imgsz = check_img_size(max(img.shape[:2]), s=self.detector.stride.max()) # check img_size
61
+ img = letterbox(img, new_shape=imgsz)[0]
62
+ pp_imgs.append(img)
63
+ pp_imgs = np.array(pp_imgs)
64
+ pp_imgs = pp_imgs.transpose(0, 3, 1, 2)
65
+ pp_imgs = torch.from_numpy(pp_imgs).to(self.device)
66
+ pp_imgs = pp_imgs.float() # uint8 to fp16/32
67
+ return pp_imgs / 255.0 # 0 - 255 to 0.0 - 1.0
68
+
69
+ def _postprocess(self, imgs, origimgs, pred, conf_thres, iou_thres):
70
+ """
71
+ Postprocessing of raw pytorch model output.
72
+ Returns:
73
+ bboxes: list of arrays with 4 coordinates of bounding boxes with format x1,y1,x2,y2.
74
+ points: list of arrays with coordinates of 5 facial keypoints (eyes, nose, lips corners).
75
+ """
76
+ bboxes = [[] for _ in range(len(origimgs))]
77
+ landmarks = [[] for _ in range(len(origimgs))]
78
+
79
+ pred = non_max_suppression_face(pred, conf_thres, iou_thres)
80
+
81
+ for image_id, origimg in enumerate(origimgs):
82
+ img_shape = origimg.shape
83
+ image_height, image_width = img_shape[:2]
84
+ gn = torch.tensor(img_shape)[[1, 0, 1, 0]] # normalization gain whwh
85
+ gn_lks = torch.tensor(img_shape)[[1, 0, 1, 0, 1, 0, 1, 0, 1, 0]] # normalization gain landmarks
86
+ det = pred[image_id].cpu()
87
+ scale_coords(imgs[image_id].shape[1:], det[:, :4], img_shape).round()
88
+ scale_coords_landmarks(imgs[image_id].shape[1:], det[:, 5:15], img_shape).round()
89
+
90
+ for j in range(det.size()[0]):
91
+ box = (det[j, :4].view(1, 4) / gn).view(-1).tolist()
92
+ box = list(
93
+ map(int, [box[0] * image_width, box[1] * image_height, box[2] * image_width, box[3] * image_height])
94
+ )
95
+ if box[3] - box[1] < self.min_face:
96
+ continue
97
+ lm = (det[j, 5:15].view(1, 10) / gn_lks).view(-1).tolist()
98
+ lm = list(map(int, [i * image_width if j % 2 == 0 else i * image_height for j, i in enumerate(lm)]))
99
+ lm = [lm[i : i + 2] for i in range(0, len(lm), 2)]
100
+ bboxes[image_id].append(box)
101
+ landmarks[image_id].append(lm)
102
+ return bboxes, landmarks
103
+
104
+ def detect_faces(self, imgs, conf_thres=0.7, iou_thres=0.5):
105
+ """
106
+ Get bbox coordinates and keypoints of faces on original image.
107
+ Params:
108
+ imgs: image or list of images to detect faces on with BGR order (convert to RGB order for inference)
109
+ conf_thres: confidence threshold for each prediction
110
+ iou_thres: threshold for NMS (filter of intersecting bboxes)
111
+ Returns:
112
+ bboxes: list of arrays with 4 coordinates of bounding boxes with format x1,y1,x2,y2.
113
+ points: list of arrays with coordinates of 5 facial keypoints (eyes, nose, lips corners).
114
+ """
115
+ # Pass input images through face detector
116
+ images = imgs if isinstance(imgs, list) else [imgs]
117
+ images = [cv2.cvtColor(img, cv2.COLOR_BGR2RGB) for img in images]
118
+ origimgs = copy.deepcopy(images)
119
+
120
+ images = self._preprocess(images)
121
+
122
+ if IS_HIGH_VERSION:
123
+ with torch.inference_mode(): # for pytorch>=1.9
124
+ pred = self.detector(images)[0]
125
+ else:
126
+ with torch.no_grad(): # for pytorch<1.9
127
+ pred = self.detector(images)[0]
128
+
129
+ bboxes, points = self._postprocess(images, origimgs, pred, conf_thres, iou_thres)
130
+
131
+ # return bboxes, points
132
+ if not isListempty(points):
133
+ bboxes = np.array(bboxes).reshape(-1,4)
134
+ points = np.array(points).reshape(-1,10)
135
+ padding = bboxes[:,0].reshape(-1,1)
136
+ return np.concatenate((bboxes, padding, points), axis=1)
137
+ else:
138
+ return None
139
+
140
+ def __call__(self, *args):
141
+ return self.detect_faces(*args) # no predict() is defined in this class; route __call__ to detect_faces
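A hedged usage sketch for YoloDetector: the config path, target size and CPU device are assumptions, and note that this class only builds the architecture from the .yaml, so in practice the caller also loads a checkpoint into detector.detector before inference.

import cv2

from r_facelib.detection.yolov5face.face_detector import YoloDetector

detector = YoloDetector(
    config_name='r_facelib/detection/yolov5face/models/yolov5l.yaml',  # assumed path to a model yaml from this repo
    min_face=10,
    target_size=640,
    device='cpu',
)

img = cv2.imread('face.jpg')  # BGR; detect_faces converts to RGB internally
dets = detector.detect_faces(img, conf_thres=0.7, iou_thres=0.5)
if dets is not None:
    boxes = dets[:, 0:4]       # x1, y1, x2, y2
    landmarks = dets[:, 5:15]  # five (x, y) keypoints; column 4 is the padding column added above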
r_facelib/detection/yolov5face/models/__init__.py ADDED
File without changes
r_facelib/detection/yolov5face/models/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (296 Bytes). View file
 
r_facelib/detection/yolov5face/models/__pycache__/common.cpython-311.pyc ADDED
Binary file (25.3 kB). View file
 
r_facelib/detection/yolov5face/models/__pycache__/experimental.cpython-311.pyc ADDED
Binary file (4.83 kB). View file
 
r_facelib/detection/yolov5face/models/__pycache__/yolo.cpython-311.pyc ADDED
Binary file (19.8 kB). View file
 
r_facelib/detection/yolov5face/models/common.py ADDED
@@ -0,0 +1,299 @@
1
+ # This file contains modules common to various models
2
+
3
+ import math
4
+
5
+ import numpy as np
6
+ import torch
7
+ from torch import nn
8
+
9
+ from r_facelib.detection.yolov5face.utils.datasets import letterbox
10
+ from r_facelib.detection.yolov5face.utils.general import (
11
+ make_divisible,
12
+ non_max_suppression,
13
+ scale_coords,
14
+ xyxy2xywh,
15
+ )
16
+
17
+
18
+ def autopad(k, p=None): # kernel, padding
19
+ # Pad to 'same'
20
+ if p is None:
21
+ p = k // 2 if isinstance(k, int) else [x // 2 for x in k] # auto-pad
22
+ return p
23
+
24
+
25
+ def channel_shuffle(x, groups):
26
+ batchsize, num_channels, height, width = x.data.size()
27
+ channels_per_group = torch.div(num_channels, groups, rounding_mode="trunc")
28
+
29
+ # reshape
30
+ x = x.view(batchsize, groups, channels_per_group, height, width)
31
+ x = torch.transpose(x, 1, 2).contiguous()
32
+
33
+ # flatten
34
+ return x.view(batchsize, -1, height, width)
35
+
36
+
37
+ def DWConv(c1, c2, k=1, s=1, act=True):
38
+ # Depthwise convolution
39
+ return Conv(c1, c2, k, s, g=math.gcd(c1, c2), act=act)
40
+
41
+
42
+ class Conv(nn.Module):
43
+ # Standard convolution
44
+ def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups
45
+ super().__init__()
46
+         self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False)
+         self.bn = nn.BatchNorm2d(c2)
+         self.act = nn.SiLU() if act is True else (act if isinstance(act, nn.Module) else nn.Identity())
+
+     def forward(self, x):
+         return self.act(self.bn(self.conv(x)))
+
+     def fuseforward(self, x):
+         return self.act(self.conv(x))
+
+
+ class StemBlock(nn.Module):
+     def __init__(self, c1, c2, k=3, s=2, p=None, g=1, act=True):
+         super().__init__()
+         self.stem_1 = Conv(c1, c2, k, s, p, g, act)
+         self.stem_2a = Conv(c2, c2 // 2, 1, 1, 0)
+         self.stem_2b = Conv(c2 // 2, c2, 3, 2, 1)
+         self.stem_2p = nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True)
+         self.stem_3 = Conv(c2 * 2, c2, 1, 1, 0)
+
+     def forward(self, x):
+         stem_1_out = self.stem_1(x)
+         stem_2a_out = self.stem_2a(stem_1_out)
+         stem_2b_out = self.stem_2b(stem_2a_out)
+         stem_2p_out = self.stem_2p(stem_1_out)
+         return self.stem_3(torch.cat((stem_2b_out, stem_2p_out), 1))
+
+
+ class Bottleneck(nn.Module):
+     # Standard bottleneck
+     def __init__(self, c1, c2, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, shortcut, groups, expansion
+         super().__init__()
+         c_ = int(c2 * e)  # hidden channels
+         self.cv1 = Conv(c1, c_, 1, 1)
+         self.cv2 = Conv(c_, c2, 3, 1, g=g)
+         self.add = shortcut and c1 == c2
+
+     def forward(self, x):
+         return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))
+
+
+ class BottleneckCSP(nn.Module):
+     # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks
+     def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, number, shortcut, groups, expansion
+         super().__init__()
+         c_ = int(c2 * e)  # hidden channels
+         self.cv1 = Conv(c1, c_, 1, 1)
+         self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False)
+         self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False)
+         self.cv4 = Conv(2 * c_, c2, 1, 1)
+         self.bn = nn.BatchNorm2d(2 * c_)  # applied to cat(cv2, cv3)
+         self.act = nn.LeakyReLU(0.1, inplace=True)
+         self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)))
+
+     def forward(self, x):
+         y1 = self.cv3(self.m(self.cv1(x)))
+         y2 = self.cv2(x)
+         return self.cv4(self.act(self.bn(torch.cat((y1, y2), dim=1))))
+
+
+ class C3(nn.Module):
+     # CSP Bottleneck with 3 convolutions
+     def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, number, shortcut, groups, expansion
+         super().__init__()
+         c_ = int(c2 * e)  # hidden channels
+         self.cv1 = Conv(c1, c_, 1, 1)
+         self.cv2 = Conv(c1, c_, 1, 1)
+         self.cv3 = Conv(2 * c_, c2, 1)  # act=FReLU(c2)
+         self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)))
+
+     def forward(self, x):
+         return self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), dim=1))
+
+
+ class ShuffleV2Block(nn.Module):
+     def __init__(self, inp, oup, stride):
+         super().__init__()
+
+         if not 1 <= stride <= 3:
+             raise ValueError("illegal stride value")
+         self.stride = stride
+
+         branch_features = oup // 2
+
+         if self.stride > 1:
+             self.branch1 = nn.Sequential(
+                 self.depthwise_conv(inp, inp, kernel_size=3, stride=self.stride, padding=1),
+                 nn.BatchNorm2d(inp),
+                 nn.Conv2d(inp, branch_features, kernel_size=1, stride=1, padding=0, bias=False),
+                 nn.BatchNorm2d(branch_features),
+                 nn.SiLU(),
+             )
+         else:
+             self.branch1 = nn.Sequential()
+
+         self.branch2 = nn.Sequential(
+             nn.Conv2d(
+                 inp if (self.stride > 1) else branch_features,
+                 branch_features,
+                 kernel_size=1,
+                 stride=1,
+                 padding=0,
+                 bias=False,
+             ),
+             nn.BatchNorm2d(branch_features),
+             nn.SiLU(),
+             self.depthwise_conv(branch_features, branch_features, kernel_size=3, stride=self.stride, padding=1),
+             nn.BatchNorm2d(branch_features),
+             nn.Conv2d(branch_features, branch_features, kernel_size=1, stride=1, padding=0, bias=False),
+             nn.BatchNorm2d(branch_features),
+             nn.SiLU(),
+         )
+
+     @staticmethod
+     def depthwise_conv(i, o, kernel_size, stride=1, padding=0, bias=False):
+         return nn.Conv2d(i, o, kernel_size, stride, padding, bias=bias, groups=i)
+
+     def forward(self, x):
+         if self.stride == 1:
+             x1, x2 = x.chunk(2, dim=1)
+             out = torch.cat((x1, self.branch2(x2)), dim=1)
+         else:
+             out = torch.cat((self.branch1(x), self.branch2(x)), dim=1)
+         out = channel_shuffle(out, 2)
+         return out
+
+
+ class SPP(nn.Module):
+     # Spatial pyramid pooling layer used in YOLOv3-SPP
+     def __init__(self, c1, c2, k=(5, 9, 13)):
+         super().__init__()
+         c_ = c1 // 2  # hidden channels
+         self.cv1 = Conv(c1, c_, 1, 1)
+         self.cv2 = Conv(c_ * (len(k) + 1), c2, 1, 1)
+         self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k])
+
+     def forward(self, x):
+         x = self.cv1(x)
+         return self.cv2(torch.cat([x] + [m(x) for m in self.m], 1))
+
+
+ class Focus(nn.Module):
+     # Focus wh information into c-space
+     def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):  # ch_in, ch_out, kernel, stride, padding, groups
+         super().__init__()
+         self.conv = Conv(c1 * 4, c2, k, s, p, g, act)
+
+     def forward(self, x):  # x(b,c,w,h) -> y(b,4c,w/2,h/2)
+         return self.conv(torch.cat([x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]], 1))
+
+
+ class Concat(nn.Module):
+     # Concatenate a list of tensors along dimension
+     def __init__(self, dimension=1):
+         super().__init__()
+         self.d = dimension
+
+     def forward(self, x):
+         return torch.cat(x, self.d)
+
+
+ class NMS(nn.Module):
+     # Non-Maximum Suppression (NMS) module
+     conf = 0.25  # confidence threshold
+     iou = 0.45  # IoU threshold
+     classes = None  # (optional list) filter by class
+
+     def forward(self, x):
+         return non_max_suppression(x[0], conf_thres=self.conf, iou_thres=self.iou, classes=self.classes)
+
+
+ class AutoShape(nn.Module):
+     # input-robust model wrapper for passing cv2/np/PIL/torch inputs. Includes preprocessing, inference and NMS
+     img_size = 640  # inference size (pixels)
+     conf = 0.25  # NMS confidence threshold
+     iou = 0.45  # NMS IoU threshold
+     classes = None  # (optional list) filter by class
+
+     def __init__(self, model):
+         super().__init__()
+         self.model = model.eval()
+
+     def autoshape(self):
+         print("autoShape already enabled, skipping... ")  # model already converted to model.autoshape()
+         return self
+
+     def forward(self, imgs, size=640, augment=False, profile=False):
+         # Inference from various sources. For height=720, width=1280, RGB images example inputs are:
+         #   OpenCV:   = cv2.imread('image.jpg')[:,:,::-1]  # HWC BGR to RGB x(720,1280,3)
+         #   PIL:      = Image.open('image.jpg')  # HWC x(720,1280,3)
+         #   numpy:    = np.zeros((720,1280,3))  # HWC
+         #   torch:    = torch.zeros(16,3,720,1280)  # BCHW
+         #   multiple: = [Image.open('image1.jpg'), Image.open('image2.jpg'), ...]  # list of images
+
+         p = next(self.model.parameters())  # for device and type
+         if isinstance(imgs, torch.Tensor):  # torch
+             return self.model(imgs.to(p.device).type_as(p), augment, profile)  # inference
+
+         # Pre-process
+         n, imgs = (len(imgs), imgs) if isinstance(imgs, list) else (1, [imgs])  # number of images, list of images
+         shape0, shape1 = [], []  # image and inference shapes
+         for i, im in enumerate(imgs):
+             im = np.array(im)  # to numpy
+             if im.shape[0] < 5:  # image in CHW
+                 im = im.transpose((1, 2, 0))  # reverse dataloader .transpose(2, 0, 1)
+             im = im[:, :, :3] if im.ndim == 3 else np.tile(im[:, :, None], 3)  # enforce 3ch input
+             s = im.shape[:2]  # HWC
+             shape0.append(s)  # image shape
+             g = size / max(s)  # gain
+             shape1.append([y * g for y in s])
+             imgs[i] = im  # update
+         shape1 = [make_divisible(x, int(self.stride.max())) for x in np.stack(shape1, 0).max(0)]  # inference shape
+         x = [letterbox(im, new_shape=shape1, auto=False)[0] for im in imgs]  # pad
+         x = np.stack(x, 0) if n > 1 else x[0][None]  # stack
+         x = np.ascontiguousarray(x.transpose((0, 3, 1, 2)))  # BHWC to BCHW
+         x = torch.from_numpy(x).to(p.device).type_as(p) / 255.0  # uint8 to fp16/32
+
+         # Inference
+         with torch.no_grad():
+             y = self.model(x, augment, profile)[0]  # forward
+         y = non_max_suppression(y, conf_thres=self.conf, iou_thres=self.iou, classes=self.classes)  # NMS
+
+         # Post-process
+         for i in range(n):
+             scale_coords(shape1, y[i][:, :4], shape0[i])
+
+         return Detections(imgs, y, self.names)
+
+
+ class Detections:
+     # detections class for YOLOv5 inference results
+     def __init__(self, imgs, pred, names=None):
+         super().__init__()
+         d = pred[0].device  # device
+         gn = [torch.tensor([*(im.shape[i] for i in [1, 0, 1, 0]), 1.0, 1.0], device=d) for im in imgs]  # normalizations
+         self.imgs = imgs  # list of images as numpy arrays
+         self.pred = pred  # list of tensors pred[0] = (xyxy, conf, cls)
+         self.names = names  # class names
+         self.xyxy = pred  # xyxy pixels
+         self.xywh = [xyxy2xywh(x) for x in pred]  # xywh pixels
+         self.xyxyn = [x / g for x, g in zip(self.xyxy, gn)]  # xyxy normalized
+         self.xywhn = [x / g for x, g in zip(self.xywh, gn)]  # xywh normalized
+         self.n = len(self.pred)
+
+     def __len__(self):
+         return self.n
+
+     def tolist(self):
+         # return a list of Detections objects, i.e. 'for result in results.tolist():'
+         x = [Detections([self.imgs[i]], [self.pred[i]], self.names) for i in range(self.n)]
+         for d in x:
+             for k in ["imgs", "pred", "xyxy", "xyxyn", "xywh", "xywhn"]:
+                 setattr(d, k, getattr(d, k)[0])  # pop out of list
+         return x
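The Focus module above trades spatial resolution for channels: it samples every second pixel at the four possible (row, column) parities and stacks the slices along the channel axis before a single Conv. A minimal sketch of that shape bookkeeping (input size chosen only for illustration):

import torch

x = torch.randn(1, 3, 640, 640)  # hypothetical RGB input, (b, c, h, w)

# the four pixel-parity slices concatenated in Focus.forward
patches = torch.cat(
    [x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]], 1
)
print(patches.shape)  # torch.Size([1, 12, 320, 320]) -> 4x channels at half resolution

The follow-up Conv(c1 * 4, c2, ...) inside Focus then mixes the stacked channels back down to c2.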
r_facelib/detection/yolov5face/models/experimental.py ADDED
@@ -0,0 +1,45 @@
+ # # This file contains experimental modules
+
+ import numpy as np
+ import torch
+ from torch import nn
+
+ from r_facelib.detection.yolov5face.models.common import Conv
+
+
+ class CrossConv(nn.Module):
+     # Cross Convolution Downsample
+     def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False):
+         # ch_in, ch_out, kernel, stride, groups, expansion, shortcut
+         super().__init__()
+         c_ = int(c2 * e)  # hidden channels
+         self.cv1 = Conv(c1, c_, (1, k), (1, s))
+         self.cv2 = Conv(c_, c2, (k, 1), (s, 1), g=g)
+         self.add = shortcut and c1 == c2
+
+     def forward(self, x):
+         return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))
+
+
+ class MixConv2d(nn.Module):
+     # Mixed Depthwise Conv https://arxiv.org/abs/1907.09595
+     def __init__(self, c1, c2, k=(1, 3), s=1, equal_ch=True):
+         super().__init__()
+         groups = len(k)
+         if equal_ch:  # equal c_ per group
+             i = torch.linspace(0, groups - 1e-6, c2).floor()  # c2 indices
+             c_ = [(i == g).sum() for g in range(groups)]  # intermediate channels
+         else:  # equal weight.numel() per group
+             b = [c2] + [0] * groups
+             a = np.eye(groups + 1, groups, k=-1)
+             a -= np.roll(a, 1, axis=1)
+             a *= np.array(k) ** 2
+             a[0] = 1
+             c_ = np.linalg.lstsq(a, b, rcond=None)[0].round()  # solve for equal weight indices, ax = b
+
+         self.m = nn.ModuleList([nn.Conv2d(c1, int(c_[g]), k[g], s, k[g] // 2, bias=False) for g in range(groups)])
+         self.bn = nn.BatchNorm2d(c2)
+         self.act = nn.LeakyReLU(0.1, inplace=True)
+
+     def forward(self, x):
+         return x + self.act(self.bn(torch.cat([m(x) for m in self.m], 1)))
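MixConv2d above gives each kernel size its own slice of the output channels. With equal_ch=True the split is just a floored linspace; a quick check of that arithmetic with illustrative numbers:

import torch

c2, k = 64, (1, 3)  # example: 64 output channels split across 1x1 and 3x3 kernels
groups = len(k)
i = torch.linspace(0, groups - 1e-6, c2).floor()
c_ = [(i == g).sum().item() for g in range(groups)]
print(c_)  # [32, 32] -> half the channels per kernel size

Note that the residual in MixConv2d.forward (x + ...) only shape-checks when the input and output channel counts match.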
r_facelib/detection/yolov5face/models/yolo.py ADDED
@@ -0,0 +1,235 @@
+ import math
+ from copy import deepcopy
+ from pathlib import Path
+
+ import torch
+ import yaml  # for torch hub
+ from torch import nn
+
+ from r_facelib.detection.yolov5face.models.common import (
+     C3,
+     NMS,
+     SPP,
+     AutoShape,
+     Bottleneck,
+     BottleneckCSP,
+     Concat,
+     Conv,
+     DWConv,
+     Focus,
+     ShuffleV2Block,
+     StemBlock,
+ )
+ from r_facelib.detection.yolov5face.models.experimental import CrossConv, MixConv2d
+ from r_facelib.detection.yolov5face.utils.autoanchor import check_anchor_order
+ from r_facelib.detection.yolov5face.utils.general import make_divisible
+ from r_facelib.detection.yolov5face.utils.torch_utils import copy_attr, fuse_conv_and_bn
+
+
+ class Detect(nn.Module):
+     stride = None  # strides computed during build
+     export = False  # onnx export
+
+     def __init__(self, nc=80, anchors=(), ch=()):  # detection layer
+         super().__init__()
+         self.nc = nc  # number of classes
+         self.no = nc + 5 + 10  # number of outputs per anchor
+
+         self.nl = len(anchors)  # number of detection layers
+         self.na = len(anchors[0]) // 2  # number of anchors
+         self.grid = [torch.zeros(1)] * self.nl  # init grid
+         a = torch.tensor(anchors).float().view(self.nl, -1, 2)
+         self.register_buffer("anchors", a)  # shape(nl,na,2)
+         self.register_buffer("anchor_grid", a.clone().view(self.nl, 1, -1, 1, 1, 2))  # shape(nl,1,na,1,1,2)
+         self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch)  # output conv
+
+     def forward(self, x):
+         z = []  # inference output
+         if self.export:
+             for i in range(self.nl):
+                 x[i] = self.m[i](x[i])
+             return x
+         for i in range(self.nl):
+             x[i] = self.m[i](x[i])  # conv
+             bs, _, ny, nx = x[i].shape  # x(bs,255,20,20) to x(bs,3,20,20,85)
+             x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()
+
+             if not self.training:  # inference
+                 if self.grid[i].shape[2:4] != x[i].shape[2:4]:
+                     self.grid[i] = self._make_grid(nx, ny).to(x[i].device)
+
+                 y = torch.full_like(x[i], 0)
+                 y[..., [0, 1, 2, 3, 4, 15]] = x[i][..., [0, 1, 2, 3, 4, 15]].sigmoid()
+                 y[..., 5:15] = x[i][..., 5:15]
+
+                 y[..., 0:2] = (y[..., 0:2] * 2.0 - 0.5 + self.grid[i].to(x[i].device)) * self.stride[i]  # xy
+                 y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i]  # wh
+
+                 y[..., 5:7] = (
+                     y[..., 5:7] * self.anchor_grid[i] + self.grid[i].to(x[i].device) * self.stride[i]
+                 )  # landmark x1 y1
+                 y[..., 7:9] = (
+                     y[..., 7:9] * self.anchor_grid[i] + self.grid[i].to(x[i].device) * self.stride[i]
+                 )  # landmark x2 y2
+                 y[..., 9:11] = (
+                     y[..., 9:11] * self.anchor_grid[i] + self.grid[i].to(x[i].device) * self.stride[i]
+                 )  # landmark x3 y3
+                 y[..., 11:13] = (
+                     y[..., 11:13] * self.anchor_grid[i] + self.grid[i].to(x[i].device) * self.stride[i]
+                 )  # landmark x4 y4
+                 y[..., 13:15] = (
+                     y[..., 13:15] * self.anchor_grid[i] + self.grid[i].to(x[i].device) * self.stride[i]
+                 )  # landmark x5 y5
+
+                 z.append(y.view(bs, -1, self.no))
+
+         return x if self.training else (torch.cat(z, 1), x)
+
+     @staticmethod
+     def _make_grid(nx=20, ny=20):
+         # yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)], indexing="ij")  # for pytorch>=1.10
+         yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)])
+         return torch.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).float()
+
+
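Per anchor, the Detect head above emits nc + 5 + 10 values: box xywh at indices 0:4, objectness at 4, five facial landmarks at 5:15, and the class score(s) from index 15 on (nc is 1 for the face configs in this repository, which is why only channels [0, 1, 2, 3, 4, 15] pass through a sigmoid while the landmark offsets stay linear). A small, illustrative slicing of the decoded output produced by torch.cat(z, 1):

import torch

# dummy tensor standing in for the decoded output; with nc=1, no=16,
# and 25200 = 3 anchors x (80^2 + 40^2 + 20^2) cells for a 640x640 input
pred = torch.randn(1, 25200, 16)
boxes      = pred[..., 0:4]    # xywh in input-image pixels
objectness = pred[..., 4:5]    # confidence
landmarks  = pred[..., 5:15]   # five (x, y) landmark pairs, also in pixels
face_score = pred[..., 15:16]  # single face-class probability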
+ class Model(nn.Module):
+     def __init__(self, cfg="yolov5s.yaml", ch=3, nc=None):  # model, input channels, number of classes
+         super().__init__()
+         self.yaml_file = Path(cfg).name
+         with Path(cfg).open(encoding="utf8") as f:
+             self.yaml = yaml.safe_load(f)  # model dict
+
+         # Define model
+         ch = self.yaml["ch"] = self.yaml.get("ch", ch)  # input channels
+         if nc and nc != self.yaml["nc"]:
+             self.yaml["nc"] = nc  # override yaml value
+
+         self.model, self.save = parse_model(deepcopy(self.yaml), ch=[ch])  # model, savelist
+         self.names = [str(i) for i in range(self.yaml["nc"])]  # default names
+
+         # Build strides, anchors
+         m = self.model[-1]  # Detect()
+         if isinstance(m, Detect):
+             s = 128  # 2x min stride
+             m.stride = torch.tensor([s / x.shape[-2] for x in self.forward(torch.zeros(1, ch, s, s))])  # forward
+             m.anchors /= m.stride.view(-1, 1, 1)
+             check_anchor_order(m)
+             self.stride = m.stride
+             self._initialize_biases()  # only run once
+
+     def forward(self, x):
+         return self.forward_once(x)  # single-scale inference, train
+
+     def forward_once(self, x):
+         y = []  # outputs
+         for m in self.model:
+             if m.f != -1:  # if not from previous layer
+                 x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f]  # from earlier layers
+
+             x = m(x)  # run
+             y.append(x if m.i in self.save else None)  # save output
+
+         return x
+
+     def _initialize_biases(self, cf=None):  # initialize biases into Detect(), cf is class frequency
+         # https://arxiv.org/abs/1708.02002 section 3.3
+         m = self.model[-1]  # Detect() module
+         for mi, s in zip(m.m, m.stride):  # from
+             b = mi.bias.view(m.na, -1)  # conv.bias(255) to (3,85)
+             b.data[:, 4] += math.log(8 / (640 / s) ** 2)  # obj (8 objects per 640 image)
+             b.data[:, 5:] += math.log(0.6 / (m.nc - 0.99)) if cf is None else torch.log(cf / cf.sum())  # cls
+             mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)
+
+     def _print_biases(self):
+         m = self.model[-1]  # Detect() module
+         for mi in m.m:  # from
+             b = mi.bias.detach().view(m.na, -1).T  # conv.bias(255) to (3,85)
+             print(("%6g Conv2d.bias:" + "%10.3g" * 6) % (mi.weight.shape[1], *b[:5].mean(1).tolist(), b[5:].mean()))
+
+     def fuse(self):  # fuse model Conv2d() + BatchNorm2d() layers
+         print("Fusing layers... ")
+         for m in self.model.modules():
+             if isinstance(m, Conv) and hasattr(m, "bn"):
+                 m.conv = fuse_conv_and_bn(m.conv, m.bn)  # update conv
+                 delattr(m, "bn")  # remove batchnorm
+                 m.forward = m.fuseforward  # update forward
+             elif type(m) is nn.Upsample:
+                 m.recompute_scale_factor = None  # torch 1.11.0 compatibility
+         return self
+
+     def nms(self, mode=True):  # add or remove NMS module
+         present = isinstance(self.model[-1], NMS)  # last layer is NMS
+         if mode and not present:
+             print("Adding NMS... ")
+             m = NMS()  # module
+             m.f = -1  # from
+             m.i = self.model[-1].i + 1  # index
+             self.model.add_module(name=str(m.i), module=m)  # add
+             self.eval()
+         elif not mode and present:
+             print("Removing NMS... ")
+             self.model = self.model[:-1]  # remove
+         return self
+
+     def autoshape(self):  # add autoShape module
+         print("Adding autoShape... ")
+         m = AutoShape(self)  # wrap model
+         copy_attr(m, self, include=("yaml", "nc", "hyp", "names", "stride"), exclude=())  # copy attributes
+         return m
+
+
+ def parse_model(d, ch):  # model_dict, input_channels(3)
+     anchors, nc, gd, gw = d["anchors"], d["nc"], d["depth_multiple"], d["width_multiple"]
+     na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors  # number of anchors
+     no = na * (nc + 5)  # number of outputs = anchors * (classes + 5)
+
+     layers, save, c2 = [], [], ch[-1]  # layers, savelist, ch out
+     for i, (f, n, m, args) in enumerate(d["backbone"] + d["head"]):  # from, number, module, args
+         m = eval(m) if isinstance(m, str) else m  # eval strings
+         for j, a in enumerate(args):
+             try:
+                 args[j] = eval(a) if isinstance(a, str) else a  # eval strings
+             except:
+                 pass
+
+         n = max(round(n * gd), 1) if n > 1 else n  # depth gain
+         if m in [
+             Conv,
+             Bottleneck,
+             SPP,
+             DWConv,
+             MixConv2d,
+             Focus,
+             CrossConv,
+             BottleneckCSP,
+             C3,
+             ShuffleV2Block,
+             StemBlock,
+         ]:
+             c1, c2 = ch[f], args[0]
+
+             c2 = make_divisible(c2 * gw, 8) if c2 != no else c2
+
+             args = [c1, c2, *args[1:]]
+             if m in [BottleneckCSP, C3]:
+                 args.insert(2, n)
+                 n = 1
+         elif m is nn.BatchNorm2d:
+             args = [ch[f]]
+         elif m is Concat:
+             c2 = sum(ch[-1 if x == -1 else x + 1] for x in f)
+         elif m is Detect:
+             args.append([ch[x + 1] for x in f])
+             if isinstance(args[1], int):  # number of anchors
+                 args[1] = [list(range(args[1] * 2))] * len(f)
+         else:
+             c2 = ch[f]
+
+         m_ = nn.Sequential(*(m(*args) for _ in range(n))) if n > 1 else m(*args)  # module
+         t = str(m)[8:-2].replace("__main__.", "")  # module type
+         np = sum(x.numel() for x in m_.parameters())  # number params
+         m_.i, m_.f, m_.type, m_.np = i, f, t, np  # attach index, 'from' index, type, number params
+         save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1)  # append to savelist
+         layers.append(m_)
+         ch.append(c2)
+     return nn.Sequential(*layers), sorted(save)
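Putting the pieces together, Model builds the network from one of the bundled yaml configs. A minimal, hedged sketch of constructing it and pushing a dummy batch through (the config path assumes the repository layout in this commit, and no pretrained weights are loaded here):

import torch
from r_facelib.detection.yolov5face.models.yolo import Model

net = Model(cfg="r_facelib/detection/yolov5face/models/yolov5l.yaml", ch=3, nc=1).eval()

with torch.no_grad():
    decoded, raw = net(torch.zeros(1, 3, 640, 640))  # eval mode returns (decoded, per-level raw)
print(decoded.shape)  # (1, num_anchors_total, 16)

Real inference would first load a checkpoint via net.load_state_dict(...) and typically wrap the model with net.fuse() or net.autoshape().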
r_facelib/detection/yolov5face/models/yolov5l.yaml ADDED
@@ -0,0 +1,47 @@
+ # parameters
+ nc: 1  # number of classes
+ depth_multiple: 1.0  # model depth multiple
+ width_multiple: 1.0  # layer channel multiple
+
+ # anchors
+ anchors:
+   - [4,5,  8,10,  13,16]  # P3/8
+   - [23,29,  43,55,  73,105]  # P4/16
+   - [146,217,  231,300,  335,433]  # P5/32
+
+ # YOLOv5 backbone
+ backbone:
+   # [from, number, module, args]
+   [[-1, 1, StemBlock, [64, 3, 2]],  # 0-P1/2
+    [-1, 3, C3, [128]],
+    [-1, 1, Conv, [256, 3, 2]],  # 2-P3/8
+    [-1, 9, C3, [256]],
+    [-1, 1, Conv, [512, 3, 2]],  # 4-P4/16
+    [-1, 9, C3, [512]],
+    [-1, 1, Conv, [1024, 3, 2]],  # 6-P5/32
+    [-1, 1, SPP, [1024, [3,5,7]]],
+    [-1, 3, C3, [1024, False]],  # 8
+   ]
+
+ # YOLOv5 head
+ head:
+   [[-1, 1, Conv, [512, 1, 1]],
+    [-1, 1, nn.Upsample, [None, 2, 'nearest']],
+    [[-1, 5], 1, Concat, [1]],  # cat backbone P4
+    [-1, 3, C3, [512, False]],  # 12
+
+    [-1, 1, Conv, [256, 1, 1]],
+    [-1, 1, nn.Upsample, [None, 2, 'nearest']],
+    [[-1, 3], 1, Concat, [1]],  # cat backbone P3
+    [-1, 3, C3, [256, False]],  # 16 (P3/8-small)
+
+    [-1, 1, Conv, [256, 3, 2]],
+    [[-1, 13], 1, Concat, [1]],  # cat head P4
+    [-1, 3, C3, [512, False]],  # 19 (P4/16-medium)
+
+    [-1, 1, Conv, [512, 3, 2]],
+    [[-1, 9], 1, Concat, [1]],  # cat head P5
+    [-1, 3, C3, [1024, False]],  # 22 (P5/32-large)
+
+    [[16, 19, 22], 1, Detect, [nc, anchors]],  # Detect(P3, P4, P5)
+   ]
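Each backbone/head row is a [from, number, module, args] entry consumed by parse_model above: from indexes an earlier layer's output (-1 meaning the previous layer), number is the repeat count scaled by depth_multiple, and the first element of args is the output channel count scaled by width_multiple and rounded up to a multiple of 8. This yolov5l config keeps both multiples at 1.0, so nothing is rescaled; a quick check of what a hypothetical narrower variant would do (make_divisible below is a local stand-in, assumed to behave like the helper imported from utils.general):

import math

def make_divisible(x, divisor):
    # stand-in for r_facelib.detection.yolov5face.utils.general.make_divisible (assumed behaviour)
    return math.ceil(x / divisor) * divisor

width_multiple = 0.5  # hypothetical narrower variant
print(make_divisible(1024 * width_multiple, 8))  # 512 -> the C3 [1024, False] blocks would shrink to 512 channels
print(make_divisible(256 * width_multiple, 8))   # 128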