victorgg committed on
Commit
742d952
·
verified ·
1 Parent(s): 3b2a305

Upload folder using huggingface_hub

Browse files
Image.py ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import cv2
3
+ import numpy as np
4
+
5
# Embedding projection matrix used by getLatent (loaded once at import time).
# NOTE(review): loaded from the working directory — presumably shipped next to
# this file; confirm the expected path.
emap = np.load("emap.npy")
# Normalization constants for cv2.dnn.blobFromImage: pixels are scaled by
# 1/input_std and shifted by input_mean per channel (see getBlob).
input_std = 255.0
input_mean = 0.0
8
+
9
def postprocess_face(face_tensor):
    """Convert a model output tensor to an OpenCV-ready BGR uint8 image.

    Args:
        face_tensor: torch tensor, shape (1, C, H, W) or (C, H, W), RGB channel
            order, float values nominally in [0, 1] — TODO confirm range with
            the producing model.

    Returns:
        H x W x C uint8 numpy array in BGR channel order (OpenCV convention).
    """
    face_tensor = face_tensor.squeeze().cpu().detach()
    # Clip before the uint8 cast: values outside [0, 1] would otherwise wrap
    # around under .astype(np.uint8) and produce speckled artifacts.
    face_np = (face_tensor.permute(1, 2, 0).numpy() * 255).clip(0, 255).astype(np.uint8)
    face_np = cv2.cvtColor(face_np, cv2.COLOR_RGB2BGR)

    return face_np
15
+
16
def getBlob(aimg, input_size = (128, 128)):
    """Build a cv2.dnn input blob from an aligned face image.

    The image is scaled by 1/input_std, mean-shifted by input_mean on every
    channel, resized to `input_size`, and converted BGR->RGB (swapRB=True).
    """
    scale = 1.0 / input_std
    mean = (input_mean, input_mean, input_mean)
    return cv2.dnn.blobFromImage(aimg, scale, input_size, mean, swapRB=True)
20
+
21
def getLatent(source_face):
    """Project a face's normalized embedding through `emap` and L2-normalize it.

    Args:
        source_face: detection result exposing a 1-D `.normed_embedding`
            attribute (e.g. an insightface Face object — confirm with caller).

    Returns:
        (1, N) float array: unit-norm latent vector.
    """
    embedding = source_face.normed_embedding.reshape((1, -1))
    projected = np.dot(embedding, emap)
    return projected / np.linalg.norm(projected)
27
+
28
def blend_swapped_image(swapped_face, target_image, M):
    """Warp `swapped_face` back into `target_image` space and alpha-blend it.

    Args:
        swapped_face: aligned face crop (BGR) produced by the swapper.
        target_image: full-frame BGR image the face is blended into.
        M: 2x3 affine matrix that originally mapped target space -> crop space.

    Returns:
        uint8 BGR image with the same height/width as `target_image`.
    """
    # get image size
    h, w = target_image.shape[:2]

    # create inverse affine transform (crop space -> target space)
    M_inv = cv2.invertAffineTransform(M)

    # warp swapped face back to target space
    warped_face = cv2.warpAffine(
        swapped_face,
        M_inv,
        (w, h),
        borderValue=0.0
    )

    # create initial white mask covering the whole crop
    img_white = np.full(
        (swapped_face.shape[0], swapped_face.shape[1]),
        255,
        dtype=np.float32
    )

    # warp white mask to target space: marks where the face lands in the frame
    img_mask = cv2.warpAffine(
        img_white,
        M_inv,
        (w, h),
        borderValue=0.0
    )

    # threshold and refine mask (interpolated edge values > 20 become solid)
    img_mask[img_mask > 20] = 255

    # calculate mask size so the erode/blur kernels scale with face resolution
    mask_h_inds, mask_w_inds = np.where(img_mask == 255)
    if len(mask_h_inds) == 0 or len(mask_w_inds) == 0:
        # Degenerate transform produced an empty mask. Previously `mask_size`
        # was only assigned inside the non-empty branch, so the blur step below
        # raised NameError in this case. Nothing to blend — return the target
        # frame unchanged.
        return target_image.copy()

    mask_h = np.max(mask_h_inds) - np.min(mask_h_inds)
    mask_w = np.max(mask_w_inds) - np.min(mask_w_inds)
    mask_size = int(np.sqrt(mask_h * mask_w))

    # erode mask to pull the blend seam inside the face boundary
    k = max(mask_size // 10, 10)
    kernel = np.ones((k, k), np.uint8)
    img_mask = cv2.erode(img_mask, kernel, iterations=1)

    # blur mask to feather the seam (Gaussian kernel sides must be odd)
    k = max(mask_size // 20, 5)
    kernel_size = (k, k)
    blur_size = tuple(2 * i + 1 for i in kernel_size)
    img_mask = cv2.GaussianBlur(img_mask, blur_size, 0)

    # normalize mask to [0, 1] and add a channel axis for BGR broadcasting
    img_mask = img_mask / 255.0
    img_mask = np.reshape(img_mask, [img_mask.shape[0], img_mask.shape[1], 1])

    # blend images using mask
    result = img_mask * warped_face + (1 - img_mask) * target_image.astype(np.float32)
    result = result.astype(np.uint8)

    return result
88
+
89
def drawKeypoints(image, keypoints, colorBGR, keypointsRadius=2):
    """Draw a filled circle on `image` (modified in place) at each keypoint.

    Args:
        image: BGR image to draw on.
        keypoints: iterable of (x, y) pairs (floats accepted; cast to int).
        colorBGR: circle color in OpenCV's BGR order.
        keypointsRadius: circle radius in pixels.
    """
    for point in keypoints:
        center = (int(point[0]), int(point[1]))
        # thickness=-1 means the circle is filled
        cv2.circle(image, center, radius=keypointsRadius, color=colorBGR, thickness=-1)
LICENSE ADDED
@@ -0,0 +1,661 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ GNU AFFERO GENERAL PUBLIC LICENSE
2
+ Version 3, 19 November 2007
3
+
4
+ Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
5
+ Everyone is permitted to copy and distribute verbatim copies
6
+ of this license document, but changing it is not allowed.
7
+
8
+ Preamble
9
+
10
+ The GNU Affero General Public License is a free, copyleft license for
11
+ software and other kinds of works, specifically designed to ensure
12
+ cooperation with the community in the case of network server software.
13
+
14
+ The licenses for most software and other practical works are designed
15
+ to take away your freedom to share and change the works. By contrast,
16
+ our General Public Licenses are intended to guarantee your freedom to
17
+ share and change all versions of a program--to make sure it remains free
18
+ software for all its users.
19
+
20
+ When we speak of free software, we are referring to freedom, not
21
+ price. Our General Public Licenses are designed to make sure that you
22
+ have the freedom to distribute copies of free software (and charge for
23
+ them if you wish), that you receive source code or can get it if you
24
+ want it, that you can change the software or use pieces of it in new
25
+ free programs, and that you know you can do these things.
26
+
27
+ Developers that use our General Public Licenses protect your rights
28
+ with two steps: (1) assert copyright on the software, and (2) offer
29
+ you this License which gives you legal permission to copy, distribute
30
+ and/or modify the software.
31
+
32
+ A secondary benefit of defending all users' freedom is that
33
+ improvements made in alternate versions of the program, if they
34
+ receive widespread use, become available for other developers to
35
+ incorporate. Many developers of free software are heartened and
36
+ encouraged by the resulting cooperation. However, in the case of
37
+ software used on network servers, this result may fail to come about.
38
+ The GNU General Public License permits making a modified version and
39
+ letting the public access it on a server without ever releasing its
40
+ source code to the public.
41
+
42
+ The GNU Affero General Public License is designed specifically to
43
+ ensure that, in such cases, the modified source code becomes available
44
+ to the community. It requires the operator of a network server to
45
+ provide the source code of the modified version running there to the
46
+ users of that server. Therefore, public use of a modified version, on
47
+ a publicly accessible server, gives the public access to the source
48
+ code of the modified version.
49
+
50
+ An older license, called the Affero General Public License and
51
+ published by Affero, was designed to accomplish similar goals. This is
52
+ a different license, not a version of the Affero GPL, but Affero has
53
+ released a new version of the Affero GPL which permits relicensing under
54
+ this license.
55
+
56
+ The precise terms and conditions for copying, distribution and
57
+ modification follow.
58
+
59
+ TERMS AND CONDITIONS
60
+
61
+ 0. Definitions.
62
+
63
+ "This License" refers to version 3 of the GNU Affero General Public License.
64
+
65
+ "Copyright" also means copyright-like laws that apply to other kinds of
66
+ works, such as semiconductor masks.
67
+
68
+ "The Program" refers to any copyrightable work licensed under this
69
+ License. Each licensee is addressed as "you". "Licensees" and
70
+ "recipients" may be individuals or organizations.
71
+
72
+ To "modify" a work means to copy from or adapt all or part of the work
73
+ in a fashion requiring copyright permission, other than the making of an
74
+ exact copy. The resulting work is called a "modified version" of the
75
+ earlier work or a work "based on" the earlier work.
76
+
77
+ A "covered work" means either the unmodified Program or a work based
78
+ on the Program.
79
+
80
+ To "propagate" a work means to do anything with it that, without
81
+ permission, would make you directly or secondarily liable for
82
+ infringement under applicable copyright law, except executing it on a
83
+ computer or modifying a private copy. Propagation includes copying,
84
+ distribution (with or without modification), making available to the
85
+ public, and in some countries other activities as well.
86
+
87
+ To "convey" a work means any kind of propagation that enables other
88
+ parties to make or receive copies. Mere interaction with a user through
89
+ a computer network, with no transfer of a copy, is not conveying.
90
+
91
+ An interactive user interface displays "Appropriate Legal Notices"
92
+ to the extent that it includes a convenient and prominently visible
93
+ feature that (1) displays an appropriate copyright notice, and (2)
94
+ tells the user that there is no warranty for the work (except to the
95
+ extent that warranties are provided), that licensees may convey the
96
+ work under this License, and how to view a copy of this License. If
97
+ the interface presents a list of user commands or options, such as a
98
+ menu, a prominent item in the list meets this criterion.
99
+
100
+ 1. Source Code.
101
+
102
+ The "source code" for a work means the preferred form of the work
103
+ for making modifications to it. "Object code" means any non-source
104
+ form of a work.
105
+
106
+ A "Standard Interface" means an interface that either is an official
107
+ standard defined by a recognized standards body, or, in the case of
108
+ interfaces specified for a particular programming language, one that
109
+ is widely used among developers working in that language.
110
+
111
+ The "System Libraries" of an executable work include anything, other
112
+ than the work as a whole, that (a) is included in the normal form of
113
+ packaging a Major Component, but which is not part of that Major
114
+ Component, and (b) serves only to enable use of the work with that
115
+ Major Component, or to implement a Standard Interface for which an
116
+ implementation is available to the public in source code form. A
117
+ "Major Component", in this context, means a major essential component
118
+ (kernel, window system, and so on) of the specific operating system
119
+ (if any) on which the executable work runs, or a compiler used to
120
+ produce the work, or an object code interpreter used to run it.
121
+
122
+ The "Corresponding Source" for a work in object code form means all
123
+ the source code needed to generate, install, and (for an executable
124
+ work) run the object code and to modify the work, including scripts to
125
+ control those activities. However, it does not include the work's
126
+ System Libraries, or general-purpose tools or generally available free
127
+ programs which are used unmodified in performing those activities but
128
+ which are not part of the work. For example, Corresponding Source
129
+ includes interface definition files associated with source files for
130
+ the work, and the source code for shared libraries and dynamically
131
+ linked subprograms that the work is specifically designed to require,
132
+ such as by intimate data communication or control flow between those
133
+ subprograms and other parts of the work.
134
+
135
+ The Corresponding Source need not include anything that users
136
+ can regenerate automatically from other parts of the Corresponding
137
+ Source.
138
+
139
+ The Corresponding Source for a work in source code form is that
140
+ same work.
141
+
142
+ 2. Basic Permissions.
143
+
144
+ All rights granted under this License are granted for the term of
145
+ copyright on the Program, and are irrevocable provided the stated
146
+ conditions are met. This License explicitly affirms your unlimited
147
+ permission to run the unmodified Program. The output from running a
148
+ covered work is covered by this License only if the output, given its
149
+ content, constitutes a covered work. This License acknowledges your
150
+ rights of fair use or other equivalent, as provided by copyright law.
151
+
152
+ You may make, run and propagate covered works that you do not
153
+ convey, without conditions so long as your license otherwise remains
154
+ in force. You may convey covered works to others for the sole purpose
155
+ of having them make modifications exclusively for you, or provide you
156
+ with facilities for running those works, provided that you comply with
157
+ the terms of this License in conveying all material for which you do
158
+ not control copyright. Those thus making or running the covered works
159
+ for you must do so exclusively on your behalf, under your direction
160
+ and control, on terms that prohibit them from making any copies of
161
+ your copyrighted material outside their relationship with you.
162
+
163
+ Conveying under any other circumstances is permitted solely under
164
+ the conditions stated below. Sublicensing is not allowed; section 10
165
+ makes it unnecessary.
166
+
167
+ 3. Protecting Users' Legal Rights From Anti-Circumvention Law.
168
+
169
+ No covered work shall be deemed part of an effective technological
170
+ measure under any applicable law fulfilling obligations under article
171
+ 11 of the WIPO copyright treaty adopted on 20 December 1996, or
172
+ similar laws prohibiting or restricting circumvention of such
173
+ measures.
174
+
175
+ When you convey a covered work, you waive any legal power to forbid
176
+ circumvention of technological measures to the extent such circumvention
177
+ is effected by exercising rights under this License with respect to
178
+ the covered work, and you disclaim any intention to limit operation or
179
+ modification of the work as a means of enforcing, against the work's
180
+ users, your or third parties' legal rights to forbid circumvention of
181
+ technological measures.
182
+
183
+ 4. Conveying Verbatim Copies.
184
+
185
+ You may convey verbatim copies of the Program's source code as you
186
+ receive it, in any medium, provided that you conspicuously and
187
+ appropriately publish on each copy an appropriate copyright notice;
188
+ keep intact all notices stating that this License and any
189
+ non-permissive terms added in accord with section 7 apply to the code;
190
+ keep intact all notices of the absence of any warranty; and give all
191
+ recipients a copy of this License along with the Program.
192
+
193
+ You may charge any price or no price for each copy that you convey,
194
+ and you may offer support or warranty protection for a fee.
195
+
196
+ 5. Conveying Modified Source Versions.
197
+
198
+ You may convey a work based on the Program, or the modifications to
199
+ produce it from the Program, in the form of source code under the
200
+ terms of section 4, provided that you also meet all of these conditions:
201
+
202
+ a) The work must carry prominent notices stating that you modified
203
+ it, and giving a relevant date.
204
+
205
+ b) The work must carry prominent notices stating that it is
206
+ released under this License and any conditions added under section
207
+ 7. This requirement modifies the requirement in section 4 to
208
+ "keep intact all notices".
209
+
210
+ c) You must license the entire work, as a whole, under this
211
+ License to anyone who comes into possession of a copy. This
212
+ License will therefore apply, along with any applicable section 7
213
+ additional terms, to the whole of the work, and all its parts,
214
+ regardless of how they are packaged. This License gives no
215
+ permission to license the work in any other way, but it does not
216
+ invalidate such permission if you have separately received it.
217
+
218
+ d) If the work has interactive user interfaces, each must display
219
+ Appropriate Legal Notices; however, if the Program has interactive
220
+ interfaces that do not display Appropriate Legal Notices, your
221
+ work need not make them do so.
222
+
223
+ A compilation of a covered work with other separate and independent
224
+ works, which are not by their nature extensions of the covered work,
225
+ and which are not combined with it such as to form a larger program,
226
+ in or on a volume of a storage or distribution medium, is called an
227
+ "aggregate" if the compilation and its resulting copyright are not
228
+ used to limit the access or legal rights of the compilation's users
229
+ beyond what the individual works permit. Inclusion of a covered work
230
+ in an aggregate does not cause this License to apply to the other
231
+ parts of the aggregate.
232
+
233
+ 6. Conveying Non-Source Forms.
234
+
235
+ You may convey a covered work in object code form under the terms
236
+ of sections 4 and 5, provided that you also convey the
237
+ machine-readable Corresponding Source under the terms of this License,
238
+ in one of these ways:
239
+
240
+ a) Convey the object code in, or embodied in, a physical product
241
+ (including a physical distribution medium), accompanied by the
242
+ Corresponding Source fixed on a durable physical medium
243
+ customarily used for software interchange.
244
+
245
+ b) Convey the object code in, or embodied in, a physical product
246
+ (including a physical distribution medium), accompanied by a
247
+ written offer, valid for at least three years and valid for as
248
+ long as you offer spare parts or customer support for that product
249
+ model, to give anyone who possesses the object code either (1) a
250
+ copy of the Corresponding Source for all the software in the
251
+ product that is covered by this License, on a durable physical
252
+ medium customarily used for software interchange, for a price no
253
+ more than your reasonable cost of physically performing this
254
+ conveying of source, or (2) access to copy the
255
+ Corresponding Source from a network server at no charge.
256
+
257
+ c) Convey individual copies of the object code with a copy of the
258
+ written offer to provide the Corresponding Source. This
259
+ alternative is allowed only occasionally and noncommercially, and
260
+ only if you received the object code with such an offer, in accord
261
+ with subsection 6b.
262
+
263
+ d) Convey the object code by offering access from a designated
264
+ place (gratis or for a charge), and offer equivalent access to the
265
+ Corresponding Source in the same way through the same place at no
266
+ further charge. You need not require recipients to copy the
267
+ Corresponding Source along with the object code. If the place to
268
+ copy the object code is a network server, the Corresponding Source
269
+ may be on a different server (operated by you or a third party)
270
+ that supports equivalent copying facilities, provided you maintain
271
+ clear directions next to the object code saying where to find the
272
+ Corresponding Source. Regardless of what server hosts the
273
+ Corresponding Source, you remain obligated to ensure that it is
274
+ available for as long as needed to satisfy these requirements.
275
+
276
+ e) Convey the object code using peer-to-peer transmission, provided
277
+ you inform other peers where the object code and Corresponding
278
+ Source of the work are being offered to the general public at no
279
+ charge under subsection 6d.
280
+
281
+ A separable portion of the object code, whose source code is excluded
282
+ from the Corresponding Source as a System Library, need not be
283
+ included in conveying the object code work.
284
+
285
+ A "User Product" is either (1) a "consumer product", which means any
286
+ tangible personal property which is normally used for personal, family,
287
+ or household purposes, or (2) anything designed or sold for incorporation
288
+ into a dwelling. In determining whether a product is a consumer product,
289
+ doubtful cases shall be resolved in favor of coverage. For a particular
290
+ product received by a particular user, "normally used" refers to a
291
+ typical or common use of that class of product, regardless of the status
292
+ of the particular user or of the way in which the particular user
293
+ actually uses, or expects or is expected to use, the product. A product
294
+ is a consumer product regardless of whether the product has substantial
295
+ commercial, industrial or non-consumer uses, unless such uses represent
296
+ the only significant mode of use of the product.
297
+
298
+ "Installation Information" for a User Product means any methods,
299
+ procedures, authorization keys, or other information required to install
300
+ and execute modified versions of a covered work in that User Product from
301
+ a modified version of its Corresponding Source. The information must
302
+ suffice to ensure that the continued functioning of the modified object
303
+ code is in no case prevented or interfered with solely because
304
+ modification has been made.
305
+
306
+ If you convey an object code work under this section in, or with, or
307
+ specifically for use in, a User Product, and the conveying occurs as
308
+ part of a transaction in which the right of possession and use of the
309
+ User Product is transferred to the recipient in perpetuity or for a
310
+ fixed term (regardless of how the transaction is characterized), the
311
+ Corresponding Source conveyed under this section must be accompanied
312
+ by the Installation Information. But this requirement does not apply
313
+ if neither you nor any third party retains the ability to install
314
+ modified object code on the User Product (for example, the work has
315
+ been installed in ROM).
316
+
317
+ The requirement to provide Installation Information does not include a
318
+ requirement to continue to provide support service, warranty, or updates
319
+ for a work that has been modified or installed by the recipient, or for
320
+ the User Product in which it has been modified or installed. Access to a
321
+ network may be denied when the modification itself materially and
322
+ adversely affects the operation of the network or violates the rules and
323
+ protocols for communication across the network.
324
+
325
+ Corresponding Source conveyed, and Installation Information provided,
326
+ in accord with this section must be in a format that is publicly
327
+ documented (and with an implementation available to the public in
328
+ source code form), and must require no special password or key for
329
+ unpacking, reading or copying.
330
+
331
+ 7. Additional Terms.
332
+
333
+ "Additional permissions" are terms that supplement the terms of this
334
+ License by making exceptions from one or more of its conditions.
335
+ Additional permissions that are applicable to the entire Program shall
336
+ be treated as though they were included in this License, to the extent
337
+ that they are valid under applicable law. If additional permissions
338
+ apply only to part of the Program, that part may be used separately
339
+ under those permissions, but the entire Program remains governed by
340
+ this License without regard to the additional permissions.
341
+
342
+ When you convey a copy of a covered work, you may at your option
343
+ remove any additional permissions from that copy, or from any part of
344
+ it. (Additional permissions may be written to require their own
345
+ removal in certain cases when you modify the work.) You may place
346
+ additional permissions on material, added by you to a covered work,
347
+ for which you have or can give appropriate copyright permission.
348
+
349
+ Notwithstanding any other provision of this License, for material you
350
+ add to a covered work, you may (if authorized by the copyright holders of
351
+ that material) supplement the terms of this License with terms:
352
+
353
+ a) Disclaiming warranty or limiting liability differently from the
354
+ terms of sections 15 and 16 of this License; or
355
+
356
+ b) Requiring preservation of specified reasonable legal notices or
357
+ author attributions in that material or in the Appropriate Legal
358
+ Notices displayed by works containing it; or
359
+
360
+ c) Prohibiting misrepresentation of the origin of that material, or
361
+ requiring that modified versions of such material be marked in
362
+ reasonable ways as different from the original version; or
363
+
364
+ d) Limiting the use for publicity purposes of names of licensors or
365
+ authors of the material; or
366
+
367
+ e) Declining to grant rights under trademark law for use of some
368
+ trade names, trademarks, or service marks; or
369
+
370
+ f) Requiring indemnification of licensors and authors of that
371
+ material by anyone who conveys the material (or modified versions of
372
+ it) with contractual assumptions of liability to the recipient, for
373
+ any liability that these contractual assumptions directly impose on
374
+ those licensors and authors.
375
+
376
+ All other non-permissive additional terms are considered "further
377
+ restrictions" within the meaning of section 10. If the Program as you
378
+ received it, or any part of it, contains a notice stating that it is
379
+ governed by this License along with a term that is a further
380
+ restriction, you may remove that term. If a license document contains
381
+ a further restriction but permits relicensing or conveying under this
382
+ License, you may add to a covered work material governed by the terms
383
+ of that license document, provided that the further restriction does
384
+ not survive such relicensing or conveying.
385
+
386
+ If you add terms to a covered work in accord with this section, you
387
+ must place, in the relevant source files, a statement of the
388
+ additional terms that apply to those files, or a notice indicating
389
+ where to find the applicable terms.
390
+
391
+ Additional terms, permissive or non-permissive, may be stated in the
392
+ form of a separately written license, or stated as exceptions;
393
+ the above requirements apply either way.
394
+
395
+ 8. Termination.
396
+
397
+ You may not propagate or modify a covered work except as expressly
398
+ provided under this License. Any attempt otherwise to propagate or
399
+ modify it is void, and will automatically terminate your rights under
400
+ this License (including any patent licenses granted under the third
401
+ paragraph of section 11).
402
+
403
+ However, if you cease all violation of this License, then your
404
+ license from a particular copyright holder is reinstated (a)
405
+ provisionally, unless and until the copyright holder explicitly and
406
+ finally terminates your license, and (b) permanently, if the copyright
407
+ holder fails to notify you of the violation by some reasonable means
408
+ prior to 60 days after the cessation.
409
+
410
+ Moreover, your license from a particular copyright holder is
411
+ reinstated permanently if the copyright holder notifies you of the
412
+ violation by some reasonable means, this is the first time you have
413
+ received notice of violation of this License (for any work) from that
414
+ copyright holder, and you cure the violation prior to 30 days after
415
+ your receipt of the notice.
416
+
417
+ Termination of your rights under this section does not terminate the
418
+ licenses of parties who have received copies or rights from you under
419
+ this License. If your rights have been terminated and not permanently
420
+ reinstated, you do not qualify to receive new licenses for the same
421
+ material under section 10.
422
+
423
+ 9. Acceptance Not Required for Having Copies.
424
+
425
+ You are not required to accept this License in order to receive or
426
+ run a copy of the Program. Ancillary propagation of a covered work
427
+ occurring solely as a consequence of using peer-to-peer transmission
428
+ to receive a copy likewise does not require acceptance. However,
429
+ nothing other than this License grants you permission to propagate or
430
+ modify any covered work. These actions infringe copyright if you do
431
+ not accept this License. Therefore, by modifying or propagating a
432
+ covered work, you indicate your acceptance of this License to do so.
433
+
434
+ 10. Automatic Licensing of Downstream Recipients.
435
+
436
+ Each time you convey a covered work, the recipient automatically
437
+ receives a license from the original licensors, to run, modify and
438
+ propagate that work, subject to this License. You are not responsible
439
+ for enforcing compliance by third parties with this License.
440
+
441
+ An "entity transaction" is a transaction transferring control of an
442
+ organization, or substantially all assets of one, or subdividing an
443
+ organization, or merging organizations. If propagation of a covered
444
+ work results from an entity transaction, each party to that
445
+ transaction who receives a copy of the work also receives whatever
446
+ licenses to the work the party's predecessor in interest had or could
447
+ give under the previous paragraph, plus a right to possession of the
448
+ Corresponding Source of the work from the predecessor in interest, if
449
+ the predecessor has it or can get it with reasonable efforts.
450
+
451
+ You may not impose any further restrictions on the exercise of the
452
+ rights granted or affirmed under this License. For example, you may
453
+ not impose a license fee, royalty, or other charge for exercise of
454
+ rights granted under this License, and you may not initiate litigation
455
+ (including a cross-claim or counterclaim in a lawsuit) alleging that
456
+ any patent claim is infringed by making, using, selling, offering for
457
+ sale, or importing the Program or any portion of it.
458
+
459
+ 11. Patents.
460
+
461
+ A "contributor" is a copyright holder who authorizes use under this
462
+ License of the Program or a work on which the Program is based. The
463
+ work thus licensed is called the contributor's "contributor version".
464
+
465
+ A contributor's "essential patent claims" are all patent claims
466
+ owned or controlled by the contributor, whether already acquired or
467
+ hereafter acquired, that would be infringed by some manner, permitted
468
+ by this License, of making, using, or selling its contributor version,
469
+ but do not include claims that would be infringed only as a
470
+ consequence of further modification of the contributor version. For
471
+ purposes of this definition, "control" includes the right to grant
472
+ patent sublicenses in a manner consistent with the requirements of
473
+ this License.
474
+
475
+ Each contributor grants you a non-exclusive, worldwide, royalty-free
476
+ patent license under the contributor's essential patent claims, to
477
+ make, use, sell, offer for sale, import and otherwise run, modify and
478
+ propagate the contents of its contributor version.
479
+
480
+ In the following three paragraphs, a "patent license" is any express
481
+ agreement or commitment, however denominated, not to enforce a patent
482
+ (such as an express permission to practice a patent or covenant not to
483
+ sue for patent infringement). To "grant" such a patent license to a
484
+ party means to make such an agreement or commitment not to enforce a
485
+ patent against the party.
486
+
487
+ If you convey a covered work, knowingly relying on a patent license,
488
+ and the Corresponding Source of the work is not available for anyone
489
+ to copy, free of charge and under the terms of this License, through a
490
+ publicly available network server or other readily accessible means,
491
+ then you must either (1) cause the Corresponding Source to be so
492
+ available, or (2) arrange to deprive yourself of the benefit of the
493
+ patent license for this particular work, or (3) arrange, in a manner
494
+ consistent with the requirements of this License, to extend the patent
495
+ license to downstream recipients. "Knowingly relying" means you have
496
+ actual knowledge that, but for the patent license, your conveying the
497
+ covered work in a country, or your recipient's use of the covered work
498
+ in a country, would infringe one or more identifiable patents in that
499
+ country that you have reason to believe are valid.
500
+
501
+ If, pursuant to or in connection with a single transaction or
502
+ arrangement, you convey, or propagate by procuring conveyance of, a
503
+ covered work, and grant a patent license to some of the parties
504
+ receiving the covered work authorizing them to use, propagate, modify
505
+ or convey a specific copy of the covered work, then the patent license
506
+ you grant is automatically extended to all recipients of the covered
507
+ work and works based on it.
508
+
509
+ A patent license is "discriminatory" if it does not include within
510
+ the scope of its coverage, prohibits the exercise of, or is
511
+ conditioned on the non-exercise of one or more of the rights that are
512
+ specifically granted under this License. You may not convey a covered
513
+ work if you are a party to an arrangement with a third party that is
514
+ in the business of distributing software, under which you make payment
515
+ to the third party based on the extent of your activity of conveying
516
+ the work, and under which the third party grants, to any of the
517
+ parties who would receive the covered work from you, a discriminatory
518
+ patent license (a) in connection with copies of the covered work
519
+ conveyed by you (or copies made from those copies), or (b) primarily
520
+ for and in connection with specific products or compilations that
521
+ contain the covered work, unless you entered into that arrangement,
522
+ or that patent license was granted, prior to 28 March 2007.
523
+
524
+ Nothing in this License shall be construed as excluding or limiting
525
+ any implied license or other defenses to infringement that may
526
+ otherwise be available to you under applicable patent law.
527
+
528
+ 12. No Surrender of Others' Freedom.
529
+
530
+ If conditions are imposed on you (whether by court order, agreement or
531
+ otherwise) that contradict the conditions of this License, they do not
532
+ excuse you from the conditions of this License. If you cannot convey a
533
+ covered work so as to satisfy simultaneously your obligations under this
534
+ License and any other pertinent obligations, then as a consequence you may
535
+ not convey it at all. For example, if you agree to terms that obligate you
536
+ to collect a royalty for further conveying from those to whom you convey
537
+ the Program, the only way you could satisfy both those terms and this
538
+ License would be to refrain entirely from conveying the Program.
539
+
540
+ 13. Remote Network Interaction; Use with the GNU General Public License.
541
+
542
+ Notwithstanding any other provision of this License, if you modify the
543
+ Program, your modified version must prominently offer all users
544
+ interacting with it remotely through a computer network (if your version
545
+ supports such interaction) an opportunity to receive the Corresponding
546
+ Source of your version by providing access to the Corresponding Source
547
+ from a network server at no charge, through some standard or customary
548
+ means of facilitating copying of software. This Corresponding Source
549
+ shall include the Corresponding Source for any work covered by version 3
550
+ of the GNU General Public License that is incorporated pursuant to the
551
+ following paragraph.
552
+
553
+ Notwithstanding any other provision of this License, you have
554
+ permission to link or combine any covered work with a work licensed
555
+ under version 3 of the GNU General Public License into a single
556
+ combined work, and to convey the resulting work. The terms of this
557
+ License will continue to apply to the part which is the covered work,
558
+ but the work with which it is combined will remain governed by version
559
+ 3 of the GNU General Public License.
560
+
561
+ 14. Revised Versions of this License.
562
+
563
+ The Free Software Foundation may publish revised and/or new versions of
564
+ the GNU Affero General Public License from time to time. Such new versions
565
+ will be similar in spirit to the present version, but may differ in detail to
566
+ address new problems or concerns.
567
+
568
+ Each version is given a distinguishing version number. If the
569
+ Program specifies that a certain numbered version of the GNU Affero General
570
+ Public License "or any later version" applies to it, you have the
571
+ option of following the terms and conditions either of that numbered
572
+ version or of any later version published by the Free Software
573
+ Foundation. If the Program does not specify a version number of the
574
+ GNU Affero General Public License, you may choose any version ever published
575
+ by the Free Software Foundation.
576
+
577
+ If the Program specifies that a proxy can decide which future
578
+ versions of the GNU Affero General Public License can be used, that proxy's
579
+ public statement of acceptance of a version permanently authorizes you
580
+ to choose that version for the Program.
581
+
582
+ Later license versions may give you additional or different
583
+ permissions. However, no additional obligations are imposed on any
584
+ author or copyright holder as a result of your choosing to follow a
585
+ later version.
586
+
587
+ 15. Disclaimer of Warranty.
588
+
589
+ THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
590
+ APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
591
+ HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
592
+ OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
593
+ THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
594
+ PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
595
+ IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
596
+ ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
597
+
598
+ 16. Limitation of Liability.
599
+
600
+ IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
601
+ WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
602
+ THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
603
+ GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
604
+ USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
605
+ DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
606
+ PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
607
+ EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
608
+ SUCH DAMAGES.
609
+
610
+ 17. Interpretation of Sections 15 and 16.
611
+
612
+ If the disclaimer of warranty and limitation of liability provided
613
+ above cannot be given local legal effect according to their terms,
614
+ reviewing courts shall apply local law that most closely approximates
615
+ an absolute waiver of all civil liability in connection with the
616
+ Program, unless a warranty or assumption of liability accompanies a
617
+ copy of the Program in return for a fee.
618
+
619
+ END OF TERMS AND CONDITIONS
620
+
621
+ How to Apply These Terms to Your New Programs
622
+
623
+ If you develop a new program, and you want it to be of the greatest
624
+ possible use to the public, the best way to achieve this is to make it
625
+ free software which everyone can redistribute and change under these terms.
626
+
627
+ To do so, attach the following notices to the program. It is safest
628
+ to attach them to the start of each source file to most effectively
629
+ state the exclusion of warranty; and each file should have at least
630
+ the "copyright" line and a pointer to where the full notice is found.
631
+
632
+ <one line to give the program's name and a brief idea of what it does.>
633
+ Copyright (C) <year> <name of author>
634
+
635
+ This program is free software: you can redistribute it and/or modify
636
+ it under the terms of the GNU Affero General Public License as published
637
+ by the Free Software Foundation, either version 3 of the License, or
638
+ (at your option) any later version.
639
+
640
+ This program is distributed in the hope that it will be useful,
641
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
642
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
643
+ GNU Affero General Public License for more details.
644
+
645
+ You should have received a copy of the GNU Affero General Public License
646
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
647
+
648
+ Also add information on how to contact you by electronic and paper mail.
649
+
650
+ If your software can interact with users remotely through a computer
651
+ network, you should also make sure that it provides a way for users to
652
+ get its source. For example, if your program is a web application, its
653
+ interface could display a "Source" link that leads users to an archive
654
+ of the code. There are many ways you could offer source, and different
655
+ solutions will be better for different programs; see section 13 for the
656
+ specific requirements.
657
+
658
+ You should also get your employer (if you work as a programmer) or school,
659
+ if any, to sign a "copyright disclaimer" for the program, if necessary.
660
+ For more information on this, and how to apply and follow the GNU AGPL, see
661
+ <https://www.gnu.org/licenses/>.
ModelFormat.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import onnx
3
+ import torch
4
+
5
+ from StyleTransferModel_128 import StyleTransferModel
6
+
7
def save_as_onnx_model(torch_model_path, save_emap=True, img_size = 128, originalInswapperClassCompatible = True):
    """Export a StyleTransferModel checkpoint (.pth) to ONNX.

    The ONNX file is written next to the input checkpoint, with the ".pth"
    suffix replaced by ".onnx". The exported graph takes two inputs:
    'target' (1, 3, img_size, img_size) and 'source' (1, 512), and produces
    one output named 'output'.

    Args:
        torch_model_path: Path to the .pth state dict to export.
        save_emap: When True, embed the 512x512 "emap" matrix (loaded from
            the local file "emap.npy") into the ONNX graph as an initializer
            named 'emap', as the original inswapper model does.
        img_size: Spatial size of the dummy 'target' input used for tracing.
        originalInswapperClassCompatible: When True, export with fixed input
            shapes (no dynamic axes) so the file works with the original
            INSwapper class; otherwise mark batch/channel/spatial axes dynamic.
    """
    output_path = torch_model_path.replace(".pth", ".onnx")

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # Initialize model with the pretrained weights
    # NOTE(review): strict=False tolerates missing/unexpected state-dict keys
    # silently — confirm that is intended for all checkpoints exported here.
    torch_model = StyleTransferModel().to(device)
    torch_model.load_state_dict(torch.load(torch_model_path, map_location=device), strict=False)

    # set the model to inference mode
    torch_model.eval()

    if originalInswapperClassCompatible:
        dynamic_axes = None
    else:
        image_axe = {0: 'batch_size', 1: 'channels', 2: 'height', 3: 'width'}
        dynamic_axes = {'target': image_axe,  # variable length axes
                        'source': {0: 'batch_size'},
                        'output' : image_axe}

    # NOTE(review): the dict below is passed as the `args` argument;
    # torch.onnx.export treats a trailing dict as keyword arguments for
    # forward(target=..., source=...) — verify against the installed torch
    # version, as this calling convention changed across releases.
    torch.onnx.export(torch_model,               # model being run
                    {
                        'target' :torch.randn(1, 3, img_size, img_size, requires_grad=True).to(device),
                        'source': torch.randn(1, 512, requires_grad=True).to(device),
                    },                         # model input (or a tuple for multiple inputs)
                    output_path,               # where to save the model (can be a file or file-like object)
                    export_params=True,        # store the trained parameter weights inside the model file
                    opset_version=11,          # the ONNX version to export the model to
                    do_constant_folding=True,  # whether to execute constant folding for optimization
                    input_names = ['target', "source"],   # the model's input names
                    output_names = ['output'], # the model's output names
                    dynamic_axes=dynamic_axes)

    # Re-open the exported graph so the emap initializer can be appended.
    model = onnx.load(output_path)

    if save_emap :
        # "emap" is the 512x512 projection matrix applied to ArcFace
        # embeddings (see README: latent = normed_embedding @ emap).
        emap = np.load("emap.npy")

        emap_tensor = onnx.helper.make_tensor(
            name='emap',
            data_type=onnx.TensorProto.FLOAT,
            dims=[512, 512],
            vals=emap
        )

        model.graph.initializer.append(emap_tensor)

    onnx.save(model, output_path)
README.md CHANGED
@@ -1,12 +1,130 @@
1
- ---
2
- title: Hg
3
- emoji: 👁
4
- colorFrom: pink
5
- colorTo: gray
6
- sdk: gradio
7
- sdk_version: 5.15.0
8
- app_file: app.py
9
- pinned: false
10
- ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ReSwapper
2
+
3
+ ReSwapper aims to reproduce the implementation of inswapper. This repository provides code for training, inference, and includes pretrained weights.
4
+
5
+ Here is the comparison of the output of Inswapper and Reswapper.
6
+ | Target | Source | Inswapper Output | Reswapper Output<br>(256 resolution)<br>(Step 1399500) | Reswapper Output<br>(Step 1019500) | Reswapper Output<br>(Step 429500) |
7
+ |--------|--------|--------|--------|--------|--------|
8
+ | ![image](example/1/target.jpg) |![image](example/1/source.jpg) | ![image](example/1/inswapperOutput.jpg) | ![image](example/1/reswapperOutput-1399500_256.jpg) |![image](example/1/reswapperOutput-1019500.jpg) | ![image](example/1/reswapperOutput-429500.jpg) |
9
+ | ![image](example/2/target.jpg) |![image](example/2/source.jpg) | ![image](example/2/inswapperOutput.jpg) | ![image](example/2/reswapperOutput-1399500_256.jpg) | ![image](example/2/reswapperOutput-1019500.jpg) | ![image](example/2/reswapperOutput-429500.jpg) |
10
+ | ![image](example/3/target.jpg) |![image](example/3/source.png) | ![image](example/3/inswapperOutput.jpg) | ![image](example/3/reswapperOutput-1399500_256.jpg) | ![image](example/3/reswapperOutput-1019500.jpg) | ![image](example/3/reswapperOutput-429500.jpg) |
11
+
12
+ ## Installation
13
+
14
+ ```bash
15
+ git clone https://github.com/somanchiu/ReSwapper.git
16
+ cd ReSwapper
17
+ python -m venv venv
18
+
19
+ venv\scripts\activate
20
+
21
+ pip install -r requirements.txt
22
+
23
+ pip install torch torchvision --force --index-url https://download.pytorch.org/whl/cu121
24
+ pip install onnxruntime-gpu --force --extra-index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/onnxruntime-cuda-12/pypi/simple/
25
+ ```
26
+
27
+ ## The details of inswapper
28
+
29
+ ### Model architecture
30
+ The inswapper model architecture can be visualized in [Netron](https://netron.app). You can compare with ReSwapper implementation to see architectural similarities. Exporting the model with opset_version=10 makes it easier to compare the graph in Netron. However, it will cause issue #8.
31
+
32
+ We can also use the following Python code to get more details:
33
+ ```python
34
+ model = onnx.load('test.onnx')
35
+ printable_graph=onnx.helper.printable_graph(model.graph)
36
+ ```
37
+
38
+ The model architectures of InSwapper and SimSwap are extremely similar and worth paying attention to.
39
+
40
+ ### Model inputs
41
+ - target: [1, 3, 128, 128] shape image in RGB format with face alignment, normalized to [-1, 1] range
42
+ - source (latent): [1, 512] shape vector, the features of the source face
43
+ - Calculation of latent, "emap" can be extracted from the original inswapper model.
44
+ ```python
45
+ latent = source_face.normed_embedding.reshape((1,-1))
46
+ latent = np.dot(latent, emap)
47
+ latent /= np.linalg.norm(latent)
48
+ ```
49
+ - It can also be used to calculate the similarity between two faces using cosine similarity.
50
+
51
+ ### Model output
52
+ Model inswapper_128 not only changes facial features, but also body shape.
53
+
54
+ | Target | Source | Inswapper Output | Reswapper Output<br>(Step 429500) |
55
+ |--------|--------|--------|--------|
56
+ | ![image](example/1/target.jpg) |![image](example/1/source.jpg) | ![image](example/1/inswapperOutput.gif) | ![image](example/1/reswapperOutput.gif) |
57
+
58
+ ### Loss Functions
59
+ There is no information released from insightface. It is an important part of the training. However, there are a lot of articles and papers that can be referenced. By reading a substantial number of articles and papers on face swapping, ID fidelity, and style transfer, you'll frequently encounter the following keywords:
60
+ - content loss
61
+ - style loss/id loss
62
+ - perceptual loss
63
+
64
+ ### Face alignment
65
+ Face alignment is handled incorrectly at resolutions other than 128. To resolve this issue, add an offset to "dst" in both x and y directions in the function "face_align.estimate_norm". The offset is approximately given by the formula: Offset = (128/32768) * Resolution - 0.5
66
+
67
+ ## Training
68
+ ### 0. Pretrained weights (Optional)
69
+ If you don't want to train the model from scratch, you can download the pretrained weights and pass model_path into the train function in train.py.
70
+
71
+ ### 1. Dataset Preparation
72
+ Download [FFHQ](https://www.kaggle.com/datasets/arnaud58/flickrfaceshq-dataset-ffhq) to use as target and source images. For the swapped face images, we can use the inswapper output.
73
+
74
+ ### 2. Model Training
75
+
76
+ Optimizer: Adam
77
+
78
+ Learning rate: 0.0001
79
+
80
+ Modify the code in train.py if needed. Then, execute:
81
+ ```python
82
+ python train.py
83
+ ```
84
+
85
+ The model will be saved as "reswapper-\<total steps\>.pth". You can also save the model as ONNX using the ModelFormat.save_as_onnx_model function. The ONNX model can then be used with the original INSwapper class.
86
+
87
+ All losses will be logged into TensorBoard.
88
+
89
+ Using images with different resolutions simultaneously to train the model will enhance its generalization ability. To apply this strategy, you can pass "resolutions" into the train function.
90
+
91
+ Generalization ability of the model trained with resolutions of 128 and 256:
92
+
93
+ | Output<br>resolution | 128 | 160 | 256 |
94
+ |--------|--------|--------|--------|
95
+ |Output| ![image](example/GeneralizationAbility/1399500_128.jpg) |![image](example/GeneralizationAbility/1399500_160.jpg) |![image](example/GeneralizationAbility/1399500_256.jpg) |
96
+
97
+ Enhancing data diversity will improve output quality, you can pass "enableDataAugmentation" into the train function to perform data augmentation.
98
+
99
+ | Target | Source | Inswapper Output | Reswapper Output<br>(Step 1567500) | Reswapper Output<br>(Step 1399500) |
100
+ |--------|--------|--------|--------|--------|
101
+ |![image](example/DataAugmentation/target.jpg)| ![image](example/DataAugmentation/source.jpg) |![image](example/DataAugmentation/inswapper_output.jpg) |![image](example/DataAugmentation/reswapper_256Output-1567500.jpg) | ![image](example/DataAugmentation/reswapper_256Output-1399500.jpg) |
102
+
103
+ #### Notes
104
+ - Do not stop the training too early.
105
+
106
+ - I'm using an RTX3060 12GB for training. It takes around 12 hours for 50,000 steps.
107
+ - The optimizer may need to be changed to SGD for the final training, as many articles show that SGD can result in lower loss.
108
+ - To get inspiration for improving the model, you might want to review the commented code and unused functions in commit [c2a12e10021ecd1342b9ba50570a16b18f9634b9](https://github.com/somanchiu/ReSwapper/commit/c2a12e10021ecd1342b9ba50570a16b18f9634b9).
109
+
110
+ ## Inference
111
+ ```python
112
+ python swap.py
113
+ ```
114
+
115
+ ## Pretrained Model
116
+ ### 256 Resolution
117
+ - [reswapper_256-1567500.pth](https://huggingface.co/somanchiu/reswapper/tree/main)
118
+ - [reswapper_256-1399500.pth](https://huggingface.co/somanchiu/reswapper/tree/main)
119
+
120
+ ### 128 Resolution
121
+ - [reswapper-1019500.pth](https://huggingface.co/somanchiu/reswapper/tree/main)
122
+ - [reswapper-1019500.onnx](https://huggingface.co/somanchiu/reswapper/tree/main)
123
+ - [reswapper-429500.pth](https://huggingface.co/somanchiu/reswapper/tree/main)
124
+ - [reswapper-429500.onnx](https://huggingface.co/somanchiu/reswapper/tree/main)
125
+
126
+ ### Notes
127
+ If you downloaded the ONNX format model before 2024/11/25, please download the model again or export the model with opset_version=11. This is related to issue #8.
128
+
129
+ ## To Do
130
+ - Create a 512-resolution model (alternative to inswapper_512)
StyleTransferLoss.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import cv2
2
+ import torch
3
+ import torch.nn as nn
4
+ import numpy as np
5
+ from insightface.app import FaceAnalysis
6
+ from pytorch_msssim import ssim
7
+
8
+ import Image
9
+
10
class StyleTransferLoss(nn.Module):
    """Content (SSIM) + identity (face-embedding cosine) loss pair.

    ``forward`` returns ``(content_loss, identity_loss)``. The identity term
    is ``None`` whenever a face cannot be detected in either image.
    """

    def __init__(self, device='cuda', face_analysis=None):
        super(StyleTransferLoss, self).__init__()
        if face_analysis is not None:
            self.face_analysis = face_analysis
        else:
            # Build a default insightface analyser; prefer CUDA when present.
            self.face_analysis = FaceAnalysis(providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
            self.face_analysis.prepare(ctx_id=0, det_size=(128, 128))
        self.device = device
        self.cosine_similarity = nn.CosineSimilarity(dim=0)

        # Content loss (kept for compatibility; forward() currently uses SSIM).
        self.content_loss = nn.MSELoss()

    def extract_face_latent(self, image):
        """Detect a face in a [0, 1] CHW tensor and return its latent, or None."""
        # Tensor (C, H, W) in [0, 1]  ->  uint8 BGR array for insightface.
        chw = image.squeeze().cpu().detach()
        rgb_u8 = (chw.permute(1, 2, 0).numpy() * 255).astype(np.uint8)
        bgr_u8 = cv2.cvtColor(rgb_u8, cv2.COLOR_RGB2BGR)

        detections = self.face_analysis.get(bgr_u8)
        if len(detections) == 0:
            return None
        return torch.tensor(Image.getLatent(detections[0])[0]).to(self.device)

    def forward(self, output_image, target_content):
        """Return (content_loss, identity_loss); identity_loss may be None."""
        # SSIM-based content term; images are assumed to lie in [0, 1].
        content_loss = 1 - ssim(output_image, target_content, data_range=1.0)

        output_embedding = self.extract_face_latent(output_image)
        target_embedding = self.extract_face_latent(target_content)

        identity_loss = None
        if output_embedding is not None and target_embedding is not None:
            similarity = self.cosine_similarity(output_embedding, target_embedding)
            # Map cosine in [-1, 1] to a [0, 1] distance, then sharpen and weight.
            identity_loss = 1 - ((similarity + 1) / 2)
            identity_loss = identity_loss ** 2 * 10

        return content_loss, identity_loss
StyleTransferModel_128.py ADDED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ import torch.nn.functional as F
4
+
5
class StyleTransferModel(nn.Module):
    """Face-swap generator operating on aligned face crops (128x128 default).

    Inputs:
        target: (N, 3, H, W) aligned target-face image (per README, normalized
            to [-1, 1]).
        source: (N, 512) identity latent of the source face.

    Returns an image tensor in [0, 1] (Tanh output remapped).

    The graph mirrors the original inswapper architecture: a conv encoder,
    six latent-modulated residual StyleBlocks, and an upsampling decoder.
    Attribute names (target_encoder, style_blocks, decoder, decoderPart1,
    decoderPart2) are preserved so existing .pth checkpoints keep loading.
    """

    def __init__(self):
        super(StyleTransferModel, self).__init__()

        # Encoder: 3 -> 128 -> 256 -> 512 -> 1024 channels, downsampling twice
        # (stride 2). Reflection padding for the 7x7 conv is applied
        # functionally in forward(), so the conv itself uses padding=0.
        self.target_encoder = nn.Sequential(
            nn.Conv2d(3, 128, kernel_size=7, stride=1, padding=0),
            nn.LeakyReLU(0.2),
            nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1),
            nn.LeakyReLU(0.2),
            nn.Conv2d(256, 512, kernel_size=3, stride=2, padding=1),
            nn.LeakyReLU(0.2),
            nn.Conv2d(512, 1024, kernel_size=3, stride=2, padding=1),
            nn.LeakyReLU(0.2),
        )

        # Six residual blocks modulated by the 512-d source identity latent.
        self.style_blocks = nn.ModuleList([
            StyleBlock(1024, 1024, blockIndex) for blockIndex in range(6)
        ])

        # Decoder stages, interleaved in forward() with 2x bilinear upsampling.
        self.decoder = nn.Sequential(
            nn.Conv2d(1024, 512, kernel_size=3, stride=1, padding=1),
            nn.LeakyReLU(0.2)
        )

        self.decoderPart1 = nn.Sequential(
            nn.Conv2d(512, 256, kernel_size=3, stride=1, padding=1),
            nn.LeakyReLU(0.2),
            nn.Conv2d(256, 128, kernel_size=3, stride=1, padding=1),
            nn.LeakyReLU(0.2)
        )

        self.decoderPart2 = nn.Sequential(
            nn.Conv2d(128, 3, kernel_size=7, stride=1, padding=0),
            nn.Tanh()
        )

    def _upsample2x(self, x):
        # F.interpolate replaces the deprecated F.upsample alias (identical
        # numerics). align_corners=False for ONNX compatibility.
        return F.interpolate(x, scale_factor=2, mode='bilinear',
                             align_corners=False)

    def forward(self, target, source):
        """Swap the identity of `target` to `source`; returns image in [0, 1]."""
        # Functional reflection padding for the 7x7 encoder conv.
        target = F.pad(target, pad=(3, 3, 3, 3), mode='reflect')

        target_features = self.target_encoder(target)

        # Inject the source identity through the style blocks.
        x = target_features
        for style_block in self.style_blocks:
            x = style_block(x, source)

        # Decode with two 2x upsampling stages back to input resolution.
        x = self._upsample2x(x)
        output = self.decoder(x)

        output = self._upsample2x(output)
        output = self.decoderPart1(output)

        # Reflection padding for the final 7x7 conv, then Tanh in [-1, 1].
        output = F.pad(output, pad=(3, 3, 3, 3), mode='reflect')
        output = self.decoderPart2(output)

        # Remap Tanh range [-1, 1] to [0, 1].
        return (output + 1) / 2
86
+
87
class StyleBlock(nn.Module):
    """Residual block whose two convolutions are modulated by a 512-d latent.

    Each conv output is spatially normalized (zero-mean, unit RMS) and then
    affinely transformed by per-channel (scale, shift) pairs projected from
    the style latent.
    """

    def __init__(self, in_channels, out_channels, blockIndex):
        super(StyleBlock, self).__init__()
        # Layer creation order (conv1, conv2, style1, style2) is kept so
        # checkpoints and seeded initialisation stay compatible.
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=0)
        self.conv2 = nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=0)
        self.style1 = nn.Linear(512, 2048)
        self.style2 = nn.Linear(512, 2048)
        # Kept for compatibility with the original attribute layout.
        self.style = [self.style1, self.style2]

        self.blockIndex = blockIndex

    def normalizeConvRMS(self, conv):
        # Zero-mean over the spatial dims, then divide by the RMS
        # (small epsilon for numerical stability).
        centered = conv - torch.mean(conv, dim=[2, 3], keepdim=True)
        mean_sq = torch.mean(centered * centered, dim=[2, 3], keepdim=True)
        rms = torch.sqrt(mean_sq + 0.00000001)
        return (1 / rms) * centered

    def _modulation(self, branch, style):
        # Project the latent to 2048 values, reshape to (N, 2048, 1, 1),
        # and split into per-channel (scale, shift) halves of 1024 each.
        params = branch(style)
        params = torch.unsqueeze(params, 2)
        params = torch.unsqueeze(params, 3)
        return params[:, :1024, :, :], params[:, 1024:, :, :]

    def forward(self, residual, style):
        scale1, shift1 = self._modulation(self.style1, style)
        scale2, shift2 = self._modulation(self.style2, style)

        # First conv: reflect-pad, convolve, normalize, modulate, ReLU.
        padded = F.pad(residual, pad=(1, 1, 1, 1), mode='reflect')
        out = self.normalizeConvRMS(self.conv1(padded))
        out = F.relu(out * scale1 + shift1)

        # Second conv: reflect-pad, convolve, normalize, modulate (no ReLU).
        out = F.pad(out, pad=(1, 1, 1, 1), mode='reflect')
        out = self.normalizeConvRMS(self.conv2(out))
        out = out * scale2 + shift2

        return residual + out
emap.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cee626bc81721d71c5d6cb1f76f830b9ae46f595514b0884dd8ae34785576764
3
+ size 1048704
example/1/inswapperOutput.gif ADDED
example/1/inswapperOutput.jpg ADDED
example/1/reswapperOutput-1019500.jpg ADDED
example/1/reswapperOutput-1399500_256.jpg ADDED
example/1/reswapperOutput-429500.jpg ADDED
example/1/reswapperOutput.gif ADDED
example/1/source.jpg ADDED
example/1/target.jpg ADDED
example/2/inswapperOutput.jpg ADDED
example/2/reswapperOutput-1019500.jpg ADDED
example/2/reswapperOutput-1399500_256.jpg ADDED
example/2/reswapperOutput-429500.jpg ADDED
example/2/source.jpg ADDED
example/2/target.jpg ADDED
example/3/inswapperOutput.jpg ADDED
example/3/reswapperOutput-1019500.jpg ADDED
example/3/reswapperOutput-1399500_256.jpg ADDED
example/3/reswapperOutput-429500.jpg ADDED
example/3/source.png ADDED
example/3/target.jpg ADDED
example/DataAugmentation/inswapper_output.jpg ADDED
example/DataAugmentation/reswapper_256Output-1399500.jpg ADDED
example/DataAugmentation/reswapper_256Output-1567500.jpg ADDED
example/DataAugmentation/source.jpg ADDED
example/DataAugmentation/target.jpg ADDED
example/GeneralizationAbility/1399500_128.jpg ADDED
example/GeneralizationAbility/1399500_160.jpg ADDED
example/GeneralizationAbility/1399500_256.jpg ADDED
face_align.py ADDED
@@ -0,0 +1,109 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import cv2
2
+ import numpy as np
3
+ from skimage import transform as trans
4
+
5
+
6
# Canonical ArcFace 5-point landmark template (left eye, right eye, nose tip,
# left mouth corner, right mouth corner) in pixel coordinates of a 112x112
# aligned crop; estimate_norm() scales/offsets it for other crop sizes.
arcface_dst = np.array(
    [[38.2946, 51.6963], [73.5318, 51.5014], [56.0252, 71.7366],
     [41.5493, 92.3655], [70.7299, 92.2041]],
    dtype=np.float32)
10
+
11
def estimate_norm(lmk, image_size=112, mode='arcface'):
    """Estimate the 2x3 similarity transform mapping five detected landmarks
    onto the ArcFace template scaled to `image_size`.

    `mode` is accepted for API compatibility but not used.
    """
    # Template scaling: sizes divisible by 112 scale directly; otherwise the
    # 128-based layout applies, which also shifts x by 8px (scaled).
    if image_size % 112 == 0:
        scale = float(image_size) / 112.0
        shift_x = 0
    else:
        scale = float(image_size) / 128.0
        shift_x = 8.0 * scale

    template = arcface_dst * scale
    template[:, 0] += shift_x

    # Empirical alignment correction for resolutions other than 128
    # (see README: offset = (128/32768) * resolution - 0.5).
    if image_size != 128:
        correction = (128 / 32768) * image_size - 0.5
        template[:, 0] += correction
        template[:, 1] += correction

    tform = trans.SimilarityTransform()
    tform.estimate(lmk, template)
    return tform.params[0:2, :]
32
+
33
def norm_crop(img, landmark, image_size=112, mode='arcface'):
    """Warp `img` so the five landmarks align to the ArcFace template."""
    warp_matrix = estimate_norm(landmark, image_size, mode)
    return cv2.warpAffine(img, warp_matrix, (image_size, image_size),
                          borderValue=0.0)
37
+
38
def norm_crop2(img, landmark, image_size=112, mode='arcface'):
    """Like norm_crop, but also return the 2x3 affine matrix that was used."""
    warp_matrix = estimate_norm(landmark, image_size, mode)
    aligned = cv2.warpAffine(img, warp_matrix, (image_size, image_size),
                             borderValue=0.0)
    return aligned, warp_matrix
42
+
43
def square_crop(im, S):
    """Fit `im` into an SxS square preserving aspect ratio.

    The image is resized so its longer side equals S, pasted into the
    top-left of a black SxS canvas, and returned with the resize scale.
    """
    h, w = im.shape[0], im.shape[1]
    if h > w:
        new_h, new_w = S, int(float(w) / h * S)
        scale = float(S) / h
    else:
        new_h, new_w = int(float(h) / w * S), S
        scale = float(S) / w

    resized = cv2.resize(im, (new_w, new_h))
    canvas = np.zeros((S, S, 3), dtype=np.uint8)
    canvas[:resized.shape[0], :resized.shape[1], :] = resized
    return canvas, scale
56
+
57
+
58
def transform(data, center, output_size, scale, rotation):
    """Scale, recenter, and rotate `data` into an output_size square crop.

    Returns (cropped_image, 2x3_affine_matrix).
    """
    angle_rad = float(rotation) * np.pi / 180.0

    # Compose: scale -> move scaled center to origin -> rotate -> move to
    # the center of the output crop.
    scaling = trans.SimilarityTransform(scale=scale)
    recenter = trans.SimilarityTransform(
        translation=(-1 * (center[0] * scale), -1 * (center[1] * scale)))
    rotate = trans.SimilarityTransform(rotation=angle_rad)
    to_output = trans.SimilarityTransform(
        translation=(output_size / 2, output_size / 2))

    combined = scaling + recenter + rotate + to_output
    M = combined.params[0:2]
    cropped = cv2.warpAffine(data,
                             M, (output_size, output_size),
                             borderValue=0.0)
    return cropped, M
75
+
76
+
77
def trans_points2d(pts, M):
    """Apply a 2x3 affine matrix `M` to an (N, 2) array of 2-D points.

    Vectorized replacement for the original per-point Python loop: append a
    homogeneous coordinate of 1 to each point and apply M in one matrix
    product.

    Args:
        pts: array-like of shape (N, 2).
        M: 2x3 affine matrix (e.g. from estimate_norm / cv2.invertAffineTransform).

    Returns:
        float32 ndarray of shape (N, 2) with the transformed points.
    """
    pts = np.asarray(pts, dtype=np.float32)
    # Homogeneous coordinates: (N, 3) with a trailing column of ones.
    homogeneous = np.hstack([pts, np.ones((pts.shape[0], 1), dtype=np.float32)])
    return (homogeneous @ np.asarray(M).T).astype(np.float32)
87
+
88
+
89
def trans_points3d(pts, M):
    """Apply a 2x3 affine matrix `M` to an (N, 3) array of (x, y, z) points.

    x/y are affine-transformed; z is multiplied by the matrix's in-plane
    scale factor sqrt(M[0,0]^2 + M[0,1]^2) so depth stays consistent with
    the x/y scaling. Vectorized replacement for the original per-point loop.

    Args:
        pts: array-like of shape (N, 3).
        M: 2x3 affine matrix.

    Returns:
        float32 ndarray of shape (N, 3) with the transformed points.
    """
    pts = np.asarray(pts, dtype=np.float32)
    M = np.asarray(M)
    # In-plane similarity scale of the affine transform.
    scale = np.sqrt(M[0][0] * M[0][0] + M[0][1] * M[0][1])

    new_pts = np.empty(pts.shape, dtype=np.float32)
    homogeneous = np.hstack([pts[:, 0:2],
                             np.ones((pts.shape[0], 1), dtype=np.float32)])
    new_pts[:, 0:2] = homogeneous @ M.T
    new_pts[:, 2] = pts[:, 2] * scale
    return new_pts
102
+
103
+
104
def trans_points(pts, M):
    """Dispatch to the 2-D or 3-D point transform based on point width."""
    is_2d = pts.shape[1] == 2
    return trans_points2d(pts, M) if is_2d else trans_points3d(pts, M)
109
+
requirements-colab.txt ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ absl-py==2.1.0
2
+ addict==2.4.0
3
+ albucore==0.0.17
4
+ albumentations==1.4.17
5
+ annotated-types==0.7.0
6
+ certifi==2024.8.30
7
+ chardet==3.0.4
8
+ charset-normalizer==3.3.2
9
+ colorama==0.4.6
10
+ coloredlogs==15.0.1
11
+ contourpy==1.3.0
12
+ cycler==0.12.1
13
+ Cython==3.0.11
14
+ easydict==1.13
15
+ einops==0.8.0
16
+ eval_type_backport==0.2.0
17
+ facexlib==0.3.0
18
+ filelock==3.13.1
19
+ filterpy==1.4.5
20
+ flatbuffers==24.3.25
21
+ fonttools==4.54.1
22
+ fsspec==2024.2.0
23
+ ftfy==6.2.3
24
+ future==1.0.0
25
+ grpcio==1.66.1
26
+ huggingface-hub==0.25.0
27
+ humanfriendly==10.0
28
+ idna==3.10
29
+ imageio==2.35.1
30
+ importlib_metadata==8.5.0
31
+ insightface==0.7.3
32
+ Jinja2==3.1.3
33
+ joblib==1.4.2
34
+ kiwisolver==1.4.7
35
+ lazy_loader==0.4
36
+ llvmlite==0.43.0
37
+ lmdb==1.5.1
38
+ Markdown==3.7
39
+ MarkupSafe==2.1.5
40
+ matplotlib==3.9.2
41
+ mpmath==1.3.0
42
+ networkx==3.3
43
+ numba==0.60.0
44
+ numpy==1.26.4
45
+ onnx==1.17.0
46
+ onnxruntime==1.18.1
47
+ onnxruntime-gpu==1.19.2
48
+ opencv-python==4.10.0.84
49
+ opencv-python-headless==4.10.0.84
50
+ packaging==24.1
51
+ pillow==10.4.0
52
+ platformdirs==4.3.6
53
+ prettytable==3.11.0
54
+ protobuf==5.28.2
55
+ pydantic==2.9.2
56
+ pydantic_core==2.23.4
57
+ pyparsing==3.1.4
58
+ pyreadline3==3.4.1
59
+ python-dateutil==2.9.0.post0
60
+ pytorch-msssim==1.0.0
61
+ PyYAML==6.0.2
62
+ regex==2024.9.11
63
+ requests==2.32.3
64
+ safetensors==0.4.5
65
+ scikit-image==0.24.0
66
+ scikit-learn==1.5.2
67
+ scipy==1.14.1
68
+ six==1.16.0
69
+ sympy==1.13.2
70
+ tensorboard==2.17.1
71
+ tensorboard-data-server==0.7.2
72
+ threadpoolctl==3.5.0
73
+ tifffile==2024.9.20
74
+ timm==1.0.9
75
+ tokenizers==0.15.2
76
+ tomli==2.0.1
77
+ torch==2.4.1
78
+ tqdm==4.66.5
79
+ transformers==4.36.2
80
+ typing_extensions==4.12.2
81
+ urllib3==2.2.3
82
+ wcwidth==0.2.13
83
+ Werkzeug==3.0.4
84
+ yapf==0.40.2
85
+ zipp==3.20.2
requirements.txt ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ absl-py==2.1.0
2
+ addict==2.4.0
3
+ albucore==0.0.17
4
+ albumentations==1.4.17
5
+ annotated-types==0.7.0
6
+ certifi==2024.8.30
7
+ chardet==3.0.4
8
+ charset-normalizer==3.3.2
9
+ colorama==0.4.6
10
+ coloredlogs==15.0.1
11
+ contourpy==1.3.0
12
+ cycler==0.12.1
13
+ Cython==3.0.11
14
+ easydict==1.13
15
+ einops==0.8.0
16
+ eval_type_backport==0.2.0
17
+ facexlib==0.3.0
18
+ filelock==3.13.1
19
+ filterpy==1.4.5
20
+ flatbuffers==24.3.25
21
+ fonttools==4.54.1
22
+ fsspec==2024.2.0
23
+ ftfy==6.2.3
24
+ future==1.0.0
25
+ grpcio==1.66.1
26
+ huggingface-hub==0.25.0
27
+ humanfriendly==10.0
28
+ idna==3.10
29
+ imageio==2.35.1
30
+ importlib_metadata==8.5.0
31
+ insightface==0.7.3
32
+ Jinja2==3.1.3
33
+ joblib==1.4.2
34
+ kiwisolver==1.4.7
35
+ lazy_loader==0.4
36
+ llvmlite==0.43.0
37
+ lmdb==1.5.1
38
+ Markdown==3.7
39
+ MarkupSafe==2.1.5
40
+ matplotlib==3.9.2
41
+ mpmath==1.3.0
42
+ networkx==3.3
43
+ numba==0.60.0
44
+ numpy==1.26.4
45
+ onnx==1.17.0
46
+ onnxruntime==1.18.1
47
+ onnxruntime-gpu==1.19.2
48
+ opencv-python==4.10.0.84
49
+ opencv-python-headless==4.10.0.84
50
+ packaging==24.1
51
+ pillow==10.4.0
52
+ platformdirs==4.3.6
53
+ prettytable==3.11.0
54
+ protobuf==5.28.2
55
+ pydantic==2.9.2
56
+ pydantic_core==2.23.4
57
+ pyparsing==3.1.4
58
+ pyreadline3==3.4.1
59
+ python-dateutil==2.9.0.post0
60
+ pytorch-msssim==1.0.0
61
+ PyYAML==6.0.2
62
+ regex==2024.9.11
63
+ requests==2.32.3
64
+ safetensors==0.4.5
65
+ scikit-image==0.24.0
66
+ scikit-learn==1.5.2
67
+ scipy==1.14.1
68
+ six==1.16.0
69
+ sympy==1.13.2
70
+ tensorboard==2.17.1
71
+ tensorboard-data-server==0.7.2
72
+ threadpoolctl==3.5.0
73
+ tifffile==2024.9.20
74
+ timm==1.0.9
75
+ tokenizers==0.15.2
76
+ tomli==2.0.1
77
+ torch==2.4.1+cu121
78
+ torchvision==0.19.1+cu121
79
+ tqdm==4.66.5
80
+ transformers==4.36.2
81
+ typing_extensions==4.12.2
82
+ urllib3==2.2.3
83
+ wcwidth==0.2.13
84
+ Werkzeug==3.0.4
85
+ yapf==0.40.2
86
+ zipp==3.20.2
swap.py ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import os
3
+
4
+ import cv2
5
+ import torch
6
+ import Image
7
+ from insightface.app import FaceAnalysis
8
+ import face_align
9
+
10
# Module-level InsightFace detection/embedding pipeline shared by all helpers below.
faceAnalysis = FaceAnalysis(name='buffalo_l')
# ctx_id=0: first execution device; det_size: detector input resolution.
faceAnalysis.prepare(ctx_id=0, det_size=(512, 512))
12
+
13
+ from StyleTransferModel_128 import StyleTransferModel
14
+
15
def parse_arguments():
    """Build and evaluate the CLI argument parser for the face-swap script."""
    parser = argparse.ArgumentParser(description='Process command line arguments')

    # All path arguments are mandatory and share the same shape.
    required_paths = (
        ('--target', 'Target path'),
        ('--source', 'Source path'),
        ('--outputPath', 'Output path'),
        ('--modelPath', 'Model path'),
    )
    for flag, help_text in required_paths:
        parser.add_argument(flag, required=True, help=help_text)

    parser.add_argument('--no-paste-back', action='store_true', help='Disable pasting back the swapped face onto the original image')
    parser.add_argument('--resolution', type=int, default=128, help='Resolution')

    return parser.parse_args()
26
+
27
def get_device():
    """Return the CUDA device when available, otherwise the CPU."""
    if torch.cuda.is_available():
        return torch.device('cuda')
    return torch.device('cpu')
29
+
30
def load_model(model_path):
    """Instantiate StyleTransferModel, load weights from model_path, set eval mode."""
    device = get_device()
    net = StyleTransferModel().to(device)
    # strict=False tolerates missing/extra keys in older checkpoints.
    state = torch.load(model_path, map_location=device)
    net.load_state_dict(state, strict=False)
    net.eval()
    return net
36
+
37
def swap_face(model, target_face, source_face_latent):
    """Run one swap: aligned target blob + source identity latent -> swapped face.

    Returns (BGR uint8 image from Image.postprocess_face, raw output tensor).
    """
    device = get_device()
    target_tensor = torch.from_numpy(target_face).to(device)
    latent_tensor = torch.from_numpy(source_face_latent).to(device)

    with torch.no_grad():
        swapped_tensor = model(target_tensor, latent_tensor)

    return Image.postprocess_face(swapped_tensor), swapped_tensor
49
+
50
def create_target(target_image, resolution):
    """Detect the first face in target_image (path or BGR array) and align it.

    Returns (network input blob at `resolution`, 2x3 alignment matrix M).
    """
    if isinstance(target_image, str):
        target_image = cv2.imread(target_image)

    first_face = faceAnalysis.get(target_image)[0]
    aligned, M = face_align.norm_crop2(target_image, first_face.kps, resolution)
    return Image.getBlob(aligned, (resolution, resolution)), M
59
+
60
def create_source(source_img_path):
    """Load the source image and return the identity latent of its first face."""
    source_image = cv2.imread(source_img_path)
    first_face = faceAnalysis.get(source_image)[0]
    return Image.getLatent(first_face)
68
+
69
def main():
    """CLI entry point: swap the source face onto the target image and save it."""
    args = parse_arguments()

    model = load_model(args.modelPath)

    target_img = cv2.imread(args.target)
    target_face_blob, M = create_target(target_img, args.resolution)
    source_latent = create_source(args.source)
    swapped_face, _ = swap_face(model, target_face_blob, source_latent)

    if not args.no_paste_back:
        # Warp the swapped crop back into the full target frame.
        swapped_face = Image.blend_swapped_image(swapped_face, target_img, M)

    output_folder = os.path.dirname(args.outputPath)
    # os.makedirs('') raises FileNotFoundError when outputPath is a bare
    # filename with no directory component — only create a real folder.
    if output_folder:
        os.makedirs(output_folder, exist_ok=True)
    cv2.imwrite(args.outputPath, swapped_face)

if __name__ == "__main__":
    main()
train.py ADDED
@@ -0,0 +1,247 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from datetime import datetime
2
+ import os
3
+ import random
4
+ import torch
5
+ import torch.optim as optim
6
+ import torch.nn.functional as F
7
+
8
+ import Image
9
+ import ModelFormat
10
+ from StyleTransferLoss import StyleTransferLoss
11
+ import onnxruntime as rt
12
+
13
+ import cv2
14
+ from insightface.data import get_image as ins_get_image
15
+ from insightface.app import FaceAnalysis
16
+ import face_align
17
+
18
+ from StyleTransferModel_128 import StyleTransferModel
19
+ from torch.utils.tensorboard import SummaryWriter
20
+
21
# Path to the pretrained inswapper ONNX model used as the teacher network.
inswapper_128_path = 'inswapper_128.onnx'
# Native working resolution of the teacher model (128x128 aligned face crops).
img_size = 128

# Prefer CUDA for ONNX Runtime; falls back to CPU if the provider is unavailable.
providers = ['CUDAExecutionProvider', 'CPUExecutionProvider']

inswapperInferenceSession = rt.InferenceSession(inswapper_128_path, providers=providers)

# InsightFace detection/embedding pipeline shared by training and validation.
faceAnalysis = FaceAnalysis(name='buffalo_l')
faceAnalysis.prepare(ctx_id=0, det_size=(512, 512))
30
+
31
def get_device():
    """Return the CUDA device when available, otherwise the CPU."""
    use_cuda = torch.cuda.is_available()
    return torch.device('cuda' if use_cuda else 'cpu')
33
# Shared loss module (returns a (content_loss, identity_loss) pair), on the training device.
style_loss_fn = StyleTransferLoss().to(get_device())
34
+
35
def train(datasetDir, learning_rate=0.0001, model_path=None, outputModelFolder='', saveModelEachSteps=1, stopAtSteps=None, logDir=None, previewDir=None, saveAs_onnx=False, resolutions=None, enableDataAugmentation=False):
    """Train StyleTransferModel to reproduce the inswapper teacher's outputs.

    Endlessly samples (target, source) image pairs from datasetDir, builds the
    teacher's swapped face as the regression target, and optimizes the student
    with content (+ optional identity) loss until stopAtSteps is reached.

    Args:
        datasetDir: folder of face images (one face per file expected).
        learning_rate: Adam learning rate.
        model_path: optional checkpoint to resume from; the step count is parsed
            from the "reswapper-<steps>.pth" filename convention.
        outputModelFolder: where periodic checkpoints are written ('' = CWD).
        saveModelEachSteps: checkpoint/validation interval in steps.
        stopAtSteps: stop after this many steps in this run (None = run forever).
        logDir: TensorBoard root (None disables logging).
        previewDir: folder for validation preview JPEGs (None disables).
        saveAs_onnx: also export each checkpoint via ModelFormat.
        resolutions: list of training crop resolutions cycled per step
            (default [128]; avoids a mutable default argument).
        enableDataAugmentation: grayscale-augment every other step.
    """
    if resolutions is None:
        resolutions = [128]

    device = get_device()
    print(f"Using device: {device}")

    model = StyleTransferModel().to(device)

    if model_path is not None:
        model.load_state_dict(torch.load(model_path, map_location=device), strict=False)
        print(f"Loaded model from {model_path}")
        # Resume the global step counter from the checkpoint filename.
        lastSteps = int(model_path.split('-')[-1].split('.')[0])
        print(f"Resuming training from step {lastSteps}")
    else:
        lastSteps = 0

    model.train()

    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    if logDir is not None:
        train_writer = SummaryWriter(os.path.join(logDir, "training"))
        val_writer = SummaryWriter(os.path.join(logDir, "validation"))

    steps = 0
    image_files = os.listdir(datasetDir)
    resolutionIndex = 0

    # Training loop — runs until stopAtSteps (if given) is reached.
    while True:
        start_time = datetime.now()

        resolution = resolutions[resolutionIndex % len(resolutions)]

        targetFaceIndex = random.randint(0, len(image_files) - 1)
        sourceFaceIndex = random.randint(0, len(image_files) - 1)

        target_img = cv2.imread(f"{datasetDir}/{image_files[targetFaceIndex]}")
        if enableDataAugmentation and steps % 2 == 0:
            # Grayscale round-trip keeps 3 channels but removes color.
            target_img = cv2.cvtColor(target_img, cv2.COLOR_BGR2GRAY)
            target_img = cv2.cvtColor(target_img, cv2.COLOR_GRAY2BGR)
        faces = faceAnalysis.get(target_img)

        if targetFaceIndex != sourceFaceIndex:
            source_img = cv2.imread(f"{datasetDir}/{image_files[sourceFaceIndex]}")
            faces2 = faceAnalysis.get(source_img)
        else:
            faces2 = faces

        if len(faces) > 0 and len(faces2) > 0:
            new_aligned_face, _ = face_align.norm_crop2(target_img, faces[0].kps, img_size)
            blob = Image.getBlob(new_aligned_face)
            latent = Image.getLatent(faces2[0])
        else:
            # No detectable face in one of the images — sample a new pair.
            continue

        if targetFaceIndex != sourceFaceIndex:
            # Teacher target: the original inswapper's swap result.
            # Named ort_inputs (not `input`) to avoid shadowing the builtin.
            ort_inputs = {inswapperInferenceSession.get_inputs()[0].name: blob,
                          inswapperInferenceSession.get_inputs()[1].name: latent}
            expected_output = inswapperInferenceSession.run([inswapperInferenceSession.get_outputs()[0].name], ort_inputs)[0]
        else:
            # Self-swap: identity mapping, the target itself is the label.
            expected_output = blob

        expected_output_tensor = torch.from_numpy(expected_output).to(device)

        if resolution != 128:
            # Re-crop the student's input at the higher training resolution.
            new_aligned_face, _ = face_align.norm_crop2(target_img, faces[0].kps, resolution)
            blob = Image.getBlob(new_aligned_face, (resolution, resolution))

        latent_tensor = torch.from_numpy(latent).to(device)
        target_input_tensor = torch.from_numpy(blob).to(device)

        optimizer.zero_grad()
        output = model(target_input_tensor, latent_tensor)

        if resolution != 128:
            # Teacher output is always 128x128; downscale student output to match.
            output = F.interpolate(output, size=(128, 128), mode='bilinear', align_corners=False)

        content_loss, identity_loss = style_loss_fn(output, expected_output_tensor)

        # Build the total out-of-place: `loss += identity_loss` would be an
        # in-place Tensor.__iadd__ that mutates content_loss itself, corrupting
        # the separately logged "Loss/content_loss" value.
        if identity_loss is not None:
            loss = content_loss + identity_loss
        else:
            loss = content_loss

        loss.backward()
        optimizer.step()

        steps += 1
        totalSteps = steps + lastSteps

        if logDir is not None:
            train_writer.add_scalar("Loss/total", loss.item(), totalSteps)
            train_writer.add_scalar("Loss/content_loss", content_loss.item(), totalSteps)
            if identity_loss is not None:
                train_writer.add_scalar("Loss/identity_loss", identity_loss.item(), totalSteps)

        elapsed_time = datetime.now() - start_time
        print(f"Total Steps: {totalSteps}, Step: {steps}, Loss: {loss.item():.4f}, Elapsed time: {elapsed_time}")

        if steps % saveModelEachSteps == 0:
            outputModelPath = f"reswapper-{totalSteps}.pth"
            if outputModelFolder != '':
                outputModelPath = f"{outputModelFolder}/{outputModelPath}"
            saveModel(model, outputModelPath)

            validation_total_loss, validation_content_loss, validation_identity_loss, swapped_face, swapped_face_256 = validate(outputModelPath)
            if previewDir is not None:
                cv2.imwrite(f"{previewDir}/{totalSteps}.jpg", swapped_face)
                cv2.imwrite(f"{previewDir}/{totalSteps}_256.jpg", swapped_face_256)

            if logDir is not None:
                val_writer.add_scalar("Loss/total", validation_total_loss.item(), totalSteps)
                val_writer.add_scalar("Loss/content_loss", validation_content_loss.item(), totalSteps)
                if validation_identity_loss is not None:
                    val_writer.add_scalar("Loss/identity_loss", validation_identity_loss.item(), totalSteps)

            if saveAs_onnx:
                ModelFormat.save_as_onnx_model(outputModelPath)

            if stopAtSteps is not None and steps == stopAtSteps:
                # Return instead of exit(): lets callers run cleanup code.
                return

        resolutionIndex += 1
167
+
168
def saveModel(model, outputModelPath):
    """Serialize the model's parameters (state_dict) to outputModelPath."""
    state = model.state_dict()
    torch.save(state, outputModelPath)
170
+
171
def load_model(model_path):
    """Load a StyleTransferModel checkpoint for evaluation."""
    device = get_device()
    net = StyleTransferModel().to(device)
    # strict=False tolerates checkpoints with missing/extra keys.
    net.load_state_dict(torch.load(model_path, map_location=device), strict=False)
    net.eval()
    return net
178
+
179
def swap_face(model, target_face, source_face_latent):
    """Run the model on an aligned target blob + source latent.

    Returns (BGR uint8 image from Image.postprocess_face, raw output tensor).
    """
    device = get_device()
    target_tensor = torch.from_numpy(target_face).to(device)
    latent_tensor = torch.from_numpy(source_face_latent).to(device)

    with torch.no_grad():
        swapped_tensor = model(target_tensor, latent_tensor)

    return Image.postprocess_face(swapped_tensor), swapped_tensor
191
+
192
# --- Fixed validation fixture, built once at import time ---
# test image
test_img = ins_get_image('t1')  # bundled InsightFace sample image

# Sort detected faces left-to-right (bbox x) so indices are deterministic.
test_faces = faceAnalysis.get(test_img)
test_faces = sorted(test_faces, key = lambda x : x.bbox[0])
# Leftmost face is the swap target: aligned 128x128 crop -> network blob.
test_target_face, _ = face_align.norm_crop2(test_img, test_faces[0].kps, img_size)
test_target_face = Image.getBlob(test_target_face)
# Identity latent of the third face acts as the swap source.
test_l = Image.getLatent(test_faces[2])

# Same target at 256x256 for the high-resolution preview renders.
test_target_face_256, _ = face_align.norm_crop2(test_img, test_faces[0].kps, 256)
test_target_face_256 = Image.getBlob(test_target_face_256, (256, 256))

# Reference swap from the original inswapper teacher — validation target.
test_input = {inswapperInferenceSession.get_inputs()[0].name: test_target_face,
              inswapperInferenceSession.get_inputs()[1].name: test_l}

test_inswapperOutput = inswapperInferenceSession.run([inswapperInferenceSession.get_outputs()[0].name], test_input)[0]
208
+
209
def validate(modelPath):
    """Load the checkpoint at modelPath and score it on the fixed test fixture.

    Returns (total_loss, content_loss, identity_loss, swapped_face_128,
    swapped_face_256); identity_loss may be None.
    """
    model = load_model(modelPath)
    swapped_face, swapped_tensor = swap_face(model, test_target_face, test_l)
    swapped_face_256, _ = swap_face(model, test_target_face_256, test_l)

    expected = torch.from_numpy(test_inswapperOutput).to(get_device())
    validation_content_loss, validation_identity_loss = style_loss_fn(swapped_tensor, expected)

    # Sum out-of-place: `total += identity` would be an in-place Tensor
    # __iadd__ that mutates validation_content_loss itself, skewing the
    # separately returned/logged content loss.
    if validation_identity_loss is not None:
        validation_total_loss = validation_content_loss + validation_identity_loss
    else:
        validation_total_loss = validation_content_loss

    return validation_total_loss, validation_content_loss, validation_identity_loss, swapped_face, swapped_face_256
221
+
222
def main():
    """Configure folders and hyper-parameters, then launch training."""
    outputModelFolder = "model"
    modelPath = None
    # modelPath = f"{outputModelFolder}/reswapper-<step>.pth"

    logDir = "training/log"
    previewDir = "training/preview"
    datasetDir = "FFHQ"

    os.makedirs(outputModelFolder, exist_ok=True)
    os.makedirs(previewDir, exist_ok=True)

    # Each run gets its own timestamped TensorBoard subfolder.
    run_log_dir = f"{logDir}/{datetime.now().strftime('%Y%m%d %H%M%S')}"

    train(
        datasetDir=datasetDir,
        model_path=modelPath,
        learning_rate=0.0001,
        # resolutions = [128, 256],
        # enableDataAugmentation=True,
        outputModelFolder=outputModelFolder,
        saveModelEachSteps = 1000,
        stopAtSteps = 70000,
        logDir=run_log_dir,
        previewDir=previewDir)

if __name__ == "__main__":
    main()