Space build status: Build error

Commit 412f263 (parent: 35758f6)
Initial commit with Git LFS tracking
Files changed:
- .gitattributes +6 -0
- assets/docs/ex1.gif +3 -0
- assets/docs/ex2.gif +3 -0
- assets/docs/ex3.gif +3 -0
- assets/docs/ex4.gif +3 -0
- assets/docs/ex5.gif +3 -0
- assets/docs/ex5_img.png +3 -0
- assets/docs/sam_ex.gif +3 -0
- assets/docs/vid20.gif +3 -0
- assets/docs/vid35orig.gif +3 -0
- assets/docs/vid60.gif +3 -0
- assets/gradio_example_images/1.png +3 -0
- assets/gradio_example_images/2.png +3 -0
- assets/gradio_example_images/3.png +3 -0
- assets/gradio_example_images/4.png +3 -0
- assets/gradio_example_images/5.png +3 -0
- assets/gradio_example_images/6.png +3 -0
- assets/gradio_example_images/orig.mp4 +3 -0
- assets/mask1024.jpg +3 -0
- assets/mask512.jpg +3 -0
- model/__pycache__/models.cpython-310.pyc +0 -0
- model/best_discriminator_model.pth +3 -0
- model/best_unet_model.pth +3 -0
- model/losses.py +70 -0
- model/models.py +99 -0
- requirements.txt +12 -0
- scripts/__pycache__/test_functions.cpython-310.pyc +0 -0
- scripts/app.py +101 -0
- scripts/gradio_demo.py +116 -0
- scripts/test_functions.py +229 -0
- scripts/train.py +216 -0
.gitattributes
CHANGED
@@ -33,3 +33,9 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+*.png filter=lfs diff=lfs merge=lfs -text
+*.jpg filter=lfs diff=lfs merge=lfs -text
+*.jpeg filter=lfs diff=lfs merge=lfs -text
+*.gif filter=lfs diff=lfs merge=lfs -text
+*.mp4 filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
The following image assets were ADDED and are stored with Git LFS:
- assets/docs/ex1.gif
- assets/docs/ex2.gif
- assets/docs/ex3.gif
- assets/docs/ex4.gif
- assets/docs/ex5.gif
- assets/docs/ex5_img.png
- assets/docs/sam_ex.gif
- assets/docs/vid20.gif
- assets/docs/vid35orig.gif
- assets/docs/vid60.gif
- assets/gradio_example_images/1.png
- assets/gradio_example_images/2.png
- assets/gradio_example_images/3.png
- assets/gradio_example_images/4.png
- assets/gradio_example_images/5.png
- assets/gradio_example_images/6.png
assets/gradio_example_images/orig.mp4
ADDED (Git LFS pointer)
version https://git-lfs.github.com/spec/v1
oid sha256:107f1bb068638619d3712dfd72fa21b4fba8d072faa9768a85090d3369c70b8e
size 3675407
The following mask assets were ADDED and are stored with Git LFS:
- assets/mask1024.jpg
- assets/mask512.jpg
model/__pycache__/models.cpython-310.pyc
ADDED
Binary file (3.22 kB)
model/best_discriminator_model.pth
ADDED (Git LFS pointer)
version https://git-lfs.github.com/spec/v1
oid sha256:aa0362ca43e381848ac34fd8b44a3d65d9eb6100b1bcb77aa706c0e1c58e06f1
size 2668758
model/best_unet_model.pth
ADDED (Git LFS pointer)
version https://git-lfs.github.com/spec/v1
oid sha256:230b6a007c65af43a67dcbbb46f4504cff71031e6d410952ef195ba6db90e942
size 124275652
model/losses.py
ADDED (70 lines)

import torch
import torch.nn as nn
import lpips  # LPIPS library for perceptual loss

class GeneratorLoss(nn.Module):
    def __init__(self, discriminator_model, l1_weight=1.0, perceptual_weight=1.0, adversarial_weight=0.05,
                 device="cpu"):
        super(GeneratorLoss, self).__init__()
        self.discriminator_model = discriminator_model
        self.l1_weight = l1_weight
        self.perceptual_weight = perceptual_weight
        self.adversarial_weight = adversarial_weight
        self.criterion_l1 = nn.L1Loss()
        self.criterion_adversarial = nn.BCEWithLogitsLoss()
        self.criterion_perceptual = lpips.LPIPS(net='vgg').to(device)

    def forward(self, output, target, source):
        # L1 loss
        l1_loss = self.criterion_l1(output, target)

        # Perceptual loss
        perceptual_loss = torch.mean(self.criterion_perceptual(output, target))

        # Adversarial loss
        fake_input = torch.cat([output, source[:, 4:5, :, :]], dim=1)
        fake_prediction = self.discriminator_model(fake_input)

        adversarial_loss = self.criterion_adversarial(fake_prediction, torch.ones_like(fake_prediction))

        # Combine losses
        generator_loss = self.l1_weight * l1_loss + self.perceptual_weight * perceptual_loss + \
                         self.adversarial_weight * adversarial_loss

        return generator_loss, l1_loss, perceptual_loss, adversarial_loss

class DiscriminatorLoss(nn.Module):
    def __init__(self, discriminator_model, fake_weight=1.0, real_weight=2.0, mock_weight=.5):
        super(DiscriminatorLoss, self).__init__()
        self.discriminator_model = discriminator_model
        self.criterion_adversarial = nn.BCEWithLogitsLoss()
        self.fake_weight = fake_weight
        self.real_weight = real_weight
        self.mock_weight = mock_weight

    def forward(self, output, target, source):
        # Adversarial loss
        fake_input = torch.cat([output, source[:, 4:5, :, :]], dim=1)  # prediction img with target age
        real_input = torch.cat([target, source[:, 4:5, :, :]], dim=1)  # target img with target age

        mock_input1 = torch.cat([source[:, :3, :, :], source[:, 4:5, :, :]], dim=1)  # source img with target age
        mock_input2 = torch.cat([target, source[:, 3:4, :, :]], dim=1)  # target img with source age
        mock_input3 = torch.cat([output, source[:, 3:4, :, :]], dim=1)  # prediction img with source age
        mock_input4 = torch.cat([target, source[:, 3:4, :, :]], dim=1)  # target img with target age

        fake_pred, real_pred = self.discriminator_model(fake_input), self.discriminator_model(real_input)
        mock_pred1, mock_pred2, mock_pred3, mock_pred4 = (self.discriminator_model(mock_input1),
                                                          self.discriminator_model(mock_input2),
                                                          self.discriminator_model(mock_input3),
                                                          self.discriminator_model(mock_input4))

        discriminator_loss = (self.fake_weight * self.criterion_adversarial(fake_pred, torch.zeros_like(fake_pred)) +
                              self.real_weight * self.criterion_adversarial(real_pred, torch.ones_like(real_pred)) +
                              self.mock_weight * self.criterion_adversarial(mock_pred1, torch.zeros_like(mock_pred1)) +
                              self.mock_weight * self.criterion_adversarial(mock_pred2, torch.zeros_like(mock_pred2)) +
                              self.mock_weight * self.criterion_adversarial(mock_pred3, torch.zeros_like(mock_pred3)) +
                              self.mock_weight * self.criterion_adversarial(mock_pred4, torch.zeros_like(mock_pred4))
                              )

        return discriminator_loss
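For orientation: both loss classes assume the 5-channel conditioning layout used throughout this commit (3 RGB channels, a source-age plane at index 3, and a target-age plane at index 4), and a discriminator that accepts 4 channels (an RGB image plus one age plane), as train.py constructs. Below is a minimal sketch exercising them on random tensors; the 256 x 256 toy resolution and the tensor values are assumptions for illustration only, and the repository root is assumed to be on sys.path so the model package resolves.

import torch
from model.losses import GeneratorLoss, DiscriminatorLoss
from model.models import PatchGANDiscriminator

# Toy batch: "source" carries RGB + source-age plane + target-age plane.
source = torch.rand(1, 5, 256, 256)
target = torch.rand(1, 3, 256, 256)   # ground-truth image at the target age
output = torch.rand(1, 3, 256, 256)   # stand-in for a generator prediction

# 4 input channels: RGB concatenated with one age plane.
discriminator = PatchGANDiscriminator(input_channels=4)

gen_loss_fn = GeneratorLoss(discriminator, device="cpu")  # LPIPS downloads VGG weights on first use
disc_loss_fn = DiscriminatorLoss(discriminator)

g_total, l1, perceptual, adversarial = gen_loss_fn(output, target, source)
d_total = disc_loss_fn(output.detach(), target, source)  # detached, as in the training loop
print(g_total.item(), d_total.item())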
model/models.py
ADDED (99 lines)

import torch
import torch.nn as nn
import antialiased_cnns


class DownLayer(nn.Module):
    def __init__(self, in_channels, out_channels):
        super(DownLayer, self).__init__()
        self.layer = nn.Sequential(
            nn.MaxPool2d(kernel_size=2, stride=1),
            antialiased_cnns.BlurPool(in_channels, stride=2),
            nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
            nn.LeakyReLU(inplace=True),
            nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1),
            nn.LeakyReLU(inplace=True)
        )

    def forward(self, x):
        return self.layer(x)


class UpLayer(nn.Module):
    def __init__(self, in_channels, out_channels):
        super(UpLayer, self).__init__()
        # Conv transpose upsampling
        self.blur_upsample = nn.Sequential(
            nn.ConvTranspose2d(in_channels, out_channels, kernel_size=2, stride=2, padding=0),
            antialiased_cnns.BlurPool(out_channels, stride=1)
        )

        self.layer = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
            nn.LeakyReLU(inplace=True),
            nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1),
            nn.LeakyReLU(inplace=True)
        )

    def forward(self, x, skip):
        x = self.blur_upsample(x)
        x = torch.cat([x, skip], dim=1)  # Concatenate with skip connection
        return self.layer(x)


class UNet(nn.Module):
    def __init__(self):
        super(UNet, self).__init__()
        self.init_conv = nn.Sequential(
            nn.Conv2d(5, 64, kernel_size=3, padding=1),  # output: 512 x 512 x 64
            nn.LeakyReLU(inplace=True),
            nn.Conv2d(64, 64, kernel_size=3, padding=1),  # output: 512 x 512 x 64
            nn.LeakyReLU(inplace=True)
        )

        self.down1 = DownLayer(64, 128)  # output: 256 x 256 x 128
        self.down2 = DownLayer(128, 256)  # output: 128 x 128 x 256
        self.down3 = DownLayer(256, 512)  # output: 64 x 64 x 512
        self.down4 = DownLayer(512, 1024)  # output: 32 x 32 x 1024
        self.up1 = UpLayer(1024, 512)  # output: 64 x 64 x 512
        self.up2 = UpLayer(512, 256)  # output: 128 x 128 x 256
        self.up3 = UpLayer(256, 128)  # output: 256 x 256 x 128
        self.up4 = UpLayer(128, 64)  # output: 512 x 512 x 64
        self.final_conv = nn.Conv2d(64, 3, kernel_size=1)  # output: 512 x 512 x 3

    def forward(self, x):
        x0 = self.init_conv(x)
        x1 = self.down1(x0)
        x2 = self.down2(x1)
        x3 = self.down3(x2)
        x4 = self.down4(x3)
        x = self.up1(x4, x3)
        x = self.up2(x, x2)
        x = self.up3(x, x1)
        x = self.up4(x, x0)
        x = self.final_conv(x)
        return x


class PatchGANDiscriminator(nn.Module):
    def __init__(self, input_channels=3):
        super(PatchGANDiscriminator, self).__init__()
        self.model = nn.Sequential(
            nn.Conv2d(input_channels, 64, kernel_size=4, stride=2, padding=1),
            nn.LeakyReLU(0.2, inplace=True),

            nn.Conv2d(64, 128, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(128),
            nn.LeakyReLU(0.2, inplace=True),

            nn.Conv2d(128, 256, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(256),
            nn.LeakyReLU(0.2, inplace=True),

            nn.Conv2d(256, 1, kernel_size=4, stride=1, padding=1)
            # Output layer with 1 channel for binary classification
        )

    def forward(self, x):
        return self.model(x)
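As a quick sanity check of the shapes annotated in the layer comments above, the following sketch runs both networks on random tensors with untrained weights. It assumes the repository root is importable; the batch contents are placeholders.

import torch
from model.models import UNet, PatchGANDiscriminator

unet = UNet().eval()
disc = PatchGANDiscriminator(input_channels=4).eval()

# 5-channel input (RGB + two age planes) at the 512 x 512 resolution the comments assume.
x = torch.rand(1, 5, 512, 512)
with torch.no_grad():
    residual = unet(x)                                       # the generator predicts an RGB residual
    logits = disc(torch.cat([residual, x[:, 4:5]], dim=1))   # RGB + target-age plane -> patch logits

print(residual.shape)  # torch.Size([1, 3, 512, 512])
print(logits.shape)    # torch.Size([1, 1, 63, 63])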
requirements.txt
ADDED (12 lines)

torch
torchvision
antialiased_cnns
lpips
ffmpy
av
gradio
cmake
face_recognition
dlib
numpy
Pillow
scripts/__pycache__/test_functions.cpython-310.pyc
ADDED
Binary file (6.11 kB)
scripts/app.py
ADDED (101 lines)

import gradio as gr
import torch
from model.models import UNet
from scripts.test_functions import process_image, process_video

window_size = 512
stride = 256
steps = 18
frame_count = 0

def get_model():
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    unet_model = UNet().to(device)
    unet_model.load_state_dict(torch.load("model/best_unet_model.pth", map_location=device))
    unet_model.eval()
    return unet_model

unet_model = get_model()

def block_img(image, source_age, target_age):
    from PIL import Image as PILImage
    import numpy as np
    if isinstance(image, str):
        image = PILImage.open(image).convert('RGB')
    elif isinstance(image, np.ndarray) and image.dtype == object:
        image = image.astype(np.uint8)
    return process_image(unet_model, image, video=False, source_age=source_age,
                         target_age=target_age, window_size=window_size, stride=stride)

def block_img_vid(image, source_age):
    from PIL import Image as PILImage
    import numpy as np
    if isinstance(image, str):
        image = PILImage.open(image).convert('RGB')
    elif isinstance(image, np.ndarray) and image.dtype == object:
        image = image.astype(np.uint8)
    return process_image(unet_model, image, video=True, source_age=source_age,
                         target_age=0, window_size=window_size, stride=stride, steps=steps)

def block_vid(video_path, source_age, target_age):
    return process_video(unet_model, video_path, source_age, target_age,
                         window_size=window_size, stride=stride, frame_count=frame_count)

demo_img = gr.Interface(
    fn=block_img,
    inputs=[
        gr.Image(type="pil"),
        gr.Slider(10, 90, value=20, step=1, label="Current age", info="Choose your current age"),
        gr.Slider(10, 90, value=80, step=1, label="Target age", info="Choose the age you want to become")
    ],
    outputs="image",
    examples=[
        ['assets/gradio_example_images/1.png', 20, 80],
        ['assets/gradio_example_images/2.png', 75, 40],
        ['assets/gradio_example_images/3.png', 30, 70],
        ['assets/gradio_example_images/4.png', 22, 60],
        ['assets/gradio_example_images/5.png', 28, 75],
        ['assets/gradio_example_images/6.png', 35, 15]
    ],
    description="Input an image of a person and age them from the source age to the target age."
)

demo_img_vid = gr.Interface(
    fn=block_img_vid,
    inputs=[
        gr.Image(type="pil"),
        gr.Slider(10, 90, value=20, step=1, label="Current age", info="Choose your current age"),
    ],
    outputs=gr.Video(),
    examples=[
        ['assets/gradio_example_images/1.png', 20],
        ['assets/gradio_example_images/2.png', 75],
        ['assets/gradio_example_images/3.png', 30],
        ['assets/gradio_example_images/4.png', 22],
        ['assets/gradio_example_images/5.png', 28],
        ['assets/gradio_example_images/6.png', 35]
    ],
    description="Input an image of a person and a video will be returned of the person at different ages."
)

demo_vid = gr.Interface(
    fn=block_vid,
    inputs=[
        gr.Video(),
        gr.Slider(10, 90, value=20, step=1, label="Current age", info="Choose your current age"),
        gr.Slider(10, 90, value=80, step=1, label="Target age", info="Choose the age you want to become")
    ],
    outputs=gr.Video(),
    examples=[
        ['assets/gradio_example_images/orig.mp4', 35, 60],
    ],
    description="Input a video of a person, and it will be aged frame-by-frame."
)

demo = gr.TabbedInterface([demo_img, demo_img_vid, demo_vid],
                          tab_names=['Image inference demo', 'Image animation demo', 'Video inference demo'],
                          title="Face Re-Aging Demo",
                          )

if __name__ == "__main__":
    demo.launch()
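Because the heavy lifting lives in module-level functions, the image path of the demo can also be exercised without launching the UI. A minimal sketch, assuming it is run from the repository root so the checkpoint, mask assets, and example images resolve; note that importing scripts.app loads model/best_unet_model.pth and builds the interfaces as a side effect, but does not start the server.

from PIL import Image
from scripts.app import block_img  # the import loads the checkpoint once

# Same call the "Image inference demo" tab makes; returns a PIL image.
aged = block_img(Image.open("assets/gradio_example_images/1.png"), source_age=20, target_age=80)
aged.save("aged_1.png")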
scripts/gradio_demo.py
ADDED (116 lines)

import gradio as gr
import torch
import argparse

import sys
sys.path.append(".")

from model.models import UNet
from scripts.test_functions import process_image, process_video

# default settings
window_size = 512
stride = 256
steps = 18
frame_count = 0

def run(model_path):
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    unet_model = UNet().to(device)
    unet_model.load_state_dict(torch.load(model_path, map_location=device))
    unet_model.eval()

    def block_img(image, source_age, target_age):
        from PIL import Image as PILImage
        import numpy as np
        # If image is a file path (from examples), load it
        if isinstance(image, str):
            image = PILImage.open(image).convert('RGB')
        # If image is a numpy array with dtype object (sometimes from Gradio), convert to uint8
        elif isinstance(image, np.ndarray) and image.dtype == object:
            image = image.astype(np.uint8)
        return process_image(unet_model, image, video=False, source_age=source_age,
                             target_age=target_age, window_size=window_size, stride=stride)

    def block_img_vid(image, source_age):
        from PIL import Image as PILImage
        import numpy as np
        if isinstance(image, str):
            image = PILImage.open(image).convert('RGB')
        elif isinstance(image, np.ndarray) and image.dtype == object:
            image = image.astype(np.uint8)
        return process_image(unet_model, image, video=True, source_age=source_age,
                             target_age=0, window_size=window_size, stride=stride, steps=steps)

    def block_vid(video_path, source_age, target_age):
        return process_video(unet_model, video_path, source_age, target_age,
                             window_size=window_size, stride=stride, frame_count=frame_count)

    demo_img = gr.Interface(
        fn=block_img,
        inputs=[
            gr.Image(type="pil"),
            gr.Slider(10, 90, value=20, step=1, label="Current age", info="Choose your current age"),
            gr.Slider(10, 90, value=80, step=1, label="Target age", info="Choose the age you want to become")
        ],
        outputs="image",
        examples=[
            ['assets/gradio_example_images/1.png', 20, 80],
            ['assets/gradio_example_images/2.png', 75, 40],
            ['assets/gradio_example_images/3.png', 30, 70],
            ['assets/gradio_example_images/4.png', 22, 60],
            ['assets/gradio_example_images/5.png', 28, 75],
            ['assets/gradio_example_images/6.png', 35, 15]
        ],
        description="Input an image of a person and age them from the source age to the target age."
    )

    demo_img_vid = gr.Interface(
        fn=block_img_vid,
        inputs=[
            gr.Image(type="pil"),
            gr.Slider(10, 90, value=20, step=1, label="Current age", info="Choose your current age"),
        ],
        outputs=gr.Video(),
        examples=[
            ['assets/gradio_example_images/1.png', 20],
            ['assets/gradio_example_images/2.png', 75],
            ['assets/gradio_example_images/3.png', 30],
            ['assets/gradio_example_images/4.png', 22],
            ['assets/gradio_example_images/5.png', 28],
            ['assets/gradio_example_images/6.png', 35]
        ],
        description="Input an image of a person and a video will be returned of the person at different ages."
    )

    demo_vid = gr.Interface(
        fn=block_vid,
        inputs=[
            gr.Video(),
            gr.Slider(10, 90, value=20, step=1, label="Current age", info="Choose your current age"),
            gr.Slider(10, 90, value=80, step=1, label="Target age", info="Choose the age you want to become")
        ],
        outputs=gr.Video(),
        examples=[
            ['assets/gradio_example_images/orig.mp4', 35, 60],
        ],
        description="Input a video of a person, and it will be aged frame-by-frame."
    )

    demo = gr.TabbedInterface([demo_img, demo_img_vid, demo_vid],
                              tab_names=['Image inference demo', 'Image animation demo', 'Video inference demo'],
                              title="Face Re-Aging Demo",
                              )

    demo.launch()


if __name__ == "__main__":
    # Define command-line arguments
    parser = argparse.ArgumentParser(description="Testing script - Image demo")
    parser.add_argument("--model_path", type=str, default="model/best_unet_model.pth", help="Path to the model")

    # Parse command-line arguments
    args = parser.parse_args()

    run(args.model_path)
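gradio_demo.py is the command-line counterpart of app.py; the only functional difference is that the checkpoint path is configurable via --model_path. Equivalently, run() can be called from Python, as sketched below; the alternative checkpoint path is a placeholder, not a file in this commit.

from scripts.gradio_demo import run

# Launch the same three-tab demo with a checkpoint of your own
# ("checkpoints/my_unet.pth" is a hypothetical path used only for illustration).
run("checkpoints/my_unet.pth")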
scripts/test_functions.py
ADDED (229 lines)

import face_recognition
import numpy as np
import os
import torch
from torch.autograd import Variable
from torchvision import transforms
from torchvision.io import write_video
import tempfile
import subprocess
import json
from ffmpy import FFmpeg, FFprobe
from PIL import Image

mask_file = torch.from_numpy(np.array(Image.open('assets/mask1024.jpg').convert('L'))) / 255
small_mask_file = torch.from_numpy(np.array(Image.open('assets/mask512.jpg').convert('L'))) / 255

def sliding_window_tensor(input_tensor, window_size, stride, your_model, mask=mask_file, small_mask=small_mask_file):
    """
    Apply aging operation on input tensor using a sliding-window method. This operation is done on the GPU, if available.
    """

    input_tensor = input_tensor.to(next(your_model.parameters()).device)
    mask = mask.to(next(your_model.parameters()).device)
    small_mask = small_mask.to(next(your_model.parameters()).device)

    n, c, h, w = input_tensor.size()
    output_tensor = torch.zeros((n, 3, h, w), dtype=input_tensor.dtype, device=input_tensor.device)

    count_tensor = torch.zeros((n, 3, h, w), dtype=torch.float32, device=input_tensor.device)

    add = 2 if window_size % stride != 0 else 1

    for y in range(0, h - window_size + add, stride):
        for x in range(0, w - window_size + add, stride):
            window = input_tensor[:, :, y:y + window_size, x:x + window_size]

            # Apply the same preprocessing as during training
            input_variable = Variable(window, requires_grad=False)  # Assuming GPU is available

            # Forward pass
            with torch.no_grad():
                output = your_model(input_variable)

            output_tensor[:, :, y:y + window_size, x:x + window_size] += output * small_mask
            count_tensor[:, :, y:y + window_size, x:x + window_size] += small_mask

    count_tensor = torch.clamp(count_tensor, min=1.0)

    # Average the overlapping regions
    output_tensor /= count_tensor

    # Apply mask
    output_tensor *= mask

    return output_tensor.cpu()


def process_image(your_model, image, video, source_age, target_age=0,
                  window_size=512, stride=256, steps=18):
    input_size = (1024, 1024)
    # Robustly handle image input for face_recognition
    from PIL import Image as PILImage
    import numpy as np
    if isinstance(image, PILImage.Image):
        image = image.convert('RGB')
        image = np.array(image)
    elif isinstance(image, np.ndarray):
        if image.ndim == 2:  # grayscale
            image = np.stack([image]*3, axis=-1)
        elif image.shape[2] == 4:  # RGBA
            image = image[..., :3]
        if image.dtype == np.float32 or image.dtype == np.float64:
            if image.max() <= 1.0:
                image = (image * 255).astype(np.uint8)
            else:
                image = image.astype(np.uint8)
        elif image.dtype != np.uint8:
            image = image.astype(np.uint8)
    else:
        image = np.array(PILImage.fromarray(image).convert('RGB'))
    # Ensure shape is (H, W, 3) and contiguous
    if image.ndim != 3 or image.shape[2] != 3:
        raise ValueError(f"Image must have shape (H, W, 3), got {image.shape}")
    image = np.ascontiguousarray(image, dtype=np.uint8)
    print(f"[DEBUG] image type: {type(image)}, shape: {image.shape}, dtype: {image.dtype}, contiguous: {image.flags['C_CONTIGUOUS']}")
    if video:  # h264 codec requires frame size to be divisible by 2.
        width, height, depth = image.shape
        new_width = width if width % 2 == 0 else width - 1
        new_height = height if height % 2 == 0 else height - 1
        image.resize((new_width, new_height, depth))

    # Diagnostic: try face_recognition on this image, and if it fails, save and reload
    try:
        fl = face_recognition.face_locations(image)[0]
    except Exception as e:
        print(f"[DEBUG] face_locations failed: {e}. Saving image for test...")
        import tempfile
        from PIL import Image as PILImage
        temp_path = tempfile.mktemp(suffix='.png')
        PILImage.fromarray(image).save(temp_path)
        print(f"[DEBUG] Saved image to {temp_path}. Trying face_recognition.load_image_file...")
        loaded_img = face_recognition.load_image_file(temp_path)
        print(f"[DEBUG] loaded_img type: {type(loaded_img)}, shape: {loaded_img.shape}, dtype: {loaded_img.dtype}")
        fl = face_recognition.face_locations(loaded_img)[0]

    # calculate margins
    margin_y_t = int((fl[2] - fl[0]) * .63 * .85)  # larger as the forehead is often cut off
    margin_y_b = int((fl[2] - fl[0]) * .37 * .85)
    margin_x = int((fl[1] - fl[3]) // (2 / .85))
    margin_y_t += 2 * margin_x - margin_y_t - margin_y_b  # make sure square is preserved

    l_y = max([fl[0] - margin_y_t, 0])
    r_y = min([fl[2] + margin_y_b, image.shape[0]])
    l_x = max([fl[3] - margin_x, 0])
    r_x = min([fl[1] + margin_x, image.shape[1]])

    # crop image
    cropped_image = image[l_y:r_y, l_x:r_x, :]

    # Resizing
    orig_size = cropped_image.shape[:2]

    cropped_image = transforms.ToTensor()(cropped_image)

    cropped_image_resized = transforms.Resize(input_size, interpolation=Image.BILINEAR, antialias=True)(cropped_image)

    source_age_channel = torch.full_like(cropped_image_resized[:1, :, :], source_age / 100)
    target_age_channel = torch.full_like(cropped_image_resized[:1, :, :], target_age / 100)
    input_tensor = torch.cat([cropped_image_resized, source_age_channel, target_age_channel], dim=0).unsqueeze(0)

    image = transforms.ToTensor()(image)

    if video:
        # aging in steps
        interval = .8 / steps
        aged_cropped_images = torch.zeros((steps, 3, input_size[1], input_size[0]))
        for i in range(0, steps):
            input_tensor[:, -1, :, :] += interval

            # performing actions on image
            aged_cropped_images[i, ...] = sliding_window_tensor(input_tensor, window_size, stride, your_model)

        # resize back to original size
        aged_cropped_images_resized = transforms.Resize(orig_size, interpolation=Image.BILINEAR, antialias=True)(
            aged_cropped_images)

        # re-apply
        image = image.repeat(steps, 1, 1, 1)

        image[:, :, l_y:r_y, l_x:r_x] += aged_cropped_images_resized
        image = torch.clamp(image, 0, 1)
        image = (image * 255).to(torch.uint8)

        output_file = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False)

        write_video(output_file.name, image.permute(0, 2, 3, 1), 2)

        return output_file.name

    else:
        # performing actions on image
        aged_cropped_image = sliding_window_tensor(input_tensor, window_size, stride, your_model)

        # resize back to original size
        aged_cropped_image_resized = transforms.Resize(orig_size, interpolation=Image.BILINEAR, antialias=True)(
            aged_cropped_image)

        # re-apply
        image[:, l_y:r_y, l_x:r_x] += aged_cropped_image_resized.squeeze(0)
        image = torch.clamp(image, 0, 1)

        return transforms.functional.to_pil_image(image)


def process_video(your_model, video_path, source_age, target_age, window_size=512, stride=256, frame_count=0):
    """
    Applying the aging to a video.
    We age as from source_age to target_age, and return an image.
    To limit the number of frames in a video, we can set frame_count.
    """

    # Extracting frames and placing them in a temporary directory
    frames_dir = tempfile.TemporaryDirectory()
    output_template = os.path.join(frames_dir.name, '%04d.jpg')

    if frame_count:
        ff = FFmpeg(
            inputs={video_path: None},
            outputs={output_template: ['-vf', f'select=lt(n\,{frame_count})', '-q:v', '1']}
        )
    else:
        ff = FFmpeg(
            inputs={video_path: None},
            outputs={output_template: ['-q:v', '1']}
        )

    ff.run()

    # Getting framerate (for reconstruction later)
    ff = FFprobe(inputs={video_path: None},
                 global_options=['-v', 'error', '-select_streams', 'v', '-show_entries', 'stream=r_frame_rate', '-of',
                                 'default=noprint_wrappers=1:nokey=1'])
    stdout, _ = ff.run(stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    frame_rate = eval(stdout.decode('utf-8').strip())


    # Applying process_image to frames
    processed_dir = tempfile.TemporaryDirectory()

    for name in os.listdir(frames_dir.name):
        image_path = os.path.join(frames_dir.name, name)
        image = Image.open(image_path).convert('RGB')
        image_aged = process_image(your_model, image, False, source_age, target_age, window_size, stride)
        image_aged.save(os.path.join(processed_dir.name, name))

    # Generating a new video
    input_template = os.path.join(processed_dir.name, '%04d.jpg')
    output_file = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False)
    ff = FFmpeg(
        inputs={input_template: f'-framerate {frame_rate}'}, global_options=['-y'],
        outputs={output_file.name: ['-c:v', 'libx264', '-pix_fmt', 'yuv420p']}
    )

    ff.run()

    frames_dir.cleanup()
    processed_dir.cleanup()

    return output_file.name
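Putting the pieces together, single-image inference outside the Gradio demo amounts to loading the U-Net checkpoint and calling process_image, which handles face detection, cropping, the 5-channel input construction, and sliding-window blending internally. A minimal sketch, assuming it is run from the repository root so the mask assets, example image, and checkpoint paths resolve; the source and target ages are illustrative values.

import torch
from PIL import Image

from model.models import UNet
from scripts.test_functions import process_image

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = UNet().to(device)
model.load_state_dict(torch.load("model/best_unet_model.pth", map_location=device))
model.eval()

# Age the subject from 25 to 70; the example image ships with this commit.
img = Image.open("assets/gradio_example_images/1.png").convert("RGB")
aged = process_image(model, img, video=False, source_age=25, target_age=70,
                     window_size=512, stride=256)
aged.save("aged.png")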
scripts/train.py
ADDED (216 lines)

import torch
import torch.optim as optim
from torch.utils.data import DataLoader, random_split
import argparse

import sys
sys.path.append(".")

from model.models import UNet, PatchGANDiscriminator
from model.losses import GeneratorLoss, DiscriminatorLoss
from utils.dataloader import CustomDataset, transform


def train_model(root_dir, start_epoch, num_epochs, load_model_g, load_model_d, num_workers,
                val_freq, batch_size, accum_iter, lr, lr_d, wandb_tracking, desc):
    if wandb_tracking:
        import wandb

        wandb.init(project="FRAN",
                   # track hyperparameters and run metadata
                   config={
                       "lr": lr,
                       "lr_d": lr_d,
                       "dataset": root_dir,
                       "epochs": num_epochs,
                       "batch_size": batch_size,
                       "description": desc
                   }
                   )

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print(f"device: {device}")
    if torch.cuda.device_count() > 0:
        print(f"{torch.cuda.device_count()} GPU(s)")
    if torch.cuda.device_count() > 1:
        print("multi-GPU training is currently not supported.")

    # Create instances of the dataset and split into scripts and validation sets
    dataset = CustomDataset(root_dir=root_dir, transform=transform)

    # Assuming you want to use 80% of the data for scripts and 20% for validation
    train_size = int(0.8 * len(dataset))
    val_size = len(dataset) - train_size
    train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

    # Create data loaders for scripts and validation
    train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers)
    val_dataloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)

    # Create instances of the U-Net, discriminator, and loss models
    unet_model = UNet()
    discriminator_model = PatchGANDiscriminator(input_channels=4)

    if load_model_g:
        unet_model.load_state_dict(torch.load(load_model_g, map_location=device))
        print(f'loaded {load_model_g} for unet_model')
    if load_model_d:
        discriminator_model.load_state_dict(torch.load(load_model_d, map_location=device))
        print(f'loaded {load_model_d} for discriminator_model')

    unet_model = unet_model.to(device)
    discriminator_model = discriminator_model.to(device)

    # if multiGPU:
    #     unet_model = nn.DataParallel(unet_model)
    #     discriminator_model = nn.DataParallel(discriminator_model)

    # Create loss instances
    generator_loss_func = GeneratorLoss(discriminator_model, l1_weight=1.0, perceptual_weight=1.0,
                                        adversarial_weight=0.05, device=device)
    discriminator_loss_func = DiscriminatorLoss(discriminator_model)

    # Create instances of the Adam optimizer
    optimizer_g = optim.Adam(unet_model.parameters(), lr=lr)
    optimizer_d = optim.Adam(discriminator_model.parameters(), lr=lr_d)

    # Training and validation loop
    best_val_loss = float('inf')

    for epoch in range(start_epoch - 1, num_epochs):
        # Training
        unet_model.train()
        discriminator_model.train()
        batch_idx = 0
        for batch in train_dataloader:
            batch_idx += 1
            source_images, target_images = batch

            # if not multiGPU:
            # if multi GPU, nn.DataParallel will already put the batches on the right devices.
            # Otherwise, we do it manually
            source_images = source_images.to(device)
            target_images = target_images.to(device)

            # Zero gradients
            # optimizer_g.zero_grad()
            # optimizer_d.zero_grad()

            # Forward pass
            output_images = unet_model(source_images)
            # if multiGPU:
            #     output_device = output_images.get_device()
            #     source_images, target_images = source_images.to(output_device), target_images.to(output_device)
            output_images += source_images[:, :3, :, :]

            # Discriminator pass
            discriminator_loss = discriminator_loss_func(output_images.detach(), target_images, source_images)
            # discriminator_loss /= accum_iter
            discriminator_loss.backward()

            if (batch_idx % accum_iter == 0) or (batch_idx == len(train_dataloader)):
                optimizer_d.step()
                optimizer_d.zero_grad()

            # Generator pass
            # Calculate the loss
            generator_loss, l1_loss, per_loss, adv_loss = generator_loss_func(output_images, target_images,
                                                                              source_images)
            generator_loss, l1_loss, per_loss, adv_loss = [i / accum_iter for i in
                                                           [generator_loss, l1_loss, per_loss, adv_loss]]
            generator_loss.backward()

            if (batch_idx % accum_iter == 0) or (batch_idx == len(train_dataloader)):
                optimizer_g.step()
                optimizer_g.zero_grad()

            # Print scripts information (if needed)
            print(
                f'Training Epoch [{epoch + 1}/{num_epochs}], Gen Loss: {generator_loss.item()}, L1: {l1_loss.item()}, P: {per_loss.item()}, A: {adv_loss.item()}, Dis Loss: {discriminator_loss.item()}')
            if wandb_tracking:
                wandb.log({
                    'Training Epoch': epoch + 1,
                    'Gen Loss': generator_loss.item(),
                    'L1': l1_loss.item(),
                    'P': per_loss.item(),
                    'A': adv_loss.item(),
                    'Dis Loss': discriminator_loss.item()
                })

        torch.save(unet_model.state_dict(), 'recent_unet_model.pth')
        torch.save(discriminator_model.state_dict(), 'recent_discriminator_model.pth')

        # Validation
        if epoch % val_freq == 0:
            unet_model.eval()
            total_val_loss = 0.0
            with torch.no_grad():
                for val_batch in val_dataloader:
                    val_source_images, val_target_images = val_batch

                    # if not multiGPU:
                    # if multi GPU, nn.DataParallel will already put the batches on the right devices.
                    # Otherwise, we do it manually
                    val_source_images = val_source_images.to(device)
                    val_target_images = val_target_images.to(device)

                    # Forward pass
                    val_output_images = unet_model(val_source_images)

                    # if multiGPU:
                    #     output_device = val_output_images.get_device()
                    #     val_source_images, val_target_images = val_source_images.to(output_device), \
                    #                                            val_target_images.to(output_device)

                    # Calculate the loss
                    generator_loss, _, _, _ = generator_loss_func(val_output_images, val_target_images,
                                                                  val_source_images)
                    total_val_loss += generator_loss.item()

            average_val_loss = total_val_loss / len(val_dataloader)

            # Print validation information
            print(f'Validation Epoch [{epoch + 1}/{num_epochs}], Average Loss: {average_val_loss}')
            if wandb_tracking:
                wandb.log({
                    'Training Epoch': epoch + 1,
                    'Val Loss': average_val_loss,
                })

            # Save the model with the best validation loss
            if average_val_loss < best_val_loss:
                best_val_loss = average_val_loss
                torch.save(unet_model.state_dict(), 'best_unet_model.pth')
                torch.save(discriminator_model.state_dict(), 'best_discriminator_model.pth')

    if wandb_tracking:
        wandb.finish()


if __name__ == "__main__":
    # Define command-line arguments
    parser = argparse.ArgumentParser(description="Training Script")
    parser.add_argument("--root_dir", type=str, default='data/processed/train',
                        help="Path to the training data. Note the format: To use the dataloader, the directory should be filled with folders containing image files of various ages, where the file name is the age.")
    parser.add_argument("--start_epoch", type=int, default=0, help="Start epoch, if scripts is resumed")
    parser.add_argument("--num_epochs", type=int, default=2000, help="End epoch")
    parser.add_argument("--load_model_g", type=str, default='',
                        help="Path to pretrained generator model. Leave blank to train from scratch")
    parser.add_argument("--load_model_d", type=str, default='',
                        help="Path to pretrained discriminator model. Leave blank to train from scratch")
    parser.add_argument("--num_workers", type=int, default=4, help="Number of workers")
    parser.add_argument("--batch_size", type=int, default=3, help="Batch size")
    parser.add_argument("--accum_iter", type=int, default=3, help="Number of batches after which weights are updated")
    parser.add_argument("--val_freq", type=int, default=1, help="Validation frequency (epochs)")
    parser.add_argument("--lr", type=float, default=0.00001, help="Learning rate for generator")
    parser.add_argument("--lr_d", type=float, default=0.00001, help="Learning rate for discriminator")
    parser.add_argument("--wandb_tracking", help="A binary (True/False) argument for using WandB tracking or not")
    parser.add_argument("--desc", type=str, default='', help="Description for WandB")

    # Parse command-line arguments
    args = parser.parse_args()

    # Call the scripts function with parsed arguments
    train_model(args.root_dir, args.start_epoch, args.num_epochs, args.load_model_g, args.load_model_d,
                args.num_workers, args.val_freq, args.batch_size, args.accum_iter, args.lr, args.lr_d,
                args.wandb_tracking, args.desc)
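Note that train.py imports CustomDataset and transform from utils/dataloader.py, which is not part of this commit. From how train_model consumes the batches, each sample must be a (source, target) pair where source is a 5-channel tensor (RGB plus a source-age and a target-age plane, ages scaled to [0, 1]) and target is the 3-channel ground-truth image at the target age. The class below is a hypothetical stand-in with that interface, useful only for dry-running the training loop on random data; it is not the project's actual dataloader.

import torch
from torch.utils.data import Dataset

class RandomAgingPairs(Dataset):
    """Hypothetical stand-in for utils.dataloader.CustomDataset: yields random
    (source, target) pairs shaped the way train.py expects."""

    def __init__(self, length=8, size=512):
        self.length = length
        self.size = size

    def __len__(self):
        return self.length

    def __getitem__(self, idx):
        rgb = torch.rand(3, self.size, self.size)                  # input face at the source age
        source_age = torch.full((1, self.size, self.size), 0.25)   # e.g. age 25 encoded as 0.25
        target_age = torch.full((1, self.size, self.size), 0.70)   # e.g. age 70 encoded as 0.70
        source = torch.cat([rgb, source_age, target_age], dim=0)   # 5 channels, as UNet expects
        target = torch.rand(3, self.size, self.size)               # ground truth at the target age
        return source, target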