File size: 2,252 Bytes
daa6779
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
"""
-----------------------------------------------------------------------------
Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.

NVIDIA CORPORATION and its licensors retain all intellectual property
and proprietary rights in and to this software, related documentation
and any modifications thereto. Any use, reproduction, disclosure or
distribution of this software and related documentation without an express
license agreement from NVIDIA CORPORATION is strictly prohibited.
-----------------------------------------------------------------------------
"""

import numpy as np
import torch


class FlowMatchingScheduler:
    """Discrete noise schedule used to corrupt latents for flow-matching training.

    The schedule holds ``num_train_timesteps`` noise levels (``sigmas``) in
    descending order, starting at exactly 1 and ending at a small non-zero
    value, together with the matching ``timesteps`` (``sigma * num_train_timesteps``).

    Args:
        num_train_timesteps: Number of discrete noise levels in the schedule.
        shift: Warps the uniform schedule via
            ``shift * s / (1 + (shift - 1) * s)``; ``shift=1`` leaves it unchanged.
    """

    def __init__(self, num_train_timesteps: int = 1000, shift: float = 1):
        self.num_train_timesteps = num_train_timesteps
        self.shift = shift

        # Descending grid num_train_timesteps, ..., 2, 1 (never reaches 0).
        timesteps = np.linspace(1, num_train_timesteps, num_train_timesteps, dtype=np.float32)[::-1].copy()
        timesteps = torch.from_numpy(timesteps).to(dtype=torch.float32)

        # Normalise to (0, 1], then apply the shift warp.
        sigmas = timesteps / num_train_timesteps
        sigmas = shift * sigmas / (1 + (shift - 1) * sigmas)

        self.sigmas = sigmas  # 1 --> shifted(1 / num_train_timesteps)
        self.timesteps = sigmas * num_train_timesteps  # num_train_timesteps --> ~1

    def to(self, device) -> "FlowMatchingScheduler":
        """Move the schedule tensors to ``device``.

        Returns:
            ``self``, so the call can be chained like ``torch.nn.Module.to``.
        """
        self.sigmas = self.sigmas.to(device=device)
        self.timesteps = self.timesteps.to(device=device)
        return self

    def add_noise(self, latent: torch.Tensor, logit_mean: float = 1.0, logit_std: float = 1.0):
        """Blend ``latent`` with Gaussian noise at a randomly sampled noise level.

        One noise level is drawn per batch element with logit-normal sampling:
        ``u = sigmoid(N(logit_mean, logit_std))`` selects schedule index
        ``floor(u * num_train_timesteps)``. Because the schedule is stored in
        descending order, a larger ``u`` selects a smaller sigma (less noise).

        Args:
            latent: Clean latents of shape ``[B, ...]``.
            logit_mean: Mean of the normal distribution before the sigmoid.
            logit_std: Standard deviation of that normal distribution.

        Returns:
            A tuple ``(noisy_latent, noise, timesteps)`` where ``noisy_latent``
            and ``noise`` have the shape of ``latent`` and ``timesteps`` has
            shape ``[B]``.
        """
        batch_size = latent.shape[0]

        # logit-normal sampling
        u = torch.normal(mean=logit_mean, std=logit_std, size=(batch_size,), device=self.sigmas.device)
        u = torch.sigmoid(u)

        # sigmoid saturates to exactly 1.0 in float32 for large logits, which
        # would produce index == num_train_timesteps (one past the end).
        step_indices = (u * self.num_train_timesteps).long().clamp(max=self.num_train_timesteps - 1)
        timesteps = self.timesteps[step_indices]

        # Reshape [B] -> [B, 1, ..., 1] so sigma broadcasts over the latent dims.
        sigmas = self.sigmas[step_indices].view(-1, *([1] * (latent.ndim - 1)))

        noise = torch.randn_like(latent)
        noisy_latent = (1.0 - sigmas) * latent + sigmas * noise

        return noisy_latent, noise, timesteps