Spaces:

gaur3009
/

Design_warper

Sleeping

App Files Files Community

gaur3009 committed on Jun 29

Commit

e6d6add

verified ·

1 Parent(s): 1964060

Update networks.py

Browse files

Files changed (1) hide show

networks.py +85 -75

networks.py CHANGED Viewed

@@ -120,6 +120,7 @@ class FeatureRegression(nn.Module):
         x = self.linear(x)
         return self.tanh(x)
 class TpsGridGen(nn.Module):
     def __init__(self, out_h=256, out_w=192, grid_size=5):
         super(TpsGridGen, self).__init__()
@@ -128,106 +129,115 @@ class TpsGridGen(nn.Module):
         self.grid_size = grid_size
         self.N = grid_size * grid_size
-        # Create regular grid of control points
         axis_coords = np.linspace(-1, 1, grid_size)
         P_Y, P_X = np.meshgrid(axis_coords, axis_coords)
-        P_X = torch.FloatTensor(P_X.reshape(-1, 1))  # (N,1)
-        P_Y = torch.FloatTensor(P_Y.reshape(-1, 1))  # (N,1)
-        # Register buffers to persist through saving/loading
-        self.register_buffer('P_X_base', P_X)
-        self.register_buffer('P_Y_base', P_Y)
         # Compute inverse matrix L^-1
         Li = self.compute_L_inverse(P_X, P_Y)
-        self.register_buffer('Li', Li)
-        # Create sampling grid
-        grid_X, grid_Y = np.meshgrid(np.linspace(-1, 1, out_w), np.linspace(-1, 1, out_h))
-        self.register_buffer('grid_X', torch.FloatTensor(grid_X).unsqueeze(0).unsqueeze(3))  # (1,H,W,1)
-        self.register_buffer('grid_Y', torch.FloatTensor(grid_Y).unsqueeze(0).unsqueeze(3))  # (1,H,W,1)
     def compute_L_inverse(self, X, Y):
-        N = X.size(0)
-        device = X.device
-        # Construct distance matrix
-        Xmat = X.expand(N, N)
-        Ymat = Y.expand(N, N)
-        P_dist_squared = torch.pow(Xmat - Xmat.t(), 2) + torch.pow(Ymat - Ymat.t(), 2)
-        P_dist_squared[P_dist_squared == 0] = 1  # Avoid log(0)
-        K = torch.mul(P_dist_squared, torch.log(P_dist_squared))
-        # Construct L matrix
-        O = torch.ones(N, 1, device=device)
-        Z = torch.zeros(3, 3, device=device)
-        P = torch.cat((O, X, Y), 1)
-        L_top = torch.cat((K, P), 1)
-        L_bottom = torch.cat((P.t(), Z), 1)
-        L = torch.cat((L_top, L_bottom), 0)
-        return torch.inverse(L)
     def forward(self, theta):
         batch_size = theta.size(0)
         device = theta.device
-        # Split theta into x and y components
-        Q_X = theta[:, :self.N].view(batch_size, self.N, 1)
-        Q_Y = theta[:, self.N:].view(batch_size, self.N, 1)
         Q_X = Q_X + self.P_X_base.expand_as(Q_X)
         Q_Y = Q_Y + self.P_Y_base.expand_as(Q_Y)
-        # Extract top-left NxN block of Li matrix
-        Li_block = self.Li[:self.N, :self.N]
-        # Compute weights
-        W_X = torch.bmm(Li_block.expand(batch_size, -1, -1), Q_X)
-        W_Y = torch.bmm(Li_block.expand(batch_size, -1, -1), Q_Y)
-        # Prepare grid tensors
-        grid_X = self.grid_X.expand(batch_size, -1, -1, -1)
-        grid_Y = self.grid_Y.expand(batch_size, -1, -1, -1)
-        # Compute transformed coordinates
-        points_X = self.transform_points(grid_X, W_X, Q_X)
-        points_Y = self.transform_points(grid_Y, W_Y, Q_Y)
-        return torch.cat((points_X, points_Y), 3)
-    # In TpsGridGen class, replace transform_points method with this:
-    def transform_points(self, grid, W, Q):
-        batch_size, h, w, _ = grid.size()
-        n_points = h * w
-        # Control points P (N, 2)
-        P = torch.cat([self.P_X_base, self.P_Y_base], 1)
-        P = P.unsqueeze(0).expand(batch_size, -1, -1)  # (B, N, 2)
-        # Compute U = r^2 * log(r^2)
-        grid_flat = grid.view(batch_size, n_points, 2)  # (B, H*W, 2)
-        dist = grid_flat.unsqueeze(2) - P.unsqueeze(1)  # (B, H*W, N, 2)
-        dist_squared = torch.sum(dist**2, dim=3)  # (B, H*W, N)
-        dist_squared[dist_squared == 0] = 1  # Avoid log(0)
-        U = dist_squared * torch.log(dist_squared)
-        # Compute affine part [1, x, y]
-        ones = torch.ones(batch_size, n_points, 1, device=grid.device)
-        A = torch.cat([ones, grid_flat], dim=2)  # (B, H*W, 3)
-        # Warp coefficients
-        W = W.view(batch_size, self.N, 1)
-        Q = Q.view(batch_size, self.N, 1)
-        # Non-affine part
-        non_affine = torch.bmm(U, W)  # (B, H*W, 1)
-        # Affine part
-        affine = torch.bmm(A, Q)  # (B, H*W, 1)
-        # Combine components
-        points = affine + non_affine
-        return points.view(batch_size, h, w, 1)
 class GMM(nn.Module):
     def __init__(self, opt=None):

         x = self.linear(x)
         return self.tanh(x)
# Thin-plate-spline (TPS) sampling-grid generator.
class TpsGridGen(nn.Module):
    """Turn control-point offsets into a dense TPS-warped sampling grid.

    A regular ``grid_size x grid_size`` lattice of control points is laid out
    over ``[-1, 1] x [-1, 1]``.  ``forward`` receives per-sample offsets for
    those points, solves the thin-plate-spline system that maps the regular
    lattice onto the displaced points, and evaluates the spline at every
    location of an ``out_h x out_w`` grid.

    Registered buffers (names and shapes are part of the state dict):
        grid_X_base, grid_Y_base: ``[1, out_h, out_w, 1]`` sampling coordinates.
        P_X_base, P_Y_base: ``[N, 1]`` regular control-point coordinates.
        Li: ``[N + 3, N + 3]`` inverse of the TPS system matrix.

    Args:
        out_h: Height of the generated grid.
        out_w: Width of the generated grid.
        grid_size: Control points per axis (``N = grid_size ** 2``).

    Raises:
        ValueError: If ``grid_size`` is smaller than 2 (the TPS system is
            singular without at least three non-collinear control points).
    """

    def __init__(self, out_h=256, out_w=192, grid_size=5):
        super(TpsGridGen, self).__init__()
        if grid_size < 2:
            raise ValueError("grid_size must be >= 2, got {}".format(grid_size))
        self.out_h, self.out_w = out_h, out_w
        self.grid_size = grid_size
        self.N = grid_size * grid_size

        # Retained for backward compatibility; nothing in this class reads it.
        self.grid = np.zeros([self.out_h, self.out_w, 3], dtype=np.float32)

        # Dense sampling coordinates: X varies along width, Y along height.
        grid_X, grid_Y = np.meshgrid(np.linspace(-1, 1, out_w),
                                     np.linspace(-1, 1, out_h))
        self.grid_X = torch.tensor(grid_X, dtype=torch.float32).unsqueeze(0).unsqueeze(3)  # [1, H, W, 1]
        self.grid_Y = torch.tensor(grid_Y, dtype=torch.float32).unsqueeze(0).unsqueeze(3)  # [1, H, W, 1]
        self.register_buffer('grid_X_base', self.grid_X)
        self.register_buffer('grid_Y_base', self.grid_Y)

        # Regular lattice of control points, flattened to column vectors.
        axis_coords = np.linspace(-1, 1, grid_size)
        P_Y, P_X = np.meshgrid(axis_coords, axis_coords)
        P_X = np.reshape(P_X, (-1, 1))  # [N, 1]
        P_Y = np.reshape(P_Y, (-1, 1))  # [N, 1]
        self.P_X = torch.tensor(P_X, dtype=torch.float32)
        self.P_Y = torch.tensor(P_Y, dtype=torch.float32)
        self.register_buffer('P_X_base', self.P_X)
        self.register_buffer('P_Y_base', self.P_Y)

        # The system matrix depends only on the fixed lattice, so invert once.
        Li = self.compute_L_inverse(P_X, P_Y)
        self.register_buffer('Li', torch.tensor(Li, dtype=torch.float32))

    def compute_L_inverse(self, X, Y):
        """Return the inverse of the TPS system matrix ``L`` as a NumPy array.

        ``L = [[K, P], [P^T, 0]]`` where ``K[i, j] = r^2 * log(r^2)`` for the
        squared distance ``r^2`` between control points ``i`` and ``j`` and
        ``P = [1, X, Y]``.

        Args:
            X: Array-like of N control-point x coordinates.
            Y: Array-like of N control-point y coordinates.

        Returns:
            ``numpy.ndarray`` of shape ``[N + 3, N + 3]`` (float64).
        """
        X = np.asarray(X, dtype=np.float64).reshape(-1, 1)
        Y = np.asarray(Y, dtype=np.float64).reshape(-1, 1)
        N = X.shape[0]

        # Pairwise squared distances via broadcasting ([N,1] - [1,N]).
        dist_squared = (X - X.T) ** 2 + (Y - Y.T) ** 2
        # r^2 * log(r^2) -> 0 as r -> 0; substituting 1 yields 1 * log(1) = 0
        # without evaluating log(0).
        dist_squared[dist_squared == 0] = 1
        K = dist_squared * np.log(dist_squared)

        P = np.concatenate((np.ones((N, 1)), X, Y), axis=1)  # [N, 3]
        L = np.block([[K, P],
                      [P.T, np.zeros((3, 3))]])
        return np.linalg.inv(L)

    def forward(self, theta):
        """Evaluate the TPS warp described by ``theta`` on the sampling grid.

        Args:
            theta: Tensor holding ``2 * N`` values per sample, the first ``N``
                being x offsets and the last ``N`` y offsets of the control
                points; accepted as ``[B, 2N]`` or any shape that flattens to
                it (e.g. ``[B, 2N, 1, 1]``).  It is combined directly with
                the module's buffers, so it must share their dtype and device.

        Returns:
            Tensor of shape ``[B, out_h, out_w, 2]``; channel 0 is the warped
            x coordinate and channel 1 the warped y coordinate.

        Raises:
            ValueError: If ``theta`` does not hold ``2 * N`` values per sample.
        """
        batch_size = theta.size(0)
        N = self.N
        if theta.numel() != batch_size * 2 * N:
            raise ValueError(
                "theta must hold {} values per sample, got shape {}".format(
                    2 * N, tuple(theta.shape)))
        theta = theta.reshape(batch_size, 2 * N)

        # Target control points = regular lattice + predicted offsets.
        Q_X = theta[:, :N].unsqueeze(2) + self.P_X_base.unsqueeze(0)  # [B, N, 1]
        Q_Y = theta[:, N:].unsqueeze(2) + self.P_Y_base.unsqueeze(0)  # [B, N, 1]
        Q = torch.cat((Q_X, Q_Y), 2)                                  # [B, N, 2]

        # Solve L [W; A] = [Q; 0] with the cached inverse.  The right-hand
        # side is zero in its last three rows, so only the first N columns
        # of L^-1 contribute.
        W = torch.matmul(self.Li[:N, :N], Q)  # [B, N, 2] radial weights
        A = torch.matmul(self.Li[N:, :N], Q)  # [B, 3, 2] affine coefficients

        _, out_h, out_w, _ = self.grid_X_base.shape
        grid_x = self.grid_X_base.reshape(-1, 1)  # [H*W, 1]
        grid_y = self.grid_Y_base.reshape(-1, 1)  # [H*W, 1]

        # Radial basis between every grid location and every control point.
        # It does not depend on theta, so it is computed once for the batch.
        dist_squared = ((grid_x - self.P_X_base.t()) ** 2
                        + (grid_y - self.P_Y_base.t()) ** 2)  # [H*W, N]
        # Same limit handling as in compute_L_inverse: U(0) = 0.
        dist_squared = torch.where(dist_squared == 0,
                                   torch.ones_like(dist_squared),
                                   dist_squared)
        U = dist_squared * torch.log(dist_squared)

        # Affine basis [1, x, y], matching the column order of P in L.
        affine_basis = torch.cat((torch.ones_like(grid_x), grid_x, grid_y), 1)  # [H*W, 3]

        warped = torch.matmul(U, W) + torch.matmul(affine_basis, A)  # [B, H*W, 2]
        return warped.view(batch_size, out_h, out_w, 2)
 class GMM(nn.Module):
     def __init__(self, opt=None):