Commit f90ddf2 · 1 Parent(s): 7917eea

image is getting generated but it's very ass

Browse files
- __pycache__/model.cpython-310.pyc +0 -0
- main.py +58 -46
- model.py +11 -25

__pycache__/model.cpython-310.pyc CHANGED
Binary files a/__pycache__/model.cpython-310.pyc and b/__pycache__/model.cpython-310.pyc differ
main.py CHANGED
@@ -1,68 +1,80 @@
 import torch
 from model import UNet
-from frames import load_frames
 from PIL import Image
-from torchvision.transforms import transforms,ToTensor
+from torchvision.transforms import transforms, ToTensor
 
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-
-
-    image=
+
+def save_frames(tensor, out_path) -> None:
+    image = normalize_frames(tensor)
+    image = Image.fromarray(image)
     image.save(out_path)
+
 def normalize_frames(tensor):
-    tensor=tensor.squeeze(0).detach().cpu()
-
-
-    tensor=(
-    tensor
-
+    tensor = tensor.squeeze(0).detach().cpu()
+    tensor = torch.clamp(tensor, 0.0, 1.0)  # Ensure values are in [0, 1]
+    tensor = (tensor * 255).byte()  # Scale to [0, 255]
+    tensor = tensor.permute(1, 2, 0).numpy()  # Convert to [H, W, C]
+    return tensor
+
+def load_frames(image_path):
+    transform = transforms.Compose([
+        ToTensor()  # Converts to [0, 1] range and [C, H, W]
+    ])
+    img = Image.open(image_path).convert("RGB")
+    tensor = transform(img).unsqueeze(0).to(device)  # Add batch dimension
     return tensor
-
-
+
+def time_steps(input_fps, output_fps) -> list[float]:
+    if output_fps <= input_fps:
         return []
-    k=output_fps//input_fps
-    n=k-1
-    return [i/n+1 for i in range(1,n+1)]
-
-
-
+    k = output_fps // input_fps
+    n = k - 1
+    return [i / (n + 1) for i in range(1, n + 1)]
+
+def expand_channels(tensor, target):
+    batch_size, current_channels, height, width = tensor.shape
+    if current_channels >= target:
         return tensor
-    required=target-current_channels
-    extra=torch.zeros(batch_size,required,height,width,device=tensor.device,dtype=tensor.dtype)
-    return torch.cat((tensor,extra),dim=1)
-
-
-
-
+    required = target - current_channels
+    extra = torch.zeros(batch_size, required, height, width, device=tensor.device, dtype=tensor.dtype)
+    return torch.cat((tensor, extra), dim=1)
+
+def interpolate(model_FC, model_AT, A, B, input_fps, output_fps):
+    interval = time_steps(input_fps, output_fps)
+    input_tensor = torch.cat((A, B), dim=1)
+    print(f"Time intervals: {interval}")
     with torch.no_grad():
-        flow_output=model_FC(input_tensor)
-        flow_output=expand_channels(flow_output,20)
-
+        flow_output = model_FC(input_tensor)  # Output shape: [1, 4, H, W]
+        flow_output = expand_channels(flow_output, 20)  # Expand to 20 channels
+
+    generated_frames = []
     with torch.no_grad():
         for i in interval:
-            inter_tensor=torch.tensor([i],dtype=torch.float32).unsqueeze(0).to(device)
-            interpolated_frame=model_AT(flow_output,inter_tensor)
+            inter_tensor = torch.tensor([i], dtype=torch.float32).unsqueeze(0).to(device)
+            interpolated_frame = model_AT(flow_output, inter_tensor)
             generated_frames.append(interpolated_frame)
     return generated_frames
 
 def solve():
-    checkpoint=torch.load("SuperSloMo.ckpt")
-    model_FC=UNet(6,4)
-    model_FC
-    model_FC.load_state_dict(checkpoint["state_dictFC"]) # loading all weights from model
-    model_AT=UNet(20,5)
-    model_AT.load_state_dict(checkpoint["state_dictAT"],strict=False)
-    model_AT=model_AT.to(device)
-    model_AT.eval()
+    checkpoint = torch.load("SuperSloMo.ckpt")
+    model_FC = UNet(6, 4).to(device)  # Initialize flow computation model
+    model_FC.load_state_dict(checkpoint["state_dictFC"])  # Load weights
     model_FC.eval()
-    A=load_frames("output/1.png")
-    B=load_frames("output/69.png")
-    interpolated_frames=interpolate(model_FC,model_AT,A,B,30,60)
-    for index,value in enumerate(interpolated_frames):
-        save_frames(value[:,:3,:,:],"Result_Test/image{}.png".format(index+1))
 
+    model_AT = UNet(20, 5).to(device)  # Initialize auxiliary task model
+    model_AT.load_state_dict(checkpoint["state_dictAT"], strict=False)  # Load weights
+    model_AT.eval()
+
+    A = load_frames("output/1.png")
+    B = load_frames("output/69.png")
+    interpolated_frames = interpolate(model_FC, model_AT, A, B, 30, 60)
+
+    for index, value in enumerate(interpolated_frames):
+        save_frames(value[:, :3, :, :], f"Result_Test/image{index + 1}.png")  # Save only RGB channels
 
 def main():
     solve()
-
-
+
+if __name__ == "__main__":
+    main()
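For reference, the two new helpers in main.py can be sanity-checked in isolation. A minimal sketch (assuming the file is importable as `main` from the repo root; the tensor sizes are arbitrary):

    import torch
    from main import time_steps, expand_channels

    # Doubling 30 fps to 60 fps yields one midpoint per input frame pair
    print(time_steps(30, 60))  # [0.5]

    # expand_channels zero-pads the 4-channel flow output up to the
    # 20 input channels expected by model_AT = UNet(20, 5)
    flow = torch.randn(1, 4, 64, 64)
    print(expand_channels(flow, 20).shape)  # torch.Size([1, 20, 64, 64])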
model.py CHANGED
@@ -107,35 +107,21 @@ class up(nn.Module):
         self.conv1 = nn.Conv2d(inChannels, outChannels, 3, stride=1, padding=1)
         # (2 * outChannels) is used for accommodating skip connection.
         self.conv2 = nn.Conv2d(2 * outChannels, outChannels, 3, stride=1, padding=1)
-
-    def forward(self, x, skpCn):
-        """
-        Returns output tensor after passing input `x` to the neural network
-        block.
-
-        Parameters
-        ----------
-            x : tensor
-                input to the NN block.
-            skpCn : tensor
-                skip connection input to the NN block.
 
-        Returns
-        -------
-            tensor
-                output of the NN block.
-        """
-
-
-
-        #
-        x = F.leaky_relu(self.conv1(x), negative_slope = 0.1)
-
-        x = F.leaky_relu(self.conv2(torch.cat((x, skpCn), 1)), negative_slope = 0.1)
+    def forward(self, x, skpCn):
+        # Upsample x
+        x = F.interpolate(x, scale_factor=2, mode='bilinear', align_corners=True)
+        # Match dimensions by cropping the skip connection (skpCn) to match x
+        if x.size(-1) != skpCn.size(-1):
+            skpCn = skpCn[:, :, :, :x.size(-1)]
+        if x.size(-2) != skpCn.size(-2):
+            skpCn = skpCn[:, :, :x.size(-2), :]
+        # Concatenate and apply convolutions
+        x = F.leaky_relu(self.conv1(x), negative_slope=0.1)
+        x = F.leaky_relu(self.conv2(torch.cat((x, skpCn), 1)), negative_slope=0.1)
         return x
 
 
-
 class UNet(nn.Module):
     """
     A class for creating UNet like architecture as specified by the