unknown committed on
Commit
93eb0ff
·
1 Parent(s): be5b973
Files changed (1) hide show
  1. app.py +8 -9
app.py CHANGED
@@ -136,7 +136,12 @@ class FoleyController:
136
  cfg_scale_slider,
137
  seed_textbox,
138
  ):
139
-
 
 
 
 
 
140
  vision_transform_list = [
141
  torchvision.transforms.Resize((128, 128)),
142
  torchvision.transforms.CenterCrop((112, 112)),
@@ -153,7 +158,7 @@ class FoleyController:
153
  frames, duration = read_frames_with_moviepy(input_video, max_frame_nums=max_frame_nums)
154
  if duration >= 10:
155
  duration = 10
156
- time_frames = torch.FloatTensor(frames).permute(0, 3, 1, 2).to('cuda')
157
  time_frames = video_transform(time_frames)
158
  time_frames = {'frames': time_frames.unsqueeze(0).permute(0, 2, 1, 3, 4)}
159
  preds = self.time_detector(time_frames)
@@ -165,7 +170,7 @@ class FoleyController:
165
  # w -> b c h w
166
  time_condition = torch.FloatTensor(time_condition).unsqueeze(0).unsqueeze(0).unsqueeze(0).repeat(1, 1, 256, 1)
167
 
168
- images = self.image_processor(images=frames, return_tensors="pt").to('cuda')
169
  image_embeddings = self.image_encoder(**images).image_embeds
170
  image_embeddings = torch.mean(image_embeddings, dim=0, keepdim=True).unsqueeze(0).unsqueeze(0)
171
  neg_image_embeddings = torch.zeros_like(image_embeddings)
@@ -208,12 +213,6 @@ class FoleyController:
208
  controller = FoleyController()
209
  device = "cuda" if torch.cuda.is_available() else "cpu"
210
 
211
- # move to gpu
212
- controller.time_detector = controller.time_detector.to(device)
213
- controller.pipeline = controller.pipeline.to(device)
214
- controller.vocoder = controller.vocoder.to(device)
215
- controller.image_encoder = controller.image_encoder.to(device)
216
-
217
  with gr.Blocks(css=css) as demo:
218
  gr.HTML(
219
  '<h1 style="height: 136px; display: flex; align-items: center; justify-content: space-around;"><span style="height: 100%; width:136px;"><img src="file/foleycrafter.png" alt="logo" style="height: 100%; width:auto; object-fit: contain; margin: 0px 0px; padding: 0px 0px;"></span><strong style="font-size: 40px;">FoleyCrafter: Bring Silent Videos to Life with Lifelike and Synchronized Sounds</strong></h1>'
 
136
  cfg_scale_slider,
137
  seed_textbox,
138
  ):
139
+ device = 'cuda'
140
+ # move to gpu
141
+ controller.time_detector = controller.time_detector.to(device)
142
+ controller.pipeline = controller.pipeline.to(device)
143
+ controller.vocoder = controller.vocoder.to(device)
144
+ controller.image_encoder = controller.image_encoder.to(device)
145
  vision_transform_list = [
146
  torchvision.transforms.Resize((128, 128)),
147
  torchvision.transforms.CenterCrop((112, 112)),
 
158
  frames, duration = read_frames_with_moviepy(input_video, max_frame_nums=max_frame_nums)
159
  if duration >= 10:
160
  duration = 10
161
+ time_frames = torch.FloatTensor(frames).permute(0, 3, 1, 2).to(device)
162
  time_frames = video_transform(time_frames)
163
  time_frames = {'frames': time_frames.unsqueeze(0).permute(0, 2, 1, 3, 4)}
164
  preds = self.time_detector(time_frames)
 
170
  # w -> b c h w
171
  time_condition = torch.FloatTensor(time_condition).unsqueeze(0).unsqueeze(0).unsqueeze(0).repeat(1, 1, 256, 1)
172
 
173
+ images = self.image_processor(images=frames, return_tensors="pt").to(device)
174
  image_embeddings = self.image_encoder(**images).image_embeds
175
  image_embeddings = torch.mean(image_embeddings, dim=0, keepdim=True).unsqueeze(0).unsqueeze(0)
176
  neg_image_embeddings = torch.zeros_like(image_embeddings)
 
213
  controller = FoleyController()
214
  device = "cuda" if torch.cuda.is_available() else "cpu"
215
 
 
 
 
 
 
 
216
  with gr.Blocks(css=css) as demo:
217
  gr.HTML(
218
  '<h1 style="height: 136px; display: flex; align-items: center; justify-content: space-around;"><span style="height: 100%; width:136px;"><img src="file/foleycrafter.png" alt="logo" style="height: 100%; width:auto; object-fit: contain; margin: 0px 0px; padding: 0px 0px;"></span><strong style="font-size: 40px;">FoleyCrafter: Bring Silent Videos to Life with Lifelike and Synchronized Sounds</strong></h1>'