jjz5463 committed on
Commit 45f2ffe · 1 Parent(s): 97af337
Files changed (1)
  1. baseline_utils.py +31 -42
baseline_utils.py CHANGED
@@ -9,10 +9,9 @@ from diffusers.utils import export_to_video
 import os
 import spaces
 from moviepy.editor import VideoFileClip, AudioFileClip, concatenate_videoclips
+from transformers import pipeline
 import requests
 from transformers import pipeline
-from multiprocessing import Pool
-
 
 # Utilize the Google Cloud Vision API to recognize text in the
 # input input_images (diary input_images), https://cloud.google.com/vision.
@@ -123,62 +122,52 @@ def scenes_caption(scenes, api_key):
 
     return "\n\n".join(captions)
 
-# Define the single video generation function in the global scope
-def generate_single_video(gpu_id, prompt, writer_description, fps, i):
-    # Assign the specific GPU for this process
-    device = f"cuda:{gpu_id}"
 
-    # Initialize the pipeline for this GPU
+@spaces.GPU
+def generate_video(scene_list, writer_description, opt, fps=24):  # Lower fps
+
     pipe = CogVideoXPipeline.from_pretrained(
         "THUDM/CogVideoX-5b",
         torch_dtype=torch.bfloat16,
         cache_dir="./CogVideoX-5b"
     )
-    pipe.to(device)  # Move the model to the assigned GPU
+
    pipe.enable_model_cpu_offload()
    pipe.vae.enable_tiling()
 
-    # Generate the video
-    video = pipe(
-        prompt=prompt + f'\nThe main character is described as: {writer_description}.',
-        num_videos_per_prompt=1,
-        num_inference_steps=50,
-        num_frames=fps,
-        guidance_scale=6,
-        generator=torch.Generator(device=device).manual_seed(42),
-    ).frames[0]
-
-    # Save the video
-    video_path = export_to_video(video, output_video_path=f'videos/video{i}.mp4')
-    return video_path
-
-
-@spaces.GPU
-def generate_video(scene_list, writer_description, opt, fps=24):  # Lower fps
-    # Set TOKENIZERS_PARALLELISM to avoid tokenizer warnings
-    os.environ["TOKENIZERS_PARALLELISM"] = "false"
+    # Check for available device: CUDA, MPS, or CPU
+    if torch.cuda.is_available():
+        device = "cuda"
+        print("Using CUDA backend.")
+    elif torch.backends.mps.is_available():
+        device = "mps"
+        print("Using MPS backend.")
+    else:
+        device = "cpu"
+        print("CUDA and MPS not available. Falling back to CPU.")
 
-    # Ensure the output directory exists
+    # Truncate the prompt to fit the CLIP token limit
    os.makedirs("videos", exist_ok=True)
-
-    # Assign each task to a GPU in a round-robin fashion
-    num_gpus = torch.cuda.device_count()
-    if num_gpus < 4:
-        raise RuntimeError("This code assumes at least 4 GPUs are available.")
-
-    tasks = [(i % num_gpus, prompt, writer_description, fps, i) for i, prompt in enumerate(scene_list)]
-
-    # Parallelize using multiprocessing
-    with Pool(processes=num_gpus) as pool:
-        video_paths = pool.starmap(generate_single_video, tasks)
+    video_paths = []
+    for i, prompt in enumerate(scene_list):
+        video = pipe(
+            prompt=prompt + f'\nThe main character is described as: {writer_description}.',
+            num_videos_per_prompt=1,
+            num_inference_steps=50,
+            num_frames=fps,
+            guidance_scale=6,
+            generator=torch.Generator(device=device).manual_seed(42),
+        ).frames[0]
+
+        video_path = export_to_video(video, output_video_path=f'videos/video{i}.mp4')
+        video_paths.append(video_path)
 
    # Concatenate the generated videos into a single video
    concatenated_video_path = "videos/combined_video_music.mp4"
    if opt == "Narration":
-        concatenate_videos_music(video_paths, concatenated_video_path, audio_path="narration.mp3")
+        concatenate_videos_music(video_paths, concatenated_video_path, audio_path="narration.mp3")
    else:
-        concatenate_videos_music(video_paths, concatenated_video_path, audio_path="meow-meow-meow-tiktok.mp3")
-
+        concatenate_videos_music(video_paths, concatenated_video_path, audio_path="meow-meow-meow-tiktok.mp3")
    return concatenated_video_path
 
 
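Note on the change: video generation now runs sequentially on a single device inside the @spaces.GPU-decorated function, and enable_model_cpu_offload() handles model placement (which is why the explicit pipe.to(device) call was dropped); the detected device string is only used to seed the torch.Generator. A minimal usage sketch of the new entry point, assuming the scene prompts come from scenes_caption() and the referenced audio files exist; all argument values are illustrative, not from the commit:

# Hypothetical call; values are illustrative.
scene_list = [
    "Scene 1: a cat wakes up at dawn and stretches on a windowsill.",
    "Scene 2: the cat trots through a rainy street toward a cafe.",
]
final_path = generate_video(
    scene_list,
    writer_description="an orange tabby cat with green eyes",  # appended to each prompt
    opt="Narration",  # any other value selects meow-meow-meow-tiktok.mp3
    fps=24,           # also passed as num_frames, so each clip is 24 frames
)
print(final_path)  # -> videos/combined_video_music.mp4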
 
 
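The added comment above os.makedirs mentions truncating the prompt to the CLIP token limit, but the commit does not implement any truncation (CogVideoX's text encoder is T5, and the pipeline's default sequence limit is 226 tokens). A minimal sketch of what the truncation could look like, assuming pipe.tokenizer is the pipeline's tokenizer; the helper name and limit are assumptions, not part of the commit:

def truncate_prompt(prompt, tokenizer, max_tokens=226):
    # Hypothetical helper: tokenize with truncation, then decode back
    # to a string that fits the text encoder's limit.
    ids = tokenizer(prompt, truncation=True, max_length=max_tokens)["input_ids"]
    return tokenizer.decode(ids, skip_special_tokens=True)

# Inside the generation loop, before calling pipe(...):
# prompt = truncate_prompt(prompt, pipe.tokenizer)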
 
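For context on the removed multi-GPU path: besides hard-coding a four-GPU minimum, multiprocessing.Pool defaults to the fork start method on Linux, and a CUDA context initialized in the parent process does not survive fork, so PyTorch's guidance for CUDA multiprocessing is the spawn start method. A sketch of the guard the old design would have needed; generate_single_video, tasks, and num_gpus refer to the removed lines:

import multiprocessing as mp

if __name__ == "__main__":
    # "spawn" gives each worker a fresh interpreter and CUDA context.
    ctx = mp.get_context("spawn")
    with ctx.Pool(processes=num_gpus) as pool:
        video_paths = pool.starmap(generate_single_video, tasks)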