Spaces:
Paused
Paused
pool
Browse files- baseline_utils.py +31 -42
baseline_utils.py
CHANGED
@@ -9,10 +9,9 @@ from diffusers.utils import export_to_video
|
|
9 |
import os
|
10 |
import spaces
|
11 |
from moviepy.editor import VideoFileClip, AudioFileClip, concatenate_videoclips
|
|
|
12 |
import requests
|
13 |
from transformers import pipeline
|
14 |
-
from multiprocessing import Pool
|
15 |
-
|
16 |
|
17 |
# Utilize the Google Cloud Vision API to recognize text in the
|
18 |
# input images (diary page images), https://cloud.google.com/vision.
|
@@ -123,62 +122,52 @@ def scenes_caption(scenes, api_key):
|
|
123 |
|
124 |
return "\n\n".join(captions)
|
125 |
|
126 |
-
# Define the single video generation function in the global scope
|
127 |
-
def generate_single_video(gpu_id, prompt, writer_description, fps, i):
|
128 |
-
# Assign the specific GPU for this process
|
129 |
-
device = f"cuda:{gpu_id}"
|
130 |
|
131 |
-
|
|
|
|
|
132 |
pipe = CogVideoXPipeline.from_pretrained(
|
133 |
"THUDM/CogVideoX-5b",
|
134 |
torch_dtype=torch.bfloat16,
|
135 |
cache_dir="./CogVideoX-5b"
|
136 |
)
|
137 |
-
|
138 |
pipe.enable_model_cpu_offload()
|
139 |
pipe.vae.enable_tiling()
|
140 |
|
141 |
-
#
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
|
148 |
-
|
149 |
-
|
150 |
-
|
151 |
-
# Save the video
|
152 |
-
video_path = export_to_video(video, output_video_path=f'videos/video{i}.mp4')
|
153 |
-
return video_path
|
154 |
-
|
155 |
-
|
156 |
-
@spaces.GPU
|
157 |
-
def generate_video(scene_list, writer_description, opt, fps=24): # Lower fps
|
158 |
-
# Set TOKENIZERS_PARALLELISM to avoid tokenizer warnings
|
159 |
-
os.environ["TOKENIZERS_PARALLELISM"] = "false"
|
160 |
|
161 |
-
#
|
162 |
os.makedirs("videos", exist_ok=True)
|
163 |
-
|
164 |
-
|
165 |
-
|
166 |
-
|
167 |
-
|
168 |
-
|
169 |
-
|
170 |
-
|
171 |
-
|
172 |
-
|
173 |
-
|
|
|
|
|
174 |
|
175 |
# Concatenate the generated videos into a single video
|
176 |
concatenated_video_path = "videos/combined_video_music.mp4"
|
177 |
if opt == "Narration":
|
178 |
-
|
179 |
else:
|
180 |
-
|
181 |
-
|
182 |
return concatenated_video_path
|
183 |
|
184 |
|
|
|
9 |
import os
|
10 |
import spaces
|
11 |
from moviepy.editor import VideoFileClip, AudioFileClip, concatenate_videoclips
|
12 |
+
from transformers import pipeline
|
13 |
import requests
|
14 |
from transformers import pipeline
|
|
|
|
|
15 |
|
16 |
# Utilize the Google Cloud Vision API to recognize text in the
|
17 |
# input images (diary page images), https://cloud.google.com/vision.
|
|
|
122 |
|
123 |
return "\n\n".join(captions)
|
124 |
|
|
|
|
|
|
|
|
|
125 |
|
126 |
+
@spaces.GPU
def generate_video(scene_list, writer_description, opt, fps=24):
    """Generate one CogVideoX clip per scene prompt and join them with audio.

    Parameters
    ----------
    scene_list : iterable of str
        One text prompt per scene; each prompt yields one short clip.
    writer_description : str
        Description of the main character, appended to every scene prompt.
    opt : str
        Audio mode: ``"Narration"`` overlays ``narration.mp3``; any other
        value overlays the background-music track.
    fps : int, optional
        Also used as ``num_frames`` for each clip, so every clip is roughly
        one second long at this frame rate.

    Returns
    -------
    str
        Path of the concatenated video file with audio.
    """
    # Load the pipeline on every call; weights are cached under ./CogVideoX-5b
    # so only the first call downloads them.
    pipe = CogVideoXPipeline.from_pretrained(
        "THUDM/CogVideoX-5b",
        torch_dtype=torch.bfloat16,
        cache_dir="./CogVideoX-5b",
    )
    # Reduce GPU memory pressure during generation.
    # NOTE(review): enable_model_cpu_offload presumably requires a CUDA host;
    # confirm it does not raise on the CPU/MPS fallback paths below.
    pipe.enable_model_cpu_offload()
    pipe.vae.enable_tiling()

    # Check for available device: CUDA, MPS, or CPU. The device is only used
    # to place the seeded RNG passed to the pipeline.
    if torch.cuda.is_available():
        device = "cuda"
        print("Using CUDA backend.")
    elif torch.backends.mps.is_available():
        device = "mps"
        print("Using MPS backend.")
    else:
        device = "cpu"
        print("CUDA and MPS not available. Falling back to CPU.")

    os.makedirs("videos", exist_ok=True)

    video_paths = []
    for i, prompt in enumerate(scene_list):
        # Fixed seed keeps generation deterministic across runs.
        video = pipe(
            prompt=prompt + f'\nThe main character is described as: {writer_description}.',
            num_videos_per_prompt=1,
            num_inference_steps=50,
            num_frames=fps,  # fps frames -> ~1 second of footage per clip
            guidance_scale=6,
            generator=torch.Generator(device=device).manual_seed(42),
        ).frames[0]

        # Save each clip individually before concatenation.
        video_path = export_to_video(video, output_video_path=f'videos/video{i}.mp4')
        video_paths.append(video_path)

    # Concatenate the generated videos into a single video, overlaying either
    # the narration track or the default background music.
    concatenated_video_path = "videos/combined_video_music.mp4"
    audio_path = "narration.mp3" if opt == "Narration" else "meow-meow-meow-tiktok.mp3"
    concatenate_videos_music(video_paths, concatenated_video_path, audio_path=audio_path)
    return concatenated_video_path
|
172 |
|
173 |
|