Spaces:
Runtime error
Runtime error
llinahosna
committed on
Commit
•
3716ece
1
Parent(s):
9fb8c08
Create main.py
Browse files
main.py
ADDED
@@ -0,0 +1,104 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import argparse
|
2 |
+
import os
|
3 |
+
import cv2
|
4 |
+
|
5 |
+
from dall_e import DalleImageGenerator
|
6 |
+
from download_from_youtube import download_transcription, download_mp3, get_video_name
|
7 |
+
from utils import clean_str, get_sqrt, read_and_preprocess_transcript, put_subtitles_on_frame
|
8 |
+
|
9 |
+
|
10 |
+
def main():
    """Build a Dall-e video clip for a YouTube song and mux it with the song's audio.

    Command-line arguments:
        url: YouTube URL to take the audio and transcript from.
        --token: Replicate API token handed to ``DalleImageGenerator``.
        --song_name: name used for the output directory; derived from the
            video name when omitted.
        --fps: frame rate of the written video.
        --sec_per_img: how long each generated image is shown.
        --n_lines: forwarded to ``read_and_preprocess_transcript`` to limit
            the number of transcript lines (presumably a line count).

    Everything is written under ``data/<song_name>/``: the downloaded audio
    and transcript, every frame as a PNG, the silent ``frames.avi`` and the
    muxed ``final.avi``.

    Raises:
        subprocess.CalledProcessError: if one of the ffmpeg invocations fails.
    """
    # Local import: only the ffmpeg calls at the end of this function need it.
    import subprocess

    parser = argparse.ArgumentParser(description='Generate a Dall-e video clip from a youtube url')
    parser.add_argument('url', help='URL of the a song to get the audio from')
    parser.add_argument('--token', help='A Replicate API token', default=None)
    parser.add_argument('--song_name', help='The name of the song / output files', default=None)
    # Explicit types: without them a value given on the command line arrives
    # as a str, which breaks cv2.VideoWriter and the frame-count arithmetic.
    parser.add_argument('--fps', type=float,
                        help='FPS higher then 1 / sec_per_img to better control over timing', default=10)
    parser.add_argument('--sec_per_img', type=float, help='How long to show each image', default=3)
    parser.add_argument('--n_lines', type=int,
                        help='Limit number of generated lines in the video to faster testing', default=None)
    args = parser.parse_args()

    dalle = DalleImageGenerator(token=args.token)
    img_dim = 256  # Dall-e's output dim
    resize_factor = 2  # upscale factor for frames

    if args.song_name is None:
        args.song_name = clean_str(get_video_name(args.url))

    # Set paths
    outputs_dir = f"data/{args.song_name}"
    frames_dir = f"{outputs_dir}/frames"
    os.makedirs(outputs_dir, exist_ok=True)
    os.makedirs(frames_dir, exist_ok=True)
    mp3_path = f"{outputs_dir}/audio.mp3"
    tmp_mp3_path = f"{outputs_dir}/tmp.mp3"
    transcript_path = f"{outputs_dir}/transcript.json"
    vid_path = f"{outputs_dir}/frames.avi"
    final_vid_path = f"{outputs_dir}/final.avi"

    print("Getting audio file and transcript from youtube")
    # Download data
    download_transcription(args.url, transcript_path)
    download_mp3(args.url, mp3_path)

    transcript = read_and_preprocess_transcript(transcript_path, args.song_name, args.n_lines)

    frames, video_duration = get_frames(dalle, transcript, resize_factor, args)

    # Write video (fourcc 0, as before), also dumping every frame as a PNG.
    frame_size = (img_dim * resize_factor, img_dim * resize_factor)
    video = cv2.VideoWriter(vid_path, 0, args.fps, frame_size)
    try:
        for i, frame in enumerate(frames):
            cv2.imwrite(f"{frames_dir}/frame-{i}.png", frame)
            video.write(frame)
    finally:
        # Always release the writer so the container is closed even if a write fails.
        video.release()

    # Mix video clip with audio.
    # Argument lists (no shell) keep song names containing quotes or shell
    # metacharacters from breaking or hijacking the command. "-y" overwrites
    # outputs of a previous run, matching how the other artefacts are rewritten.
    # Step 1: cut the audio down to the duration of the generated video.
    subprocess.run(
        ["ffmpeg", "-y", "-ss", "00:00:00", "-t", str(video_duration), "-i", mp3_path,
         "-map", "0:a", "-acodec", "libmp3lame", tmp_mp3_path],
        check=True,
    )
    # Step 2: mux the silent video with the trimmed audio, copying the video stream.
    subprocess.run(
        ["ffmpeg", "-y", "-i", vid_path, "-i", tmp_mp3_path,
         "-map", "0", "-map", "1:a", "-c:v", "copy", "-shortest", final_vid_path],
        check=True,
    )
    print(f"Final video available at: {final_vid_path}")
|
58 |
+
|
59 |
+
|
60 |
+
def get_frames(dalle, transcript, resize_factor, args):
    """Prompt dall-e mini with every transcript line and repeat the images as video frames.

    Args:
        dalle: image generator exposing
            ``generate_images(text, grid_size, text_adherence=...)``.
        transcript: iterable of dicts with ``'text'``, ``'start'`` and
            ``'duration'`` keys (times presumably in seconds).
        resize_factor: forwarded to ``put_subtitles_on_frame``.
        args: namespace providing ``fps`` and ``sec_per_img``.

    Returns:
        A ``(frames, video_duration)`` tuple: the frames in playback order and
        the total time they cover at ``args.fps``, in the same unit as the
        transcript durations.

    Raises:
        RuntimeError: if the generator returns no images for a line.

    NOTE: ``line['start']`` is only used for logging; gaps between transcript
    lines are not padded with frames.
    """
    print("Building video-clip")
    # Coerce once so the arithmetic also works when the values arrive as strings.
    fps = float(args.fps)
    sec_per_img = float(args.sec_per_img)

    frames = []
    video_duration = 0
    for line in transcript:
        text = clean_str(line['text'])
        start = line['start']
        duration = line['duration']

        # Dall-e generates grid_size**2 images
        grid_size = max(get_sqrt(duration / sec_per_img), 1)

        print(f"({start:.1f} - {start + duration:.1f}):")

        print(f"* Generating {grid_size**2} images with prompt: '{text}'")
        # Generate images
        images = dalle.generate_images(text, grid_size, text_adherence=3)
        n_images = len(images)
        if n_images == 0:
            # Previously surfaced as an opaque ZeroDivisionError further down.
            raise RuntimeError(f"No images were generated for prompt: '{text}'")

        # Write frames. Work in whole frame counts rather than accumulating
        # 1 / fps in floating point: the rounding drift of the accumulated sum
        # could make the fill-in count below come out one frame short.
        total_frames = int(duration * fps)
        frames_per_image = total_frames // n_images
        for j, image in enumerate(images):
            frame = cv2.cvtColor(image, cv2.COLOR_RGBA2BGR)
            frame = put_subtitles_on_frame(frame, text, resize_factor)
            print(f"* Writing image - {j} as {frames_per_image} frames")
            frames.extend([frame] * frames_per_image)

        # Write more frames from last image to fill the gap left by the
        # integer division above.
        n_frames = total_frames - frames_per_image * n_images
        if n_frames > 0:
            print(f"* Writing image - {j} for {n_frames} frames")
            frames.extend([frame] * n_frames)

        video_duration += total_frames / fps

    return frames, video_duration
|
101 |
+
|
102 |
+
|
103 |
+
# Script entry point: run the pipeline only when executed directly, not on import.
if __name__ == "__main__":
    main()
|