Update app.py
Browse files
app.py
CHANGED
@@ -45,10 +45,11 @@ tokenizer = AutoTokenizer.from_pretrained("sshleifer/distilbart-cnn-12-6")
|
|
45 |
model = AutoModelForSeq2SeqLM.from_pretrained("sshleifer/distilbart-cnn-12-6")
|
46 |
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
|
47 |
model.to(device)
|
48 |
-
print(device)
|
49 |
|
50 |
|
51 |
def get_output_video(text):
|
|
|
52 |
inputs = tokenizer(text,
|
53 |
max_length=1024,
|
54 |
truncation=True,
|
@@ -58,6 +59,7 @@ def get_output_video(text):
|
|
58 |
skip_special_tokens=True,
|
59 |
clean_up_tokenization_spaces=False)
|
60 |
plot = list(summary[0].split('.'))
|
|
|
61 |
|
62 |
'''
|
63 |
The required models will be downloaded to models_root if they are not already there.
|
@@ -68,15 +70,16 @@ def get_output_video(text):
|
|
68 |
'''
|
69 |
@spaces.GPU(duration=60 * 3)
|
70 |
def generate_image(
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
):
|
|
|
80 |
model = MinDalle(
|
81 |
is_mega=is_mega,
|
82 |
models_root=models_root,
|
@@ -94,21 +97,28 @@ def get_output_video(text):
|
|
94 |
top_k=top_k,
|
95 |
is_verbose=True
|
96 |
)
|
|
|
97 |
return image
|
98 |
|
99 |
|
100 |
generated_images = []
|
101 |
-
for senten in plot[:-1]:
|
102 |
-
image
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
112 |
|
113 |
# Step 4- Creation of the subtitles
|
114 |
sentences = plot[:-1]
|
@@ -121,6 +131,7 @@ def get_output_video(text):
|
|
121 |
for k in range(len(generated_images)):
|
122 |
subtitles = tokenize.sent_tokenize(sentences[k])
|
123 |
sub_names.append(subtitles)
|
|
|
124 |
|
125 |
# Step 5- Adding Subtitles to the Images
|
126 |
def draw_multiple_line_text(image, text, font, text_color, text_start_height):
|
@@ -165,6 +176,7 @@ def get_output_video(text):
|
|
165 |
text_to_add = sub_names[k][0]
|
166 |
result = add_text_to_img(text_to_add, imagenes)
|
167 |
generated_images_sub.append(result)
|
|
|
168 |
|
169 |
# Step 7 - Creation of audio
|
170 |
c = 0
|
@@ -172,7 +184,7 @@ def get_output_video(text):
|
|
172 |
mp3_lengths = []
|
173 |
for k in range(len(generated_images)):
|
174 |
text_to_add = sub_names[k][0]
|
175 |
-
print(text_to_add)
|
176 |
f_name = 'audio_' + str(c) + '.mp3'
|
177 |
mp3_names.append(f_name)
|
178 |
# The text that you want to convert to audio
|
@@ -190,7 +202,7 @@ def get_output_video(text):
|
|
190 |
audio = AudioSegment.from_file(sound_file, format="mp3")
|
191 |
duration = len(audio) / 1000
|
192 |
mp3_lengths.append(duration)
|
193 |
-
print(duration)
|
194 |
c += 1
|
195 |
|
196 |
# Step 8 - Merge audio files
|
@@ -201,16 +213,16 @@ def get_output_video(text):
|
|
201 |
|
202 |
for n, mp3_file in enumerate(mp3_names):
|
203 |
mp3_file = mp3_file.replace(chr(92), '/')
|
204 |
-
print(
|
205 |
# Load the current mp3 into `audio_segment`
|
206 |
audio_segment = AudioSegment.from_mp3(mp3_file)
|
207 |
# Just accumulate the new `audio_segment` + `silence`
|
208 |
full_audio += audio_segment + silence
|
209 |
-
print('Merging
|
210 |
# The loop will exit once all files in the list have been used
|
211 |
# Then export
|
212 |
full_audio.export(export_path, format='mp3')
|
213 |
-
print('\
|
214 |
|
215 |
# Step 9 - Creation of the video with adjusted times of the sound
|
216 |
c = 0
|
@@ -219,18 +231,20 @@ def get_output_video(text):
|
|
219 |
f_name = 'img_' + str(c) + '.jpg'
|
220 |
file_names.append(f_name)
|
221 |
img.save(f_name)
|
|
|
222 |
c += 1
|
223 |
-
print(file_names)
|
224 |
|
225 |
clips = []
|
226 |
d = 0
|
227 |
for m in file_names:
|
228 |
duration = mp3_lengths[d]
|
229 |
-
print(d
|
230 |
clips.append(mpe.ImageClip(m).set_duration(duration + 0.5))
|
231 |
d += 1
|
232 |
concat_clip = mpe.concatenate_videoclips(clips, method="compose")
|
233 |
concat_clip.write_videofile("result_new.mp4", fps=24)
|
|
|
234 |
|
235 |
# Step 10 - Merge Video + Audio
|
236 |
movie_name = 'result_new.mp4'
|
@@ -244,6 +258,7 @@ def get_output_video(text):
|
|
244 |
final_clip.write_videofile(outname, fps=fps)
|
245 |
|
246 |
combine_audio(movie_name, export_path, movie_final) # create a new file
|
|
|
247 |
|
248 |
# Cleanup intermediate files
|
249 |
for f in file_names:
|
@@ -252,8 +267,9 @@ def get_output_video(text):
|
|
252 |
os.remove(f)
|
253 |
os.remove("result_new.mp4")
|
254 |
os.remove("result.mp3")
|
|
|
255 |
|
256 |
-
|
257 |
return 'result_final.mp4'
|
258 |
|
259 |
|
@@ -277,4 +293,4 @@ with demo:
|
|
277 |
gr.Markdown(
|
278 |
"This program text-to-video AI software generating videos from any prompt! AI software to build an art gallery. The future version will use Dalle-2 For more info visit [ruslanmv.com](https://ruslanmv.com/) ")
|
279 |
button_gen_video.click(fn=get_output_video, inputs=input_start_text, outputs=output_interpolation)
|
280 |
-
demo.launch(debug=
|
|
|
45 |
model = AutoModelForSeq2SeqLM.from_pretrained("sshleifer/distilbart-cnn-12-6")
|
46 |
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
|
47 |
model.to(device)
|
48 |
+
print(f"Using device: {device}")
|
49 |
|
50 |
|
51 |
def get_output_video(text):
|
52 |
+
print("Starting get_output_video function...")
|
53 |
inputs = tokenizer(text,
|
54 |
max_length=1024,
|
55 |
truncation=True,
|
|
|
59 |
skip_special_tokens=True,
|
60 |
clean_up_tokenization_spaces=False)
|
61 |
plot = list(summary[0].split('.'))
|
62 |
+
print(f"Summarized plot: {plot}")
|
63 |
|
64 |
'''
|
65 |
The required models will be downloaded to models_root if they are not already there.
|
|
|
70 |
'''
|
71 |
@spaces.GPU(duration=60 * 3)
|
72 |
def generate_image(
|
73 |
+
is_mega: bool,
|
74 |
+
text: str,
|
75 |
+
seed: int,
|
76 |
+
grid_size: int,
|
77 |
+
top_k: int,
|
78 |
+
image_path: str,
|
79 |
+
models_root: str,
|
80 |
+
fp16: bool,
|
81 |
):
|
82 |
+
print(f"Generating image for: {text}")
|
83 |
model = MinDalle(
|
84 |
is_mega=is_mega,
|
85 |
models_root=models_root,
|
|
|
97 |
top_k=top_k,
|
98 |
is_verbose=True
|
99 |
)
|
100 |
+
print(f"Image generated successfully.")
|
101 |
return image
|
102 |
|
103 |
|
104 |
generated_images = []
|
105 |
+
for i, senten in enumerate(plot[:-1]):
|
106 |
+
print(f"Generating image {i+1} of {len(plot)-1}...")
|
107 |
+
try:
|
108 |
+
image = generate_image(
|
109 |
+
is_mega=True,
|
110 |
+
text=senten,
|
111 |
+
seed=1,
|
112 |
+
grid_size=1, # param {type:"integer"}
|
113 |
+
top_k=256, # param {type:"integer"}
|
114 |
+
image_path='generated',
|
115 |
+
models_root='pretrained',
|
116 |
+
fp16=True, )
|
117 |
+
generated_images.append(image)
|
118 |
+
print(f"Image {i+1} generated and appended.")
|
119 |
+
except Exception as e:
|
120 |
+
print(f"Error generating image {i+1}: {e}")
|
121 |
+
raise
|
122 |
|
123 |
# Step 4- Creation of the subtitles
|
124 |
sentences = plot[:-1]
|
|
|
131 |
for k in range(len(generated_images)):
|
132 |
subtitles = tokenize.sent_tokenize(sentences[k])
|
133 |
sub_names.append(subtitles)
|
134 |
+
print(f"Subtitles generated for image {k+1}: {subtitles}")
|
135 |
|
136 |
# Step 5- Adding Subtitles to the Images
|
137 |
def draw_multiple_line_text(image, text, font, text_color, text_start_height):
|
|
|
176 |
text_to_add = sub_names[k][0]
|
177 |
result = add_text_to_img(text_to_add, imagenes)
|
178 |
generated_images_sub.append(result)
|
179 |
+
print(f"Subtitles added to image {k+1}.")
|
180 |
|
181 |
# Step 7 - Creation of audio
|
182 |
c = 0
|
|
|
184 |
mp3_lengths = []
|
185 |
for k in range(len(generated_images)):
|
186 |
text_to_add = sub_names[k][0]
|
187 |
+
print(f"Generating audio for: {text_to_add}")
|
188 |
f_name = 'audio_' + str(c) + '.mp3'
|
189 |
mp3_names.append(f_name)
|
190 |
# The text that you want to convert to audio
|
|
|
202 |
audio = AudioSegment.from_file(sound_file, format="mp3")
|
203 |
duration = len(audio) / 1000
|
204 |
mp3_lengths.append(duration)
|
205 |
+
print(f"Audio duration: {duration} seconds")
|
206 |
c += 1
|
207 |
|
208 |
# Step 8 - Merge audio files
|
|
|
213 |
|
214 |
for n, mp3_file in enumerate(mp3_names):
|
215 |
mp3_file = mp3_file.replace(chr(92), '/')
|
216 |
+
print(f"Merging audio file: {mp3_file}")
|
217 |
# Load the current mp3 into `audio_segment`
|
218 |
audio_segment = AudioSegment.from_mp3(mp3_file)
|
219 |
# Just accumulate the new `audio_segment` + `silence`
|
220 |
full_audio += audio_segment + silence
|
221 |
+
print(f'Merging audio {n+1} completed.')
|
222 |
# The loop will exit once all files in the list have been used
|
223 |
# Then export
|
224 |
full_audio.export(export_path, format='mp3')
|
225 |
+
print('\nAudio merging done!')
|
226 |
|
227 |
# Step 9 - Creation of the video with adjusted times of the sound
|
228 |
c = 0
|
|
|
231 |
f_name = 'img_' + str(c) + '.jpg'
|
232 |
file_names.append(f_name)
|
233 |
img.save(f_name)
|
234 |
+
print(f"Saving image: {f_name}")
|
235 |
c += 1
|
236 |
+
print(f"Image file names: {file_names}")
|
237 |
|
238 |
clips = []
|
239 |
d = 0
|
240 |
for m in file_names:
|
241 |
duration = mp3_lengths[d]
|
242 |
+
print(f"Creating video clip {d+1} with duration: {duration} seconds")
|
243 |
clips.append(mpe.ImageClip(m).set_duration(duration + 0.5))
|
244 |
d += 1
|
245 |
concat_clip = mpe.concatenate_videoclips(clips, method="compose")
|
246 |
concat_clip.write_videofile("result_new.mp4", fps=24)
|
247 |
+
print("Video clips concatenated and saved as result_new.mp4")
|
248 |
|
249 |
# Step 10 - Merge Video + Audio
|
250 |
movie_name = 'result_new.mp4'
|
|
|
258 |
final_clip.write_videofile(outname, fps=fps)
|
259 |
|
260 |
combine_audio(movie_name, export_path, movie_final) # create a new file
|
261 |
+
print("Video and audio merged successfully!")
|
262 |
|
263 |
# Cleanup intermediate files
|
264 |
for f in file_names:
|
|
|
267 |
os.remove(f)
|
268 |
os.remove("result_new.mp4")
|
269 |
os.remove("result.mp3")
|
270 |
+
print("Intermediate files cleaned up.")
|
271 |
|
272 |
+
print("Finished get_output_video function.")
|
273 |
return 'result_final.mp4'
|
274 |
|
275 |
|
|
|
293 |
gr.Markdown(
|
294 |
"This program text-to-video AI software generating videos from any prompt! AI software to build an art gallery. The future version will use Dalle-2 For more info visit [ruslanmv.com](https://ruslanmv.com/) ")
|
295 |
button_gen_video.click(fn=get_output_video, inputs=input_start_text, outputs=output_interpolation)
|
296 |
+
demo.launch(debug=True)
|