ruslanmv committed on
Commit
351d597
·
verified ·
1 Parent(s): a1316a9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +45 -29
app.py CHANGED
@@ -45,10 +45,11 @@ tokenizer = AutoTokenizer.from_pretrained("sshleifer/distilbart-cnn-12-6")
45
  model = AutoModelForSeq2SeqLM.from_pretrained("sshleifer/distilbart-cnn-12-6")
46
  device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
47
  model.to(device)
48
- print(device)
49
 
50
 
51
  def get_output_video(text):
 
52
  inputs = tokenizer(text,
53
  max_length=1024,
54
  truncation=True,
@@ -58,6 +59,7 @@ def get_output_video(text):
58
  skip_special_tokens=True,
59
  clean_up_tokenization_spaces=False)
60
  plot = list(summary[0].split('.'))
 
61
 
62
  '''
63
  The required models will be downloaded to models_root if they are not already there.
@@ -68,15 +70,16 @@ def get_output_video(text):
68
  '''
69
  @spaces.GPU(duration=60 * 3)
70
  def generate_image(
71
- is_mega: bool,
72
- text: str,
73
- seed: int,
74
- grid_size: int,
75
- top_k: int,
76
- image_path: str,
77
- models_root: str,
78
- fp16: bool,
79
  ):
 
80
  model = MinDalle(
81
  is_mega=is_mega,
82
  models_root=models_root,
@@ -94,21 +97,28 @@ def get_output_video(text):
94
  top_k=top_k,
95
  is_verbose=True
96
  )
 
97
  return image
98
 
99
 
100
  generated_images = []
101
- for senten in plot[:-1]:
102
- image = generate_image(
103
- is_mega=True,
104
- text=senten,
105
- seed=1,
106
- grid_size=1, # param {type:"integer"}
107
- top_k=256, # param {type:"integer"}
108
- image_path='generated',
109
- models_root='pretrained',
110
- fp16=True, )
111
- generated_images.append(image)
 
 
 
 
 
 
112
 
113
  # Step 4- Creation of the subtitles
114
  sentences = plot[:-1]
@@ -121,6 +131,7 @@ def get_output_video(text):
121
  for k in range(len(generated_images)):
122
  subtitles = tokenize.sent_tokenize(sentences[k])
123
  sub_names.append(subtitles)
 
124
 
125
  # Step 5- Adding Subtitles to the Images
126
  def draw_multiple_line_text(image, text, font, text_color, text_start_height):
@@ -165,6 +176,7 @@ def get_output_video(text):
165
  text_to_add = sub_names[k][0]
166
  result = add_text_to_img(text_to_add, imagenes)
167
  generated_images_sub.append(result)
 
168
 
169
  # Step 7 - Creation of audio
170
  c = 0
@@ -172,7 +184,7 @@ def get_output_video(text):
172
  mp3_lengths = []
173
  for k in range(len(generated_images)):
174
  text_to_add = sub_names[k][0]
175
- print(text_to_add)
176
  f_name = 'audio_' + str(c) + '.mp3'
177
  mp3_names.append(f_name)
178
  # The text that you want to convert to audio
@@ -190,7 +202,7 @@ def get_output_video(text):
190
  audio = AudioSegment.from_file(sound_file, format="mp3")
191
  duration = len(audio) / 1000
192
  mp3_lengths.append(duration)
193
- print(duration)
194
  c += 1
195
 
196
  # Step 8 - Merge audio files
@@ -201,16 +213,16 @@ def get_output_video(text):
201
 
202
  for n, mp3_file in enumerate(mp3_names):
203
  mp3_file = mp3_file.replace(chr(92), '/')
204
- print(n, mp3_file)
205
  # Load the current mp3 into `audio_segment`
206
  audio_segment = AudioSegment.from_mp3(mp3_file)
207
  # Just accumulate the new `audio_segment` + `silence`
208
  full_audio += audio_segment + silence
209
- print('Merging ', n)
210
  # The loop will exit once all files in the list have been used
211
  # Then export
212
  full_audio.export(export_path, format='mp3')
213
- print('\ndone!')
214
 
215
  # Step 9 - Creation of the video with adjusted times of the sound
216
  c = 0
@@ -219,18 +231,20 @@ def get_output_video(text):
219
  f_name = 'img_' + str(c) + '.jpg'
220
  file_names.append(f_name)
221
  img.save(f_name)
 
222
  c += 1
223
- print(file_names)
224
 
225
  clips = []
226
  d = 0
227
  for m in file_names:
228
  duration = mp3_lengths[d]
229
- print(d, duration)
230
  clips.append(mpe.ImageClip(m).set_duration(duration + 0.5))
231
  d += 1
232
  concat_clip = mpe.concatenate_videoclips(clips, method="compose")
233
  concat_clip.write_videofile("result_new.mp4", fps=24)
 
234
 
235
  # Step 10 - Merge Video + Audio
236
  movie_name = 'result_new.mp4'
@@ -244,6 +258,7 @@ def get_output_video(text):
244
  final_clip.write_videofile(outname, fps=fps)
245
 
246
  combine_audio(movie_name, export_path, movie_final) # create a new file
 
247
 
248
  # Cleanup intermediate files
249
  for f in file_names:
@@ -252,8 +267,9 @@ def get_output_video(text):
252
  os.remove(f)
253
  os.remove("result_new.mp4")
254
  os.remove("result.mp3")
 
255
 
256
-
257
  return 'result_final.mp4'
258
 
259
 
@@ -277,4 +293,4 @@ with demo:
277
  gr.Markdown(
278
  "This program text-to-video AI software generating videos from any prompt! AI software to build an art gallery. The future version will use Dalle-2 For more info visit [ruslanmv.com](https://ruslanmv.com/) ")
279
  button_gen_video.click(fn=get_output_video, inputs=input_start_text, outputs=output_interpolation)
280
- demo.launch(debug=False)
 
45
  model = AutoModelForSeq2SeqLM.from_pretrained("sshleifer/distilbart-cnn-12-6")
46
  device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
47
  model.to(device)
48
+ print(f"Using device: {device}")
49
 
50
 
51
  def get_output_video(text):
52
+ print("Starting get_output_video function...")
53
  inputs = tokenizer(text,
54
  max_length=1024,
55
  truncation=True,
 
59
  skip_special_tokens=True,
60
  clean_up_tokenization_spaces=False)
61
  plot = list(summary[0].split('.'))
62
+ print(f"Summarized plot: {plot}")
63
 
64
  '''
65
  The required models will be downloaded to models_root if they are not already there.
 
70
  '''
71
  @spaces.GPU(duration=60 * 3)
72
  def generate_image(
73
+ is_mega: bool,
74
+ text: str,
75
+ seed: int,
76
+ grid_size: int,
77
+ top_k: int,
78
+ image_path: str,
79
+ models_root: str,
80
+ fp16: bool,
81
  ):
82
+ print(f"Generating image for: {text}")
83
  model = MinDalle(
84
  is_mega=is_mega,
85
  models_root=models_root,
 
97
  top_k=top_k,
98
  is_verbose=True
99
  )
100
+ print(f"Image generated successfully.")
101
  return image
102
 
103
 
104
  generated_images = []
105
+ for i, senten in enumerate(plot[:-1]):
106
+ print(f"Generating image {i+1} of {len(plot)-1}...")
107
+ try:
108
+ image = generate_image(
109
+ is_mega=True,
110
+ text=senten,
111
+ seed=1,
112
+ grid_size=1, # param {type:"integer"}
113
+ top_k=256, # param {type:"integer"}
114
+ image_path='generated',
115
+ models_root='pretrained',
116
+ fp16=True, )
117
+ generated_images.append(image)
118
+ print(f"Image {i+1} generated and appended.")
119
+ except Exception as e:
120
+ print(f"Error generating image {i+1}: {e}")
121
+ raise
122
 
123
  # Step 4- Creation of the subtitles
124
  sentences = plot[:-1]
 
131
  for k in range(len(generated_images)):
132
  subtitles = tokenize.sent_tokenize(sentences[k])
133
  sub_names.append(subtitles)
134
+ print(f"Subtitles generated for image {k+1}: {subtitles}")
135
 
136
  # Step 5- Adding Subtitles to the Images
137
  def draw_multiple_line_text(image, text, font, text_color, text_start_height):
 
176
  text_to_add = sub_names[k][0]
177
  result = add_text_to_img(text_to_add, imagenes)
178
  generated_images_sub.append(result)
179
+ print(f"Subtitles added to image {k+1}.")
180
 
181
  # Step 7 - Creation of audio
182
  c = 0
 
184
  mp3_lengths = []
185
  for k in range(len(generated_images)):
186
  text_to_add = sub_names[k][0]
187
+ print(f"Generating audio for: {text_to_add}")
188
  f_name = 'audio_' + str(c) + '.mp3'
189
  mp3_names.append(f_name)
190
  # The text that you want to convert to audio
 
202
  audio = AudioSegment.from_file(sound_file, format="mp3")
203
  duration = len(audio) / 1000
204
  mp3_lengths.append(duration)
205
+ print(f"Audio duration: {duration} seconds")
206
  c += 1
207
 
208
  # Step 8 - Merge audio files
 
213
 
214
  for n, mp3_file in enumerate(mp3_names):
215
  mp3_file = mp3_file.replace(chr(92), '/')
216
+ print(f"Merging audio file: {mp3_file}")
217
  # Load the current mp3 into `audio_segment`
218
  audio_segment = AudioSegment.from_mp3(mp3_file)
219
  # Just accumulate the new `audio_segment` + `silence`
220
  full_audio += audio_segment + silence
221
+ print(f'Merging audio {n+1} completed.')
222
  # The loop will exit once all files in the list have been used
223
  # Then export
224
  full_audio.export(export_path, format='mp3')
225
+ print('\nAudio merging done!')
226
 
227
  # Step 9 - Creation of the video with adjusted times of the sound
228
  c = 0
 
231
  f_name = 'img_' + str(c) + '.jpg'
232
  file_names.append(f_name)
233
  img.save(f_name)
234
+ print(f"Saving image: {f_name}")
235
  c += 1
236
+ print(f"Image file names: {file_names}")
237
 
238
  clips = []
239
  d = 0
240
  for m in file_names:
241
  duration = mp3_lengths[d]
242
+ print(f"Creating video clip {d+1} with duration: {duration} seconds")
243
  clips.append(mpe.ImageClip(m).set_duration(duration + 0.5))
244
  d += 1
245
  concat_clip = mpe.concatenate_videoclips(clips, method="compose")
246
  concat_clip.write_videofile("result_new.mp4", fps=24)
247
+ print("Video clips concatenated and saved as result_new.mp4")
248
 
249
  # Step 10 - Merge Video + Audio
250
  movie_name = 'result_new.mp4'
 
258
  final_clip.write_videofile(outname, fps=fps)
259
 
260
  combine_audio(movie_name, export_path, movie_final) # create a new file
261
+ print("Video and audio merged successfully!")
262
 
263
  # Cleanup intermediate files
264
  for f in file_names:
 
267
  os.remove(f)
268
  os.remove("result_new.mp4")
269
  os.remove("result.mp3")
270
+ print("Intermediate files cleaned up.")
271
 
272
+ print("Finished get_output_video function.")
273
  return 'result_final.mp4'
274
 
275
 
 
293
  gr.Markdown(
294
  "This program text-to-video AI software generating videos from any prompt! AI software to build an art gallery. The future version will use Dalle-2 For more info visit [ruslanmv.com](https://ruslanmv.com/) ")
295
  button_gen_video.click(fn=get_output_video, inputs=input_start_text, outputs=output_interpolation)
296
+ demo.launch(debug=True)