DEVMAXXING committed on
Commit
5b777fc
1 Parent(s): a37c6d3

Add option to split on \n. Add .ass output

Browse files

This PR enables users to:
1. Paste text that is already separated by newlines, without having to manually insert '|' separators
2. Copy the raw .ass output (for example, to save it and import it into video editing software)

Files changed (1) hide show
  1. app.py +16 -4
app.py CHANGED
@@ -104,7 +104,7 @@ def delete_mp4s_except_given_filepath(filepath):
104
 
105
 
106
 
107
- def align(lang, Microphone, File_Upload, text, col1, col2, col3, progress=gr.Progress()):
108
  # Create utt_id, specify output_video_filepath and delete any MP4s
109
  # that are not that filepath. These stray MP4s can be created
110
  # if a user refreshes or exits the page while this 'align' function is executing.
@@ -115,6 +115,7 @@ def align(lang, Microphone, File_Upload, text, col1, col2, col3, progress=gr.Pro
115
  delete_mp4s_except_given_filepath(output_video_filepath)
116
 
117
  output_info = ""
 
118
 
119
  progress(0, desc="Validating input")
120
 
@@ -197,6 +198,10 @@ def align(lang, Microphone, File_Upload, text, col1, col2, col3, progress=gr.Pro
197
  with open(manifest_path, 'w') as fout:
198
  fout.write(f"{json.dumps(data)}\n")
199
 
 
 
 
 
200
  # run alignment
201
  if "|" in text:
202
  resegment_text_to_fill_space = False
@@ -238,6 +243,9 @@ def align(lang, Microphone, File_Upload, text, col1, col2, col3, progress=gr.Pro
238
  # make video file from the word-level ASS file
239
  ass_file_for_video = f"{tmpdir}/nfa_output/ass/words/{utt_id}.ass"
240
 
 
 
 
241
  ffmpeg_command = (
242
  f"ffmpeg -y -i {audio_path} "
243
  "-f lavfi -i color=c=white:s=1280x720:r=50 "
@@ -248,7 +256,7 @@ def align(lang, Microphone, File_Upload, text, col1, col2, col3, progress=gr.Pro
248
 
249
  os.system(ffmpeg_command)
250
 
251
- return output_video_filepath, gr.update(value=output_info, visible=True), output_video_filepath
252
 
253
 
254
  def delete_non_tmp_video(video_path):
@@ -281,6 +289,9 @@ with gr.Blocks(title="NeMo Forced Aligner", theme="huggingface") as demo:
281
  label="[Optional] The reference text. Use '|' separators to specify which text will appear together. "
282
  "Leave this field blank to use an ASR model's transcription as the reference text instead."
283
  )
 
 
 
284
 
285
  gr.Markdown("[Optional] For fun - adjust the colors of the text in the output video")
286
  with gr.Row():
@@ -294,6 +305,7 @@ with gr.Blocks(title="NeMo Forced Aligner", theme="huggingface") as demo:
294
  gr.Markdown("## Output")
295
  video_out = gr.Video(label="output video")
296
  text_out = gr.Textbox(label="output info", visible=False)
 
297
 
298
  with gr.Row():
299
  gr.HTML(
@@ -306,8 +318,8 @@ with gr.Blocks(title="NeMo Forced Aligner", theme="huggingface") as demo:
306
 
307
  submit_button.click(
308
  fn=align,
309
- inputs=[lang_drop, mic_in, audio_file_in, ref_text, col1, col2, col3,],
310
- outputs=[video_out, text_out, non_tmp_output_video_filepath],
311
  ).then(
312
  fn=delete_non_tmp_video, inputs=[non_tmp_output_video_filepath], outputs=None,
313
  )
 
104
 
105
 
106
 
107
+ def align(lang, Microphone, File_Upload, text, col1, col2, col3, split_on_newline, progress=gr.Progress()):
108
  # Create utt_id, specify output_video_filepath and delete any MP4s
109
  # that are not that filepath. These stray MP4s can be created
110
  # if a user refreshes or exits the page while this 'align' function is executing.
 
115
  delete_mp4s_except_given_filepath(output_video_filepath)
116
 
117
  output_info = ""
118
+ ass_text=""
119
 
120
  progress(0, desc="Validating input")
121
 
 
198
  with open(manifest_path, 'w') as fout:
199
  fout.write(f"{json.dumps(data)}\n")
200
 
201
+ # split text on new lines if requested
202
+ if split_on_newline:
203
+ text = "|".join(list(filter(None, text.split("\n"))))
204
+
205
  # run alignment
206
  if "|" in text:
207
  resegment_text_to_fill_space = False
 
243
  # make video file from the word-level ASS file
244
  ass_file_for_video = f"{tmpdir}/nfa_output/ass/words/{utt_id}.ass"
245
 
246
+ with open(ass_file_for_video, "r") as ass_file:
247
+ ass_text = ass_file.read()
248
+
249
  ffmpeg_command = (
250
  f"ffmpeg -y -i {audio_path} "
251
  "-f lavfi -i color=c=white:s=1280x720:r=50 "
 
256
 
257
  os.system(ffmpeg_command)
258
 
259
+ return output_video_filepath, gr.update(value=output_info, visible=True), output_video_filepath, ass_text
260
 
261
 
262
  def delete_non_tmp_video(video_path):
 
289
  label="[Optional] The reference text. Use '|' separators to specify which text will appear together. "
290
  "Leave this field blank to use an ASR model's transcription as the reference text instead."
291
  )
292
+ split_on_newline = gr.Checkbox(
293
+ label="Separate text on new lines", default=False
294
+ )
295
 
296
  gr.Markdown("[Optional] For fun - adjust the colors of the text in the output video")
297
  with gr.Row():
 
305
  gr.Markdown("## Output")
306
  video_out = gr.Video(label="output video")
307
  text_out = gr.Textbox(label="output info", visible=False)
308
+ ass_out = gr.Textbox(label="output .ass")
309
 
310
  with gr.Row():
311
  gr.HTML(
 
318
 
319
  submit_button.click(
320
  fn=align,
321
+ inputs=[lang_drop, mic_in, audio_file_in, ref_text, col1, col2, col3,split_on_newline,],
322
+ outputs=[video_out, text_out, non_tmp_output_video_filepath, ass_out],
323
  ).then(
324
  fn=delete_non_tmp_video, inputs=[non_tmp_output_video_filepath], outputs=None,
325
  )