killian31 committed on
Commit
7a98cb1
·
1 Parent(s): e2ea7b3

feat: delay transcript

Browse files
Files changed (1) hide show
  1. app.py +21 -11
app.py CHANGED
@@ -1,16 +1,11 @@
1
  import gradio as gr
2
- import numpy as np
3
  import torch
4
  import whisper
5
- from moviepy.editor import *
6
  from moviepy.video.VideoClip import TextClip
7
 
8
- DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
9
 
10
- model = whisper.load_model("base", device=DEVICE)
11
-
12
-
13
- def generate_video(audio_path, language):
14
  # Transcribe audio
15
  result = model.transcribe(audio_path, language=language)
16
 
@@ -31,8 +26,13 @@ def generate_video(audio_path, language):
31
  )
32
  clips.append(text_clip)
33
 
 
 
 
34
  # Concatenate clips and set audio
35
  video = concatenate_videoclips(clips, method="compose")
 
 
36
  video = video.set_audio(AudioFileClip(audio_path))
37
 
38
  # Export video to a buffer
@@ -43,11 +43,13 @@ def generate_video(audio_path, language):
43
 
44
 
45
  if __name__ == "__main__":
46
-
47
- print(
48
- f"Model is {'multilingual' if model.is_multilingual else 'English-only'} "
49
- f"and has {sum(np.prod(p.shape) for p in model.parameters()):,} parameters."
50
  )
 
 
51
  # Gradio interface
52
  iface = gr.Interface(
53
  fn=generate_video,
@@ -58,6 +60,14 @@ if __name__ == "__main__":
58
  gr.Dropdown(
59
  ["en", "es", "fr", "de", "it", "nl", "ru", "zh"],
60
  label="Language",
 
 
 
 
 
 
 
 
61
  ),
62
  ],
63
  outputs=gr.Video(label="Play Video", show_download_button=True),
 
1
  import gradio as gr
 
2
  import torch
3
  import whisper
4
+ from moviepy.editor import AudioFileClip, ColorClip, concatenate_videoclips
5
  from moviepy.video.VideoClip import TextClip
6
 
 
7
 
8
+ def generate_video(audio_path, language, lag):
 
 
 
9
  # Transcribe audio
10
  result = model.transcribe(audio_path, language=language)
11
 
 
26
  )
27
  clips.append(text_clip)
28
 
29
+ if lag > 0:
30
+ clips.insert(0, ColorClip((1280, 720), color=(0, 0, 0)).set_duration(lag))
31
+
32
  # Concatenate clips and set audio
33
  video = concatenate_videoclips(clips, method="compose")
34
+
35
+ # Add audio to the video
36
  video = video.set_audio(AudioFileClip(audio_path))
37
 
38
  # Export video to a buffer
 
43
 
44
 
45
  if __name__ == "__main__":
46
+ DEVICE = (
47
+ "cuda"
48
+ if torch.cuda.is_available()
49
+ else "mps" if torch.backends.mps.is_available() else "cpu"
50
  )
51
+ model = whisper.load_model("base", device=DEVICE)
52
+
53
  # Gradio interface
54
  iface = gr.Interface(
55
  fn=generate_video,
 
60
  gr.Dropdown(
61
  ["en", "es", "fr", "de", "it", "nl", "ru", "zh"],
62
  label="Language",
63
+ value="en",
64
+ ),
65
+ gr.Slider(
66
+ minimum=0,
67
+ maximum=10,
68
+ step=1,
69
+ value=0,
70
+ label="Lag (seconds): delay the transcription by this amount of time.",
71
  ),
72
  ],
73
  outputs=gr.Video(label="Play Video", show_download_button=True),