Killian Steunou committed on
Commit
0a8ffcc
·
unverified ·
2 Parent(s): 272fe46 baafc0a

Merge pull request #1 from killian31/feat_video

Browse files
Files changed (1) hide show
  1. app.py +77 -49
app.py CHANGED
@@ -1,63 +1,87 @@
1
  import gradio as gr
2
  import torch
3
  import whisper
4
- from moviepy.editor import AudioFileClip, ColorClip, concatenate_videoclips
 
 
 
 
 
 
5
  from moviepy.video.VideoClip import TextClip
6
 
7
 
8
- def generate_video(audio_path, language, lag, progress=gr.Progress(track_tqdm=True)):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
  # Transcribe audio
10
- progress(0.0, "Transcribing audio...")
11
  result = model.transcribe(audio_path, language=language)
12
  progress(0.30, "Audio transcribed!")
13
 
14
- # Prepare video clips from transcription segments
15
- clips = []
16
- total_segments = len(result["segments"])
17
- running_progress = 0.0
18
- current_time = 0.0
19
- for segment in result["segments"]:
20
- running_progress += 0.4 / total_segments
21
- if segment["start"] > current_time:
22
- clips.append(
23
- ColorClip((1280, 720), color=(0, 0, 0)).set_duration(
24
- segment["start"] - current_time
25
- )
26
- )
27
- text_clip = (
28
- TextClip(
29
- segment["text"],
30
- fontsize=24,
31
- font="Arial",
32
- color="white",
33
- bg_color="black",
34
- size=(1280, 720),
35
  )
36
- .set_duration(segment["end"] - segment["start"])
37
- .set_start(segment["start"])
 
 
 
 
 
 
 
 
 
 
 
38
  )
39
- clips.append(text_clip)
40
- current_time = segment["end"]
41
- progress(min(0.3 + running_progress, 0.7), "Generating video frames...")
42
-
43
- if lag > 0:
44
- clips.insert(0, ColorClip((1280, 720), color=(0, 0, 0)).set_duration(lag))
45
- progress(0.7, "Video frames generated!")
46
-
47
- # Concatenate clips and set audio
48
- progress(0.75, "Concatenating video clips...")
49
- video = concatenate_videoclips(clips, method="compose")
50
-
51
- # Add audio to the video
52
- progress(0.85, "Adding audio to video...")
53
- video = video.set_audio(AudioFileClip(audio_path))
54
-
55
- # Export video to a buffer
56
- progress(0.90, "Exporting video...")
57
- output_path = "./transcribed_video.mp4"
58
- video.write_videofile(output_path, fps=6, codec="libx264", audio_codec="aac")
59
- progress(1.0, "Video exported!")
60
- return output_path
61
 
62
 
63
  if __name__ == "__main__":
@@ -69,8 +93,12 @@ if __name__ == "__main__":
69
  fn=generate_video,
70
  inputs=[
71
  gr.Audio(
72
- sources=["upload", "microphone"], type="filepath", label="Audio File"
 
 
73
  ),
 
 
74
  gr.Dropdown(
75
  ["en", "es", "fr", "de", "it", "nl", "ru", "no", "zh"],
76
  label="Language",
 
1
  import gradio as gr
2
  import torch
3
  import whisper
4
+ from moviepy.editor import (
5
+ AudioFileClip,
6
+ ColorClip,
7
+ CompositeVideoClip,
8
+ VideoFileClip,
9
+ concatenate_videoclips,
10
+ )
11
  from moviepy.video.VideoClip import TextClip
12
 
13
 
14
def generate_srt_file(transcription_result, srt_file_path, lag=0):
    """Write the segments of a transcription to ``srt_file_path`` in SRT format.

    Args:
        transcription_result: Mapping with a ``"segments"`` sequence; each
            segment is a mapping with ``"start"`` and ``"end"`` times (numbers,
            in seconds) and a ``"text"`` string.
        srt_file_path: Destination path of the subtitle file (overwritten).
        lag: Offset added to every start and end time before formatting.
    """

    def _to_timestamp(seconds):
        # Work in whole milliseconds: rounding once avoids float truncation
        # (e.g. 2.3 s used to be rendered as ",299") and lets a value such as
        # 59.9996 s carry cleanly into the next second/minute.
        # SRT has no notation for negative times, so clamp at zero.
        total_ms = max(0, round(seconds * 1000))
        hours, total_ms = divmod(total_ms, 3_600_000)
        minutes, total_ms = divmod(total_ms, 60_000)
        secs, millis = divmod(total_ms, 1000)
        return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"

    # Explicit UTF-8 so non-ASCII transcripts do not depend on the platform's
    # default encoding.
    with open(srt_file_path, "w", encoding="utf-8") as file:
        for i, segment in enumerate(transcription_result["segments"], start=1):
            start_srt = _to_timestamp(segment["start"] + lag)
            end_srt = _to_timestamp(segment["end"] + lag)
            # Drop surrounding whitespace so each cue starts at column 0.
            text = segment["text"].strip()

            file.write(f"{i}\n{start_srt} --> {end_srt}\n{text}\n\n")
27
+
28
+
29
def generate_video(
    audio_path, video_path, input, language, lag, progress=gr.Progress(track_tqdm=True)
):
    """Transcribe an audio or video input and return a video plus an SRT file.

    Args:
        audio_path: Path of the audio file; ignored (overwritten) when
            ``input`` is ``"Video"``.
        video_path: Path of the video file; only used when ``input`` is
            ``"Video"``.
        input: ``"Video"`` to transcribe ``video_path``; any other value
            transcribes ``audio_path``.
        language: Language code forwarded to ``model.transcribe``.
        lag: Offset added to every subtitle time and to the length of the
            generated video (presumably seconds, matching moviepy durations).
        progress: Gradio progress callback, called with a fraction and a
            status message.

    Returns:
        A ``(video_path, srt_path)`` tuple. For a video input with ``lag == 0``
        the original ``video_path`` is returned unchanged.

    Raises:
        gr.Error: If the video input has no audio track.
    """
    # NOTE(review): `model` is a module-level name defined outside this
    # function (presumably the loaded Whisper model) — confirm.
    is_video = input == "Video"

    progress(0.0, "Checking input...")
    if is_video:
        progress(0.0, "Extracting audio from video...")
        audio_path = "./temp_audio.wav"
        source = VideoFileClip(video_path)
        try:
            if source.audio is None:
                # Fail with a readable message instead of an AttributeError
                # on None.
                raise gr.Error("The video has no audio track to transcribe.")
            source.audio.write_audiofile(audio_path)
        finally:
            # Always release the reader, even if extraction fails.
            source.close()
        progress(0.1, "Audio extracted!")

    # Transcribe audio
    progress(0.1, "Transcribing audio...")
    result = model.transcribe(audio_path, language=language)
    progress(0.30, "Audio transcribed!")

    # Generate SRT file
    progress(0.30, "Generating SRT file...")
    srt_file_path = "./temp.srt"
    generate_srt_file(result, srt_file_path, lag=lag)
    progress(0.40, "SRT file generated!")

    # With no lag the original video already matches the subtitles.
    if is_video and lag == 0:
        return video_path, srt_file_path

    output_video_path = "./transcribed_video.mp4"
    if is_video:
        # Extend the original video with a black screen of duration `lag`.
        source = VideoFileClip(video_path)
        try:
            black_screen = ColorClip(
                size=source.size, color=(0, 0, 0), duration=lag
            ).set_fps(1)
            final_video = concatenate_videoclips([source, black_screen])
            final_video.write_videofile(
                output_video_path, codec="libx264", audio_codec="aac"
            )
        finally:
            source.close()
    else:
        # Audio only: a black 1280x720 clip carrying the audio track.
        audio_clip = AudioFileClip(audio_path)
        try:
            duration = audio_clip.duration + lag
            # 1 fps is enough for a static black frame and keeps encoding cheap.
            video_clip = ColorClip(
                size=(1280, 720), color=(0, 0, 0), duration=duration
            ).set_fps(1)
            video_clip = video_clip.set_audio(audio_clip)
            video_clip.write_videofile(
                output_video_path, codec="libx264", audio_codec="aac"
            )
        finally:
            audio_clip.close()
    return output_video_path, srt_file_path
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85
 
86
 
87
  if __name__ == "__main__":
 
93
  fn=generate_video,
94
  inputs=[
95
  gr.Audio(
96
+ sources=["upload", "microphone"],
97
+ type="filepath",
98
+ label="Audio File",
99
  ),
100
+ gr.Video(label="Or Video File", sources=["upload", "webcam"]),
101
+ gr.Dropdown(["Video", "Audio"], label="File Type", value="Audio"),
102
  gr.Dropdown(
103
  ["en", "es", "fr", "de", "it", "nl", "ru", "no", "zh"],
104
  label="Language",