Merge pull request #1 from killian31/feat_video
Browse files
app.py
CHANGED
@@ -1,63 +1,87 @@
|
|
1 |
import gradio as gr
|
2 |
import torch
|
3 |
import whisper
|
4 |
-
from moviepy.editor import
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
from moviepy.video.VideoClip import TextClip
|
6 |
|
7 |
|
8 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
# Transcribe audio
|
10 |
-
progress(0.
|
11 |
result = model.transcribe(audio_path, language=language)
|
12 |
progress(0.30, "Audio transcribed!")
|
13 |
|
14 |
-
#
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
if
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
)
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
)
|
36 |
-
|
37 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
38 |
)
|
39 |
-
|
40 |
-
current_time = segment["end"]
|
41 |
-
progress(min(0.3 + running_progress, 0.7), "Generating video frames...")
|
42 |
-
|
43 |
-
if lag > 0:
|
44 |
-
clips.insert(0, ColorClip((1280, 720), color=(0, 0, 0)).set_duration(lag))
|
45 |
-
progress(0.7, "Video frames generated!")
|
46 |
-
|
47 |
-
# Concatenate clips and set audio
|
48 |
-
progress(0.75, "Concatenating video clips...")
|
49 |
-
video = concatenate_videoclips(clips, method="compose")
|
50 |
-
|
51 |
-
# Add audio to the video
|
52 |
-
progress(0.85, "Adding audio to video...")
|
53 |
-
video = video.set_audio(AudioFileClip(audio_path))
|
54 |
-
|
55 |
-
# Export video to a buffer
|
56 |
-
progress(0.90, "Exporting video...")
|
57 |
-
output_path = "./transcribed_video.mp4"
|
58 |
-
video.write_videofile(output_path, fps=6, codec="libx264", audio_codec="aac")
|
59 |
-
progress(1.0, "Video exported!")
|
60 |
-
return output_path
|
61 |
|
62 |
|
63 |
if __name__ == "__main__":
|
@@ -69,8 +93,12 @@ if __name__ == "__main__":
|
|
69 |
fn=generate_video,
|
70 |
inputs=[
|
71 |
gr.Audio(
|
72 |
-
sources=["upload", "microphone"],
|
|
|
|
|
73 |
),
|
|
|
|
|
74 |
gr.Dropdown(
|
75 |
["en", "es", "fr", "de", "it", "nl", "ru", "no", "zh"],
|
76 |
label="Language",
|
|
|
1 |
import gradio as gr
|
2 |
import torch
|
3 |
import whisper
|
4 |
+
from moviepy.editor import (
|
5 |
+
AudioFileClip,
|
6 |
+
ColorClip,
|
7 |
+
CompositeVideoClip,
|
8 |
+
VideoFileClip,
|
9 |
+
concatenate_videoclips,
|
10 |
+
)
|
11 |
from moviepy.video.VideoClip import TextClip
|
12 |
|
13 |
|
14 |
+
def _format_srt_timestamp(seconds):
    """Format a time in seconds as an SRT timestamp (HH:MM:SS,mmm)."""
    hours = int(seconds // 3600)
    minutes = int((seconds % 3600) // 60)
    secs = int(seconds % 60)
    millis = int((seconds % 1) * 1000)
    return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"


def generate_srt_file(transcription_result, srt_file_path, lag=0):
    """Write a Whisper transcription result to an SRT subtitle file.

    Args:
        transcription_result: dict as returned by whisper's
            ``model.transcribe`` — must contain a ``"segments"`` list whose
            items carry ``"start"``, ``"end"`` and ``"text"`` keys.
        srt_file_path: path of the .srt file to create (overwritten if it
            already exists).
        lag: seconds added to every start/end time, compensating for any
            delay prepended to the video.
    """
    # Explicit utf-8: subtitle text is frequently non-ASCII and the
    # platform default encoding (e.g. cp1252 on Windows) would raise
    # UnicodeEncodeError on write.
    with open(srt_file_path, "w", encoding="utf-8") as file:
        for i, segment in enumerate(transcription_result["segments"], start=1):
            # Shift both endpoints by the requested lag before formatting.
            start_srt = _format_srt_timestamp(segment["start"] + lag)
            end_srt = _format_srt_timestamp(segment["end"] + lag)
            file.write(f"{i}\n{start_srt} --> {end_srt}\n{segment['text']}\n\n")
|
27 |
+
|
28 |
+
|
29 |
+
def generate_video(
    audio_path, video_path, input, language, lag, progress=gr.Progress(track_tqdm=True)
):
    """Transcribe an audio or video file and produce a subtitled video + SRT.

    Args:
        audio_path: path to the input audio file (used when ``input == "Audio"``).
        video_path: path to the input video file (used when ``input == "Video"``).
        input: file type selector, ``"Video"`` or ``"Audio"``.
            NOTE(review): shadows the ``input`` builtin; kept as-is so the
            call interface is unchanged.
        language: language code passed to whisper (e.g. ``"en"``).
        lag: seconds of black screen appended/prepended so subtitles can be
            delayed; also shifts the SRT timestamps.
        progress: gradio progress reporter.

    Returns:
        Tuple ``(video_file_path, srt_file_path)``.
    """
    # Check if the input is a video; if so, pull its audio track out first
    # so whisper can transcribe a plain audio file.
    progress(0.0, "Checking input...")
    if input == "Video":
        progress(0.0, "Extracting audio from video...")
        audio_path = "./temp_audio.wav"
        video = VideoFileClip(video_path)
        video.audio.write_audiofile(audio_path)
        video.close()
        progress(0.1, "Audio extracted!")

    # Transcribe audio
    progress(0.1, "Transcribing audio...")
    result = model.transcribe(audio_path, language=language)
    progress(0.30, "Audio transcribed!")

    # Generate SRT file
    progress(0.30, "Generating SRT file...")
    srt_file_path = "./temp.srt"
    generate_srt_file(result, srt_file_path, lag=lag)
    progress(0.40, "SRT file generated!")

    if input == "Video":
        # If lag is 0 the original video already lines up with the SRT;
        # reuse it instead of re-encoding.
        if lag == 0:
            return video_path, srt_file_path
        # Otherwise extend the original video with a black screen of
        # duration `lag` at the end.
        video = VideoFileClip(video_path)
        black_screen = ColorClip(
            size=video.size, color=(0, 0, 0), duration=lag
        ).set_fps(1)
        final_video = concatenate_videoclips([video, black_screen])
        output_video_path = "./transcribed_video.mp4"
        try:
            final_video.write_videofile(
                output_video_path, codec="libx264", audio_codec="aac"
            )
        finally:
            # Release ffmpeg readers even if the export fails.
            final_video.close()
            black_screen.close()
            video.close()
        return output_video_path, srt_file_path
    else:
        # Audio-only input: render a static black 720p clip carrying the
        # audio, long enough to cover the lag-shifted subtitles.
        output_video_path = "./transcribed_video.mp4"
        audio_clip = AudioFileClip(audio_path)
        duration = audio_clip.duration + lag
        video_clip = ColorClip(
            size=(1280, 720), color=(0, 0, 0), duration=duration
        ).set_fps(
            1
        )  # Low fps: the frame never changes, so 1 fps keeps encoding cheap
        video_clip = video_clip.set_audio(audio_clip)
        try:
            video_clip.write_videofile(
                output_video_path, codec="libx264", audio_codec="aac"
            )
        finally:
            video_clip.close()
            audio_clip.close()
        return output_video_path, srt_file_path
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
85 |
|
86 |
|
87 |
if __name__ == "__main__":
|
|
|
93 |
fn=generate_video,
|
94 |
inputs=[
|
95 |
gr.Audio(
|
96 |
+
sources=["upload", "microphone"],
|
97 |
+
type="filepath",
|
98 |
+
label="Audio File",
|
99 |
),
|
100 |
+
gr.Video(label="Or Video File", sources=["upload", "webcam"]),
|
101 |
+
gr.Dropdown(["Video", "Audio"], label="File Type", value="Audio"),
|
102 |
gr.Dropdown(
|
103 |
["en", "es", "fr", "de", "it", "nl", "ru", "no", "zh"],
|
104 |
label="Language",
|