Spaces:

fffiloni
/

Hibiki-simple

Running on Zero

App Files Files Community

fffiloni committed 18 days ago

Commit

1ede177

verified ·

1 Parent(s): ab9ae1a

Update app.py

Browse files

Files changed (1) hide show

app.py +71 -6

app.py CHANGED Viewed

@@ -5,7 +5,26 @@ import os
 import shutil
 import tempfile
 from pydub import AudioSegment
 def process_audio(input_file):
     # Load the audio file
     audio = AudioSegment.from_file(input_file)
@@ -152,13 +171,44 @@ def overlay_audio(original_mp3: str, translated_wav: str, volume_reduction_db: i
     print(f"Final audio saved at: {temp_file.name}")
     return temp_file.name  # Return the temporary file path
-def process_final_combination(audio_in, chosen_translated):
     audio_in = process_audio(audio_in)
     temp_output_path = overlay_audio(audio_in, chosen_translated)
-    return gr.update(value=temp_output_path, visible=True)
 def hide_previous():
-    return gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False)
 css="""
 div#col-container{
@@ -171,6 +221,7 @@ with gr.Blocks(css=css) as demo:
     with gr.Column(elem_id="col-container"):
         gr.Markdown("# Hibiki ")
         gr.Markdown("This is a simple demo for Kyutai's Hibiki translation models • Currently supports French to English only.")
         audio_input = gr.Audio(label="Audio IN", type="filepath")
         submit_btn = gr.Button("Submit")
         output_result = gr.Audio(label="Translated result")
@@ -184,6 +235,8 @@ with gr.Blocks(css=css) as demo:
             )
             choose_this_btn = gr.Button("Use this one", scale=1, visible=False)
         combined_output = gr.Audio("Combined Outpu", visible=False)
         with gr.Accordion("Downloadable audio Output list", open=False, visible=False) as result_accordion:
             wav_list = gr.Files(label="Output Audio List", visible=False)
@@ -196,6 +249,12 @@ with gr.Blocks(css=css) as demo:
             inputs = [audio_input]
         )
     dropdown_wav_selector.select(
         fn = load_chosen_audio,
         inputs = [dropdown_wav_selector],
@@ -205,14 +264,20 @@ with gr.Blocks(css=css) as demo:
     choose_this_btn.click(
         fn = process_final_combination,
-        inputs = [audio_input, dropdown_wav_selector],
-        outputs = [combined_output]
     )
     submit_btn.click(
         fn = hide_previous,
         inputs = None,
-        outputs = [dropdown_wav_selector, result_accordion,  wav_list, choose_this_btn, combined_output]
     ).then(
         fn = infer,
         inputs = [audio_input],

 import shutil
 import tempfile
 from pydub import AudioSegment
+from moviepy.editor import VideoFileClip, AudioFileClip, concatenate_videoclips, freeze
def extract_audio_as_mp3(video_path: str) -> str:
    """
    Extract the audio track of a video file into a temporary MP3 file.

    The temporary file is created with ``delete=False``; the caller is
    responsible for removing it once it is no longer needed.

    :param video_path: Path to the input video file.
    :return: Path to the temporary MP3 file.
    :raises ValueError: If the video has no audio track.
    """
    video = VideoFileClip(video_path)
    try:
        # Fail with a clear message instead of an AttributeError on None
        # when the clip carries no audio.
        if video.audio is None:
            raise ValueError(f"No audio track found in video: {video_path}")

        # Reserve a unique output path, then close our own handle right away
        # so the encoder can reopen the file by name.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio:
            mp3_path = temp_audio.name

        # Extract and export the audio as MP3
        video.audio.write_audiofile(mp3_path, codec="mp3")
    finally:
        # Always release the reader resources held by the clip.
        video.close()
    return mp3_path  # Return the temp file path
 def process_audio(input_file):
     # Load the audio file
     audio = AudioSegment.from_file(input_file)
     print(f"Final audio saved at: {temp_file.name}")
     return temp_file.name  # Return the temporary file path
def process_final_combination(audio_in, chosen_translated, video_input):
    """
    Build the combined audio and decide whether the video button is shown.

    The source audio is passed through ``process_audio`` and the result is
    combined with the chosen translation by ``overlay_audio``.

    :param audio_in: Source audio, as accepted by ``process_audio``.
    :param chosen_translated: Translated audio passed to ``overlay_audio``.
    :param video_input: Value of the video input; truthy when a video is set.
    :return: Two ``gr.update`` objects: one showing the combined audio, one
        toggling visibility of the "apply to video" button.
    """
    prepared_source = process_audio(audio_in)
    combined_path = overlay_audio(prepared_source, chosen_translated)
    # The button is only useful when there is a video to apply the audio to.
    show_video_button = bool(video_input)
    return (
        gr.update(value=combined_path, visible=True),
        gr.update(visible=show_video_button),
    )
def replace_video_audio(video_path: str, new_audio_path: str):
    """
    Replace a video's audio track with a new audio file.

    If the new audio is longer than the video, the video is extended by
    holding a frame taken near its end for the missing duration. The result
    is written to a temporary MP4 file created with ``delete=False``.

    :param video_path: Path to the input video file.
    :param new_audio_path: Path to the audio file used as the new soundtrack.
    :return: A ``gr.update`` pointing the output component at the new video
        file and making it visible.
    :raises gr.Error: If either path is missing.
    """
    if not video_path or not new_audio_path:
        raise gr.Error("Both a video and a combined audio track are required.")

    video = VideoFileClip(video_path)
    try:
        new_audio = AudioFileClip(new_audio_path)
        try:
            result = video
            missing = new_audio.duration - video.duration
            if missing > 0:
                # Sample slightly before the end of the clip; clamp to 0 for
                # clips shorter than 0.1 s.
                frame_time = max(video.duration - 0.1, 0)
                # Append ONLY the held frame. The previous freeze() call
                # returned the whole clip plus the freeze, so concatenating
                # it after `video` played the video twice.
                held_frame = video.to_ImageClip(t=frame_time, duration=missing)
                result = concatenate_videoclips([video, held_frame])

            # Set new audio
            result = result.set_audio(new_audio)

            # Reserve a unique output path and close our handle before the
            # encoder writes to it.
            with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as temp_video:
                output_path = temp_video.name

            # Export video with appropriate codec
            result.write_videofile(output_path, codec="libx264", audio_codec="aac")
        finally:
            new_audio.close()
    finally:
        # Close the source clip only after the export has finished reading it.
        video.close()

    return gr.update(value=output_path, visible=True)
 def hide_previous():
+    return gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False)
 css="""
 div#col-container{
     with gr.Column(elem_id="col-container"):
         gr.Markdown("# Hibiki ")
         gr.Markdown("This is a simple demo for Kyutai's Hibiki translation models • Currently supports French to English only.")
+        video_input = gr.Video(label="Video IN")
         audio_input = gr.Audio(label="Audio IN", type="filepath")
         submit_btn = gr.Button("Submit")
         output_result = gr.Audio(label="Translated result")
             )
             choose_this_btn = gr.Button("Use this one", scale=1, visible=False)
         combined_output = gr.Audio("Combined Outpu", visible=False)
+        apply_to_video_btn = gr.Button("Apply to video", visible=False)
+        final_video_out = gr.Video(label="Video + Translated Audio", visible=False)
         with gr.Accordion("Downloadable audio Output list", open=False, visible=False) as result_accordion:
             wav_list = gr.Files(label="Output Audio List", visible=False)
             inputs = [audio_input]
         )
+    video_input.upload(
+        fn = extract_audio_as_mp3,
+        inputs = [video_input],
+        outputs = [audio_input]
+    )
     dropdown_wav_selector.select(
         fn = load_chosen_audio,
         inputs = [dropdown_wav_selector],
     choose_this_btn.click(
         fn = process_final_combination,
+        inputs = [audio_input, dropdown_wav_selector, video_input],
+        outputs = [combined_output, apply_to_video_btn]
+    )
+    apply_to_video_btn.click(
+        fn = replace_video_audio,
+        inputs = [combined_output, video_input],
+        outputs = [final_video_out]
     )
     submit_btn.click(
         fn = hide_previous,
         inputs = None,
+        outputs = [dropdown_wav_selector, result_accordion,  wav_list, choose_this_btn, combined_output, apply_to_video_btn, final_video_out]
     ).then(
         fn = infer,
         inputs = [audio_input],