# Spaces: fffiloni / Hibiki-simple
# Running on Zero
# File size: 3,404 Bytes

import gradio as gr 

import glob
import os
from pydub import AudioSegment

def cleanup_old_audio():
    """Delete leftover audio from a previous run in the working directory.

    Targets every ``out_en-*.wav`` chunk plus ``final_output.wav``.
    Deletion is best-effort: a failure on one file is reported on stdout
    and the remaining files are still processed.
    """
    stale_paths = []
    for pattern in ("out_en-*.wav", "final_output.wav"):
        stale_paths.extend(glob.glob(pattern))

    # Nothing to do: report it and leave early.
    if not stale_paths:
        print("No old audio files found.")
        return

    print(f"Cleaning up {len(stale_paths)} old audio files...")
    for stale_path in stale_paths:
        try:
            os.remove(stale_path)
            print(f"Deleted: {stale_path}")
        except Exception as err:
            # Keep going; one undeletable file must not block the rest.
            print(f"Error deleting {stale_path}: {err}")

def find_audio_chunks():
    """Return the ``out_en-<N>.wav`` files of the working directory, ordered by N.

    The ordering is numeric, so ``out_en-10.wav`` comes after
    ``out_en-2.wav``. Files that match the glob but whose suffix is not an
    integer (e.g. ``out_en-notes.wav``) are reported and skipped instead of
    aborting the whole lookup with a ``ValueError``.

    Returns:
        A list of matching file paths sorted by chunk number; empty when no
        chunk is present.
    """
    def chunk_index(path):
        # "out_en-12.wav" -> 12; None when the part after the last '-' is
        # not an integer.
        try:
            return int(path.split('-')[-1].split('.')[0])
        except ValueError:
            return None

    indexed_chunks = []
    for path in glob.glob("out_en-*.wav"):
        index = chunk_index(path)
        if index is None:
            print(f"Skipping file without a numeric chunk index: {path}")
            continue
        indexed_chunks.append((index, path))

    # Sort on the index only (stable), so equal indices keep glob order.
    indexed_chunks.sort(key=lambda pair: pair[0])
    wav_files = [path for _, path in indexed_chunks]

    print(f"Found {len(wav_files)} audio chunks: {wav_files}")

    return wav_files

def concatenate_audio(output_filename="final_output.wav"):
    """Join all audio chunks, in order, into a single WAV file.

    The chunk list comes from ``find_audio_chunks()``; each chunk is loaded
    with ``AudioSegment.from_wav`` and appended to the running result, which
    is then exported to ``output_filename``.

    Args:
        output_filename: Path of the WAV file to write.

    Returns:
        A ``(output_path, chunk_paths)`` tuple. When no chunk is found the
        result is ``(None, [])`` and nothing is written, so callers can
        always unpack two values.
    """
    wav_files = find_audio_chunks()  # Sorted chunk paths

    if not wav_files:
        print("No audio files found.")
        # Same two-element shape as the success path; the previous bare
        # list made `a, b = concatenate_audio()` raise ValueError.
        return None, []

    # Load and append every chunk in order
    combined = AudioSegment.empty()
    for file in wav_files:
        combined += AudioSegment.from_wav(file)

    # Export the final combined audio
    combined.export(output_filename, format="wav")
    print(f"Concatenated audio saved as {output_filename}")

    return output_filename, wav_files


def infer(audio_input_path):
    """Run Hibiki inference on an audio file and return the merged result.

    Steps: remove audio left over from a previous run, launch
    ``moshi.run_inference`` in a subprocess with the
    ``kyutai/hibiki-1b-pytorch-bf16`` repo and ``out_en.wav`` as the output
    name, then concatenate the resulting chunks.

    Args:
        audio_input_path: Path of the audio file to process.

    Returns:
        A ``(merged_wav_path, chunk_paths)`` tuple.

    Raises:
        gr.Error: If no input file was given, if the subprocess exits with a
            non-zero status, or if it succeeds but leaves no audio chunk to
            concatenate.
    """
    import subprocess
    import sys

    # Without this guard the literal string "None" would be handed to the
    # inference command and fail with an unhelpful error.
    if not audio_input_path:
        raise gr.Error("Please provide an input audio file")

    cleanup_old_audio()

    command = [
        # Use the interpreter running this app so the subprocess sees the
        # same installed packages; fall back to PATH lookup if unknown.
        sys.executable or "python", "-m", "moshi.run_inference",
        f"{audio_input_path}", "out_en.wav",
        "--hf-repo", "kyutai/hibiki-1b-pytorch-bf16",
    ]

    result = subprocess.run(command, capture_output=True, text=True)

    # Print the standard output and error
    print("STDOUT:", result.stdout)
    print("STDERR:", result.stderr)

    if result.returncode != 0:
        print("Error executing command.")
        raise gr.Error("Error executing command")

    print("Command executed successfully.")
    outputs = concatenate_audio()
    # concatenate_audio signals "no chunks" with an empty/None result;
    # surface that as a clear UI error instead of failing on unpacking.
    if not outputs or outputs[0] is None:
        raise gr.Error("Inference finished but produced no audio output")

    concat_out, file_list = outputs
    return concat_out, file_list

# Custom CSS for the page: centers the element with id "col-container"
# and caps its width at 720px.
css="""
div#col-container{
    margin: 0 auto;
    max-width: 720px;
}
"""

# Build the UI: one column holding the title, the audio input, the submit
# button, the two outputs, and a set of clickable example inputs.
with gr.Blocks(css=css) as demo:
    # elem_id matches the selector used in `css` above.
    with gr.Column(elem_id="col-container"):
        gr.Markdown("# Hibiki ")
        gr.Markdown("This is a simple demo for Kyutai's Hibiki models")
        # type="filepath": per Gradio's Audio API the callback should receive
        # a path string, which is what `infer` forwards to the command line.
        audio_input = gr.Audio(label="Audio IN", type="filepath")
        submit_btn = gr.Button("Submit")
        # Receives the first value returned by `infer` (merged WAV path).
        output_result = gr.Audio(label="Translated result")
        # Receives the second value returned by `infer` (list of chunk paths).
        wav_list = gr.Files(label="Output Audio List")

        # Sample inputs; selecting one fills `audio_input`.
        gr.Examples(
            examples = [
                "./examples/sample_fr_hibiki_intro.mp3",
                "./examples/sample_fr_hibiki_crepes.mp3",
                "./examples/sample_fr_hibiki_monologue_otis.mp3"
            ],
            inputs = [audio_input]
        )

    # Wire the button: run `infer` on the input audio and route its two
    # return values to the two output components, in order.
    submit_btn.click(
        fn = infer,
        inputs = [audio_input],
        outputs = [output_result, wav_list]
    )

# Enable request queuing and start the app at import time, with the API
# page hidden and errors shown in the UI.
demo.queue().launch(show_api=False, show_error=True)