File size: 3,404 Bytes
f04256e
 
0ac86ac
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1e980e0
 
0ac86ac
1e980e0
0ac86ac
 
 
1e980e0
 
 
 
 
 
 
 
0ac86ac
 
1e980e0
0ac86ac
 
 
 
 
 
 
 
 
 
1e980e0
 
0ac86ac
 
f04256e
 
0ac86ac
 
f04256e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1e980e0
 
f04256e
 
0ac86ac
f04256e
2d06337
 
 
 
 
 
 
 
f04256e
1e980e0
2d06337
f04256e
 
a8c6c87
 
f04256e
2d06337
 
 
 
 
 
 
 
 
f04256e
 
 
1e980e0
f04256e
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
import gradio as gr 

import glob
import os
from pydub import AudioSegment

def cleanup_old_audio():
    """Remove audio files left over from a previous inference run.

    Deletes every ``out_en-*.wav`` file and ``final_output.wav`` found in the
    current working directory. Deletion is best-effort: a file that cannot be
    removed is reported on stdout and skipped, so one failure does not stop
    the remaining files from being cleaned up.

    Returns:
        None.
    """
    stale_files = glob.glob("out_en-*.wav") + glob.glob("final_output.wav")

    if not stale_files:
        print("No old audio files found.")
        return

    print(f"Cleaning up {len(stale_files)} old audio files...")
    for path in stale_files:
        try:
            os.remove(path)
        except OSError as e:
            # os.remove reports filesystem problems (missing file, permissions,
            # path is a directory) as OSError; anything else is a real bug and
            # should not be hidden.
            print(f"Error deleting {path}: {e}")
        else:
            print(f"Deleted: {path}")

def _chunk_index(path):
    """Return the integer between the last '-' and the next '.' in *path*, or None."""
    try:
        return int(path.split('-')[-1].split('.')[0])
    except ValueError:
        return None


def find_audio_chunks():
    """Find the ``out_en-<N>.wav`` chunk files and return them ordered by ``N``.

    Only the current working directory is searched. The order is numeric, so
    ``out_en-10.wav`` comes after ``out_en-2.wav``. A file that matches the
    glob pattern but has no integer index (for example ``out_en-draft.wav``)
    is reported on stdout and left out, instead of aborting the whole lookup
    with a ValueError.

    Returns:
        A list of file paths, possibly empty.
    """
    indexed = []
    for path in glob.glob("out_en-*.wav"):
        index = _chunk_index(path)
        if index is None:
            print(f"Skipping file without a numeric chunk index: {path}")
            continue
        indexed.append((index, path))

    # Sort on the index alone so files with equal indices keep their glob order.
    indexed.sort(key=lambda pair: pair[0])
    wav_files = [path for _, path in indexed]

    print(f"Found {len(wav_files)} audio chunks: {wav_files}")

    return wav_files

def concatenate_audio(output_filename="final_output.wav"):
    """Join all audio chunks, in order, into a single WAV file.

    The chunk paths come from find_audio_chunks(). Each chunk is loaded as a
    WAV segment and appended to a running result, which is then exported to
    *output_filename*.

    Returns:
        The pair ``(output_filename, chunk_paths)`` when at least one chunk
        exists. When there are no chunks, nothing is written and a bare empty
        list is returned instead of a pair, so callers must check the result
        before unpacking it.
    """
    chunk_paths = find_audio_chunks()

    # Nothing to merge: report it and return the empty marker.
    if not chunk_paths:
        print("No audio files found.")
        return []

    # Start from an empty segment and append each chunk as it is loaded.
    merged = AudioSegment.empty()
    for segment in map(AudioSegment.from_wav, chunk_paths):
        merged += segment

    # Write the merged audio to disk as WAV.
    merged.export(output_filename, format="wav")
    print(f"Concatenated audio saved as {output_filename}")

    return output_filename, chunk_paths


def infer(audio_input_path):
    """Run Hibiki inference on an audio file and return the generated audio.

    Old output files are removed first, then ``moshi.run_inference`` is run in
    a subprocess with ``out_en.wav`` as the output name, and finally the
    produced chunks are merged with concatenate_audio(). The subprocess's
    stdout and stderr are echoed to this process's stdout.

    Args:
        audio_input_path: Path of the input audio file. It is None when the
            handler is triggered without any audio selected.

    Returns:
        The pair ``(concatenated_path, chunk_paths)`` from concatenate_audio().

    Raises:
        gr.Error: If no input audio was provided, if the inference command
            exits with a non-zero status, or if it succeeds without leaving
            any audio chunk to concatenate.
    """
    import subprocess
    import sys

    # Without this guard the literal string "None" would be passed to the
    # command as the input path and fail with an opaque error.
    if not audio_input_path:
        raise gr.Error("Please provide an input audio file.")

    cleanup_old_audio()

    command = [
        # Use the interpreter running this app so the subprocess sees the same
        # installed packages; a bare "python" on PATH may be a different one.
        sys.executable or "python", "-m", "moshi.run_inference",
        f"{audio_input_path}", "out_en.wav",
        "--hf-repo", "kyutai/hibiki-1b-pytorch-bf16"
    ]

    result = subprocess.run(command, capture_output=True, text=True)

    # Print the standard output and error
    print("STDOUT:", result.stdout)
    print("STDERR:", result.stderr)

    # Check if the command was successful
    if result.returncode != 0:
        print("Error executing command.")
        raise gr.Error("Error executing command")

    print("Command executed successfully.")

    merged = concatenate_audio()
    # concatenate_audio() returns a bare empty list (not a pair) when it finds
    # no chunk files, so check before unpacking.
    if not merged:
        raise gr.Error("Inference finished but produced no audio output.")

    concat_out, file_list = merged
    return concat_out, file_list

# Stylesheet for the page: centers the main column and caps its width.
css = """
div#col-container{
    margin: 0 auto;
    max-width: 720px;
}
"""

# Build the UI: one centered column with the input, the trigger button, the
# two outputs and a few example clips.
with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.Markdown("# Hibiki ")
        gr.Markdown("This is a simple demo for Kyutai's Hibiki models")

        # type="filepath" makes the handler receive a path on disk.
        audio_input = gr.Audio(type="filepath", label="Audio IN")
        submit_btn = gr.Button("Submit")
        output_result = gr.Audio(label="Translated result")
        wav_list = gr.Files(label="Output Audio List")

        # Sample clips that fill the audio input when selected.
        gr.Examples(
            inputs=[audio_input],
            examples=[
                "./examples/sample_fr_hibiki_intro.mp3",
                "./examples/sample_fr_hibiki_crepes.mp3",
                "./examples/sample_fr_hibiki_monologue_otis.mp3",
            ],
        )

    # Clicking the button runs infer() and sends its two results to the
    # audio player and the file list.
    submit_btn.click(
        infer,
        inputs=[audio_input],
        outputs=[output_result, wav_list],
    )

# Enable the request queue, then start the server.
demo.queue().launch(show_error=True, show_api=False)