Spaces:
Running
on
Zero
Running
on
Zero
File size: 3,404 Bytes
f04256e 0ac86ac 1e980e0 0ac86ac 1e980e0 0ac86ac 1e980e0 0ac86ac 1e980e0 0ac86ac 1e980e0 0ac86ac f04256e 0ac86ac f04256e 1e980e0 f04256e 0ac86ac f04256e 2d06337 f04256e 1e980e0 2d06337 f04256e a8c6c87 f04256e 2d06337 f04256e 1e980e0 f04256e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 |
import gradio as gr
import glob
import os
from pydub import AudioSegment
def cleanup_old_audio():
    """Remove audio files left over from a previous inference run.

    Deletes every ``out_en-*.wav`` chunk and ``final_output.wav`` found in the
    current working directory. Deletion is best-effort: an entry that cannot
    be removed is reported on stdout and the remaining entries are still
    processed.
    """
    files_to_remove = glob.glob("out_en-*.wav") + glob.glob("final_output.wav")
    if not files_to_remove:
        print("No old audio files found.")
        return

    print(f"Cleaning up {len(files_to_remove)} old audio files...")
    for file in files_to_remove:
        try:
            os.remove(file)
        except OSError as e:
            # Only filesystem failures (permissions, races, a directory that
            # happens to match the pattern) are expected here; anything else
            # is a programming error and should surface.
            print(f"Error deleting {file}: {e}")
        else:
            print(f"Deleted: {file}")
def find_audio_chunks():
    """Find the ``out_en-*.wav`` chunks in the current directory, in order.

    The chunk index is the integer between the last ``-`` and the following
    ``.`` of the file name (``out_en-12.wav`` -> ``12``). Files are sorted
    numerically by that index so that chunk 10 comes after chunk 2.

    Files that match the glob pattern but do not carry an integer index
    (e.g. ``out_en-bad.wav``) are skipped with a message instead of aborting
    the whole lookup.

    Returns:
        list[str]: Chunk file paths ordered by ascending chunk index; empty
        when no chunk is found.
    """
    indexed_chunks = []
    for path in glob.glob("out_en-*.wav"):
        try:
            index = int(path.split('-')[-1].split('.')[0])
        except ValueError:
            print(f"Skipping file without a numeric chunk index: {path}")
            continue
        indexed_chunks.append((index, path))

    # Sort on (index, path): numeric order first, path as a stable tie-break.
    indexed_chunks.sort()
    wav_files = [path for _, path in indexed_chunks]
    print(f"Found {len(wav_files)} audio chunks: {wav_files}")
    return wav_files
def concatenate_audio(output_filename="final_output.wav"):
    """Concatenate all audio chunks, in order, into a single WAV file.

    Args:
        output_filename: Path of the WAV file to write the joined audio to.

    Returns:
        tuple: ``(output_filename, wav_files)`` where ``wav_files`` is the
        ordered list of chunk paths that were joined. When no chunk exists,
        nothing is written and ``(None, [])`` is returned, so callers can
        always unpack two values.
    """
    wav_files = find_audio_chunks()  # chunk paths, already sorted by index
    if not wav_files:
        print("No audio files found.")
        # Same 2-tuple shape as the success path; a bare list here would
        # break callers that unpack the result.
        return None, []

    # Load each chunk and append it to the running result.
    combined = AudioSegment.empty()
    for file in wav_files:
        combined += AudioSegment.from_wav(file)

    # export() hands back the output file handle; close it explicitly so the
    # file is released before anything else reads it.
    combined.export(output_filename, format="wav").close()
    print(f"Concatenated audio saved as {output_filename}")
    return output_filename, wav_files
def infer(audio_input_path):
    """Run Hibiki inference on an audio file and return the translated audio.

    Clears the previous run's output files, runs ``moshi.run_inference`` in a
    subprocess with ``out_en.wav`` as the output name, then joins the
    resulting ``out_en-*.wav`` chunks into one file.
    NOTE(review): the chunk naming is inferred from the glob pattern used by
    the helpers in this file, not from the moshi documentation - confirm.

    Args:
        audio_input_path: Filesystem path of the input audio.

    Returns:
        tuple: ``(concatenated_wav_path, chunk_paths)``.

    Raises:
        gr.Error: If no input was provided, if the inference command exits
            with a non-zero status, or if it produced no audio chunks.
    """
    import subprocess
    import sys

    if not audio_input_path:
        # Without this guard the literal string "None" would be passed to
        # the inference script as the input path.
        raise gr.Error("Please provide an input audio file.")

    cleanup_old_audio()

    command = [
        # Use the interpreter running this app so the subprocess sees the
        # same environment (and therefore the same installed packages).
        sys.executable, "-m", "moshi.run_inference",
        f"{audio_input_path}", "out_en.wav",
        "--hf-repo", "kyutai/hibiki-1b-pytorch-bf16",
    ]
    result = subprocess.run(command, capture_output=True, text=True)

    # Surface the child process output in the application logs.
    print("STDOUT:", result.stdout)
    print("STDERR:", result.stderr)

    if result.returncode != 0:
        print("Error executing command.")
        raise gr.Error("Error executing command")

    print("Command executed successfully.")
    outputs = concatenate_audio()
    # An empty result, or one with no output path, means the command
    # succeeded but wrote no chunk to join.
    if not outputs or outputs[0] is None:
        raise gr.Error("Inference finished but produced no audio output.")
    concat_out, file_list = outputs
    return concat_out, file_list
# Center the main column and cap its width.
css="""
div#col-container{
    margin: 0 auto;
    max-width: 720px;
}
"""

# UI layout: one centered column with the input, a submit button, the two
# outputs, and a few clickable example inputs.
with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.Markdown("# Hibiki ")
        gr.Markdown("This is a simple demo for Kyutai's Hibiki models")

        # type="filepath" makes the callback receive a path on disk, which
        # is what infer() forwards to the inference command.
        audio_input = gr.Audio(label="Audio IN", type="filepath")
        submit_btn = gr.Button("Submit")
        output_result = gr.Audio(label="Translated result")
        wav_list = gr.Files(label="Output Audio List")

        gr.Examples(
            examples = [
                "./examples/sample_fr_hibiki_intro.mp3",
                "./examples/sample_fr_hibiki_crepes.mp3",
                "./examples/sample_fr_hibiki_monologue_otis.mp3"
            ],
            inputs = [audio_input]
        )

    # infer() returns (joined wav path, list of chunk paths), mapped in that
    # order onto the two output components.
    submit_btn.click(
        fn = infer,
        inputs = [audio_input],
        outputs = [output_result, wav_list]
    )

demo.queue().launch(show_api=False, show_error=True)