# coding=utf-8
"""Gradio demo that forwards uploaded or recorded audio to ``sv.process_audio``."""

import base64
import io
import os
import re
import tempfile

import gradio as gr
import librosa
import numpy as np
import soundfile as sf
import spaces
import torch
import torchaudio
from funasr import AutoModel

from sv import clean_and_emoji_annotate_speech, process_audio

# Sample rate of the temporary WAV file handed to ``process_audio``.
TARGET_SAMPLE_RATE = 16000


def _to_float32(samples):
    """Return *samples* as float32, scaling integer PCM by its dtype's maximum."""
    samples = np.asarray(samples)
    if np.issubdtype(samples.dtype, np.integer):
        # Scale by the actual integer width (int16, int32, ...) instead of
        # assuming int16, so wider PCM is not left wildly out of range.
        full_scale = np.float32(np.iinfo(samples.dtype).max)
        return samples.astype(np.float32) / full_scale
    # Floating-point input is only cast; it is not rescaled.
    return samples.astype(np.float32)


@spaces.GPU
def model_inference(input_wav, language, fs=16000):
    """Run ``process_audio`` on the given audio and return its result.

    Args:
        input_wav: Either a ``(sample_rate, samples)`` tuple, or an array of
            samples whose sample rate is given by ``fs``.
        language: One of the keys of the language table below; an empty or
            missing value is treated as ``"auto"``.
        fs: Sample rate of ``input_wav`` when it is not passed as a tuple.

    Returns:
        Whatever ``process_audio`` returns for the temporary WAV file.

    Raises:
        gr.Error: If ``input_wav`` is ``None`` (no audio supplied).
        KeyError: If ``language`` is not a known language key.
    """
    language_abbr = {
        "auto": "auto",
        "zh": "zh",
        "en": "en",
        "yue": "yue",
        "ja": "ja",
        "ko": "ko",
        "nospeech": "nospeech",
    }

    if not language:
        language = "auto"
    selected_language = language_abbr[language]

    if input_wav is None:
        # Fail with a readable message instead of an AttributeError below.
        raise gr.Error("No audio provided. Please upload or record audio first.")

    # Handle input_wav format
    if isinstance(input_wav, tuple):
        fs, input_wav = input_wav
        input_wav = _to_float32(input_wav)

    # Down-mix multi-channel audio by averaging over the last axis.
    if input_wav.ndim > 1:
        input_wav = input_wav.mean(-1)

    if fs != TARGET_SAMPLE_RATE:
        resampler = torchaudio.transforms.Resample(fs, TARGET_SAMPLE_RATE)
        input_wav_t = torch.from_numpy(input_wav).to(torch.float32)
        input_wav = resampler(input_wav_t[None, :])[0, :].numpy()

    # Reserve a temporary path and close the handle before writing, so the
    # file is not held open while soundfile writes to it by name.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_audio:
        temp_audio_path = temp_audio.name

    try:
        # Writing happens inside the try so a failed write still cleans up.
        sf.write(temp_audio_path, input_wav, TARGET_SAMPLE_RATE)
        # Process the audio using the function from sv.py
        return process_audio(temp_audio_path, language=selected_language)
    finally:
        # Remove the temporary audio file
        os.remove(temp_audio_path)


audio_examples = [
    ["example/mtr.mp3", "auto"],
]


def launch():
    """Build the Gradio Blocks interface and start the app."""
    with gr.Blocks(theme=gr.themes.Soft()) as demo:
        with gr.Row():
            with gr.Column():
                audio_inputs = gr.Audio(label="Upload audio or use the microphone")

                with gr.Accordion("Configuration"):
                    language_inputs = gr.Dropdown(
                        choices=["auto", "zh", "en", "yue", "ja", "ko", "nospeech"],
                        value="auto",
                        label="Language",
                    )
                fn_button = gr.Button("Start", variant="primary")
                text_outputs = gr.Textbox(label="Results")
            gr.Examples(
                examples=audio_examples,
                inputs=[audio_inputs, language_inputs],
                examples_per_page=20,
            )

        fn_button.click(
            model_inference,
            inputs=[audio_inputs, language_inputs],
            outputs=text_outputs,
        )

    demo.launch()


if __name__ == "__main__":
    launch()