Spaces:
Sleeping
Sleeping
import gradio as gr | |
from transformers import pipeline | |
# Checkpoint-name suffix shared by every "ginic/..." model listed below.
_BUCKEYE_SUFFIX = "wav2vec2-large-xlsr-53-buckeye-ipa"

# Model identifiers offered in the demo's model dropdown, in display order:
# one "ctaguchi" model, then the "data_seed" runs 1-4, then the
# "gender_split" 30 and 70 runs 1-5.
VALID_MODELS = [
    "ctaguchi/wav2vec2-large-xlsr-japlmthufielta-ipa-plus-2000",
    *(f"ginic/data_seed_bs64_{run}_{_BUCKEYE_SUFFIX}" for run in range(1, 5)),
    *(
        f"ginic/gender_split_{share}_female_{run}_{_BUCKEYE_SUFFIX}"
        for share in (30, 70)
        for run in range(1, 6)
    ),
]

# Model that is loaded at start-up and pre-selected in the dropdown.
DEFAULT_MODEL = "ginic/data_seed_bs64_4_wav2vec2-large-xlsr-53-buckeye-ipa"
def load_model_and_predict(model_name, audio_in, model_state):
    """Transcribe audio with the selected ASR model, reloading it only on change.

    The loaded pipeline travels in ``model_state`` so that a new pipeline is
    created only when ``model_name`` differs from the one already loaded.

    Args:
        model_name: Model identifier passed to ``pipeline`` as ``model``.
        audio_in: Audio input forwarded to the loaded pipeline (the demo
            passes a file path), or ``None`` when no audio was provided.
        model_state: Dict with keys ``"loaded_model"`` (the callable
            pipeline) and ``"model_name"``. ``None`` is treated as
            "nothing loaded yet".

    Returns:
        A ``(transcription, model_state)`` tuple. The state is the same
        object that was passed in unless a different model had to be loaded.
        When ``audio_in`` is ``None`` the transcription is ``""`` and the
        state is returned unchanged.
    """
    if audio_in is None:
        # Nothing to transcribe: return before any model switch so a missing
        # upload neither triggers a model load nor fails inside the pipeline.
        return "", model_state

    if model_state is None or model_state.get("model_name") != model_name:
        model_state = {
            "loaded_model": pipeline(
                task="automatic-speech-recognition", model=model_name
            ),
            "model_name": model_name,
        }

    transcription = model_state["loaded_model"](audio_in)["text"]
    return transcription, model_state
def launch_demo():
    """Build the Gradio interface around ``load_model_and_predict`` and launch it.

    The default model's pipeline is created up front and used as the initial
    value of the interface state.
    """
    default_pipeline = pipeline(
        task="automatic-speech-recognition", model=DEFAULT_MODEL
    )
    initial_model = {"loaded_model": default_pipeline, "model_name": DEFAULT_MODEL}

    model_picker = gr.Dropdown(
        VALID_MODELS,
        value=DEFAULT_MODEL,
        label="IPA transcription ASR model",
        info="Select the model to use for prediction.",
    )
    audio_input = gr.Audio(type="filepath")
    # Holds the currently loaded pipeline together with its model name, which
    # load_model_and_predict compares against the dropdown selection.
    model_state_in = gr.State(value=initial_model)

    transcription_box = gr.Textbox(label="Predicted IPA transcription")
    model_state_out = gr.State()

    demo = gr.Interface(
        fn=load_model_and_predict,
        inputs=[model_picker, audio_input, model_state_in],
        outputs=[transcription_box, model_state_out],
        allow_flagging="never",
        title="Automatic International Phonetic Alphabet Transcription",
        description="This demo allows you to experiment with producing phonetic transcriptions of uploaded or recorded audio using a selected automatic speech recognition (ASR) model.",
    )
    demo.launch()
# Start the demo only when this file is executed as a script, so importing
# the module does not load a model or launch the app.
if __name__ == "__main__":
    launch_demo()