"""Gradio demo for automatic IPA (International Phonetic Alphabet) transcription.

Lets the user pick one of several fine-tuned wav2vec2 ASR checkpoints, upload
or record audio, and view the predicted IPA transcription. The currently
loaded pipeline is cached in a Gradio State so switching audio clips does not
reload the model.
"""

import gradio as gr
from transformers import pipeline

# Checkpoint loaded when the demo first starts.
DEFAULT_MODEL = "ginic/data_seed_4_wav2vec2-large-xlsr-buckeye-ipa"

# Every checkpoint the user may select from the dropdown.
VALID_MODELS = [
    "ctaguchi/wav2vec2-large-xlsr-japlmthufielta-ipa-plus-2000",
    "ginic/hyperparam_tuning_1_wav2vec2-large-xlsr-buckeye-ipa",
    "ginic/data_seed_1_wav2vec2-large-xlsr-buckeye-ipa",
    "ginic/data_seed_2_wav2vec2-large-xlsr-buckeye-ipa",
    "ginic/data_seed_3_wav2vec2-large-xlsr-buckeye-ipa",
    "ginic/data_seed_4_wav2vec2-large-xlsr-buckeye-ipa",
    "ginic/gender_split_30_female_1_wav2vec2-large-xlsr-buckeye-ipa",
    "ginic/gender_split_30_female_2_wav2vec2-large-xlsr-buckeye-ipa",
    "ginic/gender_split_30_female_3_wav2vec2-large-xlsr-buckeye-ipa",
    "ginic/gender_split_30_female_4_wav2vec2-large-xlsr-buckeye-ipa",
    "ginic/gender_split_30_female_5_wav2vec2-large-xlsr-buckeye-ipa",
    "ginic/gender_split_70_female_1_wav2vec2-large-xlsr-buckeye-ipa",
    "ginic/gender_split_70_female_2_wav2vec2-large-xlsr-buckeye-ipa",
    "ginic/gender_split_70_female_3_wav2vec2-large-xlsr-buckeye-ipa",
    "ginic/gender_split_70_female_4_wav2vec2-large-xlsr-buckeye-ipa",
    "ginic/gender_split_70_female_5_wav2vec2-large-xlsr-buckeye-ipa",
    "ginic/vary_individuals_old_only_1_wav2vec2-large-xlsr-buckeye-ipa",
    "ginic/vary_individuals_old_only_2_wav2vec2-large-xlsr-buckeye-ipa",
    "ginic/vary_individuals_old_only_3_wav2vec2-large-xlsr-buckeye-ipa",
    "ginic/vary_individuals_young_only_1_wav2vec2-large-xlsr-buckeye-ipa",
    "ginic/vary_individuals_young_only_2_wav2vec2-large-xlsr-buckeye-ipa",
    "ginic/vary_individuals_young_only_3_wav2vec2-large-xlsr-buckeye-ipa",
]


def load_model_and_predict(model_name: str, audio_in: str | None, model_state: dict):
    """Transcribe ``audio_in`` with the selected model, caching the pipeline.

    Args:
        model_name: One of ``VALID_MODELS``, chosen in the dropdown.
        audio_in: Filepath to the recorded/uploaded audio, or ``None`` if the
            user submitted without providing audio.
        model_state: Dict with keys ``"loaded_model"`` (a transformers ASR
            pipeline) and ``"model_name"`` (the checkpoint it was built from).

    Returns:
        Tuple of (predicted IPA transcription string, updated model_state).
    """
    # Rebuild the pipeline only when the selection changed — pipeline
    # construction downloads/loads a large checkpoint and is expensive.
    if model_state["model_name"] != model_name:
        model_state = {
            "loaded_model": pipeline(
                task="automatic-speech-recognition", model=model_name
            ),
            "model_name": model_name,
        }
    # Gradio passes None when no audio was recorded or uploaded; return an
    # empty transcription instead of crashing inside the pipeline.
    if audio_in is None:
        return "", model_state
    return model_state["loaded_model"](audio_in)["text"], model_state


def launch_demo():
    """Build the Gradio interface and launch it."""
    # Preload the default model so the first prediction doesn't pay the
    # checkpoint-loading cost at request time.
    initial_model = {
        "loaded_model": pipeline(
            task="automatic-speech-recognition", model=DEFAULT_MODEL
        ),
        "model_name": DEFAULT_MODEL,
    }
    demo = gr.Interface(
        fn=load_model_and_predict,
        inputs=[
            gr.Dropdown(
                VALID_MODELS,
                value=DEFAULT_MODEL,
                label="IPA transcription ASR model",
                info="Select the model to use for prediction.",
            ),
            gr.Audio(type="filepath"),
            # Stores the currently loaded pipeline + its name between calls.
            gr.State(value=initial_model),
        ],
        outputs=[gr.Textbox(label="Predicted IPA transcription"), gr.State()],
        allow_flagging="never",
        title="Automatic International Phonetic Alphabet Transcription",
        description="This demo allows you to experiment with producing phonetic transcriptions of uploaded or recorded audio using a selected automatic speech recognition (ASR) model.",
    )
    demo.launch()


if __name__ == "__main__":
    launch_demo()