Spaces:
Running
Running
File size: 1,963 Bytes
d7ae26e cf4ccb1 d7ae26e e59bf3f 5de9f4c d4afb45 d7ae26e d8b3564 d7ae26e d4afb45 7ac8184 d7ae26e 5de9f4c d4afb45 cf4ccb1 d4afb45 d7ae26e d4afb45 1189a51 d4afb45 59bf002 1189a51 22fe498 59bf002 d4afb45 d7ae26e d4afb45 d7ae26e d4afb45 d7ae26e d4afb45 d7ae26e d630be3 d7ae26e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 |
import gradio as gr
from transformers import Wav2Vec2ForCTC, AutoProcessor, Wav2Vec2Processor
import torch
import librosa
import json
import os
import huggingface_hub
from transformers import pipeline
# with open('ISO_codes.json', 'r') as file:
# iso_codes = json.load(file)
# languages = ["lug", "ach", "nyn", "teo"]
# Hugging Face access token, read from the environment (None when unset).
auth_token = os.environ.get("HF_TOKEN")

# Maps the language name shown in the UI to the language code used to select
# the tokenizer target language and model adapter.
target_lang_options = {
    "English": "eng",
    "Luganda": "lug",
    "Acholi": "ach",
    "Runyankole": "nyn",
    "Lugbara": "lgg",
}

# Display names offered in the language dropdown, in declaration order.
languages = list(target_lang_options)
# Transcribe audio using custom model
def transcribe_audio(input_file, language, chunk_length_s=10,
                     stride_length_s=(4, 2), return_timestamps="word"):
    """Transcribe an audio recording with a speech-recognition pipeline.

    Args:
        input_file: Path to an audio file (what ``gr.Audio(type="filepath")``
            supplies), or a binary file-like object exposing ``read()``.
        language: Display name of the spoken language; must be a key of
            ``target_lang_options``.
        chunk_length_s: Forwarded unchanged to the pipeline call.
        stride_length_s: Forwarded unchanged to the pipeline call.
        return_timestamps: Forwarded unchanged to the pipeline call.

    Returns:
        The value returned by the pipeline call.

    Raises:
        ValueError: If ``input_file`` is ``None`` (no audio supplied).
        KeyError: If ``language`` is not a key of ``target_lang_options``.
    """
    # Fail fast with a clear message instead of an AttributeError deep in the
    # call when the UI submits without a recording.
    if input_file is None:
        raise ValueError("No audio was provided for transcription.")

    device = "cuda" if torch.cuda.is_available() else "cpu"
    target_lang_code = target_lang_options[language]

    # English is served by the upstream checkpoint; every other supported
    # language uses the Sunbird checkpoint.
    if target_lang_code == "eng":
        model_id = "facebook/mms-1b-all"
    else:
        model_id = "Sunbird/sunbird-mms"

    # The module-level token is named ``auth_token``; the previous reference
    # to ``hf_auth_token`` raised NameError on every call.
    pipe = pipeline(model=model_id, device=device, token=auth_token)
    pipe.tokenizer.set_target_lang(target_lang_code)
    pipe.model.load_adapter(target_lang_code)

    # The pipeline accepts either a filename or the raw bytes of an audio
    # file, so a path is passed through as-is and only file-like objects are
    # read. Calling ``.read()`` on a ``str`` path used to raise AttributeError.
    if hasattr(input_file, "read"):
        audio_data = input_file.read()
    else:
        audio_data = input_file

    return pipe(
        audio_data,
        chunk_length_s=chunk_length_s,
        stride_length_s=stride_length_s,
        return_timestamps=return_timestamps,
    )
description = '''ASR with salt-mms'''


def _transcribe_from_ui(mic_audio, uploaded_audio, language):
    """Adapt the three UI inputs to the two-argument ``transcribe_audio`` call.

    The interface exposes separate microphone and upload widgets, so one of
    the two audio values is expected to be empty. Passing all three values
    straight to ``transcribe_audio`` would bind the upload path to its
    ``language`` parameter and the language name to ``chunk_length_s``.

    Args:
        mic_audio: File path of the microphone recording, or None.
        uploaded_audio: File path of the uploaded file, or None.
        language: Language name selected in the dropdown.

    Returns:
        Whatever ``transcribe_audio`` returns for the chosen audio.

    Raises:
        gr.Error: If neither a recording nor an upload was supplied.
    """
    # Prefer the fresh recording; fall back to the uploaded file.
    audio_path = mic_audio if mic_audio else uploaded_audio
    if not audio_path:
        raise gr.Error("Please record or upload an audio file first.")
    return transcribe_audio(audio_path, language)


iface = gr.Interface(
    fn=_transcribe_from_ui,
    inputs=[
        gr.Audio(source="microphone", type="filepath", label="Record Audio"),
        gr.Audio(source="upload", type="filepath", label="Upload Audio"),
        gr.Dropdown(choices=languages, label="Language", value="English"),
    ],
    outputs=gr.Textbox(label="Transcription"),
    description=description,
)

iface.launch()