File size: 1,963 Bytes
d7ae26e
cf4ccb1
d7ae26e
 
 
e59bf3f
5de9f4c
d4afb45
d7ae26e
d8b3564
 
d7ae26e
d4afb45
7ac8184
d7ae26e
5de9f4c
d4afb45
cf4ccb1
d4afb45
d7ae26e
 
d4afb45
1189a51
 
d4afb45
59bf002
1189a51
22fe498
59bf002
 
 
 
 
 
 
d4afb45
 
 
d7ae26e
d4afb45
 
 
 
d7ae26e
d4afb45
d7ae26e
 
d4afb45
d7ae26e
 
 
d630be3
d7ae26e
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import gradio as gr
from transformers import Wav2Vec2ForCTC, AutoProcessor, Wav2Vec2Processor
import torch
import librosa
import json
import os
import huggingface_hub
from transformers import pipeline

# with open('ISO_codes.json', 'r') as file:
#     iso_codes = json.load(file)

# languages = ["lug", "ach", "nyn", "teo"]
# Hugging Face access token taken from the HF_TOKEN environment variable
# (None when the variable is not set).
auth_token = os.getenv("HF_TOKEN")

# Language name shown in the UI -> language code used to select the
# tokenizer target language and model adapter.
target_lang_options = {
    "English": "eng",
    "Luganda": "lug",
    "Acholi": "ach",
    "Runyankole": "nyn",
    "Lugbara": "lgg",
}

# Display names in the mapping's insertion order.
languages = [*target_lang_options]


# Transcribe audio using custom model
def transcribe_audio(input_file, language, chunk_length_s=10,
                     stride_length_s=(4, 2), return_timestamps="word"):
    """Transcribe an audio clip using the model/adapter for ``language``.

    Args:
        input_file: The audio to transcribe. Accepted forms are a path to an
            audio file (``str`` or ``os.PathLike``), raw encoded audio
            ``bytes``, or a binary file-like object exposing ``read()``.
        language: Display name of the language; must be a key of
            ``target_lang_options``.
        chunk_length_s: Forwarded unchanged to the pipeline call.
        stride_length_s: Forwarded unchanged to the pipeline call.
        return_timestamps: Forwarded unchanged to the pipeline call.

    Returns:
        The raw result of the pipeline call.
        NOTE(review): for a speech-recognition pipeline with timestamps this
        is presumably a dict with "text" and "chunks" keys - confirm against
        the transformers documentation.

    Raises:
        ValueError: If ``input_file`` is ``None`` (no audio supplied).
        KeyError: If ``language`` is not a key of ``target_lang_options``.
    """
    # Fail fast, before the (expensive) model load, when there is no audio.
    if input_file is None:
        raise ValueError("No audio was provided for transcription.")

    target_lang_code = target_lang_options[language]
    device = "cuda" if torch.cuda.is_available() else "cpu"

    # English uses the base MMS checkpoint; every other language uses the
    # Sunbird checkpoint.
    if target_lang_code == "eng":
        model_id = "facebook/mms-1b-all"
    else:
        model_id = "Sunbird/sunbird-mms"

    # NOTE: the pipeline is rebuilt on every call, as in the original code.
    # The token is the module-level ``auth_token`` read from HF_TOKEN.
    pipe = pipeline(model=model_id, device=device, token=auth_token)
    pipe.tokenizer.set_target_lang(target_lang_code)
    pipe.model.load_adapter(target_lang_code)

    # Normalise the input: file-like objects are read into bytes, path-like
    # objects become plain strings, and str/bytes pass through untouched.
    if hasattr(input_file, "read"):
        audio_data = input_file.read()
    elif isinstance(input_file, os.PathLike):
        audio_data = os.fspath(input_file)
    else:
        audio_data = input_file

    output = pipe(
        audio_data,
        chunk_length_s=chunk_length_s,
        stride_length_s=stride_length_s,
        return_timestamps=return_timestamps,
    )
    return output


description = '''ASR with salt-mms'''


def _transcribe_from_inputs(mic_path, upload_path, language):
    """Adapt the three UI inputs to ``transcribe_audio`` and return text.

    The interface declares two audio inputs (microphone and upload) plus a
    language dropdown, and passes their values positionally. Wiring
    ``transcribe_audio`` in directly would bind the upload path to
    ``language`` and the language name to ``chunk_length_s``.

    Args:
        mic_path: File path of the recorded clip, or ``None`` if unused.
        upload_path: File path of the uploaded clip, or ``None`` if unused.
        language: Language name selected in the dropdown.

    Returns:
        The transcription as a string for the output textbox.

    Raises:
        gr.Error: If neither a recording nor an upload was provided.
    """
    # Prefer the recording when both inputs are filled in.
    audio_path = mic_path or upload_path
    if not audio_path:
        raise gr.Error("Please record or upload an audio clip first.")

    result = transcribe_audio(audio_path, language)
    # The textbox needs a string; pull the text out when the result is a dict.
    if isinstance(result, dict):
        return result.get("text", "")
    return str(result)


iface = gr.Interface(fn=_transcribe_from_inputs,
                     inputs=[
                         gr.Audio(source="microphone", type="filepath", label="Record Audio"),
                         gr.Audio(source="upload", type="filepath", label="Upload Audio"),
                         gr.Dropdown(choices=languages, label="Language", value="English")
                         ],
                     outputs=gr.Textbox(label="Transcription"),
                     description=description
                     )
iface.launch()