Alvin-Nahabwe committed on
Commit
86031f7
·
verified ·
1 Parent(s): fed6a47

Updated app.py: Added all models

Browse files
Files changed (1) hide show
  1. app.py +42 -31
app.py CHANGED
@@ -2,26 +2,37 @@ import gradio as gr
2
  from transformers import pipeline, Wav2Vec2ProcessorWithLM
3
  import os
4
 
5
- def transcribe(audio, language, model):
6
  model_map = {
7
  "hausa": "asr-africa/wav2vec2-xls-r-1b-naijavoices-hausa-500hr-v0",
8
  "igbo": "asr-africa/wav2vec2-xls-r-1b-naijavoices-igbo-500hr-v0",
9
  "yoruba": "asr-africa/wav2vec2-xls-r-1b-naijavoices-yoruba-500hr-v0",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  }
11
 
12
- revison_map = {
13
- "w/ LM": "lm",
14
- "w/o LM": "main",
15
- }
16
-
17
- if revison_map[model] != "main":
18
- # load processor
19
- p = Wav2Vec2ProcessorWithLM.from_pretrained(model_map[language], revision=revison_map[model])
20
- # load eval pipeline
21
- asr = pipeline("automatic-speech-recognition", model=model_map[language], tokenizer=p.tokenizer, feature_extractor=p.feature_extractor, decoder=p.decoder, token=os.getenv('HF_TOKEN'))
22
  else:
23
- # load eval pipeline
24
- asr = pipeline("automatic-speech-recognition", model=model_map[language], token=os.getenv('HF_TOKEN'))
 
 
 
 
25
 
26
  text = asr(audio)["text"]
27
  return text
@@ -34,28 +45,28 @@ asr_app = gr.Interface(
34
  [
35
  "hausa",
36
  "igbo",
37
- "yoruba"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
  ]
39
  ),
40
- gr.Radio(["w/o LM","w/ LM"])
41
- ],
42
- examples=[
43
- ["./examples/CV/hausa/common_voice_ha_32885169.wav", "hausa", "w/o LM"],
44
- ["./examples/CV/hausa/common_voice_ha_32885169.wav", "hausa", "w/ LM"],
45
- ["./examples/CV/hausa/common_voice_ha_29417456.wav", "hausa", "w/o LM"],
46
- ["./examples/CV/hausa/common_voice_ha_29417456.wav", "hausa", "w/ LM"],
47
- ["./examples/CV/igbo/common_voice_ig_31594237.wav", "igbo", "w/o LM"],
48
- ["./examples/CV/igbo/common_voice_ig_31594237.wav", "igbo", "w/ LM"],
49
- ["./examples/CV/igbo/common_voice_ig_30710992.wav", "igbo", "w/o LM"],
50
- ["./examples/CV/igbo/common_voice_ig_30710992.wav", "igbo", "w/ LM"],
51
- ["./examples/CV/yoruba/common_voice_yo_36914062.wav", "yoruba", "w/o LM"],
52
- ["./examples/CV/yoruba/common_voice_yo_36914062.wav", "yoruba", "w/ LM"],
53
- ["./examples/CV/yoruba/common_voice_yo_36841367.wav", "yoruba", "w/o LM"],
54
- ["./examples/CV/yoruba/common_voice_yo_36841367.wav", "yoruba", "w/ LM"]
55
  ],
56
  outputs="text",
57
- title="NaijaVoices ASR",
58
- description="Realtime demo for Hausa, Igbo and Yoruba speech recognition using a fine-tuned Wav2Vec2-XLS-R 1B model.",
59
  )
60
 
61
  asr_app.launch()
 
2
  from transformers import pipeline, Wav2Vec2ProcessorWithLM
3
  import os
4
 
5
def transcribe(audio, language):
    """Transcribe ``audio`` with the ASR checkpoint registered for ``language``.

    Args:
        audio: Audio input, forwarded unchanged to the ASR pipeline call.
        language: Lower-case language key; must be one of the keys of
            ``model_map`` below.

    Returns:
        The ``"text"`` field of the pipeline result.

    Raises:
        KeyError: If ``language`` has no entry in ``model_map``.
    """
    model_map = {
        "hausa": "asr-africa/wav2vec2-xls-r-1b-naijavoices-hausa-500hr-v0",
        "igbo": "asr-africa/wav2vec2-xls-r-1b-naijavoices-igbo-500hr-v0",
        "yoruba": "asr-africa/wav2vec2-xls-r-1b-naijavoices-yoruba-500hr-v0",
        "zulu": "asr-africa/W2V2-Bert_nchlt_speech_corpus_Fleurs_ZULU_63hr_v1",
        "xhosa": "asr-africa/wav2vec2_xls_r_300m_nchlt_speech_corpus_Fleurs_XHOSA_63hr_v1",
        "afrikaans": "asr-africa/mms-1B_all_nchlt_speech_corpus_Fleurs_CV_AFRIKAANS_57hr_v1",
        "bemba": "asr-africa/w2v-bert-2.0-BIG_C-AMMI-BEMBA_SPEECH_CORPUS-BEMBA-189hrs-V1",
        "shona": "asr-africa/W2V2_Bert_Afrivoice_FLEURS_Shona_100hr_v1",
        "luganda": "asr-africa/whisper-small-CV-Fleurs-lg-313hrs-v1",
        "swahili": "asr-africa/wav2vec2-xls-r-300m-CV_Fleurs_AMMI_ALFFA-sw-400hrs-v1",
        "lingala": "asr-africa/wav2vec2-xls-r-300m-Fleurs_AMMI_AFRIVOICE_LRSC-ln-109hrs-v2",
        "amharic": "asr-africa/facebook-mms-1b-all-common_voice_fleurs-amh-200hrs-v1",
        "kinyarwanda": "asr-africa/facebook-mms-1b-all-common_voice_fleurs-rw-100hrs-v1",
        "oromo": "asr-africa/mms-1b-all-Sagalee-orm-85hrs-4",
        # The akan/ewe checkpoints were previously swapped: each key must
        # point at the repository that carries its own language name.
        "akan": "asr-africa/wav2vec2-xls-r-akan-100-hours",
        "ewe": "asr-africa/wav2vec2-xls-r-ewe-100-hours",
        "wolof": "asr-africa/w2v2-bert-Wolof-20-hours-Google-Fleurs-ALF-dataset",
        "bambara": "asr-africa/mms-bambara-50-hours-mixed-bambara-dataset",
    }

    # Languages whose checkpoint is decoded with the language model stored on
    # the repository's "lm" revision.
    lm_languages = ("hausa", "igbo", "yoruba")

    model_id = model_map[language]
    hf_token = os.getenv('HF_TOKEN')

    if language in lm_languages:
        # load processor (tokenizer, feature extractor and LM decoder)
        p = Wav2Vec2ProcessorWithLM.from_pretrained(model_id, revision="lm")
        # load eval pipeline
        asr = pipeline(
            "automatic-speech-recognition",
            model=model_id,
            tokenizer=p.tokenizer,
            feature_extractor=p.feature_extractor,
            decoder=p.decoder,
            token=hf_token,
        )
    else:
        # Every other language is served from the default revision with a
        # plain pipeline. Forcing Wav2Vec2ProcessorWithLM here cannot work
        # for non-wav2vec2 checkpoints (e.g. the Whisper model used for
        # luganda), so the pipeline resolves its own processor instead.
        asr = pipeline(
            "automatic-speech-recognition",
            model=model_id,
            token=hf_token,
        )

    text = asr(audio)["text"]
    return text
 
45
  [
46
  "hausa",
47
  "igbo",
48
+ "yoruba",
49
+ "zulu",
50
+ "xhosa",
51
+ "afrikaans",
52
+ "bemba",
53
+ "shona",
54
+ "luganda",
55
+ "swahili",
56
+ "lingala",
57
+ "amharic",
58
+ "kinyarwanda",
59
+ "oromo",
60
+ "akan",
61
+ "ewe",
62
+ "wolof",
63
+ "bambara",
64
  ]
65
  ),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
  ],
67
  outputs="text",
68
+ title="ASR Africa",
69
+ description="This space serves as a realtime demo for automatic speech recognition models developed by Mak-CAD under the auspicies of Gates Foundation for 19 African languages using open source data.",
70
  )
71
 
72
  asr_app.launch()