anderbogia committed on
Commit
0cea3a7
·
0 Parent(s):

Duplicate from anderbogia/dtp-asr-demo

Browse files
Files changed (4) hide show
  1. .gitattributes +35 -0
  2. README.md +14 -0
  3. app.py +80 -0
  4. favicon.png +0 -0
.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Dusun speech recognition
3
+ emoji: 🌾
4
+ colorFrom: yellow
5
+ colorTo: yellow
6
+ sdk: gradio
7
+ sdk_version: 3.35.2
8
+ app_file: app.py
9
+ pinned: false
10
+ license: gpl-3.0
11
+ duplicated_from: anderbogia/dtp-asr-demo
12
+ ---
13
+
14
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """ASR MMS gradio space demo.ipynb
3
+
4
+ Automatically generated by Colaboratory.
5
+
6
+ Original file is located at
7
+ https://colab.research.google.com/drive/1TJE7dxiuXeb0nGmkc0AgFLFOnQx35ZXo
8
+ """
9
+
10
+ #!pip install transformers
11
+ #!pip install gradio
12
import os
import subprocess
import sys

# Runtime dependency bootstrap, executed before the heavy imports below.
# NOTE(review): on a hosted Space these would normally be declared in a
# requirements.txt file; they are kept here to preserve the existing behaviour.
_RUNTIME_REQUIREMENTS = (
    ("git+https://github.com/huggingface/transformers", "datasets[torch]"),
    ("torch", "accelerate", "torchaudio", "datasets"),
    ("librosa",),
    # NumPy 1.24 or less needed by Numba
    ("numpy==1.24.0",),
)

for _packages in _RUNTIME_REQUIREMENTS:
    # Argument-list form runs without a shell, so "datasets[torch]" can never
    # be glob-expanded, and `sys.executable -m pip` installs into the
    # interpreter that is actually running this script.
    _result = subprocess.run(
        [sys.executable, "-m", "pip", "install", *_packages],
        check=False,  # stay best-effort, like the original os.system calls
    )
    if _result.returncode != 0:
        print(
            f"WARNING: pip install failed for: {' '.join(_packages)}",
            file=sys.stderr,
        )
19
+
20
+ import gradio as gr
21
+ from transformers import pipeline, Wav2Vec2ForCTC, AutoProcessor
22
+ from datasets import load_dataset, Audio, Dataset
23
+ import torch
24
+
25
model_id = "facebook/mms-1b-all"

# Set target language to dtp (Kadazandusun).
# Change dtp to tih for Timugon Murut or iba for Iban.
processor = AutoProcessor.from_pretrained(model_id)
model = Wav2Vec2ForCTC.from_pretrained(model_id).to("cpu")
processor.tokenizer.set_target_lang("dtp")
model.load_adapter("dtp")

# Function that returns a dict, transcription stored in item with key "text".
# Built from the objects loaded above instead of from `model_id`: passing the
# id string would load a second copy of the checkpoint, and that copy would
# not have the "dtp" adapter / target language applied.
asr_pipeline = pipeline(
    task="automatic-speech-recognition",
    model=model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
)
34
+
35
+ import librosa #For converting audio sample rate to 16k
36
+
37
def preprocess(input):
    """Decode an audio file into a 16 kHz sample array.

    Args:
        input: Path of the audio file to decode. (The name shadows the
            builtin ``input`` but is kept so existing callers keep working.)

    Returns:
        The ``"array"`` entry of the decoded audio, i.e. the samples after
        the ``Audio(sampling_rate=16000)`` cast.
    """
    # A one-row dataset whose "audio" column is cast to 16 kHz; reading the
    # row decodes the file at that sampling rate.
    # The earlier librosa.load/librosa.resample pass was removed: its result
    # was never used, so the file was being decoded twice per request.
    loaded_audio = Dataset.from_dict({"audio": [input]}).cast_column(
        "audio", Audio(sampling_rate=16000)
    )
    return loaded_audio[0]["audio"]["array"]
43
+
44
def transcribe(input):
    """Gradio UI wrapper: turn a recorded audio file into its transcription.

    Args:
        input: Path of the recorded audio file, or an empty value
            (``None`` / ``""``) when nothing was recorded.

    Returns:
        The transcription text, or an empty string when there is no audio.
    """
    if not input:
        # Nothing was recorded before the button was pressed; show an empty
        # transcription instead of failing inside the audio decoder.
        return ""
    audio_array = preprocess(input)  # decode + resample to 16 kHz
    return run(audio_array)
50
+
51
def run(input):
    """Transcribe an audio sample array with the module-level model.

    Args:
        input: Audio samples at 16 kHz, as produced by ``preprocess``.

    Returns:
        The text decoded by ``processor`` from the most likely token id at
        each position of the model's logits.
    """
    features = processor(input, sampling_rate=16_000, return_tensors="pt")
    # Inference only: no gradients are needed.
    with torch.no_grad():
        logits = model(**features).logits
    # Highest-scoring id along the last axis; [0] selects the first (only)
    # item of the batch.
    predicted_ids = logits.argmax(dim=-1)[0]
    return processor.decode(predicted_ids)
58
+
59
# Gradio UI: a microphone recorder, a transcription textbox and a button that
# sends the recording through `transcribe`.
with gr.Blocks(theme=gr.themes.Soft()) as app:
    gr.Markdown(
        """
    # Ponutun tuturan Boros Kadazandusun | Kadazandusun speech recognition
    ### Winonsoi di Ander © 2023 id Universiti Teknologi PETRONAS | Built by Ander © 2023 at Universiti Teknologi PETRONAS.

    **Somit tutun tuturan** do boros Kadazandusun ii ginuno nopo nga mantad totoodo *Massive Multilingual Speech* di Meta.

    Kadazandusun **automatic speech recognition model** used is from Meta's Massive Multilingual Speech project.
    """)
    # type="filepath": the callback receives the path of the recorded file.
    audiofile = gr.Audio(source="microphone", type="filepath", label="Dusunai oku | Say something to me in Kadazandusun")
    transcription_show = gr.components.Textbox(label="Dalinsuat | Transcription")

    # NOTE: a bare `allow_flagging = "never"` assignment used to sit here. It
    # only bound a variable and was never passed to any component, so it was
    # removed rather than left looking like active configuration.
    button1 = gr.Button("Dalinsuato' | Transcribe")
    button1.click(transcribe, inputs=audiofile, outputs=transcription_show)


if __name__ == "__main__":
    app.launch()
favicon.png ADDED