Spaces:
Runtime error
Runtime error
Commit
·
0cea3a7
0
Parent(s):
Duplicate from anderbogia/dtp-asr-demo
Browse files- .gitattributes +35 -0
- README.md +14 -0
- app.py +80 -0
- favicon.png +0 -0
.gitattributes
ADDED
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
28 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
29 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
30 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
31 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
32 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
33 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
README.md
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
title: Dusun speech recognition
|
3 |
+
emoji: 🌾
|
4 |
+
colorFrom: yellow
|
5 |
+
colorTo: yellow
|
6 |
+
sdk: gradio
|
7 |
+
sdk_version: 3.35.2
|
8 |
+
app_file: app.py
|
9 |
+
pinned: false
|
10 |
+
license: gpl-3.0
|
11 |
+
duplicated_from: anderbogia/dtp-asr-demo
|
12 |
+
---
|
13 |
+
|
14 |
+
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
app.py
ADDED
@@ -0,0 +1,80 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# -*- coding: utf-8 -*-
|
2 |
+
"""ASR MMS gradio space demo.ipynb
|
3 |
+
|
4 |
+
Automatically generated by Colaboratory.
|
5 |
+
|
6 |
+
Original file is located at
|
7 |
+
https://colab.research.google.com/drive/1TJE7dxiuXeb0nGmkc0AgFLFOnQx35ZXo
|
8 |
+
"""
|
9 |
+
|
10 |
+
#!pip install transformers
|
11 |
+
#!pip install gradio
|
12 |
+
import os
|
13 |
+
# Install runtime dependencies before the heavy third-party imports below.
# pip is invoked as `<this interpreter> -m pip` with an argument list so the
# packages land in the environment that is actually running this script and
# no shell is involved (e.g. the brackets in "datasets[torch]" are never
# subject to shell globbing).
import subprocess
import sys

# One tuple per pip invocation, kept in the original order.
_PIP_REQUIREMENTS = (
    ("git+https://github.com/huggingface/transformers", "datasets[torch]"),
    ("torch", "accelerate", "torchaudio", "datasets"),
    ("librosa",),
    # NumPy 1.24 or less needed by Numba
    ("numpy==1.24.0",),
)

for _packages in _PIP_REQUIREMENTS:
    # check=False keeps the previous best-effort behaviour: a failed install
    # does not abort start-up here; a later import will surface the problem.
    subprocess.run([sys.executable, "-m", "pip", "install", *_packages], check=False)
|
19 |
+
|
20 |
+
import gradio as gr
|
21 |
+
from transformers import pipeline, Wav2Vec2ForCTC, AutoProcessor
|
22 |
+
from datasets import load_dataset, Audio, Dataset
|
23 |
+
import torch
|
24 |
+
|
25 |
+
model_id = "facebook/mms-1b-all"

# Load the processor and the CTC model once, then switch both to the
# dtp (Kadazandusun) language: the tokenizer gets the dtp vocabulary and the
# model gets the matching adapter weights.
processor = AutoProcessor.from_pretrained(model_id)
model = Wav2Vec2ForCTC.from_pretrained(model_id).to("cpu")
processor.tokenizer.set_target_lang("dtp")  # Change dtp to tih for Timugon Murut or iba for Iban
model.load_adapter("dtp")

# Build the pipeline from the objects loaded above instead of from `model_id`.
# Passing the id would load a second copy of the checkpoint that does not have
# the dtp adapter/tokenizer configured above.
# The pipeline returns a dict; the transcription is stored under the key "text".
asr_pipeline = pipeline(
    task="automatic-speech-recognition",
    model=model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
)
|
34 |
+
|
35 |
+
import librosa #For converting audio sample rate to 16k
|
36 |
+
|
37 |
+
def preprocess(input):
    """Load an audio file and return its samples at a 16 kHz sampling rate.

    The file is wrapped in a one-row ``Dataset`` whose ``"audio"`` column is
    cast to ``Audio(sampling_rate=16000)``; reading the row back yields the
    decoded samples.

    Args:
        input: Path of the audio file to load.

    Returns:
        The ``"array"`` entry of the decoded audio (the sample values).
    """
    # The previous version also decoded and resampled the file with librosa
    # and then threw that result away; only the datasets-based decode below
    # ever reached the caller, so the redundant work has been dropped.
    loaded_audio = Dataset.from_dict({"audio": [input]}).cast_column(
        "audio", Audio(sampling_rate=16000)
    )
    return loaded_audio[0]["audio"]["array"]
|
43 |
+
|
44 |
+
def transcribe(input):
    """Gradio UI wrapper: turn a recorded audio file into its transcription.

    Args:
        input: Path of the recorded audio file, or an empty value when the
            button is pressed before anything has been recorded.

    Returns:
        The transcription string, or an empty string when there is no audio.
    """
    # Nothing recorded yet: show an empty transcription instead of failing
    # while trying to load a non-existent file.
    if not input:
        return ""
    audioarray = preprocess(input)  # Decode the file into 16 kHz samples
    return run(audioarray)
|
50 |
+
|
51 |
+
def run(input):
    """Transcribe audio samples with the module-level processor and model.

    Args:
        input: Audio samples, passed to the processor with a declared
            sampling rate of 16 kHz.

    Returns:
        The text decoded from the highest-scoring token id at each frame of
        the first (only) item in the batch.
    """
    features = processor(input, sampling_rate=16_000, return_tensors="pt")
    # Inference only: no gradients are needed.
    with torch.no_grad():
        logits = model(**features).logits
    best_ids = logits.argmax(dim=-1)[0]
    return processor.decode(best_ids)
|
58 |
+
|
59 |
+
# Gradio Blocks UI: a bilingual header, a microphone recorder, a text box for
# the result and a button that runs `transcribe` on the recording.
# Note: there is deliberately no `allow_flagging` assignment here. Flagging is
# an option of `gr.Interface`; a bare variable inside a Blocks context is only
# an unused assignment and configures nothing.
with gr.Blocks(theme=gr.themes.Soft()) as app:
    gr.Markdown(
        """
# Ponutun tuturan Boros Kadazandusun | Kadazandusun speech recognition
### Winonsoi di Ander © 2023 id Universiti Teknologi PETRONAS | Built by Ander © 2023 at Universiti Teknologi PETRONAS.

**Somit tutun tuturan** do boros Kadazandusun ii ginuno nopo nga mantad totoodo *Massive Multilingual Speech* di Meta.

Kadazandusun **automatic speech recognition model** used is from Meta's Massive Multilingual Speech project.
""")
    # type="filepath" hands the recording to the callback as a file path.
    audiofile = gr.Audio(source="microphone", type="filepath", label="Dusunai oku | Say something to me in Kadazandusun")
    transcription_show = gr.components.Textbox(label="Dalinsuat | Transcription")

    button1 = gr.Button("Dalinsuato' | Transcribe")
    button1.click(transcribe, inputs=audiofile, outputs=transcription_show)


if __name__ == "__main__":
    app.launch()
|
favicon.png
ADDED
![]() |