Simonlob committed on
Commit
a9569f0
1 Parent(s): 3e58961

Upload 3 files

Browse files
Files changed (3) hide show
  1. README.md +13 -0
  2. app.py +36 -0
  3. requirements.txt +6 -0
README.md ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Wave2Vec Kyrgyz
3
+ emoji: 😻
4
+ colorFrom: green
5
+ colorTo: pink
6
+ sdk: gradio
7
+ sdk_version: 4.32.2
8
+ app_file: app.py
9
+ pinned: false
10
+ license: apache-2.0
11
+ ---
12
+
13
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ import torchaudio
4
+ from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
5
+ import librosa
6
+ import numpy as np
7
+ import re
8
+
9
# Hugging Face Hub checkpoint; the processor and the CTC model are both
# loaded from the same repository so their vocabularies match.
_CHECKPOINT = "the-cramer-project/Wav2vec-Kyrgyz"

processor = Wav2Vec2Processor.from_pretrained(_CHECKPOINT)
model = Wav2Vec2ForCTC.from_pretrained(_CHECKPOINT)
# GPU placement is intentionally left disabled; re-enable the line below
# (and move the inputs to the same device) to run inference on CUDA.
# model.to("cuda")
12
+
13
def transcribe(file_):
    """Transcribe a speech recording to text with the Kyrgyz CTC model.

    Args:
        file_: Path to an audio file, as supplied by the ``gr.Audio``
            component configured with ``type="filepath"``. A falsy value
            (``None`` or ``""``) means no recording was submitted.

    Returns:
        The decoded transcription of the recording, or an empty string
        when no audio was provided.
    """
    # Gradio hands the function None when nothing was recorded or uploaded;
    # return early instead of letting librosa fail on a missing path.
    if not file_:
        return ""

    # Load and featurise at the same rate so the two values cannot drift.
    sample_rate = 16_000
    waveform, _ = librosa.load(file_, sr=sample_rate)
    inputs = processor(
        waveform,
        sampling_rate=sample_rate,
        return_tensors="pt",
        padding=True,
    )

    with torch.no_grad():
        # Not every Wav2Vec2 feature extractor emits an attention mask, so
        # fall back to None (the model's default) rather than raising.
        logits = model(
            inputs.input_values,
            attention_mask=inputs.get("attention_mask"),
        ).logits

    # Greedy decoding: most likely token per frame, collapsed by the
    # processor's decoder; the batch holds a single recording.
    pred_ids = torch.argmax(logits, dim=-1)
    return processor.batch_decode(pred_ids)[0]
23
+
24
+
25
+
26
+
27
# Single-function Gradio UI: an audio input (delivered to `transcribe` as a
# file path) mapped to a plain text output.
iface = gr.Interface(
    title="Wave2Vec Kyrgyz",
    description="Realtime demo for Kyrgyz speech recognition using a wave2vec model.",
    fn=transcribe,
    inputs=gr.Audio(type="filepath"),
    outputs="text",
)

# Start the web server for the interface.
iface.launch()
36
+
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ gradio
2
+ torch
3
+ torchaudio
4
+ transformers
5
+ librosa
6
+ numpy