nurfarah57 committed on
Commit
3b35aeb
·
verified ·
1 Parent(s): 2d1b604

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +34 -0
  2. requirements.txt +4 -0
app.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torchaudio
3
+ from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
4
+ import gradio as gr
5
+
6
# Load the Somali ASR checkpoint and its processor once, at import time, so
# every request reuses the same objects.
processor = Wav2Vec2Processor.from_pretrained("tacab/tacab_asr_somali")
model = Wav2Vec2ForCTC.from_pretrained("tacab/tacab_asr_somali")

# Run on the GPU when one is visible to torch, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
model = model.to(device)
11
+
12
def transcribe(audio_path):
    """Transcribe an audio file with the module-level ASR model.

    Args:
        audio_path: Path of the audio file to transcribe. May be ``None`` or
            empty when the form is submitted without a recording.

    Returns:
        The lower-cased transcription (greedy argmax decoding of the CTC
        logits), or ``""`` when there is no audio to transcribe.
    """
    target_rate = 16000

    # No recording/upload: return an empty transcription instead of letting
    # torchaudio.load fail on a missing path.
    if not audio_path:
        return ""

    waveform, sample_rate = torchaudio.load(audio_path)
    if waveform.numel() == 0:
        # Nothing to feed the model.
        return ""

    # Downmix first so only a single channel has to be resampled.
    if waveform.shape[0] > 1:
        waveform = waveform.mean(dim=0, keepdim=True)
    if sample_rate != target_rate:
        resample = torchaudio.transforms.Resample(
            orig_freq=sample_rate, new_freq=target_rate
        )
        waveform = resample(waveform)

    # squeeze(0) drops only the channel axis; a bare squeeze() would also
    # collapse a one-sample clip into a 0-d array.
    inputs = processor(
        waveform.squeeze(0).numpy(),
        sampling_rate=target_rate,
        return_tensors="pt",
    )
    input_values = inputs.input_values.to(device)

    with torch.no_grad():
        logits = model(input_values).logits

    predicted_ids = torch.argmax(logits, dim=-1)
    transcription = processor.batch_decode(predicted_ids)[0]
    return transcription.lower()
25
+
26
# UI components: the audio widget hands transcribe() a file path, and the
# text widget shows the string it returns.
audio_input = gr.Audio(type="filepath", label="🎙️ Somali Audio")
text_output = gr.Text(label="📄 Transcription")

iface = gr.Interface(
    fn=transcribe,
    inputs=audio_input,
    outputs=text_output,
    title="Tacab Somali ASR",
    description="Speak Somali and get transcription back!",
)

# Start the web server; "0.0.0.0" is passed as the host name to bind to.
iface.launch(server_name="0.0.0.0")
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ transformers>=4.38.0
2
+ torch>=2.1.0
3
+ torchaudio>=2.1.0
4
+ gradio>=4.0.0