nurfarah57 committed on
Commit
12f9c3c
·
verified ·
1 Parent(s): ed877fd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -7
app.py CHANGED
@@ -3,14 +3,16 @@ import torchaudio
3
  from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
4
  import gradio as gr
5
 
 
6
  model = Wav2Vec2ForCTC.from_pretrained("tacab/tacab_asr_somali")
7
  processor = Wav2Vec2Processor.from_pretrained("tacab/tacab_asr_somali")
8
 
9
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
10
  model.to(device)
11
 
12
- def transcribe(audio):
13
- waveform, sample_rate = torchaudio.load(audio)
 
14
  if sample_rate != 16000:
15
  waveform = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=16000)(waveform)
16
  if waveform.shape[0] > 1:
@@ -23,12 +25,14 @@ def transcribe(audio):
23
  transcription = processor.batch_decode(predicted_ids)[0]
24
  return transcription.lower()
25
 
 
26
  iface = gr.Interface(
27
  fn=transcribe,
28
- inputs=gr.Audio(type="filepath", label="🎙️ Ku hadal Af Soomaali"),
29
- outputs=gr.Text(label="📄 Qoraalka la helay"),
30
- title="Tacab ASR Somali",
31
- description="ASR model for Somali speech-to-text using Wav2Vec2.",
32
  )
33
 
34
- iface.launch(server_name="0.0.0.0") # 🔥 This is the key fix
 
 
3
  from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
4
  import gradio as gr
5
 
6
+ # Load model and processor
7
  model = Wav2Vec2ForCTC.from_pretrained("tacab/tacab_asr_somali")
8
  processor = Wav2Vec2Processor.from_pretrained("tacab/tacab_asr_somali")
9
 
10
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
11
  model.to(device)
12
 
13
+ # Transcription function
14
+ def transcribe(audio_path):
15
+ waveform, sample_rate = torchaudio.load(audio_path)
16
  if sample_rate != 16000:
17
  waveform = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=16000)(waveform)
18
  if waveform.shape[0] > 1:
 
25
  transcription = processor.batch_decode(predicted_ids)[0]
26
  return transcription.lower()
27
 
28
+ # Setup Gradio Interface
29
  iface = gr.Interface(
30
  fn=transcribe,
31
+ inputs=gr.Audio(type="filepath", label="🎙️ Somali Audio"),
32
+ outputs=gr.Text(label="📄 Transcription"),
33
+ title="Tacab Somali ASR",
34
+ description="Speak Somali and get transcription back!",
35
  )
36
 
37
+ # ✅ Critical: This exposes /api/predict
38
+ iface.launch(server_name="0.0.0.0")