ixxan committed on
Commit
7a0f405
·
verified ·
1 Parent(s): 64601f3

Update asr.py

Browse files
Files changed (1) hide show
  1. asr.py +18 -4
asr.py CHANGED
@@ -1,24 +1,38 @@
1
  import torchaudio
2
  import torch
3
  from transformers import AutoProcessor, AutoModelForSpeechSeq2Seq
 
4
 
5
  # Load processor and model
6
  processor = AutoProcessor.from_pretrained("ixxan/whisper-small-ug-cv-15")
7
  model = AutoModelForSpeechSeq2Seq.from_pretrained("ixxan/whisper-small-ug-cv-15")
8
 
9
 
10
- def transcribe(audio_data: tuple) -> str:
11
  """
12
  Transcribes audio to text using the Whisper model for Uyghur.
13
  Args:
14
- - audio_data (tuple): Gradio audio input (file path and sample rate).
15
  Returns:
16
  - str: The transcription of the audio.
17
  """
18
- audio_path = audio_data[0] # Extract the file path from the tuple
19
 
20
  # Load audio file
21
- audio_input, sampling_rate = torchaudio.load(audio_path)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
 
23
  # Resample if needed
24
  if sampling_rate != processor.feature_extractor.sampling_rate:
 
1
  import torchaudio
2
  import torch
3
  from transformers import AutoProcessor, AutoModelForSpeechSeq2Seq
4
+ import numpy as np
5
 
6
  # Load processor and model
7
  processor = AutoProcessor.from_pretrained("ixxan/whisper-small-ug-cv-15")
8
  model = AutoModelForSpeechSeq2Seq.from_pretrained("ixxan/whisper-small-ug-cv-15")
9
 
10
 
11
+ def transcribe(audio_data) -> str:
12
  """
13
  Transcribes audio to text using the Whisper model for Uyghur.
14
  Args:
15
+ - audio_data: Gradio audio input
16
  Returns:
17
  - str: The transcription of the audio.
18
  """
 
19
 
20
  # Load audio file
21
+ if not audio_data:
22
+ return "<<ERROR: Empty Audio Input>>"
23
+
24
+ if isinstance(audio_data, tuple):
25
+ # microphone
26
+ sampling_rate, audio_input = audio_data
27
+ audio_input = (audio_input / 32768.0).astype(np.float32)
28
+
29
+ if isinstance(audio_data, str):
30
+ # file upload
31
+ audio_input, sampling_rate = torchaudio.load(audio_data)
32
+
33
+ else:
34
+ return "<<ERROR: Invalid Audio Input Instance: {}>>".format(type(audio_data))
35
+
36
 
37
  # Resample if needed
38
  if sampling_rate != processor.feature_extractor.sampling_rate: