Spaces:

ixxan
/

uyghur-speech-models

Sleeping

ixxan committed on Nov 24, 2024

Commit

64601f3

verified ·

1 Parent(s): f23608f

Update asr.py

Files changed (1) hide show

asr.py CHANGED Viewed

@@ -6,16 +6,16 @@ from transformers import AutoProcessor, AutoModelForSpeechSeq2Seq
 processor = AutoProcessor.from_pretrained("ixxan/whisper-small-ug-cv-15")
 model = AutoModelForSpeechSeq2Seq.from_pretrained("ixxan/whisper-small-ug-cv-15")
-def transcribe(audio_path: str) -> str:
     """
     Transcribes audio to text using the Whisper model for Uyghur.
     Args:
-    - audio_path (str): Path to the audio file to transcribe.
     Returns:
     - str: The transcription of the audio.
     """
     # Load audio file
     audio_input, sampling_rate = torchaudio.load(audio_path)

 processor = AutoProcessor.from_pretrained("ixxan/whisper-small-ug-cv-15")
 model = AutoModelForSpeechSeq2Seq.from_pretrained("ixxan/whisper-small-ug-cv-15")
+def transcribe(audio_data: tuple) -> str:
     """
     Transcribes audio to text using the Whisper model for Uyghur.
     Args:
+    - audio_data (tuple): Gradio audio input (file path and sample rate).
     Returns:
     - str: The transcription of the audio.
     """
+    audio_path = audio_data[0]  # Extract the file path from the tuple
     # Load audio file
     audio_input, sampling_rate = torchaudio.load(audio_path)