Spaces:

demavior
/

whisper_gradio

Sleeping

demavior committed on Sep 30, 2024

Commit

42aa5ee

verified ·

1 Parent(s): bbc5e4c

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,41 +1,36 @@
-import os
-os.system('pip install soundfile')
 import gradio as gr
 from transformers import pipeline
 import torch
-import soundfile as sf
-import io
 device = "cuda:0" if torch.cuda.is_available() else "cpu"
 def transcribe(audio):
     # Extract the audio data from the tuple
     audio_data = audio[0] if isinstance(audio, tuple) else audio
-    # Convert the audio data to flac format
-    audio_flac = io.BytesIO()
-    sf.write(audio_flac, audio_data, 16000, format='flac')  # Assuming a sample rate of 16000 Hz
-    audio_flac.seek(0)  # Reset the pointer to the beginning of the BytesIO object
     pipe = pipeline(
-      "automatic-speech-recognition",
-      model="openai/whisper-small",
-      chunk_length_s=30,
-      device=device,
     )
-    prediction = pipe(audio_flac, batch_size=8)["text"]
     return prediction
 gradio_app = gr.Interface(
-    transcribe,
-    inputs=gr.Audio(label="Input"),#sources=['audio'], type="pil"),
-    outputs=gr.Textbox(label="Result"),# gr.Label(label="Result", num_top_classes=2)],
     title="Transcribed",
 )
 if __name__ == "__main__":
-    gradio_app.launch()

 import gradio as gr
 from transformers import pipeline
 import torch
+import torchaudio
 device = "cuda:0" if torch.cuda.is_available() else "cpu"
 def transcribe(audio):
     # Extract the audio data from the tuple
     audio_data = audio[0] if isinstance(audio, tuple) else audio
+    # Load the audio data using torchaudio
+    waveform, sample_rate = torchaudio.load(audio_data)
+    # Convert the waveform to numpy array
+    waveform_np = waveform.numpy()
     pipe = pipeline(
+        "automatic-speech-recognition",
+        model="openai/whisper-small",
+        chunk_length_s=30,
+        device=device,
     )
+    prediction = pipe(waveform_np)["text"]
     return prediction
 gradio_app = gr.Interface(
+    fn=transcribe,
+    inputs=gr.Audio(label="Input"),
+    outputs=gr.Textbox(label="Result"),
     title="Transcribed",
 )
 if __name__ == "__main__":
+    gradio_app.launch()