Spaces:

Anushkabhat9
/

DeepLearning

Sleeping

Anushkabhat9 committed on Sep 26, 2024

Commit

0c00dfa

verified ·

1 Parent(s): 5abec50

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,34 +1,31 @@
 import gradio as gr
-import torch
 from transformers import pipeline
-from datasets import load_dataset
-# device = "cuda:0" if torch.cuda.is_available() else "cpu"
-# ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
-# sample = ds[0]["audio"]
-def transcribe_audio(sample):
- pipe = pipeline(
-  "automatic-speech-recognition",
-  model="openai/whisper-small",
-  chunk_length_s=30,
- )
- prediction = pipe(sample.copy(), batch_size=8)["text"]
-#prediction = pipe(sample, batch_size=8, return_timestamps=True)["chunks"]
- return prediction
-# we can also return timestamps for the predictions
 interface = gr.Interface(
     fn=transcribe_audio,  # The function to be applied to the audio input
     inputs=gr.Audio(type="filepath"),  # Users can record or upload audio
-    outputs="text",  # The output is the transcription (text)
     title="Whisper Small ASR",  # Title of your app
-    description="Transcription using Whisper Small."  # Description of your app
 )
-# **This line starts the Gradio app**
-interface.launch()

 import gradio as gr
 from transformers import pipeline
+# Initialize the Whisper ASR pipeline (Whisper Small model)
+pipe = pipeline(
+    "automatic-speech-recognition",
+    model="openai/whisper-small",
+    chunk_length_s=30,
+)
+# Define the transcription function for audio input
+def transcribe_audio(audio):
+    """Transcribe an audio file and return one timestamped line per chunk.
+
+    Args:
+        audio: Path to the audio file, as supplied by the
+            ``gr.Audio(type="filepath")`` input; ``None`` when no audio
+            was uploaded or recorded.
+
+    Returns:
+        A newline-joined string of ``[start - end] text`` lines, or an
+        empty string when no audio was provided.
+    """
+    # Nothing to transcribe; avoid handing None to the pipeline.
+    if audio is None:
+        return ""
+
+    def _fmt(seconds):
+        # Whisper can leave a chunk boundary unset (typically the end of the
+        # last chunk); formatting None with ":.2f" would raise TypeError.
+        return "?" if seconds is None else f"{seconds:.2f}s"
+
+    # Transcribe the uploaded or recorded audio, keeping chunk timestamps
+    chunks = pipe(audio, batch_size=8, return_timestamps=True)["chunks"]
+    # Format the output to show text with timestamps
+    lines = []
+    for chunk in chunks:
+        start, end = chunk["timestamp"]
+        lines.append(f"[{_fmt(start)} - {_fmt(end)}] {chunk['text']}")
+    return "\n".join(lines)
+# Create a Gradio interface
 interface = gr.Interface(
     fn=transcribe_audio,  # The function to be applied to the audio input
     inputs=gr.Audio(type="filepath"),  # Users can record or upload audio
+    outputs="text",  # The output is the transcription (text with timestamps)
     title="Whisper Small ASR",  # Title of your app
+    description="Upload or record audio for transcription using Whisper Small."  # Description of your app
 )
+# Launch the Gradio app
+interface.launch()