Anushkabhat9 committed on
Commit
0c00dfa
·
verified ·
1 Parent(s): 5abec50

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -23
app.py CHANGED
@@ -1,34 +1,31 @@
1
  import gradio as gr
2
- import torch
3
  from transformers import pipeline
4
- from datasets import load_dataset
5
 
6
- # device = "cuda:0" if torch.cuda.is_available() else "cpu"
7
-
8
- # ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
9
- # sample = ds[0]["audio"]
10
-
11
- def transcribe_audio(sample):
12
- pipe = pipeline(
13
- "automatic-speech-recognition",
14
- model="openai/whisper-small",
15
- chunk_length_s=30,
16
- )
17
- prediction = pipe(sample.copy(), batch_size=8)["text"]
18
- #prediction = pipe(sample, batch_size=8, return_timestamps=True)["chunks"]
19
- return prediction
20
-
21
- # we can also return timestamps for the predictions
22
-
23
 
 
 
 
 
 
 
 
 
 
24
 
 
25
  interface = gr.Interface(
26
  fn=transcribe_audio, # The function to be applied to the audio input
27
  inputs=gr.Audio(type="filepath"), # Users can record or upload audio
28
- outputs="text", # The output is the transcription (text)
29
  title="Whisper Small ASR", # Title of your app
30
- description="Transcription using Whisper Small." # Description of your app
31
  )
32
 
33
- # **This line starts the Gradio app**
34
- interface.launch()
 
1
  import gradio as gr
 
2
  from transformers import pipeline
 
3
 
4
+ # Initialize the Whisper ASR pipeline (Whisper Small model)
5
+ pipe = pipeline(
6
+ "automatic-speech-recognition",
7
+ model="openai/whisper-small",
8
+ chunk_length_s=30,
9
+ )
 
 
 
 
 
 
 
 
 
 
 
10
 
11
+ # Define the transcription function for audio input
12
def transcribe_audio(audio):
    """Transcribe an audio file and return the text annotated with timestamps.

    Args:
        audio: Path to the audio file handed over by the Gradio ``Audio``
            input (configured with ``type="filepath"``), or ``None`` when
            the user submitted without recording or uploading anything.

    Returns:
        One line per recognised chunk in the form
        ``[<start>s - <end>s] <text>``, joined with newlines. A boundary the
        model did not predict is shown as ``?``. When no audio was supplied,
        a short notice is returned instead of a transcription.
    """
    if audio is None:
        # An empty Audio input arrives as None; passing it on to the pipeline
        # would only surface as an opaque error in the UI.
        return "No audio provided."

    def _fmt(seconds):
        # A timestamp boundary can be None (typically the end of the last
        # chunk when the audio is cut off); formatting None with ".2f"
        # raises TypeError, so render a placeholder instead.
        return "?" if seconds is None else f"{seconds:.2f}s"

    chunks = pipe(audio, batch_size=8, return_timestamps=True)["chunks"]

    lines = []
    for chunk in chunks:
        start, end = chunk["timestamp"]
        lines.append(f"[{_fmt(start)} - {_fmt(end)}] {chunk['text']}")
    return "\n".join(lines)
20
 
21
+ # Create a Gradio interface
22
  interface = gr.Interface(
23
  fn=transcribe_audio, # The function to be applied to the audio input
24
  inputs=gr.Audio(type="filepath"), # Users can record or upload audio
25
+ outputs="text", # The output is the transcription (text with timestamps)
26
  title="Whisper Small ASR", # Title of your app
27
+ description="Upload or record audio for transcription using Whisper Small." # Description of your app
28
  )
29
 
30
+ # Launch the Gradio app
31
+ interface.launch()