Spaces:

DarwinAnim8or
/

Whisper-Demo

Sleeping

DarwinAnim8or committed on Aug 10, 2023

Commit

253867d

1 Parent(s): 02ad7fc

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,12 +1,24 @@
 import gradio as gr
-from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, WhisperForConditionalGeneration, WhisperProcessor
 # Load the model and processor
 model_id = "openai/whisper-medium"
-processor = WhisperProcessor.from_pretrained(model_id)
-model = WhisperForConditionalGeneration.from_pretrained(model_id)
-model.config.forced_decoder_ids = None
 def transcribelocal(microphone, file_upload):
   # Check which input is not None
@@ -15,18 +27,8 @@ def transcribelocal(microphone, file_upload):
   else:
     audio = file_upload
-  # Use the processor to transcribe the audio
-  transcription = processor.transcribe(audio, 48)
-  # Extract the confidence score and the duration from the transcription
-  confidence = transcription.confidence
-  duration = transcription.duration
-  # Remove the special tokens from the transcription text
-  text = transcription.text.replace("<|startoftranscript|>", "").replace("<|endoftranscript|>", "")
-  # Return the text, confidence and duration as outputs
-  return text, confidence, duration
 # Create a Gradio interface with two modes: realtime and file upload
 iface = gr.Interface(

 import gradio as gr
+from transformers import pipeline
 # Load the model and processor
 model_id = "openai/whisper-medium"
+device = 0 if torch.cuda.is_available() else "cpu"
+BATCH_SIZE = 8
+pipe = pipeline(
+    task="automatic-speech-recognition",
+    model=model_id,
+    chunk_length_s=30,
+    device=device,
+)
+def transcribe(inputs, task):
+    if inputs is None:
+        raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")
+    text = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)["text"]
+    return  text
 def transcribelocal(microphone, file_upload):
   # Check which input is not None
   else:
     audio = file_upload
+  text = pipe(audio, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)["text"]
+  return text
 # Create a Gradio interface with two modes: realtime and file upload
 iface = gr.Interface(