whisper-small-indonesia-demo

Sleeping

App Files Community

EwoutLagendijk committed 11 days ago

Commit

efa7028

•

1 Parent(s): cd7c511

Update app.py

Browse files

Files changed (1) hide show

app.py +9 -28

app.py CHANGED Viewed

@@ -1,5 +1,5 @@
 import torch
-from transformers import pipeline
 from transformers.pipelines.audio_utils import ffmpeg_read
 import gradio as gr
 import librosa
@@ -9,34 +9,14 @@ BATCH_SIZE = 8
 device = 0 if torch.cuda.is_available() else "cpu"
-pipe = pipeline(
-    task="automatic-speech-recognition",
-    model=MODEL_NAME,
-    chunk_length_s=30,
-    device=device,
-)
-# Copied from https://github.com/openai/whisper/blob/c09a7ae299c4c34c5839a76380ae407e7d785914/whisper/utils.py#L50
-def format_timestamp(seconds: float, always_include_hours: bool = False, decimal_marker: str = "."):
-    if seconds is not None:
-        milliseconds = round(seconds * 1000.0)
-        hours = milliseconds // 3_600_000
-        milliseconds -= hours * 3_600_000
-        minutes = milliseconds // 60_000
-        milliseconds -= minutes * 60_000
-        seconds = milliseconds // 1_000
-        milliseconds -= seconds * 1_000
-        hours_marker = f"{hours:02d}:" if always_include_hours or hours > 0 else ""
-        return f"{hours_marker}{minutes:02d}:{seconds:02d}{decimal_marker}{milliseconds:03d}"
-    else:
-        # we have a malformed timestamp so just return it as is
-        return seconds
 def transcribe_speech(filepath):
     # Load the audio
@@ -58,7 +38,8 @@ def transcribe_speech(filepath):
         generated_ids = model.generate(
             inputs,
             max_new_tokens=444,  # Max allowed by Whisper
-            forced_decoder_ids=processor.get_decoder_prompt_ids(language="id", task="transcribe")
         )
         # Decode and append the transcription

 import torch
+from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
 from transformers.pipelines.audio_utils import ffmpeg_read
 import gradio as gr
 import librosa
 device = 0 if torch.cuda.is_available() else "cpu"
+# Load model and processor
+model_name = "EwoutLagendijk/whisper-small-indonesian"
+model = AutoModelForSpeechSeq2Seq.from_pretrained(model_name)
+processor = AutoProcessor.from_pretrained(model_name)
+# Update the generation config for transcription
+model.config.forced_decoder_ids = processor.get_decoder_prompt_ids(language="id", task="transcribe")
 def transcribe_speech(filepath):
     # Load the audio
         generated_ids = model.generate(
             inputs,
             max_new_tokens=444,  # Max allowed by Whisper
+            forced_decoder_ids=processor.get_decoder_prompt_ids(language="id", task="transcribe"),
+            return_timestamps = True
         )
         # Decode and append the transcription