Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -50,18 +50,41 @@ def split_audio_on_silence(audio_file_path, min_silence_len=500, silence_thresh=
|
|
50 |
status_placeholder.info(f"Audio split into {len(chunks)} chunks.")
|
51 |
return chunks
|
52 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
53 |
def transcribe(audio_file):
    """
    Transcribe an audio file using the locally loaded Whisper model from Hugging Face.

    This uses librosa to load and resample the audio as required by Whisper.

    Args:
        audio_file (str): Path to the audio file to transcribe.

    Returns:
        str: The transcribed text.
    """
    # Load audio with librosa at 16kHz (the sampling rate Whisper expects).
    # librosa resamples on load, so the returned rate is always 16000 here.
    speech, _ = librosa.load(audio_file, sr=16000)
    # Convert the raw waveform into the model's expected log-mel input features.
    input_features = processor(speech, sampling_rate=16000, return_tensors="pt").input_features
    # BUGFIX: this statement was missing, leaving `predicted_ids` undefined
    # before use (restored from the deleted diff line).
    predicted_ids = model.generate(input_features)
    # Decode token ids back to text; batch_decode returns a list, take the single item.
    transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
    return transcription
|
64 |
|
|
|
|
|
65 |
def transcribe_chunk(chunk, index, min_length_ms=100):
|
66 |
"""
|
67 |
Transcribe an individual audio chunk.
|
|
|
50 |
status_placeholder.info(f"Audio split into {len(chunks)} chunks.")
|
51 |
return chunks
|
52 |
|
53 |
+
# def transcribe(audio_file):
|
54 |
+
# """
|
55 |
+
# Transcribe an audio file using the locally loaded Whisper model from Hugging Face.
|
56 |
+
# This uses librosa to load and resample the audio as required.
|
57 |
+
# """
|
58 |
+
# # Load audio with librosa at 16kHz (as required by Whisper)
|
59 |
+
# speech, sr = librosa.load(audio_file, sr=16000)
|
60 |
+
# input_features = processor(speech, sampling_rate=16000, return_tensors="pt").input_features
|
61 |
+
# predicted_ids = model.generate(input_features)
|
62 |
+
# transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
|
63 |
+
# return transcription
|
64 |
+
|
65 |
def transcribe(audio_file):
    """
    Transcribe an audio file using the locally loaded Whisper model from Hugging Face.

    This uses librosa to load and resample the audio as required.
    The transcription is forced to be in English.

    Args:
        audio_file (str): Path to the audio file.

    Returns:
        str: Transcribed text in English.
    """
    # Load audio with librosa at 16kHz (as required by Whisper).
    # librosa resamples to the requested rate, so the returned sample rate
    # is always 16000 and does not need to be kept (`_`).
    speech, _ = librosa.load(audio_file, sr=16000)
    # Convert the waveform into the model's expected log-mel input features.
    input_features = processor(speech, sampling_rate=16000, return_tensors="pt").input_features
    # Force the transcription output to be in English:
    forced_ids = processor.get_decoder_prompt_ids(language="en", task="transcribe")
    predicted_ids = model.generate(input_features, forced_decoder_ids=forced_ids)
    # batch_decode returns a list of strings; a single input yields one item.
    transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
    return transcription
|
85 |
|
86 |
+
|
87 |
+
|
88 |
def transcribe_chunk(chunk, index, min_length_ms=100):
|
89 |
"""
|
90 |
Transcribe an individual audio chunk.
|