Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -50,18 +50,41 @@ def split_audio_on_silence(audio_file_path, min_silence_len=500, silence_thresh=
|
|
50 |
status_placeholder.info(f"Audio split into {len(chunks)} chunks.")
|
51 |
return chunks
|
52 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
53 |
def transcribe(audio_file):
    """
    Transcribe an audio file using the locally loaded Whisper model from Hugging Face.

    This uses librosa to load and resample the audio as required by Whisper.

    Args:
        audio_file (str): Path to the audio file to transcribe.

    Returns:
        str: The transcribed text.
    """
    # Load audio with librosa at 16kHz (the sampling rate Whisper expects).
    # librosa resamples on load, so the returned rate is always 16000 here.
    speech, _ = librosa.load(audio_file, sr=16000)
    # Convert the raw waveform into the model's expected log-mel input features.
    input_features = processor(speech, sampling_rate=16000, return_tensors="pt").input_features
    # BUGFIX: this statement was missing, leaving `predicted_ids` undefined
    # before use (restored from the deleted diff line).
    predicted_ids = model.generate(input_features)
    # Decode token ids back to text; batch_decode returns a list, take the single item.
    transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
    return transcription
|
64 |
|
|
|
|
|
65 |
def transcribe_chunk(chunk, index, min_length_ms=100):
|
66 |
"""
|
67 |
Transcribe an individual audio chunk.
|
|
|
50 |
status_placeholder.info(f"Audio split into {len(chunks)} chunks.")
|
51 |
return chunks
|
52 |
|
53 |
+
# def transcribe(audio_file):
|
54 |
+
# """
|
55 |
+
# Transcribe an audio file using the locally loaded Whisper model from Hugging Face.
|
56 |
+
# This uses librosa to load and resample the audio as required.
|
57 |
+
# """
|
58 |
+
# # Load audio with librosa at 16kHz (as required by Whisper)
|
59 |
+
# speech, sr = librosa.load(audio_file, sr=16000)
|
60 |
+
# input_features = processor(speech, sampling_rate=16000, return_tensors="pt").input_features
|
61 |
+
# predicted_ids = model.generate(input_features)
|
62 |
+
# transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
|
63 |
+
# return transcription
|
64 |
+
|
65 |
def transcribe(audio_file):
    """
    Transcribe an audio file using the locally loaded Whisper model from Hugging Face.

    This uses librosa to load and resample the audio as required.
    The transcription is forced to be in English.

    Args:
        audio_file (str): Path to the audio file.

    Returns:
        str: Transcribed text in English.
    """
    # Load audio with librosa at 16kHz (as required by Whisper).
    # librosa resamples to the requested rate, so the returned sample rate
    # is always 16000 and does not need to be kept (`_`).
    speech, _ = librosa.load(audio_file, sr=16000)
    # Convert the waveform into the model's expected log-mel input features.
    input_features = processor(speech, sampling_rate=16000, return_tensors="pt").input_features
    # Force the transcription output to be in English:
    forced_ids = processor.get_decoder_prompt_ids(language="en", task="transcribe")
    predicted_ids = model.generate(input_features, forced_decoder_ids=forced_ids)
    # batch_decode returns a list of strings; a single input yields one item.
    transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
    return transcription
|
85 |
|
86 |
+
|
87 |
+
|
88 |
def transcribe_chunk(chunk, index, min_length_ms=100):
|
89 |
"""
|
90 |
Transcribe an individual audio chunk.
|