Spaces: Running on Zero
Update whisper.py
Browse files - whisper.py +3 -3
whisper.py
CHANGED
@@ -210,8 +210,8 @@ def processing_vad_threshold(audio, output_vad, threshold, max_duration, concate
|
|
210 |
|
211 |
return(transcription_audio)
|
212 |
|
213 |
-
def format_audio(
|
214 |
-
input_audio, sample_rate = torchaudio.load(
|
215 |
resampler = torchaudio.transforms.Resample(sample_rate, 16000)
|
216 |
input_audio = resampler(input_audio)
|
217 |
input_audio = input_audio.squeeze().numpy()
|
@@ -232,7 +232,7 @@ def generate(audio_path, use_v5):
|
|
232 |
output = processing_vad_threshold(audio, output_vad, threshold, max_duration, concatenated_segment)
|
233 |
else:
|
234 |
task = "transcribe"
|
235 |
-
output = transcribe_pipeline(format_audio(
|
236 |
|
237 |
clean_output = post_process_transcription(output, max_repeats=1)
|
238 |
|
|
|
210 |
|
211 |
return(transcription_audio)
|
212 |
|
213 |
+
def format_audio(audio_path):
|
214 |
+
input_audio, sample_rate = torchaudio.load(audio_path)
|
215 |
resampler = torchaudio.transforms.Resample(sample_rate, 16000)
|
216 |
input_audio = resampler(input_audio)
|
217 |
input_audio = input_audio.squeeze().numpy()
|
|
|
232 |
output = processing_vad_threshold(audio, output_vad, threshold, max_duration, concatenated_segment)
|
233 |
else:
|
234 |
task = "transcribe"
|
235 |
+
output = transcribe_pipeline(format_audio(audio_path), task)
|
236 |
|
237 |
clean_output = post_process_transcription(output, max_repeats=1)
|
238 |
|