Spaces:
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
import gradio as gr
|
2 |
from transformers import pipeline, WhisperProcessor, WhisperForConditionalGeneration
|
3 |
-
import
|
4 |
import soundfile as sf
|
5 |
|
6 |
# Load Whisper model and processor
|
@@ -15,8 +15,13 @@ def transcribe_and_analyze(audio_path):
|
|
15 |
# Load audio from the provided file
|
16 |
audio, sample_rate = sf.read(audio_path)
|
17 |
|
|
|
|
|
|
|
|
|
|
|
18 |
# Process audio with Whisper
|
19 |
-
input_features = processor(audio, sampling_rate=
|
20 |
predicted_ids = model.generate(input_features)
|
21 |
transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
|
22 |
|
|
|
1 |
import gradio as gr
|
2 |
from transformers import pipeline, WhisperProcessor, WhisperForConditionalGeneration
|
3 |
+
import torchaudio
|
4 |
import soundfile as sf
|
5 |
|
6 |
# Load Whisper model and processor
|
|
|
15 |
# Load audio from the provided file
|
16 |
audio, sample_rate = sf.read(audio_path)
|
17 |
|
18 |
+
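# Resample audio to 16000 Hz if necessary (the Whisper processor below is called with sampling_rate=16000)
# NOTE(review): the resample call uses torch.tensor, but no `import torch` is among the imports shown in this diff - confirm it is imported elsewhere in app.py, otherwise this branch raises NameError. Also confirm the dtype and channel layout of the sf.read output are what resample and the processor expect.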
|
19 |
+
if sample_rate != 16000:
|
20 |
+
audio_tensor = torchaudio.functional.resample(torch.tensor(audio), orig_freq=sample_rate, new_freq=16000)
|
21 |
+
audio = audio_tensor.numpy() # Convert back to numpy array
|
22 |
+
|
23 |
# Process audio with Whisper
|
24 |
+
input_features = processor(audio, sampling_rate=16000, return_tensors="pt").input_features
|
25 |
predicted_ids = model.generate(input_features)
|
26 |
transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
|
27 |
|