peterkros committed on
Commit
4ec81f7
·
verified ·
1 Parent(s): d2753e9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -3
app.py CHANGED
@@ -1,6 +1,7 @@
1
  import gradio as gr
2
  from transformers import WhisperProcessor, WhisperForConditionalGeneration, pipeline
3
  import torch
 
4
 
5
  # Load Whisper model and processor from Hugging Face
6
  model_name = "openai/whisper-large-v3"
@@ -12,9 +13,12 @@ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
12
  model.to(device)
13
 
14
  # Function to handle transcription with language set to English by default
15
- def transcribe(audio):
16
- # Load audio
17
- input_features = processor(audio, sampling_rate=16000, return_tensors="pt").input_features.to(device)
 
 
 
18
 
19
  # Generate transcription with attention_mask and correct input_features
20
  attention_mask = torch.ones(input_features.shape, dtype=torch.long, device=device)
 
1
  import gradio as gr
2
  from transformers import WhisperProcessor, WhisperForConditionalGeneration, pipeline
3
  import torch
4
+ import soundfile as sf
5
 
6
  # Load Whisper model and processor from Hugging Face
7
  model_name = "openai/whisper-large-v3"
 
13
  model.to(device)
14
 
15
  # Function to handle transcription with language set to English by default
16
+ def transcribe(audio_path):
17
+ # Load audio from file
18
+ audio, sampling_rate = sf.read(audio_path)
19
+
20
+ # Process the audio to get input features
21
+ input_features = processor(audio, sampling_rate=sampling_rate, return_tensors="pt").input_features.to(device)
22
 
23
  # Generate transcription with attention_mask and correct input_features
24
  attention_mask = torch.ones(input_features.shape, dtype=torch.long, device=device)