Spaces:

peterkros
/

transcribeapi

Sleeping

peterkros committed on Aug 20, 2024

Commit

4ec81f7

verified ·

1 Parent(s): d2753e9

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,6 +1,7 @@
 import gradio as gr
 from transformers import WhisperProcessor, WhisperForConditionalGeneration, pipeline
 import torch
 # Load Whisper model and processor from Hugging Face
 model_name = "openai/whisper-large-v3"
@@ -12,9 +13,12 @@ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 model.to(device)
 # Function to handle transcription with language set to English by default
-def transcribe(audio):
-    # Load audio
-    input_features = processor(audio, sampling_rate=16000, return_tensors="pt").input_features.to(device)
     # Generate transcription with attention_mask and correct input_features
     attention_mask = torch.ones(input_features.shape, dtype=torch.long, device=device)

 import gradio as gr
 from transformers import WhisperProcessor, WhisperForConditionalGeneration, pipeline
 import torch
+import soundfile as sf
 # Load Whisper model and processor from Hugging Face
 model_name = "openai/whisper-large-v3"
 model.to(device)
 # Function to handle transcription with language set to English by default
+def transcribe(audio_path):
+    # Load audio from file
+    audio, sampling_rate = sf.read(audio_path)
+    # Process the audio to get input features
+    input_features = processor(audio, sampling_rate=sampling_rate, return_tensors="pt").input_features.to(device)
     # Generate transcription with attention_mask and correct input_features
     attention_mask = torch.ones(input_features.shape, dtype=torch.long, device=device)