Spaces:

peterkros
/

transcribeapi

Sleeping

peterkros committed on Aug 20, 2024

Commit

d2753e9

verified ·

1 Parent(s): 9e71ecb

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,14 +1,32 @@
 import gradio as gr
-from transformers import pipeline
-# Load Whisper model from Hugging Face
-# This uses the `transformers` library's pipeline to load the model
-transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-large-v3")
 def transcribe(audio):
-    # Transcribe the audio using the Whisper model
-    result = transcriber(audio)["text"]
-    return result
 # Create a Gradio Interface
 interface = gr.Interface(
@@ -20,4 +38,4 @@ interface = gr.Interface(
 )
 # Launch the interface as an API
-interface.launch()

 import gradio as gr
+from transformers import WhisperProcessor, WhisperForConditionalGeneration, pipeline
+import torch
+# Load the Whisper processor and model from Hugging Face once, at module level,
+# so transcribe() below can use them through the module-level names
+model_name = "openai/whisper-large-v3"
+processor = WhisperProcessor.from_pretrained(model_name)
+model = WhisperForConditionalGeneration.from_pretrained(model_name)
+# Use CUDA when torch reports it as available, otherwise fall back to CPU,
+# and move the model's weights to that device
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+model.to(device)
+# Function to handle transcription with language set to English by default
 def transcribe(audio):
+    # Run `audio` through the module-level Whisper processor and model and
+    # return the text of the first decoded sequence.
+    # Convert the audio into model input features (declared as 16 kHz) and
+    # move them to the same device as the model
+    # NOTE(review): nothing here resamples - confirm callers pass 16 kHz audio
+    input_features = processor(audio, sampling_rate=16000, return_tensors="pt").input_features.to(device)
+    # Build an all-ones mask with the same shape as input_features
+    # NOTE(review): attention masks are usually (batch, frames) - confirm this
+    # full-feature shape is what generate() expects
+    attention_mask = torch.ones(input_features.shape, dtype=torch.long, device=device)
+    generated_ids = model.generate(
+        input_features=input_features,
+        attention_mask=attention_mask,
+        language="en"  # Decode as English. NOTE(review): this selects the language; it does not by itself request a translate task - confirm intent
+    )
+    # Decode the generated token ids to text without special tokens and keep
+    # the first entry of the batch
+    transcription = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
+    return transcription
 # Create a Gradio Interface
 interface = gr.Interface(
 )
 # Launch the interface as an API
+interface.launch()