whisper-zero-shot-audio-classification

Runtime error

mskov committed on Sep 6, 2023

Commit

24a2384

1 Parent(s): 0aa0d62

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -4,20 +4,21 @@ import gradio as gr
 import whisper
 from whisper.tokenizer import get_tokenizer
 import classify
 model_cache = {}
 def zero_shot_classify(audio_path: str, class_names: str, model_name: str) -> Dict[str, float]:
     class_names = class_names.split(",")
-    # Specify the path to your fine-tuned model and configuration
-    model_path = "mskov/whisper-small-esc50"
-    # Load the model
-    model = whisper.Whisper.load(model_path)
-    # Load the tokenizer
-    tokenizer = whisper.tokenizer.get_tokenizer(multilingual=".en" not in model.config.name)
     internal_lm_average_logprobs = classify.calculate_internal_lm_average_logprobs(
         model=model,
@@ -36,6 +37,7 @@ def zero_shot_classify(audio_path: str, class_names: str, model_name: str) -> Di
     return {class_name: score for class_name, score in zip(class_names, scores)}
 def main():
     CLASS_NAMES = "[dog barking],[helicopter whirring],[laughing],[birds chirping],[clock ticking],[popping],[sneezing],[sigh],[slurping],[mouth sounds],[clearing thoat],"
     AUDIO_PATHS = [
@@ -69,7 +71,7 @@ def main():
             gr.Audio(label="Input Audio",show_label=False,source="microphone",type="filepath"),
             gr.Textbox(lines=1, label="Candidate class names (comma-separated)"),
             gr.Radio(
-                choices=["whisper-small-esc50"],
                 value="small",
                 label="Model Name",
             ),

 import whisper
 from whisper.tokenizer import get_tokenizer
 import classify
+from transformers import AutoFeatureExtractor, WhisperForAudioClassification
+from datasets import load_dataset
 model_cache = {}
 def zero_shot_classify(audio_path: str, class_names: str, model_name: str) -> Dict[str, float]:
     class_names = class_names.split(",")
+    tokenizer = get_tokenizer(multilingual=".en" not in model_name)
+    if model_name not in model_cache:
+        model = whisper.load_model(model_name)
+        model_cache[model_name] = model
+    else:
+        model = model_cache[model_name]
     internal_lm_average_logprobs = classify.calculate_internal_lm_average_logprobs(
         model=model,
     return {class_name: score for class_name, score in zip(class_names, scores)}
 def main():
     CLASS_NAMES = "[dog barking],[helicopter whirring],[laughing],[birds chirping],[clock ticking],[popping],[sneezing],[sigh],[slurping],[mouth sounds],[clearing thoat],"
     AUDIO_PATHS = [
             gr.Audio(label="Input Audio",show_label=False,source="microphone",type="filepath"),
             gr.Textbox(lines=1, label="Candidate class names (comma-separated)"),
             gr.Radio(
+                choices=["tiny", "base", "small", "medium", "large"],
                 value="small",
                 label="Model Name",
             ),