Spaces:

mskov
/

Misophonia_Trigger_Detection

Runtime error

App Files Files Community

mskov committed on Sep 21, 2023

Commit

fc004ff

1 Parent(s): 147b7bc

Create app.py

Browse files

Files changed (1) hide show

app.py +132 -0

app.py ADDED Viewed

	@@ -0,0 +1,132 @@

+import os
+os.system("pip install git+https://github.com/openai/whisper.git")
+import whisper
+import evaluate
+from evaluate.utils import launch_gradio_widget
+import gradio as gr
+import torch
+import pandas as pd
+import random
+import classify
+import replace_explitives
+from whisper.model import Whisper
+from whisper.tokenizer import get_tokenizer
+from speechbrain.pretrained.interfaces import foreign_class
+from transformers import AutoModelForSequenceClassification, pipeline, WhisperTokenizer, RobertaForSequenceClassification, RobertaTokenizer, AutoTokenizer
+# pull in emotion detection
+# --- Add element for specification
+# pull in text classification
+# --- Add custom labels
+# --- Associate labels with radio elements
+# add logic to initiate mock notification when detected
+# pull in misophonia-specific model
+# Module-level dict presumably meant as a model cache (name-based guess); no live code in the
+# visible part of this file reads or writes it.
+model_cache = {}
+# Maps the short emotion labels looked up in classify_emotion to display names.
+emo_dict = {
+    'sad': 'Sad',
+    'hap': 'Happy',
+    'ang': 'Anger',
+    'neu': 'Neutral'
+}
+# Static classes for now, but it would be best to have the user select from multiple, and to enter their own.
+# Keyed by category name; classify_toxicity looks up the "misophonia" entry.
+class_options = {
+    "misophonia": ["chewing", "breathing", "mouthsounds", "popping", "sneezing", "yawning", "smacking", "sniffling", "panting"]
+}
+# Speech-recognition pipeline built once at import time; classify_toxicity calls it on the uploaded file.
+pipe = pipeline("automatic-speech-recognition", model="openai/whisper-large")
+def classify_emotion(audio):
+    """Return the display name of the emotion predicted for an audio file.
+
+    The SpeechBrain classifier is fetched with ``foreign_class`` on every
+    call, ``classify_file`` is run on ``audio``, and the first predicted
+    label is translated through ``emo_dict``.
+    """
+    classifier = foreign_class(
+        source="speechbrain/emotion-recognition-wav2vec2-IEMOCAP",
+        pymodule_file="custom_interface.py",
+        classname="CustomEncoderWav2vec2Classifier",
+    )
+    # classify_file yields four values; only the text labels are needed here.
+    _, _, _, labels = classifier.classify_file(audio)
+    predicted = labels[0]
+    return emo_dict[predicted]
+def slider_logic(slider):
+    """Translate a sensitivity slider position into a score threshold.
+
+    Positions 1 through 5 map to .98, .88, .77, .66 and .55 respectively.
+    Any other value yields an empty list.
+    """
+    level_thresholds = (
+        (1, .98),
+        (2, .88),
+        (3, .77),
+        (4, .66),
+        (5, .55),
+    )
+    # Compare with == (not a dict lookup) so floats such as 2.0 and
+    # unhashable inputs behave exactly like an if/elif chain would.
+    for level, value in level_thresholds:
+        if slider == level:
+            return value
+    return []
+# Gradio callback: scores an uploaded audio file against the misophonia trigger classes
+def classify_toxicity(audio_file, slider):
+    """Score an audio file against the configured misophonia class names.
+
+    Args:
+        audio_file: Value of the audio input (configured with
+            ``type="filepath"``), or None when nothing was uploaded.
+        slider: Sensitivity level from the UI. Accepted so the click handler
+            signature stays the same; it does not influence the scores.
+
+    Returns:
+        A dict mapping each class name to its softmax score.
+
+    Raises:
+        gr.Error: If no audio file was provided.
+    """
+    # Every step below needs audio, so fail early with a user-visible message
+    # instead of falling through to an undefined text fallback.
+    if audio_file is None:
+        raise gr.Error("Please upload an audio file before running.")
+    # Load the Whisper model once and reuse it on later requests.
+    model = model_cache.get("large")
+    if model is None:
+        model = whisper.load_model("large")
+        model_cache["large"] = model
+    classify_anxiety = "misophonia"
+    # Copy the configured names directly: joining them with "," and splitting
+    # again would leave a trailing empty class name that gets scored too.
+    class_names = list(class_options.get(classify_anxiety, []))
+    print("class names ", class_names, "classify_anxiety ", classify_anxiety)
+    tokenizer = get_tokenizer("large")
+    # Log-probabilities of the class names computed without any audio features.
+    internal_lm_average_logprobs = classify.calculate_internal_lm_average_logprobs(
+        model=model,
+        class_names=class_names,
+        tokenizer=tokenizer,
+    )
+    audio_features = classify.calculate_audio_features(audio_file, model)
+    average_logprobs = classify.calculate_average_logprobs(
+        model=model,
+        audio_features=audio_features,
+        class_names=class_names,
+        tokenizer=tokenizer,
+    )
+    # Remove the audio-independent component before normalising.
+    average_logprobs -= internal_lm_average_logprobs
+    scores = average_logprobs.softmax(-1).tolist()
+    return dict(zip(class_names, scores))
+def positive_affirmations():
+    """Return one affirmation picked at random from a fixed set of four."""
+    choices = (
+        "I have survived my anxiety before and I will survive again now",
+        "I am not in danger; I am just uncomfortable; this too will pass",
+        "I forgive and release the past and look forward to the future",
+        "I can't control what other people say but I can control my breathing and my response",
+    )
+    return random.choice(choices)
+# UI layout: sensitivity slider, audio upload with a run button, and a label showing class scores.
+with gr.Blocks() as iface:
+    show_state = gr.State([])
+    with gr.Column():
+        sense_slider = gr.Slider(minimum=1, maximum=5, step=1.0, label="How readily do you want the tool to intervene? 1 = in extreme cases and 5 = at every opportunity")
+    with gr.Column():
+        aud_input = gr.Audio(source="upload", type="filepath", label="Upload Audio File")
+        # The button caption is set through `value`; `label` is not the button-text parameter.
+        submit_btn = gr.Button(value="Run")
+    with gr.Column():
+        # out_val = gr.Textbox()
+        out_class = gr.Label()
+    # Clicking the button sends the uploaded file path and slider position to classify_toxicity.
+    submit_btn.click(fn=classify_toxicity, inputs=[aud_input, sense_slider], outputs=out_class)
+iface.launch()