mskov committed on
Commit
fc004ff
1 Parent(s): 147b7bc

Create app.py

Files changed (1)
  1. app.py +132 -0
app.py ADDED
@@ -0,0 +1,132 @@
+ import os
+ os.system("pip install git+https://github.com/openai/whisper.git")
+ import whisper
+ import evaluate
+ from evaluate.utils import launch_gradio_widget
+ import gradio as gr
+ import torch
+ import pandas as pd
+ import random
+ import classify
+ import replace_explitives
+ from whisper.model import Whisper
+ from whisper.tokenizer import get_tokenizer
+ from speechbrain.pretrained.interfaces import foreign_class
+ from transformers import AutoModelForSequenceClassification, pipeline, WhisperTokenizer, RobertaForSequenceClassification, RobertaTokenizer, AutoTokenizer
+
+
+ # pull in emotion detection
+ # --- Add element for specification
+ # pull in text classification
+ # --- Add custom labels
+ # --- Associate labels with radio elements
+ # add logic to initiate mock notification when detected
+ # pull in misophonia-specific model
+
+ model_cache = {}
+
+ # Building the prediction function for gradio
+ emo_dict = {
+     'sad': 'Sad',
+     'hap': 'Happy',
+     'ang': 'Anger',
+     'neu': 'Neutral'
+ }
+
+ # static classes for now, but it would be best to have the user select from multiple, and to enter their own
+ class_options = {
+     "misophonia": ["chewing", "breathing", "mouthsounds", "popping", "sneezing", "yawning", "smacking", "sniffling", "panting"]
+ }
+
+ # Whisper ASR pipeline used to transcribe the uploaded audio
+ pipe = pipeline("automatic-speech-recognition", model="openai/whisper-large")
+
+ def classify_emotion(audio):
+     #### Emotion classification ####
+     emotion_classifier = foreign_class(source="speechbrain/emotion-recognition-wav2vec2-IEMOCAP", pymodule_file="custom_interface.py", classname="CustomEncoderWav2vec2Classifier")
+     out_prob, score, index, text_lab = emotion_classifier.classify_file(audio)
+     return emo_dict[text_lab[0]]
+
+ def slider_logic(slider):
+     # map the 1-5 sensitivity slider to a probability threshold
+     threshold = 0
+     if slider == 1:
+         threshold = .98
+     elif slider == 2:
+         threshold = .88
+     elif slider == 3:
+         threshold = .77
+     elif slider == 4:
+         threshold = .66
+     elif slider == 5:
+         threshold = .55
+     else:
+         threshold = []
+     return threshold
+
+ # Prediction function for the Gradio interface: takes the uploaded audio file and the sensitivity slider value
+ def classify_toxicity(audio_file, slider):
+     # Transcribe the audio file using Whisper ASR
+     if audio_file is not None:
+         transcribed_text = pipe(audio_file)["text"]
+     else:
+         # no audio was provided and this UI has no text input, so fall back to an empty transcript
+         transcribed_text = ""
+
+     threshold = slider_logic(slider)
+     model = whisper.load_model("large")
+     # model = model_cache[model_name]
+     # class_names = classify_anxiety.split(",")
+     classify_anxiety = "misophonia"
+     class_names_list = class_options.get(classify_anxiety, [])
+     class_str = ""
+     for elm in class_names_list:
+         class_str += elm + ","
+     #class_names = class_names_temp.split(",")
+     # strip the trailing comma so split() does not yield an empty class name
+     class_names = class_str.rstrip(",").split(",")
+     print("class names ", class_names, "classify_anxiety ", classify_anxiety)
+
+     tokenizer = get_tokenizer("large")
+     # tokenizer= WhisperTokenizer.from_pretrained("openai/whisper-large")
+
+     internal_lm_average_logprobs = classify.calculate_internal_lm_average_logprobs(
+         model=model,
+         class_names=class_names,
+         # class_names=classify_anxiety,
+         tokenizer=tokenizer,
+     )
+     audio_features = classify.calculate_audio_features(audio_file, model)
+     average_logprobs = classify.calculate_average_logprobs(
+         model=model,
+         audio_features=audio_features,
+         class_names=class_names,
+         tokenizer=tokenizer,
+     )
+     average_logprobs -= internal_lm_average_logprobs
+     scores = average_logprobs.softmax(-1).tolist()
+     holder1 = {class_name: score for class_name, score in zip(class_names, scores)}
+     # miso_label_dict = {label: score for label, score in classify_anxiety[0].items()}
+     holder2 = ""
+     holder3 = " "
+     return {class_name: score for class_name, score in zip(class_names, scores)}
+
+ def positive_affirmations():
+     affirmations = [
+         "I have survived my anxiety before and I will survive again now",
+         "I am not in danger; I am just uncomfortable; this too will pass",
+         "I forgive and release the past and look forward to the future",
+         "I can't control what other people say but I can control my breathing and my response"
+     ]
+     selected_affirm = random.choice(affirmations)
+     return selected_affirm
+
+ with gr.Blocks() as iface:
+     show_state = gr.State([])
+     with gr.Column():
+         sense_slider = gr.Slider(minimum=1, maximum=5, step=1.0, label="How readily do you want the tool to intervene? 1 = in extreme cases and 5 = at every opportunity")
+     with gr.Column():
+         aud_input = gr.Audio(source="upload", type="filepath", label="Upload Audio File")
+         submit_btn = gr.Button("Run")
+     with gr.Column():
+         # out_val = gr.Textbox()
+         out_class = gr.Label()
+         submit_btn.click(fn=classify_toxicity, inputs=[aud_input, sense_slider], outputs=out_class)
+
+ iface.launch()
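
The classify_toxicity function above depends on a local classify module that is imported at the top of app.py but is not included in this commit. As a hedged illustration only, the sketch below shows one way helpers matching the three call sites (calculate_audio_features, calculate_average_logprobs, calculate_internal_lm_average_logprobs) could be written: Whisper is treated as a zero-shot audio classifier by letting its decoder score the tokens of each class name against the encoded audio, and app.py then subtracts an audio-independent baseline so the decoder's language-model prior over common words does not dominate. Every detail here, including the silent-audio baseline and the leading-space prompt format, is an assumption rather than the app's actual implementation.

# classify_sketch.py -- hypothetical stand-in for the local `classify` module, not the real file
import numpy as np
import torch
import whisper
from whisper.audio import N_SAMPLES, log_mel_spectrogram, pad_or_trim
from whisper.model import Whisper
from whisper.tokenizer import Tokenizer


@torch.no_grad()
def calculate_audio_features(audio_path: str, model: Whisper) -> torch.Tensor:
    # load up to 30 s of audio, build a log-Mel spectrogram, and run the Whisper encoder
    audio = pad_or_trim(whisper.load_audio(audio_path))
    mel = log_mel_spectrogram(audio).to(model.device)
    return model.embed_audio(mel.unsqueeze(0))  # shape: (1, n_audio_ctx, d_model)


@torch.no_grad()
def _average_logprob(model: Whisper, audio_features: torch.Tensor, class_tokens: list, tokenizer: Tokenizer) -> float:
    # score the class-name tokens with the decoder and average their log-probabilities
    prefix = list(tokenizer.sot_sequence_including_notimestamps)
    tokens = torch.tensor([prefix + class_tokens], device=model.device)
    logprobs = torch.log_softmax(model.logits(tokens, audio_features).float(), dim=-1)
    # position i of the logits predicts token i + 1, so the first class token is
    # predicted at index len(prefix) - 1
    total = sum(
        logprobs[0, len(prefix) - 1 + i, tok].item() for i, tok in enumerate(class_tokens)
    )
    return total / len(class_tokens)


@torch.no_grad()
def calculate_average_logprobs(model: Whisper, audio_features: torch.Tensor, class_names: list, tokenizer: Tokenizer) -> torch.Tensor:
    # one score per class name, conditioned on the audio
    return torch.tensor(
        [_average_logprob(model, audio_features, tokenizer.encode(" " + name), tokenizer) for name in class_names]
    )


@torch.no_grad()
def calculate_internal_lm_average_logprobs(model: Whisper, class_names: list, tokenizer: Tokenizer) -> torch.Tensor:
    # crude audio-independent baseline: score the same class names against silence;
    # app.py subtracts this so frequent words are not favoured by the decoder's language model
    silence = np.zeros(N_SAMPLES, dtype=np.float32)
    silent_features = model.embed_audio(log_mel_spectrogram(silence).to(model.device).unsqueeze(0))
    return calculate_average_logprobs(model, silent_features, class_names, tokenizer)

With a module shaped like this on the import path, classify_toxicity returns a dict mapping each class name to a softmax score, which the gr.Label component renders as a ranked list of confidences.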