aslanovaf committed on
Commit fc92389 • 1 Parent(s): bd58d09

Update app.py

Files changed (1)
  1. app.py +158 -3
app.py CHANGED
@@ -1,7 +1,162 @@
 import gradio as gr
+import whisper
+from transformers import pipeline
+import pandas as pd, numpy as np
 import os
+import torchaudio
+import librosa
+from scipy.io.wavfile import write
+import shutil
+import soundfile as sf
+import noisereduce as nr
+from scipy.stats import skew
+from tqdm import tqdm
+import requests
+import pickle
+import dash
+import dash_bootstrap_components as dbc
+from dash import html
 
-hf_token = os.environ.get('HF_TOKEN')
+sr = 8000
 
-iface = gr.load(name="aslanovaf/Sentiment_Analysis_Azerbaijani", hf_token=hf_token, src="spaces")
-iface.queue(api_open=False).launch(show_api=False)
+url = "https://huggingface.co/spaces/aslanovaf/Sentiment_Analysis_Azerbaijani/resolve/main/sentiment_model_8000.pickle"
+hf_token = os.environ.get("HF_TOKEN")
+headers = {"Authorization": f"Bearer {hf_token}"}
+
+response = requests.get(url, headers=headers)
+if response.status_code == 200:
+    model = pickle.loads(response.content)
+else:
+    raise RuntimeError(f"Failed to download the sentiment model from {url} (Status code: {response.status_code})")
+
+
+def split_full_audio_15_sec(audio_file):
+    audio, orig_sr = sf.read(audio_file)
+    audio = librosa.resample(y=audio, orig_sr=orig_sr, target_sr=sr)
+
+    chunk_length = 15 * sr
+    total_length = len(audio)
+    start_index = 0
+    end_index = min(chunk_length, total_length)
+    f = 0
+    chunks = []
+
+    while start_index < total_length:
+        chunk = audio[start_index:end_index]
+        chunk_name = f"example_{f}.wav"
+        chunk_duration = len(chunk) / sr
+        if chunk_duration < 3:
+            break
+        chunks.append(chunk)
+        start_index = end_index
+        end_index = min(end_index + chunk_length, total_length)
+        f += 1
+    return chunks
+
+def get_mfcc(name):
+    resampled_audio = name
+
+    try:
+        reduced_noise = nr.reduce_noise(resampled_audio, sr=sr)
+        ft1 = librosa.feature.mfcc(y=reduced_noise, sr=sr, n_mfcc=16)
+        ft2 = librosa.feature.zero_crossing_rate(reduced_noise)[0]
+        ft3 = librosa.feature.spectral_rolloff(y=reduced_noise)[0]
+        ft4 = librosa.feature.spectral_centroid(y=reduced_noise)[0]
+        ft1_trunc = np.hstack((np.mean(ft1, axis=1), np.std(ft1, axis=1), skew(ft1, axis=1), np.max(ft1, axis=1), np.min(ft1, axis=1)))
+        ft2_trunc = np.hstack((np.mean(ft2), np.std(ft2), skew(ft2), np.max(ft2), np.min(ft2)))
+        ft3_trunc = np.hstack((np.mean(ft3), np.std(ft3), skew(ft3), np.max(ft3), np.min(ft3)))
+        ft4_trunc = np.hstack((np.mean(ft4), np.std(ft4), skew(ft4), np.max(ft4), np.min(ft4)))
+        return pd.Series(np.hstack((ft1_trunc, ft2_trunc, ft3_trunc, ft4_trunc)))
+    except Exception:
+        print('bad file')
+        return pd.Series([0]*95)
+
+
+def analyze_sentiment(audio):
+    chunks = split_full_audio_15_sec(audio)
+    chunked_df = pd.DataFrame(data={'Chunk_order': [f'Chunk_{i+1}' for i in range(len(chunks))], 'Data': chunks})
+    df_features = chunked_df['Data'].apply(get_mfcc)
+    df = pd.concat([chunked_df, df_features], axis=1)
+    df = df.drop(columns=['Data'])
+    df.columns = ['Chunk_order'] + [f'Feature_{i+1}' for i in range(95)]
+    df['Prediction'] = model.predict(df.drop(columns=['Chunk_order']))
+
+    df['Prediction'] = df['Prediction'].map({  # map the raw model labels to display names
+        'pozitive_normal': 'Normal',
+        'scope': 'Silence',
+        'neqativ': 'Negative'
+    })
+
+    clean_df = df[['Chunk_order', 'Prediction']]
+    predictions = df['Prediction'].tolist()
+    final_prediction = 'Negative' if 'Negative' in predictions else 'Normal' if 'Normal' in predictions else 'Silence'
+    final_prediction_2x = 'Negative' if predictions.count('Negative') > 1 else 'Normal' if 'Normal' in predictions else 'Silence'
+
+    color_map = {
+        'Normal': 'success',
+        'Silence': 'warning',
+        'Negative': 'danger'
+    }
+
+    return (', '.join(predictions), final_prediction)
+
+
+title = """<h1 align="center">🎤 Azerbaijani Audio Speech Sentiment Analysis 💬</h1>"""
+image_path = "thmbnail.jpg"
+description = """
+💻 This demo showcases a general-purpose audio sentiment analysis pipeline. The model is trained on audio features extracted from a collection of calls in the banking/fintech industry. It predicts one of three categories (Normal/Negative/Silence) for each 15-second bucket of the audio, and an overall category for the whole recording is also estimated.
+<br>
+⚙️ Components of the tool:<br>
+<br>
+&nbsp;&nbsp;&nbsp;&nbsp; - Sentiment analysis performed directly on the audio.<br>
+<br>
+❓ Use the microphone for real-time audio recording.<br>
+↑ Or upload an audio file.<br>
+<br>
+
+⚡️ The model will extract audio features and perform sentiment analysis on the audio.<br>
+
+"""
+
+custom_css = """
+#banner-image {
+    display: block;
+    margin-left: auto;
+    margin-right: auto;
+}
+#chat-message {
+    font-size: 14px;
+    min-height: 300px;
+}
+"""
+
+block = gr.Blocks(css=custom_css)
+
+
+with block:
+    gr.HTML(title)
+
+    with gr.Row():
+        with gr.Column():
+            gr.HTML(description)
+        with gr.Column():
+            gr.Image(image_path, elem_id="banner-image", show_label=False)
+
+    gr.Interface(
+        fn=analyze_sentiment,
+        inputs=[
+            gr.Audio(sources=["upload", "microphone"], type="filepath", label="Input Audio"),
+        ],
+        outputs=[gr.Textbox(label="Sentiment Analysis Results of 15-second buckets"), gr.Textbox(label="Final Prediction")],
+        # layout="vertical",
+        # theme="huggingface",
+        examples=[
+            ["./Recording_1.wav"],
+            ["./Recording_2.wav"],
+        ],
+        cache_examples=True,
+        allow_flagging="never",
+    )
+    # gr.TabbedInterface([mic, file], ["Audio from Microphone", "Audio from File"])
+
+block.launch()
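
A note on the fixed-length output of get_mfcc: each of the 16 MFCC coefficients contributes five statistics (mean, std, skew, max, min), i.e. 80 values, and the zero-crossing rate, spectral rolloff, and spectral centroid contribute five statistics each, i.e. 15 more, so a valid chunk always yields 95 features and the fallback pd.Series([0]*95) keeps the column count consistent. A quick sanity check, a sketch only, assuming get_mfcc and its imports are already defined in the session (importing app.py directly would also trigger the model download and launch the interface):

import numpy as np

dummy_chunk = np.random.randn(8000)   # synthetic 1-second chunk at the 8 kHz rate used above
features = get_mfcc(dummy_chunk)      # get_mfcc as defined in app.py
print(len(features))                  # expected: 95 (16 MFCCs * 5 stats + 3 spectral features * 5 stats)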
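
For reference, a minimal sketch of how the updated Space could be exercised programmatically once it is running, using the gradio_client package; the "/predict" endpoint name and the output ordering are assumptions based on the gr.Interface above, and "sample.wav" is a placeholder path for a local recording:

from gradio_client import Client, handle_file   # assumed: pip install gradio_client

client = Client("aslanovaf/Sentiment_Analysis_Azerbaijani")   # Space ID from the model URL above
buckets, final = client.predict(
    handle_file("sample.wav"),   # placeholder local audio file
    api_name="/predict",         # assumed default endpoint name
)
print("15-second buckets:", buckets)
print("Final prediction:", final)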