Spaces: Runtime error
Upload 5 files

- README.md +1 -14
- app.py +42 -0
- data_preprocessing.py +25 -0
- model_training.py +70 -0
- requirements.txt +8 -0
README.md
CHANGED
@@ -1,14 +1 @@
----
-title: VocalBiomarkersForMentalHealth
-emoji: π
-colorFrom: pink
-colorTo: pink
-sdk: gradio
-sdk_version: 5.6.0
-app_file: app.py
-pinned: false
-license: mit
-short_description: AI powered Vocal Biomarkers for Mental Health
----
-
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+This repository is empty. Any task executed on it should be considered a new capability and needs to be created.
app.py
ADDED
@@ -0,0 +1,42 @@
+import gradio as gr
+from transformers import pipeline
+import librosa
+import numpy as np
+import torch
+
+# Load pre-trained models from Hugging Face Hub
+emotion_model = pipeline("sentiment-analysis", model="bhadresh-savani/distilbert-base-uncased-emotion")
+
+# Define preprocessing functions for vocal input data
+def preprocess_audio(audio):
+    y, sr = librosa.load(audio, sr=16000)
+    return y, sr
+
+def extract_features(y, sr):
+    features = {
+        "mfcc": librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13).mean(axis=1),
+        "chroma": librosa.feature.chroma_stft(y=y, sr=sr).mean(axis=1),
+        "mel": librosa.feature.melspectrogram(y=y, sr=sr).mean(axis=1),
+        "contrast": librosa.feature.spectral_contrast(y=y, sr=sr).mean(axis=1),
+        "tonnetz": librosa.feature.tonnetz(y=y, sr=sr).mean(axis=1)
+    }
+    return features
+
+# Define prediction functions to analyze vocal biomarkers
+def analyze_emotion(audio):
+    y, sr = preprocess_audio(audio)
+    features = extract_features(y, sr)
+    input_data = torch.tensor([features["mfcc"], features["chroma"], features["mel"], features["contrast"], features["tonnetz"]])
+    result = emotion_model(input_data)
+    return result
+
+# Create Gradio interface for user input and analysis results
+def analyze(audio):
+    result = analyze_emotion(audio)
+    return result
+
+iface = gr.Interface(fn=analyze, inputs=gr.inputs.Audio(source="microphone", type="filepath"), outputs="text")
+
+# Launch the Gradio app
+if __name__ == "__main__":
+    iface.launch()
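Note on the Runtime error status: as committed, app.py builds its input with gr.inputs.Audio(source=...), an API that was removed in Gradio 4.x, so the Space likely fails at import on recent Gradio releases. Separately, analyze_emotion stacks feature vectors of different lengths into a single torch.tensor (which torch rejects) and passes the result to a text-classification pipeline that expects strings. One possible rework is sketched below; it assumes the audio is first transcribed with a Whisper ASR pipeline (openai/whisper-tiny.en, not part of the original commit) and the transcript is then scored by the same emotion model.

import gradio as gr
from transformers import pipeline

# Assumed models: Whisper for transcription (hypothetical choice), plus the
# emotion classifier already referenced in the commit.
asr = pipeline("automatic-speech-recognition", model="openai/whisper-tiny.en")
emotion_model = pipeline("text-classification", model="bhadresh-savani/distilbert-base-uncased-emotion")

def analyze(audio_path):
    if audio_path is None:
        return "No audio received."
    text = asr(audio_path)["text"]   # transcribe the recording
    pred = emotion_model(text)[0]    # {'label': ..., 'score': ...}
    return f"Transcript: {text}\nPredicted emotion: {pred['label']} ({pred['score']:.2f})"

# Gradio 4/5 API: gr.Audio(sources=[...]) replaces gr.inputs.Audio(source=...)
iface = gr.Interface(fn=analyze, inputs=gr.Audio(sources=["microphone"], type="filepath"), outputs="text")

if __name__ == "__main__":
    iface.launch()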
data_preprocessing.py
ADDED
@@ -0,0 +1,25 @@
+import librosa
+import numpy as np
+import scipy
+
+def preprocess_audio(audio):
+    y, sr = librosa.load(audio, sr=16000)
+    return y, sr
+
+def clean_audio(y, sr):
+    y = librosa.effects.trim(y)[0]
+    y = librosa.util.normalize(y)
+    return y
+
+def extract_features(y, sr):
+    features = {
+        "mfcc": librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13).mean(axis=1),
+        "chroma": librosa.feature.chroma_stft(y=y, sr=sr).mean(axis=1),
+        "mel": librosa.feature.melspectrogram(y=y, sr=sr).mean(axis=1),
+        "contrast": librosa.feature.spectral_contrast(y=y, sr=sr).mean(axis=1),
+        "tonnetz": librosa.feature.tonnetz(y=y, sr=sr).mean(axis=1)
+    }
+    return features
+
+def store_preprocessed_data(features, filename):
+    np.savez(filename, **features)
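For reference, a minimal usage sketch of these helpers chained end to end (the file name is illustrative; note that the committed app.py only calls preprocess_audio and extract_features and skips clean_audio):

from data_preprocessing import preprocess_audio, clean_audio, extract_features, store_preprocessed_data

y, sr = preprocess_audio("sample_recording.wav")           # load and resample to 16 kHz
y = clean_audio(y, sr)                                     # trim leading/trailing silence, normalize amplitude
features = extract_features(y, sr)                         # dict of time-averaged spectral features
store_preprocessed_data(features, "sample_recording.npz")  # saved via np.savez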
model_training.py
ADDED
@@ -0,0 +1,70 @@
+import torch
+from transformers import AutoModelForSequenceClassification, AutoTokenizer
+from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix, mean_absolute_error, mean_squared_error
+from data_preprocessing import preprocess_audio, extract_features
+
+# Load pre-trained models from Hugging Face Hub
+model_name = "bhadresh-savani/distilbert-base-uncased-emotion"
+model = AutoModelForSequenceClassification.from_pretrained(model_name)
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+
+# Prepare the dataset using functions from data_preprocessing.py
+def prepare_dataset(audio_files):
+    features = []
+    labels = []
+    for audio in audio_files:
+        y, sr = preprocess_audio(audio)
+        feature = extract_features(y, sr)
+        features.append(feature)
+        # Assuming labels are provided in the filename as the last character before the extension
+        label = int(audio.split('_')[-1].split('.')[0])
+        labels.append(label)
+    return features, labels
+
+# Define the training loop for fine-tuning models
+def train_model(model, tokenizer, train_features, train_labels, epochs=3, batch_size=8):
+    optimizer = torch.optim.AdamW(model.parameters(), lr=5e-5)
+    model.train()
+    for epoch in range(epochs):
+        for i in range(0, len(train_features), batch_size):
+            batch_features = train_features[i:i+batch_size]
+            batch_labels = train_labels[i:i+batch_size]
+            inputs = tokenizer(batch_features, return_tensors="pt", padding=True, truncation=True)
+            labels = torch.tensor(batch_labels)
+            outputs = model(**inputs, labels=labels)
+            loss = outputs.loss
+            loss.backward()
+            optimizer.step()
+            optimizer.zero_grad()
+            print(f"Epoch {epoch+1}, Batch {i//batch_size+1}, Loss: {loss.item()}")
+
+# Implement functions to evaluate model performance
+def evaluate_model(model, tokenizer, test_features, test_labels):
+    model.eval()
+    predictions = []
+    with torch.no_grad():
+        for feature in test_features:
+            inputs = tokenizer(feature, return_tensors="pt", padding=True, truncation=True)
+            outputs = model(**inputs)
+            prediction = torch.argmax(outputs.logits, dim=1).item()
+            predictions.append(prediction)
+    accuracy = accuracy_score(test_labels, predictions)
+    precision = precision_score(test_labels, predictions, average='weighted')
+    recall = recall_score(test_labels, predictions, average='weighted')
+    f1 = f1_score(test_labels, predictions, average='weighted')
+    auc_roc = roc_auc_score(test_labels, predictions, average='weighted', multi_class='ovr')
+    conf_matrix = confusion_matrix(test_labels, predictions)
+    mae = mean_absolute_error(test_labels, predictions)
+    rmse = mean_squared_error(test_labels, predictions, squared=False)
+    return accuracy, precision, recall, f1, auc_roc, conf_matrix, mae, rmse
+
+# Save the trained models in a usable format
+def save_model(model, path):
+    model.save_pretrained(path)
+    tokenizer.save_pretrained(path)
+
+# Log training progress and performance metrics
+def log_metrics(metrics, filename):
+    with open(filename, 'w') as f:
+        for key, value in metrics.items():
+            f.write(f"{key}: {value}\n")
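Two caveats on model_training.py as committed: the training and evaluation loops pass the feature dictionaries returned by prepare_dataset straight into the tokenizer, which expects text strings, so the fine-tuning path would need transcripts (or an audio-capable model) before it can run; and for multi-class targets roc_auc_score with multi_class='ovr' needs per-class probability scores rather than the hard argmax predictions collected in evaluate_model, while mean_squared_error(..., squared=False) is deprecated in newer scikit-learn releases. A hedged sketch of how the probabilities could be gathered, using a hypothetical helper name:

import numpy as np
import torch

def predict_with_probs(model, tokenizer, texts):
    """Hypothetical helper: return (n_samples, n_classes) softmax probabilities."""
    probs = []
    model.eval()
    with torch.no_grad():
        for text in texts:
            inputs = tokenizer(text, return_tensors="pt", truncation=True)
            logits = model(**inputs).logits
            probs.append(torch.softmax(logits, dim=1).squeeze(0).numpy())
    return np.stack(probs)

# probs = predict_with_probs(model, tokenizer, test_texts)
# predictions = probs.argmax(axis=1)
# auc_roc = roc_auc_score(test_labels, probs, multi_class='ovr', average='weighted')
# rmse = mean_squared_error(test_labels, predictions) ** 0.5  # avoids the deprecated squared=False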
requirements.txt
ADDED
@@ -0,0 +1,8 @@
+gradio
+transformers
+torch
+librosa
+numpy
+scipy
+pandas
+scikit-learn