invincible-jha committed
Commit 89afd96 · verified · Parent(s): 940974c

Upload 5 files

Files changed (5)
  1. README.md +1 -14
  2. app.py +42 -0
  3. data_preprocessing.py +25 -0
  4. model_training.py +70 -0
  5. requirements.txt +8 -0
README.md CHANGED
@@ -1,14 +1 @@
- ---
- title: VocalBiomarkersForMentalHealth
- emoji: 🌍
- colorFrom: pink
- colorTo: pink
- sdk: gradio
- sdk_version: 5.6.0
- app_file: app.py
- pinned: false
- license: mit
- short_description: AI powered Vocal Biomarkers for Mental Health
- ---
-
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ This repository is empty. Any task executed on it should be treated as a new capability that needs to be created.
app.py ADDED
@@ -0,0 +1,46 @@
+ import gradio as gr
+ from transformers import pipeline
+ import librosa
+
+ # Load pre-trained models from the Hugging Face Hub. The emotion model
+ # classifies text, so speech is transcribed first; openai/whisper-tiny.en
+ # is assumed here as the ASR checkpoint.
+ asr_model = pipeline("automatic-speech-recognition", model="openai/whisper-tiny.en")
+ emotion_model = pipeline("sentiment-analysis", model="bhadresh-savani/distilbert-base-uncased-emotion")
+
+ # Preprocessing for vocal input data
+ def preprocess_audio(audio):
+     # Resample to 16 kHz mono, the rate most speech models expect
+     y, sr = librosa.load(audio, sr=16000)
+     return y, sr
+
+ def extract_features(y, sr):
+     # Summarize each spectral descriptor by its mean over time
+     features = {
+         "mfcc": librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13).mean(axis=1),
+         "chroma": librosa.feature.chroma_stft(y=y, sr=sr).mean(axis=1),
+         "mel": librosa.feature.melspectrogram(y=y, sr=sr).mean(axis=1),
+         "contrast": librosa.feature.spectral_contrast(y=y, sr=sr).mean(axis=1),
+         "tonnetz": librosa.feature.tonnetz(y=y, sr=sr).mean(axis=1)
+     }
+     return features
+
+ # Analyze vocal biomarkers in a recording
+ def analyze_emotion(audio):
+     # Acoustic features are extracted so they can be logged or used later;
+     # the emotion label itself comes from the transcript
+     y, sr = preprocess_audio(audio)
+     features = extract_features(y, sr)
+     text = asr_model(audio)["text"]
+     result = emotion_model(text)
+     return result
+
+ # Gradio interface for user input and analysis results
+ def analyze(audio):
+     return analyze_emotion(audio)
+
+ iface = gr.Interface(fn=analyze, inputs=gr.Audio(sources=["microphone"], type="filepath"), outputs="text")
+
+ # Launch the Gradio app
+ if __name__ == "__main__":
+     iface.launch()
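
A quick smoke test, as a sketch: analyze can be called directly on an audio file, bypassing the microphone widget. The file name is a placeholder, and the printed output follows the transformers pipeline convention.

from app import analyze

# "sample.wav" is a hypothetical 16 kHz speech clip
print(analyze("sample.wav"))  # e.g. [{'label': 'joy', 'score': 0.98}]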
data_preprocessing.py ADDED
@@ -0,0 +1,28 @@
+ import librosa
+ import numpy as np
+
+ def preprocess_audio(audio):
+     # Load and resample to 16 kHz mono
+     y, sr = librosa.load(audio, sr=16000)
+     return y, sr
+
+ def clean_audio(y, sr):
+     # Trim leading/trailing silence, then peak-normalize
+     y = librosa.effects.trim(y)[0]
+     y = librosa.util.normalize(y)
+     return y
+
+ def extract_features(y, sr):
+     # Summarize each spectral descriptor by its mean over time
+     features = {
+         "mfcc": librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13).mean(axis=1),
+         "chroma": librosa.feature.chroma_stft(y=y, sr=sr).mean(axis=1),
+         "mel": librosa.feature.melspectrogram(y=y, sr=sr).mean(axis=1),
+         "contrast": librosa.feature.spectral_contrast(y=y, sr=sr).mean(axis=1),
+         "tonnetz": librosa.feature.tonnetz(y=y, sr=sr).mean(axis=1)
+     }
+     return features
+
+ def store_preprocessed_data(features, filename):
+     # Persist the feature dict as an .npz archive (one array per key)
+     np.savez(filename, **features)
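
A minimal end-to-end sketch of the preprocessing pipeline; the file names are placeholders:

import numpy as np
from data_preprocessing import preprocess_audio, clean_audio, extract_features, store_preprocessed_data

y, sr = preprocess_audio("sample.wav")                    # hypothetical input clip
y = clean_audio(y, sr)                                    # trim silence, normalize
features = extract_features(y, sr)
store_preprocessed_data(features, "sample_features.npz")

# The archive round-trips with np.load
loaded = np.load("sample_features.npz")
print(loaded["mfcc"].shape)                               # (13,): one mean per MFCC coefficient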
model_training.py ADDED
@@ -0,0 +1,77 @@
+ import numpy as np
+ import torch
+ from transformers import AutoModelForSequenceClassification, AutoTokenizer
+ from sklearn.metrics import (accuracy_score, precision_score, recall_score, f1_score,
+                              roc_auc_score, confusion_matrix, mean_absolute_error,
+                              mean_squared_error)
+ from data_preprocessing import preprocess_audio, extract_features
+
+ # Load a pre-trained model and tokenizer from the Hugging Face Hub
+ model_name = "bhadresh-savani/distilbert-base-uncased-emotion"
+ model = AutoModelForSequenceClassification.from_pretrained(model_name)
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
+
+ # Serialize a feature dict into a string so the text tokenizer can consume it.
+ # This is a crude stopgap; a dedicated audio encoder would be a better fit.
+ def features_to_text(feature):
+     return " ".join(f"{v:.3f}" for v in np.concatenate(list(feature.values())))
+
+ # Prepare the dataset using functions from data_preprocessing.py
+ def prepare_dataset(audio_files):
+     features = []
+     labels = []
+     for audio in audio_files:
+         y, sr = preprocess_audio(audio)
+         features.append(features_to_text(extract_features(y, sr)))
+         # Labels are assumed to be encoded in the filename, e.g. "clip_3.wav" -> 3
+         labels.append(int(audio.split('_')[-1].split('.')[0]))
+     return features, labels
+
+ # Training loop for fine-tuning the model
+ def train_model(model, tokenizer, train_features, train_labels, epochs=3, batch_size=8):
+     optimizer = torch.optim.AdamW(model.parameters(), lr=5e-5)
+     model.train()
+     for epoch in range(epochs):
+         for i in range(0, len(train_features), batch_size):
+             batch_features = train_features[i:i+batch_size]
+             batch_labels = torch.tensor(train_labels[i:i+batch_size])
+             inputs = tokenizer(batch_features, return_tensors="pt", padding=True, truncation=True)
+             outputs = model(**inputs, labels=batch_labels)
+             loss = outputs.loss
+             loss.backward()
+             optimizer.step()
+             optimizer.zero_grad()
+             print(f"Epoch {epoch+1}, Batch {i//batch_size+1}, Loss: {loss.item():.4f}")
+
+ # Evaluate model performance on a held-out set
+ def evaluate_model(model, tokenizer, test_features, test_labels):
+     model.eval()
+     predictions = []
+     probabilities = []
+     with torch.no_grad():
+         for feature in test_features:
+             inputs = tokenizer(feature, return_tensors="pt", padding=True, truncation=True)
+             outputs = model(**inputs)
+             probabilities.append(torch.softmax(outputs.logits, dim=1).squeeze(0).tolist())
+             predictions.append(torch.argmax(outputs.logits, dim=1).item())
+     accuracy = accuracy_score(test_labels, predictions)
+     precision = precision_score(test_labels, predictions, average='weighted', zero_division=0)
+     recall = recall_score(test_labels, predictions, average='weighted', zero_division=0)
+     f1 = f1_score(test_labels, predictions, average='weighted', zero_division=0)
+     # ROC-AUC needs class probabilities, and every class must appear in test_labels
+     auc_roc = roc_auc_score(test_labels, probabilities, average='weighted', multi_class='ovr')
+     conf_matrix = confusion_matrix(test_labels, predictions)
+     mae = mean_absolute_error(test_labels, predictions)
+     rmse = np.sqrt(mean_squared_error(test_labels, predictions))
+     return accuracy, precision, recall, f1, auc_roc, conf_matrix, mae, rmse
+
+ # Save the trained model and tokenizer in a usable format
+ def save_model(model, tokenizer, path):
+     model.save_pretrained(path)
+     tokenizer.save_pretrained(path)
+
+ # Log training progress and performance metrics
+ def log_metrics(metrics, filename):
+     with open(filename, 'w') as f:
+         for key, value in metrics.items():
+             f.write(f"{key}: {value}\n")
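
How the pieces fit together, sketched under stated assumptions: the clip names are hypothetical and carry integer labels in the filename as prepare_dataset expects, and evaluate_model is skipped here because its ROC-AUC step needs test data covering every class of the checkpoint.

from model_training import model, tokenizer, prepare_dataset, train_model, save_model, log_metrics

# Hypothetical labeled clips: the "_0" / "_1" suffixes encode the class
train_files = ["calm_0.wav", "anxious_1.wav", "calm2_0.wav", "anxious2_1.wav"]
features, labels = prepare_dataset(train_files)

train_model(model, tokenizer, features, labels, epochs=1, batch_size=2)
save_model(model, tokenizer, "fine_tuned_model")
log_metrics({"train_clips": len(train_files)}, "training.log")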
requirements.txt ADDED
@@ -0,0 +1,8 @@
+ gradio
+ transformers
+ torch
+ librosa
+ numpy
+ scipy
+ pandas
+ scikit-learn
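
With the dependencies pinned only by name, a standard local setup is:

pip install -r requirements.txt
python app.py   # serves the Gradio interface locally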