File size: 2,981 Bytes
51c4b3f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
import os
import random
import tempfile

import librosa
import tensorflow as tf
import soundfile
import gradio as gr
import pandas as pd
import numpy as np
# Set KMP_DUPLICATE_LIB_OK so a duplicated OpenMP runtime does not abort the process.
# NOTE(review): this runs after the heavy imports above; confirm it still takes
# effect at this point, or move it ahead of them.
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"

# Directory holding the example recordings offered in the UI.
audio_files_path = 'examples'
# Class labels; the index of the model's highest-scoring output selects the name.
c_names = ['Bronchiectasis', 'COPD', 'Healthy', 'Pneumonia', 'URTI']

# Collect the example .wav files. Sorting keeps the example order stable
# (os.listdir order is arbitrary), and a missing directory just means the app
# starts without examples instead of failing at import time.
if os.path.isdir(audio_files_path):
    audio_files = sorted(
        name for name in os.listdir(audio_files_path) if name.endswith('.wav')
    )
else:
    audio_files = []

# Same file names as a one-column DataFrame, then back to a plain list for the UI.
audio_files_df = pd.DataFrame(audio_files, columns=['audio_paths'])
audio_files_to_show = audio_files_df['audio_paths'].tolist()
# Plan for the Gradio interface:
# 0. Load the model.
# 1. Take an audio file as input.
# 2. Provide Clear and Submit buttons.
# 3. On submit, preprocess the audio into a log-mel spectrogram, then run it through the model.
# 4. Output the prediction.

def load_model(
    model_path="models/lung_disease_predictor_cnn_logmel_without_data_augmentation.keras",
):
    """Load a saved Keras model from disk.

    Args:
        model_path: Location of the saved model. Defaults to the log-mel CNN
            checkpoint this app ships with, so existing ``load_model()`` calls
            behave exactly as before.

    Returns:
        The object returned by ``tf.keras.models.load_model``.
    """
    return tf.keras.models.load_model(model_path)

def preprocessing(audio_file, mode):
    """Load a recording and convert it into a fixed-length spectral feature.

    The audio is resampled to 16 kHz, zero-padded or truncated to exactly
    5 seconds, and then converted to the requested representation.

    Args:
        audio_file: Audio source handed to ``librosa.load``.
        mode: ``'mfcc'`` for MFCCs, or ``'log_mel'`` for a 128-band mel
            spectrogram (``fmax=8000``) converted to dB relative to its peak.

    Returns:
        The feature array computed by librosa for the 5-second clip.

    Raises:
        ValueError: If ``mode`` is not ``'mfcc'`` or ``'log_mel'``.
    """
    # Validate first: an unknown mode used to fall through to ``return feature``
    # and surface as an UnboundLocalError only after the audio had been loaded.
    if mode not in ('mfcc', 'log_mel'):
        raise ValueError(
            f"Unsupported mode {mode!r}; expected 'mfcc' or 'log_mel'"
        )

    sr_new = 16000  # target sample rate: 16 kHz
    x, _ = librosa.load(audio_file, sr=sr_new)

    # Force every clip to the same length (5 s) so the feature has a fixed size:
    # number of samples = duration in seconds * sample rate.
    max_len = 5 * sr_new
    n_samples = x.shape[0]
    if n_samples < max_len:
        # Too short: pad the end with zeros (silence).
        x = np.pad(x, (0, max_len - n_samples))
    elif n_samples > max_len:
        # Too long: keep only the first 5 seconds.
        x = x[:max_len]

    if mode == 'mfcc':
        return librosa.feature.mfcc(y=x, sr=sr_new)

    # mode == 'log_mel'
    mel_power = librosa.feature.melspectrogram(y=x, sr=sr_new, n_mels=128, fmax=8000)
    return librosa.power_to_db(mel_power, ref=np.max)


def predict_lung_disease(audio_data):
    """Classify a lung-sound recording and return the predicted class name.

    Args:
        audio_data: ``(sample_rate, samples)`` pair as passed in by the audio
            input, or ``None`` when the form is submitted without audio.

    Returns:
        The entry of ``c_names`` with the highest model score, or a short
        prompt asking for audio when ``audio_data`` is ``None``.
    """
    if audio_data is None:
        # Submitting with no recording used to crash on ``None[1]``.
        return "No audio provided. Please record or upload a lung sound."

    sample_rate, samples = audio_data

    # Round-trip through a WAV file so preprocessing() can load and resample it.
    # A per-call temporary directory avoids depending on a pre-existing
    # "temp/" folder, keeps concurrent requests from overwriting each other's
    # file, and removes the file once the features are extracted.
    with tempfile.TemporaryDirectory() as tmp_dir:
        filename = os.path.join(tmp_dir, "lungs_audio.wav")
        soundfile.write(filename, samples, samplerate=sample_rate)
        features = preprocessing(filename, 'log_mel')

    # Batch of 128 x 157 single-channel inputs, the layout fed to the model.
    processed_audio = features.reshape((-1, 128, 157, 1))
    new_preds = model.predict(processed_audio)
    new_classpreds = np.argmax(new_preds, axis=1)

    label = str(c_names[new_classpreds[0]])
    print(label)
    return label

# Load the classifier once at startup; predict_lung_disease reads this global.
model = load_model()

# One single-element row per example recording, holding its path under the
# examples directory.
example_rows = [
    [os.path.join(audio_files_path, wav_name)] for wav_name in audio_files_to_show
]

# Gradio UI: one audio input, the predicted class name as text output.
iface = gr.Interface(
    fn=predict_lung_disease,
    inputs=["audio"],
    outputs="text",
    title="VitalSenseAI",
    examples=example_rows,
)

iface.launch()