"""Gradio demo that classifies lung sounds with a log-mel CNN.

Flow: the user supplies an audio clip, the clip is resampled and padded or
truncated to a fixed length, converted to a log-mel spectrogram, and fed to a
pre-trained Keras model whose arg-max class name is shown as text.
"""

import os
import random
import tempfile

import gradio as gr
import librosa
import numpy as np
import pandas as pd
import soundfile
import tensorflow as tf

# Set the environment variable.
# NOTE(review): presumably works around the "duplicate OpenMP runtime" abort
# that can occur when several native libraries bundle their own libiomp.
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"

audio_files_path = 'examples'
c_names = ['Bronchiectasis', 'COPD', 'Healthy', 'Pneumonia', 'URTI']

# Feature-extraction settings. The model input shape used in
# predict_lung_disease depends on these values, so change them together.
TARGET_SAMPLE_RATE = 16000  # Hz; every clip is resampled to this rate
CLIP_SECONDS = 5            # every clip is padded/truncated to this duration
N_MELS = 128                # mel bands of the log-mel spectrogram
# With librosa's default hop length (512) and centered frames, a
# 5 s * 16 kHz = 80000-sample clip yields 1 + 80000 // 512 = 157 frames.
N_FRAMES = 157

MODEL_PATH = "models/lung_disease_predictor_cnn_logmel_without_data_augmentation.keras"

# Collect the example .wav files shipped next to the app. Sorting keeps the
# order of the examples stable across runs (os.listdir order is arbitrary).
audio_files = sorted(
    file for file in os.listdir(audio_files_path) if file.endswith('.wav')
)

# Convert the list to a DataFrame.
audio_files_df = pd.DataFrame(audio_files, columns=['audio_paths'])
audio_files_to_show = audio_files_df['audio_paths'].tolist()


def load_model():
    """Load and return the pre-trained Keras classifier from MODEL_PATH."""
    return tf.keras.models.load_model(MODEL_PATH)


def preprocessing(audio_file, mode: str) -> np.ndarray:
    """Turn an audio file into a fixed-size feature matrix.

    The audio is loaded with librosa at TARGET_SAMPLE_RATE, then zero-padded
    at the end or truncated so that it is exactly CLIP_SECONDS long.

    Args:
        audio_file: Path (or file-like object) accepted by ``librosa.load``.
        mode: ``'mfcc'`` for MFCC features, or ``'log_mel'`` for a mel
            spectrogram (N_MELS bands, fmax 8000 Hz) converted to dB
            relative to its maximum.

    Returns:
        The 2-D feature array produced by librosa for the chosen mode.

    Raises:
        ValueError: If ``mode`` is not one of the supported values.
    """
    sr_new = TARGET_SAMPLE_RATE
    x, _ = librosa.load(audio_file, sr=sr_new)

    # Force a fixed length so every clip produces the same feature shape:
    # length of sound array = duration x sample rate.
    max_len = CLIP_SECONDS * sr_new
    if x.shape[0] < max_len:
        # Too short: pad the tail with zeros (silence).
        x = np.pad(x, (0, max_len - x.shape[0]))
    else:
        # Too long (or exact): keep only the first max_len samples.
        x = x[:max_len]

    if mode == 'mfcc':
        return librosa.feature.mfcc(y=x, sr=sr_new)
    if mode == 'log_mel':
        mel = librosa.feature.melspectrogram(
            y=x, sr=sr_new, n_mels=N_MELS, fmax=8000
        )
        return librosa.power_to_db(mel, ref=np.max)

    # Previously an unknown mode fell through to an UnboundLocalError.
    raise ValueError(
        f"Unsupported mode {mode!r}; expected 'mfcc' or 'log_mel'."
    )


def predict_lung_disease(audio_data):
    """Predict the lung condition for one Gradio audio input.

    Args:
        audio_data: ``(sample_rate, samples)`` pair as indexed by this
            function (``audio_data[0]`` is the rate, ``audio_data[1]`` the
            sample array), or ``None`` when nothing was supplied.

    Returns:
        The predicted class name, one of ``c_names``.

    Raises:
        gr.Error: If no audio was provided.
    """
    if audio_data is None:
        raise gr.Error("Please record or upload an audio file first.")

    sample_rate, samples = audio_data[0], audio_data[1]

    # Round-trip through a WAV file so librosa handles resampling and format
    # conversion. A private temporary directory per call avoids clashes
    # between concurrent requests and is removed automatically afterwards.
    with tempfile.TemporaryDirectory() as tmp_dir:
        filename = os.path.join(tmp_dir, "lungs_audio.wav")
        soundfile.write(filename, samples, samplerate=sample_rate)
        features = preprocessing(filename, 'log_mel')

    # Add batch and channel axes: (batch, mel bands, frames, channels).
    processed_audio = features.reshape((-1, N_MELS, N_FRAMES, 1))

    new_preds = model.predict(processed_audio)
    new_classpreds = np.argmax(new_preds, axis=1)
    predicted_label = str(c_names[new_classpreds[0]])
    print(predicted_label)
    return predicted_label


# Load the model once at start-up; predict_lung_disease reads this global.
model = load_model()

# Gradio interface: one audio input, text output, bundled examples to try.
iface = gr.Interface(
    fn=predict_lung_disease,
    inputs=["audio"],
    outputs="text",
    title="VitalSenseAI",
    examples=[
        [os.path.join(audio_files_path, audio_file)]
        for audio_file in audio_files_to_show
    ],
)

iface.launch()