Spaces:

jiveshkalra
/

LungDiseaseDetector

Sleeping

51c4b3f verified 7 months ago

2.98 kB

	import librosa
	import tensorflow as tf
	import soundfile
	import gradio as gr

	import pandas as pd
	import os
	import random
	import numpy as np
	# Set KMP_DUPLICATE_LIB_OK for this process (presumably to tolerate a
	# duplicate OpenMP runtime being loaded -- TODO confirm it is still needed).
	os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"

	# Directory holding the bundled example recordings, and the class names
	# used to turn a predicted class index into a label.
	audio_files_path = 'examples'
	c_names = ['Bronchiectasis', 'COPD', 'Healthy', 'Pneumonia', 'URTI']

	# Collect the file names of every .wav clip in the examples directory.
	audio_files = [
	    entry for entry in os.listdir(audio_files_path) if entry.endswith('.wav')
	]
	# Keep the names both as a one-column DataFrame and as a plain list.
	audio_files_df = pd.DataFrame(audio_files, columns=['audio_paths'])
	audio_files_to_show = audio_files_df['audio_paths'].to_list()
	# Plan for the Gradio interface:
	# 0. Load the model
	# 1. Audio file input
	# 2. Clear and Submit buttons
	# 3. On submit, preprocess the audio file into a log-mel spectrogram and run it through the AI model
	# 4. Output the prediction

	def load_model():
	    """Load the saved Keras lung-disease classifier from disk and return it."""
	    model_path = "models/lung_disease_predictor_cnn_logmel_without_data_augmentation.keras"
	    return tf.keras.models.load_model(model_path)

	def preprocessing(audio_file, mode):
	    """Load an audio clip and convert it to a fixed-length spectral feature.

	    The clip is resampled to 16 kHz while loading, then zero-padded at the end
	    or truncated so that it is exactly 5 seconds (80 000 samples) long before
	    the feature is computed.

	    Args:
	        audio_file: Audio source passed straight to ``librosa.load``
	            (the caller in this file passes a path to a .wav file).
	        mode: ``'mfcc'`` for MFCCs with librosa's default settings, or
	            ``'log_mel'`` for a 128-band mel spectrogram (``fmax=8000``)
	            converted to dB relative to its own maximum.

	    Returns:
	        The feature array returned by librosa for the selected mode.
	        NOTE(review): the caller reshapes the ``'log_mel'`` result to
	        ``(-1, 128, 157, 1)``, which matches librosa's default hop length
	        of 512 on 80 000 samples -- confirm if librosa defaults change.

	    Raises:
	        ValueError: If ``mode`` is not ``'mfcc'`` or ``'log_mel'``.
	    """
	    supported_modes = ('mfcc', 'log_mel')
	    # Validate up front: previously an unknown mode only failed at the final
	    # return with an UnboundLocalError, after the audio had been decoded.
	    if mode not in supported_modes:
	        raise ValueError(
	            f"Unsupported mode {mode!r}; expected one of {supported_modes}"
	        )

	    sr_new = 16000  # target sample rate in Hz
	    clip_seconds = 5  # every clip is forced to this duration
	    max_len = clip_seconds * sr_new  # length of sound array = time x sample rate

	    # librosa resamples to sr_new on load, so the returned rate is not needed.
	    x, _ = librosa.load(audio_file, sr=sr_new)

	    n_samples = x.shape[0]
	    if n_samples < max_len:
	        # Too short: pad the end with zeros.
	        x = np.pad(x, (0, max_len - n_samples))
	    elif n_samples > max_len:
	        # Too long: keep only the first max_len samples.
	        x = x[:max_len]

	    if mode == 'mfcc':
	        return librosa.feature.mfcc(y=x, sr=sr_new)

	    # mode == 'log_mel'
	    mel_power = librosa.feature.melspectrogram(y=x, sr=sr_new, n_mels=128, fmax=8000)
	    return librosa.power_to_db(mel_power, ref=np.max)


	def predict_lung_disease(audio_data):
	    """Classify a lung-sound recording and return the predicted class name.

	    Args:
	        audio_data: Indexable pair where ``audio_data[0]`` is the sample rate
	            and ``audio_data[1]`` is the sample array. Presumably this is the
	            value Gradio's audio input passes in; it is ``None`` when the
	            form is submitted without a recording.

	    Returns:
	        The entry of ``c_names`` whose class index has the highest score in
	        the model's prediction.

	    Raises:
	        gr.Error: If no audio was provided.
	    """
	    import tempfile  # local import: only this function needs it

	    if audio_data is None:
	        # Surface a readable message instead of failing on None indexing.
	        raise gr.Error("Please upload or record an audio clip first.")

	    # Write the clip into a private temporary directory. A fixed path such as
	    # "temp/lungs_audio.wav" breaks when the directory is missing and lets
	    # concurrent requests overwrite each other's file; this directory is
	    # unique per call and removed when the block exits.
	    with tempfile.TemporaryDirectory() as tmp_dir:
	        filename = os.path.join(tmp_dir, "lungs_audio.wav")
	        soundfile.write(filename, audio_data[1], samplerate=audio_data[0])
	        # The features must be computed while the file still exists.
	        log_mel = preprocessing(filename, 'log_mel')

	    # Shape the features as a batch of single-channel 128 x 157 inputs.
	    processed_audio = log_mel.reshape((-1, 128, 157, 1))
	    new_preds = model.predict(processed_audio)
	    new_classpreds = np.argmax(new_preds, axis=1)

	    predicted_label = str(c_names[new_classpreds[0]])
	    print(predicted_label)
	    return predicted_label

	# Gradio Interface
	# Load the classifier once at start-up; predict_lung_disease reads this
	# module-level `model`.
	model = load_model()

	# One single-element row per bundled example clip, each holding the clip's
	# path inside the examples directory.
	example_rows = [
	    [os.path.join(audio_files_path, audio_file)]
	    for audio_file in audio_files_to_show
	]

	# Interface: one audio input, one text output showing the predicted class.
	iface = gr.Interface(
	    fn=predict_lung_disease,
	    inputs=["audio"],
	    outputs="text",
	    title="VitalSenseAI",
	    examples=example_rows,
	)

	iface.launch()