import pandas as pd


def _stripPrefix(name: str, prefix: str) -> str:
    """Return `name` without a leading `prefix`; other names are unchanged."""
    return name[len(prefix):] if name.startswith(prefix) else name


def _flaggedLabelsPerRow(data: pd.DataFrame, columns: list, prefix: str) -> list:
    """For every row, list the labels of `columns` whose cell is set.

    A cell counts as set when it is truthy and not null. Labels are the
    column names with a leading `prefix` removed.
    """
    labels = [_stripPrefix(col, prefix) for col in columns]
    values = data[columns].to_numpy()

    # Nulls must be masked out first: NaN converts to True under astype(bool),
    # which would otherwise report a missing cell as a present symptom/cause.
    present = ~pd.isna(values)
    flags = present.copy()
    flags[present] = values[present].astype(bool)

    # Iterating the 2-D array yields one entry per row even when `columns`
    # is empty, so rows are never dropped from the result.
    return [[label for label, flag in zip(labels, row) if flag] for row in flags]


def getSymptomsCausesAndDiseaseNameFromJSON(data: pd.DataFrame) -> dict:
    """
    Generate Symptoms, Causes and Disease Name
    =========================
    Parameters:
    -----------
    data:
        description: Augmented Data with a 'disease' column plus indicator
                     columns; symptom columns are prefixed with 'symptoms_'
        type: pd.DataFrame
    -----------
    Returns:
    --------
    data:
        description: {"diseases": [{"disease": ..., "symptoms": [...],
                     "causes": [...]}, ...]} with one entry per input row
        type: dict
    --------
    Raises:
    -------
    KeyError: if data has no 'disease' column
    --------------------------------------------------------------------------------------------
    Working:
    --------
    - Split the columns by name: 'symptoms_*' columns are symptoms, every other
      column except 'disease' is a cause
    - For each row in data:
        - Add all the column names whose value is set (truthy and not null) to
          the 'symptoms' or 'causes' key, without the 'symptoms_' / 'causes_' prefix
        - Set the value of the 'disease' key to the disease name
    - Return the new dict
    """
    # Select columns by name rather than by position: slicing by the symptom
    # count skipped the last symptom column and misfiled it as a cause.
    symptomColumns = [col for col in data.columns if col.startswith('symptoms_')]
    causeColumns = [
        col for col in data.columns
        if col != 'disease' and not col.startswith('symptoms_')
    ]
    diseases = data['disease']

    symptomsArray = _flaggedLabelsPerRow(data, symptomColumns, 'symptoms_')
    causesArray = _flaggedLabelsPerRow(data, causeColumns, 'causes_')

    diseaseDict = {
        "diseases": [
            {
                "disease": disease,
                "symptoms": symptom,
                "causes": cause,
            }
            for disease, symptom, cause in zip(diseases, symptomsArray, causesArray)
        ]
    }
    return diseaseDict