File size: 2,070 Bytes
ba600a6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import pandas as pd


def getSymptomsCausesAndDiseaseNameFromJSON(data: pd.DataFrame) -> dict:
    """
    Generate Symptoms, Causes and Disease Name
    =========================
    Parameters:
    -----------
        data:
            description: Augmented data with a 'disease' column plus indicator
                         columns. Columns whose name starts with 'symptoms_'
                         are symptoms; every other column except 'disease' is
                         treated as a cause.
            type: pd.DataFrame
    -----------
    Returns:
    --------
        data:
            description: {"diseases": [{"disease": ..., "symptoms": [...],
                         "causes": [...]}, ...]} with one entry per row, in
                         row order. Names are listed in column order with the
                         leading 'symptoms_' / 'causes_' prefix removed.
            type: dict
    -----------
    Raises:
    --------
        KeyError: if data has no 'disease' column.
    --------------------------------------------------------------------------------------------
    Working:
    --------
        - Split the columns into symptoms and causes by name, not by position
        - For each row, collect the names of the columns whose value is truthy
        - Strip the 'symptoms_' / 'causes_' prefix from the collected names
        - Return one {disease, symptoms, causes} entry per row
    """
    diseases = data['disease']

    # Columns are classified by name rather than by position: a positional
    # slice silently misfiles columns whenever the count and the slice bounds
    # disagree (the last symptom column used to end up among the causes).
    symptomColumns = [col for col in data.columns if col.startswith('symptoms_')]
    causeColumns = [
        col for col in data.columns
        if col != 'disease' and not col.startswith('symptoms_')
    ]

    symptomsArray = _activeColumnNames(data, symptomColumns, 'symptoms_')
    causesArray = _activeColumnNames(data, causeColumns, 'causes_')

    return {
        "diseases": [
            {
                "disease": disease,
                "symptoms": symptom,
                "causes": cause,
            }
            for disease, symptom, cause in zip(diseases, symptomsArray, causesArray)
        ]
    }


def _activeColumnNames(data: pd.DataFrame, columns: list, prefix: str) -> list:
    """Return, for each row, the prefix-stripped names of the given columns whose value is truthy."""
    # Only a leading prefix is removed; names without it are kept unchanged.
    labels = [col[len(prefix):] if col.startswith(prefix) else col for col in columns]

    # One boolean matrix for the whole frame instead of a Python-level
    # row-wise apply; also yields an empty list per row when `columns` is empty.
    flags = data[columns].to_numpy().astype(bool)

    return [[label for label, flag in zip(labels, row) if flag] for row in flags]