File size: 4,417 Bytes
cf004a6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
"""
This file includes all tests for the data_preprocessing module.
"""

import pytest
import numpy as np
import pickle
from data_preprocessing.create_descriptors import (handle_inputs,
                                                   create_ecfp_fps,
                                                   create_rdkit_descriptors,
                                                   create_quantils,
                                                   preprocess_molecules)

class TestPreprocessMolecules:
    """
    Tests for the functions imported from
    ``data_preprocessing.create_descriptors``.

    All inputs and expected outputs are supplied by pytest fixtures that are
    defined outside of this file.
    """

    def test_handle_inputs(self, input_molecule_formats):
        """
        This function checks whether every supported input format is
        converted into a list, and whether a DataFrame with a wrong key is
        rejected with a ValueError.
        """

        # Check 1: Single SMILES string
        output_smiles = handle_inputs(input_molecule_formats.smiles)
        assert isinstance(output_smiles, list)

        # Check 2: Comma-separated SMILES string
        output_smiles_coma = handle_inputs(input_molecule_formats.smiles_coma)
        assert isinstance(output_smiles_coma, list)
        assert output_smiles_coma == input_molecule_formats.smiles_list

        # Check 3: List of SMILES
        output_smiles_list = handle_inputs(input_molecule_formats.smiles_list)
        assert isinstance(output_smiles_list, list)

        # Check 4.1: Correct DataFrame
        output_smiles_df = handle_inputs(input_molecule_formats.smiles_df)
        assert isinstance(output_smiles_df, list)

        # Check 4.2: Wrong DataFrame
        with pytest.raises(ValueError):
            handle_inputs(input_molecule_formats.smiles_df_wrong_key)

    def test_create_ecfps_fps(self, input_mols_from_smiles, ecfps_from_smiles):
        """
        This function tests whether the ECFP fingerprints are correctly
        created.
        """

        # Check 1: Correct output type
        output_ecfps = create_ecfp_fps(input_mols_from_smiles)
        assert isinstance(output_ecfps, np.ndarray)

        # Check 2: Correct output shape
        assert output_ecfps.shape == ecfps_from_smiles.shape

        # Check 3: Correct output values
        # Zero tolerances: the fingerprints have to match the reference
        # exactly, not just approximately.
        assert np.allclose(output_ecfps, ecfps_from_smiles, rtol=0, atol=0)

    def test_create_rdkit_descriptors(self, input_mols_from_smiles,
                                      rdkit_descrs_from_smiles):
        """
        This function tests whether the RDKit descriptors are correctly
        created.
        """

        # Check 1: Correct output type
        output_rdkit_descrs = create_rdkit_descriptors(input_mols_from_smiles)
        assert isinstance(output_rdkit_descrs, np.ndarray)

        # Check 2: Correct output shape
        assert output_rdkit_descrs.shape == rdkit_descrs_from_smiles.shape

        # Check 3: Correct output values
        assert np.allclose(output_rdkit_descrs, rdkit_descrs_from_smiles)

    def test_create_quantils(self, input_mols_from_smiles, rdkit_descr_quantils):
        """
        This function tests whether the quantiles are correctly created from
        the RDKit descriptors and the pickled ECDFs.
        """
        # Local import so that this test does not depend on the module-level
        # import block.
        from pathlib import Path

        # The assets folder lives three levels above this file. pathlib is
        # used instead of splitting on "/" so the lookup also works with
        # platform-specific path separators.
        current_loc = Path(__file__).resolve().parents[2]
        ecdfs_path = (current_loc / "assets" / "data_preprocessing_objects"
                      / "ecdfs.pkl")

        # NOTE(review): pickle must only be used on trusted files; this one
        # is presumably shipped with the repository - confirm.
        with open(ecdfs_path, "rb") as fl:
            ecdfs = pickle.load(fl)

        rdkit_descrs = create_rdkit_descriptors(input_mols_from_smiles)
        output_quantils = create_quantils(rdkit_descrs, ecdfs)

        # Check 1: Correct output type
        assert isinstance(output_quantils, np.ndarray)

        # Check 2: Correct output shape
        assert output_quantils.shape == rdkit_descr_quantils.shape

        # Check 3: Correct output values
        assert np.allclose(output_quantils, rdkit_descr_quantils)

    def test_preprocess_molecules(self, input_smiles,
                                  preprocessed_features):
        """
        This function tests whether the preprocessing of molecules is
        correctly done.
        """

        # Check 1: Correct output type
        output_preprocessed_features = preprocess_molecules(input_smiles)
        assert isinstance(output_preprocessed_features, np.ndarray)

        # Check 2: Correct output shape
        assert output_preprocessed_features.shape == preprocessed_features.shape

        # Check 3: Correct output values
        assert np.allclose(output_preprocessed_features, preprocessed_features)