"""
This file includes all tests for the data_preprocessing module.
"""
import pickle
from pathlib import Path

import numpy as np
import pytest

from data_preprocessing.create_descriptors import (handle_inputs,
                                                   create_ecfp_fps,
                                                   create_rdkit_descriptors,
                                                   create_quantils,
                                                   preprocess_molecules)


class TestPreprocessMolecules:
    """
    Tests for the functions imported from
    ``data_preprocessing.create_descriptors``.

    All expected values are supplied by pytest fixtures that are defined
    outside this file.
    """

    def test_handle_inputs(self, input_molecule_formats):
        """
        This function checks whether every supported input format is
        transformed into a list, and whether a DataFrame with a wrong key
        is rejected with a ValueError.
        """
        # Check 1: Single SMILES input
        output_smiles = handle_inputs(input_molecule_formats.smiles)
        assert isinstance(output_smiles, list)

        # Check 2: Comma-separated SMILES input; the result has to match the
        # list variant provided by the same fixture.
        output_smiles_coma = handle_inputs(input_molecule_formats.smiles_coma)
        assert isinstance(output_smiles_coma, list)
        assert output_smiles_coma == input_molecule_formats.smiles_list

        # Check 3: SMILES list
        output_smiles_list = handle_inputs(input_molecule_formats.smiles_list)
        assert isinstance(output_smiles_list, list)

        # Check 4.1: Correct DataFrame
        output_smiles_df = handle_inputs(input_molecule_formats.smiles_df)
        assert isinstance(output_smiles_df, list)

        # Check 4.2: Wrong DataFrame
        with pytest.raises(ValueError):
            handle_inputs(input_molecule_formats.smiles_df_wrong_key)

    def test_create_ecfps_fps(self, input_mols_from_smiles, ecfps_from_smiles):
        """
        This function tests whether the ECFP fingerprints are correctly
        created.
        """
        # Check 1: Correct output type
        output_ecfps = create_ecfp_fps(input_mols_from_smiles)
        assert isinstance(output_ecfps, np.ndarray)

        # Check 2: Correct output shape
        assert output_ecfps.shape == ecfps_from_smiles.shape

        # Check 3: Correct output values. Both tolerances are zero, so every
        # element has to match the reference exactly.
        assert np.allclose(output_ecfps, ecfps_from_smiles, rtol=0, atol=0)

    def test_create_rdkit_descriptors(self, input_mols_from_smiles,
                                      rdkit_descrs_from_smiles):
        """
        This function tests whether the RDKit descriptors are correctly
        created.
        """
        # Check 1: Correct output type
        output_rdkit_descrs = create_rdkit_descriptors(input_mols_from_smiles)
        assert isinstance(output_rdkit_descrs, np.ndarray)

        # Check 2: Correct output shape
        assert output_rdkit_descrs.shape == rdkit_descrs_from_smiles.shape

        # Check 3: Correct output values (default numpy tolerances)
        assert np.allclose(output_rdkit_descrs, rdkit_descrs_from_smiles)

    def test_create_quantils(self, input_mols_from_smiles,
                             rdkit_descr_quantils):
        """
        This function tests whether the quantiles are correctly created from
        the RDKit descriptors and the pickled ``ecdfs`` object.
        """
        # The assets folder lives three path components above this file.
        # pathlib keeps the lookup independent of the OS path separator
        # (splitting on "/" breaks on Windows).
        base_dir = Path(__file__).parents[2]
        ecdfs_path = (base_dir / "assets" / "data_preprocessing_objects"
                      / "ecdfs.pkl")

        # NOTE(review): pickle.load can execute arbitrary code. This is only
        # acceptable because the file is presumably a repository-owned test
        # asset -- never point this at untrusted data.
        with ecdfs_path.open("rb") as fl:
            ecdfs = pickle.load(fl)

        rdkit_descrs = create_rdkit_descriptors(input_mols_from_smiles)
        output_quantils = create_quantils(rdkit_descrs, ecdfs)

        # Check 1: Correct output type
        assert isinstance(output_quantils, np.ndarray)

        # Check 2: Correct output shape
        assert output_quantils.shape == rdkit_descr_quantils.shape

        # Check 3: Correct output values
        assert np.allclose(output_quantils, rdkit_descr_quantils)

    def test_preprocess_molecules(self, input_smiles,
                                  preprocessed_features):
        """
        This function tests whether the preprocessing of molecules is
        correctly done.
        """
        # Check 1: Correct output type
        output_preprocessed_features = preprocess_molecules(input_smiles)
        assert isinstance(output_preprocessed_features, np.ndarray)

        # Check 2: Correct output shape
        assert output_preprocessed_features.shape == preprocessed_features.shape

        # Check 3: Correct output values
        assert np.allclose(output_preprocessed_features, preprocessed_features)