""" Contains abstract functionality for learning locally linear sparse model. """ import numpy as np import scipy as sp from sklearn.linear_model import Ridge, lars_path from sklearn.utils import check_random_state class LimeBase(object): """Class for learning a locally linear sparse model from perturbed data""" def __init__(self, kernel_fn, verbose=False, random_state=None): """Init function Args: kernel_fn: function that transforms an array of distances into an array of proximity values (floats). verbose: if true, print local prediction values from linear model. random_state: an integer or numpy.RandomState that will be used to generate random numbers. If None, the random state will be initialized using the internal numpy seed. """ self.kernel_fn = kernel_fn self.verbose = verbose self.random_state = check_random_state(random_state) @staticmethod def generate_lars_path(weighted_data, weighted_labels): """Generates the lars path for weighted data. Args: weighted_data: data that has been weighted by kernel weighted_label: labels, weighted by kernel Returns: (alphas, coefs), both are arrays corresponding to the regularization parameter and coefficients, respectively """ x_vector = weighted_data alphas, _, coefs = lars_path(x_vector, weighted_labels, method='lasso', verbose=False) return alphas, coefs def forward_selection(self, data, labels, weights, num_features): """Iteratively adds features to the model""" clf = Ridge(alpha=0, fit_intercept=True, random_state=self.random_state) used_features = [] for _ in range(min(num_features, data.shape[1])): max_ = -100000000 best = 0 for feature in range(data.shape[1]): if feature in used_features: continue clf.fit(data[:, used_features + [feature]], labels, sample_weight=weights) score = clf.score(data[:, used_features + [feature]], labels, sample_weight=weights) if score > max_: best = feature max_ = score used_features.append(best) return np.array(used_features) def feature_selection(self, data, labels, weights, num_features, method): """Selects features for the model. 

    @staticmethod
    def generate_lars_path(weighted_data, weighted_labels):
        """Generates the lars path for weighted data.

        Args:
            weighted_data: data that has been weighted by kernel
            weighted_labels: labels, weighted by kernel

        Returns:
            (alphas, coefs), both are arrays corresponding to the
            regularization parameter and coefficients, respectively
        """
        x_vector = weighted_data
        alphas, _, coefs = lars_path(x_vector,
                                     weighted_labels,
                                     method='lasso',
                                     verbose=False)
        return alphas, coefs

    def forward_selection(self, data, labels, weights, num_features):
        """Iteratively adds features to the model"""
        clf = Ridge(alpha=0, fit_intercept=True,
                    random_state=self.random_state)
        used_features = []
        for _ in range(min(num_features, data.shape[1])):
            max_ = -100000000
            best = 0
            for feature in range(data.shape[1]):
                if feature in used_features:
                    continue
                clf.fit(data[:, used_features + [feature]], labels,
                        sample_weight=weights)
                score = clf.score(data[:, used_features + [feature]],
                                  labels,
                                  sample_weight=weights)
                if score > max_:
                    best = feature
                    max_ = score
            used_features.append(best)
        return np.array(used_features)

    def feature_selection(self, data, labels, weights, num_features, method):
        """Selects features for the model.

        See explain_instance_with_data to understand the parameters.
        """
        if method == 'none':
            return np.array(range(data.shape[1]))
        elif method == 'forward_selection':
            return self.forward_selection(data, labels, weights,
                                          num_features)
        elif method == 'highest_weights':
            clf = Ridge(alpha=0.01, fit_intercept=True,
                        random_state=self.random_state)
            clf.fit(data, labels, sample_weight=weights)

            coef = clf.coef_
            if sp.sparse.issparse(data):
                coef = sp.sparse.csr_matrix(clf.coef_)
                weighted_data = coef.multiply(data[0])
                # Note: most efficient to slice the data before reversing
                sdata = len(weighted_data.data)
                argsort_data = np.abs(weighted_data.data).argsort()
                # Edge case where data is more sparse than the requested
                # number of feature importances; pad with zero-weight
                # features in that case.
                if sdata < num_features:
                    nnz_indexes = argsort_data[::-1]
                    indices = weighted_data.indices[nnz_indexes]
                    num_to_pad = num_features - sdata
                    indices = np.concatenate(
                        (indices, np.zeros(num_to_pad, dtype=indices.dtype)))
                    indices_set = set(indices)
                    pad_counter = 0
                    for i in range(data.shape[1]):
                        if i not in indices_set:
                            indices[pad_counter + sdata] = i
                            pad_counter += 1
                            if pad_counter >= num_to_pad:
                                break
                else:
                    nnz_indexes = argsort_data[sdata - num_features:sdata][::-1]
                    indices = weighted_data.indices[nnz_indexes]
                return indices
            else:
                weighted_data = coef * data[0]
                feature_weights = sorted(
                    zip(range(data.shape[1]), weighted_data),
                    key=lambda x: np.abs(x[1]),
                    reverse=True)
                return np.array([x[0] for x in feature_weights[:num_features]])
        elif method == 'lasso_path':
            weighted_data = ((data - np.average(data, axis=0, weights=weights))
                             * np.sqrt(weights[:, np.newaxis]))
            weighted_labels = ((labels - np.average(labels, weights=weights))
                               * np.sqrt(weights))
            nonzero = range(weighted_data.shape[1])
            _, coefs = self.generate_lars_path(weighted_data,
                                               weighted_labels)
            for i in range(len(coefs.T) - 1, 0, -1):
                nonzero = coefs.T[i].nonzero()[0]
                if len(nonzero) <= num_features:
                    break
            used_features = nonzero
            return used_features
        elif method == 'auto':
            if num_features <= 6:
                n_method = 'forward_selection'
            else:
                n_method = 'highest_weights'
            return self.feature_selection(data, labels, weights,
                                          num_features, n_method)
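
    # Why 'lasso_path' above rescales by sqrt(weights) (explanatory note,
    # not original code): multiplying the centered rows by sqrt(w_i) turns
    # the weighted least-squares objective into an ordinary one, since
    #
    #     sum_i w_i * (y_i - x_i . b)^2
    #         == sum_i (sqrt(w_i) * y_i - sqrt(w_i) * x_i . b)^2
    #
    # so running plain lars_path on the transformed data traces the
    # kernel-weighted lasso regularization path.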

    def explain_instance_with_data(self,
                                   neighborhood_data,
                                   neighborhood_labels,
                                   distances,
                                   label,
                                   num_features,
                                   feature_selection='auto',
                                   model_regressor=None):
        """Takes perturbed data, labels and distances, returns explanation.

        Args:
            neighborhood_data: perturbed data, 2d array. first element is
                assumed to be the original data point.
            neighborhood_labels: corresponding perturbed labels. should have
                as many columns as the number of possible labels.
            distances: distances to original data point.
            label: label for which we want an explanation
            num_features: maximum number of features in explanation
            feature_selection: how to select num_features. options are:
                'forward_selection': iteratively add features to the model.
                    This is costly when num_features is high
                'highest_weights': selects the features that have the highest
                    product of absolute weight * original data point when
                    learning with all the features
                'lasso_path': chooses features based on the lasso
                    regularization path
                'none': uses all features, ignores num_features
                'auto': uses forward_selection if num_features <= 6, and
                    'highest_weights' otherwise.
            model_regressor: sklearn regressor to use in explanation.
                Defaults to Ridge regression if None. Must have
                model_regressor.coef_ and 'sample_weight' as a parameter
                to model_regressor.fit()

        Returns:
            (intercept, exp, score, local_pred):
                intercept is a float.
                exp is a sorted list of tuples, where each tuple (x, y)
                    corresponds to the feature id (x) and the local weight
                    (y). The list is sorted by decreasing absolute value of y.
                score is the R^2 value of the returned explanation
                local_pred is the prediction of the explanation model on the
                    original instance
        """
        weights = self.kernel_fn(distances)
        labels_column = neighborhood_labels[:, label]
        used_features = self.feature_selection(neighborhood_data,
                                               labels_column,
                                               weights,
                                               num_features,
                                               feature_selection)
        if model_regressor is None:
            model_regressor = Ridge(alpha=1, fit_intercept=True,
                                    random_state=self.random_state)
        easy_model = model_regressor
        easy_model.fit(neighborhood_data[:, used_features],
                       labels_column, sample_weight=weights)
        prediction_score = easy_model.score(
            neighborhood_data[:, used_features],
            labels_column, sample_weight=weights)

        local_pred = easy_model.predict(
            neighborhood_data[0, used_features].reshape(1, -1))

        if self.verbose:
            print('Intercept', easy_model.intercept_)
            print('Prediction_local', local_pred)
            print('Right:', neighborhood_labels[0, label])
        return (easy_model.intercept_,
                sorted(zip(used_features, easy_model.coef_),
                       key=lambda x: np.abs(x[1]), reverse=True),
                prediction_score, local_pred)
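

# ---------------------------------------------------------------------------
# Minimal usage sketch (not part of the original module). The synthetic
# neighborhood, the simulated two-class probabilities, and the kernel width
# below are all illustrative assumptions; in practice a LIME explainer
# generates the neighborhood by perturbing a real instance and querying a
# real black-box model.
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    rng = np.random.RandomState(0)

    # Perturbed binary neighborhood; row 0 plays the original instance.
    neighborhood_data = rng.randint(0, 2, size=(100, 10)).astype(float)
    neighborhood_data[0] = 1.0

    # Hypothetical black-box probabilities, one column per class.
    logits = neighborhood_data @ rng.randn(10)
    p1 = 1.0 / (1.0 + np.exp(-logits))
    neighborhood_labels = np.column_stack([1.0 - p1, p1])

    # Euclidean distances from each perturbed row to the original instance.
    distances = np.sqrt(
        ((neighborhood_data - neighborhood_data[0]) ** 2).sum(axis=1))

    def exponential_kernel(d, kernel_width=0.75 * np.sqrt(10)):
        # Maps distances to proximity weights in (0, 1].
        return np.sqrt(np.exp(-(d ** 2) / kernel_width ** 2))

    base = LimeBase(exponential_kernel, verbose=True, random_state=42)
    intercept, exp, score, local_pred = base.explain_instance_with_data(
        neighborhood_data, neighborhood_labels, distances,
        label=1, num_features=5)
    print('Top features:', exp)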