# Source code for ielearn.predict.feature_selection
import numpy as np
from sklearn.feature_selection.univariate_selection import _BaseFilter, f_classif, _clean_nans
from sklearn.utils.validation import check_is_fitted
class SelectThreshold(_BaseFilter):
    """Select features whose univariate score exceeds a threshold.

    Read more in the :ref:`User Guide <univariate_feature_selection>`.

    Parameters
    ----------
    score_func : callable
        Function taking two arrays X and y, and returning a pair of arrays
        (scores, pvalues) or a single array with scores.
        Default is f_classif (see below "See also"). The default function only
        works with classification tasks.

    thresh : float or "none", optional, default=0.1
        Score threshold for feature inclusion; must lie in [0, 1]. A feature
        is kept when its score is strictly greater than ``thresh``.
        The "none" option bypasses selection, for use in a parameter search.

    Attributes
    ----------
    scores_ : array-like, shape=(n_features,)
        Scores of features.

    pvalues_ : array-like, shape=(n_features,)
        p-values of feature scores, None if `score_func` returned only scores.

    Notes
    -----
    The comparison is strict, so a feature whose score equals ``thresh`` is
    dropped. ``thresh=0`` is an ordinary threshold: every feature with a
    score above zero is kept.

    Scores are passed through scikit-learn's ``_clean_nans`` before the
    comparison; per scikit-learn this replaces NaN with the smallest
    representable value, so NaN-scored features are not expected to be
    selected.

    See also
    --------
    f_classif: ANOVA F-value between label/feature for classification tasks.
    mutual_info_classif: Mutual information for a discrete target.
    chi2: Chi-squared stats of non-negative features for classification tasks.
    f_regression: F-value between label/feature for regression tasks.
    mutual_info_regression: Mutual information for a continuous target.
    SelectPercentile: Select features based on percentile of the highest scores.
    SelectFpr: Select features based on a false positive rate test.
    SelectFdr: Select features based on an estimated false discovery rate.
    SelectFwe: Select features based on family-wise error rate.
    GenericUnivariateSelect: Univariate feature selector with configurable mode.
    """

    def __init__(self, score_func=f_classif, thresh=0.1):
        super(SelectThreshold, self).__init__(score_func)
        self.thresh = thresh

    def _check_params(self, X, y):
        """Validate ``thresh``.

        Raises
        ------
        ValueError
            If ``thresh`` is neither the string "none" nor a number in
            the closed interval [0, 1].
        """
        if self.thresh == "none":
            return
        try:
            in_range = 0 <= self.thresh <= 1
        except TypeError:
            # Non-numeric values cannot be ordered against numbers on
            # Python 3; report them with the same ValueError as any other
            # invalid threshold instead of leaking a TypeError.
            in_range = False
        if not in_range:
            raise ValueError("thresh should be >=0, <= 1; got %r. "
                             "Use thresh='none' to return all features."
                             % self.thresh)

    def _get_support_mask(self):
        """Return a boolean mask marking the selected features.

        Returns
        -------
        mask : ndarray of bool, same shape as ``scores_``
            All True when ``thresh`` is "none"; otherwise True where the
            cleaned score is strictly greater than ``thresh``.
        """
        check_is_fitted(self, 'scores_')

        if self.thresh == 'none':
            # Selection is bypassed entirely: keep every feature.
            return np.ones(self.scores_.shape, dtype=bool)

        # Clean NaNs first so they cannot produce undefined comparisons.
        scores = _clean_nans(self.scores_)
        return np.asarray(scores > self.thresh, dtype=bool)