Source code for ielearn.predict.feature_selection

import numpy as np
from sklearn.feature_selection.univariate_selection import _BaseFilter, f_classif, _clean_nans
from sklearn.utils.validation import check_is_fitted


[docs]class SelectThreshold(_BaseFilter):
    """Select features according to the threshold.

    Read more in the :ref:`User Guide <univariate_feature_selection>`.

    Parameters
    ----------
    score_func : callable
        Function taking two arrays X and y, and returning a pair of arrays
        (scores, pvalues) or a single array with scores.
        Default is f_classif (see below "See also"). The default function only
        works with classification tasks.

    thresh : int or "none", optional, default=0.1
        Score threshold for feature inclusion.
        The "none" option bypasses selection, for use in a parameter search.

    Attributes
    ----------
    scores_ : array-like, shape=(n_features,)
        Scores of features.

    pvalues_ : array-like, shape=(n_features,)
        p-values of feature scores, None if `score_func` returned only scores.

    Notes
    -----
    Ties between features with equal scores will be broken in an unspecified
    way.

    See also
    --------
    f_classif: ANOVA F-value between label/feature for classification tasks.
    mutual_info_classif: Mutual information for a discrete target.
    chi2: Chi-squared stats of non-negative features for classification tasks.
    f_regression: F-value between label/feature for regression tasks.
    mutual_info_regression: Mutual information for a continuous target.
    SelectPercentile: Select features based on percentile of the highest scores.
    SelectFpr: Select features based on a false positive rate test.
    SelectFdr: Select features based on an estimated false discovery rate.
    SelectFwe: Select features based on family-wise error rate.
    GenericUnivariateSelect: Univariate feature selector with configurable mode.
    """

    def __init__(self, score_func=f_classif, thresh=0.1):
        super(SelectThreshold, self).__init__(score_func)
        self.thresh = thresh

    def _check_params(self, X, y):
        if not (self.thresh == "none" or 0 <= self.thresh <= 1):
            raise ValueError("thresh should be >=0, <= 1; got %r."
                             "Use thresh='none' to return all features."
                             % self.thresh)

    def _get_support_mask(self):
        check_is_fitted(self, 'scores_')

        if self.thresh == 'none':
            return np.ones(self.scores_.shape, dtype=bool)
        elif self.thresh == 0:
            return np.zeros(self.scores_.shape, dtype=bool)
        else:
            scores = _clean_nans(self.scores_)
            mask = np.zeros(scores.shape, dtype=bool)

            # Request a stable sort. Mergesort takes more memory (~40MB per
            # megafeature on x86-64).
            return np.array(scores > self.thresh, dtype=bool)
Source code for ielearn.predict.feature_selection

img-edit-learn

Navigation

Related Topics