Source code for sharp.qoi._qoi

import copy
from .base import BaseQoI, BaseRankQoI


class DiffQoI(BaseQoI):
    """
    General-purpose QoI for models/methods producing label predictions or
    scores.

    The quantity being compared is the raw output of ``target_function``,
    which may be either scores or binary labels.

    Parameters
    ----------
    target_function : function
        Method used to predict a label or score. It should return a
        1-dimensional array holding the expected target (i.e., label or
        score) for each of the observations passed in.

    Notes
    -----
    This QoI was formerly defined as just ``QoI``.
    """

    def _estimate(self, rows):
        # The estimate is simply whatever the target function outputs.
        predictions = self.target_function(rows)
        return predictions

    def _calculate(self, rows1, rows2):
        # Mean of (estimate on rows1) minus (estimate on rows2).
        difference = self.estimate(rows1) - self.estimate(rows2)
        return difference.mean()


class FlipQoI(BaseQoI):
    """
    Label-flip QoI, implementing equation 4 from [1]_.

    Designed for classification with label predictions. It was originally
    intended for binary classification, but multiclass problems may be
    quantified directly with it as well. The resulting influence score
    quantifies how "pivotal" a given feature is.

    ``target_function`` should output class predictions.

    References
    ----------
    .. [1] Datta, A., Sen, S., & Zick, Y. (2016). Algorithmic transparency via
       quantitative input influence: Theory and experiments with learning
       systems. In 2016 IEEE symposium on security and privacy (SP)
       (pp. 598-617). IEEE.

    Notes
    -----
    This QoI was formerly defined as ``BCFlipped``.
    """

    def _estimate(self, rows):
        # Predicted labels, exactly as produced by the target function.
        labels = self.target_function(rows)
        return labels

    def _calculate(self, rows1, rows2):
        labels_first = self.estimate(rows1)
        labels_second = self.estimate(rows2)
        # Fraction of predictions that agree; its complement is the share
        # of predictions that flipped between the two inputs.
        unchanged = labels_second == labels_first
        return 1 - unchanged.mean()


class LikelihoodQoI(BaseQoI):
    """
    Likelihood QoI, implementing equation 3 from [1]_.

    Designed for binary classification problems only. It computes the
    difference between the likelihoods of ``rows1`` and ``rows2`` obtaining
    the positive label.

    ``target_function`` should output either scores or class label
    predictions.

    References
    ----------
    .. [1] Datta, A., Sen, S., & Zick, Y. (2016). Algorithmic transparency via
       quantitative input influence: Theory and experiments with learning
       systems. In 2016 IEEE symposium on security and privacy (SP)
       (pp. 598-617). IEEE.

    Notes
    -----
    This QoI was formerly defined as ``BCLikelihood``.
    """

    def _estimate(self, rows):
        output = self.target_function(rows)
        if output.ndim == 1:
            selected = output
        else:
            # Multi-column output: only the last column is used
            # (presumably the positive-class column -- confirm with callers).
            selected = output[:, -1]
        return selected.mean()

    def _calculate(self, rows1, rows2):
        # Each estimate is already a mean, so no further reduction is needed.
        likelihood_first = self.estimate(rows1)
        likelihood_second = self.estimate(rows2)
        return likelihood_first - likelihood_second


class RankQoI(BaseRankQoI):
    """
    Rank-specific QoI that uses the rank itself as the measured quantity.

    The influence score is based on comparing the rank of a sample against
    the rank of synthetic data derived from that original sample.

    ``target_function`` should output scores.

    Notes
    -----
    This QoI was formerly defined as ``RankingRank``.
    """

    def _estimate(self, rows):
        # Delegate to the ranking helper provided by the base class.
        ranks = self.rank(rows)
        return ranks

    def _calculate(self, rows1, rows2):
        # Note the operand order: rows2 minus rows1 (the reverse of the
        # score-based QoIs in this module).
        rank_shift = self.estimate(rows2) - self.estimate(rows1)
        return rank_shift.mean()


class RankScoreQoI(BaseRankQoI):
    """
    General ranking-oriented QoI, analogous to ``DiffQoI``.

    ``target_function`` must output scores.

    Notes
    -----
    This QoI was formerly defined as ``RankingScore``.
    """

    def _estimate(self, rows):
        # The estimate is the score returned by the target function.
        scores = self.target_function(rows)
        return scores

    def _calculate(self, rows1, rows2):
        # Mean of (estimate on rows1) minus (estimate on rows2).
        score_gap = self.estimate(rows1) - self.estimate(rows2)
        return score_gap.mean()


class TopKQoI(BaseRankQoI):
    """
    Rank-specific QoI measuring the likelihood of reaching the top-K.

    Parameters
    ----------
    target_function : function, default=None
        Forwarded unchanged to the base class constructor.

    top_k : int, default=10
        The number of items to consider as part of the top-ranked group.

    X : object, default=None
        Forwarded unchanged to the base class constructor (presumably the
        reference data used for ranking -- see ``BaseRankQoI``).
    """

    def __init__(self, target_function=None, top_k=10, X=None):
        super().__init__(target_function=target_function, X=X)
        self.top_k = top_k

    def _estimate(self, rows):
        # 1 where the rank is within the top-K cutoff, 0 otherwise.
        within_cutoff = self.rank(rows) <= self.top_k
        return within_cutoff.astype(int)

    def _calculate(self, rows1, rows2):
        # Mean difference of the top-K indicators for rows1 and rows2.
        indicator_gap = self.estimate(rows1) - self.estimate(rows2)
        return indicator_gap.mean()


# Registry mapping each public QoI name to the class implementing it.
# Looked up by ``get_qoi`` and listed by ``get_qoi_names``.
_QOI_OBJECTS = {
    "diff": DiffQoI,
    "flip": FlipQoI,
    "likelihood": LikelihoodQoI,
    "rank": RankQoI,
    "rank_score": RankScoreQoI,
    "top_k": TopKQoI,
}


def get_qoi_names():
    """Get the names of all available quantities of interest.

    These names can be passed to :func:`~sharp.qoi.get_qoi` to retrieve the
    QoI object.

    Returns
    -------
    list of str
        Names of all available quantities of interest, sorted alphabetically.

    Examples
    --------
    >>> from sharp.qoi import get_qoi_names
    >>> all_qois = get_qoi_names()
    >>> type(all_qois)
    <class 'list'>
    >>> all_qois[:3]
    ['diff', 'flip', 'likelihood']
    >>> "rank" in all_qois
    True
    """
    # Iterating a dict yields its keys, so no explicit ``.keys()`` is needed.
    return sorted(_QOI_OBJECTS)


def get_qoi(qoi):
    """Get a quantity of interest from string.

    :func:`~sharp.qoi.get_qoi_names` can be used to retrieve the names of all
    available quantities of interest.

    Parameters
    ----------
    qoi : str, callable or None
        Quantity of interest as string. If callable it is returned as is.
        If None, returns None.

    Returns
    -------
    quantity : callable or None
        The quantity of interest registered under the given name, or ``qoi``
        itself when it is not a string.

    Raises
    ------
    ValueError
        If ``qoi`` is a string that is not a registered QoI name.

    Notes
    -----
    When passed a string, the registered object is passed through
    ``copy.deepcopy`` before being returned. The registry currently holds
    classes, and ``copy.deepcopy`` returns classes unchanged, so repeated
    calls with the same name return the same class object.
    """
    # Anything that is not a name (a callable, or None) is passed through.
    if not isinstance(qoi, str):
        return qoi

    # Keep the ``try`` limited to the lookup so that only an unknown name is
    # translated into a ValueError.
    try:
        registered = _QOI_OBJECTS[qoi]
    except KeyError:
        raise ValueError(
            "%r is not a valid QoI name. "
            "Use sharp.qoi.get_qoi_names() "
            "to get valid options." % qoi
        ) from None

    return copy.deepcopy(registered)