"""
Methods for scoring prediction results (CA, AUC, ...).

Examples
--------
>>> import Orange
>>> data = Orange.data.Table('iris')
>>> learner = Orange.classification.LogisticRegressionLearner(solver="liblinear")
>>> results = Orange.evaluation.TestOnTrainingData(data, [learner])
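
Scores are then computed by calling a scorer with the results, e.g.:

>>> ca = Orange.evaluation.CA(results)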
"""
import math
import numpy as np
import sklearn.metrics as skl_metrics
from sklearn.metrics import confusion_matrix
from Orange.data import DiscreteVariable, ContinuousVariable, Domain
from Orange.misc.wrapper_meta import WrapperMeta
__all__ = ["CA", "Precision", "Recall", "F1", "PrecisionRecallFSupport", "AUC",
"MSE", "RMSE", "MAE", "MAPE", "R2", "LogLoss", "MatthewsCorrCoefficient"]
class ScoreMetaType(WrapperMeta):
"""
Maintain a registry of non-abstract subclasses and assign the default
    value of `name`.

    The existing metaclass `Registry` cannot be used since a metaclass
    cannot have multiple inherited `__new__` methods.
    """
def __new__(mcs, name, bases, dict_, **kwargs):
cls = WrapperMeta.__new__(mcs, name, bases, dict_)
# Essentially `if cls is not Score`, except that Score may not exist yet
if hasattr(cls, "registry"):
if not kwargs.get("abstract"):
# Don't use inherited names, look into dict_
cls.name = dict_.get("name", name)
cls.long_name = dict_.get("long_name", cls.name)
cls.registry[name] = cls
else:
cls.registry = {}
return cls
def __init__(cls, *args, **_):
WrapperMeta.__init__(cls, *args)
class Score(metaclass=ScoreMetaType):
"""
    ${sklpar}

    Parameters
----------
results : Orange.evaluation.Results
Stored predictions and actual data in model testing.
"""
__wraps__ = None
separate_folds = False
is_scalar = True
is_binary = False #: If true, compute_score accepts `target` and `average`
#: If the class doesn't explicitly contain `abstract=True`, it is not
#: abstract; essentially, this attribute is not inherited
abstract = True
class_types = ()
name = None
long_name = None #: A short user-readable name (e.g. a few words)
default_visible = True
priority = 100
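    # A Score subclass can be used in two ways: `CA(results)` computes the
    # score(s) immediately, while `CA()` returns a scorer instance that can
    # be called with results later.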
def __new__(cls, results=None, **kwargs):
self = super().__new__(cls)
if results is not None:
self.__init__()
return self(results, **kwargs)
else:
return self
def __call__(self, results, **kwargs):
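        # Scores that should not be pooled across folds (e.g. AUC) are
        # computed per fold and then averaged.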
if self.separate_folds and results.score_by_folds and results.folds:
scores = self.scores_by_folds(results, **kwargs)
return self.average(scores)
return self.compute_score(results, **kwargs)
def average(self, scores):
if self.is_scalar:
return np.mean(scores, axis=0)
        raise NotImplementedError
def scores_by_folds(self, results, **kwargs):
nfolds = len(results.folds)
nmodels = len(results.predicted)
if self.is_scalar:
scores = np.empty((nfolds, nmodels), dtype=np.float64)
else:
scores = [None] * nfolds
for fold in range(nfolds):
fold_results = results.get_fold(fold)
scores[fold] = self.compute_score(fold_results, **kwargs)
return scores
def compute_score(self, results):
wraps = type(self).__wraps__ # self.__wraps__ is invisible
if wraps:
return self.from_predicted(results, wraps)
else:
            raise NotImplementedError
@staticmethod
def from_predicted(results, score_function, **kwargs):
return np.fromiter(
(score_function(results.actual, predicted, **kwargs)
for predicted in results.predicted),
dtype=np.float64, count=len(results.predicted))
@staticmethod
def is_compatible(domain: Domain) -> bool:
raise NotImplementedError
class ClassificationScore(Score, abstract=True):
class_types = (DiscreteVariable, )
@staticmethod
def is_compatible(domain: Domain) -> bool:
return domain.has_discrete_class
class RegressionScore(Score, abstract=True):
class_types = (ContinuousVariable, )
@staticmethod
def is_compatible(domain: Domain) -> bool:
return domain.has_continuous_class
# pylint: disable=invalid-name
class CA(ClassificationScore):
__wraps__ = skl_metrics.accuracy_score
name = "CA"
long_name = "Classification accuracy"
priority = 20
class PrecisionRecallFSupport(ClassificationScore):
__wraps__ = skl_metrics.precision_recall_fscore_support
is_scalar = False
class TargetScore(ClassificationScore):
"""
Base class for scorers that need a target value (a "positive" class).

    Parameters
----------
results : Orange.evaluation.Results
Stored predictions and actual data in model testing.
target : int, optional (default=None)
Target class value.
When None:
- if averaging is specified, use all classes and average results
- if average is 'binary' and class variable has exactly 2 values,
use the value '1' as the positive class
    average : str, optional (default='binary')
        Method for averaging: 'binary', 'weighted', 'macro', 'micro' or
        None. Binary averaging requires a binary class variable or a
        target to be set.
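
    Examples
    --------
    For multiclass data, specify a target class or an averaging method
    (continuing the module-level example):

    >>> recall = Orange.evaluation.Recall(results, average='weighted')
    >>> precision = Orange.evaluation.Precision(results, target=1)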
"""
is_binary = True
abstract = True
__wraps__ = None # Subclasses should set the scoring function
def compute_score(self, results, target=None, average='binary'):
if average == 'binary':
if target is None:
if len(results.domain.class_var.values) > 2:
raise ValueError(
"Multiclass data: specify target class or select "
"averaging ('weighted', 'macro', 'micro')")
target = 1 # Default: use 1 as "positive" class
average = None
labels = None if target is None else [target]
return self.from_predicted(
results, type(self).__wraps__, labels=labels, average=average)
class Precision(TargetScore):
__wraps__ = skl_metrics.precision_score
name = "Prec"
long_name = "Precision"
priority = 40
class Recall(TargetScore):
__wraps__ = skl_metrics.recall_score
name = long_name = "Recall"
priority = 50
class F1(TargetScore):
__wraps__ = skl_metrics.f1_score
name = long_name = "F1"
priority = 30
class AUC(ClassificationScore):
"""
${sklpar}

    Parameters
----------
results : Orange.evaluation.Results
Stored predictions and actual data in model testing.
target : int, optional (default=None)
Value of class to report.
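
    Examples
    --------
    For multiclass data the score is a weighted average of one-vs-rest
    AUCs (continuing the module-level example):

    >>> auc = Orange.evaluation.AUC(results)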
"""
__wraps__ = skl_metrics.roc_auc_score
separate_folds = True
is_binary = True
name = "AUC"
long_name = "Area under ROC curve"
priority = 10
@staticmethod
def calculate_weights(results):
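        # Weight each one-vs-rest AUC by c * (N - c), the number of
        # (positive, negative) example pairs for a class with c instances.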
classes = np.unique(results.actual)
class_cases = [sum(results.actual == class_)
for class_ in classes]
N = results.actual.shape[0]
weights = np.array([c * (N - c) for c in class_cases])
wsum = np.sum(weights)
        if wsum == 0:
            raise ValueError("Class variable has fewer than two values")
        return weights / wsum
@staticmethod
def single_class_auc(results, target):
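        # Binarize the ground truth against `target` and score it with the
        # predicted probability of the target class.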
y = np.array(results.actual == target, dtype=int)
return np.fromiter(
(skl_metrics.roc_auc_score(y, probabilities[:, int(target)])
for probabilities in results.probabilities),
dtype=np.float64, count=len(results.predicted))
def multi_class_auc(self, results):
classes = np.unique(results.actual)
weights = self.calculate_weights(results)
auc_array = np.array([self.single_class_auc(results, class_)
for class_ in classes])
return np.sum(auc_array.T * weights, axis=1)
def compute_score(self, results, target=None, average=None):
domain = results.domain
n_classes = len(domain.class_var.values)
if n_classes < 2:
raise ValueError("Class variable has less than two values")
elif n_classes == 2:
return self.single_class_auc(results, 1)
else:
if target is None:
return self.multi_class_auc(results)
else:
return self.single_class_auc(results, target)
class LogLoss(ClassificationScore):
"""
${sklpar}

    Parameters
----------
results : Orange.evaluation.Results
Stored predictions and actual data in model testing.
eps : float
Log loss is undefined for p=0 or p=1, so probabilities are
clipped to max(eps, min(1 - eps, p)).
normalize : bool, optional (default=True)
If true, return the mean loss per sample.
Otherwise, return the sum of the per-sample losses.
sample_weight : array-like of shape = [n_samples], optional
Sample weights.

    Examples
--------
>>> Orange.evaluation.LogLoss(results)
array([0.3...])
"""
__wraps__ = skl_metrics.log_loss
priority = 120
name = "LogLoss"
long_name = "Logistic loss"
default_visible = False
def compute_score(self, results, eps=1e-15, normalize=True,
sample_weight=None):
return np.fromiter(
(skl_metrics.log_loss(results.actual,
probabilities,
eps=eps,
normalize=normalize,
sample_weight=sample_weight)
for probabilities in results.probabilities),
dtype=np.float64, count=len(results.probabilities))
class Specificity(ClassificationScore):
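    """
    Specificity, the true negative rate: TN / (TN + FP).

    For multiclass data, specify a target class or use
    average='weighted'; the default binary averaging requires a
    two-valued class variable.
    """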
is_binary = True
priority = 110
name = "Spec"
long_name = "Specificity"
default_visible = False
@staticmethod
def calculate_weights(results):
classes, counts = np.unique(results.actual, return_counts=True)
n = np.array(results.actual).shape[0]
return counts / n, classes
@staticmethod
def specificity(y_true, y_pred):
tn, fp, _, _ = confusion_matrix(y_true, y_pred).ravel()
return tn / (tn + fp)
def single_class_specificity(self, results, target):
y_true = (np.array(results.actual) == target).astype(int)
return np.fromiter(
(self.specificity(y_true,
np.array(predicted == target, dtype=int))
for predicted in results.predicted),
dtype=np.float64, count=len(results.predicted))
def multi_class_specificity(self, results):
weights, classes = self.calculate_weights(results)
scores = np.array([self.single_class_specificity(results, class_)
for class_ in classes])
return np.sum(scores.T * weights, axis=1)
def compute_score(self, results, target=None, average="binary"):
domain = results.domain
n_classes = len(domain.class_var.values)
if target is None:
if average == "weighted":
return self.multi_class_specificity(results)
            elif average == "binary":
if n_classes != 2:
raise ValueError(
"Binary averaging needs two classes in data: "
"specify target class or use "
"weighted averaging.")
return self.single_class_specificity(results, 1)
            else:
                raise ValueError(
                    "Invalid averaging method: specify a target class or "
                    "select averaging 'weighted' or 'binary'")
        else:
            return self.single_class_specificity(results, target)
class MatthewsCorrCoefficient(ClassificationScore):
__wraps__ = skl_metrics.matthews_corrcoef
name = "MCC"
long_name = "Matthews correlation coefficient"
# Regression scores
class MSE(RegressionScore):
__wraps__ = skl_metrics.mean_squared_error
name = "MSE"
long_name = "Mean square error"
priority = 20
class RMSE(RegressionScore):
name = "RMSE"
long_name = "Root mean square error"
    priority = 30
    def compute_score(self, results):
        return np.sqrt(MSE(results))
class MAE(RegressionScore):
__wraps__ = skl_metrics.mean_absolute_error
name = "MAE"
long_name = "Mean absolute error"
priority = 40
class MAPE(RegressionScore):
__wraps__ = skl_metrics.mean_absolute_percentage_error
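    # Note: sklearn's mean_absolute_percentage_error returns a fraction,
    # not a percentage (the result is not multiplied by 100).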
name = "MAPE"
long_name = "Mean absolute percentage error"
priority = 45
# pylint: disable=invalid-name
class R2(RegressionScore):
__wraps__ = skl_metrics.r2_score
name = "R2"
long_name = "Coefficient of determination"
priority = 50
class CVRMSE(RegressionScore):
name = "CVRMSE"
long_name = "Coefficient of variation of the RMSE"
priority = 110
default_visible = False
def compute_score(self, results):
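        # CV(RMSE) = RMSE / mean(actual) * 100; undefined when the mean of
        # the true values is (near) zero.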
mean = np.nanmean(results.actual)
        if abs(mean) < 1e-10:
raise ValueError("Mean value is too small")
return RMSE(results) / mean * 100