Source code for Orange.classification.majority

from hashlib import sha1

import numpy as np

from Orange.classification import Learner, Model
from Orange.statistics import distribution

__all__ = ["MajorityLearner"]


[docs] class MajorityLearner(Learner): """ A majority classifier. Always returns most frequent class from the training set, regardless of the attribute values from the test data instance. Returns class value distribution if class probabilities are requested. Can be used as a baseline when comparing classifiers. In the special case of uniform class distribution within the training data, class value is selected randomly. In order to produce consistent results on the same dataset, this value is selected based on hash of the class vector. """
[docs] def fit_storage(self, dat): if not dat.domain.has_discrete_class: raise ValueError("classification.MajorityLearner expects a domain " "with a (single) categorical variable") dist = distribution.get_distribution(dat, dat.domain.class_var) N = dist.sum() if N > 0: dist /= N else: dist.fill(1 / len(dist)) probs = np.array(dist) ties = np.flatnonzero(probs == probs.max()) if len(ties) > 1: random_idx = int(sha1(np.ascontiguousarray(dat.Y).data) .hexdigest(), 16) % len(ties) unif_maj = ties[random_idx] else: unif_maj = None return ConstantModel(dist=dist, unif_maj=unif_maj)
class ConstantModel(Model): """ A classification model that returns a given class value. """ def __init__(self, dist, unif_maj=None): """ Constructs `Orange.classification.MajorityModel` that always returns majority value of given distribution. If no or empty distribution given, constructs a model that returns equal probabilities for each class value. :param dist: domain for the `Table` :param unif_maj: majority class for the special case of uniform class distribution in the training data :type dist: Orange.statistics.distribution.Discrete :return: regression model that returns majority value :rtype: Orange.classification.Model """ self.dist = np.array(dist) self.unif_maj = unif_maj def predict(self, X): """ Returns majority class for each given instance in X. :param X: data table for which to make predictions :type X: Orange.data.Table :return: predicted value :rtype: vector of majority values """ probs = np.tile(self.dist, (X.shape[0], 1)) if self.unif_maj is not None: value = np.tile(self.unif_maj, (X.shape[0], )) return value, probs return probs def __str__(self): return 'ConstantModel {}'.format(self.dist) MajorityLearner.__returns__ = ConstantModel