Source code for Orange.classification.softmax_regression

import numpy as np
from scipy.optimize import fmin_l_bfgs_b

from Orange.classification import Learner, Model
from Orange.data.filter import HasClass
from Orange.preprocess import Continuize, RemoveNaNColumns, Impute, Normalize

# Public API of this module: only the learner is exported.
# SoftmaxRegressionModel instances are produced by the learner's fit().
__all__ = ["SoftmaxRegressionLearner"]


class SoftmaxRegressionLearner(Learner):
    r"""L2 regularized softmax regression classifier.

    Uses the L-BFGS algorithm to minimize the categorical
    cross entropy cost with L2 regularization. This model is suitable
    when dealing with a multi-class classification problem.

    When using this learner you should:

    - choose a suitable regularization parameter lambda\_,
    - consider using many logistic regression models (one for each
      value of the class variable) instead of softmax regression.

    Parameters
    ----------
    lambda\_ : float, optional (default=1.0)
        Regularization parameter. It controls trade-off between fitting the
        data and keeping parameters small. Higher values of lambda\_ force
        parameters to be smaller.

    preprocessors : list, optional
        Preprocessors are applied to data before training or testing.
        Default preprocessors:
        `[HasClass(), RemoveNaNColumns(), Impute(), Continuize(), Normalize()]`

        - remove rows with unknown class values,
        - remove columns with all values as NaN,
        - replace NaN values with suitable values,
        - continuize all discrete attributes,
        - transform the dataset so that the columns are on a similar scale.

    fmin_args : dict, optional
        Parameters for the L-BFGS algorithm.
    """
    name = 'softmax'
    preprocessors = [HasClass(),
                     RemoveNaNColumns(),
                     Impute(),
                     Continuize(),
                     Normalize()]

    def __init__(self, lambda_=1.0, preprocessors=None, **fmin_args):
        super().__init__(preprocessors=preprocessors)
        self.lambda_ = lambda_
        self.fmin_args = fmin_args
        # Set during fit(); cost_grad needs it to reshape the flat
        # parameter vector handed over by the L-BFGS optimizer.
        self.num_classes = None

    def cost_grad(self, theta_flat, X, Y):
        """Return (cost, gradient) of the regularized cross-entropy.

        Parameters
        ----------
        theta_flat : 1-D array of length num_classes * X.shape[1];
            flat because fmin_l_bfgs_b works on flat parameter vectors.
        X : 2-D data matrix (bias column already appended by fit).
        Y : 2-D one-hot encoded class matrix, shape (n, num_classes).

        Returns
        -------
        cost : float
        grad : 1-D array, same length as theta_flat.
        """
        theta = theta_flat.reshape((self.num_classes, X.shape[1]))

        M = X.dot(theta.T)
        # Subtract the row-wise max before exponentiating: a numerically
        # stable softmax (result is unchanged, overflow is avoided).
        P = np.exp(M - np.max(M, axis=1)[:, None])
        P /= np.sum(P, axis=1)[:, None]

        cost = -np.sum(np.log(P) * Y)
        # NOTE(review): the L2 term covers the whole flat vector, so the
        # bias column appended in fit() is regularized too — confirm this
        # is intended (textbook softmax usually leaves the bias out).
        cost += self.lambda_ * theta_flat.dot(theta_flat) / 2.0
        cost /= X.shape[0]

        grad = X.T.dot(P - Y).T
        grad += self.lambda_ * theta
        grad /= X.shape[0]

        # Flatten back so the optimizer sees a 1-D gradient.
        return cost, grad.ravel()

    def fit(self, X, Y, W=None):
        """Fit the model and return a SoftmaxRegressionModel.

        Raises
        ------
        ValueError
            If Y is multi-label (2-D) or if X/Y contain NaN values.

        Note: the sample-weight argument W is accepted for interface
        compatibility but is ignored by this learner.
        """
        if len(Y.shape) > 1:
            raise ValueError('Softmax regression does not support '
                             'multi-label classification')

        if np.isnan(np.sum(X)) or np.isnan(np.sum(Y)):
            raise ValueError('Softmax regression does not support '
                             'unknown values')

        # Append a constant column so the bias is learned as an
        # ordinary parameter.
        X = np.hstack((X, np.ones((X.shape[0], 1))))

        self.num_classes = np.unique(Y).size
        # One-hot encode the class labels.
        Y = np.eye(self.num_classes)[Y.ravel().astype(int)]

        theta = np.zeros(self.num_classes * X.shape[1])
        theta, j, ret = fmin_l_bfgs_b(self.cost_grad, theta,
                                      args=(X, Y), **self.fmin_args)
        theta = theta.reshape((self.num_classes, X.shape[1]))

        return SoftmaxRegressionModel(theta)
class SoftmaxRegressionModel(Model):
    """Prediction model produced by SoftmaxRegressionLearner.

    Holds the fitted parameter matrix ``theta`` of shape
    (num_classes, num_features + 1); the last column is the bias.
    """

    def __init__(self, theta):
        super().__init__()
        self.theta = theta

    def predict(self, X):
        """Return a (n_rows, num_classes) matrix of class probabilities."""
        # Append the constant bias column, mirroring what fit() did.
        X = np.hstack((X, np.ones((X.shape[0], 1))))
        scores = X.dot(self.theta.T)
        # Numerically stable softmax: shift each row by its max.
        probs = np.exp(scores - np.max(scores, axis=1)[:, None])
        probs /= np.sum(probs, axis=1)[:, None]
        return probs


if __name__ == '__main__':
    import Orange.data

    def numerical_grad(f, params, e=1e-4):
        """Central-difference estimate of the gradient of f at params."""
        grad = np.zeros_like(params)
        perturb = np.zeros_like(params)
        for i in range(params.size):
            perturb[i] = e
            j1 = f(params - perturb)
            j2 = f(params + perturb)
            grad[i] = (j2 - j1) / (2.0 * e)
            perturb[i] = 0
        return grad

    d = Orange.data.Table('iris')

    # Gradient check: the analytic gradient from cost_grad should match
    # the numerical finite-difference estimate at a random point.
    m = SoftmaxRegressionLearner(lambda_=1.0)
    m.num_classes = 3
    Theta = np.random.randn(3 * 4)
    Y = np.eye(3)[d.Y.ravel().astype(int)]

    analytic = m.cost_grad(Theta, d.X, Y)[1]
    numeric = numerical_grad(lambda t: m.cost_grad(t, d.X, Y)[0], Theta)

    print(analytic)
    print(numeric)