Source code for Orange.regression.simple_random_forest

import numpy as np

from Orange.regression import Learner
from Orange.classification.simple_random_forest import SimpleRandomForestModel as SRFM

__all__ = ['SimpleRandomForestLearner']

class SimpleRandomForestLearner(Learner):
    """
    Speed-oriented random forest learner for regression.

    The individual trees of the forest are built with
    :obj:`SimpleTreeLearner`.

    Parameters
    ----------
    n_estimators : int, optional (default = 10)
        Number of trees in the forest.

    min_instances : int, optional (default = 2)
        Minimal number of data instances in leaves. When growing the tree,
        new nodes are not introduced if they would result in leaves with
        fewer instances than ``min_instances``. Instance count is weighted.

    max_depth : int, optional (default = 1024)
        Maximal depth of a tree.

    max_majority : float, optional (default = 1.0)
        Maximal proportion of the majority class. When this is exceeded,
        induction stops (only used for classification).

    skip_prob : string or float, optional (default = "sqrt")
        Data attribute will be skipped with probability ``skip_prob``.

        - if float, then skip attribute with this probability.
        - if "sqrt", then `skip_prob = 1 - sqrt(n_features) / n_features`
        - if "log2", then `skip_prob = 1 - log2(n_features) / n_features`

    seed : int, optional (default = 42)
        Random seed.
    """

    name = 'simple rf reg'

    def __init__(self, n_estimators=10, min_instances=2, max_depth=1024,
                 max_majority=1.0, skip_prob='sqrt', seed=42):
        """Store the forest hyper-parameters; no training happens here."""
        super().__init__()
        # Plain attribute storage, kept in the same order as the signature.
        self.n_estimators = n_estimators
        self.min_instances = min_instances
        self.max_depth = max_depth
        self.max_majority = max_majority
        self.skip_prob = skip_prob
        self.seed = seed

    def fit_storage(self, data):
        """Return a :obj:`SimpleRandomForestModel` built from this learner and ``data``."""
        model = SimpleRandomForestModel(self, data)
        return model
class SimpleRandomForestModel(SRFM):
    """Regression forest model that predicts the mean of its trees' outputs."""

    def __init__(self, learner, data):
        """
        Start with an empty tree list and delegate training to ``self.learn``.

        ``learn`` is not defined in this class; presumably it is inherited
        from the base model and fills ``estimators_`` -- confirm against the
        base class.
        """
        self.estimators_ = []
        self.learn(learner, data)

    def predict(self, X):
        """
        Average the predictions of all trees in ``estimators_``.

        Parameters
        ----------
        X : array-like with a ``shape`` attribute
            Data to predict on; one value is returned per row
            (``X.shape[0]`` values in total).

        Returns
        -------
        numpy.ndarray
            Sum of the per-tree predictions divided by the number of trees.
        """
        mean_prediction = np.zeros(X.shape[0])
        # Convert once up front so that the same conversion is a no-op
        # inside every individual tree.
        X = np.ascontiguousarray(X)
        for estimator in self.estimators_:
            # SimpleTrees do not have preprocessors, and domain conversion
            # was already handled within this class, so estimator.predict()
            # is called directly instead of going through
            # estimator.__call__.
            mean_prediction += estimator.predict(X)
        mean_prediction /= len(self.estimators_)
        return mean_prediction