Source code for Orange.data.instance

from itertools import chain
from math import isnan
from numbers import Real, Integral

import numpy as np

from Orange.data import Value, Unknown, DiscreteVariable

__all__ = ["Instance"]


class Instance:
    """
    A single data instance whose values are described by a domain.

    Values are stored in three 1-dimensional numpy arrays: `_x` for
    attributes, `_y` for class variables and `_metas` for meta attributes.
    When indexing with integers, non-negative indices address attributes
    followed by class variables, while negative indices address meta
    attributes (``-1`` is the first meta, ``-2`` the second, ...).
    """

    def __init__(self, domain, data=None, id=None):
        """
        Construct a new data instance.

        :param domain: domain that describes the instance's variables
        :type domain: Orange.data.Domain
        :param data: instance's values
        :type data: Orange.data.Instance or a sequence of values
        :param id: instance id
        :type id: hashable value
        """
        # Copy-constructor form: Instance(other_instance).
        if data is None and isinstance(domain, Instance):
            data = domain
            domain = data.domain

        self._domain = domain
        if data is None:
            # No values given: fill everything with unknowns.
            self._x = np.repeat(Unknown, len(domain.attributes))
            self._y = np.repeat(Unknown, len(domain.class_vars))
            self._metas = np.array([var.Unknown for var in domain.metas],
                                   dtype=object)
            self._weight = 1
        elif isinstance(data, Instance) and data.domain == domain:
            # Same domain: copy the arrays so the two instances are
            # independent of each other.
            self._x = np.array(data._x)
            self._y = np.atleast_1d(np.array(data._y))
            self._metas = np.array(data._metas)
            self._weight = data._weight
        else:
            # Anything else is converted by the domain.
            self._x, self._y, self._metas = domain.convert(data)
            self._y = np.atleast_1d(self._y)
            self._weight = 1

        if id is not None:
            self.id = id
        else:
            # Imported here rather than at module level.
            from Orange.data import Table
            self.id = Table.new_id()

    @property
    def domain(self):
        """The domain describing the instance's values."""
        return self._domain

    @property
    def x(self):
        """
        Instance's attributes as a 1-dimensional numpy array whose length
        equals `len(self.domain.attributes)`.
        """
        return self._x

    @property
    def y(self):
        """
        Instance's classes as a 1-dimensional numpy array whose length
        equals `len(self.domain.class_vars)`.
        """
        return self._y

    @property
    def metas(self):
        """
        Instance's meta attributes as a 1-dimensional numpy array whose
        length equals `len(self.domain.metas)`.
        """
        return self._metas

    @property
    def list(self):
        """
        All instance's values, including attributes, classes and meta
        attributes, as a list whose length equals
        `len(self.domain.attributes) + len(self.domain.class_vars)
        + len(self.domain.metas)`.
        """
        n_self, n_metas = len(self), len(self._metas)
        # Positions past the attributes and classes are translated into the
        # negative indices (-1, -2, ...) that address meta attributes.
        return [self[i].value if i < n_self else self[n_self - i - 1].value
                for i in range(n_self + n_metas)]

    @property
    def weight(self):
        """The weight of the data instance. Default is 1."""
        return self._weight

    @weight.setter
    def weight(self, weight):
        self._weight = weight

    def __setitem__(self, key, value):
        """
        Set the value at `key`.

        `key` is either an integer index or anything accepted by the
        domain's `index` method. The value is converted with the
        variable's `to_val` before being stored.

        :raises TypeError: if the converted value for an attribute or a
            class variable is not an `int` or a `float`
        """
        if not isinstance(key, Integral):
            key = self._domain.index(key)
        value = self._domain[key].to_val(value)
        # Only attributes and classes (non-negative indices) are checked;
        # metas live in an object array and are stored as converted.
        if key >= 0 and not isinstance(value, (int, float)):
            raise TypeError("Expected primitive value, got '%s'" %
                            type(value).__name__)

        if 0 <= key < len(self._domain.attributes):
            self._x[key] = value
        elif len(self._domain.attributes) <= key:
            self._y[key - len(self.domain.attributes)] = value
        else:
            self._metas[-1 - key] = value

    def __getitem__(self, key):
        """
        Return the value at `key` wrapped in a `Value`.

        `key` is either an integer index or anything accepted by the
        domain's `index` method.
        """
        idx = key if isinstance(key, Integral) else self._domain.index(key)
        if 0 <= idx < len(self._domain.attributes):
            value = self._x[idx]
        elif idx >= len(self._domain.attributes):
            if self._y.ndim == 0:
                value = self._y
            else:
                value = self._y[idx - len(self.domain.attributes)]
        else:
            value = self._metas[-1 - idx]

        var = self._domain[idx]
        # Indexed with a DiscreteVariable object that is not the one stored
        # in the domain: translate the stored value with the key's mapper
        # and report the value in terms of the key.
        if isinstance(key, DiscreteVariable) and var is not key:
            value = key.get_mapper_from(var)(value)
            var = key
        return Value(var, value)

    #TODO Should we return an instance of `object` if we have a meta attribute
    #     that is not Discrete or Continuous? E.g. when we have strings, we'd
    #     like to be able to use startswith, lower etc...
    #     Or should we even return Continuous as floats and use Value only
    #     for discrete attributes?!
    #     Same in Table.__getitem__

    @staticmethod
    def str_values(data, variables, limit=True):
        """
        Format `data` as a comma-separated string using each variable's
        `str_val`.

        :param data: sequence of values
        :param variables: variables corresponding to the values in `data`
        :param limit: if true, show at most the first five values and
            append ``", ..."`` when `data` has more than five
        :return: the formatted string
        """
        if limit:
            s = ", ".join(var.str_val(val)
                          for var, val in zip(variables, data[:5]))
            if len(data) > 5:
                s += ", ..."
            return s
        return ", ".join(var.str_val(val)
                         for var, val in zip(variables, data))

    def _str(self, limit):
        """
        Build the string form ``[attributes | classes] {metas}``.

        The class and meta parts are included only if the domain has class
        variables and meta attributes, respectively. `limit` is passed on
        to :obj:`str_values`.
        """
        s = "[" + self.str_values(self._x, self._domain.attributes, limit)
        if self._domain.class_vars:
            s += " | " + \
                 self.str_values(self._y, self._domain.class_vars, limit)
        s += "]"
        if self._domain.metas:
            s += " {" + \
                 self.str_values(self._metas, self._domain.metas, limit) + \
                 "}"
        return s

    def __str__(self):
        return self._str(False)

    def __repr__(self):
        return self._str(True)

    def __eq__(self, other):
        """
        Compare the values of two instances.

        An `other` that is not an `Instance` is first converted into one
        using this instance's domain. Unknown (nan) values are considered
        equal when they appear at the same positions.
        """
        if not isinstance(other, Instance):
            other = Instance(self._domain, other)

        def same(x1, x2):
            # nan != nan, so compare the nan masks and the remaining
            # values separately.
            nan1 = np.isnan(x1)
            nan2 = np.isnan(x2)
            return np.array_equal(nan1, nan2) and \
                np.array_equal(x1[~nan1], x2[~nan2])

        return same(self._x, other._x) and same(self._y, other._y) \
            and all(m1 == m2 or
                    type(m1) == type(m2) == float and
                    isnan(m1) and isnan(m2)
                    for m1, m2 in zip(self._metas, other._metas))

    @classmethod
    def __hash__(cls):
        """Instances are not hashable; always raises `TypeError`."""
        # Report the class name itself; `type(cls.__name__)` would always
        # be `str` and produce a misleading message.
        raise TypeError(f"unhashable type: '{cls.__name__}'")

    def __iter__(self):
        """Iterate over raw attribute values followed by class values."""
        return chain(iter(self._x), iter(self._y))

    def values(self):
        """
        Return a generator of `Value` objects for attributes and classes,
        pairing `self.domain.variables` with the raw values.
        """
        return (Value(var, val)
                for var, val in zip(self.domain.variables, self))

    def __len__(self):
        """Number of attributes plus number of class variables."""
        return len(self._x) + len(self._y)

    def attributes(self):
        """Return iterator over the instance's attributes"""
        return iter(self._x)

    def classes(self):
        """Return iterator over the instance's class attributes"""
        return iter(self._y)

    # A helper function for get_class and set_class
    def _check_single_class(self):
        if not self._domain.class_vars:
            raise TypeError("Domain has no class variable")
        elif len(self._domain.class_vars) > 1:
            raise TypeError("Domain has multiple class variables")

    def get_class(self):
        """
        Return the class value as an instance of :obj:`Orange.data.Value`.
        Throws an exception if there are multiple classes.
        """
        self._check_single_class()
        return Value(self._domain.class_var, self._y[0])

    def get_classes(self):
        """
        Return the class values as an iterator over instances of
        :obj:`Orange.data.Value`, one for each class variable.
        """
        return (Value(var, value)
                for var, value in zip(self._domain.class_vars, self._y))

    def set_class(self, value):
        """
        Set the instance's class. Throws an exception if there are multiple
        classes.
        """
        self._check_single_class()
        # Real numbers are stored directly; anything else is converted by
        # the class variable first.
        if not isinstance(value, Real):
            self._y[0] = self._domain.class_var.to_val(value)
        else:
            self._y[0] = value