import numpy as np
import scipy.spatial
from Orange.preprocess.preprocess import RemoveNaNRows, Continuize, Scale
from Orange.projection import LinearProjector, DomainProjection
__all__ = ["FreeViz"]
class FreeVizModel(DomainProjection):
var_prefix = "freeviz"
[docs]
class FreeViz(LinearProjector):
name = 'FreeViz'
supports_sparse = False
preprocessors = [RemoveNaNRows(),
Continuize(multinomial_treatment=Continuize.FirstAsBase),
Scale(scale=Scale.Span)]
projection = FreeVizModel
def __init__(self, weights=None, center=True, scale=True, dim=2, p=1,
initial=None, maxiter=500, alpha=0.1, gravity=None,
atol=1e-5, preprocessors=None):
super().__init__(preprocessors=preprocessors)
self.weights = weights
self.center = center
self.scale = scale
self.dim = dim
self.p = p
self.initial = initial
self.maxiter = maxiter
self.alpha = alpha
self.atol = atol
self.gravity = gravity
self.is_class_discrete = False
self.components_ = None
def __call__(self, data):
if data is not None:
self.is_class_discrete = data.domain.class_var.is_discrete
if len([attr for attr in data.domain.attributes
if attr.is_discrete and len(attr.values) > 2]):
raise ValueError("Can not handle discrete variables"
" with more than two values")
return super().__call__(data)
def get_components(self, X, Y):
return self.freeviz(
X, Y, weights=self.weights, center=self.center, scale=self.scale,
dim=self.dim, p=self.p, initial=self.initial,
maxiter=self.maxiter, alpha=self.alpha, atol=self.atol,
gravity=self.gravity,
is_class_discrete=self.is_class_discrete)[1].T
@classmethod
def squareform(cls, d):
"""
Parameters
----------
d : (N * (N - 1) // 2, ) ndarray
A hollow symmetric square array in condensed form
Returns
-------
D : (N, N) ndarray
A symmetric square array in redundant form.
See also
--------
scipy.spatial.distance.squareform
"""
assert d.ndim == 1
return scipy.spatial.distance.squareform(d, checks=False)
@classmethod
def row_v(cls, a):
"""
Return a view of `a` as a row vector.
"""
return a.reshape((1, -1))
@classmethod
def col_v(cls, a):
"""
Return a view of `a` as a column vector.
"""
return a.reshape((-1, 1))
@classmethod
def allclose(cls, a, b, rtol=1e-5, atol=1e-8, equal_nan=False):
# same as np.allclose in numpy==1.10
return np.all(np.isclose(a, b, rtol, atol, equal_nan=equal_nan))
@classmethod
def forces_regression(cls, distances, y, p=1):
y = np.asarray(y)
ydist = scipy.spatial.distance.pdist(y.reshape(-1, 1), "sqeuclidean")
mask = distances > np.finfo(distances.dtype).eps * 100
F = ydist
if p == 1:
F[mask] /= distances[mask]
else:
F[mask] /= distances[mask] ** p
return F
@classmethod
def forces_classification(cls, distances, y, p=1, gravity=None):
diffclass = scipy.spatial.distance.pdist(y.reshape(-1, 1), "hamming") != 0
# handle attractive force
if p == 1:
F = -distances
else:
F = -(distances ** p)
# handle repulsive force
mask = (diffclass &
(distances > np.finfo(distances.dtype).eps * 100))
assert mask.shape == F.shape and mask.dtype == bool
if p == 1:
F[mask] = 1 / distances[mask]
else:
F[mask] = 1 / (distances[mask] ** p)
if gravity is not None:
F[mask] *= -np.sum(F[~mask]) / np.sum(F[mask]) / gravity
return F
@classmethod
def gradient(cls, X, embeddings, forces, embedding_dist=None, weights=None):
X = np.asarray(X)
embeddings = np.asarray(embeddings)
if weights is not None:
weights = np.asarray(weights)
if weights.ndim != 1:
raise ValueError("weights.ndim != 1 ({})".format(weights.ndim))
N, P = X.shape
_, dim = embeddings.shape
if not N == embeddings.shape[0]:
raise ValueError("X and embeddings must have the same length ({}!={})"
.format(X.shape[0], embeddings.shape[0]))
if weights is not None and X.shape[0] != weights.shape[0]:
raise ValueError("X.shape[0] != weights.shape[0] ({}!={})"
.format(X.shape[0], weights.shape[0]))
# all pairwise vector differences between embeddings
embedding_diff = (embeddings[:, np.newaxis, :] -
embeddings[np.newaxis, :, :])
assert embedding_diff.shape == (N, N, dim)
assert cls.allclose(embedding_diff[0, 1], embeddings[0] - embeddings[1])
assert cls.allclose(embedding_diff[1, 0], -embedding_diff[0, 1])
# normalize the direction vectors to unit direction vectors
if embedding_dist is not None:
# use supplied precomputed distances
diff_norm = cls.squareform(embedding_dist)
else:
diff_norm = np.linalg.norm(embedding_diff, axis=2)
mask = diff_norm > np.finfo(diff_norm.dtype).eps * 100
embedding_diff[mask] /= diff_norm[mask][:, np.newaxis]
forces = cls.squareform(forces)
if weights is not None:
# multiply in the instance weights
forces *= cls.row_v(weights)
forces *= cls.col_v(weights)
# multiply unit direction vectors with the force magnitude
F = embedding_diff * forces[:, :, np.newaxis]
assert F.shape == (N, N, dim)
# sum all the forces acting on a particle
F = np.sum(F, axis=0)
assert F.shape == (N, dim)
# Transfer forces to the 'anchors'
# (P, dim) array of gradients
G = X.T.dot(F)
assert G.shape == (P, dim)
return G
@classmethod
def freeviz_gradient(cls, X, y, embedding, p=1, weights=None,
gravity=None, is_class_discrete=False):
"""
Return the gradient for the FreeViz [1]_ projection.
Parameters
----------
X : (N, P) ndarray
The data instance coordinates
y : (N,) ndarray
The instance target/class values
embedding : (N, dim) ndarray
The current FreeViz point embeddings.
p : positive number
The force 'power', e.g. if p=1 (default) the attractive/repulsive
forces follow linear/inverse linear law, for p=2 the forces follow
square/inverse square law, ...
weights : (N, ) ndarray, optional
Optional vector of sample weights.
Returns
-------
G : (P, dim) ndarray
The projection gradient.
.. [1] Janez Demsar, Gregor Leban, Blaz Zupan
FreeViz - An Intelligent Visualization Approach for Class-Labeled
Multidimensional Data Sets, Proceedings of IDAMAP 2005, Edinburgh.
"""
X = np.asarray(X)
y = np.asarray(y)
embedding = np.asarray(embedding)
assert X.ndim == 2 and X.shape[0] == y.shape[0] == embedding.shape[0]
D = scipy.spatial.distance.pdist(embedding)
if is_class_discrete:
forces = cls.forces_classification(D, y, p=p, gravity=gravity)
else:
forces = cls.forces_regression(D, y, p=p)
G = cls.gradient(X, embedding, forces, embedding_dist=D, weights=weights)
return G
@classmethod
def _rotate(cls, A):
"""
Rotate a 2D projection A so the first axis (row in A) is aligned with
vector (1, 0).
"""
assert A.ndim == 2 and A.shape[1] == 2
phi = np.arctan2(A[0, 1], A[0, 0])
R = [[np.cos(-phi), np.sin(-phi)],
[-np.sin(-phi), np.cos(-phi)]]
return np.dot(A, R)
@classmethod
def freeviz(cls, X, y, weights=None, center=True, scale=True, dim=2, p=1,
initial=None, maxiter=500, alpha=0.1, atol=1e-5, gravity=None,
is_class_discrete=False):
"""
FreeViz
Compute a linear lower dimensional projection to optimize separation
between classes ([1]_).
Parameters
----------
X : (N, P) ndarray
The input data instances
y : (N, ) ndarray
The instance class labels
weights : (N, ) ndarray, optional
Instance weights
center : bool or (P,) ndarray
If `True` then X will have mean subtracted out, if False no
centering is performed. Alternatively can be a P vector to subtract
from X.
scale : bool or (P,) ndarray
If `True` the X's column will be scaled by 1/SD, if False no scaling
is performed. Alternatively can be a P vector to divide X by.
dim : int
The dimension of the projected points/embedding.
p : positive number
The force 'power', e.g. if p=1 (default) the attractive/repulsive
forces follow linear/inverse linear law, for p=2 the forces follow
square/inverse square law, ...
initial : (P, dim) ndarray, optional
Initial projection matrix
maxiter : int
Maximum number of iterations.
alpha : float
The step size ('learning rate')
atol : float
Terminating numerical tolerance (absolute).
Returns
-------
embeddings : (N, dim) ndarray
The point projections (`= X.dot(P)`)
projection : (P, dim)
The projection matrix.
center : (P,) ndarray or None
The translation applied to X (if any).
scale : (P,) ndarray or None
The scaling applied to X (if any).
.. [1] Janez Demsar, Gregor Leban, Blaz Zupan
FreeViz - An Intelligent Visualization Approach for Class-Labeled
Multidimensional Data Sets, Proceedings of IDAMAP 2005, Edinburgh.
"""
needcopy = center is not False or scale is not False
X = np.array(X, copy=needcopy)
y = np.asarray(y)
N, P = X.shape
_N, = y.shape
if N != _N:
raise ValueError("X and y must have the same length")
if weights is not None:
weights = np.asarray(weights)
if isinstance(center, bool):
if center:
center = np.mean(X, axis=0)
else:
center = None
else:
center = np.asarray(center, dtype=X.dtype)
if center.shape != (P, ):
raise ValueError("center.shape != (X.shape[1], ) ({} != {})"
.format(center.shape, (X.shape[1], )))
if isinstance(scale, bool):
if scale:
scale = np.std(X, axis=0)
else:
scale = None
else:
scale = np.asarray(scale, dtype=X.dtype)
if scale.shape != (P, ):
raise ValueError("scale.shape != (X.shape[1],) ({} != {}))"
.format(scale.shape, (P, )))
if initial is not None:
initial = np.asarray(initial)
if initial.ndim != 2 or initial.shape != (P, dim):
raise ValueError
else:
initial = cls.init_random(P, dim)
# initial = np.random.random((P, dim)) * 2 - 1
# Center/scale X if requested
if center is not None:
X -= center
if scale is not None:
scalenonzero = np.abs(scale) > np.finfo(scale.dtype).eps
X[:, scalenonzero] /= scale[scalenonzero]
A = initial
embeddings = np.dot(X, A)
step_i = 0
while step_i < maxiter:
G = cls.freeviz_gradient(X, y, embeddings, p=p, weights=weights,
gravity=gravity,
is_class_discrete=is_class_discrete)
# Scale the changes (the largest anchor move is alpha * radius)
with np.errstate(divide="ignore"): # inf's will be ignored by min
step = np.min(np.linalg.norm(A, axis=1)
/ np.linalg.norm(G, axis=1))
if not np.isfinite(step):
break
step = alpha * step
Anew = A - step * G
# Center anchors (?? This does not seem right; it changes the
# projection axes direction somewhat arbitrarily)
Anew = Anew - np.mean(Anew, axis=0)
# Scale (so that the largest radius is 1)
maxr = np.max(np.linalg.norm(Anew, axis=1))
if maxr >= 0.001:
Anew /= maxr
change = np.linalg.norm(Anew - A, axis=1)
if cls.allclose(change, 0, atol=atol):
break
A = Anew
embeddings = np.dot(X, A)
step_i = step_i + 1
if dim == 2:
A = cls._rotate(A)
return embeddings, A, center, scale
@staticmethod
def init_radial(p):
"""
Return a 2D projection with a circular anchor placement.
"""
assert p > 0
if p == 1:
axes_angle = [0]
elif p == 2:
axes_angle = [0, np.pi / 2]
else:
axes_angle = np.linspace(0, 2 * np.pi, p, endpoint=False)
A = np.c_[np.cos(axes_angle), np.sin(axes_angle)]
return A
@staticmethod
def init_random(p, dim, rstate=None):
if not isinstance(rstate, np.random.RandomState):
rstate = np.random.RandomState(rstate if rstate is not None else 0)
return rstate.rand(p, dim) * 2 - 1