# HG changeset patch # User JD Walsh # Date 1414293018 14400 # Node ID 15e4e467c9c086f65395c9fbff39d9d8546acd35 # Parent 30a92d9cbe89ad7f5a3068f83705c1e391804870 cmdscale rewritten for MatLab compatibility diff -r 30a92d9cbe89 -r 15e4e467c9c0 inst/cmdscale.m --- a/inst/cmdscale.m Fri Oct 17 15:00:28 2014 +0200 +++ b/inst/cmdscale.m Sat Oct 25 23:10:18 2014 -0400 @@ -1,54 +1,139 @@ -## Copyright (C) 2014 Nir Krakauer +## Copyright (C) 2014 JD Walsh ## -## This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. +## This program is free software; you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 3 of the License, or +## (at your option) any later version. ## -## This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. +## This program is distributed in the hope that it will be useful, +## but WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +## GNU General Public License for more details. ## -## You should have received a copy of the GNU General Public License along with this program; if not, see . +## You should have received a copy of the GNU General Public License +## along with this program; if not, see . ## -*- texinfo -*- ## @deftypefn {Function File} @var{Y} = cmdscale (@var{D}) -## @deftypefnx{Function File} address@hidden, @var{e}] = cmdscale (@var{D}) -## Classical multidimensional scaling of a matrix. Also known as principal coordinates analysis. +## @deftypefnx{Function File} address@hidden, @var{e} ] = cmdscale (@var{D}) +## Classical multidimensional scaling of a matrix. ## -## Given an @var{n} by @var{n} Euclidean distance matrix @var{D}, find @var{n} points in @var{p} dimensional space which have this distance matrix. The coordinates of the points @var{Y} are returned. -## -## @var{D} should be a full distance matrix (hollow, symmetric, entries obeying the triangle inequality), or can be a vector of length @code{n(n-1)/2} containing the upper triangular elements of the distance matrix (such as that returned by the pdist function). If @var{D} is not a valid distance matrix, points @var{Y} will be returned whose distance matrix approximates @var{D}. +## Takes an @var{n} by @var{n} distance (or difference, similarity, or +## dissimilarity) matrix @var{D}. Returns @var{Y}, a matrix of @var{n} points +## with coordinates in @var{p} dimensional space which approximate those +## distances (or differences, similarities, or dissimilarities). Also returns +## the eigenvalues @var{e} of +## @address@hidden = -1/2 * @var{J} * (@var{D}.^2) * @var{J}}, where +## @code{J = eye(@var{n}) - ones(@var{n},@var{n})/@var{n}}. @var{p}, the number +## of columns of @var{Y}, is equal to the number of positive real eigenvalues of +## @var{B}. ## -## The returned @var{Y} is an @var{n} by @var{p} matrix showing possible coordinates of the points in @var{p} dimensional space (@code{p < n}). Of course, any translation, rotation, or reflection of these would also have the same distance matrix. -## -## Can also return the eigenvalues @var{e} of @code{(D(1, :) .^ 2 + D(:, 1) .^ 2 - D .^ 2) / 2}, where the number of positive eigenvalues determines @var{p}. +## @var{D} can be a full or sparse matrix or a vector of length +## @address@hidden(@var{n}-1)/2} containing the upper triangular elements (like +## the output of the @code{pdist} function). It must be symmetric with +## non-negative entries whose values are further restricted by the type of +## matrix being represented: ## -## Reference: Rudolf Mathar (1985), The best Euclidian fit to a given distance matrix in prescribed dimensions, Linear Algebra and its Applications, 67: 1-6, doi: 10.1016/0024-3795(85)90181-8 -## -## @seealso{pdist, squareform} +## * If @var{D} is either a distance, dissimilarity, or difference matrix, then +## it must have zero entries along the main diagonal. In this case the points +## @var{Y} equal or approximate the distances given by @var{D}. +## +## * If @var{D} is a similarity matrix, the elements must all be less than or +## equal to one, with ones along the the main diagonal. In this case the points +## @var{Y} equal or approximate the distances given by +## @address@hidden = sqrt(ones(@var{n},@var{n})address@hidden)}. +## +## @var{D} is a Euclidean matrix if and only if @var{B} is positive +## semi-definite. When this is the case, then @var{Y} is an exact representation +## of the distances given in @var{D}. If @var{D} is non-Euclidean, @var{Y} only +## approximates the distance given in @var{D}. The approximation used by +## @code{cmdscale} minimizes the statistical loss function known as +## @var{strain}. +## +## The returned @var{Y} is an @var{n} by @var{p} matrix showing possible +## coordinates of the points in @var{p} dimensional space +## (@address@hidden < @var{n}}). The columns are correspond to the positive +## eigenvalues of @var{B} in descending order. A translation, rotation, or +## reflection of the coordinates given by @var{Y} will satisfy the same distance +## matrix up to the limits of machine precision. +## +## For any @address@hidden <= @var{p}}, if the largest @var{k} positive +## eigenvalues of @var{B} are significantly greater in absolute magnitude than +## its other eigenvalues, the first @var{k} columns of @var{Y} provide a +## @var{k}-dimensional reduction of @var{Y} which approximates the distances +## given by @var{D}. The optional return @var{e} can be used to consider various +## values of @var{k}, or to evaluate the accuracy of specific dimension +## reductions (e.g., @address@hidden = 2}). +## +## Reference: Ingwer Borg and Patrick J.F. Groenen (2005), Modern +## Multidimensional Scaling, Second Edition, Springer, ISBN: 978-0-387-25150-9 +## (Print) 978-0-387-28981-6 (Online) +## +## @seealso{pdist} ## @end deftypefn -## Author: Nir Krakauer +## Author: JD Walsh +## Created: 2014-10-25 ## Description: Classical multidimensional scaling +## Keywords: multidimensional-scaling mds distance clustering + +## TO DO: include missing functions `mdscale' and `procrustes' in @seealso function [Y, e] = cmdscale (D) - + + % Check for input + if (nargin == 0) + print_usage (); + endif + + % Convert vector input to matrix if isvector (D) D = squareform (D); endif - - warning ("off", "Octave:broadcast","local"); - M = (D(1, :) .^ 2 + D(:, 1) .^ 2 - D .^ 2) / 2; - [v e] = eig(M); - - e = diag(e); - pe = (e > 0); #positive eigenvalues - - Y = v(:, pe) * diag(sqrt(e(pe))); - - + n = size (D,1); + % Check for valid format (see help above); If similarity matrix, convert + if (~all (D >= 0)) + print_usage (); + elseif (trace (D) ~= 0) + if ((~all (diag (D) == 1)) || (~all (D <= 1))) + print_usage (); + endif + D = sqrt (ones (n,n) - D); + endif + + % Build centering matrix, perform double centering, extract eigenpairs + J = eye (n) - ones (n,n) / n; + B = -1 / 2 * J * (D .^ 2) * J; + [Q, e] = eig (B); + e = diag (e); + etmp = e; + + % Remove complex eigenpairs (only possible due to machine approximation) + if (iscomplex (etmp)) + for i = 1 : size (etmp,1) + cmp(i) = (isreal (etmp(i))); + endfor + etmp = etmp(cmp); + Q = Q(:,cmp); + endif + + % Order eigenpairs + [etmp, ord] = sort (etmp, 'descend'); + Q = Q(:,ord); + + % Remove negative eigenpairs + cmp = (etmp > 0); + etmp = etmp(cmp); + Q = Q(:,cmp); + + % Build output matrix Y + Y = Q * diag (sqrt (etmp)); + endfunction +%!shared m, n, X, D +%! m = randi(100) + 1; n = randi(100) + 1; X = rand(m, n); D = pdist(X); +%!assert(norm(pdist(cmdscale(D))), norm(D), sqrt(eps)) +%!assert(norm(pdist(cmdscale(squareform(D)))), norm(D), sqrt(eps)) -%!shared m, n, X, D -%! m = 4; n = 3; X = rand(m, n); D = pdist(X); -%!assert(pdist(cmdscale(D)), D, m*n*eps) -%!assert(pdist(cmdscale(squareform(D))), D, m*n*eps) -