# HG changeset patch
# User JD Walsh
# Date 1414293018 14400
# Node ID 15e4e467c9c086f65395c9fbff39d9d8546acd35
# Parent 30a92d9cbe89ad7f5a3068f83705c1e391804870
cmdscale rewritten for MatLab compatibility
diff -r 30a92d9cbe89 -r 15e4e467c9c0 inst/cmdscale.m
--- a/inst/cmdscale.m Fri Oct 17 15:00:28 2014 +0200
+++ b/inst/cmdscale.m Sat Oct 25 23:10:18 2014 -0400
@@ -1,54 +1,139 @@
-## Copyright (C) 2014 Nir Krakauer
+## Copyright (C) 2014 JD Walsh
##
-## This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version.
+## This program is free software; you can redistribute it and/or modify
+## it under the terms of the GNU General Public License as published by
+## the Free Software Foundation; either version 3 of the License, or
+## (at your option) any later version.
##
-## This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+## This program is distributed in the hope that it will be useful,
+## but WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+## GNU General Public License for more details.
##
-## You should have received a copy of the GNU General Public License along with this program; if not, see .
+## You should have received a copy of the GNU General Public License
+## along with this program; if not, see .
## -*- texinfo -*-
## @deftypefn {Function File} @var{Y} = cmdscale (@var{D})
-## @deftypefnx{Function File} address@hidden, @var{e}] = cmdscale (@var{D})
-## Classical multidimensional scaling of a matrix. Also known as principal coordinates analysis.
+## @deftypefnx{Function File} address@hidden, @var{e} ] = cmdscale (@var{D})
+## Classical multidimensional scaling of a matrix.
##
-## Given an @var{n} by @var{n} Euclidean distance matrix @var{D}, find @var{n} points in @var{p} dimensional space which have this distance matrix. The coordinates of the points @var{Y} are returned.
-##
-## @var{D} should be a full distance matrix (hollow, symmetric, entries obeying the triangle inequality), or can be a vector of length @code{n(n-1)/2} containing the upper triangular elements of the distance matrix (such as that returned by the pdist function). If @var{D} is not a valid distance matrix, points @var{Y} will be returned whose distance matrix approximates @var{D}.
+## Takes an @var{n} by @var{n} distance (or difference, similarity, or
+## dissimilarity) matrix @var{D}. Returns @var{Y}, a matrix of @var{n} points
+## with coordinates in @var{p} dimensional space which approximate those
+## distances (or differences, similarities, or dissimilarities). Also returns
+## the eigenvalues @var{e} of
+## @address@hidden = -1/2 * @var{J} * (@var{D}.^2) * @var{J}}, where
+## @code{J = eye(@var{n}) - ones(@var{n},@var{n})/@var{n}}. @var{p}, the number
+## of columns of @var{Y}, is equal to the number of positive real eigenvalues of
+## @var{B}.
##
-## The returned @var{Y} is an @var{n} by @var{p} matrix showing possible coordinates of the points in @var{p} dimensional space (@code{p < n}). Of course, any translation, rotation, or reflection of these would also have the same distance matrix.
-##
-## Can also return the eigenvalues @var{e} of @code{(D(1, :) .^ 2 + D(:, 1) .^ 2 - D .^ 2) / 2}, where the number of positive eigenvalues determines @var{p}.
+## @var{D} can be a full or sparse matrix or a vector of length
+## @address@hidden(@var{n}-1)/2} containing the upper triangular elements (like
+## the output of the @code{pdist} function). It must be symmetric with
+## non-negative entries whose values are further restricted by the type of
+## matrix being represented:
##
-## Reference: Rudolf Mathar (1985), The best Euclidian fit to a given distance matrix in prescribed dimensions, Linear Algebra and its Applications, 67: 1-6, doi: 10.1016/0024-3795(85)90181-8
-##
-## @seealso{pdist, squareform}
+## * If @var{D} is either a distance, dissimilarity, or difference matrix, then
+## it must have zero entries along the main diagonal. In this case the points
+## @var{Y} equal or approximate the distances given by @var{D}.
+##
+## * If @var{D} is a similarity matrix, the elements must all be less than or
+## equal to one, with ones along the the main diagonal. In this case the points
+## @var{Y} equal or approximate the distances given by
+## @address@hidden = sqrt(ones(@var{n},@var{n})address@hidden)}.
+##
+## @var{D} is a Euclidean matrix if and only if @var{B} is positive
+## semi-definite. When this is the case, then @var{Y} is an exact representation
+## of the distances given in @var{D}. If @var{D} is non-Euclidean, @var{Y} only
+## approximates the distance given in @var{D}. The approximation used by
+## @code{cmdscale} minimizes the statistical loss function known as
+## @var{strain}.
+##
+## The returned @var{Y} is an @var{n} by @var{p} matrix showing possible
+## coordinates of the points in @var{p} dimensional space
+## (@address@hidden < @var{n}}). The columns are correspond to the positive
+## eigenvalues of @var{B} in descending order. A translation, rotation, or
+## reflection of the coordinates given by @var{Y} will satisfy the same distance
+## matrix up to the limits of machine precision.
+##
+## For any @address@hidden <= @var{p}}, if the largest @var{k} positive
+## eigenvalues of @var{B} are significantly greater in absolute magnitude than
+## its other eigenvalues, the first @var{k} columns of @var{Y} provide a
+## @var{k}-dimensional reduction of @var{Y} which approximates the distances
+## given by @var{D}. The optional return @var{e} can be used to consider various
+## values of @var{k}, or to evaluate the accuracy of specific dimension
+## reductions (e.g., @address@hidden = 2}).
+##
+## Reference: Ingwer Borg and Patrick J.F. Groenen (2005), Modern
+## Multidimensional Scaling, Second Edition, Springer, ISBN: 978-0-387-25150-9
+## (Print) 978-0-387-28981-6 (Online)
+##
+## @seealso{pdist}
## @end deftypefn
-## Author: Nir Krakauer
+## Author: JD Walsh
+## Created: 2014-10-25
## Description: Classical multidimensional scaling
+## Keywords: multidimensional-scaling mds distance clustering
+
+## TO DO: include missing functions `mdscale' and `procrustes' in @seealso
function [Y, e] = cmdscale (D)
-
+
+ % Check for input
+ if (nargin == 0)
+ print_usage ();
+ endif
+
+ % Convert vector input to matrix
if isvector (D)
D = squareform (D);
endif
-
- warning ("off", "Octave:broadcast","local");
- M = (D(1, :) .^ 2 + D(:, 1) .^ 2 - D .^ 2) / 2;
- [v e] = eig(M);
-
- e = diag(e);
- pe = (e > 0); #positive eigenvalues
-
- Y = v(:, pe) * diag(sqrt(e(pe)));
-
-
+ n = size (D,1);
+ % Check for valid format (see help above); If similarity matrix, convert
+ if (~all (D >= 0))
+ print_usage ();
+ elseif (trace (D) ~= 0)
+ if ((~all (diag (D) == 1)) || (~all (D <= 1)))
+ print_usage ();
+ endif
+ D = sqrt (ones (n,n) - D);
+ endif
+
+ % Build centering matrix, perform double centering, extract eigenpairs
+ J = eye (n) - ones (n,n) / n;
+ B = -1 / 2 * J * (D .^ 2) * J;
+ [Q, e] = eig (B);
+ e = diag (e);
+ etmp = e;
+
+ % Remove complex eigenpairs (only possible due to machine approximation)
+ if (iscomplex (etmp))
+ for i = 1 : size (etmp,1)
+ cmp(i) = (isreal (etmp(i)));
+ endfor
+ etmp = etmp(cmp);
+ Q = Q(:,cmp);
+ endif
+
+ % Order eigenpairs
+ [etmp, ord] = sort (etmp, 'descend');
+ Q = Q(:,ord);
+
+ % Remove negative eigenpairs
+ cmp = (etmp > 0);
+ etmp = etmp(cmp);
+ Q = Q(:,cmp);
+
+ % Build output matrix Y
+ Y = Q * diag (sqrt (etmp));
+
endfunction
+%!shared m, n, X, D
+%! m = randi(100) + 1; n = randi(100) + 1; X = rand(m, n); D = pdist(X);
+%!assert(norm(pdist(cmdscale(D))), norm(D), sqrt(eps))
+%!assert(norm(pdist(cmdscale(squareform(D)))), norm(D), sqrt(eps))
-%!shared m, n, X, D
-%! m = 4; n = 3; X = rand(m, n); D = pdist(X);
-%!assert(pdist(cmdscale(D)), D, m*n*eps)
-%!assert(pdist(cmdscale(squareform(D))), D, m*n*eps)
-