# HG changeset patch
# User JD Walsh <address@hidden>
# Date 1414293018 14400
# Node ID 15e4e467c9c086f65395c9fbff39d9d8546acd35
# Parent  30a92d9cbe89ad7f5a3068f83705c1e391804870
cmdscale rewritten for MatLab compatibility

diff -r 30a92d9cbe89 -r 15e4e467c9c0 inst/cmdscale.m
--- a/inst/cmdscale.m	Fri Oct 17 15:00:28 2014 +0200
+++ b/inst/cmdscale.m	Sat Oct 25 23:10:18 2014 -0400
@@ -1,54 +1,139 @@
-## Copyright (C) 2014 Nir Krakauer
+## Copyright (C) 2014 JD Walsh <address@hidden>
 ##
-## This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version.
+## This program is free software; you can redistribute it and/or modify 
+## it under the terms of the GNU General Public License as published by
+## the Free Software Foundation; either version 3 of the License, or
+## (at your option) any later version.
 ##
-## This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+## This program is distributed in the hope that it will be useful,
+## but WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+## GNU General Public License for more details.
 ##
-## You should have received a copy of the GNU General Public License along with this program; if not, see <http://www.gnu.org/licenses/>.
+## You should have received a copy of the GNU General Public License
+## along with this program; if not, see <http://www.gnu.org/licenses/>.
 
 ## -*- texinfo -*-
 ## @deftypefn {Function File} @var{Y} = cmdscale (@var{D})
-## @deftypefnx{Function File} address@hidden, @var{e}] = cmdscale (@var{D})
-## Classical multidimensional scaling of a matrix. Also known as principal coordinates analysis.
+## @deftypefnx{Function File} address@hidden, @var{e} ] = cmdscale (@var{D})
+## Classical multidimensional scaling of a matrix.
 ##
-## Given an @var{n} by @var{n} Euclidean distance matrix @var{D}, find @var{n} points in @var{p} dimensional space which have this distance matrix. The coordinates of the points @var{Y} are returned.
-## 
-## @var{D} should be a full distance matrix (hollow, symmetric, entries obeying the triangle inequality), or can be a vector of length @code{n(n-1)/2} containing the upper triangular elements of the distance matrix (such as that returned by the pdist function). If @var{D} is not a valid distance matrix, points @var{Y} will be returned whose distance matrix approximates @var{D}.
+## Takes an @var{n} by @var{n} distance (or difference, similarity, or
+## dissimilarity) matrix @var{D}. Returns @var{Y}, a matrix of @var{n} points
+## with coordinates in @var{p} dimensional space which approximate those
+## distances (or differences, similarities, or dissimilarities). Also returns
+## the eigenvalues @var{e} of
+## @address@hidden = -1/2 * @var{J} * (@var{D}.^2) * @var{J}}, where
+## @code{J = eye(@var{n}) - ones(@var{n},@var{n})/@var{n}}. @var{p}, the number
+## of columns of @var{Y}, is equal to the number of positive real eigenvalues of
+## @var{B}.
 ##
-## The returned @var{Y} is an @var{n} by @var{p} matrix showing possible coordinates of the points in @var{p} dimensional space (@code{p < n}). Of course, any translation, rotation, or reflection of these would also have the same distance matrix.
-## 
-## Can also return the eigenvalues @var{e} of @code{(D(1, :) .^ 2 +  D(:, 1) .^ 2 - D .^ 2) / 2}, where the number of positive eigenvalues determines @var{p}.
+## @var{D} can be a full or sparse matrix or a vector of length
+## @address@hidden(@var{n}-1)/2} containing the upper triangular elements (like
+## the output of the @code{pdist} function). It must be symmetric with
+## non-negative entries whose values are further restricted by the type of
+## matrix being represented:
 ##
-## Reference: Rudolf Mathar (1985), The best Euclidian fit to a given distance matrix in prescribed dimensions, Linear Algebra and its Applications, 67: 1-6, doi: 10.1016/0024-3795(85)90181-8
-## 
-## @seealso{pdist, squareform}
+## * If @var{D} is either a distance, dissimilarity, or difference matrix, then
+## it must have zero entries along the main diagonal. In this case the points
+## @var{Y} equal or approximate the distances given by @var{D}.
+##
+## * If @var{D} is a similarity matrix, the elements must all be less than or
+## equal to one, with ones along the the main diagonal. In this case the points
+## @var{Y} equal or approximate the distances given by
+## @address@hidden = sqrt(ones(@var{n},@var{n})address@hidden)}.
+##
+## @var{D} is a Euclidean matrix if and only if @var{B} is positive
+## semi-definite. When this is the case, then @var{Y} is an exact representation
+## of the distances given in @var{D}. If @var{D} is non-Euclidean, @var{Y} only
+## approximates the distance given in @var{D}. The approximation used by
+## @code{cmdscale} minimizes the statistical loss function known as
+## @var{strain}.
+##
+## The returned @var{Y} is an @var{n} by @var{p} matrix showing possible
+## coordinates of the points in @var{p} dimensional space
+## (@address@hidden < @var{n}}). The columns are correspond to the positive
+## eigenvalues of @var{B} in descending order. A translation, rotation, or
+## reflection of the coordinates given by @var{Y} will satisfy the same distance
+## matrix up to the limits of machine precision.
+##
+## For any @address@hidden <= @var{p}}, if the largest @var{k} positive
+## eigenvalues of @var{B} are significantly greater in absolute magnitude than
+## its other eigenvalues, the first @var{k} columns of @var{Y} provide a
+## @var{k}-dimensional reduction of @var{Y} which approximates the distances
+## given by @var{D}. The optional return @var{e} can be used to consider various
+## values of @var{k}, or to evaluate the accuracy of specific dimension
+## reductions (e.g., @address@hidden = 2}).
+##
+## Reference: Ingwer Borg and Patrick J.F. Groenen (2005), Modern
+## Multidimensional Scaling, Second Edition, Springer, ISBN: 978-0-387-25150-9
+## (Print) 978-0-387-28981-6 (Online)
+##
+## @seealso{pdist}
 ## @end deftypefn
 
-## Author: Nir Krakauer <address@hidden>
+## Author: JD Walsh <address@hidden>
+## Created: 2014-10-25
 ## Description: Classical multidimensional scaling
+## Keywords: multidimensional-scaling mds distance clustering
+
+## TO DO: include missing functions `mdscale' and `procrustes' in @seealso
 
 function [Y, e] = cmdscale (D)
-  
+
+  % Check for input
+  if (nargin == 0)
+    print_usage ();
+  endif
+
+  % Convert vector input to matrix
   if isvector (D)
     D = squareform (D);
   endif
-  
-  warning ("off", "Octave:broadcast","local");
-  M = (D(1, :) .^ 2 +  D(:, 1) .^ 2 - D .^ 2) / 2;
 
-  [v e] = eig(M);
-  
-  e = diag(e);
-  pe = (e > 0); #positive eigenvalues
-  
-  Y = v(:, pe) * diag(sqrt(e(pe)));
- 
-  
+  n = size (D,1);
+  % Check for valid format (see help above); If similarity matrix, convert
+  if (~all (D >= 0))
+    print_usage ();
+  elseif (trace (D) ~= 0)
+      if ((~all (diag (D) == 1)) || (~all (D <= 1)))
+        print_usage ();
+      endif
+      D = sqrt (ones (n,n) - D);
+  endif
+
+  % Build centering matrix, perform double centering, extract eigenpairs
+  J = eye (n) - ones (n,n) / n;
+  B = -1 / 2 * J * (D .^ 2) * J;
+  [Q, e] = eig (B);
+  e = diag (e);
+  etmp = e;
+
+  % Remove complex eigenpairs (only possible due to machine approximation)
+  if (iscomplex (etmp))
+    for i = 1 : size (etmp,1)
+      cmp(i) = (isreal (etmp(i)));
+    endfor
+    etmp = etmp(cmp);
+    Q = Q(:,cmp);
+  endif
+
+  % Order eigenpairs
+  [etmp, ord] = sort (etmp, 'descend');
+  Q = Q(:,ord);
+
+  % Remove negative eigenpairs
+  cmp = (etmp > 0);
+  etmp = etmp(cmp);
+  Q = Q(:,cmp);
+
+  % Build output matrix Y
+  Y = Q * diag (sqrt (etmp));
+
 endfunction
 
+%!shared m, n, X, D
+%! m = randi(100) + 1; n = randi(100) + 1; X = rand(m, n); D = pdist(X);
+%!assert(norm(pdist(cmdscale(D))), norm(D), sqrt(eps))
+%!assert(norm(pdist(cmdscale(squareform(D)))), norm(D), sqrt(eps))
 
-%!shared m, n, X, D
-%! m = 4; n = 3; X = rand(m, n); D = pdist(X);
-%!assert(pdist(cmdscale(D)), D, m*n*eps)
-%!assert(pdist(cmdscale(squareform(D))), D, m*n*eps)
-