clear all; NN = 2.^(2:23); % data sizes MM = 2.^(0:10); % number of bins xx = randn(max(NN), 1); Te = Tl = Tc = Ts = zeros(length(NN), length(MM)); for iN = 1:length(NN) x = xx(1:NN(iN)); for iM = 1:length(MM) nbins = MM(iM); tic; min_val = min (x); max_val = max (x); edges = min_val + (max_val - min_val) * (0:nbins) / nbins; binwidth = edges(2) - edges(1); bin = 1 + floor ((x - min_val) / binwidth); % compute bin indices in O(Nx) bin(x > edges(end)) = 0; % set too large entries to zero idx = bin(bin != 0); % remove all out-of-scope entries n = accumarray (idx, 1, [nbins+1, 1])'; % cout number of identical indices to get the histogram, in O(Nx) n = [n(1:end-2), n(end-1)+n(end)]; % last bin should also contain right edge Te(iN, iM) = toc; end end save -ascii 'hist_bench_equi.dat' Te; for iN = 1:length(NN) x = xx(1:NN(iN)); for iM = 1:length(MM) nbins = MM(iM); tic; min_val = min (x); max_val = max (x); edges = min_val + (max_val - min_val) * (0:nbins) / nbins; bin = lookup (edges, x); % compute bin indices in O(Nx*log(nbins)), all x < edges(1) will be zero bin(x > edges(end)) = 0; % set too large entries to zero idx = bin(bin != 0); % remove all out-of-scope entries n = accumarray (idx, 1, [nbins+1, 1])'; % cout number of identical indices to get the histogram, in O(Nx) n = [n(1:end-2), n(end-1)+n(end)]; % last bin should also contain right edge Tl(iN, iM) = toc; end end save -ascii 'hist_bench_lookup.dat' Tl; for iN = 1:length(NN) x = xx(1:NN(iN)); for iM = 1:length(MM) nbins = MM(iM); tic; len = NN(iN); min_val = min (x); max_val = max (x); m = [0.5:nbins]'/nbins; m = (max_val - min_val) * m + min_val * ones (size (m)); cutoff = (m(1:end-1,:) + m(2:end,:)) / 2; [s, idx] = sort ([x; cutoff]); chist = cumsum (idx <= len); chist = [0; chist(idx > len); chist(end)-sum(isnan(x))]; freq = diff (chist); Ts(iN, iM) = toc; end end save -ascii 'hist_bench_sort.dat' Ts; for iN = 1:length(NN) x = xx(1:NN(iN)); for iM = 1:length(MM) nbins = MM(iM); tic; min_val = min (x); max_val = max (x); m = [0.5:nbins]'/nbins; m = (max_val - min_val) * m + min_val * ones (size (m)); cutoff = (m(1:end-1,:) + m(2:end,:)) / 2; chist = zeros (nbins+1, 1); for i = 1:nbins-1 chist(i+1,:) = sum (x <= cutoff(i)); endfor chist(nbins+1,:) = sum (! isnan (x)); freq = diff (chist); Tc(iN, iM) = toc; end end save -ascii 'hist_bench_cdf.dat' Tc; loglog(NN,Te,'b', NN,Tl,'g', NN,Tc,'r', NN,Ts,'k')