[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[gnuastro-commits] master 57ee612 1/2: Relative extension of range in au
From: |
Mohammad Akhlaghi |
Subject: |
[gnuastro-commits] master 57ee612 1/2: Relative extension of range in automatic binning in Statistics lib |
Date: |
Thu, 1 Nov 2018 17:33:27 -0400 (EDT) |
branch: master
commit 57ee612d5916da104621837675fce8689de58706
Author: Mohammad Akhlaghi <address@hidden>
Commit: Mohammad Akhlaghi <address@hidden>
Relative extension of range in automatic binning in Statistics lib
In order to count the last element when estimating the bin range
automatically, Until now, in `gal_statistics_regular_bins' we would simply
add 1e-6 to the maximum value. This would cause problems when the range of
the dataset is smaller than that (for example when using angestroms). Using
a very small value was also not possible, because of floating point errors.
So now, the histogram function uses a different check when it is
considering the last bin. This fixes all the complications above.
---
doc/gnuastro.texi | 16 ++++++++++++----
lib/statistics.c | 25 ++++++++++++++++++-------
2 files changed, 30 insertions(+), 11 deletions(-)
diff --git a/doc/gnuastro.texi b/doc/gnuastro.texi
index 7699ce4..c2977ed 100644
--- a/doc/gnuastro.texi
+++ b/doc/gnuastro.texi
@@ -27622,10 +27622,18 @@ shifted to accommodate this request.
@deftypefun {gal_data_t *} gal_statistics_histogram (gal_data_t @code{*input},
gal_data_t @code{*bins}, int @code{normalize}, int @code{maxone})
Make a histogram of all the elements in the given dataset with bin values
that are defined in the @code{inbins} structure (see
address@hidden). @code{inbins} is not mandatory, if you
-pass a @code{NULL} pointer, the bins structure will be built within this
-function based on the @code{numbins} input. As a result, when you have
-already defined the bins, @code{numbins} is not used.
address@hidden, they currently have to be equally
+spaced). @code{inbins} is not mandatory, if you pass a @code{NULL} pointer,
+the bins structure will be built within this function based on the
address@hidden input. As a result, when you have already defined the bins,
address@hidden is not used.
+
+Let's write the center of the @mymath{i}th element of the bin array as
address@hidden, and the fixed half-bin width as @mymath{h}. Then element
address@hidden of the input array (@mymath{in_j}) will be counted in
address@hidden if @mymath{(b_i-h) \le in_j < (b_i+h)}. However, if
address@hidden is somewhere in the last bin, the condition changes to
address@hidden(b_i-h) \le in_j \le (b_i+h)}.
@end deftypefun
diff --git a/lib/statistics.c b/lib/statistics.c
index 981bbec..11a04d1 100644
--- a/lib/statistics.c
+++ b/lib/statistics.c
@@ -1550,7 +1550,9 @@ gal_statistics_regular_bins(gal_data_t *input, gal_data_t
*inrange,
{
tmp=gal_data_copy_to_new_type_free(gal_statistics_maximum(input),
GAL_TYPE_FLOAT64);
- max=*((double *)(tmp->array))+1e-6;
+ max=*((double *)(tmp->array));
+
+ /* Clean up. */
gal_data_free(tmp);
}
else max=ra[1];
@@ -1568,7 +1570,9 @@ gal_statistics_regular_bins(gal_data_t *input, gal_data_t
*inrange,
gal_data_free(tmp);
tmp=gal_data_copy_to_new_type_free(gal_statistics_maximum(input),
GAL_TYPE_FLOAT64);
- max=*((double *)(tmp->array)) + 1e-6;
+ max=*((double *)(tmp->array));
+
+ /* Clean up. */
gal_data_free(tmp);
}
@@ -1605,7 +1609,7 @@ gal_statistics_regular_bins(gal_data_t *input, gal_data_t
*inrange,
printf("onebinstart: %.10f\n", onebinstart);
printf("binwidth: %g\n", binwidth);
for(i=0;i<numbins;++i)
- printf("%zu: %.4f\t(%f, %f)\n", i, b[i], b[i]-hbw, b[i]+hbw);
+ printf("%zu: %.4g\t(%g, %g)\n", i, b[i], b[i]-hbw, b[i]+hbw);
*/
/* Set the status of the bins to regular and return. */
@@ -1626,7 +1630,13 @@ gal_statistics_regular_bins(gal_data_t *input,
gal_data_t *inrange,
#define HISTOGRAM_TYPESET(IT) { \
IT *a=input->array, *af=a+input->size; \
- do if( *a>=min && *a<max) ++h[ (size_t)( (*a-min)/binwidth ) ]; \
+ do \
+ if(*a>=min) \
+ { \
+ h_i=(*a-min)/binwidth; \
+ if(h_i == last_i) { if( *a<=max ) ++h[ h_i ]; } \
+ else { if( *a< max ) ++h[ h_i ]; } \
+ } \
while(++a<af); \
}
@@ -1634,9 +1644,9 @@ gal_data_t *
gal_statistics_histogram(gal_data_t *input, gal_data_t *bins, int normalize,
int maxone)
{
- size_t *h;
float *f, *ff;
gal_data_t *hist;
+ size_t *h, h_i, last_i;
double *d, min, max, ref=NAN, binwidth;
@@ -1668,8 +1678,9 @@ gal_statistics_histogram(gal_data_t *input, gal_data_t
*bins, int normalize,
/* Set the minimum and maximum range of the histogram from the bins. */
d=bins->array;
binwidth=d[1]-d[0];
- max = d[bins->size - 1] + binwidth/2;
- min = d[0] - binwidth/2;
+ last_i=bins->size-1;
+ min = d[ 0 ] - binwidth/2;
+ max = d[ last_i ] + binwidth/2;
/* Go through all the elements and find out which bin they belong to. */