gnuastro-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[gnuastro-commits] master 57ee612 1/2: Relative extension of range in au


From: Mohammad Akhlaghi
Subject: [gnuastro-commits] master 57ee612 1/2: Relative extension of range in automatic binning in Statistics lib
Date: Thu, 1 Nov 2018 17:33:27 -0400 (EDT)

branch: master
commit 57ee612d5916da104621837675fce8689de58706
Author: Mohammad Akhlaghi <address@hidden>
Commit: Mohammad Akhlaghi <address@hidden>

    Relative extension of range in automatic binning in Statistics lib
    
    In order to count the last element when estimating the bin range
    automatically, Until now, in `gal_statistics_regular_bins' we would simply
    add 1e-6 to the maximum value. This would cause problems when the range of
    the dataset is smaller than that (for example when using angestroms). Using
    a very small value was also not possible, because of floating point errors.
    
    So now, the histogram function uses a different check when it is
    considering the last bin. This fixes all the complications above.
---
 doc/gnuastro.texi | 16 ++++++++++++----
 lib/statistics.c  | 25 ++++++++++++++++++-------
 2 files changed, 30 insertions(+), 11 deletions(-)

diff --git a/doc/gnuastro.texi b/doc/gnuastro.texi
index 7699ce4..c2977ed 100644
--- a/doc/gnuastro.texi
+++ b/doc/gnuastro.texi
@@ -27622,10 +27622,18 @@ shifted to accommodate this request.
 @deftypefun {gal_data_t *} gal_statistics_histogram (gal_data_t @code{*input}, 
gal_data_t @code{*bins}, int @code{normalize}, int @code{maxone})
 Make a histogram of all the elements in the given dataset with bin values
 that are defined in the @code{inbins} structure (see
address@hidden). @code{inbins} is not mandatory, if you
-pass a @code{NULL} pointer, the bins structure will be built within this
-function based on the @code{numbins} input. As a result, when you have
-already defined the bins, @code{numbins} is not used.
address@hidden, they currently have to be equally
+spaced). @code{inbins} is not mandatory, if you pass a @code{NULL} pointer,
+the bins structure will be built within this function based on the
address@hidden input. As a result, when you have already defined the bins,
address@hidden is not used.
+
+Let's write the center of the @mymath{i}th element of the bin array as
address@hidden, and the fixed half-bin width as @mymath{h}. Then element
address@hidden of the input array (@mymath{in_j}) will be counted in
address@hidden if @mymath{(b_i-h) \le in_j < (b_i+h)}. However, if
address@hidden is somewhere in the last bin, the condition changes to
address@hidden(b_i-h) \le in_j \le (b_i+h)}.
 @end deftypefun
 
 
diff --git a/lib/statistics.c b/lib/statistics.c
index 981bbec..11a04d1 100644
--- a/lib/statistics.c
+++ b/lib/statistics.c
@@ -1550,7 +1550,9 @@ gal_statistics_regular_bins(gal_data_t *input, gal_data_t 
*inrange,
             {
               tmp=gal_data_copy_to_new_type_free(gal_statistics_maximum(input),
                                                  GAL_TYPE_FLOAT64);
-              max=*((double *)(tmp->array))+1e-6;
+              max=*((double *)(tmp->array));
+
+              /* Clean up. */
               gal_data_free(tmp);
             }
           else max=ra[1];
@@ -1568,7 +1570,9 @@ gal_statistics_regular_bins(gal_data_t *input, gal_data_t 
*inrange,
       gal_data_free(tmp);
       tmp=gal_data_copy_to_new_type_free(gal_statistics_maximum(input),
                                          GAL_TYPE_FLOAT64);
-      max=*((double *)(tmp->array)) + 1e-6;
+      max=*((double *)(tmp->array));
+
+      /* Clean up. */
       gal_data_free(tmp);
     }
 
@@ -1605,7 +1609,7 @@ gal_statistics_regular_bins(gal_data_t *input, gal_data_t 
*inrange,
   printf("onebinstart: %.10f\n", onebinstart);
   printf("binwidth: %g\n", binwidth);
   for(i=0;i<numbins;++i)
-    printf("%zu: %.4f\t(%f, %f)\n", i, b[i], b[i]-hbw, b[i]+hbw);
+    printf("%zu: %.4g\t(%g, %g)\n", i, b[i], b[i]-hbw, b[i]+hbw);
   */
 
   /* Set the status of the bins to regular and return. */
@@ -1626,7 +1630,13 @@ gal_statistics_regular_bins(gal_data_t *input, 
gal_data_t *inrange,
 
 #define HISTOGRAM_TYPESET(IT) {                                         \
     IT *a=input->array, *af=a+input->size;                              \
-    do if( *a>=min && *a<max) ++h[ (size_t)( (*a-min)/binwidth ) ];     \
+    do                                                                  \
+      if(*a>=min)                                                       \
+        {                                                               \
+          h_i=(*a-min)/binwidth;                                        \
+          if(h_i == last_i) { if( *a<=max ) ++h[ h_i ]; }               \
+          else              { if( *a< max ) ++h[ h_i ]; }               \
+        }                                                               \
     while(++a<af);                                                      \
   }
 
@@ -1634,9 +1644,9 @@ gal_data_t *
 gal_statistics_histogram(gal_data_t *input, gal_data_t *bins, int normalize,
                          int maxone)
 {
-  size_t *h;
   float *f, *ff;
   gal_data_t *hist;
+  size_t *h, h_i, last_i;
   double *d, min, max, ref=NAN, binwidth;
 
 
@@ -1668,8 +1678,9 @@ gal_statistics_histogram(gal_data_t *input, gal_data_t 
*bins, int normalize,
   /* Set the minimum and maximum range of the histogram from the bins. */
   d=bins->array;
   binwidth=d[1]-d[0];
-  max = d[bins->size - 1] + binwidth/2;
-  min = d[0]              - binwidth/2;
+  last_i=bins->size-1;
+  min = d[ 0      ] - binwidth/2;
+  max = d[ last_i ] + binwidth/2;
 
 
   /* Go through all the elements and find out which bin they belong to. */



reply via email to

[Prev in Thread] Current Thread [Next in Thread]