[gnuastro-commits] master 561cf18: Library (statistics): STD and sigclip

gnuastro-commits

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[gnuastro-commits] master 561cf18: Library (statistics): STD and sigclip

From:	Mohammad Akhlaghi
Subject:	[gnuastro-commits] master 561cf18: Library (statistics): STD and sigclip functions fixed for one input
Date:	Wed, 6 May 2020 21:25:32 -0400 (EDT)

branch: master
commit 561cf18f10769698410ce836422aae81a37a3821
Author: Mohammad Akhlaghi <address@hidden>
Commit: Mohammad Akhlaghi <address@hidden>

    Library (statistics): STD and sigclip functions fixed for one input
    
    Until now, when the input to the standard deviation and sigma-clipping
    operators only had one element, it would sometimes return NaN results
    (which is wrong, the standard deviation of a single-element dataset is
    0). The reason was that we were simply using the standard equation for the
    standard deviation which can be non-zero due to floating point errors.
    
    With this commit, this has been fixed by not using the equation at all when
    there is only element. In such cases the standard deviation functions will
    just return 0 and the sigma-clipping will just return the four standard
    values (1 for number, value for median and mean and 0 for standard
    deviation).
    
    This bug was found while working on a project with Zahra Sharbaf.
    
    This fixes bug #58315.
---
 NEWS                         |  1 +
 doc/announce-acknowledge.txt |  1 +
 lib/statistics.c             | 94 ++++++++++++++++++++++++++++++++++----------
 3 files changed, 76 insertions(+), 20 deletions(-)

diff --git a/NEWS b/NEWS
index ee91c0a..10ab205 100644
--- a/NEWS
+++ b/NEWS
@@ -132,6 +132,7 @@ See the end of the file for license conditions.
   bug #57921: Arithmetic's interpolation operator not reading metric.
   bug #57989: Warp not accounting for translation in pixel grid.
   bug #57995: Fits lib's date to second function affected by host's timezone.
+  bug #58315: Some NaNs with sigma-clip operators in Arithmetic and one input.
 
 
 
diff --git a/doc/announce-acknowledge.txt b/doc/announce-acknowledge.txt
index 005db25..464ba55 100644
--- a/doc/announce-acknowledge.txt
+++ b/doc/announce-acknowledge.txt
@@ -2,6 +2,7 @@ Alphabetically ordered list to acknowledge in the next release.
 
 Raúl Infante-Sainz
 Alejandro Serrano Borlaff
+Zahra Sharbaf
 Joseph Putko
 
 
diff --git a/lib/statistics.c b/lib/statistics.c
index 2b59ba2..a263eef 100644
--- a/lib/statistics.c
+++ b/lib/statistics.c
@@ -196,17 +196,30 @@ gal_data_t *
 gal_statistics_std(gal_data_t *input)
 {
   size_t dsize=1, n=0;
-  double s=0.0f, s2=0.0f;
+  double s=0.0f, s2=0.0f, *o;
   gal_data_t *out=gal_data_alloc(NULL, GAL_TYPE_FLOAT64, 1, &dsize,
                                  NULL, 1, -1, 1, NULL, NULL, NULL);
 
   /* See if the input actually has any elements. */
-  if(input->size)
-    /* Parse the input. */
-    GAL_TILE_PARSE_OPERATE(input, out, 0, 1, {++n; s += *i; s2 += *i * *i;});
+  o=out->array;
+  switch(input->size)
+    {
+    /* No inputs. */
+    case 0: o[0]=GAL_BLANK_FLOAT64; break;
 
-  /* Write the standard deviation into the output. */
-  *((double *)(out->array)) = n ? sqrt( (s2-s*s/n)/n ) : GAL_BLANK_FLOAT64;
+    /* When we only have a single element, theoretically the standard
+       deviation should be 0. But due to floating-point errors, it will
+       probably not be. So we'll manually set it to zero. */
+    case 1: o[0]=0; break;
+
+    /* More than one element. */
+    default:
+      GAL_TILE_PARSE_OPERATE(input, out, 0, 1, {++n; s += *i; s2 += *i * *i;});
+      o[0]=sqrt( (s2-s*s/n)/n );
+      break;
+    }
+
+  /* Return the output dataset. */
   return out;
 }
 
@@ -221,17 +234,34 @@ gal_data_t *
 gal_statistics_mean_std(gal_data_t *input)
 {
   size_t dsize=2, n=0;
-  double s=0.0f, s2=0.0f;
+  double s=0.0f, s2=0.0f, *o;
   gal_data_t *out=gal_data_alloc(NULL, GAL_TYPE_FLOAT64, 1, &dsize,
                                  NULL, 1, -1, 1, NULL, NULL, NULL);
+
   /* See if the input actually has any elements. */
-  if(input->size)
-    /* Parse the input. */
-    GAL_TILE_PARSE_OPERATE(input, out, 0, 1, {++n; s += *i; s2 += *i * *i;});
+  o=out->array;
+  switch(input->size)
+    {
+    /* No inputs. */
+    case 0: o[0]=o[1]=GAL_BLANK_FLOAT64; break;
+
+    /* When we only have a single element, theoretically the standard
+       deviation should be 0. But due to floating-point errors, it will
+       probably not be. So we'll manually set it to zero. */
+    case 1:
+      GAL_TILE_PARSE_OPERATE(input, out, 0, 1, {s+=*i;});
+      o[0]=s; o[1]=0;
+      break;
 
-  /* Write in the output values and return. */
-  ((double *)(out->array))[0] = n ? s/n                  : GAL_BLANK_FLOAT64;
-  ((double *)(out->array))[1] = n ? sqrt( (s2-s*s/n)/n ) : GAL_BLANK_FLOAT64;
+    /* More than one element. */
+    default:
+      GAL_TILE_PARSE_OPERATE(input, out, 0, 1, {++n; s += *i; s2 += *i * *i;});
+      o[0]=s/n;
+      o[1]=sqrt( (s2-s*s/n)/n );
+      break;
+    }
+
+  /* Return the output dataset. */
   return out;
 }
 
@@ -2015,7 +2045,7 @@ gal_statistics_sigma_clip(gal_data_t *input, float 
multip, float param,
   uint8_t bytolerance = param>=1.0f ? 0 : 1;
   double oldmed=NAN, oldmean=NAN, oldstd=NAN;
   size_t num=0, one=1, four=4, size, oldsize;
-  gal_data_t *median_i, *median_d, *out, *meanstd;
+  gal_data_t *fcopy, *median_i, *median_d, *out, *meanstd;
   gal_data_t *nbs=gal_statistics_no_blank_sorted(input, inplace);
   size_t maxnum = param>=1.0f ? param : GAL_STATISTICS_SIG_CLIP_MAX_CONVERGE;
 
@@ -2051,21 +2081,45 @@ gal_statistics_sigma_clip(gal_data_t *input, float 
multip, float param,
   /* Only continue processing if we have non-blank elements. */
   oa=out->array;
   nbs_array=nbs->array;
-  if(nbs->size==0)
+  switch(nbs->size)
     {
+    /* There was nothing in the input! */
+    case 0:
       if(!quiet)
         printf("NO SIGMA-CLIPPING: all input elements are blank or input's "
                "size is zero.\n");
       oa[0] = oa[1] = oa[2] = oa[3] = NAN;
-    }
-  else
-    {
-      /* Print the comments. */
+      break;
+
+    /* Only one element, convert it to floating point and put it as the
+       mean and median (the standard deviation will be zero by
+       definition). */
+    case 1:
+      /* Write the values. */
+      fcopy=gal_data_copy_to_new_type(nbs, GAL_TYPE_FLOAT32);
+      oa[0] = 1;
+      oa[1] = *((float *)(fcopy->array));
+      oa[2] = *((float *)(fcopy->array));
+      oa[3] = 0;
+      gal_data_free(fcopy);
+
+      /* Print the comments if requested. */
+      if(!quiet)
+        {
+          printf("%-8s %-10s %-15s %-15s %-15s\n",
+                 "round", "number", "median", "mean", "STD");
+          printf("%-8d %-10.0f %-15g %-15g %-15g\n",
+                 0, oa[0], oa[1], oa[2], oa[3]);
+        }
+      break;
+
+    /* More than one element. */
+    default:
+      /* Print the comments if requested. */
       if(!quiet)
         printf("%-8s %-10s %-15s %-15s %-15s\n",
                "round", "number", "median", "mean", "STD");
 
-
       /* Do the clipping, but first initialize the values that will be
          changed during the clipping: the start of the array and the
          array's size. */

[Prev in Thread]

Current Thread

[Next in Thread]

[gnuastro-commits] master 561cf18: Library (statistics): STD and sigclip functions fixed for one input, Mohammad Akhlaghi <=

Prev by Date: [gnuastro-commits] master c9069ad: Book: corrected typo in column arithmetic
Next by Date: [gnuastro-commits] master 7a2a9c4: Table: New --inpolygon, --outpolygon and --polygon options added
Previous by thread: [gnuastro-commits] master c9069ad: Book: corrected typo in column arithmetic
Next by thread: [gnuastro-commits] master 7a2a9c4: Table: New --inpolygon, --outpolygon and --polygon options added
Index(es):
- Date
- Thread