gnuastro-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[gnuastro-commits] master 1a40218b: MakeCatalog: new --std column for ST


From: Mohammad Akhlaghi
Subject: [gnuastro-commits] master 1a40218b: MakeCatalog: new --std column for STD of pixels in values image
Date: Sat, 2 Jul 2022 11:52:41 -0400 (EDT)

branch: master
commit 1a40218bd36ac41a0c9d80354109faffe3b0f723
Author: Mohammad Akhlaghi <mohammad@akhlaghi.org>
Commit: Mohammad Akhlaghi <mohammad@akhlaghi.org>

    MakeCatalog: new --std column for STD of pixels in values image
    
    Until now, there no way to estimate the standard deviation of values of
    each labeled region (objects or clumps) using MakeCatalog.
    
    With this option, the newly added '--std' option will do this job. Until a
    few commits ago, this was the name for the sky standard deviation, so the
    NEWS file was also updated in the "changes" section.
    
    In the process, the calculation of the standard deviation from the sum,
    sum-of-squares and number of elements has been taken into a new Gnuastro
    library function in 'statistics.h': 'gal_statistics_std_from_sums'
---
 NEWS                      | 11 ++++++++++-
 bin/mkcatalog/args.h      | 14 ++++++++++++++
 bin/mkcatalog/columns.c   | 28 ++++++++++++++++++++++++++++
 bin/mkcatalog/main.h      |  2 ++
 bin/mkcatalog/parse.c     | 10 ++++++----
 bin/mkcatalog/ui.c        |  1 +
 bin/mkcatalog/ui.h        |  1 +
 doc/gnuastro.texi         | 19 +++++++++++++++----
 lib/gnuastro/statistics.h |  3 +++
 lib/statistics.c          | 41 ++++++++++++++++++++++++++++++++++-------
 10 files changed, 114 insertions(+), 16 deletions(-)

diff --git a/NEWS b/NEWS
index 4f921ec3..ae27629c 100644
--- a/NEWS
+++ b/NEWS
@@ -117,13 +117,22 @@ See the end of the file for license conditions.
    - gal_arithmetic_load_col: Low-level function for parsing the string of
      the newly added 'load-col' operator, mentioned in the Arithmetic
      section above.
+   - gal_statistics_std_from_sums: return the standard deviation using the
+     sum, sum of squares and number of a distribution.
 
 ** Removed features
 
 ** Changed features
 
   MakeCatalog:
-   --skystd: new name for old '--std' (see bug #62685).
+   --std: measures the standard deviation of the object/clump pixels of the
+          values image. Until now, there was no column to calculate this,
+          and the '--std' column returned the _sky_ standard deviation over
+          the object/clump labels (from the standard deviation image,
+          ignoring the values image). But that was confusing, especially
+          since we have columns like '--mean' or '--median' which only use
+          the values image (see bug #62685).
+   --skystd: new name for old '--std', see description of '--std' above.
    --sfmagnsigma: default value changed to 3 (more commonly used).
    --sfmagarea: default value set to 100 arcsec^2 (more commonly used).
 
diff --git a/bin/mkcatalog/args.h b/bin/mkcatalog/args.h
index 2171c0c9..748cb4da 100644
--- a/bin/mkcatalog/args.h
+++ b/bin/mkcatalog/args.h
@@ -1143,6 +1143,20 @@ struct argp_option program_options[] =
       GAL_OPTIONS_NOT_SET,
       ui_column_codes_ll
     },
+    {
+      "std",
+      UI_KEY_STD,
+      0,
+      0,
+      "Standard dev. of values in object/clump.",
+      UI_GROUP_COLUMNS_BRIGHTNESS,
+      0,
+      GAL_TYPE_INVALID,
+      GAL_OPTIONS_RANGE_ANY,
+      GAL_OPTIONS_NOT_MANDATORY,
+      GAL_OPTIONS_NOT_SET,
+      ui_column_codes_ll
+    },
     {
       "median",
       UI_KEY_MEDIAN,
diff --git a/bin/mkcatalog/columns.c b/bin/mkcatalog/columns.c
index 42c5972e..8c0c9ec9 100644
--- a/bin/mkcatalog/columns.c
+++ b/bin/mkcatalog/columns.c
@@ -33,6 +33,7 @@ along with Gnuastro. If not, see 
<http://www.gnu.org/licenses/>.
 #include <gnuastro/wcs.h>
 #include <gnuastro/units.h>
 #include <gnuastro/pointer.h>
+#include <gnuastro/statistics.h>
 
 #include <gnuastro-internal/checkset.h>
 
@@ -1261,6 +1262,21 @@ columns_define_alloc(struct mkcatalogparams *p)
                                    ciflag[ CCOL_RIV_SUM ] = 1;
           break;
 
+        case UI_KEY_STD:
+          name           = "STD";
+          unit           = MKCATALOG_NO_UNIT;
+          ocomment       = "Standard deviation of sky subtracted values.";
+          ccomment       = "Standard deviation of pixels subtracted by 
rivers.";
+          otype          = GAL_TYPE_FLOAT32;
+          ctype          = GAL_TYPE_FLOAT32;
+          disp_fmt       = GAL_TABLE_DISPLAY_FMT_GENERAL;
+          disp_width     = 10;
+          disp_precision = 5;
+          oiflag[ OCOL_NUM   ] = ciflag[ CCOL_NUM   ] = 1;
+          oiflag[ OCOL_SUM   ] = ciflag[ CCOL_SUM   ] = 1;
+          oiflag[ OCOL_SUMP2 ] = ciflag[ CCOL_SUMP2 ] = 1;
+          break;
+
         case UI_KEY_MEDIAN:
           name           = "MEDIAN";
           unit           = MKCATALOG_NO_UNIT;
@@ -2550,6 +2566,12 @@ columns_fill(struct mkcatalog_passparams *pp)
                                       : NAN );
           break;
 
+        case UI_KEY_STD:
+          ((float *)colarr)[oind] =
+            gal_statistics_std_from_sums(oi[ OCOL_SUM ], oi[ OCOL_SUMP2 ],
+                                         oi[ OCOL_NUM ]);
+          break;
+
         case UI_KEY_MEDIAN:
           ((float *)colarr)[oind] = ( oi[ OCOL_NUM ]>0.0f
                                       ? oi[ OCOL_MEDIAN ]
@@ -2906,6 +2928,12 @@ columns_fill(struct mkcatalog_passparams *pp)
                                         /ci[CCOL_NUM] );
             break;
 
+          case UI_KEY_STD:
+            ((float *)colarr)[cind] =
+              gal_statistics_std_from_sums(oi[ CCOL_SUM ], oi[ CCOL_SUMP2 ],
+                                           oi[ CCOL_NUM ]);
+            break;
+
           case UI_KEY_MEDIAN:
             ((float *)colarr)[cind] = ( ci[ CCOL_NUM ]>0.0f
                                         ? ci[ CCOL_MEDIAN ] : NAN );
diff --git a/bin/mkcatalog/main.h b/bin/mkcatalog/main.h
index 70041c75..410148ca 100644
--- a/bin/mkcatalog/main.h
+++ b/bin/mkcatalog/main.h
@@ -75,6 +75,7 @@ enum objectcols
     OCOL_NUM,            /* Area/Number of values used in this object.*/
     OCOL_NUMXY,          /* Number of values in the first two dims.   */
     OCOL_SUM,            /* Sum of (value-sky) in object.             */
+    OCOL_SUMP2,          /* Sum^2 of (value-sky) in object.           */
     OCOL_SUM_VAR,        /* Variance including values (not just sky). */
     OCOL_SUM_VAR_NUM,    /* Number of pixels used for OCOL_SUM_VAR.   */
     OCOL_MEDIAN,         /* Median of values in object.               */
@@ -142,6 +143,7 @@ enum clumpcols
     CCOL_NUM,            /* Number of values used in clump.           */
     CCOL_NUMXY,          /* Number of values only in first two dims.  */
     CCOL_SUM,            /* River subtracted brightness.              */
+    CCOL_SUMP2,          /* River subtracted brightness to power of 2.*/
     CCOL_SUM_VAR,        /* Variance including values (not just sky). */
     CCOL_SUM_VAR_NUM,    /* Number of pixels used for CCOL_SUM_VAR.   */
     CCOL_MEDIAN,         /* Median of values in clump.                */
diff --git a/bin/mkcatalog/parse.c b/bin/mkcatalog/parse.c
index dcf43ace..2f21184f 100644
--- a/bin/mkcatalog/parse.c
+++ b/bin/mkcatalog/parse.c
@@ -578,8 +578,9 @@ parse_objects(struct mkcatalog_passparams *pp)
 
                   /* General flux summations. */
                   if(xybin) xybinarr[ pind ]=2;
-                  if(oif[ OCOL_NUM ]) oi[ OCOL_NUM ]++;
-                  if(oif[ OCOL_SUM ]) oi[ OCOL_SUM ] += *V;
+                  if(oif[ OCOL_NUM ])   oi[ OCOL_NUM   ]++;
+                  if(oif[ OCOL_SUM ])   oi[ OCOL_SUM   ] += *V;
+                  if(oif[ OCOL_SUMP2 ]) oi[ OCOL_SUMP2 ] += *V * *V;
 
                   /* Get the necessary clump information. */
                   if(p->clumps && *C>0)
@@ -986,8 +987,9 @@ parse_clumps(struct mkcatalog_passparams *pp)
                       goodvalue=1;
 
                       /* Fill in the necessary information. */
-                      if(cif[ CCOL_NUM   ]) ci[ CCOL_NUM ]++;
-                      if(cif[ CCOL_SUM   ]) ci[ CCOL_SUM ] += *V;
+                      if(cif[ CCOL_NUM   ]) ci[ CCOL_NUM   ]++;
+                      if(cif[ CCOL_SUM   ]) ci[ CCOL_SUM   ] += *V;
+                      if(cif[ CCOL_SUMP2 ]) ci[ CCOL_SUMP2 ] += *V * *V;
                       if(cif[ CCOL_NUMXY ])
                         ((uint8_t *)(xybin[cind].array))[ pind ] = 2;
 
diff --git a/bin/mkcatalog/ui.c b/bin/mkcatalog/ui.c
index c3c4dadf..519b1871 100644
--- a/bin/mkcatalog/ui.c
+++ b/bin/mkcatalog/ui.c
@@ -1008,6 +1008,7 @@ ui_necessary_inputs(struct mkcatalogparams *p, int 
*values, int *sky,
         case OCOL_NUM:                *values        = 1;          break;
         case OCOL_NUMXY:              *values        = 1;          break;
         case OCOL_SUM:                *values        = 1;          break;
+        case OCOL_SUMP2:              *values        = 1;          break;
         case OCOL_SUM_VAR:            *values = *std = 1;          break;
         case OCOL_SUM_VAR_NUM:        *values = *std = 1;          break;
         case OCOL_MEDIAN:             *values        = 1;          break;
diff --git a/bin/mkcatalog/ui.h b/bin/mkcatalog/ui.h
index 8cba8f35..42098cd6 100644
--- a/bin/mkcatalog/ui.h
+++ b/bin/mkcatalog/ui.h
@@ -155,6 +155,7 @@ enum option_keys_enum
   UI_KEY_BRIGHTNESSERR,
   UI_KEY_CLUMPSBRIGHTNESS,
   UI_KEY_BRIGHTNESSNORIVER,
+  UI_KEY_STD,
   UI_KEY_MEAN,
   UI_KEY_MEDIAN,
   UI_KEY_MAXIMUM,
diff --git a/doc/gnuastro.texi b/doc/gnuastro.texi
index 2a2ff825..2b25463c 100644
--- a/doc/gnuastro.texi
+++ b/doc/gnuastro.texi
@@ -22043,6 +22043,10 @@ If no usable pixels are present over the clump or 
object (for example they are a
 The mean sky subtracted value of pixels within the object or clump.
 For clumps, the average river flux is subtracted from the sky subtracted mean.
 
+@item --std
+The standard deviation of the pixels within the object or clump.
+For clumps, the river pixels are not subtracted because that is a constant 
(per pixel) value and should not affect the standard deviation.
+
 @item --median
 The median sky subtracted value of pixels within the object or clump.
 For clumps, the average river flux is subtracted from the sky subtracted 
median.
@@ -22203,9 +22207,9 @@ See the description under @option{--halfsumradius} for 
more on converting area t
 Because of its non-parametric nature, this column is most reliable on clumps 
and should only be used in objects with great caution.
 This is because objects can have more than one clump (peak with true signal) 
and multiple peaks are not treated separately in objects, so the result of this 
column will be biased.
 
-Also, because of its non-parametric nature, this FWHM it does not account for 
the PSF, and it will be strongly affected by noise if the object is faint.
-So when half the maximum value (which can be requested using the 
@option{--maximum} column) is too close to the local noise level (which can be 
requested using the @option{--std} column), the value returned in this column 
is meaningless (its just noise peaks which are randomly distributed over the 
area).
-You can therefore use the @option{--maximum} and @option{--std} columns to 
remove unreliable FWHMs.
+Also, because of its non-parametric nature, this FWHM it does not account for 
the PSF, and it will be strongly affected by noise if the object is 
faint/diffuse
+So when half the maximum value (which can be requested using the 
@option{--maximum} column) is too close to the local noise level (which can be 
requested using the @option{--skystd} column), the value returned in this 
column is meaningless (its just noise peaks which are randomly distributed over 
the area).
+You can therefore use the @option{--maximum} and @option{--skystd} columns to 
remove, or flag, unreliable FWHMs.
 For example if a labeled region's maximum is less than 2 times the sky 
standard deviation, the value will certainly be unreliable (half of that is 
@mymath{1\sigma}!).
 For a more reliable value, this fraction should be around 4 (so half the 
maximum is 2@mymath{\sigma}).
 
@@ -22403,7 +22407,7 @@ For more, see the description of @option{--spectrum} in 
@ref{MakeCatalog measure
 @cindex Limit, Surface brightness
 When possible, MakeCatalog will also measure the full input's noise level 
(also known as surface brightness limit, see @ref{Quantifying measurement 
limits}).
 Since these measurements are related to the noise and not any particular 
labeled object, they are stored as keywords in the output table.
-Furthermore, they are only possible when a standard deviation image has been 
loaded (done automatically for any column measurement that involves noise, for 
example @option{--sn} or @option{--std}).
+Furthermore, they are only possible when a standard deviation image has been 
loaded (done automatically for any column measurement that involves noise, for 
example @option{--sn}, @option{--magnitudeerr} or @option{--skystd}).
 But if you just want the surface brightness limit and no noise-related column, 
you can use @option{--forcereadstd}.
 All these keywords start with @code{SBL} (for ``surface brightness limit'') 
and are described below:
 
@@ -33223,6 +33227,13 @@ necessary, this function is much more efficient than 
calling
 @code{gal_statistics_mean} and @code{gal_statistics_std} separately.
 @end deftypefun
 
+@deftypefun double gal_statistics_std_from_sums (double @code{sum}, double 
@code{sump2}, size_t @code{num})
+Return the standard deviation from the values that can be obtained in a single 
pass through the distribution: @code{sum}: the sum of the elements, 
@code{sump2}: the sum of the power-of-2 of each element, and @code{num}: the 
number of elements.
+
+This is a low-level function that is only useful after the distribution of 
values has been parsed (and the three input arguments are calculated).
+It is the lower-level function that is used in functions like 
@code{gal_statistics_std}, or other components of Gnuastro that measure the 
standard deviation (for example MakeCatalog's @option{--std} column).
+@end deftypefun
+
 @cindex Median
 @deftypefun {gal_data_t *} gal_statistics_median (gal_data_t @code{*input}, 
int @code{inplace})
 Return a single-element dataset containing the median of the non-blank
diff --git a/lib/gnuastro/statistics.h b/lib/gnuastro/statistics.h
index 1046b032..2293d873 100644
--- a/lib/gnuastro/statistics.h
+++ b/lib/gnuastro/statistics.h
@@ -91,6 +91,9 @@ gal_statistics_std(gal_data_t *input);
 gal_data_t *
 gal_statistics_mean_std(gal_data_t *input);
 
+double
+gal_statistics_std_from_sums(double sum, double sump2, size_t num);
+
 gal_data_t *
 gal_statistics_median(gal_data_t *input, int inplace);
 
diff --git a/lib/statistics.c b/lib/statistics.c
index b055a157..977394eb 100644
--- a/lib/statistics.c
+++ b/lib/statistics.c
@@ -190,6 +190,37 @@ gal_statistics_mean(gal_data_t *input)
 
 
 
+/* Calculate the standard deviation from the already measured (after
+   parsing) sum and the sum of squares. */
+double
+gal_statistics_std_from_sums(double sum, double sump2, size_t num)
+{
+  double ss;
+  switch(num)
+    {
+    case 0: return NAN;  /* No elements: STD: NaN */
+    case 1: return 0.0f; /* Single element, STD: 0.0 */
+    default:
+      /* 'ss' should never be bigger than 'sump2' unless the values are so
+         similar that it happens due to the floating-point error. Since
+         they are so close that their difference has caused this impossible
+         condition, their standard deviation is 0. */
+      ss=sum*sum/num;
+      if(ss>sump2) return 0.0f;
+      else         return sqrt( (sump2-ss)/num );
+    }
+
+  /* Control should not reach this point. */
+  error(EXIT_FAILURE, 0, "%s: a bug! Please contact us at '%s' to find "
+        "and fix the problem. Control should not reach this part of "
+        "the function", __func__, PACKAGE_BUGREPORT);
+  return NAN;
+}
+
+
+
+
+
 /* Return the standard deviation of the input dataset as a single element
    dataset of type float64. */
 gal_data_t *
@@ -218,12 +249,8 @@ gal_statistics_std(gal_data_t *input)
       /* Find the 's' and 's2' by parsing the data. */
       GAL_TILE_PARSE_OPERATE(input, out, 0, 1, {++n; s += *i; s2 += *i * *i;});
 
-      /* Write the standard deviation. If the square of the average value
-         is bigger than the average of the squares of the values, we have a
-         floating-point error (due to all the points having an identical
-         value, within floating point erros). So we should just set the
-         standard deviation to zero. */
-      o[0] = (s*s/n > s2) ? 0 : sqrt( (s2-s*s/n)/n );
+      /* Write the standard deviation. */
+      o[0] = gal_statistics_std_from_sums(s, s2, n);
       break;
     }
 
@@ -275,7 +302,7 @@ gal_statistics_mean_std(gal_data_t *input)
          floating-point error (due to all the points having an identical
          value, within floating point erros). So we should just set the
          standard deviation to zero. */
-      o[1] = (s*s/n > s2) ? 0 : sqrt( (s2-s*s/n)/n );
+      o[1] = gal_statistics_std_from_sums(s, s2, n);
       break;
     }
 



reply via email to

[Prev in Thread] Current Thread [Next in Thread]