[Top][All Lists]

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[gnuastro-commits] master 969da3f: Statistics binning range can be set manually

From: Mohammad Akhlaghi
Subject: [gnuastro-commits] master 969da3f: Statistics binning range can be set manually
Date: Tue, 20 Feb 2018 20:45:15 -0500 (EST)

branch: master
commit 969da3fb80a112ee89b8e4229b32155525b5485f
Author: Mohammad Akhlaghi <address@hidden>
Commit: Mohammad Akhlaghi <address@hidden>

    Statistics binning range can be set manually
    Until now, there was no way to manually define the range of the binning
    functionality in the Statistics program. For example, if you wanted a
    histogram ranged 0 to 100, but your data had a maximum value of 79, then
    the last bin would always finish at 79.
    To address this issue, a `--manualbinrange' option has been added. When
    called, the bin organizing function will use the values given to
    `--greaterequal' or `--lessthan' to set the range manually.
 NEWS                        |  4 ++++
 bin/statistics/args.h       | 13 ++++++++++++
 bin/statistics/main.h       |  1 +
 bin/statistics/statistics.c | 48 ++++++++++++++++++++++++++++++++++++---------
 bin/statistics/ui.c         | 11 ++++++-----
 bin/statistics/ui.h         |  1 +
 doc/gnuastro.texi           | 46 +++++++++++++++++++++++++++++++------------
 7 files changed, 98 insertions(+), 26 deletions(-)

diff --git a/NEWS b/NEWS
index 7f3782f..19c3ca3 100644
--- a/NEWS
+++ b/NEWS
@@ -5,6 +5,10 @@ GNU Astronomy Utilities NEWS                          -*- outline -*-
 ** New features
+  Statistics: the new `--manualbinrange' allows the bins in histograms or
+  cumulative frequency plots to be set outside the minimum or maximum
+  values of the dataset.
 ** Removed features
 ** Changed features
diff --git a/bin/statistics/args.h b/bin/statistics/args.h
index 227540f..6b0fbb9 100644
--- a/bin/statistics/args.h
+++ b/bin/statistics/args.h
@@ -595,6 +595,19 @@ struct argp_option program_options[] =
+      "manualbinrange",
+      0,
+      0,
+      "Set min/max of bins manually, not from data.",
+      &p->manualbinrange,
+    },
+    {
diff --git a/bin/statistics/main.h b/bin/statistics/main.h
index 4557a6b..01c437b 100644
--- a/bin/statistics/main.h
+++ b/bin/statistics/main.h
@@ -79,6 +79,7 @@ struct statisticsparams
   size_t      numasciibins;  /* Number of bins in ASCII plots.           */
   size_t       asciiheight;  /* Height of ASCII histogram or CFP plots.  */
   uint8_t        normalize;  /* set the sum of all bins to 1.            */
+  uint8_t   manualbinrange;  /* Set bin min/max manually, not from data. */
   float        onebinstart;  /* Shift bins to start at this value.       */
   uint8_t        maxbinone;  /* Set the maximum bin to 1.                */
   float         mirrordist;  /* Maximum distance after mirror for mode.  */
diff --git a/bin/statistics/statistics.c b/bin/statistics/statistics.c
index 42a80dc..62bf0ec 100644
--- a/bin/statistics/statistics.c
+++ b/bin/statistics/statistics.c
@@ -303,8 +303,8 @@ statistics_on_tile(struct statisticsparams *p)
           type=GAL_TYPE_FLOAT64; break;
-          error(EXIT_FAILURE, 0, "%s: a bug! %d is not a recognized operation "
-                "code", __func__, operation->v);
+          error(EXIT_FAILURE, 0, "%s: a bug! %d is not a recognized "
+                "operation code", __func__, operation->v);
       /* Allocate the space necessary to keep the value for each tile. */
@@ -470,13 +470,37 @@ print_ascii_plot(struct statisticsparams *p, gal_data_t 
+/* Data structure that must be fed into `gal_statistics_regular_bins'.*/
+static gal_data_t *
+set_bin_range_params(struct statisticsparams *p)
+  size_t rsize=2;
+  gal_data_t *range=NULL;
+  if(p->manualbinrange)
+    {
+      /* Allocate the range data structure. */
+      range=gal_data_alloc(NULL, GAL_TYPE_FLOAT32, 1, &rsize, NULL, 0, -1,
+                           NULL, NULL, NULL);
+      ((float *)(range->array))[0]=p->greaterequal;
+      ((float *)(range->array))[1]=p->lessthan;
+    }
+  return range;
 static void
 ascii_plots(struct statisticsparams *p)
-  gal_data_t *bins, *hist, *cfp=NULL;
+  gal_data_t *bins, *hist, *cfp=NULL, *range=NULL;
   /* Make the bins and the respective plot. */
-  bins=gal_statistics_regular_bins(p->input, NULL, p->numasciibins, NAN);
+  range=set_bin_range_params(p);
+  bins=gal_statistics_regular_bins(p->input, range, p->numasciibins, NAN);
   hist=gal_statistics_histogram(p->input, bins, 0, 0);
@@ -586,13 +610,13 @@ static void
 save_hist_and_or_cfp(struct statisticsparams *p)
   char *suf, *contents;
-  gal_data_t *bins, *hist, *cfp=NULL;
+  gal_data_t *bins, *hist, *cfp=NULL, *range=NULL;
   /* Set the bins and make the histogram, this is necessary for both the
      histogram and CFP (recall that the CFP is built from the
      histogram). */
-  bins=gal_statistics_regular_bins(p->input, NULL, p->numbins,
+  range=set_bin_range_params(p);
+  bins=gal_statistics_regular_bins(p->input, range, p->numbins,
   hist=gal_statistics_histogram(p->input, bins, p->normalize, p->maxbinone);
@@ -636,6 +660,10 @@ save_hist_and_or_cfp(struct statisticsparams *p)
   /* Set the output file name. */
   write_output_table(p, bins, suf, contents);
+  /* Clean up. */
+  gal_data_free(range);
@@ -770,7 +798,7 @@ print_basics(struct statisticsparams *p)
   int namewidth=40;
   float mirrdist=1.5;
   double mean, std, *d;
-  gal_data_t *tmp, *bins, *hist;
+  gal_data_t *tmp, *bins, *hist, *range=NULL;
   /* Define the input dataset. */
@@ -830,14 +858,16 @@ print_basics(struct statisticsparams *p)
      range of the histogram. In that case, we want to print the histogram
      information. */
+  range=set_bin_range_params(p);
   p->asciiheight = p->asciiheight ? p->asciiheight : 10;
   p->numasciibins = p->numasciibins ? p->numasciibins : 70;
-  bins=gal_statistics_regular_bins(p->input, NULL, p->numasciibins, NAN);
+  bins=gal_statistics_regular_bins(p->input, range, p->numasciibins, NAN);
   hist=gal_statistics_histogram(p->input, bins, 0, 0);
   if(p->refcol==NULL) printf("\nHistogram:\n");
   print_ascii_plot(p, hist, bins, 1, p->refcol ? 1 : 0);
+  gal_data_free(range);
diff --git a/bin/statistics/ui.c b/bin/statistics/ui.c
index 4c4b652..2e1d57f 100644
--- a/bin/statistics/ui.c
+++ b/bin/statistics/ui.c
@@ -765,11 +765,12 @@ ui_read_columns(struct statisticsparams *p)
       /* Print an error if there are too many columns: */
-        gal_tableintern_error_col_selection(p->inputname, p->cp.hdu, "too many "
-                                            "columns were selected by the "
-                                            "given values to the `--column' "
-                                            "and/or `--refcol' options. Only "
-                                            "one is acceptable for each.");
+        gal_tableintern_error_col_selection(p->inputname, p->cp.hdu, "too "
+                                            "many columns were selected by "
+                                            "the given values to the "
+                                            "`--column' and/or `--refcol' "
+                                            "options. Only one is "
+                                            "acceptable for each.");
   /* Clean up. */
diff --git a/bin/statistics/ui.h b/bin/statistics/ui.h
index 3e96f5b..e8d4bef 100644
--- a/bin/statistics/ui.h
+++ b/bin/statistics/ui.h
@@ -87,6 +87,7 @@ enum option_keys_enum
diff --git a/doc/gnuastro.texi b/doc/gnuastro.texi
index 9142d4d..f828b41 100644
--- a/doc/gnuastro.texi
+++ b/doc/gnuastro.texi
@@ -13215,9 +13215,10 @@ table. The first column is the value at the center of the bin and the
 second is the number of points in that bin. If the @option{--cumulative}
 option is also called with this option in a run, then the table will have
 three columns (the third is the cumulative frequency plot). Through the
-@option{--numbins} and @option{--lowerbin} you can modify the first column
-values and with @option{--normalize} and @option{--maxbinone} you can
-modify the second columns. See below for the description of each.
+@option{--numbins}, @option{--onebinstart}, or @option{--manualbinrange},
+you can modify the first column values and with @option{--normalize} and
+@option{--maxbinone} you can modify the second columns. See below for the
+description of each.
 By default (when no @option{--output} is specified) a plain text table will
 be created, see @ref{Gnuastro text table format}. If a FITS name is
@@ -13305,20 +13306,41 @@ can be very useful.
 @item --onebinstart=FLT
 Make sure that one bin starts with the value to this option. In practice,
 this will shift the bins used to find the histogram and cumulative
-frequency plot such that one bin's lower interval becomes this value. For
-example when the histogram range includes negative and positive values and
-zero has a special significance in your analysis, then zero will be
-somewhere in one bin and will mix counts of positive and negative. By
-setting @option{--onebinstart=0}, you can make sure that the viewers of the
-histogram will not be confused without doing the math of setting a range
-and number of bins.
+frequency plot such that one bin's lower interval becomes this value.
+For example when a histogram range includes negative and positive values
+and zero has a special significance in your analysis, then zero might fall
+somewhere in one bin. As a result that bin will have counts of positive and
+negative. By setting @option{--onebinstart=0}, you can make sure that one
+bin will only count negative values in the vicinity of zero and the next
+bin will only count positive ones in that vicinity.
 @cindex NaN
 Note that by default, the first row of the histogram and cumulative
 frequency plot show the central values of each bin. So in the example above
 you will not see the 0.000 in the first column, you will see two symmetric
-values. If the value is not within the usable input range, this option will
-be ignored.
+values.
+If the value is not within the usable input range, this option will be
+ignored. When it is, this option is the last operation before the bins are
+finalized, therefore it has a higher priority than options like
+@option{--manualbinrange}.
+@item --manualbinrange
+Use the values given to the @option{--greaterequal} and @option{--lessthan}
+to define the range of all bin-based calculations like the histogram. This
+option itself doesn't take any value, but just tells the program to use the
+values of those two options instead of the minimum and maximum values of a
+plot. If either of the two options is not given, then the minimum or maximum
+will be used respectively. Therefore, if neither of them is called, calling
+this option is redundant.
+The @option{--onebinstart} option has a higher priority than this option.
+In other words, @option{--onebinstart} takes effect after the range has
+been finalized and the initial bins have been defined, therefore it has the
+power to (possibly) shift the bins. If you want to manually set the range
+of the bins @emph{and} have one bin on a special value, it is thus better
+to avoid @option{--onebinstart}.
 @end table

reply via email to

[Prev in Thread] Current Thread [Next in Thread]