gnuastro-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[gnuastro-commits] master 89bbee4 2/2: MakeCatalog: automatically find t


From: Mohammad Akhlaghi
Subject: [gnuastro-commits] master 89bbee4 2/2: MakeCatalog: automatically find total number of clumps if necessary
Date: Sun, 9 Jun 2019 01:47:38 -0400 (EDT)

branch: master
commit 89bbee4c38b60e5aa9f718a099d0b4a2be0e8f7b
Author: Mohammad Akhlaghi <address@hidden>
Commit: Mohammad Akhlaghi <address@hidden>

    MakeCatalog: automatically find total number of clumps if necessary
    
    Until now, MakeCatalog would use the `NUMLABS' keyword in the clumps
    catalog to find the total number of clumps before parsing the dataset. This
    was very inconvenient when we used clump labels that were not (directly)
    created by Segment.
    
    With this commit, when `NUMLABS' is not present, MakeCatalog will parse the
    clumps and objects datasets and count the total number of clumps in all the
    objects while also re-labeling the clumps dataset (and saving the relabeled
    image).
---
 NEWS                 |  6 ++++
 bin/mkcatalog/main.h |  1 +
 bin/mkcatalog/ui.c   | 93 +++++++++++++++++++++++++++++++++++++++++++++-------
 doc/gnuastro.texi    | 20 ++++++++++-
 4 files changed, 107 insertions(+), 13 deletions(-)

diff --git a/NEWS b/NEWS
index f7f0b8f..5fc250c 100644
--- a/NEWS
+++ b/NEWS
@@ -78,6 +78,12 @@ See the end of the file for license conditions.
      `sigclip-mean' and `sigclip-std') return 32-bit floating point
      datasets
 
+  MakeCatalog:
+   - When a clumps catalog is requested, MakeCatalog will automatically
+     deduce the total number of clumps (at a small cost in
+     performance). Until now, it was mandatory for the clumps label dataset
+     to contain the total number of clumps in the `NUMLABS' keyword.
+
   Library:
    - gal_statistics_outlier_flat_cfp: Improved implementation with new API.
 
diff --git a/bin/mkcatalog/main.h b/bin/mkcatalog/main.h
index 78e3451..0b7459b 100644
--- a/bin/mkcatalog/main.h
+++ b/bin/mkcatalog/main.h
@@ -182,6 +182,7 @@ struct mkcatalogparams
   int32_t       checkuplim[2];  /* Object & clump ID to check dist.     */
 
   /* Internal. */
+  char           *relabclumps;  /* Name of new file for clump labels.   */
   time_t              rawtime;  /* Starting time of the program.        */
   gal_data_t          *values;  /* Input.                               */
   gal_data_t         *objects;  /* Object labels.                       */
diff --git a/bin/mkcatalog/ui.c b/bin/mkcatalog/ui.c
index cc2daf4..150b798 100644
--- a/bin/mkcatalog/ui.c
+++ b/bin/mkcatalog/ui.c
@@ -532,12 +532,88 @@ ui_wcs_info(struct mkcatalogparams *p)
 
 
 
+static size_t
+ui_num_clumps(struct mkcatalogparams *p)
+{
+  char *basename;
+  size_t i, counter, numclumps=0;
+  gal_list_i32_t *tmp, **labsinobj;
+  int32_t *o=p->objects->array, *of=o+p->objects->size, *c=p->clumps->array;
+
+  /* Allocate array of lists to keep the unique labels within each object. */
+  errno=0;
+  labsinobj=calloc(p->numobjects+1, sizeof *labsinobj);
+  if(labsinobj==NULL)
+    error(EXIT_FAILURE, 0, "%s: couldn't allocate %zu bytes for "
+          "`labsinobj'", __func__, p->numobjects * sizeof *labsinobj);
+
+  /* Go over each pixel and find the unique clump labels within each
+     object. */
+  do
+    {
+      /* Do the steps if we are on a clump. */
+      if(*o>0 && *c>0)
+        {
+          /* See if the label has already been found. */
+          for(tmp=labsinobj[*o];tmp!=NULL;tmp=tmp->next) if(tmp->v==*c) break;
+
+          /* When it wasn't found, `tmp==NULL'. */
+          if(tmp==NULL)
+            {
+              ++numclumps;
+              gal_list_i32_add(&labsinobj[*o], *c);
+            }
+        }
+
+      /* Increment the clumps pointer.*/
+      ++c;
+    }
+  while(++o<of);
+
+  /* Re-write the clump values so their numbering is contiguous, since this
+     is assumed during the later steps. */
+  o=p->objects->array;
+  c=p->clumps->array;
+  do
+    {
+      /* Do the steps if we are on a clump. */
+      if(*o>0 && *c>0)
+        {
+          counter=0;
+          for(tmp=labsinobj[*o];tmp!=NULL;tmp=tmp->next)
+            { counter++; if(tmp->v==*c) {*c=counter; break;} }
+        }
+
+      /* Increment the clumps pointer.*/
+      ++c;
+    }
+  while(++o<of);
+
+  /* Write the created file into a file for the user to inspect. */
+  basename = p->cp.output ? p->cp.output : p->objectsfile;
+  p->relabclumps=gal_checkset_automatic_output(&p->cp, basename,
+                                               "-clumps-relab.fits");
+  gal_fits_img_write(p->clumps, p->relabclumps, NULL, PROGRAM_STRING);
+
+
+  /* Clean up. */
+  for(i=0;i<p->numobjects;++i)
+    if(labsinobj[i]) gal_list_i32_free(labsinobj[i]);
+  free(labsinobj);
+
+  /* Return the number of clumps. */
+  return numclumps;
+}
+
+
+
+
+
 /* The only mandatory input is the objects image, so first read that and
    make sure its type is correct. */
 static void
 ui_read_labels(struct mkcatalogparams *p)
 {
-  char *msg;
   gal_data_t *tmp, *keys=gal_data_array_calloc(2);
 
   /* Read it into memory. */
@@ -624,17 +700,7 @@ ui_read_labels(struct mkcatalogparams *p)
       keys[0].array=&p->clumpsn;            keys[1].array=&p->numclumps;
       gal_fits_key_read(p->usedclumpsfile, p->clumpshdu, keys, 0, 0);
       if(keys[0].status) p->clumpsn=NAN;
-      if(keys[1].status)
-        {
-          if( asprintf(&msg, "%s (hdu: %s): couldn't find/read `NUMLABS' in "
-                       "the header keywords, see CFITSIO error above. The "
-                       "clumps image must have the total number of clumps "
-                       "(irrespective of how many objects there are in the "
-                       "image) in this header keyword", p->usedclumpsfile,
-                       p->clumpshdu)<0 )
-            error(EXIT_FAILURE, 0, "%s: asprintf allocation", __func__);
-          gal_fits_io_error(keys[1].status, msg);
-        }
+      if(keys[1].status) p->numclumps=ui_num_clumps(p);
 
       /* If there were no clumps, then free the clumps array and set it to
          NULL, so for the rest of the processing, MakeCatalog things that
@@ -1520,6 +1586,9 @@ ui_read_check_inputs_setup(int argc, char *argv[], struct 
mkcatalogparams *p)
       if(p->clumps)
         printf("  - Clumps:  %s (hdu: %s)\n", p->usedclumpsfile,
                p->clumpshdu);
+      if(p->relabclumps)
+        printf("  - RELABELED CLUMPS (no NUMLABS in original): %s\n",
+               p->relabclumps);
       if(p->values)
         printf("  - Values:  %s (hdu: %s)\n", p->usedvaluesfile,
                p->valueshdu);
diff --git a/doc/gnuastro.texi b/doc/gnuastro.texi
index 6463a55..31db2e4 100644
--- a/doc/gnuastro.texi
+++ b/doc/gnuastro.texi
@@ -19694,7 +19694,7 @@ There are separate options to explicitly request a file 
name and
 HDU/extension for each of the required input datasets as fully described
 below (with the @option{--*file} format). When each dataset is in a
 separate file, these options are necessary. However, one great advantage of
-the FITS file format, that is heavily used in astronomy, is that it allows
+the FITS file format (that is heavily used in astronomy) is that it allows
 the storage of multiple datasets in one file. So in most situations (for
 example if you are using the outputs of @ref{NoiseChisel} or
 @ref{Segment}), all the necessary input datasets can be in one file.
@@ -19707,6 +19707,24 @@ necessary and the only @option{--*file} option called 
is
 default/given HDUs) in the file given to @option{--valuesfile} (before
 looking into the the main argument file).
 
+When the clumps image (necessary with the @option{--clumpscat} option) is
+used, MakeCatalog looks into the (possibly existing) @code{NUMLABS} keyword
+for the total number of clumps in the image (irrespective of how many
+objects there are). If its not present, it will count them and possibly
+re-label the clumps so the clump labels always start with 1 and finish with
+the total number of clumps in each object. The re-labeled clumps image will
+be stored with the @file{-clumps-relab.fits} suffix. This can slighly
+slow-down the run.
+
+Note that @code{NUMLABS} is automatically written by Segment in its
+outputs, so if you are feeding Segment's clump labels, you can benefit from
+the improved speed. Otherwise, if you are creating the clumps label dataset
+manually, it may be good to include the @code{NUMLABS} keyword in its
+header and also be sure that there is no gap in the clump labels. For
+example if an object has three clumps, they are labeled as 1, 2, 3. If they
+are labeled as 1, 3, 4, or any other combination of three positive integers
+that aren't an increment of the previous, you might get un-known behavior.
+
 It may happen that your labeled objects image was created with a program
 that only outputs floating point files. However, you know it only has
 integer valued pixels that are stored in a floating point container. In



reply via email to

[Prev in Thread] Current Thread [Next in Thread]