gnuastro-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[gnuastro-commits] master e04f4ac 03/19: First implementation of k-d tre


From: Mohammad Akhlaghi
Subject: [gnuastro-commits] master e04f4ac 03/19: First implementation of k-d tree matching, not complete
Date: Sun, 14 Nov 2021 20:40:57 -0500 (EST)

branch: master
commit e04f4ac97b5daecedd011515c0c8e366539c5c29
Author: Mohammad Akhlaghi <mohammad@akhlaghi.org>
Commit: Mohammad Akhlaghi <mohammad@akhlaghi.org>

    First implementation of k-d tree matching, not complete
    
    The first library implementation of the k-d tree matching has been done:
    the new 'gal_match_kdtree' function will find the nearest point in the
    first catalog to each point in the second catalog in parallel. But it still
    needs to do some post-processing to make an output in the same format as
    'gal_match_coordinates'.
    
    Some points on the previous commit that have been fixed here. Please pay
    close attention to these, run with 'git log -p -1' to see my changes.
    
     - As we had defined in the task, 'match_catalog_kdtree_build' should only
       build the k-d tree, write it into a file and abort. So it doesn't need
       to return any dataset!
    
     - The case when the user doesn't specify an '--output' wasn't planned (a
       default name).
    
     - The comments I had originally put in 'match_catalog_kdtree' explained
       that the input coordinates are already read into memory (as
       'p->cols1'). So there was no need to re-read the input in
       'match_catalog_kdtree_build'!!!
    
     - Again, in the comments I had originally put in 'match_catalog_kdtree', I
       had explained that the value of 'p->kdtreemode' already contains the
       proper macro for the k-d tree mode (done in 'ui.c' at the very start of
       the program). There was no need to re-compare the strings in
       'match_catalog_kdtree'.
---
 bin/match/match.c | 104 +++++++++++++++++++++--------------------
 bin/match/ui.c    |  25 ++++++++--
 lib/match.c       | 136 +++++++++++++++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 208 insertions(+), 57 deletions(-)

diff --git a/bin/match/match.c b/bin/match/match.c
index 88f797a..a8a2bbe 100644
--- a/bin/match/match.c
+++ b/bin/match/match.c
@@ -448,90 +448,85 @@ match_catalog_write_one_col(struct matchparams *p, 
gal_data_t *a,
 
 
 
-static gal_data_t *
-match_catalog_kdtree_build(struct matchparams *p, char *kdtreefile)
+static void
+match_catalog_kdtree_build(struct matchparams *p)
 {
   size_t root;
-  gal_data_t *input, *kdtree;
-  // gal_list_str_t *cols = p->ccol1->array;
+  gal_data_t *kdtree;
   gal_fits_list_key_t *keylist=NULL;
 
   /* Meta-data in the output fits file. */
   char *unit = "index";
   char *keyname = "KDTROOT";
-  char *inputfile = p->input1name;
   char *comment = "k-d tree root index (counting from 0).";
 
-  /* Read the input table. */
-  input = gal_table_read(inputfile, "1", NULL, NULL,
-                          GAL_TABLE_SEARCH_NAME, 0, -1, 0, NULL);
-
-  /* Construct a k-d tree. The index of root is stored in 'root'. */
-  kdtree = gal_kdtree_create(input, &root);
+  /* Construct a k-d tree from 'p->cols1': the index of root is stored in
+     'root'. */
+  kdtree = gal_kdtree_create(p->cols1, &root);
 
   /* Write the k-d tree to a file and write root index and input name
      as FITS keywords ('gal_table_write' frees 'keylist'). */
   gal_fits_key_list_title_add(&keylist, "k-d tree parameters", 0);
-  gal_fits_key_write_filename("KDTIN", inputfile, &keylist, 0);
+  gal_fits_key_write_filename("KDTIN", p->input1name, &keylist, 0);
   gal_fits_key_list_add_end(&keylist, GAL_TYPE_SIZE_T, keyname, 0,
                             &root, 0, comment, 0, unit, 0);
   gal_table_write(kdtree, &keylist, NULL, GAL_TABLE_FORMAT_BFITS,
-                  kdtreefile, "kdtree", 0);
+                  p->out1name, "kdtree", 0);
 
-  /* Clean up and return. */
-  gal_list_data_free(input);
-  return kdtree;
+  /* Let the user know that the k-d tree has been built. */
+  if(!p->cp.quiet)
+    fprintf(stdout, "Output (k-d tree): %s\n", p->out1name);
 }
 
 
 
 
 
+/* See if a k-d tree should be used (MATCH_KDTREE_INTERNAL) or classical
+   matching (MATCH_KDTREE_DISABLE). */
+static int
+match_catalog_kdtree_auto(struct matchparams *p)
+{
+  error(EXIT_FAILURE, 0, "%s: not yet implemented!", __func__);
+  return MATCH_KDTREE_INVALID;
+}
+
+
+
+
 
 /* Wrapper over the k-d tree library to return an output in the same format
    as 'gal_match_coordinates'. */
 static gal_data_t *
 match_catalog_kdtree(struct matchparams *p)
 {
-  /* The input coordinates are already available in 'p->cols1' (which is
-     already stored as double).
-
-     Also, the 'p->kdtreemode' contains the k-d tree operation mode (mode
-     codes are defined as an 'enum' in 'main.h'): you can use a
-     'switch'. If the mode isn't 'MATCH_KDTREE_BUILD', you also have the
-     second input (to match against) in 'p->cols2' (again, already stored
-     as 'double'). */
-  int mode = 0;
-  char *kdtreefile = p->cp.output;
-  gal_data_t *kdtree=NULL;
-
-  /* Check the mode of operation for the kd-tree. */
-  if(strcmp(p->kdtree, "build") == 0)
-    mode = MATCH_KDTREE_BUILD;
-  else if(strcmp(p->kdtree, "internal") == 0)
-    mode = MATCH_KDTREE_INTERNAL;
-  else if(strcmp(p->kdtree, "automatic") == 0)
-    mode = MATCH_KDTREE_AUTO;
-  else if(strcmp(p->kdtree, "none") == 0)
-    mode = MATCH_KDTREE_DISABLE;
-  else
-    mode = MATCH_KDTREE_FILE;
+  gal_data_t *out=NULL;
+
+  /* If we are in automatic mode, we should look at the data (number of
+     rows/columns) and system (number of threads) to decide if the mode
+     should be set to 'MATCH_KDTREE_INTERNAL' or 'MATCH_KDTREE_DISABLE'. */
+  if(p->kdtreemode==MATCH_KDTREE_AUTO)
+    p->kdtreemode=match_catalog_kdtree_auto(p);
 
   /* Operate according to the required mode. */
-  switch(mode)
+  switch(p->kdtreemode)
     {
-      case MATCH_KDTREE_BUILD:
-        kdtree = match_catalog_kdtree_build(p, kdtreefile);
-        break;
-      case MATCH_KDTREE_INTERNAL:
-        break;
-      // Similar for other cases
-
-      default:
-        break;
+    /* Build a k-d tree and don't continue. */
+    case MATCH_KDTREE_BUILD:
+      match_catalog_kdtree_build(p);
+      break;
+
+    /* Do the k-d tree matching. */
+    case MATCH_KDTREE_INTERNAL:
+      error(EXIT_FAILURE, 0, "%s: internal kd tree usage not "
+            "yet implemented", __func__);
+      break;
+
+    /* No 'default' necessary because the modes include disabling. */
     }
 
-  return kdtree;
+  /* Return the final match. */
+  return out;
 }
 
 
@@ -548,7 +543,14 @@ match_catalog(struct matchparams *p)
 
   /* If we want to use kd-tree for matching. */
   if(p->kdtreemode!=MATCH_KDTREE_DISABLE)
-    mcols=match_catalog_kdtree(p);
+    {
+      /* The main processing function. */
+      mcols=match_catalog_kdtree(p);
+
+      /* If the user just asked to build a k-d tree, no futher processing
+         is necessary. */
+      if(p->kdtreemode==MATCH_KDTREE_BUILD) return;
+    }
 
   /* Find the matching coordinates. We are doing the processing in
      place. Incase it was decided not to use a k-d tree at all
diff --git a/bin/match/ui.c b/bin/match/ui.c
index 6a5f241..2ed4e1a 100644
--- a/bin/match/ui.c
+++ b/bin/match/ui.c
@@ -231,6 +231,15 @@ ui_read_check_only_options(struct matchparams *p)
           "automatically if a k-d tree should be used or not), 'disable' "
           "(to not use a k-d tree at all), a FITS file name (the file to "
           "read a created k-d tree from)", p->kdtree);
+
+  /* Make sure that the k-d tree build mode is not called with
+     '--outcols'. */
+  if( p->kdtreemode==MATCH_KDTREE_BUILD && (p->outcols || p->coord) )
+    error(EXIT_FAILURE, 0, "the '--kdtree=build' option is incompatible "
+          "with the '--outcols' or '--coord' options (because in the k-d "
+          "tree building mode doesn't involve actual matching. It will "
+          "only build k-d tree and write it to a file so it can be used "
+          "in future matches)");
 }
 
 
@@ -653,7 +662,7 @@ ui_set_columns_sanity_check_read_aperture(struct 
matchparams *p)
               "dimensions is deduced from the number of values given to "
               "'--ccol1' (or '--coord') and '--ccol2'", ccol1n);
       }
-  else
+  else if ( p->kdtreemode != MATCH_KDTREE_BUILD )
     error(EXIT_FAILURE, 0, "no matching aperture specified. Please use "
           "the '--aperture' option to define the acceptable aperture for "
           "matching the coordinates (in the same units as each "
@@ -890,6 +899,7 @@ static void
 ui_preparations_out_name(struct matchparams *p)
 {
   /* To temporarily keep the original value. */
+  char *suffix;
   uint8_t keepinputdir_orig;
   char *refname = p->input1name ? p->input1name : p->input2name;
 
@@ -918,14 +928,19 @@ ui_preparations_out_name(struct matchparams *p)
     }
   else
     {
-      if(p->outcols || p->coord)
+      if(p->outcols || p->coord || p->kdtreemode==MATCH_KDTREE_BUILD)
         {
           if(p->cp.output)
             gal_checkset_allocate_copy(p->cp.output, &p->out1name);
           else
-            p->out1name = gal_checkset_automatic_output(&p->cp,
-                 refname, ( p->cp.tableformat==GAL_TABLE_FORMAT_TXT
-                                  ? "_matched.txt" : "_matched.fits") );
+            {
+              suffix = ( p->kdtreemode==MATCH_KDTREE_BUILD
+                         ? "_kdtree.fits"
+                         : ( p->cp.tableformat==GAL_TABLE_FORMAT_TXT
+                             ? "_matched.txt" : "_matched.fits") );
+              p->out1name = gal_checkset_automatic_output(&p->cp,
+                                                          refname, suffix);
+            }
           gal_checkset_writable_remove(p->out1name, 0, p->cp.dontdelete);
         }
       else
diff --git a/lib/match.c b/lib/match.c
index e3a4f34..4497788 100644
--- a/lib/match.c
+++ b/lib/match.c
@@ -33,7 +33,9 @@ along with Gnuastro. If not, see 
<http://www.gnu.org/licenses/>.
 #include <gnuastro/box.h>
 #include <gnuastro/list.h>
 #include <gnuastro/blank.h>
+#include <gnuastro/kdtree.h>
 #include <gnuastro/pointer.h>
+#include <gnuastro/threads.h>
 #include <gnuastro/permutation.h>
 
 
@@ -894,7 +896,7 @@ gal_match_coordinates_output(gal_data_t *A, gal_data_t *B, 
size_t *A_perm,
    changes the inputs' order), the output permutation will correspond to
    original inputs.
 
-   The output is a list of 'gal_data_t' with the following columns:
+   The output is a list of 'gal_data_t's with the following columns:
 
        Node 1: First catalog index (counting from zero).
        Node 2: Second catalog index (counting from zero).
@@ -957,3 +959,135 @@ gal_match_coordinates(gal_data_t *coord1, gal_data_t 
*coord2,
   *nummatched = out ?  out->next->next->size : 0;
   return out;
 }
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+/********************************************************************/
+/*************             k-d tree matching            *************/
+/********************************************************************/
+struct match_kdtree_params
+{
+  size_t               ndim;  /* The number of dimensions.            */
+  gal_data_t        *coord1;  /* 1st coordinate list of 'gal_data_t's */
+  gal_data_t        *coord2;  /* 2nd coordinate list of 'gal_data_t's */
+  size_t           *c2match;  /* Matched IDs for the second coords.   */
+  double          *aperture;  /* Acceptable aperture for match.       */
+  size_t        kdtree_root;  /* Index (counting from 0) of root.     */
+  gal_data_t *coord1_kdtree;  /* k-d tree of first coordinate.        */
+};
+
+
+
+
+/* Main k-d tree matching function. */
+static void *
+match_kdtree_worker(void *in_prm)
+{
+  /* Low-level definitions to be done first. */
+  struct gal_threads_params *tprm=(struct gal_threads_params *)in_prm;
+  struct match_kdtree_params *p=(struct match_kdtree_params *)tprm->params;
+
+  /* High level definitions. */
+  gal_data_t *ccol;
+  double *point, least_dist;
+  size_t i, j, index, mindex;
+
+  /* Allocate space for all the matching points (based on the number of
+     dimensions). */
+  gal_pointer_allocate(GAL_TYPE_FLOAT64, p->ndim, 0, __func__, "point");
+
+  /* Go over all the actions (pixels in this case) that were assigned to
+     this thread. */
+  for(i=0; tprm->indexs[i] != GAL_BLANK_SIZE_T; ++i)
+    {
+      /* Fill the 'point' for this thread. */
+      j=0;
+      index = tprm->indexs[i];
+      for(ccol=p->coord2; ccol!=NULL; ccol=ccol->next)
+        point[ j++ ] = ((double *)(ccol->array))[index];
+
+      /* Find the index of the nearest neighbor to this item. */
+      mindex=gal_kdtree_nearest_neighbour(p->coord1, p->coord1_kdtree,
+                                          p->kdtree_root, point, &least_dist);
+
+      /* Make sure the matched point is within the given aperture.
+         ###############################
+         The '1' check should be fixed to see if the matched point is
+         within the requested aperture (note that the aperture can be
+         elliptical!).
+         ############################### */
+      mindex= 1 ? mindex : GAL_BLANK_SIZE_T;
+
+      /* Put the matched index into the final array for this index. */
+      p->c2match[index]=mindex;
+    }
+
+  /* Wait for all the other threads to finish, then return. */
+  if(tprm->b) pthread_barrier_wait(tprm->b);
+  return NULL;
+}
+
+
+
+
+
+gal_data_t *
+gal_match_kdtree(gal_data_t *coord1, gal_data_t *coord2,
+                 gal_data_t *coord1_kdtree, size_t kdtree_root,
+                 double *aperture, size_t numthreads, size_t minmapsize,
+                 int quietmmap)
+{
+  gal_data_t *c2match;
+  struct match_kdtree_params p;
+
+  /* Basic sanity checks. */
+  p.ndim=gal_list_data_number(coord1);
+  if( (   p.ndim != gal_list_data_number(coord2))
+      || (p.ndim != gal_list_data_number(coord1_kdtree)) )
+    error(EXIT_FAILURE, 0, "%s: the 'coord1', 'coord2' and 'coord1_kdtree' "
+          "arguments should have the same number of nodes/columns (elements "
+          "in a simply linked list). But they each respectively have %zu, "
+          "%zu and %zu nodes/columns", __func__, p.ndim,
+          gal_list_data_number(coord2), gal_list_data_number(coord1_kdtree));
+  if( coord1->type!=GAL_TYPE_FLOAT64 )
+    error(EXIT_FAILURE, 0, "%s: the type of 'coord1' should be 'double', "
+          "but it is '%s'", __func__, gal_type_name(coord1->type, 1));
+  if( coord2->type!=GAL_TYPE_FLOAT64 )
+    error(EXIT_FAILURE, 0, "%s: the type of 'coord2' should be 'double', "
+          "but it is '%s'", __func__, gal_type_name(coord2->type, 1));
+  if( coord1_kdtree->type!=GAL_TYPE_UINT32 )
+    error(EXIT_FAILURE, 0, "%s: the type of 'coord1_kdtree' should be "
+          "'uint32', but it is '%s'", __func__,
+          gal_type_name(coord1_kdtree->type, 1));
+
+  /* Allocate the space to keep results. */
+  c2match=gal_data_alloc(NULL, GAL_TYPE_SIZE_T, 1, &coord2->size, NULL, 0,
+                         minmapsize, quietmmap, NULL, NULL, NULL);
+
+  /* Set the threading parameters and spin-off the threads. */
+  p.coord1=coord1;
+  p.coord2=coord2;
+  p.aperture=aperture;
+  p.c2match=c2match->array;
+  p.kdtree_root=kdtree_root;
+  p.coord1_kdtree=coord1_kdtree;
+  gal_threads_spin_off(match_kdtree_worker, &p, coord2->size, numthreads,
+                       minmapsize, quietmmap);
+}



reply via email to

[Prev in Thread] Current Thread [Next in Thread]