gnuastro-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[gnuastro-commits] master 20ad77d 07/19: Final output for kdtree matchin


From: Mohammad Akhlaghi
Subject: [gnuastro-commits] master 20ad77d 07/19: Final output for kdtree matching
Date: Sun, 14 Nov 2021 20:40:58 -0500 (EST)

branch: master
commit 20ad77d759cd86bef77cdf7a05aaeb52daa1ef8a
Author: Sachin Kumar Singh <sachinkumarsingh092@gmail.com>
Commit: Mohammad Akhlaghi <mohammad@akhlaghi.org>

    Final output for kdtree matching
    
    The final output is now being produces for kdtree
    matching.`gal_kdtree_nearest_neighbour` alredy finds the smallest
    distance between the 2 points so there was no further need to calculate
    it. I kept track of all the distance by using a new variable in
    `match_kdtree_params`.
    
    To remove the multiple matchings, I used the arry `multiple _match` n
    `match_kdtree_params` and used it further to check if the same point has
    been checked before in `gal_match_kdtree_output`.
    
    The `gal_match_kdtree_output` works similar to that of normal coordinate
    matching, but instead of using a permutaion array, simply uses the
    matched points to assign values/indexes to the output.
    
    The error in valgrind is still present but produces the correct fits output.
    When using valgrind with option '--track-origin=yes', they track back to
    malloc in pointer.c. The output is shown using valgrind but due to the
    conditional jumps, are not produced normally.
---
 bin/match/match.c                     |   7 +-
 bin/match/ui.c                        |  50 +++++-----
 during-dev-test-data/match-query.fits | Bin 8640 -> 0 bytes
 lib/gnuastro/match.h                  |   2 +-
 lib/match.c                           | 176 ++++++++++++++++++++++++++++++----
 5 files changed, 187 insertions(+), 48 deletions(-)

diff --git a/bin/match/match.c b/bin/match/match.c
index 91a51f1..f8f455e 100644
--- a/bin/match/match.c
+++ b/bin/match/match.c
@@ -499,7 +499,7 @@ match_catalog_kdtree_auto(struct matchparams *p)
 /* Wrapper over the k-d tree library to return an output in the same format
    as 'gal_match_coordinates'. */
 static gal_data_t *
-match_catalog_kdtree(struct matchparams *p)
+match_catalog_kdtree(struct matchparams *p, size_t *nummatched)
 {
   size_t root;
   gal_data_t *out=NULL;
@@ -524,7 +524,8 @@ match_catalog_kdtree(struct matchparams *p)
       kdtree = gal_kdtree_create(p->cols1, &root);
       out = gal_match_kdtree(p->cols1, p->cols2, kdtree, root,
                              p->aperture->array, p->cp.numthreads,
-                             p->cp.minmapsize, p->cp.quietmmap);
+                             p->cp.minmapsize, nummatched, 1,
+                             p->cp.quietmmap);
       gal_list_data_free(kdtree);
       break;
 
@@ -551,7 +552,7 @@ match_catalog(struct matchparams *p)
   if(p->kdtreemode!=MATCH_KDTREE_DISABLE)
     {
       /* The main processing function. */
-      mcols=match_catalog_kdtree(p);
+      mcols=match_catalog_kdtree(p, &nummatched);
 
       /* If the user just asked to build a k-d tree, no futher processing
          is necessary. */
diff --git a/bin/match/ui.c b/bin/match/ui.c
index 2ed4e1a..b8fb280 100644
--- a/bin/match/ui.c
+++ b/bin/match/ui.c
@@ -217,29 +217,33 @@ parse_opt(int key, char *arg, struct argp_state *state)
 static void
 ui_read_check_only_options(struct matchparams *p)
 {
-  /* Set the k-d tree mode. */
-  if(      !strcmp(p->kdtree,"build")    ) p->kdtreemode=MATCH_KDTREE_BUILD;
-  else if( !strcmp(p->kdtree,"internal") ) p->kdtreemode=MATCH_KDTREE_INTERNAL;
-  else if( !strcmp(p->kdtree,"auto")     ) p->kdtreemode=MATCH_KDTREE_AUTO;
-  else if( !strcmp(p->kdtree,"disable")  ) p->kdtreemode=MATCH_KDTREE_DISABLE;
-  else if( gal_fits_name_is_fits(p->kdtree) ) p->kdtreemode=MATCH_KDTREE_FILE;
-  else
-    error(EXIT_FAILURE, 0, "'%s' is not a valid value for '--kdtree'. The "
-          "following values are accepted: 'build' (to build the k-d tree in "
-          "the file given to '--output'), 'internal' (to force internal "
-          "usage of a k-d tree for the matching), 'auto' (to decide "
-          "automatically if a k-d tree should be used or not), 'disable' "
-          "(to not use a k-d tree at all), a FITS file name (the file to "
-          "read a created k-d tree from)", p->kdtree);
-
-  /* Make sure that the k-d tree build mode is not called with
-     '--outcols'. */
-  if( p->kdtreemode==MATCH_KDTREE_BUILD && (p->outcols || p->coord) )
-    error(EXIT_FAILURE, 0, "the '--kdtree=build' option is incompatible "
-          "with the '--outcols' or '--coord' options (because in the k-d "
-          "tree building mode doesn't involve actual matching. It will "
-          "only build k-d tree and write it to a file so it can be used "
-          "in future matches)");
+  /* k-d tree specific sanity checks. */
+  if(p->kdtree)
+  {
+    /* Set the k-d tree mode. */
+    if(      !strcmp(p->kdtree,"build")    ) p->kdtreemode=MATCH_KDTREE_BUILD;
+    else if( !strcmp(p->kdtree,"internal") ) 
p->kdtreemode=MATCH_KDTREE_INTERNAL;
+    else if( !strcmp(p->kdtree,"auto")     ) p->kdtreemode=MATCH_KDTREE_AUTO;
+    else if( !strcmp(p->kdtree,"disable")  ) 
p->kdtreemode=MATCH_KDTREE_DISABLE;
+    else if( gal_fits_name_is_fits(p->kdtree) ) 
p->kdtreemode=MATCH_KDTREE_FILE;
+    else
+      error(EXIT_FAILURE, 0, "'%s' is not valid for '--kdtree'. The "
+            "following values are accepted: 'build' (to build the k-d tree in "
+            "the file given to '--output'), 'internal' (to force internal "
+            "usage of a k-d tree for the matching), 'auto' (to decide "
+            "automatically if a k-d tree should be used or not), 'disable' "
+            "(to not use a k-d tree at all), a FITS file name (the file to "
+            "read a created k-d tree from)", p->kdtree);
+
+    /* Make sure that the k-d tree build mode is not called with
+       '--outcols'. */
+    if( p->kdtreemode==MATCH_KDTREE_BUILD && (p->outcols || p->coord) )
+      error(EXIT_FAILURE, 0, "the '--kdtree=build' option is incompatible "
+            "with the '--outcols' or '--coord' options (because in the k-d "
+            "tree building mode doesn't involve actual matching. It will "
+            "only build k-d tree and write it to a file so it can be used "
+            "in future matches)");
+  }
 }
 
 
diff --git a/during-dev-test-data/match-query.fits 
b/during-dev-test-data/match-query.fits
deleted file mode 100644
index 64401ed..0000000
Binary files a/during-dev-test-data/match-query.fits and /dev/null differ
diff --git a/lib/gnuastro/match.h b/lib/gnuastro/match.h
index b35d918..866b66a 100644
--- a/lib/gnuastro/match.h
+++ b/lib/gnuastro/match.h
@@ -55,7 +55,7 @@ gal_data_t *
 gal_match_kdtree(gal_data_t *coord1, gal_data_t *coord2,
                  gal_data_t *coord1_kdtree, size_t kdtree_root,
                  double *aperture, size_t numthreads, size_t minmapsize,
-                 int quietmmap);
+                 size_t *nummatched, int inplace, int quietmmap);
 
 
 __END_C_DECLS    /* From C++ preparations */
diff --git a/lib/match.c b/lib/match.c
index 827ec83..ad5d800 100644
--- a/lib/match.c
+++ b/lib/match.c
@@ -987,9 +987,11 @@ struct match_kdtree_params
   gal_data_t        *coord1;  /* 1st coordinate list of 'gal_data_t's */
   gal_data_t        *coord2;  /* 2nd coordinate list of 'gal_data_t's */
   size_t           *c2match;  /* Matched IDs for the second coords.   */
+  double          *distance;  /* The distance between the matches.    */
   double          *aperture;  /* Acceptable aperture for match.       */
   size_t        kdtree_root;  /* Index (counting from 0) of root.     */
   gal_data_t *coord1_kdtree;  /* k-d tree of first coordinate.        */
+  size_t  *multiple_matches;  /* Multiple match count in 1st coords.  */
 
   /* Internal parameters for easy aperture checking. For example there is
      no need to calculate the fixed 'cos()' and 'sin()' functions. So we
@@ -1014,7 +1016,6 @@ match_kdtree_worker(void *in_prm)
 
   /* High level definitions. */
   gal_data_t *ccol;
-  double r, delta[3];
   size_t i, j, index, mindex;
   double *point=NULL, least_dist;
 
@@ -1036,14 +1037,21 @@ match_kdtree_worker(void *in_prm)
       mindex=gal_kdtree_nearest_neighbour(p->coord1, p->coord1_kdtree,
                                           p->kdtree_root, point, &least_dist);
 
+      /* Store the distance between the matches. */
+      p->distance[index] = least_dist;
+
       /* Make sure the matched point is within the given aperture (which
          may be elliptical). If it isn't, then the reported index for this
          element will be 'GAL_BLANK_SIZE_T'. */
-      for(j=0;j<p->ndim;++j)
-        delta[j]=p->b[j][index] - p->a[j][mindex];
-      r=match_coordinates_distance(delta, p->iscircle, p->ndim,
-                                   p->aperture, p->c, p->s);
-      p->c2match[index] = r<p->aperture[0] ? mindex : GAL_BLANK_SIZE_T;
+      p->c2match[index] = least_dist<p->aperture[0] ? mindex : 
GAL_BLANK_SIZE_T;
+
+      /* Keep count of the number of matches in the 1st coordinate.
+         Some points in the first catalog can match to multiple points in
+         the second catalog for a given aperture value. To avoid this multiple
+         matching, we keep a track of such points in the first catalog.
+         We use 'multiple_matches' to keep track for the same. */
+      if(p->c2match[index] != GAL_BLANK_SIZE_T)
+        p->multiple_matches[p->c2match[index]]++;
     }
 
   /* Clean up. */
@@ -1059,16 +1067,117 @@ match_kdtree_worker(void *in_prm)
 
 
 gal_data_t *
+gal_match_kdtree_output(gal_data_t *A, gal_data_t *B, size_t *bina,
+                        double* distance, size_t *multiple_matches,
+                        size_t minmapsize, int quietmmap)
+{
+  double *rval;
+  gal_data_t *out;
+  uint8_t *Amatched;
+  size_t ai=0, bi=0, nummatched=0;
+  size_t *aind, *bind, match_i, nomatch_i;
+
+  /* Find how many matches there were in total. */
+  for(ai=0;ai<A->size;++ai) if(multiple_matches[ai]) ++nummatched;
+
+  /* If there aren't any matches, return NULL. */
+  if(nummatched==0) return NULL;
+
+
+  /* Allocate the output list. */
+  out=gal_data_alloc(NULL, GAL_TYPE_SIZE_T, 1, &A->size, NULL, 0,
+                     minmapsize, quietmmap, "CAT1_ROW", "counter",
+                     "Row index in first catalog (counting from 0).");
+  out->next=gal_data_alloc(NULL, GAL_TYPE_SIZE_T, 1, &B->size, NULL, 0,
+                           minmapsize, quietmmap, "CAT2_ROW", "counter",
+                           "Row index in second catalog (counting "
+                           "from 0).");
+  out->next->next=gal_data_alloc(NULL, GAL_TYPE_FLOAT64, 1, &nummatched,
+                                 NULL, 0, minmapsize, quietmmap,
+                                 "MATCH_DIST", NULL,
+                                 "Distance between the match.");
+
+
+  /* Allocate the 'Amatched' array which is a flag for which rows of the
+     first catalog were matched. The columns that had a match will get a
+     value of one while we are parsing them below. */
+  Amatched=gal_pointer_allocate(GAL_TYPE_UINT8, A->size, 1, __func__,
+                                "Amatched");
+
+
+  /* Initialize the indexs. We want the first 'nummatched' indexs in both
+     outputs to be the matching rows. The non-matched rows should start to
+     be indexed after the matched ones. So the first non-matched index is
+     at the index 'nummatched'. */
+  match_i   = 0;
+  nomatch_i = nummatched;
+
+
+  /* Fill in the output arrays. */
+  aind = out->array;
+  bind = out->next->array;
+  rval = out->next->next->array;
+  for(bi=0;bi<B->size;++bi)
+    {
+      /* A match was found. */
+      if(bina[bi] != GAL_BLANK_SIZE_T && multiple_matches[bina[bi]])
+        {
+          multiple_matches[bina[bi]] = 0;
+          /* Note that the permutation keeps the original indexs. */
+          rval[ match_i   ] = distance[bi];
+          aind[ match_i   ] = bina[bi];
+          bind[ match_i++ ] = bi;
+
+          /* Set a '1' for this object in the first catalog. This will
+             later be used to find which rows didn't match to fill in the
+             output. */
+          Amatched[ bina[bi] ] = 1;
+        }
+
+      /* No match found. At this stage, we can only fill the indexs of the
+         second input. The first input needs to be matched afterwards.*/
+      else bind[ nomatch_i++ ] = bi;
+    }
+
+
+  /* Complete the first input's permutation. */
+  nomatch_i=nummatched;
+  for(ai=0;ai<A->size;++ai)
+    if( Amatched[ai] == 0 )
+      aind[ nomatch_i++ ] = ai;
+
+
+  /* For a check
+  printf("\nNumber of matches: %zu\n", nummatched);
+  printf("\nFirst input's permutation:\n");
+  for(ai=0;ai<A->size;++ai)
+    printf("%s%zu\n", ai<nummatched?"  ":"* ", aind[ai]+1);
+  printf("\nSecond input's permutation:\n");
+  for(bi=0;bi<B->size;++bi)
+    printf("%s%zu\n", bi<nummatched?"  ":"* ", bind[bi]+1);
+  exit(0);
+  */
+
+  /* Return the output. */
+  return out;
+}
+
+
+
+
+
+gal_data_t *
 gal_match_kdtree(gal_data_t *coord1, gal_data_t *coord2,
                  gal_data_t *coord1_kdtree, size_t kdtree_root,
                  double *aperture, size_t numthreads, size_t minmapsize,
-                 int quietmmap)
+                 size_t *nummatched, int inplace, int quietmmap)
 {
-  size_t i;
-  gal_data_t *c2match;
+  gal_data_t *out;
   struct match_kdtree_params p;
+  gal_data_t *c2match, *distance, *multiple_matches;
   double dist[3]; /* Just a place-holder in 'sif_prepare'. */
 
+
   /* Basic sanity checks. */
   p.ndim=gal_list_data_number(coord1);
   if( (   p.ndim != gal_list_data_number(coord2))
@@ -1089,11 +1198,21 @@ gal_match_kdtree(gal_data_t *coord1, gal_data_t *coord2,
           "'uint32', but it is '%s'", __func__,
           gal_type_name(coord1_kdtree->type, 1));
 
+
   /* Do the preparations. */
   match_coordinates_sif_prepare(coord1, coord2, aperture, p.ndim,
                                 p.a, p.b, dist, p.c, p.s, &p.iscircle);
+
+
   c2match=gal_data_alloc(NULL, GAL_TYPE_SIZE_T, 1, &coord2->size, NULL, 0,
                          minmapsize, quietmmap, NULL, NULL, NULL);
+  distance=gal_data_alloc(NULL, GAL_TYPE_FLOAT64, 1, &coord2->size, NULL, 0,
+                          minmapsize, quietmmap, NULL, NULL, NULL);
+  multiple_matches=gal_data_alloc(NULL, GAL_TYPE_SIZE_T, 1, &coord1->size,
+                                  NULL, 0, minmapsize, quietmmap, NULL,
+                                  NULL, NULL);
+
+
 
   /* Set the threading parameters and spin-off the threads. */
   p.coord1=coord1;
@@ -1101,21 +1220,36 @@ gal_match_kdtree(gal_data_t *coord1, gal_data_t *coord2,
   p.aperture=aperture;
   p.c2match=c2match->array;
   p.kdtree_root=kdtree_root;
+  p.distance=distance->array;
   p.coord1_kdtree=coord1_kdtree;
+  p.multiple_matches=multiple_matches->array;
   gal_threads_spin_off(match_kdtree_worker, &p, coord2->size, numthreads,
                        minmapsize, quietmmap);
 
-  /* For a check. */
-  {
-    double *c1x=coord1->array, *c1y=coord1->next->array;
-    double *c2x=coord2->array, *c2y=coord2->next->array;
-    for(i=0; i<coord2->size; ++i)
-      printf("%s: cat2's (%g, %g) matches with (%g, %g) of cat1\n",
-             __func__, c2x[i], c2y[i],
-             p.c2match[i]==GAL_BLANK_SIZE_T ? NAN : c1x[p.c2match[i]],
-             p.c2match[i]==GAL_BLANK_SIZE_T ? NAN : c1y[p.c2match[i]]);
-  }
 
-  exit(0);
-  return c2match;
+  /* The match is done, write the output. */
+  out=gal_match_kdtree_output(coord1, coord2, p.c2match, p.distance,
+                              p.multiple_matches, minmapsize, quietmmap);
+
+  /* For a check:
+  double *c1x=coord1->array, *c1y=coord1->next->array;
+  double *c2x=coord2->array, *c2y=coord2->next->array;
+  for(size_t i=0; i<coord2->size; ++i)
+    printf("%s: cat2's (%g, %g) matches with (%g, %g) of cat1\n"
+           "with distance %g c2match %zu\n",
+            __func__, c2x[i], c2y[i],
+            p.c2match[i]==GAL_BLANK_SIZE_T ? NAN : c1x[p.c2match[i]],
+            p.c2match[i]==GAL_BLANK_SIZE_T ? NAN : c1y[p.c2match[i]],
+            p.distance[i], p.c2match[i]);
+  */
+
+  /* Clean up. */
+  gal_data_free(multiple_matches);
+  gal_data_free(distance);
+  gal_data_free(c2match);
+
+
+  /* Set 'nummatched' and return output. */
+  *nummatched = out ?  out->next->next->size : 0;
+  return out;
 }



reply via email to

[Prev in Thread] Current Thread [Next in Thread]