pspp-dev
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

patch: adding comments to new code (part 2 of 2)


From: Ben Pfaff
Subject: patch: adding comments to new code (part 2 of 2)
Date: Sun, 10 Jun 2007 11:27:21 -0700
User-agent: Gnus/5.110006 (No Gnus v0.6) Emacs/21.4 (gnu/linux)

Turned out that there wasn't as much code that needed comments as
I thought, unless I'm missing some of it.  Anyway, here's part 2.
It also fixes some minor bugs that I noticed as I was going
through the code and rearranges code a little to suit my
preferred order.

Index: merge/src/data/casereader-filter.c
===================================================================
--- merge.orig/src/data/casereader-filter.c     2007-06-10 08:27:11.000000000 
-0700
+++ merge/src/data/casereader-filter.c  2007-06-10 10:53:18.000000000 -0700
@@ -34,17 +34,36 @@
 #include "gettext.h"
 #define _(msgid) gettext (msgid)
 
+/* A casereader that filters data coming from another
+   casereader. */
 struct casereader_filter
   {
-    struct casereader *subreader;
+    struct casereader *subreader; /* The reader to filter. */
     bool (*include) (const struct ccase *, void *aux);
     bool (*destroy) (void *aux);
     void *aux;
-    struct casewriter *exclude;
+    struct casewriter *exclude; /* Writer that gets filtered cases, or NULL. */
   };
 
 static struct casereader_class casereader_filter_class;
 
+/* Creates and returns a casereader whose content is a filtered
+   version of the data in SUBREADER.  Only the cases for which
+   INCLUDE returns true will appear in the returned casereader,
+   in the original order.
+
+   If EXCLUDE is non-null, then cases for which INCLUDE returns
+   false are written to EXCLUDE.  These cases will not
+   necessarily be fully written to EXCLUDE until the filtering casereader's
+   cases have been fully read or, if that never occurs, until the
+   filtering casereader is destroyed.
+
+   When the filtering casereader is destroyed, DESTROY will be
+   called to allow any state maintained by INCLUDE to be freed.
+
+   After this function is called, SUBREADER must not ever again
+   be referenced directly.  It will be destroyed automatically
+   when the filtering casereader is destroyed. */
 struct casereader *
 casereader_create_filter_func (struct casereader *subreader,
                                bool (*include) (const struct ccase *,
@@ -68,6 +87,7 @@
   return reader;
 }
 
+/* Internal read function for filtering casereader. */
 static bool
 casereader_filter_read (struct casereader *reader UNUSED, void *filter_,
                         struct ccase *c)
@@ -87,16 +107,31 @@
     }
 }
 
+/* Internal destruction function for filtering casereader. */
 static void
 casereader_filter_destroy (struct casereader *reader, void *filter_)
 {
   struct casereader_filter *filter = filter_;
+
+  /* Make sure we've written everything to the excluded cases
+     casewriter, if there is one. */
+  if (filter->exclude != NULL)
+    {
+      struct ccase c;
+      while (casereader_read (filter->subreader, &c))
+        if (filter->include (&c, filter->aux))
+          case_destroy (&c);
+        else
+          casewriter_write (filter->exclude, &c);
+    }
+
   casereader_destroy (filter->subreader);
   if (filter->destroy != NULL && !filter->destroy (filter->aux))
     casereader_force_error (reader);
   free (filter);
 }
 
+/* Filtering casereader class. */
 static struct casereader_class casereader_filter_class =
   {
     casereader_filter_read,
@@ -111,41 +146,42 @@
     NULL,
   };
 
+
+/* Casereader for filtering valid weights. */
+
+/* Weight-filtering data. */
 struct casereader_filter_weight
   {
-    const struct variable *weight_var;
-    bool *warn_on_invalid;
-    bool local_warn_on_invalid;
+    const struct variable *weight_var; /* Weight variable. */
+    bool *warn_on_invalid;      /* Have we already issued an error? */
+    bool local_warn_on_invalid; /* warn_on_invalid might point here. */
   };
 
-static bool
-casereader_filter_weight_include (const struct ccase *c, void *cfw_)
-{
-  struct casereader_filter_weight *cfw = cfw_;
-  double value = case_num (c, cfw->weight_var);
-  if (value >= 0.0 && !var_is_num_missing (cfw->weight_var, value, MV_ANY))
-    return true;
-  else
-    {
-      if (*cfw->warn_on_invalid)
-        {
-         msg (SW, _("At least one case in the data read had a weight value "
-                    "that was user-missing, system-missing, zero, or "
-                    "negative.  These case(s) were ignored."));
-          *cfw->warn_on_invalid = false;
-        }
-      return false;
-    }
-}
-
-static bool
-casereader_filter_weight_destroy (void *cfw_)
-{
-  struct casereader_filter_weight *cfw = cfw_;
-  free (cfw);
-  return true;
-}
+static bool casereader_filter_weight_include (const struct ccase *, void *);
+static bool casereader_filter_weight_destroy (void *);
 
+/* Creates and returns a casereader that filters cases from
+   READER by valid weights, that is, any cases with user- or
+   system-missing, zero, or negative weights are dropped.  The
+   weight variable's information is taken from DICT.  If DICT
+   does not have a weight variable, then no cases are filtered
+   out.
+
+   When a case with an invalid weight is encountered,
+   *WARN_ON_INVALID is checked.  If it is true, then an error
+   message is issued and *WARN_ON_INVALID is set false.  If
+   WARN_ON_INVALID is a null pointer, then an internal bool that
+   is initially true is used instead of a caller-supplied bool.
+
+   If EXCLUDE is non-null, then dropped cases are written to
+   EXCLUDE.  These cases will not necessarily be fully written to
+   EXCLUDE until the filtering casereader's cases have been fully
+   read or, if that never occurs, until the filtering casereader
+   is destroyed.
+
+   After this function is called, READER must not ever again be
+   referenced directly.  It will be destroyed automatically when
+   the filtering casereader is destroyed. */
 struct casereader *
 casereader_create_filter_weight (struct casereader *reader,
                                  const struct dictionary *dict,
@@ -170,39 +206,69 @@
     reader = casereader_rename (reader);
   return reader;
 }
-
-struct casereader_filter_missing
-  {
-    struct variable **vars;
-    size_t var_cnt;
-    enum mv_class class;
-  };
 
+/* Internal "include" function for weight-filtering
+   casereader. */
 static bool
-casereader_filter_missing_include (const struct ccase *c, void *cfm_)
+casereader_filter_weight_include (const struct ccase *c, void *cfw_)
 {
-  const struct casereader_filter_missing *cfm = cfm_;
-  size_t i;
-
-  for (i = 0; i < cfm->var_cnt; i++)
+  struct casereader_filter_weight *cfw = cfw_;
+  double value = case_num (c, cfw->weight_var);
+  if (value >= 0.0 && !var_is_num_missing (cfw->weight_var, value, MV_ANY))
+    return true;
+  else
     {
-      struct variable *var = cfm->vars[i];
-      const union value *value = case_data (c, var);
-      if (var_is_value_missing (var, value, cfm->class))
-        return false;
+      if (*cfw->warn_on_invalid)
+        {
+         msg (SW, _("At least one case in the data read had a weight value "
+                    "that was user-missing, system-missing, zero, or "
+                    "negative.  These case(s) were ignored."));
+          *cfw->warn_on_invalid = false;
+        }
+      return false;
     }
-  return true;
 }
 
+/* Internal "destroy" function for weight-filtering
+   casereader. */
 static bool
-casereader_filter_missing_destroy (void *cfm_)
+casereader_filter_weight_destroy (void *cfw_)
 {
-  struct casereader_filter_missing *cfm = cfm_;
-  free (cfm->vars);
-  free (cfm);
+  struct casereader_filter_weight *cfw = cfw_;
+  free (cfw);
   return true;
 }
+
+/* Casereader for filtering missing values. */
 
+/* Missing-value filtering data. */
+struct casereader_filter_missing
+  {
+    struct variable **vars;     /* Variables whose values to filter. */
+    size_t var_cnt;             /* Number of variables. */
+    enum mv_class class;        /* Types of missing values to filter. */
+  };
+
+static bool casereader_filter_missing_include (const struct ccase *, void *);
+static bool casereader_filter_missing_destroy (void *);
+
+/* Creates and returns a casereader that filters out cases from
+   READER that have a missing value in the given CLASS for any of
+   the VAR_CNT variables in VARS.  Only cases that have
+   non-missing values for all of these variables are passed
+   through.
+
+   Ownership of VARS is retained by the caller.
+
+   If EXCLUDE is non-null, then dropped cases are written to
+   EXCLUDE.  These cases will not necessarily be fully written to
+   EXCLUDE until the filtering casereader's cases have been fully
+   read or, if that never occurs, until the filtering casereader
+   is destroyed.
+
+   After this function is called, READER must not ever again
+   be referenced directly.  It will be destroyed automatically
+   when the filtering casereader is destroyed. */
 struct casereader *
 casereader_create_filter_missing (struct casereader *reader,
                                   const struct variable **vars, size_t var_cnt,
@@ -224,16 +290,58 @@
   else
     return casereader_rename (reader);
 }
-
-
+
+/* Internal "include" function for missing value-filtering
+   casereader. */
 static bool
-casereader_counter_include (const struct ccase *c UNUSED, void *counter_)
+casereader_filter_missing_include (const struct ccase *c, void *cfm_)
 {
-  casenumber *counter = counter_;
-  ++*counter;
+  const struct casereader_filter_missing *cfm = cfm_;
+  size_t i;
+
+  for (i = 0; i < cfm->var_cnt; i++)
+    {
+      struct variable *var = cfm->vars[i];
+      const union value *value = case_data (c, var);
+      if (var_is_value_missing (var, value, cfm->class))
+        return false;
+    }
   return true;
 }
 
+/* Internal "destroy" function for missing value-filtering
+   casereader. */
+static bool
+casereader_filter_missing_destroy (void *cfm_)
+{
+  struct casereader_filter_missing *cfm = cfm_;
+  free (cfm->vars);
+  free (cfm);
+  return true;
+}
+
+/* Case-counting casereader. */
+
+static bool casereader_counter_include (const struct ccase *, void *);
+
+/* Creates and returns a new casereader that counts the number of
+   cases that have been read from it.  *COUNTER is initially set
+   to INITIAL_VALUE, then incremented by 1 each time a case is read.
+
+   Counting casereaders must be used very cautiously: if a
+   counting casereader is cloned or if the casereader_peek
+   function is used on it, then the counter's value can be higher
+   than expected because of the buffering that goes on behind the
+   scenes.
+
+   The counter is only incremented as cases are actually read
+   from the casereader.  In particular, if the casereader is
+   destroyed before all cases have been read from the casereader,
+   cases never read will not be included in the count.
+
+   After this function is called, READER must not ever again
+   be referenced directly.  It will be destroyed automatically
+   when the filtering casereader is destroyed. */
 struct casereader *
 casereader_create_counter (struct casereader *reader, casenumber *counter,
                            casenumber initial_value)
@@ -242,3 +350,12 @@
   return casereader_create_filter_func (reader, casereader_counter_include,
                                         NULL, counter, NULL);
 }
+
+/* Internal "include" function for counting casereader. */
+static bool
+casereader_counter_include (const struct ccase *c UNUSED, void *counter_)
+{
+  casenumber *counter = counter_;
+  ++*counter;
+  return true;
+}
Index: merge/src/data/casereader-translator.c
===================================================================
--- merge.orig/src/data/casereader-translator.c 2007-06-10 09:52:20.000000000 
-0700
+++ merge/src/data/casereader-translator.c      2007-06-10 09:59:27.000000000 
-0700
@@ -27,9 +27,13 @@
 
 #include "xalloc.h"
 
+/* Casereader that applies a user-supplied function to translate
+   each case into another in an arbitrary fashion. */
+
+/* A translating casereader. */
 struct casereader_translator
   {
-    struct casereader *subreader;
+    struct casereader *subreader; /* Source of input cases. */
 
     void (*translate) (const struct ccase *input, struct ccase *output,
                        void *aux);
@@ -39,6 +43,18 @@
 
 static struct casereader_class casereader_translator_class;
 
+/* Creates and returns a new casereader whose cases are produced
+   by reading from SUBREADER and passing through TRANSLATE, which
+   must create case OUTPUT, with OUTPUT_VALUE_CNT values, and
+   populate it based on INPUT and auxiliary data AUX.  TRANSLATE
+   must also destroy INPUT.
+
+   When the translating casereader is destroyed, DESTROY will be
+   called to allow any state maintained by TRANSLATE to be freed.
+
+   After this function is called, SUBREADER must not ever again
+   be referenced directly.  It will be destroyed automatically
+   when the translating casereader is destroyed. */
 struct casereader *
 casereader_create_translator (struct casereader *subreader,
                               size_t output_value_cnt,
@@ -62,6 +78,7 @@
   return reader;
 }
 
+/* Internal read function for translating casereader. */
 static bool
 casereader_translator_read (struct casereader *reader UNUSED,
                             void *ct_, struct ccase *c)
@@ -78,6 +95,7 @@
     return false;
 }
 
+/* Internal destroy function for translating casereader. */
 static void
 casereader_translator_destroy (struct casereader *reader UNUSED, void *ct_)
 {
@@ -87,6 +105,7 @@
   free (ct);
 }
 
+/* Casereader class for translating casereader. */
 static struct casereader_class casereader_translator_class =
   {
     casereader_translator_read,
Index: merge/src/data/casegrouper.c
===================================================================
--- merge.orig/src/data/casegrouper.c   2007-06-10 10:31:29.000000000 -0700
+++ merge/src/data/casegrouper.c        2007-06-10 10:52:43.000000000 -0700
@@ -30,16 +30,27 @@
 
 #include "xalloc.h"
 
+/* A casegrouper. */
 struct casegrouper
   {
-    struct casereader *reader;
-    struct taint *taint;
+    struct casereader *reader;  /* Source of input cases. */
+    struct taint *taint;        /* Error status for casegrouper. */
 
+    /* Functions for grouping cases. */
     bool (*same_group) (const struct ccase *, const struct ccase *, void *aux);
     void (*destroy) (void *aux);
     void *aux;
   };
 
+/* Creates and returns a new casegrouper that takes its input
+   from READER.  SAME_GROUP is used to decide which cases are in
+   a group: it returns true if the pair of cases provided are in
+   the same group, false otherwise.  DESTROY will be called when
+   the casegrouper is destroyed and should free any storage
+   needed by SAME_GROUP.
+
+   SAME_GROUP may be a null pointer.  If so, READER's entire
+   contents is considered to be a single group. */
 struct casegrouper *
 casegrouper_create_func (struct casereader *reader,
                          bool (*same_group) (const struct ccase *,
@@ -57,13 +68,17 @@
   return grouper;
 }
 
-/* FIXME: we really shouldn't need a temporary casewriter for the
-   common case where we read an entire group's data before going
-   on to the next. */
+/* Obtains the next group of cases from GROUPER.  Returns true if
+   successful, false if no groups remain.  If successful, *READER
+   is set to the casereader for the new group; otherwise, it is
+   set to NULL. */
 bool
 casegrouper_get_next_group (struct casegrouper *grouper,
                             struct casereader **reader)
 {
+  /* FIXME: we really shouldn't need a temporary casewriter for
+     the common case where we read an entire group's data before
+     going on to the next. */
   if (grouper->same_group != NULL)
     {
       struct casewriter *writer;
@@ -102,10 +117,17 @@
           return true;
         }
       else
-        return false;
+        {
+          *reader = NULL;
+          return false;
+        }
     }
 }
 
+/* Destroys GROUPER.  Returns false if GROUPER's input casereader
+   or any state derived from it had become tainted, which means
+   that an I/O error or other serious error occurred in
+   processing data derived from GROUPER; otherwise, return true. */
 bool
 casegrouper_destroy (struct casegrouper *grouper)
 {
@@ -126,29 +148,26 @@
   else
     return true;
 }
+
+/* Casegrouper based on equal values of variables from case to
+   case. */
 
+/* Casegrouper based on equal variables. */
 struct casegrouper_vars
   {
-    const struct variable **vars;
-    size_t var_cnt;
+    const struct variable **vars; /* Variables to compare. */
+    size_t var_cnt;               /* Number of variables. */
   };
 
-static bool
-casegrouper_vars_same_group (const struct ccase *a, const struct ccase *b,
-                             void *cv_)
-{
-  struct casegrouper_vars *cv = cv_;
-  return case_compare (a, b, cv->vars, cv->var_cnt) == 0;
-}
-
-static void
-casegrouper_vars_destroy (void *cv_)
-{
-  struct casegrouper_vars *cv = cv_;
-  free (cv->vars);
-  free (cv);
-}
-
+static bool casegrouper_vars_same_group (const struct ccase *,
+                                         const struct ccase *,
+                                         void *);
+static void casegrouper_vars_destroy (void *);
+
+/* Creates and returns a casegrouper that reads data from READER
+   and breaks it into contiguous groups of cases that have equal
+   values for the VAR_CNT variables in VARS.  If VAR_CNT is 0,
+   then all the cases will be put in a single group. */
 struct casegrouper *
 casegrouper_create_vars (struct casereader *reader,
                          const struct variable *const *vars,
@@ -168,6 +187,11 @@
     return casegrouper_create_func (reader, NULL, NULL, NULL);
 }
 
+/* Creates and returns a casegrouper that reads data from READER
+   and breaks it into contiguous groups of cases that have equal
+   values for the SPLIT FILE variables in DICT.  If DICT has no
+   SPLIT FILE variables, then all the cases will be put into a
+   single group. */
 struct casegrouper *
 casegrouper_create_splits (struct casereader *reader,
                            const struct dictionary *dict)
@@ -177,6 +201,11 @@
                                   dict_get_split_cnt (dict));
 }
 
+/* Creates and returns a casegrouper that reads data from READER
+   and breaks it into contiguous groups of cases that have equal
+   values for the variables used for sorting in CO.  If CO is
+   empty (contains no sort keys), then all the cases will be put
+   into a single group. */
 struct casegrouper *
 casegrouper_create_case_ordering (struct casereader *reader,
                                   const struct case_ordering *co)
@@ -191,3 +220,22 @@
 
   return grouper;
 }
+
+/* "same_group" function for an equal-variables casegrouper. */
+static bool
+casegrouper_vars_same_group (const struct ccase *a, const struct ccase *b,
+                             void *cv_)
+{
+  struct casegrouper_vars *cv = cv_;
+  return case_compare (a, b, cv->vars, cv->var_cnt) == 0;
+}
+
+/* "destroy" for an equal-variables casegrouper. */
+static void
+casegrouper_vars_destroy (void *cv_)
+{
+  struct casegrouper_vars *cv = cv_;
+  free (cv->vars);
+  free (cv);
+}
+
Index: merge/src/data/casegrouper.h
===================================================================
--- merge.orig/src/data/casegrouper.h   2007-06-10 10:31:41.000000000 -0700
+++ merge/src/data/casegrouper.h        2007-06-10 10:33:43.000000000 -0700
@@ -16,6 +16,13 @@
    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
    02110-1301, USA. */
 
+/* Casegrouper.
+
+   Breaks up the cases from a casereader into sets of contiguous
+   cases based on some criteria, e.g. sets of cases that all have
+   the same values for some subset of variables.  Each set of
+   cases is made available to the client as a casereader. */
+
 #ifndef DATA_CASEGROUPER_H
 #define DATA_CASEGROUPER_H 1
 
Index: merge/src/data/datasheet.c
===================================================================
--- merge.orig/src/data/datasheet.c     2007-06-10 10:00:43.000000000 -0700
+++ merge/src/data/datasheet.c  2007-06-10 10:02:23.000000000 -0700
@@ -229,20 +229,23 @@
   return new;
 }
 
-/* Returns true if a I/O error has occurred while processing a
-   datasheet operation. */
+/* Returns true if datasheet DS is tainted.
+   A datasheet is tainted by an I/O error or by taint
+   propagation to the datasheet. */
 bool
 datasheet_error (const struct datasheet *ds)
 {
   return taint_is_tainted (ds->taint);
 }
 
+/* Marks datasheet DS tainted. */
 void
 datasheet_force_error (struct datasheet *ds)
 {
   taint_set_taint (ds->taint);
 }
 
+/* Returns datasheet DS's taint object. */
 const struct taint *
 datasheet_get_taint (const struct datasheet *ds)
 {
@@ -535,6 +538,7 @@
   return reader;
 }
 
+/* "read" function for the datasheet random casereader. */
 static bool
 datasheet_reader_read (struct casereader *reader UNUSED, void *ds_,
                        casenumber case_idx, struct ccase *c)
@@ -551,6 +555,7 @@
     }
 }
 
+/* "destroy" function for the datasheet random casereader. */
 static void
 datasheet_reader_destroy (struct casereader *reader UNUSED, void *ds_)
 {
@@ -558,6 +563,7 @@
   datasheet_destroy (ds);
 }
 
+/* "advance" function for the datasheet random casereader. */
 static void
 datasheet_reader_advance (struct casereader *reader UNUSED, void *ds_,
                           casenumber case_cnt)
@@ -566,6 +572,7 @@
   datasheet_delete_rows (ds, 0, case_cnt);
 }
 
+/* Random casereader class for a datasheet. */
 static const struct casereader_random_class datasheet_reader_class =
   {
     datasheet_reader_read,
Index: merge/src/data/procedure.c
===================================================================
--- merge.orig/src/data/procedure.c     2007-06-10 10:03:27.000000000 -0700
+++ merge/src/data/procedure.c  2007-06-10 10:55:23.000000000 -0700
@@ -86,9 +86,10 @@
   /* Procedure data. */
   enum
     {
-      PROC_COMMITTED,
-      PROC_OPEN,
-      PROC_CLOSED
+      PROC_COMMITTED,           /* No procedure in progress. */
+      PROC_OPEN,                /* proc_open called, casereader still open. */
+      PROC_CLOSED               /* casereader from proc_open destroyed,
+                                   but proc_commit not yet called. */
     }
   proc_state;
   size_t cases_written;       /* Cases output so far. */
@@ -193,18 +194,15 @@
                                        &proc_casereader_class, ds);
 }
 
+/* Returns true if a procedure is in progress, that is, if
+   proc_open has been called but proc_commit has not. */
 bool
 proc_is_open (const struct dataset *ds)
 {
   return ds->proc_state != PROC_COMMITTED;
 }
 
-/* Reads the next case from dataset DS, which must have been
-   opened for reading with proc_open.
-   Returns true if successful, in which case a pointer to the
-   case is stored in *C.
-   Return false at end of file or if a read error occurs.  In
-   this case a null pointer is stored in *C. */
+/* "read" function for procedure casereader. */
 static bool
 proc_casereader_read (struct casereader *reader UNUSED, void *ds_,
                       struct ccase *c)
@@ -280,11 +278,7 @@
     }
 }
 
-/* Closes dataset DS for reading.
-   Returns true if successful, false if an I/O error occurred
-   while reading or closing the data set.
-   If DS has not been opened, returns true without doing
-   anything else. */
+/* "destroy" function for procedure casereader. */
 static void
 proc_casereader_destroy (struct casereader *reader, void *ds_)
 {
@@ -352,6 +346,7 @@
   return proc_cancel_all_transformations (ds) && ds->ok;
 }
 
+/* Casereader class for procedure execution. */
 static struct casereader_class proc_casereader_class =
   {
     proc_casereader_read,


-- 
Ben Pfaff 
http://benpfaff.org




reply via email to

[Prev in Thread] Current Thread [Next in Thread]