pspp-dev
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[patch 15/19] the big patch


From: blp
Subject: [patch 15/19] the big patch
Date: Tue, 05 Jun 2007 23:27:42 -0700
User-agent: quilt/0.45-1

Most of the patches up to this point have been creating infrastructure
for the new procedure code.  This patch actually implements the new
procedure code and adapts all of its clients to match.  It also adapts
all of the other case sources and sinks in the tree and their clients
to use the casereader/casewriter infrastructure.

The files removed from src/data/automake.mk would also be removed from
CVS when this patch is checked in.

Index: merge/Smake
===================================================================
--- merge.orig/Smake    2007-06-05 09:16:10.000000000 -0700
+++ merge/Smake 2007-06-05 09:18:06.000000000 -0700
@@ -62,6 +62,7 @@
        vsnprintf \
        xalloc \
        xalloc-die \
+       xallocsa \
        xsize \
        xstrndup \
        xvasprintf
Index: merge/src/data/any-reader.c
===================================================================
--- merge.orig/src/data/any-reader.c    2007-06-05 09:16:10.000000000 -0700
+++ merge/src/data/any-reader.c 2007-06-05 09:18:06.000000000 -0700
@@ -36,21 +36,6 @@
 #include "gettext.h"
 #define _(msgid) gettext (msgid)
 
-/* Type of file backing an any_reader. */
-enum any_reader_type
-  {
-    SYSTEM_FILE,                /* System file. */
-    PORTABLE_FILE,              /* Portable file. */
-    SCRATCH_FILE                /* Scratch file. */
-  };
-
-/* Reader for any type of case-structured file. */
-struct any_reader 
-  {
-    enum any_reader_type type;  /* Type of file. */
-    void *private;              /* Private data. */
-  };
-
 /* Result of type detection. */
 enum detect_result 
   {
@@ -83,27 +68,10 @@
   return is_type ? YES : NO;
 }
 
-/* If PRIVATE is non-null, creates and returns a new any_reader,
-   initializing its fields to TYPE and PRIVATE.  If PRIVATE is a
-   null pointer, just returns a null pointer. */   
-static struct any_reader *
-make_any_reader (enum any_reader_type type, void *private) 
-{
-  if (private != NULL) 
-    {
-      struct any_reader *reader = xmalloc (sizeof *reader);
-      reader->type = type;
-      reader->private = private;
-      return reader;
-    }
-  else
-    return NULL;
-}
-
-/* Creates an any_reader for HANDLE.  On success, returns the new
-   any_reader and stores the file's dictionary into *DICT.  On
+/* Returns a casereader for HANDLE.  On success, returns the new
+   casereader and stores the file's dictionary into *DICT.  On
    failure, returns a null pointer. */
-struct any_reader *
+struct casereader *
 any_reader_open (struct file_handle *handle, struct dictionary **dict)
 {
   switch (fh_get_referent (handle)) 
@@ -116,15 +84,13 @@
         if (result == IO_ERROR)
           return NULL;
         else if (result == YES)
-          return make_any_reader (SYSTEM_FILE,
-                                  sfm_open_reader (handle, dict, NULL));
+          return sfm_open_reader (handle, dict, NULL);
 
         result = try_detect (handle, pfm_detect);
         if (result == IO_ERROR)
           return NULL;
         else if (result == YES)
-          return make_any_reader (PORTABLE_FILE,
-                                  pfm_open_reader (handle, dict, NULL));
+          return pfm_open_reader (handle, dict, NULL);
 
         msg (SE, _("\"%s\" is not a system or portable file."),
              fh_get_file_name (handle));
@@ -136,74 +102,7 @@
       return NULL;
 
     case FH_REF_SCRATCH:
-      return make_any_reader (SCRATCH_FILE,
-                              scratch_reader_open (handle, dict));
+      return scratch_reader_open (handle, dict);
     }
   NOT_REACHED ();
 }
-
-/* Reads a single case from READER into C.
-   Returns true if successful, false at end of file or on error. */
-bool
-any_reader_read (struct any_reader *reader, struct ccase *c) 
-{
-  switch (reader->type) 
-    {
-    case SYSTEM_FILE:
-      return sfm_read_case (reader->private, c);
-
-    case PORTABLE_FILE:
-      return pfm_read_case (reader->private, c);
-
-    case SCRATCH_FILE:
-      return scratch_reader_read_case (reader->private, c);
-    }
-  NOT_REACHED ();
-}
-
-/* Returns true if an I/O error has occurred on READER, false
-   otherwise. */
-bool
-any_reader_error (struct any_reader *reader) 
-{
-  switch (reader->type) 
-    {
-    case SYSTEM_FILE:
-      return sfm_read_error (reader->private);
-
-    case PORTABLE_FILE:
-      return pfm_read_error (reader->private);
-
-    case SCRATCH_FILE:
-      return scratch_reader_error (reader->private);
-    }
-  NOT_REACHED ();
-}
-
-/* Closes READER. */
-void
-any_reader_close (struct any_reader *reader) 
-{
-  if (reader == NULL)
-    return;
-
-  switch (reader->type) 
-    {
-    case SYSTEM_FILE:
-      sfm_close_reader (reader->private);
-      break;
-
-    case PORTABLE_FILE:
-      pfm_close_reader (reader->private);
-      break;
-
-    case SCRATCH_FILE:
-      scratch_reader_close (reader->private);
-      break;
-
-    default:
-      NOT_REACHED ();
-    }
-
-  free (reader);
-}
Index: merge/src/data/any-reader.h
===================================================================
--- merge.orig/src/data/any-reader.h    2007-06-05 09:16:10.000000000 -0700
+++ merge/src/data/any-reader.h 2007-06-05 09:18:06.000000000 -0700
@@ -23,11 +23,7 @@
 
 struct file_handle;
 struct dictionary;
-struct ccase;
-struct any_reader *any_reader_open (struct file_handle *,
+struct casereader *any_reader_open (struct file_handle *,
                                     struct dictionary **);
-bool any_reader_read (struct any_reader *, struct ccase *);
-bool any_reader_error (struct any_reader *);
-void any_reader_close (struct any_reader *);
 
 #endif /* any-reader.h */
Index: merge/src/data/any-writer.c
===================================================================
--- merge.orig/src/data/any-writer.c    2007-06-05 09:16:10.000000000 -0700
+++ merge/src/data/any-writer.c 2007-06-05 09:18:06.000000000 -0700
@@ -36,41 +36,26 @@
 #include "gettext.h"
 #define _(msgid) gettext (msgid)
 
-/* Type of file backing an any_writer. */
-enum any_writer_type
-  {
-    SYSTEM_FILE,                /* System file. */
-    PORTABLE_FILE,              /* Portable file. */
-    SCRATCH_FILE                /* Scratch file. */
-  };
-
-/* Writer for any type of case-structured file. */
-struct any_writer 
-  {
-    enum any_writer_type type;  /* Type of file. */
-    void *private;              /* Private data. */
-  };
-
 /* Creates and returns a writer for HANDLE with the given DICT. */
-struct any_writer *
+struct casewriter *
 any_writer_open (struct file_handle *handle, struct dictionary *dict)
 {
   switch (fh_get_referent (handle)) 
     {
     case FH_REF_FILE:
       {
-        struct any_writer *writer;
+        struct casewriter *writer;
         char *extension;
 
         extension = fn_extension (fh_get_file_name (handle));
         str_lowercase (extension);
 
         if (!strcmp (extension, ".por"))
-          writer = any_writer_from_pfm_writer (
-            pfm_open_writer (handle, dict, pfm_writer_default_options ()));
+          writer = pfm_open_writer (handle, dict,
+                                    pfm_writer_default_options ());
         else
-          writer = any_writer_from_sfm_writer (
-            sfm_open_writer (handle, dict, sfm_writer_default_options ()));
+          writer = sfm_open_writer (handle, dict,
+                                    sfm_writer_default_options ());
         free (extension);
 
         return writer;
@@ -81,137 +66,8 @@
       return NULL;
 
     case FH_REF_SCRATCH:
-      return any_writer_from_scratch_writer (scratch_writer_open (handle,
-                                                                  dict));
+      return scratch_writer_open (handle, dict);
     }
 
   NOT_REACHED ();
 }
-
-/* If PRIVATE is non-null, creates and returns a new any_writer,
-   initializing its fields to TYPE and PRIVATE.  If PRIVATE is a
-   null pointer, just returns a null pointer. */   
-static struct any_writer *
-make_any_writer (enum any_writer_type type, void *private) 
-{
-  if (private != NULL) 
-    {
-      struct any_writer *writer = xmalloc (sizeof *writer);
-      writer->type = type;
-      writer->private = private;
-      return writer; 
-    }
-  else
-    return NULL;
-}
-  
-/* If SFM_WRITER is non-null, encapsulates SFM_WRITER in an
-   any_writer and returns it.  If SFM_WRITER is null, just
-   returns a null pointer.
-
-   Useful when you need to pass options to sfm_open_writer().
-   Typical usage:
-        any_writer_from_sfm_writer (sfm_open_writer (fh, dict, opts))
-   If you don't need to pass options, then any_writer_open() by
-   itself is easier and more straightforward. */
-struct any_writer *
-any_writer_from_sfm_writer (struct sfm_writer *sfm_writer) 
-{
-  return make_any_writer (SYSTEM_FILE, sfm_writer);
-}
-
-/* If PFM_WRITER is non-null, encapsulates PFM_WRITER in an
-   any_writer and returns it.  If PFM_WRITER is null, just
-   returns a null pointer.
-
-   Useful when you need to pass options to pfm_open_writer().
-   Typical usage:
-        any_writer_from_pfm_writer (pfm_open_writer (fh, dict, opts))
-   If you don't need to pass options, then any_writer_open() by
-   itself is easier and more straightforward. */
-struct any_writer *
-any_writer_from_pfm_writer (struct pfm_writer *pfm_writer) 
-{
-  return make_any_writer (PORTABLE_FILE, pfm_writer);
-}
-
-/* If SCRATCH_WRITER is non-null, encapsulates SCRATCH_WRITER in
-   an any_writer and returns it.  If SCRATCH_WRITER is null, just
-   returns a null pointer.
-
-   Not particularly useful.  Included just for consistency. */
-struct any_writer *
-any_writer_from_scratch_writer (struct scratch_writer *scratch_writer) 
-{
-  return make_any_writer (SCRATCH_FILE, scratch_writer);
-}
-
-/* Writes cases C to WRITER.
-   Returns true if successful, false on failure. */
-bool
-any_writer_write (struct any_writer *writer, const struct ccase *c) 
-{
-  switch (writer->type) 
-    {
-    case SYSTEM_FILE:
-      return sfm_write_case (writer->private, c);
-
-    case PORTABLE_FILE:
-      return pfm_write_case (writer->private, c);
-
-    case SCRATCH_FILE:
-      return scratch_writer_write_case (writer->private, c);
-    }
-  NOT_REACHED ();
-}
-
-/* Returns true if an I/O error has occurred on WRITER, false
-   otherwise. */
-bool
-any_writer_error (const struct any_writer *writer) 
-{
-  switch (writer->type) 
-    {
-    case SYSTEM_FILE:
-      return sfm_write_error (writer->private);
-
-    case PORTABLE_FILE:
-      return pfm_write_error (writer->private);
-
-    case SCRATCH_FILE:
-      return scratch_writer_error (writer->private);
-    }
-  NOT_REACHED ();
-}
-
-/* Closes WRITER.
-   Returns true if successful, false if an I/O error occurred. */
-bool
-any_writer_close (struct any_writer *writer) 
-{
-  bool ok;
-  
-  if (writer == NULL)
-    return true;
-
-  switch (writer->type) 
-    {
-    case SYSTEM_FILE:
-      ok = sfm_close_writer (writer->private);
-      break;
-
-    case PORTABLE_FILE:
-      ok = pfm_close_writer (writer->private);
-      break;
-
-    case SCRATCH_FILE:
-      ok = scratch_writer_close (writer->private);
-      break;
-      
-    default:
-      NOT_REACHED ();
-    }
-
-  free (writer);
-  return ok;
-}
Index: merge/src/data/any-writer.h
===================================================================
--- merge.orig/src/data/any-writer.h    2007-06-05 09:16:10.000000000 -0700
+++ merge/src/data/any-writer.h 2007-06-05 09:18:06.000000000 -0700
@@ -23,18 +23,7 @@
 
 struct file_handle;
 struct dictionary;
-struct ccase;
-struct sfm_writer;
-struct pfm_writer;
-struct scratch_writer;
 
-struct any_writer *any_writer_open (struct file_handle *, struct dictionary *);
-struct any_writer *any_writer_from_sfm_writer (struct sfm_writer *);
-struct any_writer *any_writer_from_pfm_writer (struct pfm_writer *);
-struct any_writer *any_writer_from_scratch_writer (struct scratch_writer *);
-
-bool any_writer_write (struct any_writer *, const struct ccase *);
-bool any_writer_error (const struct any_writer *);
-bool any_writer_close (struct any_writer *);
+struct casewriter *any_writer_open (struct file_handle *, struct dictionary *);
 
 #endif /* any-writer.h */
Index: merge/src/data/automake.mk
===================================================================
--- merge.orig/src/data/automake.mk     2007-06-05 09:17:39.000000000 -0700
+++ merge/src/data/automake.mk  2007-06-05 09:18:06.000000000 -0700
@@ -10,20 +10,12 @@
        src/data/calendar.c \
        src/data/calendar.h \
        src/data/case-ordering.c \
        src/data/case-ordering.h \
-       src/data/case-sink.c \
-       src/data/case-sink.h \
-       src/data/case-source.c \
-       src/data/case-source.h \
        src/data/case.c \
-       src/data/casefilter.c \
-       src/data/casefilter.h \
-       src/data/casefile.h \
-       src/data/casefile.c \
-       src/data/casefile-factory.h \
-       src/data/casefile-private.h \
        src/data/casegrouper.c \
        src/data/casegrouper.h \
+       src/data/caseinit.c \
+       src/data/caseinit.h \
        src/data/casereader-filter.c \
        src/data/casereader-provider.h \
        src/data/casereader-translator.c \
@@ -80,8 +72,6 @@
        src/data/settings.h \
        src/data/sparse-cases.c \
        src/data/sparse-cases.h \
-       src/data/storage-stream.c \
-       src/data/storage-stream.h \
        src/data/sys-file-private.c \
        src/data/sys-file-private.h \
        src/data/sys-file-reader.c \
Index: merge/src/data/dictionary.c
===================================================================
--- merge.orig/src/data/dictionary.c    2007-06-05 09:16:10.000000000 -0700
+++ merge/src/data/dictionary.c 2007-06-05 09:18:06.000000000 -0700
@@ -718,7 +717,7 @@
       double w = case_num (c, d->weight);
       if (w < 0.0 || var_is_num_missing (d->weight, w, MV_ANY))
         w = 0.0;
-      if ( w == 0.0 && *warn_on_invalid ) {
+      if ( w == 0.0 && warn_on_invalid != NULL && *warn_on_invalid ) {
          *warn_on_invalid = false;
          msg (SW, _("At least one case in the data file had a weight value "
                     "that was user-missing, system-missing, zero, or "
Index: merge/src/data/por-file-reader.c
===================================================================
--- merge.orig/src/data/por-file-reader.c       2007-06-05 09:16:10.000000000 -0700
+++ merge/src/data/por-file-reader.c    2007-06-05 09:18:06.000000000 -0700
@@ -20,29 +20,32 @@
 
 #include <config.h>
 #include "por-file-reader.h"
-#include <libpspp/message.h>
-#include <stdarg.h>
-#include <stdio.h>
-#include <stdlib.h>
+
 #include <ctype.h>
 #include <errno.h>
 #include <math.h>
 #include <setjmp.h>
-#include <libpspp/alloc.h>
+#include <stdarg.h>
 #include <stdbool.h>
-#include "case.h"
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <data/casereader-provider.h>
+#include <data/casereader.h>
+#include <data/dictionary.h>
+#include <data/file-handle-def.h>
+#include <data/format.h>
+#include <data/missing-values.h>
+#include <data/value-labels.h>
+#include <data/variable.h>
+#include <libpspp/alloc.h>
 #include <libpspp/compiler.h>
-#include "dictionary.h"
-#include "file-handle-def.h"
-#include "format.h"
-#include "missing-values.h"
 #include <libpspp/hash.h>
 #include <libpspp/magic.h>
+#include <libpspp/message.h>
 #include <libpspp/misc.h>
 #include <libpspp/pool.h>
 #include <libpspp/str.h>
-#include "value-labels.h"
-#include "variable.h"
 
 #include "gettext.h"
 #define _(msgid) gettext (msgid)
@@ -71,10 +74,12 @@
     int var_cnt;                /* Number of variables. */
     int weight_index;          /* 0-based index of weight variable, or -1. */
     int *widths;                /* Variable widths, 0 for numeric. */
-    int value_cnt;             /* Number of `value's per case. */
+    size_t value_cnt;          /* Number of `value's per case. */
     bool ok;                    /* Set false on I/O error. */
   };
 
+static struct casereader_class por_file_casereader_class;
+
 static void
 error (struct pfm_reader *r, const char *msg,...)
      PRINTF_FORMAT (2, 3)
@@ -110,11 +115,11 @@
 }
 
 /* Closes portable file reader R, after we're done with it. */
-void
-pfm_close_reader (struct pfm_reader *r)
+static void
+por_file_casereader_destroy (struct casereader *reader UNUSED, void *r_)
 {
-  if (r != NULL)
-    pool_destroy (r->pool);
+  struct pfm_reader *r = r_;
+  pool_destroy (r->pool);
 }
 
 /* Read a single character into cur_char.  */
@@ -156,7 +161,7 @@
 /* Reads the dictionary from file with handle H, and returns it in a
    dictionary structure.  This dictionary may be modified in order to
    rename, reorder, and delete variables, etc. */
-struct pfm_reader *
+struct casereader *
 pfm_open_reader (struct file_handle *fh, struct dictionary **dict,
                  struct pfm_read_info *info)
 {
@@ -204,10 +209,12 @@
   if (!match (r, 'F'))
     error (r, _("Data record expected."));
 
-  return r;
+  r->value_cnt = dict_get_next_value_idx (*dict);
+  return casereader_create_sequential (NULL, r->value_cnt, CASENUMBER_MAX,
+                                       &por_file_casereader_class, r);
 
  error:
-  pfm_close_reader (r);
+  pool_destroy (r->pool);
   dict_destroy (*dict);
   *dict = NULL;
   return NULL;
@@ -677,19 +684,28 @@
 }
 
 /* Reads one case from portable file R into C. */
-bool
-pfm_read_case (struct pfm_reader *r, struct ccase *c)
+static bool
+por_file_casereader_read (struct casereader *reader, void *r_, struct ccase *c)
 {
+  struct pfm_reader *r = r_;
   size_t i;
   size_t idx;
 
+  case_create (c, casereader_get_value_cnt (reader));
   setjmp (r->bail_out);
-  if (!r->ok)
-    return false;
+  if (!r->ok) 
+    {
+      casereader_force_error (reader);
+      case_destroy (c);
+      return false; 
+    }
   
   /* Check for end of file. */
-  if (r->cc == 'Z')
-    return false;
+  if (r->cc == 'Z') 
+    {
+      case_destroy (c);
+      return false; 
+    }
 
   idx = 0;
   for (i = 0; i < r->var_cnt; i++) 
@@ -713,14 +729,6 @@
   return true;
 }
 
-/* Returns true if an I/O error has occurred on READER, false
-   otherwise. */
-bool
-pfm_read_error (const struct pfm_reader *reader) 
-{
-  return !reader->ok;
-}
-
 /* Returns true if FILE is an SPSS portable file,
    false otherwise. */
 bool
@@ -755,3 +763,11 @@
 
   return true;
 }
+
+static struct casereader_class por_file_casereader_class = 
+  {
+    por_file_casereader_read,
+    por_file_casereader_destroy,
+    NULL,
+    NULL,
+  };
Index: merge/src/data/por-file-reader.h
===================================================================
--- merge.orig/src/data/por-file-reader.h       2007-06-05 09:16:10.000000000 -0700
+++ merge/src/data/por-file-reader.h    2007-06-05 09:18:06.000000000 -0700
@@ -37,12 +37,9 @@
 struct dictionary;
 struct file_handle;
 struct ccase;
-struct pfm_reader *pfm_open_reader (struct file_handle *,
+struct casereader *pfm_open_reader (struct file_handle *,
                                     struct dictionary **,
                                     struct pfm_read_info *);
-bool pfm_read_case (struct pfm_reader *, struct ccase *);
-bool pfm_read_error (const struct pfm_reader *);
-void pfm_close_reader (struct pfm_reader *);
 bool pfm_detect (FILE *);
 
 #endif /* por-file-reader.h */
Index: merge/src/data/por-file-writer.c
===================================================================
--- merge.orig/src/data/por-file-writer.c       2007-06-05 09:16:10.000000000 -0700
+++ merge/src/data/por-file-writer.c    2007-06-05 09:18:06.000000000 -0700
@@ -30,13 +30,15 @@
 #include <time.h>
 #include <unistd.h>
 
-#include "case.h"
-#include "dictionary.h"
-#include "file-handle-def.h"
-#include "format.h"
-#include "missing-values.h"
-#include "value-labels.h"
-#include "variable.h"
+#include <data/case.h>
+#include <data/casewriter-provider.h>
+#include <data/casewriter.h>
+#include <data/dictionary.h>
+#include <data/file-handle-def.h>
+#include <data/format.h>
+#include <data/missing-values.h>
+#include <data/value-labels.h>
+#include <data/variable.h>
 
 #include <libpspp/alloc.h>
 #include <libpspp/hash.h>
@@ -70,6 +72,9 @@
     int fv;                     /* Starting case index. */
   };
 
+static struct casewriter_class por_file_casewriter_class;
+
+static bool close_writer (struct pfm_writer *);
 static void buf_write (struct pfm_writer *, const void *, size_t);
 static void write_header (struct pfm_writer *);
 static void write_version_data (struct pfm_writer *);
@@ -94,7 +99,7 @@
 /* Writes the dictionary DICT to portable file HANDLE according
    to the given OPTS.  Returns nonzero only if successful.  DICT
    will not be modified, except to assign short names. */
-struct pfm_writer *
+struct casewriter *
 pfm_open_writer (struct file_handle *fh, struct dictionary *dict,
                  struct pfm_write_options opts)
 {
@@ -153,12 +158,12 @@
   write_variables (w, dict);
   write_value_labels (w, dict);
   buf_write (w, "F", 1);
-  if (pfm_write_error (w))
+  if (ferror (w->file))
     goto error;
-  return w;
+  return casewriter_create (&por_file_casewriter_class, w);
 
  error:
-  pfm_close_writer (w);
+  close_writer (w);
   return NULL;
 
  open_error:
@@ -356,6 +361,7 @@
           write_value (w, &value, v);
         }
 
+      /* Write variable label. */
       if (var_get_label (v) != NULL)
         { 
           buf_write (w, "C", 1);
@@ -394,41 +400,47 @@
     }
 }
 
-/* Writes case ELEM to the portable file represented by H. */
-int 
-pfm_write_case (struct pfm_writer *w, const struct ccase *c)
+/* Writes case C to the portable file represented by H. */
+static void 
+por_file_casewriter_write (struct casewriter *writer, void *w_,
+                           struct ccase *c)
 {
+  struct pfm_writer *w = w_;
   int i;
 
-  if (ferror (w->file))
-    return 0;
-  
-  for (i = 0; i < w->var_cnt; i++)
+  if (!ferror (w->file)) 
     {
-      struct pfm_var *v = &w->vars[i];
+      for (i = 0; i < w->var_cnt; i++)
+        {
+          struct pfm_var *v = &w->vars[i];
       
-      if (v->width == 0)
-        write_float (w, case_num_idx (c, v->fv));
-      else
-       {
-         write_int (w, v->width);
-          buf_write (w, case_str_idx (c, v->fv), v->width);
-       }
+          if (v->width == 0)
+            write_float (w, case_num_idx (c, v->fv));
+          else
+            {
+              write_int (w, v->width);
+              buf_write (w, case_str_idx (c, v->fv), v->width);
+            }
+        } 
     }
-
-  return !pfm_write_error (w);
+  else
+    casewriter_force_error (writer);
+  
+  case_destroy (c);
 }
 
-bool
-pfm_write_error (const struct pfm_writer *w) 
+static void
+por_file_casewriter_destroy (struct casewriter *writer, void *w_) 
 {
-  return ferror (w->file);
+  struct pfm_writer *w = w_;
+  if (!close_writer (w))
+    casewriter_force_error (writer);
 }
 
 /* Closes a portable file after we're done with it.
    Returns true if successful, false if an I/O error occurred. */
-bool
-pfm_close_writer (struct pfm_writer *w)
+static bool
+close_writer (struct pfm_writer *w)
 {
   bool ok;
 
@@ -442,7 +454,7 @@
       memset (buf, 'Z', sizeof buf);
       buf_write (w, buf, w->lc >= 80 ? 80 : 80 - w->lc);
 
-      ok = !pfm_write_error (w);
+      ok = !ferror (w->file);
       if (fclose (w->file) == EOF) 
         ok = false; 
 
@@ -844,3 +856,10 @@
   strcpy (output, "*.");
   return;
 }
+
+static struct casewriter_class por_file_casewriter_class = 
+  {
+    por_file_casewriter_write,
+    por_file_casewriter_destroy,
+    NULL,
+  };
Index: merge/src/data/por-file-writer.h
===================================================================
--- merge.orig/src/data/por-file-writer.h       2007-06-05 09:16:10.000000000 -0700
+++ merge/src/data/por-file-writer.h    2007-06-05 09:18:06.000000000 -0700
@@ -41,12 +41,8 @@
 struct file_handle;
 struct dictionary;
 struct ccase;
-struct pfm_writer *pfm_open_writer (struct file_handle *, struct dictionary *,
+struct casewriter *pfm_open_writer (struct file_handle *, struct dictionary *,
                                     struct pfm_write_options);
 struct pfm_write_options pfm_writer_default_options (void);
 
-int pfm_write_case (struct pfm_writer *, const struct ccase *);
-bool pfm_write_error (const struct pfm_writer *);
-bool pfm_close_writer (struct pfm_writer *);
-
 #endif /* por-file-writer.h */
Index: merge/src/data/procedure.c
===================================================================
--- merge.orig/src/data/procedure.c     2007-06-05 09:16:10.000000000 -0700
+++ merge/src/data/procedure.c  2007-06-05 09:18:06.000000000 -0700
@@ -23,48 +23,50 @@
 #include <stdlib.h>
 #include <unistd.h>
 
-#include <data/case-source.h>
-#include <data/case-sink.h>
 #include <data/case.h>
-#include <data/casefile.h>
-#include <data/fastfile.h>
+#include <data/caseinit.h>
+#include <data/casereader.h>
+#include <data/casereader-provider.h>
+#include <data/casewriter.h>
 #include <data/dictionary.h>
 #include <data/file-handle-def.h>
 #include <data/procedure.h>
-#include <data/storage-stream.h>
 #include <data/transformations.h>
 #include <data/variable.h>
 #include <libpspp/alloc.h>
 #include <libpspp/deque.h>
 #include <libpspp/misc.h>
 #include <libpspp/str.h>
+#include <libpspp/taint.h>
 
 struct dataset {
-
-  /* An abstract factory which creates casefiles */
-  struct casefile_factory *cf_factory;
-
-  /* Callback which occurs when a procedure provides a new source for
-     the dataset */
-  replace_source_callback *replace_source ;
-
-  /* Callback which occurs whenever the DICT is replaced by a new one */
-  replace_dictionary_callback *replace_dict;
-
-  /* Cases are read from proc_source,
+  /* Cases are read from source,
+     their transformation variables are initialized,
      pass through permanent_trns_chain (which transforms them into
      the format described by permanent_dict),
-     are written to proc_sink,
+     are written to sink,
      pass through temporary_trns_chain (which transforms them into
      the format described by dict),
      and are finally passed to the procedure. */
-  struct case_source *proc_source;
+  struct casereader *source;
+  struct caseinit *caseinit;
   struct trns_chain *permanent_trns_chain;
   struct dictionary *permanent_dict;
-  struct case_sink *proc_sink;
+  struct casewriter *sink;
   struct trns_chain *temporary_trns_chain;
   struct dictionary *dict;
 
+  /* Callback which occurs when a procedure provides a new source for
+     the dataset */
+  replace_source_callback *replace_source ;
+
+  /* Callback which occurs whenever the DICT is replaced by a new one */
+  replace_dictionary_callback *replace_dict;
+
+  /* If true, cases are discarded instead of being written to
+     sink. */
+  bool discard_output;
+
   /* The transformation chain that the next transformation will be
      added to. */
   struct trns_chain *cur_trns_chain;
@@ -82,26 +84,22 @@
   struct ccase *lag_cases;      /* Lagged cases managed by deque. */
 
   /* Procedure data. */
-  bool is_open;               /* Procedure open? */
-  struct ccase trns_case;     /* Case used for transformations. */
-  struct ccase sink_case;     /* Case written to sink, if
-                                 compacting is necessary. */
+  enum 
+    {
+      PROC_COMMITTED,
+      PROC_OPEN,
+      PROC_CLOSED 
+    }
+  proc_state;
   size_t cases_written;       /* Cases output so far. */
-  bool ok;
+  bool ok;                    /* Error status. */
 }; /* struct dataset */
 
 
 static void add_case_limit_trns (struct dataset *ds);
 static void add_filter_trns (struct dataset *ds);
 
-static bool internal_procedure (struct dataset *ds, case_func *,
-                                end_func *,
-                                void *aux);
 static void update_last_proc_invocation (struct dataset *ds);
-static void create_trns_case (struct ccase *, struct dictionary *);
-static void open_active_file (struct dataset *ds);
-static void clear_case (const struct dataset *ds, struct ccase *c);
-static bool close_active_file (struct dataset *ds);
 
 /* Public functions. */
 
@@ -116,146 +114,89 @@
 
 /* Regular procedure. */
 
-
-
-/* Reads the data from the input program and writes it to a new
-   active file.  For each case we read from the input program, we
-   do the following:
-
-   1. Execute permanent transformations.  If these drop the case,
-      start the next case from step 1.
-
-   2. Write case to replacement active file.
-
-   3. Execute temporary transformations.  If these drop the case,
-      start the next case from step 1.
-
-   4. Pass case to PROC_FUNC, passing AUX as auxiliary data.
-
-   Returns true if successful, false if an I/O error occurred. */
+/* Executes any pending transformations, if necessary.
+   This is not identical to the EXECUTE command in that it won't
+   always read the source data.  This can be important when the
+   source data is given inline within BEGIN DATA...END DATA. */
 bool
-procedure (struct dataset *ds, case_func *cf, void *aux)
+proc_execute (struct dataset *ds)
 {
-  update_last_proc_invocation (ds);
+  bool ok;
 
-  /* Optimize the trivial case where we're not going to do
-     anything with the data, by not reading the data at all. */
-  if (cf == NULL
-      && case_source_is_class (ds->proc_source, &storage_source_class)
-      && ds->proc_sink == NULL
-      && (ds->temporary_trns_chain == NULL
-          || trns_chain_is_empty (ds->temporary_trns_chain))
+  if ((ds->temporary_trns_chain == NULL
+       || trns_chain_is_empty (ds->temporary_trns_chain))
       && trns_chain_is_empty (ds->permanent_trns_chain))
     {
       ds->n_lag = 0;
+      ds->discard_output = false;
       dict_set_case_limit (ds->dict, 0);
       dict_clear_vectors (ds->dict);
       return true;
     }
 
-  return internal_procedure (ds, cf, NULL, aux);
-}
-
-/* Multipass procedure. */
-
-struct multipass_aux_data
-  {
-    struct casefile *casefile;
-
-    bool (*proc_func) (const struct casefile *, void *aux);
-    void *aux;
-  };
-
-/* Case processing function for multipass_procedure(). */
-static bool
-multipass_case_func (const struct ccase *c, void *aux_data_, const struct dataset *ds UNUSED)
-{
-  struct multipass_aux_data *aux_data = aux_data_;
-  return casefile_append (aux_data->casefile, c);
+  ok = casereader_destroy (proc_open (ds));
+  return proc_commit (ds) && ok;
 }
 
-/* End-of-file function for multipass_procedure(). */
-static bool
-multipass_end_func (void *aux_data_, const struct dataset *ds UNUSED)
-{
-  struct multipass_aux_data *aux_data = aux_data_;
-  return (aux_data->proc_func == NULL
-          || aux_data->proc_func (aux_data->casefile, aux_data->aux));
-}
+static struct casereader_class proc_casereader_class;
 
-/* Procedure that allows multiple passes over the input data.
-   The entire active file is passed to PROC_FUNC, with the given
-   AUX as auxiliary data, as a unit. */
-bool
-multipass_procedure (struct dataset *ds, casefile_func *proc_func,  void *aux)
+/* Opens dataset DS for reading and returns a casereader that
+   yields its cases.  proc_commit must be called when done. */
+struct casereader *
+proc_open (struct dataset *ds)
 {
-  struct multipass_aux_data aux_data;
-  bool ok;
-
-  aux_data.casefile =
-    ds->cf_factory->create_casefile (ds->cf_factory,
-                                    dict_get_next_value_idx (ds->dict));
-
-  aux_data.proc_func = proc_func;
-  aux_data.aux = aux;
+  assert (ds->source != NULL);
+  assert (ds->proc_state == PROC_COMMITTED);
 
-  ok = internal_procedure (ds, multipass_case_func, multipass_end_func, &aux_data);
-  ok = !casefile_error (aux_data.casefile) && ok;
+  update_last_proc_invocation (ds);
 
-  casefile_destroy (aux_data.casefile);
+  caseinit_mark_for_init (ds->caseinit, ds->dict);
 
-  return ok;
-}
-
-
-/* Procedure implementation. */
+  /* Finish up the collection of transformations. */
+  add_case_limit_trns (ds);
+  add_filter_trns (ds);
+  trns_chain_finalize (ds->cur_trns_chain);
 
-/* Executes a procedure.
-   Passes each case to CASE_FUNC.
-   Calls END_FUNC after the last case.
-   Returns true if successful, false if an I/O error occurred (or
-   if CASE_FUNC or END_FUNC ever returned false). */
-static bool
-internal_procedure (struct dataset *ds, case_func *proc,
-                   end_func *end,
-                    void *aux)
-{
-  struct ccase *c;
-  bool ok = true;
-
-  proc_open (ds);
-  while (ok && proc_read (ds, &c))
-    if (proc != NULL)
-      ok = proc (c, aux, ds) && ok;
-  if (end != NULL)
-    ok = end (aux, ds) && ok;
+  /* Make permanent_dict refer to the dictionary right before
+     data reaches the sink. */
+  if (ds->permanent_dict == NULL)
+    ds->permanent_dict = ds->dict;
 
-  if ( proc_close (ds) && ok )
+  /* Prepare sink. */
+  if (!ds->discard_output) 
     {
-
-      return true;
+      ds->compactor = (dict_compacting_would_shrink (ds->permanent_dict)
+                       ? dict_make_compactor (ds->permanent_dict)
+                       : NULL);
+      ds->sink = autopaging_writer_create (dict_get_compacted_value_cnt (
+                                             ds->permanent_dict)); 
+    }
+  else 
+    {
+      ds->compactor = NULL;
+      ds->sink = NULL;
     }
 
-  return false;
-}
-
-/* Opens dataset DS for reading cases with proc_read.
-   proc_close must be called when done. */
-void
-proc_open (struct dataset *ds)
-{
-  assert (ds->proc_source != NULL);
-  assert (!ds->is_open);
-
-  update_last_proc_invocation (ds);
-
-  open_active_file (ds);
+  /* Allocate memory for lagged cases. */
+  ds->lag_cases = deque_init (&ds->lag, ds->n_lag, sizeof *ds->lag_cases);
 
-  ds->is_open = true;
-  create_trns_case (&ds->trns_case, ds->dict);
-  case_create (&ds->sink_case, dict_get_compacted_value_cnt (ds->dict));
+  ds->proc_state = PROC_OPEN;
   ds->cases_written = 0;
   ds->ok = true;
+
+  /* FIXME: use taint in dataset in place of `ok'? */
+  /* FIXME: for trivial cases we can just return a clone of
+     ds->source? */
+  return casereader_create_sequential (NULL,
+                                       dict_get_next_value_idx (ds->dict),
+                                       CASENUMBER_MAX,
+                                       &proc_casereader_class, ds);
+}
+
+bool
+proc_is_open (const struct dataset *ds) 
+{
+  return ds->proc_state != PROC_COMMITTED;
 }
 
 /* Reads the next case from dataset DS, which must have been
@@ -264,14 +205,15 @@
    case is stored in *C.
    Return false at end of file or if a read error occurs.  In
    this case a null pointer is stored in *C. */
-bool
-proc_read (struct dataset *ds, struct ccase **c)
+static bool
+proc_casereader_read (struct casereader *reader UNUSED, void *ds_,
+                      struct ccase *c) 
 {
+  struct dataset *ds = ds_;
   enum trns_result retval = TRNS_DROP_CASE;
 
-  assert (ds->is_open);
-  *c = NULL;
-  for (;;)
+  assert (ds->proc_state == PROC_OPEN);
+  for (;;) 
     {
       size_t case_nr;
 
@@ -281,51 +223,59 @@
       if (!ds->ok)
         return false;
 
-      /* Read a case from proc_source. */
-      clear_case (ds, &ds->trns_case);
-      if (!ds->proc_source->class->read (ds->proc_source, &ds->trns_case))
+      /* Read a case from source. */
+      if (!casereader_read (ds->source, c))
         return false;
+      case_resize (c, dict_get_next_value_idx (ds->dict));
+      caseinit_init_reinit_vars (ds->caseinit, c);
+      caseinit_init_left_vars (ds->caseinit, c);
 
       /* Execute permanent transformations.  */
       case_nr = ds->cases_written + 1;
       retval = trns_chain_execute (ds->permanent_trns_chain, TRNS_CONTINUE,
-                                   &ds->trns_case, &case_nr);
-      if (retval != TRNS_CONTINUE)
-        continue;
-
+                                   c, &case_nr);
+      caseinit_update_left_vars (ds->caseinit, c);
+      if (retval != TRNS_CONTINUE) 
+        {
+          case_destroy (c);
+          continue; 
+        }
+  
       /* Write case to collection of lagged cases. */
       if (ds->n_lag > 0) 
         {
           while (deque_count (&ds->lag) >= ds->n_lag)
             case_destroy (&ds->lag_cases[deque_pop_back (&ds->lag)]);
-          case_clone (&ds->lag_cases[deque_push_front (&ds->lag)],
-                      &ds->trns_case);
+          case_clone (&ds->lag_cases[deque_push_front (&ds->lag)], c);
         }
 
       /* Write case to replacement active file. */
       ds->cases_written++;
-      if (ds->proc_sink->class->write != NULL)
+      if (ds->sink != NULL) 
         {
-          if (ds->compactor != NULL)
+          struct ccase tmp;
+          if (ds->compactor != NULL) 
             {
-              dict_compactor_compact (ds->compactor, &ds->sink_case,
-                                      &ds->trns_case);
-              ds->proc_sink->class->write (ds->proc_sink, &ds->sink_case);
+              case_create (&tmp, dict_get_compacted_value_cnt (ds->dict));
+              dict_compactor_compact (ds->compactor, &tmp, c);
             }
           else
-            ds->proc_sink->class->write (ds->proc_sink, &ds->trns_case);
+            case_clone (&tmp, c);
+          casewriter_write (ds->sink, &tmp);
         }
 
       /* Execute temporary transformations. */
       if (ds->temporary_trns_chain != NULL)
         {
           retval = trns_chain_execute (ds->temporary_trns_chain, TRNS_CONTINUE,
-                                       &ds->trns_case, &ds->cases_written);
+                                       c, &ds->cases_written);
           if (retval != TRNS_CONTINUE)
-            continue;
+            {
+              case_destroy (c);
+              continue;
+            }
         }
 
-      *c = &ds->trns_case;
       return true;
     }
 }
@@ -335,120 +285,35 @@
    while reading or closing the data set.
    If DS has not been opened, returns true without doing
    anything else. */
-bool
-proc_close (struct dataset *ds)
-{
-  if (!ds->is_open)
-    return true;
-
-  /* Drain any remaining cases. */
-  while (ds->ok)
-    {
-      struct ccase *c;
-      if (!proc_read (ds, &c))
-        break;
-    }
-  ds->ok = free_case_source (ds->proc_source) && ds->ok;
-  proc_set_source (ds, NULL);
-
-  case_destroy (&ds->sink_case);
-  case_destroy (&ds->trns_case);
-
-  ds->ok = close_active_file (ds) && ds->ok;
-  ds->is_open = false;
-
-  return ds->ok;
-}
-
-/* Updates last_proc_invocation. */
 static void
-update_last_proc_invocation (struct dataset *ds)
+proc_casereader_destroy (struct casereader *reader, void *ds_)
 {
-  ds->last_proc_invocation = time (NULL);
-}
+  struct dataset *ds = ds_;
+  struct ccase c;
 
-/* Creates and returns a case, initializing it from the vectors
-   that say which `value's need to be initialized just once, and
-   which ones need to be re-initialized before every case. */
-static void
-create_trns_case (struct ccase *trns_case, struct dictionary *dict)
-{
-  size_t var_cnt = dict_get_var_cnt (dict);
-  size_t i;
+  /* Make sure transformations happen for every input case, in
+     case they have side effects, and ensure that the replacement
+     active file gets all the cases it should. */
+  while (casereader_read (reader, &c))
+    case_destroy (&c);
 
-  case_create (trns_case, dict_get_next_value_idx (dict));
-  for (i = 0; i < var_cnt; i++)
-    {
-      struct variable *v = dict_get_var (dict, i);
-      union value *value = case_data_rw (trns_case, v);
-
-      if (var_is_numeric (v))
-        value->f = var_get_leave (v) ? 0.0 : SYSMIS;
-      else
-        memset (value->s, ' ', var_get_width (v));
-    }
-}
-
-/* Makes all preparations for reading from the data source and writing
-   to the data sink. */
-static void
-open_active_file (struct dataset *ds)
-{
-  add_case_limit_trns (ds);
-  add_filter_trns (ds);
-
-  /* Finalize transformations. */
-  trns_chain_finalize (ds->cur_trns_chain);
-
-  /* Make permanent_dict refer to the dictionary right before
-     data reaches the sink. */
-  if (ds->permanent_dict == NULL)
-    ds->permanent_dict = ds->dict;
-
-  /* Figure out whether to compact. */
-  ds->compactor =
-    (dict_compacting_would_shrink (ds->permanent_dict)
-     ? dict_make_compactor (ds->permanent_dict)
-     : NULL);
-
-  /* Prepare sink. */
-  if (ds->proc_sink == NULL)
-    ds->proc_sink = create_case_sink (&storage_sink_class,
-                                     ds->permanent_dict,
-                                     ds->cf_factory,
-                                     NULL);
-  if (ds->proc_sink->class->open != NULL)
-    ds->proc_sink->class->open (ds->proc_sink);
-
-  /* Allocate memory for lagged cases. */
-  ds->lag_cases = deque_init (&ds->lag, ds->n_lag, sizeof *ds->lag_cases);
+  ds->proc_state = PROC_CLOSED;
+  ds->ok = casereader_destroy (ds->source) && ds->ok;
+  ds->source = NULL;
+  proc_set_active_file_data (ds, NULL);
 }
 
-/* Clears the variables in C that need to be cleared between
-   processing cases.  */
-static void
-clear_case (const struct dataset *ds, struct ccase *c)
+/* Must return false if the source casereader, a transformation,
+   or the sink casewriter signaled an error.  (If a temporary
+   transformation signals an error, then the return value is
+   false, but the replacement active file may still be
+   untainted.) */
+bool
+proc_commit (struct dataset *ds) 
 {
-  size_t var_cnt = dict_get_var_cnt (ds->dict);
-  size_t i;
+  assert (ds->proc_state == PROC_CLOSED);
+  ds->proc_state = PROC_COMMITTED;
 
-  for (i = 0; i < var_cnt; i++)
-    {
-      struct variable *v = dict_get_var (ds->dict, i);
-      if (!var_get_leave (v))
-        {
-          if (var_is_numeric (v))
-            case_data_rw (c, v)->f = SYSMIS;
-          else
-            memset (case_data_rw (c, v)->s, ' ', var_get_width (v));
-        }
-    }
-}
-
-/* Closes the active file. */
-static bool
-close_active_file (struct dataset *ds)
-{
   /* Free memory for lagged cases. */
   while (!deque_is_empty (&ds->lag))
     case_destroy (&ds->lag_cases[deque_pop_back (&ds->lag)]);
@@ -457,23 +322,49 @@
   /* Dictionary from before TEMPORARY becomes permanent. */
   proc_cancel_temporary_transformations (ds);
 
-  /* Finish compacting. */
-  if (ds->compactor != NULL)
+  if (!ds->discard_output) 
     {
-      dict_compactor_destroy (ds->compactor);
-      dict_compact_values (ds->dict);
-      ds->compactor = NULL;
+      /* Finish compacting. */
+      if (ds->compactor != NULL) 
+        {
+          dict_compactor_destroy (ds->compactor);
+          dict_compact_values (ds->dict);
+          ds->compactor = NULL;
+        }
+    
+      /* Old data sink becomes new data source. */
+      if (ds->sink != NULL) 
+        ds->source = casewriter_make_reader (ds->sink);
     }
+  else 
+    {
+      ds->source = NULL;
+      ds->discard_output = false; 
+    }
+  ds->sink = NULL;
+  if ( ds->replace_source) ds->replace_source (ds->source);
 
-  /* Old data sink becomes new data source. */
-  if (ds->proc_sink->class->make_source != NULL)
-    proc_set_source (ds, ds->proc_sink->class->make_source (ds->proc_sink) );
-  free_case_sink (ds->proc_sink);
-  ds->proc_sink = NULL;
+  caseinit_clear (ds->caseinit);
+  caseinit_mark_as_preinited (ds->caseinit, ds->dict);
 
   dict_clear_vectors (ds->dict);
   ds->permanent_dict = NULL;
-  return proc_cancel_all_transformations (ds);
+  return proc_cancel_all_transformations (ds) && ds->ok;
+}
+
+static struct casereader_class proc_casereader_class = 
+  {
+    proc_casereader_read,
+    proc_casereader_destroy,
+    NULL,
+    NULL,
+  };
+
+/* Updates last_proc_invocation. */
+static void
+update_last_proc_invocation (struct dataset *ds)
+{
+  ds->last_proc_invocation = time (NULL);
 }
 
 /* Returns a pointer to the lagged case from N_BEFORE cases before the
@@ -490,218 +379,6 @@
     return NULL;
 }
 
-/* Procedure that separates the data into SPLIT FILE groups. */
-
-/* Represents auxiliary data for handling SPLIT FILE. */
-struct split_aux_data
-  {
-    struct dataset *dataset;    /* The dataset */
-    struct ccase prev_case;     /* Data in previous case. */
-
-    /* Callback functions. */
-    begin_func *begin;
-    case_func *proc;
-    end_func *end;
-    void *func_aux;
-  };
-
-static int equal_splits (const struct ccase *, const struct ccase *, const struct dataset *ds);
-static bool split_procedure_case_func (const struct ccase *c, void *, const struct dataset *);
-static bool split_procedure_end_func (void *, const struct dataset *);
-
-/* Like procedure(), but it automatically breaks the case stream
-   into SPLIT FILE break groups.  Before each group of cases with
-   identical SPLIT FILE variable values, BEGIN_FUNC is called
-   with the first case in the group.
-   Then PROC_FUNC is called for each case in the group (including
-   the first).
-   END_FUNC is called when the group is finished.  FUNC_AUX is
-   passed to each of the functions as auxiliary data.
-
-   If the active file is empty, none of BEGIN_FUNC, PROC_FUNC,
-   and END_FUNC will be called at all.
-
-   If SPLIT FILE is not in effect, then there is one break group
-   (if the active file is nonempty), and BEGIN_FUNC and END_FUNC
-   will be called once.
-
-   Returns true if successful, false if an I/O error occurred. */
-bool
-procedure_with_splits (struct dataset *ds,
-                      begin_func begin,
-                      case_func *proc,
-                       end_func *end,
-                       void *func_aux)
-{
-  struct split_aux_data split_aux;
-  bool ok;
-
-  case_nullify (&split_aux.prev_case);
-  split_aux.begin = begin;
-  split_aux.proc = proc;
-  split_aux.end = end;
-  split_aux.func_aux = func_aux;
-  split_aux.dataset = ds;
-
-  ok = internal_procedure (ds, split_procedure_case_func,
-                           split_procedure_end_func, &split_aux);
-
-  case_destroy (&split_aux.prev_case);
-
-  return ok;
-}
-
-/* Case callback used by procedure_with_splits(). */
-static bool
-split_procedure_case_func (const struct ccase *c, void *split_aux_, const struct dataset *ds)
-{
-  struct split_aux_data *split_aux = split_aux_;
-
-  /* Start a new series if needed. */
-  if (case_is_null (&split_aux->prev_case)
-      || !equal_splits (c, &split_aux->prev_case, split_aux->dataset))
-    {
-      if (!case_is_null (&split_aux->prev_case) && split_aux->end != NULL)
-        split_aux->end (split_aux->func_aux, ds);
-
-      case_destroy (&split_aux->prev_case);
-      case_clone (&split_aux->prev_case, c);
-
-      if (split_aux->begin != NULL)
-       split_aux->begin (&split_aux->prev_case, split_aux->func_aux, ds);
-    }
-
-  return (split_aux->proc == NULL
-          || split_aux->proc (c, split_aux->func_aux, ds));
-}
-
-/* End-of-file callback used by procedure_with_splits(). */
-static bool
-split_procedure_end_func (void *split_aux_, const struct dataset *ds)
-{
-  struct split_aux_data *split_aux = split_aux_;
-
-  if (!case_is_null (&split_aux->prev_case) && split_aux->end != NULL)
-    split_aux->end (split_aux->func_aux, ds);
-  return true;
-}
-
-/* Compares the SPLIT FILE variables in cases A and B and returns
-   nonzero only if they differ. */
-static int
-equal_splits (const struct ccase *a, const struct ccase *b,
-             const struct dataset *ds)
-{
-  return case_compare (a, b,
-                       dict_get_split_vars (ds->dict),
-                       dict_get_split_cnt (ds->dict)) == 0;
-}
-
-/* Multipass procedure that separates the data into SPLIT FILE
-   groups. */
-
-/* Represents auxiliary data for handling SPLIT FILE in a
-   multipass procedure. */
-struct multipass_split_aux_data
-  {
-    struct dataset *dataset;    /* The dataset of the split */
-    struct ccase prev_case;     /* Data in previous case. */
-    struct casefile *casefile;  /* Accumulates data for a split. */
-    split_func *split;          /* Function to call with the accumulated
-                                  data. */
-    void *func_aux;             /* Auxiliary data. */
-  };
-
-static bool multipass_split_case_func (const struct ccase *c, void *aux_, const struct dataset *);
-static bool multipass_split_end_func (void *aux_, const struct dataset *ds);
-static bool multipass_split_output (struct multipass_split_aux_data *, const struct dataset *ds);
-
-/* Returns true if successful, false if an I/O error occurred. */
-bool
-multipass_procedure_with_splits (struct dataset *ds,
-                                split_func  *split,
-                                 void *func_aux)
-{
-  struct multipass_split_aux_data aux;
-  bool ok;
-
-  case_nullify (&aux.prev_case);
-  aux.casefile = NULL;
-  aux.split = split;
-  aux.func_aux = func_aux;
-  aux.dataset = ds;
-
-  ok = internal_procedure (ds, multipass_split_case_func,
-                           multipass_split_end_func, &aux);
-  case_destroy (&aux.prev_case);
-
-  return ok;
-}
-
-/* Case callback used by multipass_procedure_with_splits(). */
-static bool
-multipass_split_case_func (const struct ccase *c, void *aux_, const struct dataset *ds)
-{
-  struct multipass_split_aux_data *aux = aux_;
-  bool ok = true;
-
-  /* Start a new series if needed. */
-  if (aux->casefile == NULL || ! equal_splits (c, &aux->prev_case, ds))
-    {
-      /* Record split values. */
-      case_destroy (&aux->prev_case);
-      case_clone (&aux->prev_case, c);
-
-      /* Pass any cases to split_func. */
-      if (aux->casefile != NULL)
-        ok = multipass_split_output (aux, ds);
-
-      /* Start a new casefile. */
-      aux->casefile =
-       ds->cf_factory->create_casefile (ds->cf_factory,
-                                        dict_get_next_value_idx (ds->dict));
-    }
-
-  return casefile_append (aux->casefile, c) && ok;
-}
-
-/* End-of-file callback used by multipass_procedure_with_splits(). */
-static bool
-multipass_split_end_func (void *aux_, const struct dataset *ds)
-{
-  struct multipass_split_aux_data *aux = aux_;
-  return (aux->casefile == NULL || multipass_split_output (aux, ds));
-}
-
-static bool
-multipass_split_output (struct multipass_split_aux_data *aux, const struct dataset *ds)
-{
-  bool ok;
-
-  assert (aux->casefile != NULL);
-  ok = aux->split (&aux->prev_case, aux->casefile, aux->func_aux, ds);
-  casefile_destroy (aux->casefile);
-  aux->casefile = NULL;
-
-  return ok;
-}
-
-/* Discards all the current state in preparation for a data-input
-   command like DATA LIST or GET. */
-void
-discard_variables (struct dataset *ds)
-{
-  dict_clear (ds->dict);
-  fh_set_default_handle (NULL);
-
-  ds->n_lag = 0;
-
-  free_case_source (ds->proc_source);
-  proc_set_source (ds, NULL);
-
-  proc_cancel_all_transformations (ds);
-}
-
 /* Returns the current set of permanent transformations,
    and clears the permanent transformations.
    For use by INPUT PROGRAM. */
@@ -804,8 +481,10 @@
 {
   if (proc_in_temporary_transformations (ds))
     {
-      dataset_set_dict (ds, ds->permanent_dict);
+      dict_destroy (ds->dict);
+      ds->dict = ds->permanent_dict;
       ds->permanent_dict = NULL;
+      if (ds->replace_dict) ds->replace_dict (ds->dict);
 
       trns_chain_destroy (ds->temporary_trns_chain);
       ds->temporary_trns_chain = NULL;
@@ -822,6 +501,7 @@
 proc_cancel_all_transformations (struct dataset *ds)
 {
   bool ok;
+  assert (ds->proc_state == PROC_COMMITTED);
   ok = trns_chain_destroy (ds->permanent_trns_chain);
   ok = trns_chain_destroy (ds->temporary_trns_chain) && ok;
   ds->permanent_trns_chain = ds->cur_trns_chain = trns_chain_create ();
@@ -831,14 +511,12 @@
 
 /* Initializes procedure handling. */
 struct dataset *
-create_dataset (struct casefile_factory *fact,
-               replace_source_callback *rps,
-               replace_dictionary_callback *rds
-               )
+create_dataset (replace_source_callback *rps,
+               replace_dictionary_callback *rds)
 {
   struct dataset *ds = xzalloc (sizeof(*ds));
   ds->dict = dict_create ();
-  ds->cf_factory = fact;
+  ds->caseinit = caseinit_create ();
   ds->replace_source = rps;
   ds->replace_dict = rds;
   proc_cancel_all_transformations (ds);
@@ -849,60 +527,103 @@
 void
 destroy_dataset (struct dataset *ds)
 {
-  discard_variables (ds);
+  proc_discard_active_file (ds);
   dict_destroy (ds->dict);
+  caseinit_destroy (ds->caseinit);
   trns_chain_destroy (ds->permanent_trns_chain);
   free (ds);
 }
 
-/* Sets SINK as the destination for procedure output from the
-   next procedure. */
+/* Causes output from the next procedure to be discarded, instead
+   of being preserved for use as input for the next procedure. */
 void
-proc_set_sink (struct dataset *ds, struct case_sink *sink)
+proc_discard_output (struct dataset *ds) 
 {
-  assert (ds->proc_sink == NULL);
-  ds->proc_sink = sink;
+  ds->discard_output = true;
+}
+
+/* Discards the active file dictionary, data, and
+   transformations. */
+void
+proc_discard_active_file (struct dataset *ds)
+{
+  assert (ds->proc_state == PROC_COMMITTED);
+
+  dict_clear (ds->dict);
+  fh_set_default_handle (NULL);
+
+  ds->n_lag = 0;
+  
+  casereader_destroy (ds->source);
+  ds->source = NULL;
+  if ( ds->replace_source) ds->replace_source (NULL);
+
+  proc_cancel_all_transformations (ds);
 }
 
 /* Sets SOURCE as the source for procedure input for the next
    procedure. */
 void
-proc_set_source (struct dataset *ds, struct case_source *source)
+proc_set_active_file (struct dataset *ds,
+                      struct casereader *source,
+                      struct dictionary *dict) 
 {
-  ds->proc_source = source;
+  assert (ds->proc_state == PROC_COMMITTED);
+  assert (ds->dict != dict);
 
-  if ( ds->replace_source )
-    ds->replace_source (ds->proc_source);
+  proc_discard_active_file (ds);
+
+  dict_destroy (ds->dict);
+  ds->dict = dict;
+  if ( ds->replace_dict) ds->replace_dict (dict);
+
+  proc_set_active_file_data (ds, source);
 }
 
-/* Returns true if a source for the next procedure has been
-   configured, false otherwise. */
+/* Replaces the active file's data by READER without replacing
+   the associated dictionary. */
 bool
-proc_has_source (const struct dataset *ds)
+proc_set_active_file_data (struct dataset *ds, struct casereader *reader) 
 {
-  return ds->proc_source != NULL;
-}
+  casereader_destroy (ds->source);
+  ds->source = reader;
+  if (ds->replace_source) ds->replace_source (reader);
 
-/* Returns the output from the previous procedure.
-   For use only immediately after executing a procedure.
-   The returned casefile is owned by the caller; it will not be
-   automatically used for the next procedure's input. */
-struct casefile *
-proc_capture_output (struct dataset *ds)
-{
-  struct casefile *casefile;
+  caseinit_clear (ds->caseinit);
+  caseinit_mark_as_preinited (ds->caseinit, ds->dict);
 
-  /* Try to make sure that this function is called immediately
-     after procedure() or a similar function. */
-  assert (ds->proc_source != NULL);
-  assert (case_source_is_class (ds->proc_source, &storage_source_class));
-  assert (trns_chain_is_empty (ds->permanent_trns_chain));
-  assert (!proc_in_temporary_transformations (ds));
+  return reader == NULL || !casereader_error (reader);
+}
 
-  casefile = storage_source_decapsulate (ds->proc_source);
-  proc_set_source (ds, NULL);
+/* Returns true if an active file data source is available, false
+   otherwise. */
+bool
+proc_has_active_file (const struct dataset *ds) 
+{
+  return ds->source != NULL;
+}
 
-  return casefile;
+/* Checks whether DS has a corrupted active file.  If so,
+   discards it and returns false.  If not, resets the source's
+   successor taint and returns true. */
+bool
+dataset_end_of_command (struct dataset *ds) 
+{
+  if (ds->source != NULL) 
+    {
+      if (casereader_error (ds->source)) 
+        {
+          proc_discard_active_file (ds);
+          return false;
+        }
+      else 
+        {
+          const struct taint *taint = casereader_get_taint (ds->source);
+          taint_reset_successor_taint ((struct taint *) taint);
+          assert (!taint_has_tainted_successor (taint));
+        }
+    }
+  return true; 
 }
 
 static trns_proc_func case_limit_trns_proc;
@@ -983,32 +704,8 @@
   return ds->dict;
 }
 
-
-/* Set or replace dataset DS's dictionary with DICT.
-   The old dictionary is destroyed */
-void
-dataset_set_dict (struct dataset *ds, struct dictionary *dict)
-{
-  struct dictionary *old_dict = ds->dict;
-
-  dict_copy_callbacks (dict, ds->dict);
-  ds->dict = dict;
-
-  if ( ds->replace_dict )
-    ds->replace_dict (dict);
-
-  dict_destroy (old_dict);
-}
-
 void 
 dataset_need_lag (struct dataset *ds, int n_before)
 {
   ds->n_lag = MAX (ds->n_lag, n_before);
 }
-
-struct casefile_factory *
-dataset_get_casefile_factory (const struct dataset *ds)
-{
-  return ds->cf_factory;
-}
-
Index: merge/src/data/procedure.h
===================================================================
--- merge.orig/src/data/procedure.h     2007-06-05 09:16:10.000000000 -0700
+++ merge/src/data/procedure.h  2007-06-05 09:18:06.000000000 -0700
@@ -23,16 +23,11 @@
 #include <stdbool.h>
 
 #include <data/transformations.h>
-#include <data/casefile-factory.h>
 #include <libpspp/compiler.h>
 
-struct ccase;
-struct casefile;
-struct case_sink;
-struct case_source;
-
+struct casereader;
 struct dataset;
-
+struct dictionary;
 
 /* Transformations. */
 
@@ -44,10 +39,6 @@
                                         trns_free_func *, void *);
 size_t next_transformation (const struct dataset *ds);
 
-void discard_variables (struct dataset *ds);
-
-
-
 bool proc_cancel_all_transformations (struct dataset *ds);
 struct trns_chain *proc_capture_transformations (struct dataset *ds);
 
@@ -59,63 +50,35 @@
 /* Procedures. */
 
 struct dictionary ;
-typedef void  replace_source_callback (struct case_source *);
+typedef void  replace_source_callback (struct casereader *);
 typedef void  replace_dictionary_callback (struct dictionary *);
 
 
-struct dataset * create_dataset (struct casefile_factory *fact,
-                                replace_source_callback *,
-                                replace_dictionary_callback *
-                                );
+struct dataset * create_dataset (replace_source_callback *,
+                                replace_dictionary_callback *);
 
 void destroy_dataset (struct dataset *);
 
-struct casefile_factory *dataset_get_casefile_factory (const struct dataset *);
-
-void proc_set_source (struct dataset *ds, struct case_source *);
-bool proc_has_source (const struct dataset *ds);
-
-void proc_set_sink (struct dataset *ds, struct case_sink *);
-struct casefile *proc_capture_output (struct dataset *ds);
-
-typedef bool casefile_func (const struct casefile *, void *);
-typedef bool case_func (const struct ccase *, void *, const struct dataset *);
-typedef void begin_func (const struct ccase *, void *, const struct dataset*);
+void proc_discard_active_file (struct dataset *);
+void proc_set_active_file (struct dataset *,
+                           struct casereader *, struct dictionary *);
+bool proc_set_active_file_data (struct dataset *, struct casereader *);
+bool proc_has_active_file (const struct dataset *ds);
 
-typedef bool end_func (void *, const struct dataset *);
-
-typedef bool split_func (const struct ccase *, const struct casefile *,
-                             void *, const struct dataset *);
-
-
-
-bool procedure (struct dataset *ds, case_func *, void *aux)  WARN_UNUSED_RESULT;
-
-bool procedure_with_splits (struct dataset *ds, 
-                           begin_func *,
-                            case_func *,
-                           end_func *,
-                            void *aux)
-     WARN_UNUSED_RESULT;
-bool multipass_procedure (struct dataset *ds, casefile_func *, void  *aux)
-     WARN_UNUSED_RESULT;
-bool multipass_procedure_with_splits (struct dataset *ds,
-                                          split_func *,
-                                          void *aux)
-     WARN_UNUSED_RESULT;
+void proc_discard_output (struct dataset *ds);
 
+bool proc_execute (struct dataset *ds);
 time_t time_of_last_procedure (struct dataset *ds);
 
-void proc_open (struct dataset *);
-bool proc_read (struct dataset *, struct ccase **);
-bool proc_close (struct dataset *);
+struct casereader *proc_open (struct dataset *);
+bool proc_is_open (const struct dataset *);
+bool proc_commit (struct dataset *);
+
+bool dataset_end_of_command (struct dataset *);
 
+struct dictionary *dataset_dict (const struct dataset *ds);
 
 struct ccase *lagged_case (const struct dataset *ds, int n_before);
-
-inline struct dictionary *dataset_dict (const struct dataset *ds);
-inline void dataset_set_dict ( struct dataset *ds, struct dictionary *dict);
-
 void dataset_need_lag (struct dataset *ds, int n_before);
 
 #endif /* procedure.h */
Index: merge/src/data/scratch-handle.c
===================================================================
--- merge.orig/src/data/scratch-handle.c        2007-06-05 09:16:10.000000000 -0700
+++ merge/src/data/scratch-handle.c     2007-06-05 09:18:06.000000000 -0700
@@ -18,9 +18,9 @@
 
 #include <config.h>
 #include <stdlib.h>
-#include "scratch-handle.h"
-#include "casefile.h"
-#include "dictionary.h"
+#include <data/casereader.h>
+#include <data/scratch-handle.h>
+#include <data/dictionary.h>
 
 /* Destroys HANDLE. */
 void
@@ -29,7 +29,7 @@
   if (handle != NULL) 
     {
       dict_destroy (handle->dictionary);
-      casefile_destroy (handle->casefile);
+      casereader_destroy (handle->casereader);
       free (handle);
     }
 }
Index: merge/src/data/scratch-handle.h
===================================================================
--- merge.orig/src/data/scratch-handle.h        2007-06-05 09:16:10.000000000 -0700
+++ merge/src/data/scratch-handle.h     2007-06-05 09:18:06.000000000 -0700
@@ -25,7 +25,7 @@
 struct scratch_handle 
   {
     struct dictionary *dictionary;      /* Dictionary. */
-    struct casefile *casefile;          /* Cases. */
+    struct casereader *casereader;      /* Cases. */
   };
 
 void scratch_handle_destroy (struct scratch_handle *);
Index: merge/src/data/scratch-reader.c
===================================================================
--- merge.orig/src/data/scratch-reader.c        2007-06-05 09:16:10.000000000 -0700
+++ merge/src/data/scratch-reader.c     2007-06-05 09:18:06.000000000 -0700
@@ -22,11 +22,11 @@
 
 #include <stdlib.h>
 
-#include "casefile.h"
 #include "dictionary.h"
 #include "file-handle-def.h"
 #include "scratch-handle.h"
 #include <data/case.h>
+#include <data/casereader.h>
 #include <libpspp/message.h>
 
 #include "xalloc.h"
@@ -34,31 +34,20 @@
 #include "gettext.h"
 #define _(msgid) gettext (msgid)
 
-/* A reader for a scratch file. */
-struct scratch_reader 
-  {
-    struct file_handle *fh;             /* Underlying file handle. */
-    struct casereader *casereader;      /* Case reader. */
-  };
-
 /* Opens FH, which must have referent type FH_REF_SCRATCH, and
    returns a scratch_reader for it, or a null pointer on
    failure.  Stores the dictionary for the scratch file into
-   *DICT.
-
-   If you use an any_reader instead, then your code can be more
-   flexible without being any harder to write. */
-struct scratch_reader *
+   *DICT. */
+struct casereader *
 scratch_reader_open (struct file_handle *fh, struct dictionary **dict)
 {
   struct scratch_handle *sh;
-  struct scratch_reader *reader;
   
   if (!fh_open (fh, FH_REF_SCRATCH, "scratch file", "rs"))
     return NULL;
   
   sh = fh_get_scratch_handle (fh);
-  if (sh == NULL) 
+  if (sh == NULL || sh->casereader == NULL) 
     {
       msg (SE, _("Scratch file handle %s has not yet been written, "
                  "using SAVE or another procedure, so it cannot yet "
@@ -68,42 +57,5 @@
     }
 
   *dict = dict_clone (sh->dictionary);
-  reader = xmalloc (sizeof *reader);
-  reader->fh = fh;
-  reader->casereader = casefile_get_reader (sh->casefile, NULL);
-  return reader;
-}
-
-/* Reads a case from READER and copies it into C.
-   Returns true if successful, false on error or at end of file. */
-bool
-scratch_reader_read_case (struct scratch_reader *reader, struct ccase *c)
-{
-  struct ccase tmp;
-  if (casereader_read (reader->casereader, &tmp)) 
-    {
-      case_copy (c, 0, &tmp, 0,
-                 casefile_get_value_cnt (
-                   casereader_get_casefile (reader->casereader)));
-      case_destroy (&tmp);
-      return true;
-    }
-  else
-    return false;
-}
-
-/* Returns true if an I/O error occurred on READER, false otherwise. */
-bool
-scratch_reader_error (const struct scratch_reader *reader) 
-{
-  return casefile_error (casereader_get_casefile (reader->casereader));
-}
-
-/* Closes READER. */
-void
-scratch_reader_close (struct scratch_reader *reader) 
-{
-  fh_close (reader->fh, "scratch file", "rs");
-  casereader_destroy (reader->casereader);
-  free (reader);
+  return casereader_clone (sh->casereader);
 }
Index: merge/src/data/scratch-reader.h
===================================================================
--- merge.orig/src/data/scratch-reader.h        2007-06-05 09:16:10.000000000 -0700
+++ merge/src/data/scratch-reader.h     2007-06-05 09:18:06.000000000 -0700
@@ -24,10 +24,7 @@
 struct dictionary;
 struct file_handle;
 struct ccase;
-struct scratch_reader *scratch_reader_open (struct file_handle *,
-                                            struct dictionary **);
-bool scratch_reader_read_case (struct scratch_reader *, struct ccase *);
-bool scratch_reader_error (const struct scratch_reader *);
-void scratch_reader_close (struct scratch_reader *);
+struct casereader *scratch_reader_open (struct file_handle *,
+                                        struct dictionary **);
 
 #endif /* scratch-reader.h */
Index: merge/src/data/scratch-writer.c
===================================================================
--- merge.orig/src/data/scratch-writer.c        2007-06-05 09:16:10.000000000 -0700
+++ merge/src/data/scratch-writer.c     2007-06-05 09:18:06.000000000 -0700
@@ -17,14 +17,21 @@
    02110-1301, USA. */
 
 #include <config.h>
+
 #include "scratch-writer.h"
+
 #include <stdlib.h>
-#include "case.h"
-#include "casefile.h"
-#include "fastfile.h"
-#include "dictionary.h"
-#include "file-handle-def.h"
-#include "scratch-handle.h"
+
+#include <data/case.h>
+#include <data/casereader.h>
+#include <data/casewriter-provider.h>
+#include <data/casewriter.h>
+#include <data/dictionary.h>
+#include <data/file-handle-def.h>
+#include <data/scratch-handle.h>
+#include <libpspp/compiler.h>
+#include <libpspp/taint.h>
+
 #include "xalloc.h"
 
 /* A scratch file writer. */
@@ -33,16 +40,16 @@
     struct scratch_handle *handle;      /* Underlying scratch handle. */
     struct file_handle *fh;             /* Underlying file handle. */
     struct dict_compactor *compactor;   /* Compacts into handle->dictionary. */
+    struct casewriter *subwriter;       /* Data output. */
   };
 
+static struct casewriter_class scratch_writer_casewriter_class;
+
 /* Opens FH, which must have referent type FH_REF_SCRATCH, and
    returns a scratch_writer for it, or a null pointer on
    failure.  Cases stored in the scratch_writer will be expected
-   to be drawn from DICTIONARY.
-
-   If you use an any_writer instead, then your code can be more
-   flexible without being any harder to write. */
-struct scratch_writer *
+   to be drawn from DICTIONARY. */
+struct casewriter *
 scratch_writer_open (struct file_handle *fh,
                      const struct dictionary *dictionary) 
 {
@@ -50,6 +57,7 @@
   struct scratch_writer *writer;
   struct dictionary *scratch_dict;
   struct dict_compactor *compactor;
+  struct casewriter *casewriter;
 
   if (!fh_open (fh, FH_REF_SCRATCH, "scratch file", "we"))
     return NULL;
@@ -72,50 +80,57 @@
   /* Create new contents. */
   sh = xmalloc (sizeof *sh);
   sh->dictionary = scratch_dict;
-  sh->casefile = fastfile_create (dict_get_next_value_idx (sh->dictionary));
+  sh->casereader = NULL;
 
   /* Create writer. */
   writer = xmalloc (sizeof *writer);
   writer->handle = sh;
   writer->fh = fh;
   writer->compactor = compactor;
+  writer->subwriter = autopaging_writer_create (dict_get_next_value_idx (
+                                               scratch_dict));
 
   fh_set_scratch_handle (fh, sh);
-  return writer;
+  casewriter = casewriter_create (&scratch_writer_casewriter_class, writer);
+  taint_propagate (casewriter_get_taint (writer->subwriter),
+                   casewriter_get_taint (casewriter));
+  return casewriter;
 }
 
 /* Writes case C to WRITER. */
-bool
-scratch_writer_write_case (struct scratch_writer *writer,
-                           const struct ccase *c) 
+static void
+scratch_writer_casewriter_write (struct casewriter *w UNUSED, void *writer_,
+                                 struct ccase *c) 
 {
+  struct scratch_writer *writer = writer_;
   struct scratch_handle *handle = writer->handle;
+  struct ccase tmp;
   if (writer->compactor) 
     {
-      struct ccase tmp_case;
-      case_create (&tmp_case, dict_get_next_value_idx (handle->dictionary));
-      dict_compactor_compact (writer->compactor, &tmp_case, c);
-      return casefile_append_xfer (handle->casefile, &tmp_case);
+      case_create (&tmp, dict_get_next_value_idx (handle->dictionary));
+      dict_compactor_compact (writer->compactor, &tmp, c);
+      case_destroy (c);
     }
-  else 
-    return casefile_append (handle->casefile, c);
-}
-
-/* Returns true if an I/O error occurred on WRITER, false otherwise. */
-bool
-scratch_writer_error (const struct scratch_writer *writer) 
-{
-  return casefile_error (writer->handle->casefile);
+  else
+    case_move (&tmp, c);
+  casewriter_write (writer->subwriter, &tmp);
 }
 
-/* Closes WRITER.
-   Returns true if successful, false if an I/O error occurred on WRITER. */
-bool
-scratch_writer_close (struct scratch_writer *writer) 
+/* Closes WRITER. */
+static void
+scratch_writer_casewriter_destroy (struct casewriter *w UNUSED, void *writer_) 
 {
-  struct casefile *cf = writer->handle->casefile;
-  bool ok = casefile_error (cf);
+  struct scratch_writer *writer = writer_;
+  struct casereader *reader = casewriter_make_reader (writer->subwriter);
+  if (!casereader_error (reader))
+    writer->handle->casereader = reader;
   fh_close (writer->fh, "scratch file", "we");
   free (writer);
-  return ok;
 }
+
+static struct casewriter_class scratch_writer_casewriter_class = 
+  {
+    scratch_writer_casewriter_write,
+    scratch_writer_casewriter_destroy,
+    NULL,
+  };
Index: merge/src/data/scratch-writer.h
===================================================================
--- merge.orig/src/data/scratch-writer.h        2007-06-05 09:16:10.000000000 -0700
+++ merge/src/data/scratch-writer.h     2007-06-05 09:18:06.000000000 -0700
@@ -24,10 +24,7 @@
 struct dictionary;
 struct file_handle;
 struct ccase;
-struct scratch_writer *scratch_writer_open (struct file_handle *,
-                                            const struct dictionary *);
-bool scratch_writer_write_case (struct scratch_writer *, const struct ccase *);
-bool scratch_writer_error (const struct scratch_writer *);
-bool scratch_writer_close (struct scratch_writer *);
+struct casewriter *scratch_writer_open (struct file_handle *,
+                                        const struct dictionary *);
 
 #endif /* scratch-writer.h */
Index: merge/src/data/sys-file-reader.c
===================================================================
--- merge.orig/src/data/sys-file-reader.c       2007-06-05 09:16:10.000000000 -0700
+++ merge/src/data/sys-file-reader.c    2007-06-05 09:18:06.000000000 -0700
@@ -18,8 +18,8 @@
 
 #include <config.h>
 
-#include "sys-file-reader.h"
-#include "sys-file-private.h"
+#include <data/sys-file-reader.h>
+#include <data/sys-file-private.h>
 
 #include <errno.h>
 #include <float.h>
@@ -38,15 +38,17 @@
 #include <libpspp/hash.h>
 #include <libpspp/array.h>
 
-#include "case.h"
-#include "dictionary.h"
-#include "file-handle-def.h"
-#include "file-name.h"
-#include "format.h"
-#include "missing-values.h"
-#include "value-labels.h"
-#include "variable.h"
-#include "value.h"
+#include <data/case.h>
+#include <data/casereader-provider.h>
+#include <data/casereader.h>
+#include <data/dictionary.h>
+#include <data/file-handle-def.h>
+#include <data/file-name.h>
+#include <data/format.h>
+#include <data/missing-values.h>
+#include <data/value-labels.h>
+#include <data/variable.h>
+#include <data/value.h>
 
 #include "c-ctype.h"
 #include "inttostr.h"
@@ -69,11 +71,12 @@
     struct file_handle *fh;     /* File handle. */
     FILE *file;                 /* File stream. */
     bool error;                 /* I/O or corruption error? */
+    size_t value_cnt;           /* Number of "union value"s in struct case. */
 
     /* File format. */
     enum integer_format integer_format; /* On-disk integer format. */
     enum float_format float_format; /* On-disk floating point format. */
-    int value_cnt;             /* Number of 8-byte units per case. */
+    int flt64_cnt;             /* Number of 8-byte units per case. */
     struct sfm_var *vars;       /* Variables. */
     size_t var_cnt;             /* Number of variables. */
     bool has_long_var_names;    /* File has a long variable name map */
@@ -93,6 +96,10 @@
     int case_index;             /* Index into case. */
   };
 
+static struct casereader_class sys_file_casereader_class;
+
+static bool close_reader (struct sfm_reader *);
+
 static struct variable **make_var_by_value_idx (struct sfm_reader *,
                                                 struct dictionary *);
 static struct variable *lookup_var_by_value_idx (struct sfm_reader *,
@@ -125,6 +132,8 @@
                                         struct variable_to_value_map *,
                                         struct variable **var, char **value,
                                         int *warning_cnt);
+
+static bool close_reader (struct sfm_reader *r);
 
 /* Dictionary reader. */
 
@@ -135,7 +144,7 @@
   };
 
 static void read_header (struct sfm_reader *, struct dictionary *,
-                         int *weight_idx, int *claimed_value_cnt,
+                         int *weight_idx, int *claimed_flt64_cnt,
                          struct sfm_read_info *);
 static void read_variable_record (struct sfm_reader *, struct dictionary *,
                                   int *format_warning_cnt);
@@ -169,7 +178,7 @@
    reading.  Reads the system file's dictionary into *DICT.
    If INFO is non-null, then it receives additional info about the
    system file. */
-struct sfm_reader *
+struct casereader *
 sfm_open_reader (struct file_handle *fh, struct dictionary **dict,
                  struct sfm_read_info *info)
 {
@@ -177,7 +186,7 @@
   struct variable **var_by_value_idx;
   int format_warning_cnt = 0;
   int weight_idx;
-  int claimed_value_cnt;
+  int claimed_flt64_cnt;
   int rec_type;
   size_t i;
 
@@ -191,14 +200,14 @@
   r->fh = fh;
   r->file = fn_open (fh_get_file_name (fh), "rb");
   r->error = false;
-  r->value_cnt = 0;
+  r->flt64_cnt = 0;
   r->has_vls = false;
   r->has_long_var_names = false;
   r->opcode_idx = sizeof r->opcodes;
 
   if (setjmp (r->bail_out)) 
     {
-      sfm_close_reader (r);
+      close_reader (r);
       dict_destroy (*dict);
       *dict = NULL;
       return NULL;
@@ -212,7 +221,7 @@
     }
 
   /* Read header. */
-  read_header (r, *dict, &weight_idx, &claimed_value_cnt, info);
+  read_header (r, *dict, &weight_idx, &claimed_flt64_cnt, info);
 
   /* Read all the variable definition records. */
   rec_type = read_int32 (r);
@@ -280,10 +289,10 @@
   /* Read record 999 data, which is just filler. */
   read_int32 (r);
 
-  if (claimed_value_cnt != -1 && claimed_value_cnt != r->value_cnt)
+  if (claimed_flt64_cnt != -1 && claimed_flt64_cnt != r->flt64_cnt)
     sys_warn (r, _("File header claims %d variable positions but "
                    "%d were read from file."),
-              claimed_value_cnt, r->value_cnt);
+              claimed_flt64_cnt, r->flt64_cnt);
 
   /* Create an index of dictionary variable widths for
      sfm_read_case to use.  We cannot use the `struct variable's
@@ -300,36 +309,48 @@
     }
 
   pool_free (r->pool, var_by_value_idx);
-  return r;
+  r->value_cnt = dict_get_next_value_idx (*dict);
+  return casereader_create_sequential (NULL, r->value_cnt, CASENUMBER_MAX,
+                                       &sys_file_casereader_class, r);
 }
 
-/* Closes a system file after we're done with it. */
-void
-sfm_close_reader (struct sfm_reader *r)
+/* Closes a system file after we're done with it.
+   Returns true if successful, false if an I/O error has
+   occurred on READER. */
+static bool
+close_reader (struct sfm_reader *r)
 {
+  bool error;
+
   if (r == NULL)
-    return;
+    return true;
 
   if (r->file)
     {
-      if (fn_close (fh_get_file_name (r->fh), r->file) == EOF)
-        msg (ME, _("Error closing system file \"%s\": %s."),
-             fh_get_file_name (r->fh), strerror (errno));
+      if (fn_close (fh_get_file_name (r->fh), r->file) == EOF) 
+        {
+          msg (ME, _("Error closing system file \"%s\": %s."),
+               fh_get_file_name (r->fh), strerror (errno));
+          r->error = true;
+        }
       r->file = NULL;
     }
 
   if (r->fh != NULL)
     fh_close (r->fh, "system file", "rs");
 
+  error = r->error;
   pool_destroy (r->pool);
+
+  return !error;
 }
 
-/* Returns true if an I/O error has occurred on READER, false
-   otherwise. */
-bool
-sfm_read_error (const struct sfm_reader *reader) 
+/* Destroys READER. */
+static void
+sys_file_casereader_destroy (struct casereader *reader UNUSED, void *r_) 
 {
-  return reader->error;
+  struct sfm_reader *r = r_;
+  close_reader (r);
 }
 
 /* Returns true if FILE is an SPSS system file,
@@ -350,13 +371,13 @@
    Sets DICT's file label to the system file's label.
    Sets *WEIGHT_IDX to 0 if the system file is unweighted,
    or to the value index of the weight variable otherwise.
-   Sets *CLAIMED_VALUE_CNT to the number of values that the file
+   Sets *CLAIMED_FLT64_CNT to the number of values that the file
    claims to have (although it is not always correct).
    If INFO is non-null, initializes *INFO with header
    information. */   
 static void
 read_header (struct sfm_reader *r, struct dictionary *dict,
-             int *weight_idx, int *claimed_value_cnt,
+             int *weight_idx, int *claimed_flt64_cnt,
              struct sfm_read_info *info)
 {
   char rec_type[5];
@@ -385,9 +406,9 @@
           && r->integer_format != INTEGER_LSB_FIRST))
     sys_error (r, _("This is not an SPSS system file."));
 
-  *claimed_value_cnt = read_int32 (r);
-  if (*claimed_value_cnt < 0 || *claimed_value_cnt > INT_MAX / 16)
-    *claimed_value_cnt = -1;
+  *claimed_flt64_cnt = read_int32 (r);
+  if (*claimed_flt64_cnt < 0 || *claimed_flt64_cnt > INT_MAX / 16)
+    *claimed_flt64_cnt = -1;
 
   r->compressed = read_int32 (r) != 0;
 
@@ -564,7 +585,7 @@
   /* Account for values.
      Skip long string continuation records, if any. */
   nv = width == 0 ? 1 : DIV_RND_UP (width, 8);
-  r->value_cnt += nv;
+  r->flt64_cnt += nv;
   if (width > 8)
     {
       int i;
@@ -1110,29 +1131,39 @@
 static bool read_compressed_string (struct sfm_reader *, char *);
 static bool read_whole_strings (struct sfm_reader *, char *, size_t);
 
-/* Reads one case from READER's file into C.  Returns nonzero
-   only if successful. */
-int
-sfm_read_case (struct sfm_reader *r, struct ccase *c)
+/* Reads one case from READER's file into C.  Returns true only
+   if successful. */
+static bool
+sys_file_casereader_read (struct casereader *reader, void *r_,
+                          struct ccase *c)
 {
+  struct sfm_reader *r = r_;
   if (r->error)
-    return 0;
+    return false;
 
-  if (setjmp (r->bail_out))
-    return 0;
+  case_create (c, r->value_cnt);
+  if (setjmp (r->bail_out)) 
+    {
+      casereader_force_error (reader);
+      case_destroy (c);
+      return false; 
+    }
 
   if (!r->compressed && sizeof (double) == 8 && !r->has_vls) 
     {
       /* Fast path.  Read the whole case directly. */
       if (!try_read_bytes (r, case_data_all_rw (c),
-                         sizeof (union value) * r->value_cnt))
-        return 0;
+                           sizeof (union value) * r->flt64_cnt)) 
+        {
+          case_destroy (c);
+          return false; 
+        }
 
       /* Convert floating point numbers to native format if needed. */
       if (r->float_format != FLOAT_NATIVE_DOUBLE) 
         {
           int i;
-          
+
           for (i = 0; i < r->var_cnt; i++) 
             if (r->vars[i].width == 0) 
               {
@@ -1140,7 +1171,7 @@
                 float_convert (r->float_format, d, FLOAT_NATIVE_DOUBLE, d); 
               }
         }
-      return 1;
+      return true;
     }
   else 
     {
@@ -1194,12 +1225,13 @@
                 }
             }
         }
-      return 1; 
+      return true; 
 
     eof:
+      case_destroy (c);
       if (i != 0)
         partial_record (r);
-      return 0;
+      return false;
     }
 }
 
@@ -1386,7 +1418,7 @@
   int i;
 
   var_by_value_idx = pool_nmalloc (r->pool,
-                                   r->value_cnt, sizeof *var_by_value_idx);
+                                   r->flt64_cnt, sizeof *var_by_value_idx);
   for (i = 0; i < dict_get_var_cnt (dict); i++) 
     {
       struct variable *v = dict_get_var (dict, i);
@@ -1397,7 +1429,7 @@
       for (j = 1; j < nv; j++)
         var_by_value_idx[value_idx++] = NULL;
     }
-  assert (value_idx == r->value_cnt);
+  assert (value_idx == r->flt64_cnt);
 
   return var_by_value_idx;
 }
@@ -1411,9 +1443,9 @@
 {
   struct variable *var;
   
-  if (value_idx < 1 || value_idx > r->value_cnt)
+  if (value_idx < 1 || value_idx > r->flt64_cnt)
     sys_error (r, _("Variable index %d not in valid range 1...%d."),
-               value_idx, r->value_cnt);
+               value_idx, r->flt64_cnt);
 
   var = var_by_value_idx[value_idx - 1];
   if (var == NULL)
@@ -1686,4 +1718,11 @@
     float_convert (r->float_format, flt64, FLOAT_NATIVE_DOUBLE, &x);
   return x;
 }
-
+
+static struct casereader_class sys_file_casereader_class = 
+  {
+    sys_file_casereader_read,
+    sys_file_casereader_destroy,
+    NULL,
+    NULL,
+  };
Index: merge/src/data/sys-file-reader.h
===================================================================
--- merge.orig/src/data/sys-file-reader.h       2007-06-05 09:16:10.000000000 -0700
+++ merge/src/data/sys-file-reader.h    2007-06-05 09:18:06.000000000 -0700
@@ -42,12 +42,9 @@
 struct dictionary;
 struct file_handle;
 struct ccase;
-struct sfm_reader *sfm_open_reader (struct file_handle *,
+struct casereader *sfm_open_reader (struct file_handle *,
                                     struct dictionary **,
                                     struct sfm_read_info *);
-int sfm_read_case (struct sfm_reader *, struct ccase *);
-bool sfm_read_error (const struct sfm_reader *);
-void sfm_close_reader (struct sfm_reader *);
 bool sfm_detect (FILE *);
 
 #endif /* sys-file-reader.h */
Index: merge/src/data/sys-file-writer.c
===================================================================
--- merge.orig/src/data/sys-file-writer.c       2007-06-05 09:16:10.000000000 -0700
+++ merge/src/data/sys-file-writer.c    2007-06-05 09:18:06.000000000 -0700
@@ -37,14 +37,16 @@
 #include <libpspp/str.h>
 #include <libpspp/version.h>
 
-#include "case.h"
-#include "dictionary.h"
-#include "file-handle-def.h"
-#include "format.h"
-#include "missing-values.h"
-#include "settings.h"
-#include "value-labels.h"
-#include "variable.h"
+#include <data/case.h>
+#include <data/casewriter-provider.h>
+#include <data/casewriter.h>
+#include <data/dictionary.h>
+#include <data/file-handle-def.h>
+#include <data/format.h>
+#include <data/missing-values.h>
+#include <data/settings.h>
+#include <data/value-labels.h>
+#include <data/variable.h>
 
 #include "minmax.h"
 
@@ -144,6 +146,8 @@
     size_t flt64_cnt;           /* Number of flt64 elements. */
   };
 
+static struct casewriter_class sys_file_casewriter_class;
+
 static char *append_string_max (char *, const char *, const char *);
 static void write_header (struct sfm_writer *, const struct dictionary *);
 static void buf_write (struct sfm_writer *, const void *, size_t);
@@ -164,6 +168,9 @@
 
 static void write_documents (struct sfm_writer *, const struct dictionary *);
 
+bool write_error (const struct sfm_writer *);
+bool close_writer (struct sfm_writer *);
+
 static inline int
 var_flt64_cnt (const struct variable *v) 
 {
@@ -219,7 +226,7 @@
    No reference to D is retained, so it may be modified or
    destroyed at will after this function returns.  D is not
    modified by this function, except to assign short names. */
-struct sfm_writer *
+struct casewriter *
 sfm_open_writer (struct file_handle *fh, struct dictionary *d,
                  struct sfm_write_options opts)
 {
@@ -374,13 +381,13 @@
       w->y = (unsigned char *) w->ptr;
     }
 
-  if (sfm_write_error (w))
+  if (write_error (w))
     goto error;
   
-  return w;
+  return casewriter_create (&sys_file_casewriter_class, w);
 
  error:
-  sfm_close_writer (w);
+  close_writer (w);
   return NULL;
 
  open_error:
@@ -925,13 +932,18 @@
 
 static void write_compressed_data (struct sfm_writer *w, const flt64 *elem);
 
-/* Writes case C to system file W.
-   Returns 1 if successful, 0 if an I/O error occurred. */
-bool
-sfm_write_case (struct sfm_writer *w, const struct ccase *c)
+/* Writes case C to system file W. */
+static void
+sys_file_casewriter_write (struct casewriter *writer, void *w_,
+                           struct ccase *c)
 {
-  if (ferror (w->file))
-    return 0;
+  struct sfm_writer *w = w_;
+  if (ferror (w->file)) 
+    {
+      casewriter_force_error (writer);
+      case_destroy (c);
+      return; 
+    }
   
   w->case_cnt++;
 
@@ -990,8 +1002,16 @@
 
       local_free (bounce); 
     }
-  
-  return !sfm_write_error (w);
+
+  case_destroy (c);
+}
+
+static void
+sys_file_casewriter_destroy (struct casewriter *writer, void *w_) 
+{
+  struct sfm_writer *w = w_;
+  if (!close_writer (w))
+    casewriter_force_error (writer);
 }
 
 static void
@@ -1057,7 +1077,7 @@
 
 /* Returns true if an I/O error has occurred on WRITER, false otherwise. */
 bool
-sfm_write_error (const struct sfm_writer *writer)
+write_error (const struct sfm_writer *writer)
 {
   return ferror (writer->file);
 }
@@ -1065,7 +1085,7 @@
 /* Closes a system file after we're done with it.
    Returns true if successful, false if an I/O error occurred. */
 bool
-sfm_close_writer (struct sfm_writer *w)
+close_writer (struct sfm_writer *w)
 {
   bool ok;
   
@@ -1083,7 +1103,7 @@
         }
       fflush (w->file);
 
-      ok = !sfm_write_error (w);
+      ok = !write_error (w);
 
       /* Seek back to the beginning and update the number of cases.
          This is just a courtesy to later readers, so there's no need
@@ -1112,3 +1132,10 @@
 
   return ok;
 }
+
+static struct casewriter_class sys_file_casewriter_class = 
+  {
+    sys_file_casewriter_write,
+    sys_file_casewriter_destroy,
+    NULL,
+  };
Index: merge/src/data/sys-file-writer.h
===================================================================
--- merge.orig/src/data/sys-file-writer.h       2007-06-05 09:16:10.000000000 
-0700
+++ merge/src/data/sys-file-writer.h    2007-06-05 09:18:06.000000000 -0700
@@ -34,12 +34,8 @@
 struct file_handle;
 struct dictionary;
 struct ccase;
-struct sfm_writer *sfm_open_writer (struct file_handle *, struct dictionary *,
+struct casewriter *sfm_open_writer (struct file_handle *, struct dictionary *,
                                     struct sfm_write_options);
 struct sfm_write_options sfm_writer_default_options (void);
 
-bool sfm_write_case (struct sfm_writer *, const struct ccase *);
-bool sfm_write_error (const struct sfm_writer *);
-bool sfm_close_writer (struct sfm_writer *);
-
 #endif /* sys-file-writer.h */
Index: merge/src/language/command.c
===================================================================
--- merge.orig/src/language/command.c   2007-06-05 09:16:10.000000000 -0700
+++ merge/src/language/command.c        2007-06-05 09:18:06.000000000 -0700
@@ -26,6 +26,7 @@
 #include <errno.h>
 #include <unistd.h>
 
+#include <data/casereader.h>
 #include <data/dictionary.h>
 #include <data/procedure.h>
 #include <data/settings.h>
@@ -147,8 +148,11 @@
   if (cmd_result_is_failure (result))
     lex_discard_rest_of_command (lexer);
 
+  assert (!proc_is_open (ds));
   unset_cmd_algorithm ();
   dict_clear_aux (dataset_dict (ds));
+  if (!dataset_end_of_command (ds))
+    result = CMD_CASCADING_FAILURE;
 
   return result;
 }
@@ -158,7 +162,7 @@
 {
   const struct dictionary *dict = dataset_dict (ds);
   return cmd_parse_in_state (lexer, ds,
-                            proc_has_source (ds) &&
+                            proc_has_active_file (ds) &&
                             dict_get_var_cnt (dict) > 0 ?
                             CMD_STATE_DATA : CMD_STATE_INITIAL);
 }
@@ -203,7 +207,7 @@
     {
       msg (SE, _("%s may be used only in enhanced syntax mode."),
            command->name);
-       return CMD_FAILURE;
+      return CMD_FAILURE;
     }
   else if (!in_correct_state (command, state)) 
     {
@@ -687,7 +691,8 @@
 int
 cmd_execute (struct lexer *lexer, struct dataset *ds)
 {
-  if (!procedure (ds, NULL, NULL))
+  bool ok = casereader_destroy (proc_open (ds));
+  if (!proc_commit (ds) || !ok)
     return CMD_CASCADING_FAILURE;
   return lex_end_of_command (lexer);
 }
@@ -840,7 +845,7 @@
 int
 cmd_new_file (struct lexer *lexer, struct dataset *ds)
 {
-  discard_variables (ds);
+  proc_discard_active_file (ds);
 
   return lex_end_of_command (lexer);
 }
Index: merge/src/language/command.def
===================================================================
--- merge.orig/src/language/command.def 2007-06-05 09:16:10.000000000 -0700
+++ merge/src/language/command.def      2007-06-05 09:18:06.000000000 -0700
@@ -128,7 +128,6 @@
 DEF_CMD (S_INPUT_PROGRAM, 0, "REREAD", cmd_reread)
 
 /* Commands for testing PSPP. */
-DEF_CMD (S_ANY, F_TESTING, "DEBUG CASEFILE", cmd_debug_casefile)
 DEF_CMD (S_ANY, F_TESTING, "DEBUG DATASHEET", cmd_debug_datasheet)
 DEF_CMD (S_ANY, F_TESTING, "DEBUG EVALUATE", cmd_debug_evaluate)
 DEF_CMD (S_ANY, F_TESTING, "DEBUG MOMENTS", cmd_debug_moments)
Index: merge/src/language/control/do-if.c
===================================================================
--- merge.orig/src/language/control/do-if.c     2007-06-05 09:16:10.000000000 -0700
+++ merge/src/language/control/do-if.c  2007-06-05 09:18:06.000000000 -0700
@@ -21,6 +21,7 @@
 #include <stdlib.h>
 
 #include "control-stack.h"
+#include <data/case.h>
 #include <data/procedure.h>
 #include <data/transformations.h>
 #include <data/value.h>
Index: merge/src/language/data-io/data-list.c
===================================================================
--- merge.orig/src/language/data-io/data-list.c 2007-06-05 09:16:10.000000000 -0700
+++ merge/src/language/data-io/data-list.c      2007-06-05 09:18:06.000000000 -0700
@@ -23,10 +23,10 @@
 #include <stdio.h>
 #include <stdlib.h>
 
-#include <data/case-source.h>
 #include <data/case.h>
-#include <data/case-source.h>
 #include <data/data-in.h>
+#include <data/casereader.h>
+#include <data/casereader-provider.h>
 #include <data/dictionary.h>
 #include <data/format.h>
 #include <data/procedure.h>
@@ -99,9 +99,10 @@
     int record_cnt;             /* Number of records. */
     struct string delims;       /* Field delimiters. */
     int skip_records;           /* Records to skip before first case. */
+    size_t value_cnt;           /* Number of `union value's in case. */
   };
 
-static const struct case_source_class data_list_source_class;
+static const struct casereader_class data_list_casereader_class;
 
 static bool parse_fixed (struct lexer *, struct dictionary *dict, 
                         struct pool *tmp_pool, struct data_list_pgm *);
@@ -118,15 +119,14 @@
 int
 cmd_data_list (struct lexer *lexer, struct dataset *ds)
 {
-  struct dictionary *dict = dataset_dict (ds);
+  struct dictionary *dict;
   struct data_list_pgm *dls;
   int table = -1;                /* Print table if nonzero, -1=undecided. */
   struct file_handle *fh = fh_inline_file ();
   struct pool *tmp_pool;
   bool ok;
 
-  if (!in_input_program ())
-    discard_variables (ds);
+  dict = in_input_program () ? dataset_dict (ds) : dict_create ();
 
   dls = pool_create_container (struct data_list_pgm, pool);
   ll_init (&dls->specs);
@@ -178,9 +178,9 @@
          lex_match (lexer, '=');
          if (!lex_force_id (lexer))
            goto error;
-         dls->end = dict_lookup_var (dataset_dict (ds), lex_tokid (lexer));
+         dls->end = dict_lookup_var (dict, lex_tokid (lexer));
          if (!dls->end) 
-            dls->end = dict_create_var_assert (dataset_dict (ds), lex_tokid (lexer), 0);
+            dls->end = dict_create_var_assert (dict, lex_tokid (lexer), 0);
          lex_get (lexer);
        }
       else if (lex_token (lexer) == T_ID)
@@ -273,10 +273,19 @@
   if (dls->reader == NULL)
     goto error;
 
+  dls->value_cnt = dict_get_next_value_idx (dict);
+
   if (in_input_program ())
     add_transformation (ds, data_list_trns_proc, data_list_trns_free, dls);
   else 
-    proc_set_source (ds, create_case_source (&data_list_source_class, dls));
+    {
+      struct casereader *reader;
+      reader = casereader_create_sequential (NULL,
+                                             dict_get_next_value_idx (dict),
+                                             -1, &data_list_casereader_class,
+                                             dls);
+      proc_set_active_file (ds, reader, dict); 
+    }
 
   pool_destroy (tmp_pool);
 
@@ -810,10 +819,12 @@
    Returns true if successful, false at end of file or if an
    I/O error occurred. */
 static bool
-data_list_source_read (struct case_source *source, struct ccase *c)
+data_list_casereader_read (struct casereader *reader UNUSED, void *dls_,
+                           struct ccase *c)
 {
-  struct data_list_pgm *dls = source->aux;
-
+  struct data_list_pgm *dls = dls_;
+  bool ok;
+  
   /* Skip the requested number of records before reading the
      first case. */
   while (dls->skip_records > 0) 
@@ -823,26 +834,28 @@
       dfm_forward_record (dls->reader);
       dls->skip_records--;
     }
-  
-  return read_from_data_list (dls, c);
+
+  case_create (c, dls->value_cnt);
+  ok = read_from_data_list (dls, c);
+  if (!ok)
+    case_destroy (c);
+  return ok;
 }
 
-/* Destroys the source.
-   Returns true if successful read, false if an I/O occurred
-   during destruction or previously. */
-static bool
-data_list_source_destroy (struct case_source *source)
+/* Destroys the casereader. */
+static void
+data_list_casereader_destroy (struct casereader *reader UNUSED, void *dls_)
 {
-  struct data_list_pgm *dls = source->aux;
-  bool ok = !dfm_reader_error (dls->reader);
+  struct data_list_pgm *dls = dls_;
+  if (dfm_reader_error (dls->reader))
+    casereader_force_error (reader);
   data_list_trns_free (dls);
-  return ok;
 }
 
-static const struct case_source_class data_list_source_class = 
+static const struct casereader_class data_list_casereader_class =
   {
-    "DATA LIST",
-    NULL,
-    data_list_source_read,
-    data_list_source_destroy,
+    data_list_casereader_read,
+    data_list_casereader_destroy,
+    NULL,
+    NULL,
   };
Index: merge/src/language/data-io/data-reader.c
===================================================================
--- merge.orig/src/language/data-io/data-reader.c       2007-06-05 09:16:10.000000000 -0700
+++ merge/src/language/data-io/data-reader.c    2007-06-05 09:18:06.000000000 -0700
@@ -25,6 +25,7 @@
 #include <stdio.h>
 #include <stdlib.h>
 
+#include <data/casereader.h>
 #include <data/file-handle-def.h>
 #include <data/file-name.h>
 #include <data/procedure.h>
@@ -444,8 +445,8 @@
 
   /* Input procedure reads from inline file. */
   prompt_set_style (PROMPT_DATA);
-  ok = procedure (ds, NULL, NULL);
-
+  casereader_destroy (proc_open (ds));
+  ok = proc_commit (ds);
   dfm_close_reader (r);
 
   return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
Index: merge/src/language/data-io/get.c
===================================================================
--- merge.orig/src/language/data-io/get.c       2007-06-05 09:16:10.000000000 -0700
+++ merge/src/language/data-io/get.c    2007-06-05 09:18:06.000000000 -0700
@@ -22,17 +22,14 @@
 
 #include <data/any-reader.h>
 #include <data/any-writer.h>
-#include <data/case-sink.h>
-#include <data/case-source.h>
 #include <data/case.h>
-#include <data/casefile.h>
-#include <data/fastfile.h>
+#include <data/casereader.h>
+#include <data/casewriter.h>
 #include <data/format.h>
 #include <data/dictionary.h>
 #include <data/por-file-writer.h>
 #include <data/procedure.h>
 #include <data/settings.h>
-#include <data/storage-stream.h>
 #include <data/sys-file-writer.h>
 #include <data/transformations.h>
 #include <data/value-labels.h>
@@ -46,9 +43,9 @@
 #include <libpspp/compiler.h>
 #include <libpspp/hash.h>
 #include <libpspp/message.h>
-#include <libpspp/message.h>
 #include <libpspp/misc.h>
 #include <libpspp/str.h>
+#include <libpspp/taint.h>
 
 #include "gettext.h"
 #define _(msgid) gettext (msgid)
@@ -71,25 +68,18 @@
     IMPORT_CMD
   };
 
-/* Case reader input program. */
-struct case_reader_pgm 
-  {
-    struct any_reader *reader;  /* File reader. */
-    struct case_map *map;       /* Map from file dict to active file dict. */
-    struct ccase bounce;        /* Bounce buffer. */
-  };
-
-static const struct case_source_class case_reader_source_class;
-
-static void case_reader_pgm_free (struct case_reader_pgm *);
+static void get_translate_case (const struct ccase *, struct ccase *,
+                                void *map_);
+static bool get_destroy_case_map (void *map_);
 
 /* Parses a GET or IMPORT command. */
 static int
 parse_read_command (struct lexer *lexer, struct dataset *ds, enum reader_command type)
 {
-  struct case_reader_pgm *pgm = NULL;
+  struct casereader *reader = NULL;
   struct file_handle *fh = NULL;
   struct dictionary *dict = NULL;
+  struct case_map *map = NULL;
 
   for (;;)
     {
@@ -127,17 +117,10 @@
       goto error;
     }
               
-  discard_variables (ds);
-
-  pgm = xmalloc (sizeof *pgm);
-  pgm->reader = any_reader_open (fh, &dict);
-  pgm->map = NULL;
-  case_nullify (&pgm->bounce);
-  if (pgm->reader == NULL)
+  reader = any_reader_open (fh, &dict);
+  if (reader == NULL)
     goto error;
 
-  case_create (&pgm->bounce, dict_get_next_value_idx (dict));
-
   start_case_map (dict);
 
   while (lex_token (lexer) != '.')
@@ -147,71 +130,40 @@
         goto error;
     }
 
-  pgm->map = finish_case_map (dict);
-
-  dataset_set_dict (ds, dict);
-
-  proc_set_source (ds,
-                  create_case_source (&case_reader_source_class, pgm));
+  map = finish_case_map (dict);
+  if (map != NULL)
+    reader = casereader_create_translator (reader,
+                                           dict_get_next_value_idx (dict),
+                                           get_translate_case,
+                                           get_destroy_case_map,
+                                           map);
+  
+  proc_set_active_file (ds, reader, dict);
 
   return CMD_SUCCESS;
 
  error:
-  case_reader_pgm_free (pgm);
+  casereader_destroy (reader);
   if (dict != NULL)
     dict_destroy (dict);
   return CMD_CASCADING_FAILURE;
 }
 
-/* Frees a struct case_reader_pgm. */
 static void
-case_reader_pgm_free (struct case_reader_pgm *pgm) 
+get_translate_case (const struct ccase *input, struct ccase *output,
+                    void *map_) 
 {
-  if (pgm != NULL) 
-    {
-      any_reader_close (pgm->reader);
-      destroy_case_map (pgm->map);
-      case_destroy (&pgm->bounce);
-      free (pgm);
-    }
+  struct case_map *map = map_;
+  map_case (map, input, output);
 }
 
-/* Reads one case into C.
-   Returns true if successful, false at end of file or if an
-   I/O error occurred. */
 static bool
-case_reader_source_read (struct case_source *source, struct ccase *c)
+get_destroy_case_map (void *map_) 
 {
-  struct case_reader_pgm *pgm = source->aux;
-  if (any_reader_read (pgm->reader, pgm->map == NULL ? c : &pgm->bounce)) 
-    {
-      if (pgm->map != NULL)
-        map_case (pgm->map, &pgm->bounce, c);
-      return true;
-    }
-  else  
-    return false;
-}
-
-/* Destroys the source.
-   Returns true if successful read, false if an I/O occurred
-   during destruction or previously. */
-static bool
-case_reader_source_destroy (struct case_source *source)
-{
-  struct case_reader_pgm *pgm = source->aux;
-  bool ok = !any_reader_error (pgm->reader); 
-  case_reader_pgm_free (pgm);
-  return ok;
+  struct case_map *map = map_;
+  destroy_case_map (map);
+  return true;
 }
-
-static const struct case_source_class case_reader_source_class =
-  {
-    "case reader",
-    NULL,
-    case_reader_source_read,
-    case_reader_source_destroy,
-  };
 
 /* GET. */
 int
@@ -243,30 +195,6 @@
     PROC_CMD            /* Procedure. */
   };
 
-/* File writer plus a case map. */
-struct case_writer
-  {
-    struct any_writer *writer;  /* File writer. */
-    struct case_map *map;       /* Map to output file dictionary
-                                   (null pointer for identity mapping). */
-    struct ccase bounce;        /* Bounce buffer for mapping (if needed). */
-  };
-
-/* Destroys AW. */
-static bool
-case_writer_destroy (struct case_writer *aw)
-{
-  bool ok = true;
-  if (aw != NULL) 
-    {
-      ok = any_writer_close (aw->writer);
-      destroy_case_map (aw->map);
-      case_destroy (&aw->bounce);
-      free (aw);
-    }
-  return ok;
-}
-
 /* Parses SAVE or XSAVE or EXPORT or XEXPORT command.
    WRITER_TYPE identifies the type of file to write,
    and COMMAND_TYPE identifies the type of command.
@@ -277,7 +205,7 @@
    included.
 
    On failure, returns a null pointer. */
-static struct case_writer *
+static struct casewriter *
 parse_write_command (struct lexer *lexer, struct dataset *ds, 
                     enum writer_type writer_type,
                      enum command_type command_type,
@@ -286,7 +214,8 @@
   /* Common data. */
   struct file_handle *handle; /* Output file. */
   struct dictionary *dict;    /* Dictionary for output file. */
-  struct case_writer *aw;      /* Writer. */  
+  struct casewriter *writer;  /* Writer. */
+  struct case_map *map;       /* Map from input data to data for writer. */
 
   /* Common options. */
   bool print_map;             /* Print map?  TODO. */
@@ -303,10 +232,8 @@
 
   handle = NULL;
   dict = dict_clone (dataset_dict (ds));
-  aw = xmalloc (sizeof *aw);
-  aw->writer = NULL;
-  aw->map = NULL;
-  case_nullify (&aw->bounce);
+  writer = NULL;
+  map = NULL;
   print_map = false;
   print_short_names = false;
   sysfile_opts = sfm_writer_default_options ();
@@ -412,49 +339,40 @@
     }
 
   dict_compact_values (dict);
-  aw->map = finish_case_map (dict);
-  if (aw->map != NULL)
-    case_create (&aw->bounce, dict_get_next_value_idx (dict));
 
   if (fh_get_referent (handle) == FH_REF_FILE) 
     {
       switch (writer_type) 
         {
         case SYSFILE_WRITER:
-          aw->writer = any_writer_from_sfm_writer (
-            sfm_open_writer (handle, dict, sysfile_opts));
+          writer = sfm_open_writer (handle, dict, sysfile_opts);
           break;
         case PORFILE_WRITER:
-          aw->writer = any_writer_from_pfm_writer (
-            pfm_open_writer (handle, dict, porfile_opts));
+          writer = pfm_open_writer (handle, dict, porfile_opts);
           break;
         }
     }
   else
-    aw->writer = any_writer_open (handle, dict);
-  if (aw->writer == NULL)
+    writer = any_writer_open (handle, dict);
+  if (writer == NULL)
     goto error;
+
+  map = finish_case_map (dict);
+  if (map != NULL)
+    writer = casewriter_create_translator (writer,
+                                           get_translate_case,
+                                           get_destroy_case_map,
+                                           map);
   dict_destroy (dict);
   
-  return aw;
+  return writer;
 
  error:
-  case_writer_destroy (aw);
+  casewriter_destroy (writer);
   dict_destroy (dict);
+  destroy_case_map (map);
   return NULL;
 }
-
-/* Writes case C to writer AW. */
-static bool
-case_writer_write_case (struct case_writer *aw, const struct ccase *c) 
-{
-  if (aw->map != NULL) 
-    {
-      map_case (aw->map, c, &aw->bounce);
-      c = &aw->bounce; 
-    }
-  return any_writer_write (aw->writer, c);
-}
 
 /* SAVE and EXPORT. */
 
@@ -464,26 +382,24 @@
 {
   bool retain_unselected;
   struct variable *saved_filter_variable;
-  struct case_writer *aw;
-  struct ccase *c;
-  bool ok = true;
+  struct casewriter *output;
+  bool ok;
 
-  aw = parse_write_command (lexer, ds, writer_type, PROC_CMD, &retain_unselected);
-  if (aw == NULL) 
+  output = parse_write_command (lexer, ds, writer_type, PROC_CMD,
+                                &retain_unselected);
+  if (output == NULL) 
     return CMD_CASCADING_FAILURE;
 
   saved_filter_variable = dict_get_filter (dataset_dict (ds));
   if (retain_unselected) 
     dict_set_filter (dataset_dict (ds), NULL);
 
-  proc_open (ds);
-  while (ok && proc_read (ds, &c))
-    ok = case_writer_write_case (aw, c);
-  ok = proc_close (ds) && ok;
+  casereader_transfer (proc_open (ds), output);
+  ok = casewriter_destroy (output);
+  ok = proc_commit (ds) && ok;
 
   dict_set_filter (dataset_dict (ds), saved_filter_variable);
 
-  case_writer_destroy (aw);
   return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
 }
 
@@ -504,7 +420,7 @@
 /* Transformation. */
 struct output_trns 
   {
-    struct case_writer *aw;      /* Writer. */
+    struct casewriter *writer;          /* Writer. */
   };
 
 static trns_proc_func output_trns_proc;
@@ -515,8 +431,8 @@
 parse_output_trns (struct lexer *lexer, struct dataset *ds, enum writer_type writer_type) 
 {
   struct output_trns *t = xmalloc (sizeof *t);
-  t->aw = parse_write_command (lexer, ds, writer_type, XFORM_CMD, NULL);
-  if (t->aw == NULL) 
+  t->writer = parse_write_command (lexer, ds, writer_type, XFORM_CMD, NULL);
+  if (t->writer == NULL) 
     {
       free (t);
       return CMD_CASCADING_FAILURE;
@@ -531,7 +447,9 @@
 output_trns_proc (void *trns_, struct ccase *c, casenumber case_num UNUSED)
 {
   struct output_trns *t = trns_;
-  case_writer_write_case (t->aw, c);
+  struct ccase tmp;
+  case_clone (&tmp, c);
+  casewriter_write (t->writer, &tmp);
   return TRNS_CONTINUE;
 }
 
@@ -541,13 +459,8 @@
 output_trns_free (void *trns_)
 {
   struct output_trns *t = trns_;
-  bool ok = true;
-
-  if (t != NULL)
-    {
-      ok = case_writer_destroy (t->aw);
-      free (t);
-    }
+  bool ok = casewriter_destroy (t->writer);
+  free (t);
   return ok;
 }
 
@@ -748,15 +661,15 @@
     int type;                  /* One of MTF_*. */
     const struct variable **by;        /* List of BY variables for this file. */
     struct file_handle *handle; /* File handle. */
-    struct any_reader *reader;  /* File reader. */
+    struct casereader *reader;  /* File reader. */
     struct dictionary *dict;   /* Dictionary from system file. */
+    bool active_file;           /* Active file? */
 
     /* IN subcommand. */
     char *in_name;              /* Variable name. */
     struct variable *in_var;    /* Variable (in master dictionary). */
 
-    struct ccase input_storage; /* Input record storage. */
-    struct ccase *input;        /* Input record. */
+    struct ccase input;         /* Input record. */
   };
 
 /* MATCH FILES procedure. */
@@ -773,7 +686,7 @@
     char first[LONG_NAME_LEN + 1], last[LONG_NAME_LEN + 1];
     
     struct dictionary *dict;    /* Dictionary of output file. */
-    struct casefile *output;    /* MATCH FILES output. */
+    struct casewriter *output;  /* MATCH FILES output. */
     struct ccase mtf_case;      /* Case used for output. */
 
     unsigned seq_num;           /* Have we initialized this variable? */
@@ -782,11 +695,12 @@
 
 static bool mtf_free (struct mtf_proc *);
 static bool mtf_close_file (struct mtf_file *);
+static bool mtf_close_all_files (struct mtf_proc *);
 static int mtf_merge_dictionary (struct dictionary *const, struct mtf_file *);
-static bool mtf_read_records (struct mtf_proc *, struct dataset *);
+static bool mtf_read_records (struct mtf_proc *);
 static bool mtf_delete_file_in_place (struct mtf_proc *, struct mtf_file **);
 
-static bool mtf_processing (struct mtf_proc *, struct dataset *);
+static bool mtf_processing (struct mtf_proc *);
 
 static char *var_type_description (struct variable *);
 
@@ -804,6 +718,7 @@
   bool used_active_file = false;
   bool saw_table = false;
   bool saw_in = false;
+  bool open_active_file = false;
 
   mtf.head = mtf.tail = NULL;
   mtf.by_cnt = 0;
@@ -840,8 +755,8 @@
       file->dict = NULL;
       file->in_name = NULL;
       file->in_var = NULL;
-      case_nullify (&file->input_storage);
-      file->input = &file->input_storage;
+      file->active_file = false;
+      case_nullify (&file->input);
 
       /* FILEs go first, then TABLEs. */
       if (file->type == MTF_TABLE || first_table == NULL)
@@ -881,7 +796,7 @@
             }
           used_active_file = true;
 
-          if (!proc_has_source (ds))
+          if (!proc_has_active_file (ds))
             {
               msg (SE, _("Cannot specify the active file since no active "
                          "file has been defined."));
@@ -895,6 +810,7 @@
                    "Temporary transformations will be made permanent."));
 
           file->dict = dataset_dict (ds);
+          file->active_file = true;
         }
       else
         {
@@ -905,9 +821,6 @@
           file->reader = any_reader_open (file->handle, &file->dict);
           if (file->reader == NULL)
             goto error;
-
-          case_create (&file->input_storage,
-                       dict_get_next_value_idx (file->dict));
         }
 
       while (lex_match (lexer, '/'))
@@ -1109,63 +1022,50 @@
 
   if (used_active_file) 
     {
-      proc_set_sink (ds, create_case_sink (&null_sink_class, 
-                                           dataset_dict (ds),
-                                          dataset_get_casefile_factory (ds),
-                                          NULL));
-      proc_open (ds); 
+      proc_discard_output (ds);
+      for (iter = mtf.head; iter != NULL; iter = iter->next)
+        if (iter->reader == NULL) 
+          iter->reader = proc_open (ds);
+      open_active_file = true;
     }
-  else
-    discard_variables (ds);
 
   dict_compact_values (mtf.dict);
-  mtf.output = dataset_get_casefile_factory (ds)->create_casefile
-    (dataset_get_casefile_factory (ds),
-     dict_get_next_value_idx (mtf.dict));
-
+  mtf.output = autopaging_writer_create (dict_get_next_value_idx (mtf.dict));
   mtf.seq_nums = xcalloc (dict_get_var_cnt (mtf.dict), sizeof *mtf.seq_nums);
   case_create (&mtf.mtf_case, dict_get_next_value_idx (mtf.dict));
 
-  if (!mtf_read_records (&mtf, ds))
-    goto error;
+  if (!mtf_read_records (&mtf)) 
+    goto error; 
   while (mtf.head && mtf.head->type == MTF_FILE)
-    if (!mtf_processing (&mtf, ds))
-      goto error;
-  if (!proc_close (ds))
+    if (!mtf_processing (&mtf))
+      goto error; 
+  if (!mtf_close_all_files (&mtf))
     goto error;
+  if (open_active_file)
+    proc_commit (ds);
 
-  discard_variables (ds);
-
-  dataset_set_dict (ds, mtf.dict);
+  proc_set_active_file (ds, casewriter_make_reader (mtf.output), mtf.dict);
   mtf.dict = NULL;
-  proc_set_source (ds, storage_source_create (mtf.output));
   mtf.output = NULL;
 
   return mtf_free (&mtf) ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
 
  error:
-  proc_close (ds);
+  if (open_active_file)
+    proc_commit (ds);
   mtf_free (&mtf);
   return CMD_CASCADING_FAILURE;
 }
 
-/* Return a string in a static buffer describing V's variable type and
-   width. */
+/* Return a string in an allocated buffer describing V's variable
+   type and width. */
 static char *
 var_type_description (struct variable *v)
 {
-  static char buf[2][32];
-  static int x = 0;
-  char *s;
-
-  x ^= 1;
-  s = buf[x];
-
   if (var_is_numeric (v))
-    strcpy (s, "numeric");
+    return xstrdup ("numeric");
   else
-    sprintf (s, "string with width %d", var_get_width (v));
-  return s;
+    return xasprintf ("string with width %d", var_get_width (v));
 }
 
 /* Closes FILE and frees its associated data.
@@ -1174,22 +1074,18 @@
 static bool
 mtf_close_file (struct mtf_file *file)
 {
-  bool ok = file->reader == NULL || !any_reader_error (file->reader);
+  bool ok = casereader_destroy (file->reader);
   free (file->by);
-  any_reader_close (file->reader);
-  if (file->handle != NULL)
+  if (!file->active_file)
     dict_destroy (file->dict);
-  case_destroy (&file->input_storage);
   free (file->in_name);
+  case_destroy (&file->input);
   free (file);
   return ok;
 }
 
-/* Free all the data for the MATCH FILES procedure.
-   Returns true if successful, false if an I/O error
-   occurred. */
 static bool
-mtf_free (struct mtf_proc *mtf)
+mtf_close_all_files (struct mtf_proc *mtf) 
 {
   struct mtf_file *iter, *next;
   bool ok = true;
@@ -1201,9 +1097,22 @@
       if (!mtf_close_file (iter))
         ok = false;
     }
-  
-  if (mtf->dict)
-    dict_destroy (mtf->dict);
+  mtf->head = NULL;
+  return ok;
+}
+
+/* Free all the data for the MATCH FILES procedure.
+   Returns true if successful, false if an I/O error
+   occurred. */
+static bool
+mtf_free (struct mtf_proc *mtf)
+{
+  bool ok;
+
+  ok = mtf_close_all_files (mtf);
+
+  casewriter_destroy (mtf->output);
+  dict_destroy (mtf->dict);
   case_destroy (&mtf->mtf_case);
   free (mtf->seq_nums);
 
@@ -1252,7 +1161,7 @@
 /* Read a record from every input file.
    Returns true if successful, false if an I/O error occurred. */
 static bool
-mtf_read_records (struct mtf_proc *mtf, struct dataset *ds)
+mtf_read_records (struct mtf_proc *mtf)
 {
   struct mtf_file *iter, *next;
   bool ok = true;
@@ -1260,9 +1169,7 @@
   for (iter = mtf->head; ok && iter != NULL; iter = next)
     {
       next = iter->next;
-      if (iter->handle
-          ? !any_reader_read (iter->reader, iter->input)
-          : !proc_read (ds, &iter->input)) 
+      if (!casereader_read (iter->reader, &iter->input))
         {
           if (!mtf_delete_file_in_place (mtf, &iter))
             ok = false; 
@@ -1277,17 +1184,18 @@
 mtf_compare_BY_values (struct mtf_proc *mtf,
                        struct mtf_file *a, struct mtf_file *b)
 {
-  return case_compare_2dict (a->input, b->input, a->by, b->by, mtf->by_cnt);
+  return case_compare_2dict (&a->input, &b->input, a->by, b->by, mtf->by_cnt);
 }
 
 /* Perform one iteration of steps 3...7 above.
    Returns true if successful, false if an I/O error occurred. */
 static bool
-mtf_processing (struct mtf_proc *mtf, struct dataset *ds)
+mtf_processing (struct mtf_proc *mtf)
 {
   struct mtf_file *min_head, *min_tail; /* Files with minimum BY values. */
   struct mtf_file *max_head, *max_tail; /* Files with non-minimum BYs. */
   struct mtf_file *iter, *next;
+  struct ccase out_case;
 
   /* 3. Find the FILE input record(s) that have minimum BY
      values.  Store all the values from these input records into
@@ -1346,9 +1254,8 @@
             min_tail = min_tail->next_min = iter;
           else /* cmp > 0 */
             {
-              if (iter->handle
-                  ? any_reader_read (iter->reader, iter->input)
-                  : proc_read (ds, &iter->input))
+              case_destroy (&iter->input);
+              if (casereader_read (iter->reader, &iter->input))
                 continue;
               if (!mtf_delete_file_in_place (mtf, &iter))
                 return false;
@@ -1375,14 +1282,13 @@
          
           if (mv != NULL && mtf->seq_nums[mv_index] != mtf->seq_num) 
             {
-              const struct ccase *record = iter->input;
               union value *out = case_data_rw (&mtf->mtf_case, mv);
 
               mtf->seq_nums[mv_index] = mtf->seq_num;
               if (var_is_numeric (v))
-                out->f = case_num (record, v);
+                out->f = case_num (&iter->input, v);
               else
-                memcpy (out->s, case_str (record, v), var_get_width (v));
+                memcpy (out->s, case_str (&iter->input, v), var_get_width (v));
             } 
         }
       if (iter->in_var != NULL)
@@ -1418,7 +1324,8 @@
     }
 
   /* 5. Write the output record. */
-  casefile_append (mtf->output, &mtf->mtf_case);
+  case_clone (&out_case, &mtf->mtf_case);
+  casewriter_write (mtf->output, &out_case);
 
   /* 6. Read another record from each input file FILE and TABLE
      that we stored values from above.  If we come to the end of
@@ -1427,9 +1334,8 @@
   for (iter = min_head; iter && iter->type == MTF_FILE; iter = next)
     {
       next = iter->next_min;
-      if (iter->reader != NULL
-          ? !any_reader_read (iter->reader, iter->input)
-          : !proc_read (ds, &iter->input))
+      case_destroy (&iter->input);
+      if (!casereader_read (iter->reader, &iter->input))
         if (!mtf_delete_file_in_place (mtf, &iter))
           return false;
     }
@@ -1614,11 +1520,6 @@
 {
   size_t dst_idx;
 
-  assert (map != NULL);
-  assert (src != NULL);
-  assert (dst != NULL);
-  assert (src != dst);
-
   for (dst_idx = 0; dst_idx < map->value_cnt; dst_idx++)
     {
       int src_idx = map->map[dst_idx];
Index: merge/src/language/data-io/inpt-pgm.c
===================================================================
--- merge.orig/src/language/data-io/inpt-pgm.c  2007-06-05 09:16:10.000000000 -0700
+++ merge/src/language/data-io/inpt-pgm.c       2007-06-05 09:18:06.000000000 -0700
@@ -23,9 +23,9 @@
 #include <float.h>
 #include <stdlib.h>
 
-#include <data/case-source.h>
 #include <data/case.h>
-#include <data/case-source.h>
+#include <data/caseinit.h>
+#include <data/casereader-provider.h>
 #include <data/dictionary.h>
 #include <data/procedure.h>
 #include <data/transformations.h>
@@ -68,12 +68,10 @@
     struct trns_chain *trns_chain;
     enum trns_result restart;
 
-    bool inited_case;           /* Did one-time case initialization? */
     size_t case_nr;             /* Incremented by END CASE transformation. */
 
-    enum value_init_type *init; /* How to initialize each `union value'. */
-    size_t init_cnt;            /* Number of elements in inp_init. */
-    size_t case_size;           /* Size of case in bytes. */
+    struct caseinit *init;
+    size_t value_cnt;
   };
 
 static void destroy_input_program (struct input_program_pgm *);
@@ -82,7 +80,7 @@
 static trns_proc_func end_file_trns_proc;
 static trns_free_func reread_trns_free;
 
-static const struct case_source_class input_program_source_class;
+static const struct casereader_class input_program_casereader_class;
 
 static bool inside_input_program;
 
@@ -105,10 +103,9 @@
 cmd_input_program (struct lexer *lexer, struct dataset *ds)
 {
   struct input_program_pgm *inp;
-  size_t i;
   bool saw_END_CASE = false;
 
-  discard_variables (ds);
+  proc_discard_active_file (ds);
   if (lex_token (lexer) != '.')
     return lex_end_of_command (lexer);
 
@@ -132,7 +129,7 @@
           if (result == CMD_EOF)
             msg (SE, _("Unexpected end-of-file within INPUT PROGRAM."));
           inside_input_program = false;
-          discard_variables (ds);
+          proc_discard_active_file (ds);
           destroy_input_program (inp);
           return result;
         }
@@ -144,7 +141,7 @@
   if (dict_get_next_value_idx (dataset_dict (ds)) == 0) 
     {
       msg (SE, _("Input program did not create any variables."));
-      discard_variables (ds);
+      proc_discard_active_file (ds);
       destroy_input_program (inp);
       return CMD_FAILURE;
     }
@@ -153,33 +150,15 @@
   trns_chain_finalize (inp->trns_chain);
 
   inp->restart = TRNS_CONTINUE;
-  inp->inited_case = false;
-  inp->case_nr = 1;
 
   /* Figure out how to initialize each input case. */
-  inp->init_cnt = dict_get_next_value_idx (dataset_dict (ds));
-  inp->init = xnmalloc (inp->init_cnt, sizeof *inp->init);
-  for (i = 0; i < inp->init_cnt; i++)
-    inp->init[i] = -1;
-  for (i = 0; i < dict_get_var_cnt (dataset_dict (ds)); i++)
-    {
-      struct variable *var = dict_get_var (dataset_dict (ds), i);
-      size_t value_cnt = var_get_value_cnt (var);
-      enum value_init_type value_init;
-      size_t j;
-      
-      value_init = var_is_numeric (var) ? INP_NUMERIC : INP_STRING;
-      value_init |= var_get_leave (var) ? INP_INIT_ONCE : INP_REINIT;
-
-      for (j = 0; j < value_cnt; j++)
-        inp->init[j + var_get_case_index (var)] = value_init;
-    }
-  for (i = 0; i < inp->init_cnt; i++)
-    assert (inp->init[i] != -1);
-  inp->case_size = dict_get_case_size (dataset_dict (ds));
-
-  proc_set_source (ds, 
-                   create_case_source (&input_program_source_class, inp));
+  inp->init = caseinit_create ();
+  caseinit_mark_for_init (inp->init, dataset_dict (ds));
+  inp->value_cnt = dict_get_next_value_idx (dataset_dict (ds));
+  
+  proc_set_active_file_data (
+    ds, casereader_create_sequential (NULL, inp->value_cnt, CASENUMBER_MAX,
+                                      &input_program_casereader_class, inp));
 
   return CMD_SUCCESS;
 }
@@ -191,56 +170,6 @@
   return CMD_END_INPUT_PROGRAM; 
 }
 
-/* Initializes case C.  Called before the first case is read. */
-static void
-init_case (const struct input_program_pgm *inp, struct ccase *c)
-{
-  size_t i;
-
-  for (i = 0; i < inp->init_cnt; i++)
-    switch (inp->init[i]) 
-      {
-      case INP_NUMERIC | INP_INIT_ONCE:
-        case_data_rw_idx (c, i)->f = 0.0;
-        break;
-      case INP_NUMERIC | INP_REINIT:
-        case_data_rw_idx (c, i)->f = SYSMIS;
-        break;
-      case INP_STRING | INP_INIT_ONCE:
-      case INP_STRING | INP_REINIT:
-        memset (case_data_rw_idx (c, i)->s, ' ',
-                sizeof case_data_rw_idx (c, i)->s);
-        break;
-      default:
-        NOT_REACHED ();
-      }
-}
-
-/* Clears case C.  Called between reading successive records. */
-static void
-clear_case (const struct input_program_pgm *inp, struct ccase *c)
-{
-  size_t i;
-
-  for (i = 0; i < inp->init_cnt; i++)
-    switch (inp->init[i]) 
-      {
-      case INP_NUMERIC | INP_INIT_ONCE:
-        break;
-      case INP_NUMERIC | INP_REINIT:
-        case_data_rw_idx (c, i)->f = SYSMIS;
-        break;
-      case INP_STRING | INP_INIT_ONCE:
-        break;
-      case INP_STRING | INP_REINIT:
-        memset (case_data_rw_idx (c, i)->s, ' ',
-                sizeof case_data_rw_idx (c, i)->s);
-        break;
-      default:
-        NOT_REACHED ();
-      }
-}
-
 /* Returns true if STATE is valid given the transformations that
    are allowed within INPUT PROGRAM. */
 static bool
@@ -256,26 +185,28 @@
    Returns true if successful, false at end of file or if an
    I/O error occurred. */
 static bool
-input_program_source_read (struct case_source *source, struct ccase *c)
+input_program_casereader_read (struct casereader *reader UNUSED, void *inp_,
+                               struct ccase *c)
 {
-  struct input_program_pgm *inp = source->aux;
+  struct input_program_pgm *inp = inp_;
 
-  if (!inp->inited_case)
-    {
-      init_case (inp, c);
-      inp->inited_case = true;
-    }
+  case_create (c, inp->value_cnt);
 
   do
     {
       assert (is_valid_state (inp->restart));
-      if (inp->restart == TRNS_ERROR || inp->restart == TRNS_END_FILE)
-        return false;
+      if (inp->restart == TRNS_ERROR || inp->restart == TRNS_END_FILE) 
+        {
+          case_destroy (c);
+          return false; 
+        }
 
-      clear_case (inp, c);
+      caseinit_init_reinit_vars (inp->init, c);
+      caseinit_init_left_vars (inp->init, c);
       inp->restart = trns_chain_execute (inp->trns_chain, inp->restart,
                                          c, &inp->case_nr);
       assert (is_valid_state (inp->restart));
+      caseinit_update_left_vars (inp->init, c);
     }
   while (inp->restart < 0);
 
@@ -288,29 +219,27 @@
   if (pgm != NULL) 
     {
       trns_chain_destroy (pgm->trns_chain);
-      free (pgm->init);
+      caseinit_destroy (pgm->init);
       free (pgm);
     }
 }
 
-/* Destroys the source.
-   Returns true if successful read, false if an I/O occurred
-   during destruction or previously. */
-static bool
-input_program_source_destroy (struct case_source *source)
+/* Destroys the casereader. */
+static void
+input_program_casereader_destroy (struct casereader *reader UNUSED, void *inp_)
 {
-  struct input_program_pgm *inp = source->aux;
-  bool ok = inp->restart != TRNS_ERROR;
+  struct input_program_pgm *inp = inp_;
+  if (inp->restart == TRNS_ERROR)
+    casereader_force_error (reader);
   destroy_input_program (inp);
-  return ok;
 }
 
-static const struct case_source_class input_program_source_class =
+static const struct casereader_class input_program_casereader_class =
   {
-    "INPUT PROGRAM",
+    input_program_casereader_read,
+    input_program_casereader_destroy,
+    NULL,
     NULL,
-    input_program_source_read,
-    input_program_source_destroy,
   };
 
 int
@@ -322,7 +251,7 @@
   return lex_end_of_command (lexer);
 }
 
-/* Sends the current case as the source's output. */
+/* Outputs the current case. */
 int
 end_case_trns_proc (void *inp_, struct ccase *c UNUSED,
                     casenumber case_nr UNUSED)
Index: merge/src/language/data-io/list.q
===================================================================
--- merge.orig/src/language/data-io/list.q      2007-06-05 09:16:10.000000000 -0700
+++ merge/src/language/data-io/list.q   2007-06-05 09:18:06.000000000 -0700
@@ -23,7 +23,8 @@
 
 #include "intprops.h"
 #include "size_max.h"
-#include <data/case.h>
+#include <data/casegrouper.h>
+#include <data/casereader.h>
 #include <data/dictionary.h>
 #include <data/data-out.h>
 #include <data/format.h>
@@ -73,9 +74,6 @@
 /* Parsed command. */
 static struct cmd_list cmd;
 
-/* Current case number. */
-static int case_idx;
-
 /* Line buffer. */
 static struct string line_buffer;
 
@@ -85,11 +83,12 @@
 static void write_line (struct outp_driver *d, const char *s);
 
 /* Other functions. */
-static bool list_cases (const struct ccase *, void *, const struct dataset *);
+static void list_case (struct ccase *, casenumber case_idx,
+                       const struct dataset *);
 static void determine_layout (void);
 static void clean_up (void);
 static void write_header (struct outp_driver *);
-static void write_all_headers (const struct ccase *, void *, const struct dataset*);
+static void write_all_headers (struct casereader *, const struct dataset*);
 
 /* Returns the number of text lines that can fit on the remainder of
    the page. */
@@ -133,7 +132,11 @@
 int
 cmd_list (struct lexer *lexer, struct dataset *ds)
 {
+  struct dictionary *dict = dataset_dict (ds);
   struct variable *casenum_var = NULL;
+  struct casegrouper *grouper;
+  struct casereader *group;
+  casenumber case_idx;
   bool ok;
 
   if (!parse_list (lexer, ds, &cmd, NULL))
@@ -147,7 +150,7 @@
   if (cmd.last == NOT_LONG)
     cmd.last = LONG_MAX;
   if (!cmd.sbc_variables)
-    dict_get_vars (dataset_dict (ds), &cmd.v_variables, &cmd.n_variables,
+    dict_get_vars (dict, &cmd.v_variables, &cmd.n_variables,
                   (1u << DC_SYSTEM) | (1u << DC_SCRATCH));
   if (cmd.n_variables == 0)
     {
@@ -187,12 +190,12 @@
   /* Weighting variable. */
   if (cmd.weight == LST_WEIGHT)
     {
-      if (dict_get_weight (dataset_dict (ds)) != NULL)
+      if (dict_get_weight (dict) != NULL)
        {
          size_t i;
 
          for (i = 0; i < cmd.n_variables; i++)
-           if (cmd.v_variables[i] == dict_get_weight (dataset_dict (ds)))
+           if (cmd.v_variables[i] == dict_get_weight (dict))
              break;
          if (i >= cmd.n_variables)
            {
@@ -201,7 +204,7 @@
              cmd.v_variables = xnrealloc (cmd.v_variables, cmd.n_variables,
                                            sizeof *cmd.v_variables);
              cmd.v_variables[cmd.n_variables - 1]
-                = dict_get_weight (dataset_dict (ds));
+                = dict_get_weight (dict);
            }
        }
       else
@@ -229,7 +232,24 @@
   determine_layout ();
 
   case_idx = 0;
-  ok = procedure_with_splits (ds, write_all_headers, list_cases, NULL, NULL);
+  for (grouper = casegrouper_create_splits (proc_open (ds), dict);
+       casegrouper_get_next_group (grouper, &group);
+       casereader_destroy (group)) 
+    {
+      struct ccase c;
+      
+      write_all_headers (group, ds);
+      for (; casereader_read (group, &c); case_destroy (&c)) 
+        {
+          case_idx++;
+          if (case_idx >= cmd.first && case_idx <= cmd.last
+              && (case_idx - cmd.first) % cmd.step == 0)
+            list_case (&c, case_idx, ds); 
+        }
+    }
+  ok = casegrouper_destroy (grouper);
+  ok = proc_commit (ds) && ok;
+
   ds_destroy(&line_buffer);
 
   clean_up ();
@@ -242,11 +262,16 @@
 /* Writes headers to all devices.  This is done at the beginning of
    each SPLIT FILE group. */
 static void
-write_all_headers (const struct ccase *c, void *aux UNUSED, const struct dataset *ds)
+write_all_headers (struct casereader *input, const struct dataset *ds)
 {
   struct outp_driver *d;
+  struct ccase c;
+
+  if (!casereader_peek (input, 0, &c))
+    return;
+  output_split_file_values (ds, &c);
+  case_destroy (&c);
 
-  output_split_file_values (ds, c);
   for (d = outp_drivers (NULL); d; d = outp_drivers (d))
     {
       if (!d->class->special)
@@ -623,16 +648,12 @@
 }
 
 /* Writes case C to output. */
-static bool
-list_cases (const struct ccase *c, void *aux UNUSED, const struct dataset *ds)
+static void
+list_case (struct ccase *c, casenumber case_idx, const struct dataset *ds)
 {
+  struct dictionary *dict = dataset_dict (ds);
   struct outp_driver *d;
   
-  case_idx++;
-  if (case_idx < cmd.first || case_idx > cmd.last
-      || (cmd.step != 1 && (case_idx - cmd.first) % cmd.step))
-    return true;
-
   for (d = outp_drivers (NULL); d; d = outp_drivers (d))
     if (d->class->special == 0)
       {
@@ -681,7 +702,7 @@
               ds_put_char_multiple(&line_buffer, ' ', width - print->w);
 
             if (fmt_is_string (print->type)
-                || dict_contains_var (dataset_dict (ds), v))
+                || dict_contains_var (dict, v))
              {
                 data_out (case_data (c, v), print,
                           ds_put_uninit (&line_buffer, print->w));
@@ -720,7 +741,7 @@
            char buf[256];
            
             if (fmt_is_string (print->type)
-                || dict_contains_var (dataset_dict (ds), v))
+                || dict_contains_var (dict, v))
              data_out (case_data (c, v), print, buf);
             else 
               {
@@ -738,8 +759,6 @@
       }
     else
       NOT_REACHED ();
-
-  return true;
 }
 
 /* 
Index: merge/src/language/dictionary/apply-dictionary.c
===================================================================
--- merge.orig/src/language/dictionary/apply-dictionary.c       2007-06-05 09:16:10.000000000 -0700
+++ merge/src/language/dictionary/apply-dictionary.c    2007-06-05 09:18:06.000000000 -0700
@@ -21,6 +21,7 @@
 #include <stdlib.h>
 
 #include <data/any-reader.h>
+#include <data/casereader.h>
 #include <data/dictionary.h>
 #include <data/file-handle-def.h>
 #include <data/missing-values.h>
@@ -42,7 +43,7 @@
 cmd_apply_dictionary (struct lexer *lexer, struct dataset *ds)
 {
   struct file_handle *handle;
-  struct any_reader *reader;
+  struct casereader *reader;
   struct dictionary *dict;
 
   int n_matched = 0;
@@ -58,7 +59,7 @@
   reader = any_reader_open (handle, &dict);
   if (dict == NULL)
     return CMD_FAILURE;
-  any_reader_close (reader);
+  casereader_destroy (reader);
 
   for (i = 0; i < dict_get_var_cnt (dict); i++)
     {
@@ -136,7 +137,5 @@
         dict_set_weight (dataset_dict (ds), new_weight);
     }
   
-  any_reader_close (reader);
-
   return lex_end_of_command (lexer);
 }
Index: merge/src/language/dictionary/delete-variables.c
===================================================================
--- merge.orig/src/language/dictionary/delete-variables.c       2007-06-05 09:16:10.000000000 -0700
+++ merge/src/language/dictionary/delete-variables.c    2007-06-05 09:18:06.000000000 -0700
@@ -1,5 +1,5 @@
 /* PSPP - computes sample statistics.
-   Copyright (C) 2006 Free Software Foundation, Inc.
+   Copyright (C) 2006, 2007 Free Software Foundation, Inc.
    Written by Ben Pfaff <address@hidden>.
 
    This program is free software; you can redistribute it and/or
@@ -21,6 +21,7 @@
 
 #include <stdlib.h>
 
+#include <data/casereader.h>
 #include <data/dictionary.h>
 #include <data/procedure.h>
 #include <language/command.h>
@@ -36,6 +37,7 @@
 {
   struct variable **vars;
   size_t var_cnt;
+  bool ok;
 
   if (proc_make_temporary_transformations_permanent (ds))
     msg (SE, _("DELETE VARIABLES may not be used after TEMPORARY.  "
@@ -50,11 +52,13 @@
                  "from the active file dictionary.  Use NEW FILE instead."));
       goto error;
     }
-
-  if (!procedure (ds, NULL, NULL))
+ 
+  ok = casereader_destroy (proc_open (ds));
+  ok = proc_commit (ds) && ok;
+  if (!ok)
     goto error;
-  
   dict_delete_vars (dataset_dict (ds), vars, var_cnt);
+  
   free (vars);
   
   return CMD_SUCCESS;
Index: merge/src/language/dictionary/modify-variables.c
===================================================================
--- merge.orig/src/language/dictionary/modify-variables.c       2007-06-05 09:16:10.000000000 -0700
+++ merge/src/language/dictionary/modify-variables.c    2007-06-05 09:18:06.000000000 -0700
@@ -40,7 +40,6 @@
 #include "gettext.h"
 #define _(msgid) gettext (msgid)
 
-/* FIXME: should change weighting variable, etc. */
 /* These control the ordering produced by
    compare_variables_given_ordering(). */
 struct ordering
@@ -322,7 +321,7 @@
   if (already_encountered & (1 | 4))
     {
       /* Read the data. */
-      if (!procedure (ds,NULL, NULL)) 
+      if (!proc_execute (ds)) 
         goto done; 
     }
 
Index: merge/src/language/dictionary/sys-file-info.c
===================================================================
--- merge.orig/src/language/dictionary/sys-file-info.c  2007-06-05 09:16:11.000000000 -0700
+++ merge/src/language/dictionary/sys-file-info.c       2007-06-05 09:18:06.000000000 -0700
@@ -21,6 +21,7 @@
 #include <ctype.h>
 #include <stdlib.h>
 
+#include <data/casereader.h>
 #include <data/dictionary.h>
 #include <data/file-handle-def.h>
 #include <data/format.h>
@@ -87,7 +88,7 @@
   struct file_handle *h;
   struct dictionary *d;
   struct tab_table *t;
-  struct sfm_reader *reader;
+  struct casereader *reader;
   struct sfm_read_info info;
   int r, nr;
   int i;
@@ -102,7 +103,7 @@
   reader = sfm_open_reader (h, &d, &info);
   if (!reader)
     return CMD_FAILURE;
-  sfm_close_reader (reader);
+  casereader_destroy (reader);
 
   t = tab_create (2, 10, 0);
   tab_vline (t, TAL_GAP, 1, 0, 8);
Index: merge/src/language/expressions/evaluate.c
===================================================================
--- merge.orig/src/language/expressions/evaluate.c      2007-06-05 09:16:11.000000000 -0700
+++ merge/src/language/expressions/evaluate.c   2007-06-05 09:18:06.000000000 -0700
@@ -158,7 +158,7 @@
 
          if  ( ds == NULL )
            {
-             ds = create_dataset (NULL, NULL, NULL);
+             ds = create_dataset (NULL, NULL);
              d = dataset_dict (ds);
            }
 
Index: merge/src/language/lexer/variable-parser.c
===================================================================
--- merge.orig/src/language/lexer/variable-parser.c     2007-06-05 09:16:11.000000000 -0700
+++ merge/src/language/lexer/variable-parser.c  2007-06-05 09:18:06.000000000 -0700
@@ -116,12 +116,6 @@
 
   vs = var_set_create_from_dict (d);
   success = parse_var_set_vars (lexer, vs, var, cnt, opts);
-  if ( success == 0 )
-    {
-      free ( *var ) ;
-      *var = NULL;
-      *cnt = 0;
-    }
   var_set_destroy (vs);
   return success;
 }
Index: merge/src/language/stats/aggregate.c
===================================================================
--- merge.orig/src/language/stats/aggregate.c   2007-06-05 09:16:11.000000000 -0700
+++ merge/src/language/stats/aggregate.c        2007-06-05 09:18:06.000000000 -0700
@@ -21,15 +21,16 @@
 #include <stdlib.h>
 
 #include <data/any-writer.h>
+#include <data/case-ordering.h>
-#include <data/case-sink.h>
 #include <data/case.h>
-#include <data/casefile.h>
+#include <data/casegrouper.h>
+#include <data/casereader.h>
+#include <data/casewriter.h>
 #include <data/dictionary.h>
 #include <data/file-handle-def.h>
 #include <data/format.h>
 #include <data/procedure.h>
 #include <data/settings.h>
-#include <data/storage-stream.h>
 #include <data/sys-file-writer.h>
 #include <data/variable.h>
 #include <language/command.h>
@@ -135,12 +136,8 @@
 /* An entire AGGREGATE procedure. */
 struct agr_proc 
   {
-    /* We have either an output file or a sink. */
-    struct any_writer *writer;          /* Output file, or null if none. */
-    struct case_sink *sink;             /* Sink, or null if none. */
-
     /* Break variables. */
-    struct sort_criteria *sort;         /* Sort criteria. */
+    struct case_ordering *sort;         /* Sort criteria. */
     const struct variable **break_vars;       /* Break variables. */
     size_t break_var_cnt;               /* Number of break variables. */
     struct ccase break_case;            /* Last values of break variables. */
@@ -150,20 +147,18 @@
     struct dictionary *dict;            /* Aggregate dictionary. */
     const struct dictionary *src_dict;  /* Dict of the source */
     int case_cnt;                       /* Counts aggregated cases. */
-    struct ccase agr_case;              /* Aggregate case for output. */
   };
 
 static void initialize_aggregate_info (struct agr_proc *,
                                        const struct ccase *);
-
+static void accumulate_aggregate_info (struct agr_proc *,
+                                       const struct ccase *);
 /* Prototypes. */
 static bool parse_aggregate_functions (struct lexer *, const struct dictionary *,
                                       struct agr_proc *);
 static void agr_destroy (struct agr_proc *);
-static bool aggregate_single_case (struct agr_proc *agr,
-                                  const struct ccase *input,
-                                  struct ccase *output);
-static void dump_aggregate_info (struct agr_proc *agr, struct ccase *output);
+static void dump_aggregate_info (struct agr_proc *agr,
+                                 struct casewriter *output);
 
 /* Parsing. */
 
@@ -174,10 +169,14 @@
   struct dictionary *dict = dataset_dict (ds);
   struct agr_proc agr;
   struct file_handle *out_file = NULL;
+  struct casereader *input = NULL, *group;
+  struct casegrouper *grouper;
+  struct casewriter *output = NULL;
 
   bool copy_documents = false;
   bool presorted = false;
   bool saw_direction;
+  bool ok;
 
   memset(&agr, 0 , sizeof (agr));
   agr.missing = ITEMWISE;
@@ -223,11 +222,13 @@
           int i;
 
          lex_match (lexer, '=');
-          agr.sort = sort_parse_criteria (lexer, dict,
-                                          &agr.break_vars, &agr.break_var_cnt,
-                                          &saw_direction, NULL);
+          agr.sort = parse_case_ordering (lexer, dict,
+                                          
+                                          &saw_direction);
           if (agr.sort == NULL)
             goto error;
+          case_ordering_get_vars (agr.sort,
+                                  &agr.break_vars, &agr.break_var_cnt);
          
           for (i = 0; i < agr.break_var_cnt; i++)
             dict_clone_var_assert (agr.dict, agr.break_vars[i],
@@ -261,109 +262,69 @@
   
   /* Initialize. */
   agr.case_cnt = 0;
-  case_create (&agr.agr_case, dict_get_next_value_idx (agr.dict));
 
-  /* Output to active file or external file? */
   if (out_file == NULL) 
     {
-      struct ccase *c;
-      
       /* The active file will be replaced by the aggregated data,
          so TEMPORARY is moot. */
       proc_cancel_temporary_transformations (ds);
+      proc_discard_output (ds);
+      output = autopaging_writer_create (dict_get_next_value_idx (agr.dict));
+    }
+  else 
+    {
+      output = any_writer_open (out_file, agr.dict);
+      if (output == NULL)
+        goto error;
+    }
 
-      if (agr.sort != NULL && !presorted) 
-        {
-          if (!sort_active_file_in_place (ds, agr.sort))
-            goto error;
-        }
+  input = proc_open (ds);
+  if (agr.sort != NULL && !presorted) 
+    {
+      input = sort_execute (input, agr.sort);
+      agr.sort = NULL; 
+    }
 
-      agr.sink = create_case_sink (&storage_sink_class, agr.dict,
-                                  dataset_get_casefile_factory (ds),
-                                  NULL);
-      if (agr.sink->class->open != NULL)
-        agr.sink->class->open (agr.sink);
-      proc_set_sink (ds, 
-                    create_case_sink (&null_sink_class, dict,
-                                      dataset_get_casefile_factory (ds),
-                                      NULL));
-      proc_open (ds);
-      while (proc_read (ds, &c))
-        if (aggregate_single_case (&agr, c, &agr.agr_case)) 
-          if (!agr.sink->class->write (agr.sink, &agr.agr_case)) 
-            {
-              proc_close (ds);
-              goto error; 
-            }
-      if (!proc_close (ds))
-        goto error;
+  for (grouper = casegrouper_create_vars (input, agr.break_vars,
+                                          agr.break_var_cnt);
+       casegrouper_get_next_group (grouper, &group);
+       casereader_destroy (group)) 
+    {
+      struct ccase c;
+      
+      if (!casereader_peek (group, 0, &c))
+        continue;
+      initialize_aggregate_info (&agr, &c);
+      case_destroy (&c);
+
+      for (; casereader_read (group, &c); case_destroy (&c)) 
+        accumulate_aggregate_info (&agr, &c);
+      dump_aggregate_info (&agr, output);
+    }
+  if (!casegrouper_destroy (grouper))
+    goto error;
 
-      if (agr.case_cnt > 0) 
-        {
-          dump_aggregate_info (&agr, &agr.agr_case);
-          if (!agr.sink->class->write (agr.sink, &agr.agr_case))
-            goto error;
-        }
-      discard_variables (ds);
-      dataset_set_dict (ds, agr.dict);
-      agr.dict = NULL;
-      proc_set_source (ds, agr.sink->class->make_source (agr.sink));
-      free_case_sink (agr.sink);
+  if (!proc_commit (ds)) 
+    {
+      input = NULL;
+      goto error;
     }
-  else
+  input = NULL;
+
+  if (out_file == NULL) 
     {
-      agr.writer = any_writer_open (out_file, agr.dict);
-      if (agr.writer == NULL)
+      struct casereader *next_input = casewriter_make_reader (output);
+      if (next_input == NULL)
         goto error;
       
-      if (agr.sort != NULL && !presorted) 
-        {
-          /* Sorting is needed. */
-          struct casefile *dst;
-          struct casereader *reader;
-          struct ccase c;
-          bool ok = true;
-          
-          dst = sort_active_file_to_casefile (ds, agr.sort);
-          if (dst == NULL)
-            goto error;
-          reader = casefile_get_destructive_reader (dst);
-          while (ok && casereader_read_xfer (reader, &c)) 
-            {
-              if (aggregate_single_case (&agr, &c, &agr.agr_case)) 
-                ok = any_writer_write (agr.writer, &agr.agr_case);
-              case_destroy (&c);
-            }
-          casereader_destroy (reader);
-          if (ok)
-            ok = !casefile_error (dst);
-          casefile_destroy (dst);
-          if (!ok)
-            goto error;
-        }
-      else 
-        {
-          /* Active file is already sorted. */
-          struct ccase *c;
-          
-          proc_open (ds);
-          while (proc_read (ds, &c))
-            if (aggregate_single_case (&agr, c, &agr.agr_case)) 
-              if (!any_writer_write (agr.writer, &agr.agr_case)) 
-                {
-                  proc_close (ds);
-                  goto error;
-                }
-          if (!proc_close (ds))
-            goto error;
-        }
-      
-      if (agr.case_cnt > 0) 
-        {
-          dump_aggregate_info (&agr, &agr.agr_case);
-          any_writer_write (agr.writer, &agr.agr_case);
-        }
-      if (any_writer_error (agr.writer))
+      proc_set_active_file (ds, next_input, agr.dict);
+      agr.dict = NULL;
+    }
+  else 
+    {
+      ok = casewriter_destroy (output);
+      output = NULL;
+      if (!ok)
         goto error;
     }
   
@@ -371,6 +332,9 @@
   return CMD_SUCCESS;
 
 error:
+  if (input != NULL)
+    proc_commit (ds);
+  casewriter_destroy (output);
   agr_destroy (&agr);
   return CMD_CASCADING_FAILURE;
 }
@@ -717,9 +681,7 @@
 {
   struct agr_var *iter, *next;
 
-  any_writer_close (agr->writer);
-  if (agr->sort != NULL)
-    sort_destroy_criteria (agr->sort);
+  case_ordering_destroy (agr->sort);
   free (agr->break_vars);
   case_destroy (&agr->break_case);
   for (iter = agr->agr_vars; iter; iter = next)
@@ -742,44 +704,13 @@
     }
   if (agr->dict != NULL)
     dict_destroy (agr->dict);
-
-  case_destroy (&agr->agr_case);
 }
 
 /* Execution. */
 
-static void accumulate_aggregate_info (struct agr_proc *,
-                                       const struct ccase *);
-static void dump_aggregate_info (struct agr_proc *, struct ccase *);
-
-/* Processes a single case INPUT for aggregation.  If output is
-   warranted, writes it to OUTPUT and returns true.
-   Otherwise, returns false and OUTPUT is unmodified. */
-static bool
-aggregate_single_case (struct agr_proc *agr,
-                       const struct ccase *input, struct ccase *output)
-{
-  bool finished_group = false;
-  
-  if (agr->case_cnt++ == 0)
-    initialize_aggregate_info (agr, input);
-  else if (case_compare (&agr->break_case, input,
-                         agr->break_vars, agr->break_var_cnt))
-    {
-      dump_aggregate_info (agr, output);
-      finished_group = true;
-
-      initialize_aggregate_info (agr, input);
-    }
-
-  accumulate_aggregate_info (agr, input);
-  return finished_group;
-}
-
 /* Accumulates aggregation data from the case INPUT. */
 static void 
-accumulate_aggregate_info (struct agr_proc *agr,
-                           const struct ccase *input)
+accumulate_aggregate_info (struct agr_proc *agr, const struct ccase *input)
 {
   struct agr_var *iter;
   double weight;
@@ -947,12 +878,14 @@
     }
 }
 
-/* We've come to a record that differs from the previous in one or
-   more of the break variables.  Make an output record from the
-   accumulated statistics in the OUTPUT case. */
+/* Writes an aggregated record to OUTPUT. */
 static void 
-dump_aggregate_info (struct agr_proc *agr, struct ccase *output)
+dump_aggregate_info (struct agr_proc *agr, struct casewriter *output)
 {
+  struct ccase c;
+
+  case_create (&c, dict_get_next_value_idx (agr->dict));
+
   {
     int value_idx = 0;
     int i;
@@ -961,7 +894,7 @@
       {
         const struct variable *v = agr->break_vars[i];
         size_t value_cnt = var_get_value_cnt (v);
-        memcpy (case_data_rw_idx (output, value_idx),
+        memcpy (case_data_rw_idx (&c, value_idx),
                 case_data (&agr->break_case, v),
                 sizeof (union value) * value_cnt);
         value_idx += value_cnt; 
@@ -973,7 +906,7 @@
   
     for (i = agr->agr_vars; i; i = i->next)
       {
-       union value *v = case_data_rw (output, i->dest);
+       union value *v = case_data_rw (&c, i->dest);
 
        if (agr->missing == COLUMNWISE && i->saw_missing
            && (i->function & FUNC) != N && (i->function & FUNC) != NU
@@ -1076,6 +1009,8 @@
          }
       }
   }
+
+  casewriter_write (output, &c);
 }
 
 /* Resets the state for all the aggregate functions. */
Index: merge/src/language/stats/autorecode.c
===================================================================
--- merge.orig/src/language/stats/autorecode.c  2007-06-05 09:16:11.000000000 -0700
+++ merge/src/language/stats/autorecode.c       2007-06-05 09:18:06.000000000 -0700
@@ -20,6 +20,7 @@
 #include <stdlib.h>
 
 #include <data/case.h>
+#include <data/casereader.h>
 #include <data/dictionary.h>
 #include <data/procedure.h>
 #include <data/transformations.h>
@@ -103,7 +104,8 @@
 cmd_autorecode (struct lexer *lexer, struct dataset *ds)
 {
   struct autorecode_pgm arc;
-  struct ccase *c;
+  struct casereader *input;
+  struct ccase c;
   size_t dst_cnt;
   size_t i;
   bool ok;
@@ -188,16 +190,16 @@
                                       hash_numeric_value, NULL, NULL);
    }
 
-  proc_open (ds);
-  while (proc_read (ds, &c))
+  input = proc_open (ds);
+  for (; casereader_read (input, &c); case_destroy (&c))
     for (i = 0; i < arc.var_cnt; i++)
       {
         union arc_value v, *vp, **vpp;
 
         if (var_is_numeric (arc.src_vars[i]))
-          v.f = case_num (c, arc.src_vars[i]);
+          v.f = case_num (&c, arc.src_vars[i]);
         else
-          v.c = (char *) case_str (c, arc.src_vars[i]);
+          v.c = (char *) case_str (&c, arc.src_vars[i]);
 
         vpp = (union arc_value **) hsh_probe (arc.src_values[i], &v);
         if (*vpp == NULL)
@@ -211,7 +213,8 @@
             *vpp = vp;
           }
       }
-  ok = proc_close (ds);
+  ok = casereader_destroy (input);
+  ok = proc_commit (ds) && ok;
 
   for (i = 0; i < arc.var_cnt; i++)
     arc.dst_vars[i] = dict_create_var_assert (dataset_dict (ds),
Index: merge/src/language/stats/binomial.c
===================================================================
--- merge.orig/src/language/stats/binomial.c    2007-06-05 09:16:11.000000000 -0700
+++ merge/src/language/stats/binomial.c 2007-06-05 09:18:06.000000000 -0700
@@ -22,13 +22,12 @@
 #include <libpspp/alloc.h>
 
 #include <data/case.h>
-#include <data/casefile.h>
+#include <data/casereader.h>
 #include <data/dictionary.h>
 #include <data/procedure.h>
 #include <data/variable.h>
 #include <data/value.h>
 #include <data/value-labels.h>
-#include <data/casefilter.h>
 
 #include <libpspp/message.h>
 #include <libpspp/assertion.h>
@@ -89,50 +88,47 @@
   return sig1tailed ;
 }
 
-static void
+static bool
 do_binomial (const struct dictionary *dict,
-            const struct casefile *cf,
+            struct casereader *input,
             const struct binomial_test *bst,
-            struct freq *cat1,
-            struct freq *cat2,
-            const struct casefilter *filter
+            struct freq_mutable *cat1,
+            struct freq_mutable *cat2,
+             enum mv_class exclude
             )
 {
   bool warn = true;
 
   const struct one_sample_test *ost = (const struct one_sample_test *) bst;
   struct ccase c;
-  struct casereader *r = casefile_get_reader (cf, NULL);
 
-  while (casereader_read(r, &c))
+  while (casereader_read(input, &c))
     {
       int v;
-      double w =
-       dict_get_case_weight (dict, &c, &warn);
+      double w = dict_get_case_weight (dict, &c, &warn);
 
       for (v = 0 ; v < ost->n_vars ; ++v )
        {
          const struct variable *var = ost->vars[v];
          const union value *value = case_data (&c, var);
+          int width = var_get_width (var);
 
-         if ( casefilter_variable_missing (filter, &c, var))
+         if (var_is_value_missing (var, value, exclude))
            break;
 
          if ( NULL == cat1[v].value )
            {
-             cat1[v].value = value_dup (value, var_get_width (var));
+             cat1[v].value = value_dup (value, width);
              cat1[v].count = w;
            }
-         else if ( 0 == compare_values (cat1[v].value, value,
-                                        var_get_width (var)))
+         else if ( 0 == compare_values (cat1[v].value, value, width))
            cat1[v].count += w;
          else if ( NULL == cat2[v].value )
            {
-             cat2[v].value = value_dup (value, var_get_width (var));
+             cat2[v].value = value_dup (value, width);
              cat2[v].count = w;
            }
-         else if ( 0 == compare_values (cat2[v].value, value,
-                                        var_get_width (var)))
+         else if ( 0 == compare_values (cat2[v].value, value, width))
            cat2[v].count += w;
          else if ( bst->category1 == SYSMIS)
            msg (ME, _("Variable %s is not dichotomous"), var_get_name (var));
@@ -140,24 +136,23 @@
 
       case_destroy (&c);
     }
-  casereader_destroy (r);
+  return casereader_destroy (input);
 }
 
 
 
 void
 binomial_execute (const struct dataset *ds,
-                 const struct casefile *cf,
-                 struct casefilter *filter,
+                 struct casereader *input,
+                  enum mv_class exclude,
                  const struct npar_test *test)
 {
   int v;
   const struct binomial_test *bst = (const struct binomial_test *) test;
   const struct one_sample_test *ost = (const struct one_sample_test*) test;
 
-  struct freq *cat1 = xzalloc (sizeof (*cat1) * ost->n_vars);
-  struct freq *cat2 = xzalloc (sizeof (*cat1) * ost->n_vars);
-  struct tab_table *table ;
+  struct freq_mutable *cat1 = xzalloc (sizeof (*cat1) * ost->n_vars);
+  struct freq_mutable *cat2 = xzalloc (sizeof (*cat1) * ost->n_vars);
 
   assert ((bst->category1 == SYSMIS) == (bst->category2 == SYSMIS) );
 
@@ -175,95 +170,78 @@
       cat2->value = value_dup (&v, 0);
     }
 
-  do_binomial (dataset_dict(ds), cf, bst, cat1, cat2, filter);
-
-  table = tab_create (7, ost->n_vars * 3 + 1, 0);
-
-  tab_dim (table, tab_natural_dimensions);
-
-  tab_title (table, _("Binomial Test"));
-
-  tab_headers (table, 2, 0, 1, 0);
-
-  tab_box (table, TAL_1, TAL_1, -1, TAL_1,
-          0, 0, table->nc - 1, tab_nr(table) - 1 );
-
-  for (v = 0 ; v < ost->n_vars; ++v)
+  if (do_binomial (dataset_dict(ds), input, bst, cat1, cat2, exclude)) 
     {
-      double n_total, sig;
-      const struct variable *var = ost->vars[v];
-      tab_hline (table, TAL_1, 0, tab_nc (table) -1, 1 + v * 3);
-
-      /* Titles */
-      tab_text (table, 0, 1 + v * 3, TAB_LEFT,
-               var_to_string (var));
-
-      tab_text (table, 1, 1 + v * 3, TAB_LEFT,
-               _("Group1"));
-
-      tab_text (table, 1, 2 + v * 3, TAB_LEFT,
-               _("Group2"));
-
-      tab_text (table, 1, 3 + v * 3, TAB_LEFT,
-               _("Total"));
-
-      /* Test Prop */
-      tab_float (table, 5, 1 + v * 3, TAB_NONE, bst->p, 8, 3);
-
-      /* Category labels */
-      tab_text (table, 2, 1 + v * 3, TAB_NONE,
-               var_get_value_name (var, cat1[v].value));
-
-      tab_text (table, 2, 2 + v * 3, TAB_NONE,
-               var_get_value_name (var, cat2[v].value));
-
-      /* Observed N */
-      tab_float (table, 3, 1 + v * 3, TAB_NONE,
-                cat1[v].count, 8, 0);
+      struct tab_table *table = tab_create (7, ost->n_vars * 3 + 1, 0);
 
-      tab_float (table, 3, 2 + v * 3, TAB_NONE,
-                cat2[v].count, 8, 0);
+      tab_dim (table, tab_natural_dimensions);
 
-      n_total = cat1[v].count + cat2[v].count;
+      tab_title (table, _("Binomial Test"));
 
+      tab_headers (table, 2, 0, 1, 0);
 
-      tab_float (table, 3, 3 + v * 3, TAB_NONE,
-                n_total, 8, 0);
+      tab_box (table, TAL_1, TAL_1, -1, TAL_1,
+               0, 0, table->nc - 1, tab_nr(table) - 1 );
+
+      for (v = 0 ; v < ost->n_vars; ++v)
+        {
+          double n_total, sig;
+          const struct variable *var = ost->vars[v];
+          tab_hline (table, TAL_1, 0, tab_nc (table) -1, 1 + v * 3);
+
+          /* Titles */
+          tab_text (table, 0, 1 + v * 3, TAB_LEFT, var_to_string (var));
+          tab_text (table, 1, 1 + v * 3, TAB_LEFT, _("Group1"));
+          tab_text (table, 1, 2 + v * 3, TAB_LEFT, _("Group2"));
+          tab_text (table, 1, 3 + v * 3, TAB_LEFT, _("Total"));
+
+          /* Test Prop */
+          tab_float (table, 5, 1 + v * 3, TAB_NONE, bst->p, 8, 3);
+
+          /* Category labels */
+          tab_text (table, 2, 1 + v * 3, TAB_NONE,
+                    var_get_value_name (var, cat1[v].value));
+          tab_text (table, 2, 2 + v * 3, TAB_NONE,
+                    var_get_value_name (var, cat2[v].value));
+
+          /* Observed N */
+          tab_float (table, 3, 1 + v * 3, TAB_NONE, cat1[v].count, 8, 0);
+          tab_float (table, 3, 2 + v * 3, TAB_NONE, cat2[v].count, 8, 0);
+
+          n_total = cat1[v].count + cat2[v].count;
+          tab_float (table, 3, 3 + v * 3, TAB_NONE, n_total, 8, 0);
+
+          /* Observed Proportions */
+          tab_float (table, 4, 1 + v * 3, TAB_NONE,
+                     cat1[v].count / n_total, 8, 3);
+          tab_float (table, 4, 2 + v * 3, TAB_NONE,
+                     cat2[v].count / n_total, 8, 3);
+          tab_float (table, 4, 3 + v * 3, TAB_NONE,
+                     (cat1[v].count + cat2[v].count) / n_total, 8, 2);
+
+          /* Significance */
+          sig = calculate_binomial (cat1[v].count, cat2[v].count, bst->p);
+          tab_float (table, 6, 1 + v * 3, TAB_NONE, sig, 8, 3);
+        }
+
+      tab_text (table,  2, 0,  TAB_CENTER, _("Category"));
+      tab_text (table,  3, 0,  TAB_CENTER, _("N"));
+      tab_text (table,  4, 0,  TAB_CENTER, _("Observed Prop."));
+      tab_text (table,  5, 0,  TAB_CENTER, _("Test Prop."));
+
+      tab_text (table,  6, 0,  TAB_CENTER | TAT_PRINTF,
+                _("Exact Sig. (%d-tailed)"),
+                bst->p == 0.5 ? 2: 1);
 
-      /* Observed Proportions */
-
-      tab_float (table, 4, 1 + v * 3, TAB_NONE,
-                cat1[v].count / n_total, 8, 3);
-
-      tab_float (table, 4, 2 + v * 3, TAB_NONE,
-                cat2[v].count / n_total, 8, 3);
-
-      tab_float (table, 4, 3 + v * 3, TAB_NONE,
-                (cat1[v].count + cat2[v].count) / n_total, 8, 2);
-
-
-      /* Significance */
-      sig = calculate_binomial (cat1[v].count, cat2[v].count,
-                                      bst->p);
-
-      tab_float (table, 6, 1 + v * 3, TAB_NONE,
-                sig, 8, 3);
+      tab_vline (table, TAL_2, 2, 0, tab_nr (table) -1);
+      tab_submit (table);
+    }
+  
+  for (v = 0; v < ost->n_vars; v++) 
+    {
+      free (cat1[v].value);
+      free (cat2[v].value); 
     }
-
-  tab_text (table,  2, 0,  TAB_CENTER, _("Category"));
-  tab_text (table,  3, 0,  TAB_CENTER, _("N"));
-  tab_text (table,  4, 0,  TAB_CENTER, _("Observed Prop."));
-  tab_text (table,  5, 0,  TAB_CENTER, _("Test Prop."));
-
-  tab_text (table,  6, 0,  TAB_CENTER | TAT_PRINTF,
-           _("Exact Sig. (%d-tailed)"),
-           bst->p == 0.5 ? 2: 1);
-
-  tab_vline (table, TAL_2, 2, 0, tab_nr (table) -1);
-
   free (cat1);
-  free (cat2);
-
-  tab_submit (table);
-
+  free (cat2); 
 }
Index: merge/src/language/stats/binomial.h
===================================================================
--- merge.orig/src/language/stats/binomial.h    2007-06-05 09:16:11.000000000 -0700
+++ merge/src/language/stats/binomial.h 2007-06-05 09:18:06.000000000 -0700
@@ -36,13 +36,13 @@
 };
 
 
-struct casefile;
+struct casereader;
 struct dataset;
 
 
 void binomial_execute (const struct dataset *, 
-                      const struct casefile *, 
-                      struct casefilter *, 
+                      struct casereader *,
+                       enum mv_class,
                       const struct npar_test *);
 
 #endif
Index: merge/src/language/stats/chisquare.c
===================================================================
--- merge.orig/src/language/stats/chisquare.c   2007-06-05 09:16:11.000000000 -0700
+++ merge/src/language/stats/chisquare.c        2007-06-05 09:18:06.000000000 -0700
@@ -17,39 +17,33 @@
    02110-1301, USA. */
 
 #include <config.h>
-#include <libpspp/compiler.h>
-#include <libpspp/assertion.h>
+
+#include <language/stats/chisquare.h>
 
 #include <stdlib.h>
+#include <math.h>
 
 #include <data/case.h>
-#include <data/casefile.h>
-#include <data/casefilter.h>
-#include <data/variable.h>
+#include <data/casereader.h>
 #include <data/dictionary.h>
 #include <data/procedure.h>
-
-#include <libpspp/message.h>
-#include <libpspp/hash.h>
+#include <data/value-labels.h>
+#include <data/variable.h>
+#include <language/stats/freq.h>
+#include <language/stats/npar.h>
 #include <libpspp/alloc.h>
-
-#include <gsl/gsl_cdf.h>
-
+#include <libpspp/assertion.h>
+#include <libpspp/compiler.h>
+#include <libpspp/hash.h>
+#include <libpspp/message.h>
+#include <libpspp/taint.h>
 #include <output/table.h>
-#include <data/value-labels.h>
 
-#include "npar.h"
-#include "chisquare.h"
-#include "freq.h"
-
-#include <math.h>
+#include <gsl/gsl_cdf.h>
 
 #include "gettext.h"
 #define _(msgid) gettext (msgid)
 
-
-
-
 /* Return a hash table containing the frequency counts of each 
    value of VAR in CF .
    It is the caller's responsibility to free the hash table when 
@@ -57,8 +51,7 @@
 */
 static struct hsh_table *
 create_freq_hash_with_range (const struct dictionary *dict, 
-                            const struct casefile *cf, 
-                            struct casefilter *filter,
+                            struct casereader *input, 
                             const struct variable *var, 
                             double lo, 
                             double hi)
@@ -66,7 +59,6 @@
   bool warn = true;
   float i_d;
   struct ccase c;
-  struct casereader *r = casefile_get_reader (cf, filter);
 
   struct hsh_table *freq_hash = 
     hsh_create (4, compare_freq, hash_freq, 
@@ -87,19 +79,13 @@
       hsh_insert (freq_hash, fr);
     }
 
-  while (casereader_read(r, &c))
+  while (casereader_read (input, &c))
     {
       union value obs_value;
       struct freq **existing_fr;
       struct freq *fr = xmalloc(sizeof  (*fr));
       fr->value = case_data (&c, var);
 
-      if ( casefilter_variable_missing (filter, &c, var))
-       {
-         free (fr);
-         continue;
-       }
-
       fr->count = dict_get_case_weight (dict, &c, &warn);
 
       obs_value.f = trunc (fr->value->f);
@@ -124,43 +110,39 @@
 
       case_destroy (&c);
     }
-  casereader_destroy (r);
-
-  return freq_hash;
+  if (casereader_destroy (input))
+    return freq_hash;
+  else 
+    {
+      hsh_destroy (freq_hash);
+      return NULL;
+    }
 }
 
 
 /* Return a hash table containing the frequency counts of each 
-   value of VAR in CF .
+   value of VAR in INPUT .
    It is the caller's responsibility to free the hash table when 
    no longer required.
 */
 static struct hsh_table *
 create_freq_hash (const struct dictionary *dict, 
-                 const struct casefile *cf, 
-                 struct casefilter *filter, 
+                 struct casereader *input, 
                  const struct variable *var)
 {
   bool warn = true;
   struct ccase c;
-  struct casereader *r = casefile_get_reader (cf, filter);
 
   struct hsh_table *freq_hash = 
     hsh_create (4, compare_freq, hash_freq, 
                free_freq_mutable_hash,
                (void *) var);
 
-  while (casereader_read(r, &c))
+  for (; casereader_read (input, &c); case_destroy (&c))
     {
       struct freq **existing_fr;
       struct freq *fr = xmalloc(sizeof  (*fr));
-      fr->value = case_data (&c, var );
-
-      if ( casefilter_variable_missing (filter, &c, var))
-       {
-         free (fr);
-         continue;
-       }
+      fr->value = case_data (&c, var);
 
       fr->count = dict_get_case_weight (dict, &c, &warn);
 
@@ -175,20 +157,21 @@
           *existing_fr = fr;
           fr->value = value_dup (fr->value, var_get_width (var));
        }
-
-      case_destroy (&c);
     }
-  casereader_destroy (r);
-
-  return freq_hash;
+  if (casereader_destroy (input))
+    return freq_hash;
+  else
+    {
+      hsh_destroy (freq_hash);
+      return NULL;
+    }
 }
 
 
 
 static struct tab_table *
 create_variable_frequency_table (const struct dictionary *dict, 
-                                const struct casefile *cf, 
-                                struct casefilter *filter,
+                                struct casereader *input, 
                                 const struct chisquare_test *test, 
                                 int v, 
                                 struct hsh_table **freq_hash)
@@ -200,7 +183,9 @@
   struct tab_table *table ;
   const struct variable *var =  ost->vars[v];
 
-  *freq_hash = create_freq_hash (dict, cf, filter, var);
+  *freq_hash = create_freq_hash (dict, input, var);
+  if (*freq_hash == NULL)
+    return NULL;
       
   n_cells = hsh_count (*freq_hash);
 
@@ -305,7 +290,8 @@
 {
   const struct one_sample_test *ost = (const struct one_sample_test*) test;
   
-  struct tab_table *table = tab_create (1 + ost->n_vars, 4, 0);
+  struct tab_table *table;
+  table = tab_create (1 + ost->n_vars, 4, 0);
   tab_dim (table, tab_natural_dimensions);
   tab_title (table, _("Test Statistics"));
   tab_headers (table, 1, 0, 1, 0);
@@ -331,20 +317,20 @@
 
 void 
 chisquare_execute (const struct dataset *ds,
-                  const struct casefile *cf, 
-                  struct casefilter *filter,
+                  struct casereader *input,
+                   enum mv_class exclude,
                   const struct npar_test *test)
 {
   const struct dictionary *dict = dataset_dict (ds);
   int v, i;
   struct one_sample_test *ost = (struct one_sample_test *) test;
   struct chisquare_test *cst = (struct chisquare_test *) test;
-  struct tab_table *stats_table = create_stats_table (cst);
   int n_cells = 0;
   double total_expected = 0.0;
 
   double *df = xzalloc (sizeof (*df) * ost->n_vars);
   double *xsq = xzalloc (sizeof (*df) * ost->n_vars);
+  bool ok;
   
   for ( i = 0 ; i < cst->n_expected ; ++i ) 
     total_expected += cst->expected[i];
@@ -355,17 +341,17 @@
        {
          double total_obs = 0.0;
          struct hsh_table *freq_hash = NULL;
+          struct casereader *reader =
+            casereader_create_filter_missing (casereader_clone (input),
+                                              &ost->vars[v], 1, exclude, NULL);
          struct tab_table *freq_table = 
-           create_variable_frequency_table(dict, cf, filter, cst, 
-                                           v, &freq_hash);
+            create_variable_frequency_table(dict, reader, cst, v, &freq_hash);
 
-         struct freq **ff = (struct freq **) hsh_sort (freq_hash);
+         struct freq **ff;
 
          if ( NULL == freq_table ) 
-           {
-             hsh_destroy (freq_hash);
-             continue;
-           }
+            continue;
+          ff = (struct freq **) hsh_sort (freq_hash);
 
          n_cells = hsh_count (freq_hash);
 
@@ -420,12 +406,19 @@
       for ( v = 0 ; v < ost->n_vars ; ++v ) 
        {
          double total_obs = 0.0;
+          struct casereader *reader =
+            casereader_create_filter_missing (casereader_clone (input),
+                                              &ost->vars[v], 1, exclude, NULL);
          struct hsh_table *freq_hash = 
-           create_freq_hash_with_range (dict, cf, filter, ost->vars[v], 
-                                        cst->lo, cst->hi);
+           create_freq_hash_with_range (dict, reader,
+                                         ost->vars[v], cst->lo, cst->hi);
+
+         struct freq **ff;
 
-         struct freq **ff = (struct freq **) hsh_sort (freq_hash);
+          if (freq_hash == NULL)
+            continue;
 
+          ff = (struct freq **) hsh_sort (freq_hash);
          assert ( n_cells == hsh_count (freq_hash));
 
          for ( i = 0 ; i < hsh_count (freq_hash) ; ++i ) 
@@ -473,25 +466,30 @@
 
       tab_submit (freq_table);
     }
+  ok = !taint_has_tainted_successor (casereader_get_taint (input));
+  casereader_destroy (input);
 
-
-  /* Populate the summary statistics table */
-  for ( v = 0 ; v < ost->n_vars ; ++v ) 
+  if (ok) 
     {
-      const struct variable *var = ost->vars[v];
+      struct tab_table *stats_table = create_stats_table (cst);
+      
+      /* Populate the summary statistics table */
+      for ( v = 0 ; v < ost->n_vars ; ++v ) 
+        {
+          const struct variable *var = ost->vars[v];
 
-      tab_text (stats_table, 1 + v, 0, TAB_CENTER, var_get_name (var));
+          tab_text (stats_table, 1 + v, 0, TAB_CENTER, var_get_name (var));
 
-      tab_float (stats_table, 1 + v, 1, TAB_NONE, xsq[v], 8,3);
-      tab_float (stats_table, 1 + v, 2, TAB_NONE, df[v], 8,0);
+          tab_float (stats_table, 1 + v, 1, TAB_NONE, xsq[v], 8,3);
+          tab_float (stats_table, 1 + v, 2, TAB_NONE, df[v], 8,0);
 
-      tab_float (stats_table, 1 + v, 3, TAB_NONE, 
-                gsl_cdf_chisq_Q (xsq[v], df[v]), 8,3);
+          tab_float (stats_table, 1 + v, 3, TAB_NONE, 
+                     gsl_cdf_chisq_Q (xsq[v], df[v]), 8,3);
+        }
+      tab_submit (stats_table);
     }
-
+  
   free (xsq);
   free (df);
-
-  tab_submit (stats_table);
 }
 
Index: merge/src/language/stats/chisquare.h
===================================================================
--- merge.orig/src/language/stats/chisquare.h   2007-06-05 09:16:11.000000000 -0700
+++ merge/src/language/stats/chisquare.h        2007-06-05 09:18:06.000000000 -0700
@@ -19,11 +19,10 @@
 #if !chisquare_h
 #define chisquare_h 1
 
-#include <config.h>
 #include <stddef.h>
 #include <stdbool.h>
+#include <language/stats/npar.h>
 
-#include "npar.h"
 struct chisquare_test
 {
   struct one_sample_test parent;  
@@ -37,17 +36,18 @@
   int n_expected;
 };
 
-struct casefile;
-struct dictionary ;
+struct casereader;
+struct dictionary;
 struct hsh_table;
+struct dataset;
 
 void chisquare_insert_variables (const struct npar_test *test,
                                 struct hsh_table *variables);
 
 
 void chisquare_execute (const struct dataset *ds, 
-                       const struct casefile *cf, 
-                       struct casefilter *filter,
+                       struct casereader *input,
+                        enum mv_class exclude,
                        const struct npar_test *test);
 
 
Index: merge/src/language/stats/crosstabs.q
===================================================================
--- merge.orig/src/language/stats/crosstabs.q   2007-06-05 09:16:11.000000000 -0700
+++ merge/src/language/stats/crosstabs.q        2007-06-05 09:18:06.000000000 -0700
@@ -36,6 +36,8 @@
 #include <stdio.h>
 
 #include <data/case.h>
+#include <data/casegrouper.h>
+#include <data/casereader.h>
 #include <data/data-out.h>
 #include <data/dictionary.h>
 #include <data/format.h>
@@ -177,10 +179,10 @@
 static struct pool *pl_col;    /* For column data. */
 
 static int internal_cmd_crosstabs (struct lexer *lexer, struct dataset *ds);
-static void precalc (const struct ccase *, void *, const struct dataset *);
-static bool calc_general (const struct ccase *, void *, const struct dataset *);
-static bool calc_integer (const struct ccase *, void *, const struct dataset *);
-static bool postcalc (void *, const struct dataset *);
+static void precalc (struct casereader *, const struct dataset *);
+static void calc_general (struct ccase *, const struct dataset *);
+static void calc_integer (struct ccase *, const struct dataset *);
+static void postcalc (void);
 static void submit (struct tab_table *);
 
 static void format_short (char *s, const struct fmt_spec *fp,
@@ -203,8 +205,10 @@
 static int
 internal_cmd_crosstabs (struct lexer *lexer, struct dataset *ds)
 {
-  int i;
+  struct casegrouper *grouper;
+  struct casereader *input, *group;
   bool ok;
+  int i;
 
   variables = NULL;
   variables_cnt = 0;
@@ -294,9 +298,28 @@
   else
     write_style = CRS_WR_NONE;
 
-  ok = procedure_with_splits (ds, precalc,
-                              mode == GENERAL ? calc_general : calc_integer,
-                              postcalc, NULL);
+  input = casereader_create_filter_weight (proc_open (ds), dataset_dict (ds),
+                                           NULL, NULL);
+  grouper = casegrouper_create_splits (input, dataset_dict (ds));
+  while (casegrouper_get_next_group (grouper, &group)) 
+    {
+      struct ccase c;
+      
+      precalc (group, ds);
+      
+      for (; casereader_read (group, &c); case_destroy (&c)) 
+        {
+          if (mode == GENERAL)
+            calc_general (&c, ds);
+          else
+            calc_integer (&c, ds); 
+        }
+      casereader_destroy (group);
+
+      postcalc ();
+    }
+  ok = casegrouper_destroy (grouper);
+  ok = proc_commit (ds) && ok;
 
   return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
 }
@@ -490,10 +513,16 @@
 static unsigned hash_table_entry (const void *, const void *);
 
 /* Set up the crosstabulation tables for processing. */
-static  void
-precalc (const struct ccase *first, void *aux UNUSED, const struct dataset *ds)
+static void
+precalc (struct casereader *input, const struct dataset *ds)
 {
-  output_split_file_values (ds, first);
+  struct ccase c;
+
+  if (!casereader_peek (input, 0, &c))
+    return;
+  output_split_file_values (ds, &c);
+  case_destroy (&c);
+
   if (mode == GENERAL)
     {
       gen_tab = hsh_create (512, compare_table_entry, hash_table_entry,
@@ -565,18 +594,16 @@
 }
 
 /* Form crosstabulations for general mode. */
-static bool
-calc_general (const struct ccase *c, void *aux UNUSED, const struct dataset *ds)
+static void
+calc_general (struct ccase *c, const struct dataset *ds)
 {
-  bool bad_warn = true;
-
   /* Missing values to exclude. */
   enum mv_class exclude = (cmd.miss == CRS_TABLE ? MV_ANY
                            : cmd.miss == CRS_INCLUDE ? MV_SYSTEM
                            : MV_NEVER);
 
   /* Case weight. */
-  double weight = dict_get_case_weight (dataset_dict (ds), c, &bad_warn);
+  double weight = dict_get_case_weight (dataset_dict (ds), c, NULL);
 
   /* Flattened current table index. */
   int t;
@@ -637,12 +664,10 @@
     next_crosstab:
       local_free (te);
     }
-  
-  return true;
 }
 
-static bool
-calc_integer (const struct ccase *c, void *aux UNUSED, const struct dataset *ds)
+static void
+calc_integer (struct ccase *c, const struct dataset *ds)
 {
   bool bad_warn = true;
 
@@ -695,8 +720,6 @@
       
     next_crosstab: ;
     }
-  
-  return true;
 }
 
 /* Compare the table_entry's at A and B and return a strcmp()-type
@@ -764,8 +787,8 @@
                                int *, int *, int *);
 static void make_summary_table (void);
 
-static bool
-postcalc (void *aux UNUSED, const struct dataset *ds UNUSED)
+static void
+postcalc (void)
 {
   if (mode == GENERAL)
     {
@@ -801,8 +824,6 @@
   }
   
   hsh_destroy (gen_tab);
-
-  return true;
 }
 
 static void insert_summary (struct tab_table *, int tab_index, double valid);
Index: merge/src/language/stats/descriptives.c
===================================================================
--- merge.orig/src/language/stats/descriptives.c        2007-06-05 09:16:11.000000000 -0700
+++ merge/src/language/stats/descriptives.c     2007-06-05 09:18:06.000000000 -0700
@@ -16,16 +16,14 @@
    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
    02110-1301, USA. */
 
-/* FIXME: Many possible optimizations. */
-
 #include <config.h>
 
 #include <limits.h>
 #include <math.h>
 #include <stdlib.h>
 
-#include <data/case.h>
-#include <data/casefile.h>
+#include <data/casegrouper.h>
+#include <data/casereader.h>
 #include <data/dictionary.h>
 #include <data/procedure.h>
 #include <data/transformations.h>
@@ -180,9 +178,8 @@
 static void setup_z_trns (struct dsc_proc *, struct dataset *);
 
 /* Procedure execution functions. */
-static bool calc_descriptives (const struct ccase *first,
-                               const struct casefile *, void *dsc_, 
-                              const struct dataset *);
+static void calc_descriptives (struct dsc_proc *, struct casereader *,
+                               struct dataset *);
 static void display (struct dsc_proc *dsc);
 
 /* Parser and outline. */
@@ -200,6 +197,9 @@
   size_t i;
   bool ok;
 
+  struct casegrouper *grouper;
+  struct casereader *group;
+
   /* Create and initialize dsc. */
   dsc = xmalloc (sizeof *dsc);
   dsc->vars = NULL;
@@ -316,8 +316,7 @@
             {
               int i;
               
-              if (!parse_variables_const (lexer, dataset_dict (ds), 
-                                         &vars, &var_cnt,
+              if (!parse_variables_const (lexer, dict, &vars, &var_cnt,
                                     PV_APPEND | PV_NO_DUPLICATE | PV_NUMERIC))
                goto error;
 
@@ -413,8 +412,12 @@
     for (i = 0; i < dsc->var_cnt; i++)
       dsc->vars[i].moments = moments_create (dsc->max_moment);
 
-  /* Data pass. */
-  ok = multipass_procedure_with_splits (ds, calc_descriptives, dsc);
+  /* Data pass.  FIXME: error handling. */
+  grouper = casegrouper_create_splits (proc_open (ds), dict);
+  while (casegrouper_get_next_group (grouper, &group)) 
+    calc_descriptives (dsc, group, ds);
+  ok = casegrouper_destroy (grouper);
+  ok = proc_commit (ds) && ok;
 
   /* Z-scoring! */
   if (ok && z_cnt)
@@ -689,17 +692,25 @@
 
 /* Calculates and displays descriptive statistics for the cases
    in CF. */
-static bool
-calc_descriptives (const struct ccase *first,
-                   const struct casefile *cf, void *dsc_, 
-                  const struct dataset *ds) 
+static void
+calc_descriptives (struct dsc_proc *dsc, struct casereader *group,
+                   struct dataset *ds) 
 {
-  struct dsc_proc *dsc = dsc_;
-  struct casereader *reader;
+  struct casereader *pass1, *pass2;
   struct ccase c;
   size_t i;
 
-  output_split_file_values (ds, first);
+  if (!casereader_peek (group, 0, &c))
+    return;
+  output_split_file_values (ds, &c);
+  case_destroy (&c);
+
+  group = casereader_create_filter_weight (group, dataset_dict (ds),
+                                           NULL, NULL);
+
+  casereader_split (group, &pass1, &pass2);
+  if (dsc->max_moment <= MOMENT_MEAN)
+    casereader_destroy (pass2);
 
   for (i = 0; i < dsc->var_cnt; i++)
     {
@@ -715,13 +726,9 @@
   dsc->valid = 0.;
 
   /* First pass to handle most of the work. */
-  for (reader = casefile_get_reader (cf, NULL);
-       casereader_read (reader, &c);
-       case_destroy (&c))
-    {
-      double weight = dict_get_case_weight (dataset_dict (ds), &c, &dsc->bad_warn);
-      if (weight <= 0.0) 
-        continue;
+  for (; casereader_read (pass1, &c); case_destroy (&c))
+    {
+      double weight = dict_get_case_weight (dataset_dict (ds), &c, NULL);
        
       /* Check for missing values. */
       if (listwise_missing (dsc, &c)) 
@@ -737,8 +744,7 @@
           struct dsc_var *dv = &dsc->vars[i];
           double x = case_num (&c, dv->v);
           
-          if (dsc->missing_type != DSC_LISTWISE
-              && var_is_num_missing (dv->v, x, dsc->exclude))
+          if (var_is_num_missing (dv->v, x, dsc->exclude))
             {
               dv->missing += weight;
               continue;
@@ -753,19 +759,15 @@
             dv->max = x;
         }
     }
-  casereader_destroy (reader);
+  if (!casereader_destroy (pass1))
+    return;
 
   /* Second pass for higher-order moments. */
   if (dsc->max_moment > MOMENT_MEAN) 
     {
-      for (reader = casefile_get_reader (cf, NULL);
-           casereader_read (reader, &c);
-           case_destroy (&c))
+      for (; casereader_read (pass2, &c); case_destroy (&c))
         {
-          double weight = dict_get_case_weight (dataset_dict (ds), &c, 
-                                               &dsc->bad_warn);
-          if (weight <= 0.0)
-            continue;
+          double weight = dict_get_case_weight (dataset_dict (ds), &c, NULL);
       
           /* Check for missing values. */
           if (dsc->missing_type == DSC_LISTWISE && listwise_missing (dsc, &c))
@@ -776,17 +778,17 @@
               struct dsc_var *dv = &dsc->vars[i];
               double x = case_num (&c, dv->v);
           
-              if (dsc->missing_type != DSC_LISTWISE
-                  && var_is_num_missing (dv->v, x, dsc->exclude))
+              if (var_is_num_missing (dv->v, x, dsc->exclude))
                 continue;
 
               if (dv->moments != NULL)
                 moments_pass_two (dv->moments, x, weight);
             }
         }
-      casereader_destroy (reader);
+      if (!casereader_destroy (pass2))
+        return;
     }
-  
+
   /* Calculate results. */
   for (i = 0; i < dsc->var_cnt; i++)
     {
@@ -825,8 +827,6 @@
 
   /* Output results. */
   display (dsc);
-
-  return true;
 }
 
 /* Returns true if any of the descriptives variables in DSC's
Index: merge/src/language/stats/examine.q
===================================================================
--- merge.orig/src/language/stats/examine.q     2007-06-05 09:16:11.000000000 -0700
+++ merge/src/language/stats/examine.q  2007-06-05 09:18:06.000000000 -0700
@@ -26,7 +26,8 @@
 #include <stdlib.h>
 
 #include <data/case.h>
-#include <data/casefile.h>
+#include <data/casegrouper.h>
+#include <data/casereader.h>
 #include <data/dictionary.h>
 #include <data/procedure.h>
 #include <data/value-labels.h>
@@ -152,8 +153,8 @@
 
 
 /* Per Split function */
-static bool run_examine (const struct ccase *,
-                        const struct casefile *cf, void *cmd_, const struct dataset *);
+static void run_examine (struct cmd_examine *, struct casereader *,
+                         struct dataset *);
 
 static void output_examine (void);
 
@@ -193,6 +194,8 @@
 int
 cmd_examine (struct lexer *lexer, struct dataset *ds)
 {
+  struct casegrouper *grouper;
+  struct casereader *group;
   bool ok;
 
   subc_list_double_create (&percentile_list);
@@ -222,7 +225,11 @@
       subc_list_double_push (&percentile_list, 75);
     }
 
-  ok = multipass_procedure_with_splits (ds, run_examine, &cmd);
+  grouper = casegrouper_create_splits (proc_open (ds), dataset_dict (ds));
+  while (casegrouper_get_next_group (grouper, &group)) 
+    run_examine (&cmd, group, ds);
+  ok = casegrouper_destroy (grouper);
+  ok = proc_commit (ds) && ok;
 
   if ( totals )
     {
@@ -627,9 +634,6 @@
 
 
 
-static bool bad_weight_warn = true;
-
-
 /* Perform calculations for the sub factors */
 void
 factor_calc (const struct ccase *c, int case_no, double weight,
@@ -706,23 +710,28 @@
     }
 }
 
-static bool
-run_examine (const struct ccase *first, const struct casefile *cf,
-           void *cmd_, const struct dataset *ds)
+static void
+run_examine (struct cmd_examine *cmd, struct casereader *input,
+             struct dataset *ds)
 {
   struct dictionary *dict = dataset_dict (ds);
-  struct casereader *r;
+  casenumber case_no;
   struct ccase c;
   int v;
-
-  const struct cmd_examine *cmd = (struct cmd_examine *) cmd_;
+  bool ok;
 
   struct factor *fctr;
 
-  output_split_file_values (ds, first);
+  if (!casereader_peek (input, 0, &c))
+    return;
+  output_split_file_values (ds, &c);
+  case_destroy (&c);
+
+  input = casereader_create_filter_weight (input, dict, NULL, NULL);
+  input = casereader_create_counter (input, &case_no, 0);
 
   /* Make sure we haven't got rubbish left over from a
-     previous split */
+     previous split. */
   fctr = factors;
   while (fctr)
     {
@@ -738,15 +747,10 @@
   for ( v = 0 ; v < n_dependent_vars ; ++v )
     metrics_precalc (&totals[v]);
 
-  for (r = casefile_get_reader (cf, NULL);
-      casereader_read (r, &c) ;
-      case_destroy (&c) )
+  for (; casereader_read (input, &c); case_destroy (&c))
     {
-      int case_missing=0;
-      const int case_no = casereader_cnum (r);
-
-      const double weight =
-       dict_get_case_weight (dict, &c, &bad_weight_warn);
+      int case_missing = 0;
+      const double weight = dict_get_case_weight (dict, &c, NULL);
 
       if ( cmd->miss == XMN_LISTWISE )
        {
@@ -787,6 +791,7 @@
 
       factor_calc (&c, case_no, weight, case_missing);
     }
+  ok = casereader_destroy (input);
 
   for ( v = 0 ; v < n_dependent_vars ; ++v)
     {
@@ -882,7 +887,8 @@
       fctr = fctr->next;
     }
 
-  output_examine ();
+  if (ok)
+    output_examine ();
 
 
   if ( totals )
@@ -893,8 +899,6 @@
          metrics_destroy (&totals[i]);
        }
     }
-
-  return true;
 }
 
 
Index: merge/src/language/stats/flip.c
===================================================================
--- merge.orig/src/language/stats/flip.c        2007-06-05 09:16:11.000000000 -0700
+++ merge/src/language/stats/flip.c     2007-06-05 09:18:06.000000000 -0700
@@ -27,9 +27,9 @@
 #include <sys/types.h>
 #endif
 
-#include <data/case-sink.h>
-#include <data/case-source.h>
 #include <data/case.h>
+#include <data/casereader.h>
+#include <data/casereader-provider.h>
 #include <data/dictionary.h>
 #include <data/procedure.h>
 #include <data/settings.h>
@@ -42,7 +42,6 @@
 #include <libpspp/array.h>
 #include <libpspp/assertion.h>
 #include <libpspp/message.h>
-#include <libpspp/message.h>
 #include <libpspp/misc.h>
 #include <libpspp/pool.h>
 #include <libpspp/str.h>
@@ -70,8 +69,6 @@
     int case_cnt;               /* Pre-flip case count. */
     size_t case_size;           /* Post-flip bytes per case. */
 
-    union value *output_buf;            /* Case output buffer. */
-
     struct variable *new_names; /* Variable containing new variable names. */
     struct varname *new_names_head; /* First new variable. */
     struct varname *new_names_tail; /* Last new variable. */
@@ -82,22 +79,23 @@
     bool error;                 /* Error reading temporary file? */
   };
 
+static const struct casereader_class flip_casereader_class;
+
 static void destroy_flip_pgm (struct flip_pgm *);
-static struct case_sink *flip_sink_create (struct dataset *ds, struct flip_pgm *);
-static struct case_source *flip_source_create (struct flip_pgm *);
 static bool flip_file (struct flip_pgm *);
-static int build_dictionary (struct dictionary *, struct flip_pgm *);
-
-static const struct case_source_class flip_source_class;
-static const struct case_sink_class flip_sink_class;
+static bool build_dictionary (struct dictionary *, struct flip_pgm *);
+static bool write_flip_case (struct flip_pgm *, const struct ccase *);
 
 /* Parses and executes FLIP. */
 int
 cmd_flip (struct lexer *lexer, struct dataset *ds)
 {
-  struct flip_pgm *flip;
-  struct case_sink *sink;
   struct dictionary *dict = dataset_dict (ds);
+  struct flip_pgm *flip;
+  struct casereader *input, *reader;
+  union value *output_buf;
+  struct ccase c;
+  size_t i;
   bool ok;
 
   if (proc_make_temporary_transformations_permanent (ds))
@@ -144,8 +142,6 @@
 
   if (flip->new_names)
     {
-      size_t i;
-      
       for (i = 0; i < flip->var_cnt; i++)
        if (flip->var[i] == flip->new_names)
          {
@@ -155,20 +151,46 @@
          }
     }
 
+  output_buf = pool_nalloc (flip->pool,
+                                  flip->var_cnt, sizeof *output_buf);
+
+  flip->file = pool_tmpfile (flip->pool);
+  if (flip->file == NULL)
+    {
+      msg (SE, _("Could not create temporary file for FLIP."));
+      goto error;
+    }
+
+  /* Write variable names as first case. */
+  for (i = 0; i < flip->var_cnt; i++) 
+    buf_copy_str_rpad (output_buf[i].s, MAX_SHORT_STRING,
+                       var_get_name (flip->var[i]));
+  if (fwrite (output_buf, sizeof *output_buf,
+              flip->var_cnt, flip->file) != (size_t) flip->var_cnt) 
+    {
+      msg (SE, _("Error writing FLIP file: %s."), strerror (errno));
+      goto error;
+    }
+
+  flip->case_cnt = 1;
+
   /* Read the active file into a flip_sink. */
-  flip->case_cnt = 0;
   proc_make_temporary_transformations_permanent (ds);
-  sink = flip_sink_create (ds, flip);
-  if (sink == NULL)
-    goto error;
-  proc_set_sink (ds, sink);
-  flip->new_names_tail = NULL;
-  ok = procedure (ds,NULL, NULL);
+  proc_discard_output (ds);
+
+  input = proc_open (ds);
+  while (casereader_read (input, &c)) 
+    {
+      write_flip_case (flip, &c);
+      case_destroy (&c);
+    }
+  ok = casereader_destroy (input);
+  ok = proc_commit (ds) && ok;
 
   /* Flip the data we read. */
-  if (!flip_file (flip)) 
+  if (!ok || !flip_file (flip)) 
     {
-      discard_variables (ds);
+      proc_discard_active_file (ds);
       goto error;
     }
 
@@ -176,15 +198,17 @@
   dict_clear (dict);
   if (!build_dictionary (dict, flip))
     {
-      discard_variables (ds);
+      proc_discard_active_file (ds);
       goto error;
     }
   flip->case_size = dict_get_case_size (dict);
 
   /* Set up flipped data for reading. */
-  proc_set_source (ds, flip_source_create (flip));
-
-  return ok ? lex_end_of_command (lexer) : CMD_CASCADING_FAILURE;
+  reader = casereader_create_sequential (NULL, dict_get_next_value_idx (dict),
+                                         flip->case_cnt,
+                                         &flip_casereader_class, flip);
+  proc_set_active_file_data (ds, reader);
+  return lex_end_of_command (lexer);
 
  error:
   destroy_flip_pgm (flip);
@@ -251,7 +275,7 @@
 }
 
 /* Make a new dictionary for all the new variable names. */
-static int
+static bool
 build_dictionary (struct dictionary *dict, struct flip_pgm *flip)
 {
   dict_create_var_assert (dict, "CASE_LBL", 8);
@@ -263,7 +287,7 @@
       if (flip->case_cnt > 99999)
        {
          msg (SE, _("Cannot create more than 99999 variable names."));
-         return 0;
+         return false;
        }
       
       for (i = 0; i < flip->case_cnt; i++)
@@ -281,54 +305,17 @@
 
       for (v = flip->new_names_head; v; v = v->next)
         if (!make_new_var (dict, v->name))
-          return 0;
+          return false;
     }
   
-  return 1;
+  return true;
 }
      
-/* Creates a flip sink based on FLIP. */
-static struct case_sink *
-flip_sink_create (struct dataset *ds, struct flip_pgm *flip) 
-{
-  size_t i;
-
-  flip->output_buf = pool_nalloc (flip->pool,
-                                  flip->var_cnt, sizeof *flip->output_buf);
-
-  flip->file = pool_tmpfile (flip->pool);
-  if (flip->file == NULL)
-    {
-      msg (SE, _("Could not create temporary file for FLIP: %s."),
-           strerror (errno));
-      return NULL;
-    }
-
-  /* Write variable names as first case. */
-  for (i = 0; i < flip->var_cnt; i++) 
-    buf_copy_str_rpad (flip->output_buf[i].s, MAX_SHORT_STRING,
-                       var_get_name (flip->var[i]));
-  if (fwrite (flip->output_buf, sizeof *flip->output_buf,
-              flip->var_cnt, flip->file) != (size_t) flip->var_cnt) 
-    {
-      msg (SE, _("Error writing FLIP file: %s."), strerror (errno));
-      return NULL;
-    }
-
-  flip->case_cnt = 1;
-
-  return create_case_sink (&flip_sink_class,
-                          dataset_dict (ds),
-                          dataset_get_casefile_factory (ds),
-                          flip);
-}
-
 /* Writes case C to the FLIP sink.
    Returns true if successful, false if an I/O error occurred. */
 static bool
-flip_sink_write (struct case_sink *sink, const struct ccase *c)
+write_flip_case (struct flip_pgm *flip, const struct ccase *c)
 {
-  struct flip_pgm *flip = sink->aux;
   size_t i;
   
   flip->case_cnt++;
@@ -377,14 +364,7 @@
         }
       else
         out = SYSMIS;
-      flip->output_buf[i].f = out;
-    }
-         
-  if (fwrite (flip->output_buf, sizeof *flip->output_buf,
-              flip->var_cnt, flip->file) != (size_t) flip->var_cnt) 
-    {
-      msg (SE, _("Error writing FLIP file: %s."), strerror (errno));
-      return false; 
+      fwrite (&out, sizeof out, 1, flip->file);
     }
   return true;
 }
@@ -511,57 +491,39 @@
   return true;
 }
 
-/* FLIP sink class. */
-static const struct case_sink_class flip_sink_class = 
-  {
-    "FLIP",
-    NULL,
-    flip_sink_write,
-    NULL,
-    NULL,
-  };
-
-/* Creates and returns a FLIP source based on PGM,
-   which should have already been used as a sink. */
-static struct case_source *
-flip_source_create (struct flip_pgm *pgm)
-{
-  return create_case_source (&flip_source_class, pgm);
-}
-
 /* Reads one case into C.
    Returns true if successful, false at end of file or if an
    I/O error occurred. */
 static bool
-flip_source_read (struct case_source *source, struct ccase *c)
+flip_casereader_read (struct casereader *reader UNUSED, void *flip_,
+                      struct ccase *c)
 {
-  struct flip_pgm *flip = source->aux;
+  struct flip_pgm *flip = flip_;
   size_t i;
 
   if (flip->error || flip->cases_read >= flip->var_cnt)
     return false;
-  
-  if (flip->input_buf == NULL)
-    flip->input_buf = pool_nmalloc (flip->pool,
-                                    flip->case_cnt, sizeof *flip->input_buf);
-
-  if (fread (flip->input_buf, sizeof *flip->input_buf, flip->case_cnt,
-             flip->file) != flip->case_cnt) 
-    {
-      if (ferror (flip->file))
-        msg (SE, _("Error reading FLIP temporary file: %s."),
-             strerror (errno));
-      else if (feof (flip->file))
-        msg (SE, _("Unexpected end of file reading FLIP temporary file."));
-      else
-        NOT_REACHED ();
-      flip->error = true;
-      return false;
-    }
-
-  for (i = 0; i < flip->case_cnt; i++)
-    case_data_rw_idx (c, i)->f = flip->input_buf[i].f;
 
+  case_create (c, flip->case_cnt);
+  for (i = 0; i < flip->case_cnt; i++) 
+    {
+      double in;
+      if (fread (&in, sizeof in, 1, flip->file) != 1)
+        {
+          case_destroy (c);
+          if (ferror (flip->file))
+            msg (SE, _("Error reading FLIP temporary file: %s."),
+                 strerror (errno));
+          else if (feof (flip->file))
+            msg (SE, _("Unexpected end of file reading FLIP temporary file."));
+          else
+            NOT_REACHED ();
+          flip->error = true;
+          return false;
+        }
+      case_data_rw_idx (c, i)->f = in;
+    }
+  
   flip->cases_read++;
 
   return true;
@@ -570,19 +532,19 @@
 /* Destroys the source.
    Returns true if successful read, false if an I/O occurred
    during destruction or previously. */
-static bool
-flip_source_destroy (struct case_source *source)
+static void
+flip_casereader_destroy (struct casereader *reader UNUSED, void *flip_)
 {
-  struct flip_pgm *flip = source->aux;
-  bool ok = !flip->error;
+  struct flip_pgm *flip = flip_;
+  if (flip->error)
+    casereader_force_error (reader);
   destroy_flip_pgm (flip);
-  return ok;
 }
 
-static const struct case_source_class flip_source_class = 
+static const struct casereader_class flip_casereader_class = 
   {
-    "FLIP",
-    NULL,
-    flip_source_read,
-    flip_source_destroy
+    flip_casereader_read,
+    flip_casereader_destroy,
+    NULL,
+    NULL,
   };
Index: merge/src/language/stats/frequencies.q
===================================================================
--- merge.orig/src/language/stats/frequencies.q 2007-06-05 09:16:11.000000000 -0700
+++ merge/src/language/stats/frequencies.q      2007-06-05 09:18:06.000000000 -0700
@@ -29,6 +29,8 @@
 #include <gsl/gsl_histogram.h>
 
 #include <data/case.h>
+#include <data/casegrouper.h>
+#include <data/casereader.h>
 #include <data/dictionary.h>
 #include <data/format.h>
 #include <data/procedure.h>
@@ -45,7 +47,6 @@
 #include <libpspp/hash.h>
 #include <libpspp/magic.h>
 #include <libpspp/message.h>
-#include <libpspp/message.h>
 #include <libpspp/misc.h>
 #include <libpspp/pool.h>
 #include <libpspp/str.h>
@@ -271,9 +272,9 @@
 
 static void calc_stats (const struct variable *v, double d[frq_n_stats]);
 
-static void precalc (const struct ccase *, void *, const struct dataset *);
-static bool calc (const struct ccase *, void *, const struct dataset *);
-static bool postcalc (void *, const struct dataset *);
+static void precalc (struct casereader *, struct dataset *);
+static void calc (const struct ccase *, const struct dataset *);
+static void postcalc (void);
 
 static void postprocess_freq_tab (const struct variable *);
 static void dump_full (const struct variable *);
@@ -318,8 +319,10 @@
 static int
 internal_cmd_frequencies (struct lexer *lexer, struct dataset *ds)
 {
-  int i;
+  struct casegrouper *grouper;
+  struct casereader *input, *group;
   bool ok;
+  int i;
 
   n_percentiles = 0;
   percentiles = NULL;
@@ -383,7 +386,21 @@
   
 
   /* Do it! */
-  ok = procedure_with_splits (ds, precalc, calc, postcalc, NULL);
+  input = casereader_create_filter_weight (proc_open (ds), dataset_dict (ds),
+                                           NULL, NULL);
+  grouper = casegrouper_create_splits (input, dataset_dict (ds));
+  for (; casegrouper_get_next_group (grouper, &group);
+       casereader_destroy (group)) 
+    {
+      struct ccase c;
+      
+      precalc (group, ds);
+      for (; casereader_read (group, &c); case_destroy (&c)) 
+        calc (&c, ds);
+      postcalc ();
+    }
+  ok = casegrouper_destroy (grouper);
+  ok = proc_commit (ds) && ok;
 
   free_frequencies(&cmd);
 
@@ -496,14 +513,11 @@
 }
 
 /* Add data from case C to the frequency table. */
-static bool
-calc (const struct ccase *c, void *aux UNUSED, const struct dataset *ds)
+static void
+calc (const struct ccase *c, const struct dataset *ds)
 {
-  double weight;
+  double weight = dict_get_case_weight (dataset_dict (ds), c, NULL);
   size_t i;
-  bool bad_warn = true;
-
-  weight = dict_get_case_weight (dataset_dict (ds), c, &bad_warn);
 
   for (i = 0; i < n_variables; i++)
     {
@@ -530,7 +544,8 @@
                  struct freq *fp = pool_alloc (gen_pool, sizeof *fp);
                   fp->count = weight;
                   fp->value = pool_clone (gen_pool,
-                                      val, MAX (MAX_SHORT_STRING, vf->width));
+                                          val,
+                                          MAX (MAX_SHORT_STRING, vf->width));
                   *fpp = fp;
                }
            }
@@ -552,17 +567,20 @@
           NOT_REACHED ();
        }
     }
-  return true;
 }
 
 /* Prepares each variable that is the target of FREQUENCIES by setting
    up its hash table. */
 static void
-precalc (const struct ccase *first, void *aux UNUSED, const struct dataset *ds)
+precalc (struct casereader *input, struct dataset *ds)
 {
+  struct ccase c;
   size_t i;
 
-  output_split_file_values (ds, first);
+  if (!casereader_peek (input, 0, &c))
+    return;
+  output_split_file_values (ds, &c);
+  case_destroy (&c);
 
   pool_destroy (gen_pool);
   gen_pool = pool_create ();
@@ -590,8 +608,8 @@
 
 /* Finishes up with the variables after frequencies have been
    calculated.  Displays statistics, percentiles, ... */
-static bool
-postcalc (void *aux UNUSED, const struct dataset *ds  UNUSED)
+static void
+postcalc (void)
 {
   size_t i;
 
@@ -666,8 +684,6 @@
       cleanup_freq_tab (v);
 
     }
-
-  return true;
 }
 
 /* Returns the comparison function that should be used for
Index: merge/src/language/stats/npar-summary.c
===================================================================
--- merge.orig/src/language/stats/npar-summary.c        2007-06-05 09:16:11.000000000 -0700
+++ merge/src/language/stats/npar-summary.c     2007-06-05 09:18:06.000000000 -0700
@@ -18,12 +18,11 @@
 
 #include <config.h>
 #include <output/table.h>
+#include <data/casereader.h>
 #include <libpspp/hash.h>
 #include <data/variable.h>
 #include "npar-summary.h"
 #include <math/moments.h>
-#include <data/casefile.h>
-#include <data/casefilter.h>
 #include <data/case.h>
 #include <data/dictionary.h>
 #include <math.h>
@@ -35,38 +34,38 @@
 
 void
 npar_summary_calc_descriptives (struct descriptives *desc,
-                               const struct casefile *cf,
-                               struct casefilter *filter, 
+                               struct casereader *input,
                                const struct dictionary *dict,
                                const struct variable *const *vv, 
-                               int n_vars UNUSED)
+                               int n_vars UNUSED,
+                                enum mv_class filter)
 {
   int i = 0;
   while (*vv)
     {
-      bool warn = true;
       double minimum = DBL_MAX;
       double maximum = -DBL_MAX;
       double var;
       struct moments1 *moments = moments1_create (MOMENT_VARIANCE);
-      struct casereader *r = casefile_get_reader (cf, filter);
       struct ccase c;
       const struct variable *v = *vv++;
+      struct casereader *pass;
 
-      while (casereader_read(r, &c))
+      pass = casereader_clone (input);
+      pass = casereader_create_filter_missing (pass,
+                                               (struct variable **) &v, 1,
+                                               filter, NULL);
+      pass = casereader_create_filter_weight (pass, dict, NULL, NULL);
+      while (casereader_read(pass, &c))
        {
-         const union value *val = case_data (&c, v);
-         double w = dict_get_case_weight (dict, &c, &warn);
-
-         if ( ! casefilter_variable_missing (filter, &c, v ))
-           {
-             minimum = MIN (minimum, val->f);
-             maximum = MAX (maximum, val->f);
-             moments1_add (moments, val->f, w); 
-           }
+          double val = case_num (&c, v);
+          double w = dict_get_case_weight (dict, &c, NULL);
+          minimum = MIN (minimum, val);
+          maximum = MAX (maximum, val);
+          moments1_add (moments, val, w); 
          case_destroy (&c);
        }
-      casereader_destroy (r);
+      casereader_destroy (pass);
 
       moments1_calculate (moments, 
                          &desc[i].n, 
@@ -83,6 +82,7 @@
       
       i++;
     }
+  casereader_destroy (input);
 }
 
 
Index: merge/src/language/stats/npar-summary.h
===================================================================
--- merge.orig/src/language/stats/npar-summary.h        2007-06-05 09:16:11.000000000 -0700
+++ merge/src/language/stats/npar-summary.h     2007-06-05 09:18:06.000000000 -0700
@@ -22,9 +22,8 @@
 #include <config.h>
 
 struct variable ;
-struct casefile ;
+struct casereader ;
 struct dictionary;
-struct casefilter;
 
 struct descriptives
 {
@@ -36,11 +35,11 @@
 };
 
 void npar_summary_calc_descriptives (struct descriptives *desc,
-                                    const struct casefile *cf,
-                                    struct casefilter *filter,
+                                    struct casereader *input,
                                     const struct dictionary *dict,
                                     const struct variable *const *vv, 
-                                    int n_vars);
+                                    int n_vars,
+                                     enum mv_class filter);
 
 
 void do_summary_box (const struct descriptives *desc, 
Index: merge/src/language/stats/npar.h
===================================================================
--- merge.orig/src/language/stats/npar.h        2007-06-05 09:16:11.000000000 -0700
+++ merge/src/language/stats/npar.h     2007-06-05 09:18:06.000000000 -0700
@@ -19,18 +19,25 @@
 #if !npar_h
 #define npar_h 1
 
-typedef const struct variable *var_ptr;
-typedef var_ptr variable_pair[2];
+#include <stddef.h>
+#include <data/missing-values.h>
+
+#include <stddef.h>
+#include <data/missing-values.h>
+ 
+typedef struct variable *variable_pair[2];
 
 struct hsh_table;
 struct const_hsh_table;
-struct casefilter ;
+struct casefilter;
+struct casereader;
+struct dataset;
 
 struct npar_test
 {
   void (*execute) (const struct dataset *, 
-                  const struct casefile *, 
-                  struct casefilter *,
+                  struct casereader *,
+                   enum mv_class exclude,
                   const struct npar_test *
                   );
 
Index: merge/src/language/stats/npar.q
===================================================================
--- merge.orig/src/language/stats/npar.q        2007-06-05 09:16:11.000000000 -0700
+++ merge/src/language/stats/npar.q     2007-06-05 09:18:06.000000000 -0700
@@ -20,23 +20,25 @@
 
 #include <config.h>
 
-#include <language/lexer/lexer.h>
-#include <language/lexer/variable-parser.h>
-#include <language/command.h>
-#include <data/procedure.h>
-#include <libpspp/pool.h>
-#include <libpspp/hash.h>
+#include <language/stats/npar.h>
+
+#include <math.h>
 
-#include <data/casefilter.h>
 #include <data/case.h>
-#include <data/casefile.h>
-#include <math/moments.h>
+#include <data/casegrouper.h>
+#include <data/casereader.h>
 #include <data/dictionary.h>
-#include <language/stats/chisquare.h>
+#include <data/procedure.h>
+#include <language/command.h>
+#include <language/lexer/lexer.h>
+#include <language/lexer/variable-parser.h>
 #include <language/stats/binomial.h>
-#include <math.h>
+#include <language/stats/chisquare.h>
+#include <libpspp/hash.h>
+#include <libpspp/pool.h>
+#include <libpspp/taint.h>
+#include <math/moments.h>
 
-#include "npar.h"
 #include "npar-summary.h"
 
 #include "gettext.h"
@@ -75,7 +77,7 @@
                                       (those mentioned on ANY subcommand */
   int n_vars; /* Number of variables in vv */
 
-  struct casefilter *filter; /* The missing value filter */
+  enum mv_class filter;    /* Missing values to filter. */
 
   bool descriptives;       /* Descriptive statistics should be calculated */
   bool quartiles;          /* Quartiles should be calculated */
@@ -84,13 +86,12 @@
 void one_sample_insert_variables (const struct npar_test *test,
                                  struct const_hsh_table *variables);
 
-static bool 
-npar_execute(const struct ccase *first UNUSED,
-            const struct casefile *cf, void *aux, 
+static void
+npar_execute(struct casereader *input,
+             const struct npar_specs *specs,
             const struct dataset *ds)
 {
   int t;
-  const struct npar_specs *specs = aux;
   struct descriptives *summary_descriptives = NULL;
 
   for ( t = 0 ; t < specs->n_tests; ++t ) 
@@ -101,7 +102,7 @@
          msg (SW, _("NPAR subcommand not currently implemented."));
          continue;
        }
-      test->execute (ds, cf, specs->filter, test);
+      test->execute (ds, casereader_clone (input), specs->filter, test);
     }
 
   if ( specs->descriptives )
@@ -109,21 +110,21 @@
       summary_descriptives = xnmalloc (sizeof (*summary_descriptives), 
                                       specs->n_vars);
 
-      npar_summary_calc_descriptives (summary_descriptives, cf, 
-                                     specs->filter,
+      npar_summary_calc_descriptives (summary_descriptives,
+                                      casereader_clone (input), 
                                      dataset_dict (ds),
-                                     specs->vv, specs->n_vars);
+                                     specs->vv, specs->n_vars,
+                                      specs->filter);
     }
 
-  if ( specs->descriptives || specs->quartiles ) 
+  if ( (specs->descriptives || specs->quartiles)
+       && !taint_has_tainted_successor (casereader_get_taint (input)) ) 
     do_summary_box (summary_descriptives, specs->vv, specs->n_vars );
 
   free (summary_descriptives);
-  
-  return true;
+  casereader_destroy (input);
 }
 
-
 int
 cmd_npar_tests (struct lexer *lexer, struct dataset *ds)
 {
@@ -131,6 +132,9 @@
   int i;
   struct npar_specs npar_specs = {0, 0, 0, 0, 0, 0, 0, 0};
   struct const_hsh_table *var_hash;
+  struct casegrouper *grouper;
+  struct casereader *input, *group;
+  
   npar_specs.pool = pool_create ();
 
   var_hash = const_hsh_create_pool (npar_specs.pool, 0, 
@@ -179,17 +183,20 @@
        }
     }
 
-  npar_specs.filter = 
-    casefilter_create (cmd.incl == NPAR_EXCLUDE ? MV_ANY : MV_SYSTEM, 0, 0);
-
-  if ( cmd.miss == NPAR_LISTWISE ) 
-    casefilter_add_variables (npar_specs.filter, 
-                             npar_specs.vv, 
-                             npar_specs.n_vars);
-
-  ok = multipass_procedure_with_splits (ds, npar_execute, &npar_specs);
+  npar_specs.filter = cmd.incl == NPAR_EXCLUDE ? MV_ANY : MV_SYSTEM;
 
-  casefilter_destroy (npar_specs.filter);
+  input = proc_open (ds);
+  if ( cmd.miss == NPAR_LISTWISE )
+    input = casereader_create_filter_missing (input,
+                                              (struct variable **) npar_specs.vv,
+                                              npar_specs.n_vars,
+                                              npar_specs.filter, NULL);
+
+  grouper = casegrouper_create_splits (input, dataset_dict (ds));
+  while (casegrouper_get_next_group (grouper, &group))
+    npar_execute (group, &npar_specs, ds);
+  ok = casegrouper_destroy (grouper);
+  ok = proc_commit (ds) && ok;
 
   const_hsh_destroy (var_hash);
 
Index: merge/src/language/stats/oneway.q
===================================================================
--- merge.orig/src/language/stats/oneway.q      2007-06-05 09:16:11.000000000 -0700
+++ merge/src/language/stats/oneway.q   2007-06-05 09:18:06.000000000 -0700
@@ -25,12 +25,12 @@
 #include <stdlib.h>
 
 #include <data/case.h>
-#include <data/casefile.h>
+#include <data/casegrouper.h>
+#include <data/casereader.h>
 #include <data/dictionary.h>
 #include <data/procedure.h>
 #include <data/value-labels.h>
 #include <data/variable.h>
-#include <data/casefilter.h>
 #include <language/command.h>
 #include <language/dictionary/split-file.h>
 #include <language/lexer/lexer.h>
@@ -39,9 +39,9 @@
 #include <libpspp/hash.h>
 #include <libpspp/magic.h>
 #include <libpspp/message.h>
-#include <libpspp/message.h>
 #include <libpspp/misc.h>
 #include <libpspp/str.h>
+#include <libpspp/taint.h>
 #include <math/group-proc.h>
 #include <math/group.h>
 #include <math/levene.h>
@@ -65,9 +65,6 @@
 /* (declarations) */
 /* (functions) */
 
-static bool bad_weight_warn = true;
-
-
 static struct cmd_oneway cmd;
 
 /* The independent variable */
@@ -89,9 +86,8 @@
 static int ostensible_number_of_groups = -1;
 
 
-static bool run_oneway(const struct ccase *first,
-                       const struct casefile *cf, 
-                      void *_mode, const struct dataset *);
+static void run_oneway (struct cmd_oneway *, struct casereader *, 
+                        const struct dataset *);
 
 
 /* Routines to show the output tables */
@@ -113,6 +109,8 @@
 int
 cmd_oneway (struct lexer *lexer, struct dataset *ds)
 {
+  struct casegrouper *grouper;
+  struct casereader *group;
   int i;
   bool ok;
 
@@ -138,7 +136,12 @@
        }
     }
 
-  ok = multipass_procedure_with_splits (ds, run_oneway, &cmd);
+  /* Data pass.  FIXME: error handling. */
+  grouper = casegrouper_create_splits (proc_open (ds), dataset_dict (ds));
+  while (casegrouper_get_next_group (grouper, &group)) 
+    run_oneway (&cmd, group, ds);
+  ok = casegrouper_destroy (grouper);
+  ok = proc_commit (ds) && ok;
 
   free (vars);
   free_oneway (&cmd);
@@ -887,17 +890,23 @@
   free (value);
 }
 
-static bool
-run_oneway(const struct ccase *first, const struct casefile *cf, 
-          void *cmd_, const struct dataset *ds)
+static void
+run_oneway (struct cmd_oneway *cmd,
+            struct casereader *input, 
+            const struct dataset *ds)
 {
-  struct casereader *r;
+  struct taint *taint;
+  struct dictionary *dict = dataset_dict (ds);
+  enum mv_class exclude;
+  struct casereader *reader;
   struct ccase c;
-  struct casefilter *filter = NULL;
 
-  struct cmd_oneway *cmd = (struct cmd_oneway *) cmd_;
+  if (!casereader_peek (input, 0, &c))
+    return;
+  output_split_file_values (ds, &c);
+  case_destroy (&c);
 
-  output_split_file_values (ds, first);
+  taint = taint_clone (casereader_get_taint (input));
 
   global_group_hash = hsh_create(4, 
                                 (hsh_compare_func *) compare_values,
@@ -907,31 +916,25 @@
 
   precalc(cmd);
 
-  filter = casefilter_create ( (cmd->incl != ONEWAY_INCLUDE
-                                ? MV_ANY : MV_SYSTEM), 
-                              vars, n_vars );
-
-  for(r = casefile_get_reader (cf, filter);
-      casereader_read (r, &c) ;
-      case_destroy (&c)) 
+  exclude = cmd->incl != ONEWAY_INCLUDE ? MV_ANY : MV_SYSTEM;
+  input = casereader_create_filter_missing (input, &indep_var, 1,
+                                            exclude, NULL);
+  if (cmd->miss == ONEWAY_LISTWISE)
+    input = casereader_create_filter_missing (input, vars, n_vars,
+                                              exclude, NULL);
+  input = casereader_create_filter_weight (input, dict, NULL, NULL);
+
+  reader = casereader_clone (input);
+  for (; casereader_read (reader, &c); case_destroy (&c)) 
     {
       size_t i;
 
-      const double weight = 
-       dict_get_case_weight (dataset_dict (ds), &c, &bad_weight_warn);
-
-      const union value *indep_val;
-      void **p;
+      const double weight = dict_get_case_weight (dict, &c, NULL);
       
-      if ( casefilter_variable_missing (filter, &c, indep_var))
-       continue;
-
-      indep_val = case_data (&c, indep_var);
-      p = hsh_probe (global_group_hash, indep_val);
+      const union value *indep_val = case_data (&c, indep_var);
+      void **p = hsh_probe (global_group_hash, indep_val);
       if (*p == NULL)
         *p = value_dup (indep_val, var_get_width (indep_var));
-         
-      hsh_insert ( global_group_hash, (void *) indep_val );
 
       for ( i = 0 ; i < n_vars ; ++i ) 
        {
@@ -960,7 +963,7 @@
              hsh_insert ( group_hash, (void *) gs );
            }
 
-         if (! casefilter_variable_missing (filter, &c, v))
+         if (!var_is_value_missing (v, val, exclude))
            {
              struct group_statistics *totals = &gp->ugs;
 
@@ -989,24 +992,21 @@
        }
   
     }
-
-  casereader_destroy (r);
+  casereader_destroy (reader);
 
   postcalc(cmd);
 
   
   if ( stat_tables & STAT_HOMO ) 
-    levene (dataset_dict (ds), cf, indep_var, n_vars, vars, 
-           filter);
+    levene (dict, casereader_clone (input), indep_var, n_vars, vars, exclude);
 
-  casefilter_destroy (filter);
+  casereader_destroy (input);
 
   ostensible_number_of_groups = hsh_count (global_group_hash);
 
-
-  output_oneway();
-
-  return true;
+  if (!taint_has_tainted_successor (taint))
+    output_oneway();
+  taint_destroy (taint);
 }
 
 
Index: merge/src/language/stats/rank.q
===================================================================
--- merge.orig/src/language/stats/rank.q        2007-06-05 09:16:11.000000000 -0700
+++ merge/src/language/stats/rank.q     2007-06-05 09:18:06.000000000 -0700
@@ -18,27 +18,28 @@
 
 #include <config.h>
 
-#include "sort-criteria.h"
+#include <limits.h>
+#include <math.h>
 
 #include <data/dictionary.h>
 #include <data/format.h>
 #include <data/missing-values.h>
 #include <data/procedure.h>
 #include <data/variable.h>
+#include <data/case-ordering.h>
 #include <data/case.h>
-#include <data/casefile.h>
-#include <data/fastfile.h>
-#include <data/storage-stream.h>
+#include <data/casegrouper.h>
+#include <data/casereader.h>
+#include <data/casewriter.h>
 #include <language/command.h>
 #include <language/stats/sort-criteria.h>
-#include <limits.h>
 #include <libpspp/compiler.h>
+#include <libpspp/taint.h>
 #include <math/sort.h>
 #include <output/table.h>
 #include <output/manager.h>
 
 #include <gsl/gsl_cdf.h>
-#include <math.h>
 
 #include "gettext.h"
 #define _(msgid) gettext (msgid)
@@ -152,7 +153,7 @@
 static struct rank_spec *rank_specs;
 static size_t n_rank_specs;
 
-static struct sort_criteria *sc;
+static struct case_ordering *sc;
 
 static const struct variable **group_vars;
 static size_t n_group_vars;
@@ -165,14 +166,14 @@
 
 static struct cmd_rank cmd;
 
-static struct casefile *rank_sorted_casefile (struct casefile *cf,
-                                             const struct sort_criteria *,
-                                             const struct dictionary *,
-                                             const struct rank_spec *rs,
-                                             int n_rank_specs,
-                                             int idx,
-                                             const struct missing_values *miss
-                                             );
+static void rank_sorted_file (struct casereader *, 
+                              struct casewriter *,
+                              const struct dictionary *,
+                              const struct rank_spec *rs, 
+                              int n_rank_specs,
+                              int idx,
+                              struct variable *rank_var);
+
 static const char *
 fraction_name(void)
 {
@@ -232,69 +233,56 @@
 }
 
 
-static bool
-rank_cmd (struct dataset *ds, const struct sort_criteria *sc,
+static bool 
+rank_cmd (struct dataset *ds, const struct case_ordering *sc, 
          const struct rank_spec *rank_specs, int n_rank_specs)
 {
-  struct sort_criteria criteria;
-  bool result = true;
+  struct case_ordering *base_ordering;
+  bool ok = true;
   int i;
   const int n_splits = dict_get_split_cnt (dataset_dict (ds));
 
-  criteria.crit_cnt = n_splits + n_group_vars + 1;
-  criteria.crits = xnmalloc (criteria.crit_cnt, sizeof *criteria.crits);
+  base_ordering = case_ordering_create (dataset_dict (ds));
   for (i = 0; i < n_splits ; i++)
-    {
-      const struct variable *v = dict_get_split_vars (dataset_dict (ds))[i];
-      criteria.crits[i].fv = var_get_case_index (v);
-      criteria.crits[i].width = var_get_width (v);
-      criteria.crits[i].dir = SRT_ASCEND;
-    }
+    case_ordering_add_var (base_ordering,
+                           dict_get_split_vars (dataset_dict (ds))[i],
+                           SRT_ASCEND);
+
   for (i = 0; i < n_group_vars; i++)
+    case_ordering_add_var (base_ordering, group_vars[i], SRT_ASCEND);
+  for (i = 0 ; i < case_ordering_get_var_cnt (sc) ; ++i )
     {
-      criteria.crits[i + n_splits].fv = var_get_case_index (group_vars[i]);
-      criteria.crits[i + n_splits].width = var_get_width (group_vars[i]);
-      criteria.crits[i + n_splits].dir = SRT_ASCEND;
-    }
-  for (i = 0 ; i < sc->crit_cnt ; ++i )
-    {
-      struct casefile *out ;
-      struct casefile *cf ;
-      struct casereader *reader ;
-      struct casefile *sorted_cf ;
-
-      /* Obtain active file in CF. */
-      if (!procedure (ds, NULL, NULL))
-       goto error;
-
-      cf = proc_capture_output (ds);
-
-      /* Sort CF into SORTED_CF. */
-      reader = casefile_get_destructive_reader (cf) ;
-      criteria.crits[criteria.crit_cnt - 1] = sc->crits[i];
-      assert ( sc->crits[i].fv == var_get_case_index (src_vars[i]) );
-      sorted_cf = sort_execute (reader, &criteria, NULL);
-      casefile_destroy (cf);
-
-      out = rank_sorted_casefile (sorted_cf, &criteria,
-                                 dataset_dict (ds),
-                                  rank_specs, n_rank_specs,
-                                 i, var_get_missing_values (src_vars[i]));
-      if ( NULL == out )
-       {
-         result = false ;
-         continue ;
-       }
-
-      proc_set_source (ds, storage_source_create (out));
+      struct case_ordering *ordering;
+      struct casegrouper *grouper;
+      struct casereader *group;
+      struct casewriter *output;
+      struct casereader *ranked_file;
+
+      ordering = case_ordering_clone (base_ordering);
+      case_ordering_add_var (ordering,
+                             case_ordering_get_var (sc, i),
+                             case_ordering_get_direction (sc, i));
+
+      proc_discard_output (ds);
+      grouper = casegrouper_create_case_ordering (sort_execute (proc_open (ds),
+                                                                ordering),
+                                                  base_ordering);
+      output = autopaging_writer_create (dict_get_next_value_idx (
+                                           dataset_dict (ds)));
+      while (casegrouper_get_next_group (grouper, &group)) 
+        rank_sorted_file (group, output, dataset_dict (ds),
+                          rank_specs, n_rank_specs,
+                          i, src_vars[i]); 
+      ok = casegrouper_destroy (grouper);
+      ok = proc_commit (ds) && ok;
+      ranked_file = casewriter_make_reader (output);
+      ok = proc_set_active_file_data (ds, ranked_file) && ok;
+      if (!ok)
+        break;
     }
+  case_ordering_destroy (base_ordering);
 
-  free (criteria.crits);
-  return result ;
-
-error:
-  free (criteria.crits);
-  return false ;
+  return ok; 
 }
 
 /* Hardly a rank function !! */
@@ -311,7 +299,8 @@
          int i, double w UNUSED)
 {
   double rank;
-  if ( c >= 1.0 )
+
+  if ( c >= 1.0 ) 
     {
       switch (cmd.ties)
        {
@@ -471,192 +460,71 @@
   NOT_REACHED();
 }
 
-
-/* Rank the casefile belonging to CR, starting from the current
-   postition of CR continuing up to and including the ENDth case.
-
-   RS points to an array containing  the rank specifications to
-   use. N_RANK_SPECS is the number of elements of RS.
-
-
-   DEST_VAR_INDEX is the index into the rank_spec destvar element
-   to be used for this ranking.
-
-   Prerequisites: 1. The casefile must be sorted according to CRITERION.
-                  2. W is the sum of the non-missing caseweights for this
-                 range of the casefile.
-*/
 static void
-rank_cases (struct casereader *cr,
-           unsigned long end,
-           const struct dictionary *dict,
-           const struct sort_criterion *criterion,
-           const struct missing_values *mv,
-           double w,
-           const struct rank_spec *rs,
-           int n_rank_specs,
-           int dest_var_index,
-           struct casefile *dest)
-{
-  bool warn = true;
+rank_sorted_file (struct casereader *input, 
+                  struct casewriter *output,
+                  const struct dictionary *dict,
+                  const struct rank_spec *rs, 
+                  int n_rank_specs, 
+                  int dest_idx, 
+                  struct variable *rank_var)
+{
+  struct casereader *pass1, *pass2, *pass2_1;
+  struct casegrouper *tie_grouper;
+  struct ccase c;
+  double w = 0.0;
   double cc = 0.0;
-  double cc_1;
-  int iter = 1;
-
-  const int fv = criterion->fv;
-  const int width = criterion->width;
-
-  while (casereader_cnum (cr) < end)
-    {
-      struct casereader *lookahead;
-      const union value *this_value;
-      bool this_value_is_missing;
-      struct ccase this_case, lookahead_case;
-      double c;
-      int i;
-      size_t n = 0;
-
-      if (!casereader_read_xfer (cr, &this_case))
-        break;
-
-      this_value = case_data_idx (&this_case, fv);
-      this_value_is_missing = mv_is_value_missing (mv, this_value,
-                                                   exclude_values);
-      c = dict_get_case_weight (dict, &this_case, &warn);
-
-      lookahead = casereader_clone (cr);
-      n = 0;
-      while (casereader_cnum (lookahead) < end
-             && casereader_read_xfer (lookahead, &lookahead_case))
-        {
-          const union value *lookahead_value = case_data_idx (&lookahead_case, fv);
-          int diff = compare_values (this_value, lookahead_value, width);
-
-          if (diff != 0)
-            {
-             /* Make sure the casefile was sorted */
-             assert ( diff == ((criterion->dir == SRT_ASCEND) ? -1 :1));
-
-              case_destroy (&lookahead_case);
-              break;
-            }
-
-          c += dict_get_case_weight (dict, &lookahead_case, &warn);
-          case_destroy (&lookahead_case);
-          n++;
-        }
-      casereader_destroy (lookahead);
-
-      cc_1 = cc;
-      if ( !this_value_is_missing )
-       cc += c;
-
-      do
-        {
-          for (i = 0; i < n_rank_specs; ++i)
-            {
-              const struct variable *dst_var = rs[i].destvars[dest_var_index];
-
-             if  (this_value_is_missing)
-               case_data_rw (&this_case, dst_var)->f = SYSMIS;
-             else
-               case_data_rw (&this_case, dst_var)->f =
-                 rank_func[rs[i].rfunc](c, cc, cc_1, iter, w);
-            }
-          casefile_append_xfer (dest, &this_case);
-        }
-      while (n-- > 0 && casereader_read_xfer (cr, &this_case));
-
-      if ( !this_value_is_missing )
-       iter++;
-    }
-
-  /* If this isn't true, then all the results will be wrong */
-  assert ( w == cc );
-}
-
-static bool
-same_group (const struct ccase *a, const struct ccase *b,
-            const struct sort_criteria *crit)
-{
-  size_t i;
+  int tie_group = 1;
 
-  for (i = 0; i < crit->crit_cnt - 1; i++)
-    {
-      struct sort_criterion *c = &crit->crits[i];
-      if (compare_values (case_data_idx (a, c->fv),
-                          case_data_idx (b, c->fv), c->width) != 0)
-        return false;
-    }
 
-  return true;
-}
+  input = casereader_create_filter_missing (input, &rank_var, 1,
+                                            exclude_values, output);
+  input = casereader_create_filter_weight (input, dict, NULL, output);
 
-static struct casefile *
-rank_sorted_casefile (struct casefile *cf,
-                     const struct sort_criteria *crit,
-                     const struct dictionary *dict,
-                     const struct rank_spec *rs,
-                     int n_rank_specs,
-                     int dest_idx,
-                     const struct missing_values *mv)
-{
-  struct casefile *dest = fastfile_create (casefile_get_value_cnt (cf));
-  struct casereader *lookahead = casefile_get_reader (cf, NULL);
-  struct casereader *pos = casereader_clone (lookahead);
-  struct ccase group_case;
-  bool warn = true;
+  casereader_split (input, &pass1, &pass2);
 
-  struct sort_criterion *ultimate_crit = &crit->crits[crit->crit_cnt - 1];
+  /* Pass 1: Get total group weight. */
+  for (; casereader_read (pass1, &c); case_destroy (&c)) 
+    w += dict_get_case_weight (dict, &c, NULL);
+  casereader_destroy (pass1);
 
-  if (casereader_read (lookahead, &group_case))
+  /* Pass 2: Do ranking. */
+  tie_grouper = casegrouper_create_vars (pass2, &rank_var, 1);
+  while (casegrouper_get_next_group (tie_grouper, &pass2_1)) 
     {
-      struct ccase this_case;
-      const union value *this_value ;
-      double w = 0.0;
-      this_value = case_data_idx( &group_case, ultimate_crit->fv);
+      struct casereader *pass2_2;
+      double cc_1 = cc;
+      double tw = 0.0;
+      int i;
 
-      if ( !mv_is_value_missing (mv, this_value, exclude_values) )
-       w = dict_get_case_weight (dict, &group_case, &warn);
+      pass2_2 = casereader_clone (pass2_1);
+      taint_propagate (casereader_get_taint (pass2_2),
+                       casewriter_get_taint (output));
+
+      /* Pass 2.1: Sum up weight for tied cases. */
+      for (; casereader_read (pass2_1, &c); case_destroy (&c)) 
+        tw += dict_get_case_weight (dict, &c, NULL);
+      cc += tw;
+      casereader_destroy (pass2_1);
 
-      while (casereader_read (lookahead, &this_case))
+      /* Pass 2.2: Rank tied cases. */
+      while (casereader_read (pass2_2, &c)) 
         {
-         const union value *this_value =
-           case_data_idx(&this_case, ultimate_crit->fv);
-          double c = dict_get_case_weight (dict, &this_case, &warn);
-          if (!same_group (&group_case, &this_case, crit))
+          for (i = 0; i < n_rank_specs; ++i)
             {
-              rank_cases (pos, casereader_cnum (lookahead) - 1,
-                         dict,
-                         ultimate_crit,
-                         mv, w,
-                         rs, n_rank_specs,
-                         dest_idx, dest);
-
-              w = 0.0;
-              case_destroy (&group_case);
-              case_move (&group_case, &this_case);
+              const struct variable *dst_var = rs[i].destvars[dest_idx];
+              double *dst_value = &case_data_rw (&c, dst_var)->f;
+              *dst_value = rank_func[rs[i].rfunc] (tw, cc, cc_1, tie_group, w);
             }
-         if ( !mv_is_value_missing (mv, this_value, exclude_values) )
-           w += c;
-          case_destroy (&this_case);
+          casewriter_write (output, &c);
         }
-      case_destroy (&group_case);
-      rank_cases (pos, ULONG_MAX, dict, ultimate_crit, mv, w,
-                 rs, n_rank_specs, dest_idx, dest);
+      casereader_destroy (pass2_2);
+          
+      tie_group++;
     }
-
-  if (casefile_error (dest))
-    {
-      casefile_destroy (dest);
-      dest = NULL;
-    }
-
-  casefile_destroy (cf);
-  return dest;
+  casegrouper_destroy (tie_grouper);
 }
 
-
 /* Transformation function to enumerate all the cases */
 static int
 create_resort_key (void *key_var_, struct ccase *cc, casenumber case_num)
@@ -749,7 +617,7 @@
   rank_specs = NULL;
   n_rank_specs = 0;
 
-  sort_destroy_criteria (sc);
+  case_ordering_destroy (sc);
   sc = NULL;
 
   free (src_vars);
@@ -783,13 +651,13 @@
 
       rank_specs = xmalloc (sizeof (*rank_specs));
       rank_specs[0].rfunc = RANK;
-      rank_specs[0].destvars =
-       xcalloc (sc->crit_cnt, sizeof (struct variable *));
+      rank_specs[0].destvars = 
+       xcalloc (case_ordering_get_var_cnt (sc), sizeof (struct variable *));
 
       n_rank_specs = 1;
     }
 
-  assert ( sc->crit_cnt == n_src_vars);
+  assert ( case_ordering_get_var_cnt (sc) == n_src_vars);
 
   /* Create variables for all rank destinations which haven't
      already been created with INTO.
@@ -891,31 +759,29 @@
     msg(MW, _("FRACTION has been specified, but NORMAL and PROPORTION rank functions have not been requested.  The FRACTION subcommand will be ignored.") );
 
   /* Add a variable which we can sort by to get back the original
-     order */
-  order = dict_create_var_assert (dataset_dict (ds), "$ORDER_", 0);
+     order */ 
+  order = dict_create_var_assert (dataset_dict (ds), "$ORDER_", 0); 
 
   add_transformation (ds, create_resort_key, 0, order);
 
   /* Do the ranking */
   result = rank_cmd (ds, sc, rank_specs, n_rank_specs);
 
-  /* Put the active file back in its original order */
+  /* Put the active file back in its original order.  Delete
+     our sort key, which we don't need anymore.  */
   {
-    struct sort_criteria criteria;
-    struct sort_criterion restore_criterion ;
-    restore_criterion.fv = var_get_case_index (order);
-    restore_criterion.width = 0;
-    restore_criterion.dir = SRT_ASCEND;
-
-    criteria.crits = &restore_criterion;
-    criteria.crit_cnt = 1;
+    struct case_ordering *ordering = case_ordering_create (dataset_dict (ds));
+    struct casereader *sorted;
+    case_ordering_add_var (ordering, order, SRT_ASCEND);
+    /* FIXME: loses error conditions. */
+    proc_discard_output (ds);
+    sorted = sort_execute (proc_open (ds), ordering);
+    result = proc_commit (ds) && result;
 
-    sort_active_file_in_place (ds, &criteria);
+    dict_delete_var (dataset_dict (ds), order);
+    result = proc_set_active_file_data (ds, sorted) && result;
   }
 
-  /* ... and we don't need our sort key anymore. So delete it */
-  dict_delete_var (dataset_dict (ds), order);
-
   rank_cleanup();
 
 
@@ -928,16 +794,16 @@
 static int
 rank_custom_variables (struct lexer *lexer, struct dataset *ds, struct cmd_rank *cmd UNUSED, void *aux UNUSED)
 {
-  static const int terminators[2] = {T_BY, 0};
-
   lex_match (lexer, '=');
 
   if ((lex_token (lexer) != T_ID || dict_lookup_var (dataset_dict (ds), lex_tokid (lexer)) == NULL)
       && lex_token (lexer) != T_ALL)
       return 2;
 
-  sc = sort_parse_criteria (lexer, dataset_dict (ds),
-                           &src_vars, &n_src_vars, 0, terminators);
+  sc = parse_case_ordering (lexer, dataset_dict (ds), NULL);
+  if (sc == NULL)
+    return 0;
+  case_ordering_get_vars (sc, &src_vars, &n_src_vars);
 
   if ( lex_match (lexer, T_BY)  )
     {
@@ -970,9 +836,10 @@
   rank_specs[n_rank_specs - 1].rfunc = f;
   rank_specs[n_rank_specs - 1].destvars = NULL;
 
-  rank_specs[n_rank_specs - 1].destvars =
-           xcalloc (sc->crit_cnt, sizeof (struct variable *));
-
+  rank_specs[n_rank_specs - 1].destvars = 
+           xcalloc (case_ordering_get_var_cnt (sc),
+                     sizeof (struct variable *));
+         
   if (lex_match_id (lexer, "INTO"))
     {
       struct variable *destvar;
@@ -985,7 +852,7 @@
              msg(SE, _("Variable %s already exists."), lex_tokid (lexer));
              return 0;
            }
-         if ( var_count >= sc->crit_cnt )
+         if ( var_count >= case_ordering_get_var_cnt (sc) ) 
            {
              msg(SE, _("Too many variables in INTO clause."));
              return 0;
Index: merge/src/language/stats/regression.q
===================================================================
--- merge.orig/src/language/stats/regression.q  2007-06-05 09:16:11.000000000 -0700
+++ merge/src/language/stats/regression.q       2007-06-05 09:18:06.000000000 -0700
@@ -26,7 +26,8 @@
 
 #include "regression-export.h"
 #include <data/case.h>
-#include <data/casefile.h>
+#include <data/casegrouper.h>
+#include <data/casereader.h>
 #include <data/category.h>
 #include <data/dictionary.h>
 #include <data/missing-values.h>
@@ -41,6 +42,7 @@
 #include <libpspp/alloc.h>
 #include <libpspp/compiler.h>
 #include <libpspp/message.h>
+#include <libpspp/taint.h>
 #include <math/design-matrix.h>
 #include <math/coefficient.h>
 #include <math/linreg/linreg.h>
@@ -48,6 +50,7 @@
 #include <output/table.h>
 
 #include "gettext.h"
+#define _(msgid) gettext (msgid)
 
 #define REG_LARGE_DATA 1000
 
@@ -120,14 +123,8 @@
  */
 static struct file_handle *model_file;
 
-/*
-  Return value for the procedure.
- */
-static int pspp_reg_rc = CMD_SUCCESS;
-
-static bool run_regression (const struct ccase *,
-                           const struct casefile *, void *,
-                           const struct dataset *);
+static bool run_regression (struct casereader *, struct cmd_regression *,
+                            struct dataset *);
 
 /* 
    STATISTICS subcommand output functions.
@@ -951,6 +948,9 @@
 int
 cmd_regression (struct lexer *lexer, struct dataset *ds)
 {
+  struct casegrouper *grouper;
+  struct casereader *group;
+  bool ok;
   size_t i;
 
   if (!parse_regression (lexer, ds, &cmd, NULL))
@@ -961,12 +961,18 @@
     {
       models[i] = NULL;
     }
-  if (!multipass_procedure_with_splits (ds, run_regression, &cmd))
-    return CMD_CASCADING_FAILURE;
+
+  /* Data pass. */
+  grouper = casegrouper_create_splits (proc_open (ds), dataset_dict (ds));
+  while (casegrouper_get_next_group (grouper, &group))
+    run_regression (group, &cmd, ds);
+  ok = casegrouper_destroy (grouper);
+  ok = proc_commit (ds) && ok;
+
   subcommand_save (ds, cmd.sbc_save, models);
   free (v_variables);
   free (models);
-  return pspp_reg_rc;
+  return ok ? CMD_SUCCESS : CMD_FAILURE;
 }
 
 /*
@@ -978,47 +984,6 @@
   return v == v_variables[k];
 }
 
-/*
-  Mark missing cases. Return the number of non-missing cases.
-  Compute the first two moments.
- */
-static size_t
-mark_missing_cases (const struct casefile *cf, const struct variable *v,
-                   int *is_missing_case, double n_data,
-                   struct moments_var *mom)
-{
-  struct casereader *r;
-  struct ccase c;
-  size_t row;
-  const union value *val;
-  double w = 1.0;
-
-  for (r = casefile_get_reader (cf, NULL);
-       casereader_read (r, &c); case_destroy (&c))
-    {
-      row = casereader_cnum (r) - 1;
-
-      val = case_data (&c, v);
-      if (mom != NULL)
-       {
-         moments1_add (mom->m, val->f, w);
-       }
-      cat_value_update (v, val);
-      if (var_is_value_missing (v, val, MV_ANY))
-       {
-         if (!is_missing_case[row])
-           {
-             /* Now it is missing. */
-             n_data--;
-             is_missing_case[row] = 1;
-           }
-       }
-    }
-  casereader_destroy (r);
-
-  return n_data;
-}
-
 /* Parser for the variables sub command */
 static int
 regression_custom_variables (struct lexer *lexer, struct dataset *ds,
@@ -1046,74 +1011,59 @@
   return 1;
 }
 
-/*
-  Count the explanatory variables. The user may or may
-  not have specified a response variable in the syntax.
- */
+/* Identify the explanatory variables in v_variables.  Returns
+   the number of independent variables. */
 static int
-get_n_indep (const struct variable *v)
+identify_indep_vars (struct variable **indep_vars, struct variable *depvar)
 {
-  int result;
-  int i = 0;
+  int n_indep_vars = 0;
+  int i;
 
-  result = n_variables;
-  while (i < n_variables)
-    {
-      if (is_depvar (i, v))
-       {
-         result--;
-         i = n_variables;
-       }
-      i++;
-    }
-  return (result == 0) ? 1 : result;
+  for (i = 0; i < n_variables; i++)
+    if (!is_depvar (i, depvar))
+      indep_vars[n_indep_vars++] = v_variables[i];
+
+  return n_indep_vars;
 }
 
-/*
-  Read from the active file. Identify the explanatory variables in
-  v_variables. Encode categorical variables. Drop cases with missing
-  values.
-*/
+/* Encode categorical variables.
+   Returns number of valid cases. */
 static int
-prepare_data (int n_data, int is_missing_case[],
-             const struct variable **indep_vars,
-             const struct variable *depvar, const struct casefile *cf,
-             struct moments_var *mom)
+prepare_categories (struct casereader *input,
+                    struct variable **vars, size_t n_vars,
+                    struct moments_var *mom)
 {
-  int i;
-  int j;
+  int n_data;
+  struct ccase c;
+  size_t i;
 
-  assert (indep_vars != NULL);
-  j = 0;
-  for (i = 0; i < n_variables; i++)
+  for (i = 0; i < n_vars; i++)
+    if (var_is_alpha (vars[i]))
+      cat_stored_values_create (vars[i]);
+
+  n_data = 0;
+  for (; casereader_read (input, &c); case_destroy (&c)) 
     {
       /*
        The second condition ensures the program will run even if
        there is only one variable to act as both explanatory and
        response.
        */
-      if ((!is_depvar (i, depvar)) || (n_variables == 1))
-       {
-         indep_vars[j] = v_variables[i];
-         j++;
-         if (var_is_alpha (v_variables[i]))
-           {
-             /* Make a place to hold the binary vectors
-                corresponding to this variable's values. */
-             cat_stored_values_create (v_variables[i]);
-           }
-         n_data =
-           mark_missing_cases (cf, v_variables[i], is_missing_case, n_data,
-                               mom + i);
-       }
-    }
-  /*
-     Mark missing cases for the dependent variable.
-   */
-  n_data = mark_missing_cases (cf, depvar, is_missing_case, n_data, NULL);
+      for (i = 0; i < n_vars; i++)
+        {
+          const union value *val = case_data (&c, vars[i]);
+          if (var_is_alpha (vars[i])) 
+            cat_value_update (vars[i], val); 
+          else
+            moments1_add (mom[i].m, val->f, 1.0);
+        }
+      n_data++; 
+   }
+  casereader_destroy (input);
 
   return n_data;
 }
+
 static void
 coeff_init (pspp_linreg_cache * c, struct design_matrix *dm)
 {
@@ -1155,24 +1105,14 @@
        }
     }
 }
+
 static bool
-run_regression (const struct ccase *first,
-               const struct casefile *cf, void *cmd_ UNUSED,
-               const struct dataset *ds)
+run_regression (struct casereader *input, struct cmd_regression *cmd,
+                struct dataset *ds)
 {
   size_t i;
-  size_t n_data = 0;           /* Number of valide cases. */
-  size_t n_cases;              /* Number of cases. */
-  size_t row;
-  size_t case_num;
   int n_indep = 0;
   int k;
-  /*
-     Keep track of the missing cases.
-   */
-  int *is_missing_case;
-  const union value *val;
-  struct casereader *r;
   struct ccase c;
   const struct variable **indep_vars;
   struct design_matrix *X;
@@ -1183,7 +1123,10 @@
 
   assert (models != NULL);
 
-  output_split_file_values (ds, first);
+  if (!casereader_peek (input, 0, &c))
+    return true;
+  output_split_file_values (ds, &c);
+  case_destroy (&c);
 
   if (!v_variables)
     {
@@ -1191,19 +1134,15 @@
                     1u << DC_SYSTEM);
     }
 
-  n_cases = casefile_get_case_cnt (cf);
-
-  for (i = 0; i < cmd.n_dependent; i++)
+  for (i = 0; i < cmd->n_dependent; i++)
     {
-      if (!var_is_numeric (cmd.v_dependent[i]))
+      if (!var_is_numeric (cmd->v_dependent[i]))
        {
-         msg (SE, gettext ("Dependent variable must be numeric."));
-         pspp_reg_rc = CMD_FAILURE;
-         return true;
+         msg (SE, _("Dependent variable must be numeric."));
+         return false;
        }
     }
 
-  is_missing_case = xnmalloc (n_cases, sizeof (*is_missing_case));
   mom = xnmalloc (n_variables, sizeof (*mom));
   for (i = 0; i < n_variables; i++)
     {
@@ -1212,20 +1151,28 @@
     }
   lopts.get_depvar_mean_std = 1;
 
-  for (k = 0; k < cmd.n_dependent; k++)
+  lopts.get_indep_mean_std = xnmalloc (n_variables, sizeof (int));
+  indep_vars = xnmalloc (n_variables, sizeof *indep_vars);
+
+  for (k = 0; k < cmd->n_dependent; k++)
     {
-      n_indep = get_n_indep ((const struct variable *) cmd.v_dependent[k]);
-      lopts.get_indep_mean_std = xnmalloc (n_indep, sizeof (int));
-      indep_vars = xnmalloc (n_indep, sizeof *indep_vars);
-      assert (indep_vars != NULL);
+      struct variable *dep_var;
+      struct casereader *reader;
+      casenumber row;
+      struct ccase c;
+      size_t n_data;           /* Number of valid cases. */
+      
+      dep_var = cmd->v_dependent[k];
+      n_indep = identify_indep_vars (indep_vars, dep_var);
+
+      reader = casereader_clone (input);
+      reader = casereader_create_filter_missing (reader, indep_vars, n_indep,
+                                                 MV_ANY, NULL);
+      reader = casereader_create_filter_missing (reader, &dep_var, 1,
+                                                 MV_ANY, NULL);
+       n_data = prepare_categories (casereader_clone (reader),
+                                    indep_vars, n_indep, mom);
 
-      for (i = 0; i < n_cases; i++)
-       {
-         is_missing_case[i] = 0;
-       }
-      n_data = prepare_data (n_cases, is_missing_case, indep_vars,
-                            cmd.v_dependent[k],
-                            (const struct casefile *) cf, mom);
       if ((n_data > 0) && (n_indep > 0))
        {
          Y = gsl_vector_alloc (n_data);
@@ -1240,8 +1187,8 @@
          models[k] = pspp_linreg_cache_alloc (X->m->size1, X->m->size2);
          models[k]->indep_means = gsl_vector_alloc (X->m->size2);
          models[k]->indep_std = gsl_vector_alloc (X->m->size2);
-         models[k]->depvar = (const struct variable *) cmd.v_dependent[k];
-         /*
+          models[k]->depvar = dep_var;
+          /*
             For large data sets, use QR decomposition.
           */
          if (n_data > sqrt (n_indep) && n_data > REG_LARGE_DATA)
@@ -1250,50 +1197,23 @@
            }
 
          /*
-            The second pass fills the design matrix.
-          */
-         row = 0;
-         for (r = casefile_get_reader (cf, NULL); casereader_read (r, &c);
-              case_destroy (&c))
-           /* Iterate over the cases. */
-           {
-             case_num = casereader_cnum (r) - 1;
-             if (!is_missing_case[case_num])
-               {
-                 for (i = 0; i < n_variables; ++i)     /* Iterate over the
-                                                          variables for the
-                                                          current case.
-                                                        */
-                   {
-                     val = case_data (&c, v_variables[i]);
-                     /*
-                        Independent/dependent variable separation. The
-                        'variables' subcommand specifies a varlist which contains
-                        both dependent and independent variables. The dependent
-                        variables are specified with the 'dependent'
-                        subcommand, and maybe also in the 'variables' subcommand. 
-                        We need to separate the two.
-                      */
-                     if (!is_depvar (i, cmd.v_dependent[k]))
-                       {
-                         if (var_is_alpha (v_variables[i]))
-                           {
-                             design_matrix_set_categorical (X, row,
-                                                            v_variables[i],
-                                                            val);
-                           }
-                         else
-                           {
-                             design_matrix_set_numeric (X, row,
-                                                        v_variables[i], val);
-                           }
-                       }
-                   }
-                 val = case_data (&c, cmd.v_dependent[k]);
-                 gsl_vector_set (Y, row, val->f);
-                 row++;
-               }
-           }
+            The second pass fills the design matrix.
+          */
+          reader = casereader_create_counter (reader, &row, -1);
+          for (; casereader_read (reader, &c); case_destroy (&c))
+            {
+              for (i = 0; i < n_indep; ++i)
+                {
+                  struct variable *v = indep_vars[i];
+                  const union value *val = case_data (&c, v);
+                  if (var_is_alpha (v))
+                    design_matrix_set_categorical (X, row, v, val);
+                  else
+                    design_matrix_set_numeric (X, row, v, val);
+                }
+          gsl_vector_set (Y, row, case_num (&c, dep_var));
+            }
+          casereader_destroy (reader);
          /*
             Now that we know the number of coefficients, allocate space
             and store pointers to the variables that correspond to the
@@ -1306,26 +1226,24 @@
           */
          pspp_linreg ((const gsl_vector *) Y, X->m, &lopts, models[k]);
          compute_moments (models[k], mom, X, n_variables);
-         subcommand_statistics (cmd.a_statistics, models[k]);
-         subcommand_export (cmd.sbc_export, models[k]);
+
+          if (!taint_has_tainted_successor (casereader_get_taint (input)))
+            {
+              subcommand_statistics (cmd->a_statistics, models[k]);
+              subcommand_export (cmd->sbc_export, models[k]); 
+            }
 
          gsl_vector_free (Y);
          design_matrix_destroy (X);
-         free (indep_vars);
-         free (lopts.get_indep_mean_std);
-         casereader_destroy (r);
        }
       else
        {
          msg (SE, gettext ("No valid data found. This command was skipped."));
        }
     }
-  for (i = 0; i < n_variables; i++)
-    {
-      moments1_destroy ((mom + i)->m);
-    }
-  free (mom);
-  free (is_missing_case);
+  free (indep_vars);
+  free (lopts.get_indep_mean_std);
+  casereader_destroy (input);
 
   return true;
 }
Index: merge/src/language/stats/sort-cases.c
===================================================================
--- merge.orig/src/language/stats/sort-cases.c  2007-06-05 09:16:11.000000000 -0700
+++ merge/src/language/stats/sort-cases.c       2007-06-05 09:18:06.000000000 -0700
@@ -30,6 +30,7 @@
 #include <language/lexer/lexer.h>
 #include <libpspp/alloc.h>
 #include <libpspp/message.h>
+#include <data/case-ordering.h>
 #include <math/sort.h>
 #include <sys/types.h>
 
@@ -41,13 +42,15 @@
 int
 cmd_sort_cases (struct lexer *lexer, struct dataset *ds)
 {
-  struct sort_criteria *criteria;
-  bool success = false;
+  struct case_ordering *ordering;
+  struct casereader *output;
+  bool ok = false;
 
   lex_match (lexer, T_BY);
 
-  criteria = sort_parse_criteria (lexer, dataset_dict (ds), NULL, NULL, NULL, NULL);
-  if (criteria == NULL)
+  proc_cancel_temporary_transformations (ds);
+  ordering = parse_case_ordering (lexer, dataset_dict (ds), NULL);
+  if (ordering == NULL)
     return CMD_CASCADING_FAILURE;
 
   if (get_testing_mode () && lex_match (lexer, '/')) 
@@ -57,7 +60,6 @@
         goto done;
 
       min_buffers = max_buffers = lex_integer (lexer);
-      allow_internal_sort = false;
       if (max_buffers < 2) 
         {
           msg (SE, _("Buffer limit must be at least 2."));
@@ -67,14 +69,17 @@
       lex_get (lexer);
     }
 
-  success = sort_active_file_in_place (ds, criteria);
+  proc_discard_output (ds);
+  output = sort_execute (proc_open (ds), ordering);
+  ordering = NULL;
+  ok = proc_commit (ds);
+  ok = proc_set_active_file_data (ds, output) && ok;
 
  done:
   min_buffers = 64;
   max_buffers = INT_MAX;
-  allow_internal_sort = true;
   
-  sort_destroy_criteria (criteria);
-  return success ? lex_end_of_command (lexer) : CMD_CASCADING_FAILURE;
+  case_ordering_destroy (ordering);
+  return ok ? lex_end_of_command (lexer) : CMD_CASCADING_FAILURE;
 }
 
Index: merge/src/language/stats/sort-criteria.c
===================================================================
--- merge.orig/src/language/stats/sort-criteria.c       2007-06-05 09:16:11.000000000 -0700
+++ merge/src/language/stats/sort-criteria.c    2007-06-05 09:18:06.000000000 -0700
@@ -1,5 +1,5 @@
 /* PSPP - computes sample statistics.
-   Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
+   Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc.
 
    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License as
@@ -17,72 +17,46 @@
    02110-1301, USA. */
 
 #include <config.h>
-#include <sys/types.h>
-#include <assert.h>
+
+#include <language/stats/sort-criteria.h>
+
 #include <stdlib.h>
-#include <limits.h>
-#include <libpspp/alloc.h>
-#include <language/command.h>
-#include <libpspp/message.h>
+
+#include <data/case-ordering.h>
+#include <data/dictionary.h>
+#include <data/variable.h>
 #include <language/lexer/lexer.h>
 #include <language/lexer/variable-parser.h>
-#include <data/settings.h>
-#include <data/variable.h>
-#include "sort-criteria.h"
-#include <math/sort.h>
+#include <libpspp/message.h>
 
 #include "gettext.h"
 #define _(msgid) gettext (msgid)
 
-static bool  is_terminator(int tok, const int *terminators);
-
-
 /* Parses a list of sort keys and returns a struct sort_criteria
    based on it.  Returns a null pointer on error.
    If SAW_DIRECTION is nonnull, sets *SAW_DIRECTION to true if at
    least one parenthesized sort direction was specified, false
-   otherwise. 
-   If TERMINATORS is non-null, then it must be a pointer to a 
-   null terminated list of tokens, in addition to the defaults,
-   which are to be considered terminators of the clause being parsed.
-   The default terminators are '/' and '.'
-   
-*/
-struct sort_criteria *
-sort_parse_criteria (struct lexer *lexer, const struct dictionary *dict,
-                     const struct variable ***vars, size_t *var_cnt,
-                     bool *saw_direction,
-                    const int *terminators
-                    )
+   otherwise. */
+struct case_ordering *
+parse_case_ordering (struct lexer *lexer, const struct dictionary *dict,
+                     bool *saw_direction)
 {
-  struct sort_criteria *criteria;
-  const struct variable **local_vars = NULL;
-  size_t local_var_cnt;
-
-  assert ((vars == NULL) == (var_cnt == NULL));
-  if (vars == NULL) 
-    {
-      vars = &local_vars;
-      var_cnt = &local_var_cnt;
-    }
-
-  criteria = xmalloc (sizeof *criteria);
-  criteria->crits = NULL;
-  criteria->crit_cnt = 0;
-
-  *vars = NULL;
-  *var_cnt = 0;
-  if (saw_direction != NULL)
+  struct case_ordering *ordering = case_ordering_create (dict);
+  struct variable **vars = NULL;
+  size_t var_cnt = 0;
+  
+ if (saw_direction != NULL)
     *saw_direction = false;
 
   do
     {
-      size_t prev_var_cnt = *var_cnt;
       enum sort_direction direction;
+      size_t i;
 
       /* Variables. */
-      if (!parse_variables_const (lexer, dict, vars, var_cnt,
-                           PV_NO_DUPLICATE | PV_APPEND | PV_NO_SCRATCH))
+      free (vars);
+      vars = NULL;
+      if (!parse_variables_const (lexer, dict, &vars, &var_cnt, PV_NO_SCRATCH))
         goto error;
 
       /* Sort direction. */
@@ -108,57 +82,19 @@
       else
         direction = SRT_ASCEND;
 
-      criteria->crits = xnrealloc (criteria->crits,
-                                   *var_cnt, sizeof *criteria->crits);
-      criteria->crit_cnt = *var_cnt;
-      for (; prev_var_cnt < criteria->crit_cnt; prev_var_cnt++) 
-        {
-          struct sort_criterion *c = &criteria->crits[prev_var_cnt];
-          c->fv = var_get_case_index ((*vars)[prev_var_cnt]);
-          c->width = var_get_width ((*vars)[prev_var_cnt]);
-          c->dir = direction;
-        }
+      for (i = 0; i < var_cnt; i++)
+        if (!case_ordering_add_var (ordering, vars[i], direction))
+          msg (SW, _("Variable %s specified twice in sort criteria."),
+               var_get_name (vars[i]));
     }
-  while (lex_token (lexer) != '.' && lex_token (lexer) != '/' && !is_terminator(lex_token (lexer), terminators));
+  while (lex_token (lexer) == T_ID
+         && dict_lookup_var (dict, lex_tokid (lexer)) != NULL);
 
-  free (local_vars);
-  return criteria;
+  free (vars);
+  return ordering;
 
  error:
-  free (local_vars);
-  sort_destroy_criteria (criteria);
+  free (vars);
+  case_ordering_destroy (ordering);
   return NULL;
 }
-
-/* Return TRUE if TOK is a member of the list of TERMINATORS.
-   FALSE otherwise */
-static bool 
-is_terminator(int tok, const int *terminators)
-{
-  if (terminators == NULL ) 
-    return false;
-
-  while ( *terminators) 
-    {
-      if (tok == *terminators++)
-       return true;
-    }
-
-  return false;
-}
-
-
-
-/* Destroys a SORT CASES program. */
-void
-sort_destroy_criteria (struct sort_criteria *criteria) 
-{
-  if (criteria != NULL) 
-    {
-      free (criteria->crits);
-      free (criteria);
-    }
-}
-
-
-
Index: merge/src/language/stats/sort-criteria.h
===================================================================
--- merge.orig/src/language/stats/sort-criteria.h       2007-06-05 09:16:11.000000000 -0700
+++ merge/src/language/stats/sort-criteria.h    2007-06-05 09:18:06.000000000 -0700
@@ -23,17 +23,12 @@
 #include <stdbool.h>
 #include <stddef.h>
 
-struct variable;
 struct dictionary;
-struct lexer ;
+struct lexer;
 
-struct sort_criteria *sort_parse_criteria (struct lexer *, const struct dictionary *,
-                                           const struct variable ***, size_t *,
-                                           bool *saw_direction,
-                                          const int *terminators
-                                          );
-
-void sort_destroy_criteria (struct sort_criteria *criteria) ;
+struct case_ordering *parse_case_ordering (struct lexer *,
+                                           const struct dictionary *,
+                                           bool *saw_direction);
 
 
 #endif /* SORT_PRS_H */
Index: merge/src/language/stats/t-test.q
===================================================================
--- merge.orig/src/language/stats/t-test.q      2007-06-05 09:16:11.000000000 -0700
+++ merge/src/language/stats/t-test.q   2007-06-05 09:18:06.000000000 -0700
@@ -25,13 +25,12 @@
 #include <stdlib.h>
 
 #include <data/case.h>
-#include <data/casefile.h>
+#include <data/casegrouper.h>
+#include <data/casereader.h>
 #include <data/dictionary.h>
 #include <data/procedure.h>
 #include <data/value-labels.h>
 #include <data/variable.h>
-#include <data/casefilter.h>
-
 #include <language/command.h>
 #include <language/dictionary/split-file.h>
 #include <language/lexer/lexer.h>
@@ -41,9 +40,9 @@
 #include <libpspp/hash.h>
 #include <libpspp/magic.h>
 #include <libpspp/message.h>
-#include <libpspp/message.h>
 #include <libpspp/misc.h>
 #include <libpspp/str.h>
+#include <libpspp/taint.h>
 #include <math/group-proc.h>
 #include <math/levene.h>
 #include <output/manager.h>
@@ -215,28 +214,28 @@
 
 static int common_calc (const struct dictionary *dict, 
                        const struct ccase *, void *, 
-                       const struct casefilter *filter);
+                       enum mv_class);
 static void common_precalc (struct cmd_t_test *);
 static void common_postcalc (struct cmd_t_test *);
 
-static int one_sample_calc (const struct dictionary *dict, const struct ccase *, void *, const struct casefilter *);
+static int one_sample_calc (const struct dictionary *dict, const struct ccase *, void *, enum mv_class);
 static void one_sample_precalc (struct cmd_t_test *);
 static void one_sample_postcalc (struct cmd_t_test *);
 
 static int  paired_calc (const struct dictionary *dict, const struct ccase *, 
-                        struct cmd_t_test*, const struct casefilter *);
+                        struct cmd_t_test*, enum mv_class);
 static void paired_precalc (struct cmd_t_test *);
 static void paired_postcalc (struct cmd_t_test *);
 
 static void group_precalc (struct cmd_t_test *);
 static int  group_calc (const struct dictionary *dict, const struct ccase *, 
-                       struct cmd_t_test *, const struct casefilter *);
+                       struct cmd_t_test *, enum mv_class);
 static void group_postcalc (struct cmd_t_test *);
 
 
-static bool calculate(const struct ccase *first,
-                      const struct casefile *cf, void *_mode, 
-                     const struct dataset *ds);
+static void calculate(struct cmd_t_test *,
+                      struct casereader *,
+                     const struct dataset *);
 
 static  int mode;
 
@@ -258,6 +257,8 @@
 int
 cmd_t_test (struct lexer *lexer, struct dataset *ds)
 {
+  struct casegrouper *grouper;
+  struct casereader *group;
   bool ok;
   
   if ( !parse_t_test (lexer, ds, &cmd, NULL) )
@@ -338,7 +339,12 @@
 
   bad_weight_warn = true;
 
-  ok = multipass_procedure_with_splits (ds, calculate, &cmd);
+  /* Data pass. */
+  grouper = casegrouper_create_splits (proc_open (ds), dataset_dict (ds));
+  while (casegrouper_get_next_group (grouper, &group)) 
+    calculate (&cmd, group, ds);
+  ok = casegrouper_destroy (grouper);
+  ok = proc_commit (ds) && ok;
 
   n_pairs=0;
   free(pairs);
@@ -1411,30 +1417,30 @@
 common_calc (const struct dictionary *dict, 
             const struct ccase *c, 
             void *_cmd, 
-            const struct casefilter *filter)
+            enum mv_class exclude)
 {
   int i;
   struct cmd_t_test *cmd = (struct cmd_t_test *)_cmd;  
 
-  double weight = dict_get_case_weight (dict, c, &bad_weight_warn);
+  double weight = dict_get_case_weight (dict, c, NULL);
 
 
   /* Listwise has to be implicit if the independent variable is missing ?? */
   if ( cmd->sbc_groups )
     {
-      if ( casefilter_variable_missing (filter, c, indep_var) )
+      if (var_is_value_missing (indep_var, case_data (c, indep_var), exclude))
        return 0;
     }
 
   for(i = 0; i < cmd->n_variables ; ++i) 
     {
       const struct variable *v = cmd->v_variables[i];
-
-      if (! casefilter_variable_missing (filter, c, v) )
+      const union value *val = case_data (c, v);
+      
+      if (!var_is_value_missing (v, val, exclude))
        {
          struct group_statistics *gs;
-         const union value *val = case_data (c, v);
-         gs = &group_proc_get (cmd->v_variables[i])->ugs;
+         gs = &group_proc_get (v)->ugs;
 
          gs->n += weight;
          gs->sum += weight * val->f;
@@ -1492,13 +1498,13 @@
 static int 
 one_sample_calc (const struct dictionary *dict, 
                 const struct ccase *c, void *cmd_, 
-                const struct casefilter *filter)
+                enum mv_class exclude)
 {
   int i;
 
   struct cmd_t_test *cmd = (struct cmd_t_test *)cmd_;
 
-  double weight = dict_get_case_weight (dict, c, &bad_weight_warn);
+  double weight = dict_get_case_weight (dict, c, NULL);
 
 
   for(i=0; i< cmd->n_variables ; ++i) 
@@ -1509,7 +1515,7 @@
 
       gs= &group_proc_get (cmd->v_variables[i])->ugs;
 
-      if ( ! casefilter_variable_missing (filter, c, v))
+      if (!var_is_value_missing (v, val, exclude))
        gs->sum_diff += weight * (val->f - cmd->n_testval[0]);
     }
 
@@ -1569,11 +1575,11 @@
 
 static int  
 paired_calc (const struct dictionary *dict, const struct ccase *c, 
-            struct cmd_t_test *cmd UNUSED, const struct casefilter *filter)
+            struct cmd_t_test *cmd UNUSED, enum mv_class exclude)
 {
   int i;
 
-  double weight = dict_get_case_weight (dict, c, &bad_weight_warn);
+  double weight = dict_get_case_weight (dict, c, NULL);
 
   for(i=0; i < n_pairs ; ++i )
     {
@@ -1583,8 +1589,8 @@
       const union value *val0 = case_data (c, v0);
       const union value *val1 = case_data (c, v1);
 
-      if (  ! casefilter_variable_missing (filter, c, v0) && 
-           ! casefilter_variable_missing (filter, c, v1) )
+      if (!var_is_value_missing (v0, val0, exclude) &&
+          !var_is_value_missing (v1, val1, exclude))
        {
          pairs[i].n += weight;
          pairs[i].sum[0] += weight * val0->f;
@@ -1694,16 +1700,15 @@
 static int  
 group_calc (const struct dictionary *dict, 
            const struct ccase *c, struct cmd_t_test *cmd, 
-           const struct casefilter *filter)
+           enum mv_class exclude)
 {
   int i;
 
-  const double weight = 
-    dict_get_case_weight (dict, c, &bad_weight_warn);
+  const double weight = dict_get_case_weight (dict, c, NULL);
 
   const union value *gv;
 
-  if ( casefilter_variable_missing (filter, c, indep_var))
+  if (var_is_value_missing (indep_var, case_data (c, indep_var), exclude))
     return 0;
 
   gv = case_data (c, indep_var);
@@ -1722,7 +1727,7 @@
       if ( ! gs ) 
        return 0;
 
-      if ( ! casefilter_variable_missing (filter, c, var) )
+      if (!var_is_value_missing (var, val, exclude))
        {
          gs->n += weight;
          gs->sum += weight * val->f;
@@ -1771,95 +1776,97 @@
 
 
 
-static bool
-calculate(const struct ccase *first, const struct casefile *cf, 
-         void *cmd_, const struct dataset *ds)
+/* Computes the T-TEST statistics requested by CMD from the cases
+   in INPUT, using the dictionary of DS, and then emits the
+   summary and test-result boxes.
+
+   INPUT is owned by this function: every path, including the
+   early return taken when no case can be peeked, consumes or
+   destroys it.  The boxes are skipped if the taint cloned from
+   INPUT reports a tainted successor. */
+static void
+calculate(struct cmd_t_test *cmd,
+          struct casereader *input, const struct dataset *ds)
 {
   const struct dictionary *dict = dataset_dict (ds);
   struct ssbox stat_summary_box;
   struct trbox test_results_box;
 
-  struct casereader *r;
+  struct casereader *pass1, *pass2, *pass3;
+  struct taint *taint;
   struct ccase c;
 
-  struct cmd_t_test *cmd = (struct cmd_t_test *) cmd_;
+  enum mv_class exclude = cmd->miss != TTS_INCLUDE ? MV_ANY : MV_SYSTEM;
 
-  struct casefilter *filter = casefilter_create ((cmd->miss != TTS_INCLUDE
-                                                  ? MV_ANY : MV_SYSTEM), 
-                                                NULL, 0);
+  if (!casereader_peek (input, 0, &c))
+    {
+      casereader_destroy (input);
+      return;
+    }
+  output_split_file_values (ds, &c);
+  case_destroy (&c);
 
   if ( cmd->miss == TTS_LISTWISE ) 
-    casefilter_add_variables (filter,
-                             cmd->v_variables, cmd->n_variables);
+    input = casereader_create_filter_missing (input,
+                                              cmd->v_variables,
+                                              cmd->n_variables,
+                                              exclude, NULL);
+
+  input = casereader_create_filter_weight (input, dict, NULL, NULL);
+
+  taint = taint_clone (casereader_get_taint (input));
+  casereader_split (input, &pass1, &pass2);
                                
-  output_split_file_values (ds, first);
   common_precalc (cmd);
-  for(r = casefile_get_reader (cf, filter);
-      casereader_read (r, &c) ;
-      case_destroy (&c)) 
-    {
-      common_calc (dict, &c, cmd, filter);
-    }
-
-  casereader_destroy (r);
+  for (; casereader_read (pass1, &c); case_destroy (&c)) 
+    common_calc (dict, &c, cmd, exclude);
+  casereader_destroy (pass1);
   common_postcalc (cmd);
 
   switch(mode)
     {
     case T_1_SAMPLE:
       one_sample_precalc (cmd);
-      for(r = casefile_get_reader (cf, filter);
-         casereader_read (r, &c) ;
-          case_destroy (&c)) 
-       {
-         one_sample_calc (dict, &c, cmd, filter);
-       }
-      casereader_destroy (r);
+      for (; casereader_read (pass2, &c); case_destroy (&c)) 
+        one_sample_calc (dict, &c, cmd, exclude);
       one_sample_postcalc (cmd);
       break;
     case T_PAIRED:
       paired_precalc(cmd);
-      for(r = casefile_get_reader (cf, filter);
-         casereader_read (r, &c) ;
-          case_destroy (&c)) 
-       {
-         paired_calc (dict, &c, cmd, filter);
-       }
-      casereader_destroy (r);
+      for (; casereader_read (pass2, &c); case_destroy (&c)) 
+        paired_calc (dict, &c, cmd, exclude);
       paired_postcalc (cmd);
-
       break;
     case T_IND_SAMPLES:
+      /* levene() consumes its reader, so hand it a clone of pass2. */
+      pass3 = casereader_clone (pass2);
 
       group_precalc(cmd);
-      for(r = casefile_get_reader (cf, filter);
-         casereader_read (r, &c) ;
-          case_destroy (&c)) 
-       {
-         group_calc (dict, &c, cmd, filter);
-       }
-      casereader_destroy (r);
+      for(; casereader_read (pass2, &c); case_destroy (&c)) 
+        group_calc (dict, &c, cmd, exclude);
       group_postcalc(cmd);
 
-      levene (dict, cf, indep_var, cmd->n_variables, cmd->v_variables,
-             filter);
+      levene (dict, pass3, indep_var, cmd->n_variables, cmd->v_variables,
+              exclude);
       break;
     }
+  casereader_destroy (pass2);
+ 
+  if (!taint_has_tainted_successor (taint)) 
+    {
+      ssbox_create(&stat_summary_box,cmd,mode);
+      ssbox_populate(&stat_summary_box,cmd);
+      ssbox_finalize(&stat_summary_box);
 
-  casefilter_destroy (filter);
-
-  ssbox_create(&stat_summary_box,cmd,mode);
-  ssbox_populate(&stat_summary_box,cmd);
-  ssbox_finalize(&stat_summary_box);
-
-  if ( mode == T_PAIRED) 
-      pscbox();
-
-  trbox_create(&test_results_box,cmd,mode);
-  trbox_populate(&test_results_box,cmd);
-  trbox_finalize(&test_results_box);
-
-  return true;
+      if ( mode == T_PAIRED ) 
+        pscbox();
+  
+      trbox_create(&test_results_box,cmd,mode);
+      trbox_populate(&test_results_box,cmd);
+      trbox_finalize(&test_results_box);
+    }
+
+  taint_destroy (taint);
 }
 
 short which_group(const struct group_statistics *g,
Index: merge/src/language/tests/automake.mk
===================================================================
--- merge.orig/src/language/tests/automake.mk   2007-06-05 09:16:11.000000000 -0700
+++ merge/src/language/tests/automake.mk        2007-06-05 09:18:06.000000000 -0700
@@ -4,7 +4,6 @@
        src/language/tests/check-model.c
 
 language_tests_sources = \
-       src/language/tests/casefile-test.c \
        src/language/tests/check-model.h \
        src/language/tests/datasheet-test.c \
        src/language/tests/float-format.c \
Index: merge/src/libpspp/deque.h
===================================================================
--- merge.orig/src/libpspp/deque.h      2007-06-05 09:16:11.000000000 -0700
+++ merge/src/libpspp/deque.h   2007-06-05 09:18:06.000000000 -0700
@@ -66,6 +66,7 @@
 
 #include <stdbool.h>
 #include <stddef.h>
+#include <stdlib.h>
 
 #include <libpspp/assertion.h>
 
Index: merge/src/math/automake.mk
===================================================================
--- merge.orig/src/math/automake.mk     2007-06-05 09:16:11.000000000 -0700
+++ merge/src/math/automake.mk  2007-06-05 09:18:06.000000000 -0700
@@ -19,6 +19,8 @@
        src/math/interaction.h \
        src/math/levene.c \
        src/math/levene.h \
+       src/math/merge.c \
+       src/math/merge.h \
        src/math/moments.c  src/math/moments.h \
        src/math/percentiles.c src/math/percentiles.h \
        src/math/design-matrix.c src/math/design-matrix.h \
Index: merge/src/math/levene.c
===================================================================
--- merge.orig/src/math/levene.c        2007-06-05 09:16:11.000000000 -0700
+++ merge/src/math/levene.c     2007-06-05 09:18:06.000000000 -0700
@@ -22,14 +22,13 @@
 #include "levene.h"
 #include <libpspp/message.h>
 #include <data/case.h>
-#include <data/casefile.h>
+#include <data/casereader.h>
 #include <data/dictionary.h>
 #include "group-proc.h"
 #include <libpspp/hash.h>
 #include <libpspp/str.h>
 #include <data/variable.h>
 #include <data/procedure.h>
-#include <data/casefilter.h>
 #include <libpspp/alloc.h>
 #include <libpspp/misc.h>
 #include "group.h"
@@ -74,90 +73,87 @@
   const struct variable  **v_dep;
 
   /* Filter for missing values */
-  struct casefilter *filter;
+  enum mv_class exclude;
+
+  /* An array of lz_stats for each variable */
+  struct lz_stats *lz;
+
+  /* Denominator of the Levene statistic, one per dependent variable */
+  double *lz_denominator;
+
+};
+
+/* Per variable statistics */
+struct lz_stats
+{
+  /* Total of all lz */
+  double grand_total;
+
+  /* Mean of all lz */
+  double grand_mean;
+
+  /* The total number of cases */
+  double total_n ; 
+
+  /* Number of groups */
+  int n_groups;
 };
 
 /* First pass */
 static void  levene_precalc (const struct levene_info *l);
 static int levene_calc (const struct dictionary *dict, const struct ccase *, 
                        const struct levene_info *l);
-static void levene_postcalc (void *);
+static void levene_postcalc (struct levene_info *);
 
 
 /* Second pass */
 static void levene2_precalc (struct levene_info *l);
 static int levene2_calc (const struct dictionary *, const struct ccase *, 
                         struct levene_info *l);
-static void levene2_postcalc (void *);
+static void levene2_postcalc (struct levene_info *);
 
 
-void  
+void
 levene(const struct dictionary *dict, 
-       const struct casefile *cf,
+       struct casereader *reader,
        const struct variable *v_indep, size_t n_dep, 
        const struct variable **v_dep,
-       struct casefilter *filter)
+       enum mv_class exclude)
 {
-  struct casereader *r;
+  struct casereader *pass1, *pass2;
   struct ccase c;
   struct levene_info l;
 
   l.n_dep      = n_dep;
   l.v_indep    = v_indep;
   l.v_dep      = v_dep;
-  l.filter = filter;
+  l.exclude    = exclude;
+  l.lz         = xnmalloc (l.n_dep, sizeof *l.lz);
+  l.lz_denominator = xnmalloc (l.n_dep, sizeof *l.lz_denominator);
 
+  casereader_split (reader, &pass1, &pass2);
 
   levene_precalc (&l);
-  for(r = casefile_get_reader (cf, filter);
-      casereader_read (r, &c) ;
-      case_destroy (&c)) 
-    {
-      levene_calc (dict, &c, &l);
-    }
-  casereader_destroy (r);
+  for (; casereader_read (pass1, &c); case_destroy (&c)) 
+    levene_calc (dict, &c, &l);
+  casereader_destroy (pass1);
   levene_postcalc (&l);
 
   levene2_precalc(&l);
-  for(r = casefile_get_reader (cf, filter);
-      casereader_read (r, &c) ;
-      case_destroy (&c)) 
-    {
-      levene2_calc (dict, &c,&l);
-    }
-  casereader_destroy (r);
+  for (; casereader_read (pass2, &c); case_destroy (&c)) 
+    levene2_calc (dict, &c, &l);
+  casereader_destroy (pass2);
   levene2_postcalc (&l);
-}
-
-/* Internal variables used in calculating the Levene statistic */
-
-/* Per variable statistics */
-struct lz_stats
-{
-  /* Total of all lz */
-  double grand_total;
-
-  /* Mean of all lz */
-  double grand_mean;
-
-  /* The total number of cases */
-  double total_n ; 
-
-  /* Number of groups */
-  int n_groups;
-};
-
-/* An array of lz_stats for each variable */
-static struct lz_stats *lz;
 
+  free (l.lz_denominator);
+  free (l.lz);
+}
 
 static void 
 levene_precalc (const struct levene_info *l)
 {
   size_t i;
 
-  lz = xnmalloc (l->n_dep, sizeof *lz);
-
   for(i = 0; i < l->n_dep ; ++i ) 
     {
       const struct variable *var = l->v_dep[i];
@@ -165,9 +161,9 @@
       struct group_statistics *gs;
       struct hsh_iterator hi;
 
-      lz[i].grand_total = 0;
-      lz[i].total_n = 0;
-      lz[i].n_groups = gp->n_groups ; 
+      l->lz[i].grand_total = 0;
+      l->lz[i].total_n = 0;
+      l->lz[i].n_groups = gp->n_groups ; 
 
       
       for ( gs = hsh_first(gp->group_hash, &hi);
@@ -206,11 +202,11 @@
       if ( 0 == gs ) 
        continue ;
 
-      if ( ! casefilter_variable_missing (l->filter, c, var))
+      if ( !var_is_value_missing (var, v, l->exclude))
        {
          levene_z= fabs(v->f - gs->mean);
-         lz[i].grand_total += levene_z * weight;
-         lz[i].total_n += weight; 
+         l->lz[i].grand_total += levene_z * weight;
+         l->lz[i].total_n += weight; 
 
          gs->lz_total += levene_z * weight;
        }
@@ -220,16 +216,14 @@
 
 
 static void 
-levene_postcalc (void *_l)
+levene_postcalc (struct levene_info *l)
 {
   size_t v;
 
-  struct levene_info *l = (struct levene_info *) _l;
-
   for (v = 0; v < l->n_dep; ++v) 
     {
       /* This is Z_LL */
-      lz[v].grand_mean = lz[v].grand_total / lz[v].total_n ;
+      l->lz[v].grand_mean = l->lz[v].grand_total / l->lz[v].total_n ;
     }
 
   
@@ -237,15 +231,11 @@
 
 
 
-/* The denominator for the expression for the Levene */
-static double *lz_denominator = 0;
-
 static void 
 levene2_precalc (struct levene_info *l)
 {
   size_t v;
 
-  lz_denominator = xnmalloc (l->n_dep, sizeof *lz_denominator);
 
   /* This stuff could go in the first post calc . . . */
   for (v = 0; 
@@ -265,7 +255,7 @@
        {
          g->lz_mean = g->lz_total / g->n ;
        }
-      lz_denominator[v] = 0;
+      l->lz_denominator[v] = 0;
   }
 }
 
@@ -295,11 +285,10 @@
       if ( 0 == gs ) 
        continue;
 
-      if ( ! casefilter_variable_missing (l->filter, c, var))
-
+      if ( !var_is_value_missing (var, v, l->exclude))
        {
          levene_z = fabs(v->f - gs->mean); 
-         lz_denominator[i] += weight * pow2 (levene_z - gs->lz_mean);
+         l->lz_denominator[i] += weight * pow2 (levene_z - gs->lz_mean);
        }
     }
 
@@ -308,12 +297,10 @@
 
 
 static void 
-levene2_postcalc (void *_l)
+levene2_postcalc (struct levene_info *l)
 {
   size_t v;
 
-  struct levene_info *l = (struct levene_info *) _l;
-
   for (v = 0; v < l->n_dep; ++v) 
     {
       double lz_numerator = 0;
@@ -328,18 +315,14 @@
          g != 0 ;
          g = (struct group_statistics *) hsh_next(hash,&hi) )
        {
-         lz_numerator += g->n * pow2(g->lz_mean - lz[v].grand_mean );
+         lz_numerator += g->n * pow2(g->lz_mean - l->lz[v].grand_mean );
        }
       lz_numerator *= ( gp->ugs.n - gp->n_groups );
 
-      lz_denominator[v] *= (gp->n_groups - 1);
+      l->lz_denominator[v] *= (gp->n_groups - 1);
 
-      gp->levene = lz_numerator / lz_denominator[v] ;
+      gp->levene = lz_numerator / l->lz_denominator[v] ;
 
     }
-
-  /* Now clear up after ourselves */
-  free(lz_denominator);
-  free(lz);
 }
 
Index: merge/src/math/levene.h
===================================================================
--- merge.orig/src/math/levene.h        2007-06-05 09:16:11.000000000 -0700
+++ merge/src/math/levene.h     2007-06-05 09:18:06.000000000 -0700
@@ -21,9 +21,9 @@
 #if !levene_h
 #define levene_h 1
 
-
+#include <data/casereader.h>
+#include <data/missing-values.h>
 #include <data/variable.h>
-#include <data/casefile.h>
 
 /* Calculate the Levene statistic 
 
@@ -39,10 +39,10 @@
 struct dictionary ;
 struct casefilter ;
 
-void  levene(const struct dictionary *dict, const struct casefile *cf, 
+void  levene(const struct dictionary *dict, struct casereader *,
             const struct variable *v_indep, size_t n_dep, 
             const struct variable **v_dep,
-            struct casefilter *filter);
+            enum mv_class exclude);
 
 
 
Index: merge/src/math/merge.c
===================================================================
--- /dev/null   1970-01-01 00:00:00.000000000 +0000
+++ merge/src/math/merge.c      2007-06-05 09:18:06.000000000 -0700
@@ -0,0 +1,159 @@
+/* PSPP - computes sample statistics.
+   Copyright (C) 2007 Free Software Foundation, Inc.
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+   02110-1301, USA. */
+
+/* FIXME: error checking. */
+/* FIXME: merge pattern should be improved, this one causes a
+   performance regression. */
+#include <config.h>
+
+#include <math/merge.h>
+
+#include <data/case-ordering.h>
+#include <data/case.h>
+#include <data/casereader.h>
+#include <data/casewriter.h>
+#include <libpspp/array.h>
+#include <libpspp/assertion.h>
+#include <libpspp/taint.h>
+
+#include "xalloc.h"
+
+#define MAX_MERGE_ORDER 7
+
+struct merge_input 
+  {
+    struct casereader *reader;
+    struct ccase c;
+  };
+
+struct merge 
+  {
+    struct case_ordering *ordering;
+    struct merge_input inputs[MAX_MERGE_ORDER];
+    size_t input_cnt;
+  };
+
+static void do_merge (struct merge *m);
+
+struct merge *
+merge_create (const struct case_ordering *ordering) 
+{
+  struct merge *m = xmalloc (sizeof *m);
+  m->ordering = case_ordering_clone (ordering);
+  m->input_cnt = 0;
+  return m;
+}
+
+void
+merge_destroy (struct merge *m) 
+{
+  if (m != NULL) 
+    {
+      size_t i;
+      
+      case_ordering_destroy (m->ordering);
+      for (i = 0; i < m->input_cnt; i++)
+        casereader_destroy (m->inputs[i].reader);
+      free (m);
+    }
+}
+
+void
+merge_append (struct merge *m, struct casereader *r) 
+{
+  r = casereader_rename (r);
+  m->inputs[m->input_cnt++].reader = r;
+  if (m->input_cnt >= MAX_MERGE_ORDER)
+    do_merge (m);
+}
+
+struct casereader *
+merge_make_reader (struct merge *m) 
+{
+  struct casereader *r;
+  
+  if (m->input_cnt > 1)
+    do_merge (m);
+
+  if (m->input_cnt == 1)
+    {
+      r = m->inputs[0].reader;
+      m->input_cnt = 0;
+    }
+  else if (m->input_cnt == 0)
+    {
+      size_t value_cnt = case_ordering_get_value_cnt (m->ordering);
+      struct casewriter *writer = mem_writer_create (value_cnt);
+      r = casewriter_make_reader (writer);
+    }
+  else
+    NOT_REACHED ();
+
+  return r;
+}
+
+static bool
+read_input_case (struct merge *m, size_t idx) 
+{
+  struct merge_input *i = &m->inputs[idx];
+
+  if (casereader_read (i->reader, &i->c))
+    return true;
+  else
+    {
+      casereader_destroy (i->reader);
+      remove_element (m->inputs, m->input_cnt, sizeof *m->inputs, idx);
+      m->input_cnt--;
+      return false;
+    }  
+}
+
+static void
+do_merge (struct merge *m) 
+{
+  struct casewriter *w;
+  size_t i;
+  
+  assert (m->input_cnt > 1);
+
+  w = tmpfile_writer_create (case_ordering_get_value_cnt (m->ordering));
+  for (i = 0; i < m->input_cnt; i++) 
+    taint_propagate (casereader_get_taint (m->inputs[i].reader),
+                     casewriter_get_taint (w));
+  
+  for (i = 0; i < m->input_cnt; ) 
+    if (read_input_case (m, i))
+      i++;
+  while (m->input_cnt > 0) 
+    {
+      size_t min;
+
+      min = 0;
+      for (i = 1; i < m->input_cnt; i++)
+        if (case_ordering_compare_cases (&m->inputs[i].c, &m->inputs[min].c,
+                                         m->ordering) < 0)
+          min = i;
+
+      casewriter_write (w, &m->inputs[min].c);
+      read_input_case (m, min);
+    }
+
+  m->input_cnt = 1;
+  m->inputs[0].reader = casewriter_make_reader (w);
+}
+
Index: merge/src/math/merge.h
===================================================================
--- /dev/null   1970-01-01 00:00:00.000000000 +0000
+++ merge/src/math/merge.h      2007-06-05 09:18:06.000000000 -0700
@@ -0,0 +1,32 @@
+/* PSPP - computes sample statistics.
+   Copyright (C) 2007 Free Software Foundation, Inc.
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+   02110-1301, USA. */
+
+#ifndef MATH_MERGE_H
+#define MATH_MERGE_H 1
+
+#include <stdbool.h>
+
+struct case_ordering;
+struct casereader;
+
+struct merge *merge_create (const struct case_ordering *);
+void merge_destroy (struct merge *);
+void merge_append (struct merge *, struct casereader *);
+struct casereader *merge_make_reader (struct merge *);
+
+#endif /* math/merge.h */
Index: merge/src/math/sort.c
===================================================================
--- merge.orig/src/math/sort.c  2007-06-05 09:16:11.000000000 -0700
+++ merge/src/math/sort.c       2007-06-05 09:18:06.000000000 -0700
@@ -20,31 +20,18 @@
 
 #include "sort.h"
 
-#include <errno.h>
-#include <limits.h>
-#include <stdbool.h>
 #include <stdio.h>
-#include <stdlib.h>
 
+#include <data/case-ordering.h>
-#include <data/case-source.h>
 #include <data/case.h>
-#include <data/casefile.h>
-#include <data/fastfile.h>
-#include <data/casefile-factory.h>
-#include <data/fastfile-factory.h>
-#include <data/procedure.h>
+#include <data/casereader.h>
+#include <data/casewriter.h>
+#include <data/casewriter-provider.h>
 #include <data/settings.h>
-#include <data/variable.h>
-#include <data/storage-stream.h>
 #include <libpspp/alloc.h>
 #include <libpspp/array.h>
 #include <libpspp/assertion.h>
-#include <libpspp/message.h>
-#include <libpspp/message.h>
-#include <libpspp/misc.h>
-#include <libpspp/str.h>
-
-#include "minmax.h"
+#include <math/merge.h>
 
 #include "gettext.h"
 #define _(msgid) gettext (msgid)
@@ -52,701 +39,261 @@
 /* These should only be changed for testing purposes. */
 int min_buffers = 64;
 int max_buffers = INT_MAX;
-bool allow_internal_sort = true;
-
-static int compare_record (const struct ccase *, const struct ccase *,
-                           const struct sort_criteria *);
-static struct casefile *do_internal_sort (struct casereader *,
-                                          const struct sort_criteria *,
-                                         struct casefile_factory *
-                                         );
-static struct casefile *do_external_sort (struct casereader *,
-                                          const struct sort_criteria *,
-                                         struct casefile_factory *
-                                         );
-
-
-/* Sorts the active file in-place according to CRITERIA.
-   Returns true if successful. */
-bool
-sort_active_file_in_place (struct dataset *ds, 
-                          const struct sort_criteria *criteria) 
-{
-  struct casefile *in, *out;
-
-  proc_cancel_temporary_transformations (ds);
-  if (!procedure (ds, NULL, NULL))
-    return false;
-  
-  in = proc_capture_output (ds);
-  out = sort_execute (casefile_get_destructive_reader (in), criteria, 
-                     dataset_get_casefile_factory (ds));
-  if (out == NULL) 
-    return false;
-
-  proc_set_source (ds, storage_source_create (out));
-  return true;
-}
 
-/* Data passed to sort_to_casefile_callback(). */
-struct sort_to_casefile_cb_data 
+struct sort_writer 
   {
-    const struct sort_criteria *criteria;
-    struct casefile *output;
-    struct casefile_factory *factory ;
+    struct case_ordering *ordering;
+    struct merge *merge;
+    struct pqueue *pqueue;
+
+    struct casewriter *run;
+    casenumber run_id;
+    struct ccase run_end;
   };
 
-/* Sorts casefile CF according to the criteria in CB_DATA. */
-static bool
-sort_to_casefile_callback (const struct casefile *cf, void *cb_data_) 
-{
-  struct sort_to_casefile_cb_data *cb_data = cb_data_;
-  cb_data->output = sort_execute (casefile_get_reader (cf, NULL), 
-                                 cb_data->criteria,
-                                 cb_data->factory
-                                 );
-  return cb_data->output != NULL;
-}
-
-/* Sorts the active file to a separate casefile.  If successful,
-   returns the sorted casefile.  Returns a null pointer on
-   failure. */
-struct casefile *
-sort_active_file_to_casefile (struct dataset *ds, 
-                             const struct sort_criteria *criteria) 
+static struct casewriter_class sort_casewriter_class;
+
+static struct pqueue *pqueue_create (const struct case_ordering *);
+static void pqueue_destroy (struct pqueue *);
+static bool pqueue_is_full (const struct pqueue *);
+static bool pqueue_is_empty (const struct pqueue *);
+static void pqueue_push (struct pqueue *, struct ccase *, casenumber);
+static void pqueue_pop (struct pqueue *, struct ccase *, casenumber *);
+
+static void output_record (struct sort_writer *);
+
+struct casewriter *
+sort_create_writer (struct case_ordering *ordering) 
 {
-  struct sort_to_casefile_cb_data cb_data;
-  
-  proc_cancel_temporary_transformations (ds);
+  struct sort_writer *sort;
 
-  cb_data.criteria = criteria;
-  cb_data.output = NULL;
-  cb_data.factory = dataset_get_casefile_factory (ds);
-  if (!multipass_procedure (ds, sort_to_casefile_callback, &cb_data)) 
-    {
-      casefile_destroy (cb_data.output);
-      return NULL;
-    }
-  return cb_data.output;
+  sort = xmalloc (sizeof *sort);
+  sort->ordering = case_ordering_clone (ordering);
+  sort->merge = merge_create (ordering);
+  sort->pqueue = pqueue_create (ordering);
+  sort->run = NULL;
+  sort->run_id = 0;
+  case_nullify (&sort->run_end);
+
+  case_ordering_destroy (ordering);
+
+  return casewriter_create (&sort_casewriter_class, sort);
 }
 
+static void
+sort_casewriter_write (struct casewriter *writer UNUSED, void *sort_,
+                       struct ccase *c)
+{
+  struct sort_writer *sort = sort_;
+  bool next_run;
 
-/* Reads all the cases from READER, which is destroyed.  Sorts
-   the cases according to CRITERIA.  Returns the sorted cases in
-   a newly created casefile, which will be created by FACTORY.
-   If FACTORY is NULL, then a local fastfile_factory will be used.
-*/
-struct casefile *
-sort_execute (struct casereader *reader,
-             const struct sort_criteria *criteria,
-             struct casefile_factory *factory
-             )
-{
-  struct casefile_factory *local_factory = NULL;
-  struct casefile *output ;
-  if ( factory == NULL )
-    factory = local_factory = fastfile_factory_create ();
-
-  output = do_internal_sort (reader, criteria, factory);
-  if (output == NULL)
-    output = do_external_sort (reader, criteria, factory);
-  casereader_destroy (reader);
+  if (pqueue_is_full (sort->pqueue)) 
+    output_record (sort); 
 
-  fastfile_factory_destroy (local_factory);
+  next_run = (case_is_null (&sort->run_end)
+              || case_ordering_compare_cases (c, &sort->run_end,
+                                              sort->ordering) < 0);
+  pqueue_push (sort->pqueue, c, sort->run_id + (next_run ? 1 : 0));
+}
 
-  return output;
+static void
+sort_casewriter_destroy (struct casewriter *writer UNUSED, void *sort_) 
+{
+  struct sort_writer *sort = sort_;
+  
+  case_ordering_destroy (sort->ordering);
+  merge_destroy (sort->merge);
+  pqueue_destroy (sort->pqueue);
+  casewriter_destroy (sort->run);
+  case_destroy (&sort->run_end);
+  free (sort);
 }
-
-/* A case and its index. */
-struct indexed_case 
-  {
-    struct ccase c;     /* Case. */
-    unsigned long idx;  /* Index to allow for stable sorting. */
-  };
 
-static int compare_indexed_cases (const void *, const void *, const void *);
+static struct casereader *
+sort_casewriter_convert_to_reader (struct casewriter *writer, void *sort_)
+{
+  struct sort_writer *sort = sort_;
+  struct casereader *output;
 
-/* If the data is in memory, do an internal sort and return a new
-   casefile for the data.  Otherwise, return a null pointer. */
-static struct casefile *
-do_internal_sort (struct casereader *reader,
-                  const struct sort_criteria *criteria, 
-                 struct casefile_factory *factory)
-{
-  const struct casefile *src;
-  struct casefile *dst;
-  unsigned long case_cnt;
-
-  if (!allow_internal_sort)
-    return NULL;
-
-  src = casereader_get_casefile (reader);
-  if (casefile_get_case_cnt (src) > 1 && !casefile_in_core (src))
-    return NULL;
-      
-  case_cnt = casefile_get_case_cnt (src);
-  dst = factory->create_casefile (factory, casefile_get_value_cnt (src));
-  if (case_cnt != 0) 
+  if (sort->run == NULL && sort->run_id == 0) 
     {
-      struct indexed_case *cases = nmalloc (sizeof *cases, case_cnt);
-      if (cases != NULL) 
-        {
-          unsigned long i;
-          
-          for (i = 0; i < case_cnt; i++)
-            {
-              bool ok = casereader_read_xfer (reader, &cases[i].c);
-              if (!ok)
-                NOT_REACHED ();
-              cases[i].idx = i;
-            }
-
-          sort (cases, case_cnt, sizeof *cases, compare_indexed_cases,
-                (void *) criteria);
-      
-          for (i = 0; i < case_cnt; i++)
-            casefile_append_xfer (dst, &cases[i].c);
-          if (casefile_error (dst))
-            NOT_REACHED ();
-
-          free (cases);
-        }
-      else 
-        {
-          /* Failure. */
-          casefile_destroy (dst);
-          dst = NULL;
-        }
+      /* In-core sort. */
+      sort->run = mem_writer_create (case_ordering_get_value_cnt (
+                                       sort->ordering));
+      sort->run_id = 1; 
     }
+  while (!pqueue_is_empty (sort->pqueue))
+    output_record (sort);
 
-  return dst;
-}
+  merge_append (sort->merge, casewriter_make_reader (sort->run));
+  sort->run = NULL;
 
-/* Compares the variables specified by CRITERIA between the cases
-   at A and B, with a "last resort" comparison for stability, and
-   returns a strcmp()-type result. */
-static int
-compare_indexed_cases (const void *a_, const void *b_, const void *criteria_)
-{
-  const struct sort_criteria *criteria = criteria_;
-  const struct indexed_case *a = a_;
-  const struct indexed_case *b = b_;
-  int result = compare_record (&a->c, &b->c, criteria);
-  if (result == 0)
-    result = a->idx < b->idx ? -1 : a->idx > b->idx;
-  return result;
+  output = merge_make_reader (sort->merge);
+  sort_casewriter_destroy (writer, sort);
+  return output;
 }
-
-/* External sort. */
 
-/* Maximum order of merge (external sort only).  The maximum
-   reasonable value is about 7.  Above that, it would be a good
-   idea to use a heap in merge_once() to select the minimum. */
-#define MAX_MERGE_ORDER 7
+static void
+output_record (struct sort_writer *sort)
+{
+  struct ccase min_case;
+  casenumber min_run_id;
 
-/* Results of an external sort. */
-struct external_sort 
-  {
-    const struct sort_criteria *criteria; /* Sort criteria. */
-    size_t value_cnt;                 /* Size of data in `union value's. */
-    struct casefile **runs;           /* Array of initial runs. */
-    size_t run_cnt, run_cap;          /* Number of runs, allocated capacity. */
-    struct casefile_factory *factory; /* Factory used to  create the result */
-  };
+  pqueue_pop (sort->pqueue, &min_case, &min_run_id);
+#if 0
+  printf ("\toutput: %f to run %d\n", case_num_idx (&min_case, 0), min_run_id);
+#endif
 
-/* Prototypes for helper functions. */
-static int write_runs (struct external_sort *, struct casereader *);
-static struct casefile *merge (struct external_sort *);
-static void destroy_external_sort (struct external_sort *);
-
-/* Performs a stable external sort of the active file according
-   to the specification in SCP.  Forms initial runs using a heap
-   as a reservoir.  Merges the initial runs according to a
-   pattern that assures stability. */
-static struct casefile *
-do_external_sort (struct casereader *reader,
-                  const struct sort_criteria *criteria,
-                 struct casefile_factory *factory
-                 )
-{
-  struct external_sort *xsrt;
-
-  if (!casefile_to_disk (casereader_get_casefile (reader)))
-    return NULL;
-
-  xsrt = xmalloc (sizeof *xsrt);
-  xsrt->criteria = criteria;
-  xsrt->value_cnt = casefile_get_value_cnt (casereader_get_casefile (reader));
-  xsrt->run_cap = 512;
-  xsrt->run_cnt = 0;
-  xsrt->runs = xnmalloc (xsrt->run_cap, sizeof *xsrt->runs);
-  xsrt->factory = factory;
-  if (write_runs (xsrt, reader))
+  if (sort->run_id != min_run_id && sort->run != NULL) 
     {
-      struct casefile *output = merge (xsrt);
-      destroy_external_sort (xsrt);
-      return output;
+      merge_append (sort->merge, casewriter_make_reader (sort->run));
+      sort->run = NULL; 
     }
-  else
+  if (sort->run == NULL) 
     {
-      destroy_external_sort (xsrt);
-      return NULL;
+      sort->run = tmpfile_writer_create (case_ordering_get_value_cnt (
+                                           sort->ordering));
+      sort->run_id = min_run_id;
     }
+
+  case_destroy (&sort->run_end);
+  case_clone (&sort->run_end, &min_case);
+  
+  casewriter_write (sort->run, &min_case);
 }
 
-/* Destroys XSRT. */
-static void
-destroy_external_sort (struct external_sort *xsrt) 
-{
-  if (xsrt != NULL) 
-    {
-      int i;
-      
-      for (i = 0; i < xsrt->run_cnt; i++)
-        casefile_destroy (xsrt->runs[i]);
-      free (xsrt->runs);
-      free (xsrt);
-    }
+static struct casewriter_class sort_casewriter_class = 
+  {
+    sort_casewriter_write,
+    sort_casewriter_destroy,
+    sort_casewriter_convert_to_reader,
+  };
+
+/* Reads all the cases from INPUT.  Sorts the cases according to
+   ORDERING.  Returns the sorted cases in a new casereader, or a
+   null pointer if an I/O error occurs.  Both INPUT and ORDERING
+   are destroyed upon return, regardless of success. */
+struct casereader *
+sort_execute (struct casereader *input, struct case_ordering *ordering)
+{
+  struct casewriter *output = sort_create_writer (ordering);
+  casereader_transfer (input, output);
+  return casewriter_make_reader (output);
 }
 
-/* Replacement selection. */
-
-/* Pairs a record with a run number. */
-struct record_run
+struct pqueue 
   {
-    int run;                    /* Run number of case. */
-    struct ccase record;        /* Case data. */
-    size_t idx;                 /* Case number (for stability). */
+    struct case_ordering *ordering;
+    struct pqueue_record *records;
+    size_t record_cnt;
+    size_t record_cap;
+    casenumber idx;
   };
 
-/* Represents a set of initial runs during an external sort. */
-struct initial_run_state 
+struct pqueue_record
   {
-    struct external_sort *xsrt;
-
-    /* Reservoir. */
-    struct record_run *records; /* Records arranged as a heap. */
-    size_t record_cnt;          /* Current number of records. */
-    size_t record_cap;          /* Capacity for records. */
-    
-    /* Run currently being output. */
-    int run;                    /* Run number. */
-    size_t case_cnt;            /* Number of cases so far. */
-    struct casefile *casefile;  /* Output file. */
-    struct ccase last_output;   /* Record last output. */
-
-    int okay;                   /* Zero if an error has been encountered. */
+    casenumber id;
+    struct ccase c;
+    casenumber idx;
   };
 
-static bool destroy_initial_run_state (struct initial_run_state *);
-static void process_case (struct initial_run_state *, 
-                         const struct ccase *, size_t);
-static int allocate_cases (struct initial_run_state *);
-static void output_record (struct initial_run_state *);
-static void start_run (struct initial_run_state *);
-static void end_run (struct initial_run_state *);
-static int compare_record_run (const struct record_run *,
-                               const struct record_run *,
-                               const struct initial_run_state *);
-static int compare_record_run_minheap (const void *, const void *, 
-                                      const void *);
+static int compare_pqueue_records_minheap (const void *a, const void *b,
+                                           const void *pq_);
 
-/* Reads cases from READER and composes initial runs in XSRT. */
-static int
-write_runs (struct external_sort *xsrt, struct casereader *reader)
+static struct pqueue *
+pqueue_create (const struct case_ordering *ordering) 
 {
-  struct initial_run_state *irs;
-  struct ccase c;
-  size_t idx = 0;
-  int success = 0;
-
-  /* Allocate memory for cases. */
-  irs = xmalloc (sizeof *irs);
-  irs->xsrt = xsrt;
-  irs->records = NULL;
-  irs->record_cnt = irs->record_cap = 0;
-  irs->run = 0;
-  irs->case_cnt = 0;
-  irs->casefile = NULL;
-  case_nullify (&irs->last_output);
-  irs->okay = 1;
-  if (!allocate_cases (irs)) 
-    goto done;
-
-  /* Create initial runs. */
-  start_run (irs);
-  for (; irs->okay && casereader_read (reader, &c); case_destroy (&c))
-    process_case (irs, &c, idx++);
-  while (irs->okay && irs->record_cnt > 0)
-    output_record (irs);
-  end_run (irs);
-
-  success = irs->okay;
-
- done:
-  if (!destroy_initial_run_state (irs))
-    success = false;
+  struct pqueue *pq;
 
-  return success;
-}
+  pq = xmalloc (sizeof *pq);
+  pq->ordering = case_ordering_clone (ordering);
+  pq->record_cap
+    = get_workspace_cases (case_ordering_get_value_cnt (ordering));
+  if (pq->record_cap > max_buffers)
+    pq->record_cap = max_buffers;
+  else if (pq->record_cap < min_buffers)
+    pq->record_cap = min_buffers;
+  pq->record_cnt = 0;
+  pq->records = xnmalloc (pq->record_cap, sizeof *pq->records);
+  pq->idx = 0;
 
-/* Add a single case to an initial run. */
-static void
-process_case (struct initial_run_state *irs, const struct ccase *c, 
-             size_t idx)
-{
-  struct record_run *rr;
-
-  /* Compose record_run for this run and add to heap. */
-  assert (irs->record_cnt < irs->record_cap - 1);
-  rr = irs->records + irs->record_cnt++;
-  case_copy (&rr->record, 0, c, 0, irs->xsrt->value_cnt);
-  rr->run = irs->run;
-  rr->idx = idx;
-  if (!case_is_null (&irs->last_output)
-      && compare_record (c, &irs->last_output, irs->xsrt->criteria) < 0)
-    rr->run = irs->run + 1;
-  push_heap (irs->records, irs->record_cnt, sizeof *irs->records,
-             compare_record_run_minheap, irs);
-
-  /* Output a record if the reservoir is full. */
-  if (irs->record_cnt == irs->record_cap - 1 && irs->okay)
-    output_record (irs);
+  return pq; 
 }
 
-/* Destroys the initial run state represented by IRS.
-   Returns true if successful, false if an I/O error occurred. */
-static bool
-destroy_initial_run_state (struct initial_run_state *irs) 
-{
-  int i;
-  bool ok = true;
-
-  if (irs == NULL)
-    return true;
-
-  for (i = 0; i < irs->record_cap; i++)
-    case_destroy (&irs->records[i].record);
-  free (irs->records);
-
-  if (irs->casefile != NULL)
-    ok = casefile_sleep (irs->casefile);
-
-  free (irs);
-  return ok;
-}
-
-/* Allocates room for lots of cases as a buffer. */
-static int
-allocate_cases (struct initial_run_state *irs)
-{
-  int approx_case_cost; /* Approximate memory cost of one case in bytes. */
-  int max_cases;        /* Maximum number of cases to allocate. */
-  int i;
-
-  /* Allocate as many cases as we can within the workspace
-     limit. */
-  approx_case_cost = (sizeof *irs->records
-                      + irs->xsrt->value_cnt * sizeof (union value)
-                      + 4 * sizeof (void *));
-  max_cases = get_workspace() / approx_case_cost;
-  if (max_cases > max_buffers)
-    max_cases = max_buffers;
-  irs->records = nmalloc (sizeof *irs->records, max_cases);
-  if (irs->records != NULL)
-    for (i = 0; i < max_cases; i++)
-      if (!case_try_create (&irs->records[i].record, irs->xsrt->value_cnt))
-        {
-          max_cases = i;
-          break;
-        }
-  irs->record_cap = max_cases;
-
-  /* Fail if we didn't allocate an acceptable number of cases. */
-  if (irs->records == NULL || max_cases < min_buffers)
-    {
-      msg (SE, _("Out of memory.  Could not allocate room for minimum of %d "
-                "cases of %d bytes each.  (PSPP workspace is currently "
-                "restricted to a maximum of %lu KB.)"),
-          min_buffers, approx_case_cost,
-           (unsigned long int) (get_workspace() / 1024));
-      return 0;
-    }
-  return 1;
-}
-
-/* Compares the VAR_CNT variables in VARS[] between the `value's at
-   A and B, and returns a strcmp()-type result. */
-static int
-compare_record (const struct ccase *a, const struct ccase *b,
-                const struct sort_criteria *criteria)
+static void
+pqueue_destroy (struct pqueue *pq) 
 {
-  int i;
-
-  assert (a != NULL);
-  assert (b != NULL);
-  
-  for (i = 0; i < criteria->crit_cnt; i++)
+  if (pq != NULL) 
     {
-      const struct sort_criterion *c = &criteria->crits[i];
-      int result;
-      
-      if (c->width == 0)
+      while (!pqueue_is_empty (pq)) 
         {
-          double af = case_num_idx (a, c->fv);
-          double bf = case_num_idx (b, c->fv);
-          
-          result = af < bf ? -1 : af > bf;
+          struct ccase c;
+          casenumber id;
+          pqueue_pop (pq, &c, &id);
+          case_destroy (&c);
         }
-      else
-        result = memcmp (case_str_idx (a, c->fv),
-                         case_str_idx (b, c->fv), c->width);
-
-      if (result != 0)
-        return c->dir == SRT_ASCEND ? result : -result;
+      case_ordering_destroy (pq->ordering);
+      free (pq->records);
+      free (pq);
     }
-
-  return 0;
 }
 
-/* Compares record-run tuples A and B on run number first, then
-   on record, then on case index. */
-static int
-compare_record_run (const struct record_run *a,
-                    const struct record_run *b,
-                    const struct initial_run_state *irs)
+static bool
+pqueue_is_full (const struct pqueue *pq) 
 {
-  int result = a->run < b->run ? -1 : a->run > b->run;
-  if (result == 0)
-    result = compare_record (&a->record, &b->record, irs->xsrt->criteria);
-  if (result == 0)
-    result = a->idx < b->idx ? -1 : a->idx > b->idx;
-  return result;
+  return pq->record_cnt >= pq->record_cap;
 }
 
-/* Compares record-run tuples A and B on run number first, then
-   on the current record according to SCP, but in descending
-   order. */
-static int
-compare_record_run_minheap (const void *a, const void *b, const void *irs) 
+static bool
+pqueue_is_empty (const struct pqueue *pq) 
 {
-  return -compare_record_run (a, b, irs);
+  return pq->record_cnt == 0;
 }
 
-/* Begins a new initial run, specifically its output file. */
 static void
-start_run (struct initial_run_state *irs)
+pqueue_push (struct pqueue *pq, struct ccase *c, casenumber id) 
 {
-  irs->run++;
-  irs->case_cnt = 0;
-
-  /* This casefile is internal to the sort, so don't use the factory
-     to create it. */
-  irs->casefile = fastfile_create (irs->xsrt->value_cnt);
-  casefile_to_disk (irs->casefile);
-  case_nullify (&irs->last_output); 
-}
+  struct pqueue_record *r;
+  
+  assert (!pqueue_is_full (pq));
 
-/* Ends the current initial run.  */
-static void
-end_run (struct initial_run_state *irs)
-{
-  struct external_sort *xsrt = irs->xsrt;
+  r = &pq->records[pq->record_cnt++];
+  r->id = id;
+  case_move (&r->c, c);
+  r->idx = pq->idx++;
 
-  /* Record initial run. */
-  if (irs->casefile != NULL) 
-    {
-      casefile_sleep (irs->casefile);
-      if (xsrt->run_cnt >= xsrt->run_cap) 
-        {
-          xsrt->run_cap *= 2;
-          xsrt->runs = xnrealloc (xsrt->runs,
-                                  xsrt->run_cap, sizeof *xsrt->runs);
-        }
-      xsrt->runs[xsrt->run_cnt++] = irs->casefile;
-      if (casefile_error (irs->casefile))
-        irs->okay = false;
-      irs->casefile = NULL; 
-    }
+  push_heap (pq->records, pq->record_cnt, sizeof *pq->records,
+             compare_pqueue_records_minheap, pq);
 }
 
-/* Writes a record to the current initial run. */
 static void
-output_record (struct initial_run_state *irs)
+pqueue_pop (struct pqueue *pq, struct ccase *c, casenumber *id) 
 {
-  struct record_run *record_run;
-  struct ccase case_tmp;
-  
-  /* Extract minimum case from heap. */
-  assert (irs->record_cnt > 0);
-  pop_heap (irs->records, irs->record_cnt--, sizeof *irs->records,
-            compare_record_run_minheap, irs);
-  record_run = irs->records + irs->record_cnt;
-
-  /* Bail if an error has occurred. */
-  if (!irs->okay)
-    return;
-
-  /* Start new run if necessary. */
-  assert (record_run->run == irs->run
-          || record_run->run == irs->run + 1);
-  if (record_run->run != irs->run)
-    {
-      end_run (irs);
-      start_run (irs);
-    }
-  assert (record_run->run == irs->run);
-  irs->case_cnt++;
+  struct pqueue_record *r;
 
-  /* Write to disk. */
-  if (irs->casefile != NULL)
-    casefile_append (irs->casefile, &record_run->record);
-
-  /* This record becomes last_output. */
-  irs->last_output = case_tmp = record_run->record;
-  record_run->record = irs->records[irs->record_cap - 1].record;
-  irs->records[irs->record_cap - 1].record = case_tmp;
-}
-
-/* Merging. */
+  assert (!pqueue_is_empty (pq));
 
-static int choose_merge (struct casefile *runs[], int run_cnt, int order);
-static struct casefile *merge_once (struct external_sort *,
-                                    struct casefile *[], size_t);
-
-/* Repeatedly merges run until only one is left,
-   and returns the final casefile.
-   Returns a null pointer if an I/O error occurs. */
-static struct casefile *
-merge (struct external_sort *xsrt)
-{
-  while (xsrt->run_cnt > 1)
-    {
-      int order = MIN (MAX_MERGE_ORDER, xsrt->run_cnt);
-      int idx = choose_merge (xsrt->runs, xsrt->run_cnt, order);
-      xsrt->runs[idx] = merge_once (xsrt, xsrt->runs + idx, order);
-      remove_range (xsrt->runs, xsrt->run_cnt, sizeof *xsrt->runs,
-                    idx + 1, order - 1);
-      xsrt->run_cnt -= order - 1;
+  pop_heap (pq->records, pq->record_cnt--, sizeof *pq->records,
+            compare_pqueue_records_minheap, pq);
 
-      if (xsrt->runs[idx] == NULL)
-        return NULL;
-    }
-  assert (xsrt->run_cnt == 1);
-  xsrt->run_cnt = 0;
-  return xsrt->runs[0];
+  r = &pq->records[pq->record_cnt];
+  *id = r->id;
+  case_move (c, &r->c);
 }
 
-/* Chooses ORDER runs out of the RUN_CNT runs in RUNS to merge,
-   and returns the index of the first one.
-
-   For stability, we must merge only consecutive runs.  For
-   efficiency, we choose the shortest consecutive sequence of
-   runs. */
+/* Compares pqueue records A and B on id, then on case data, then
+   on insertion order; the result is negated (descending order). */
 static int
-choose_merge (struct casefile *runs[], int run_cnt, int order) 
+compare_pqueue_records_minheap (const void *a_, const void *b_,
+                                const void *pq_) 
 {
-  int min_idx, min_sum;
-  int cur_idx, cur_sum;
-  int i;
-
-  /* Sum up the length of the first ORDER runs. */
-  cur_sum = 0;
-  for (i = 0; i < order; i++)
-    cur_sum += casefile_get_case_cnt (runs[i]);
-
-  /* Find the shortest group of ORDER runs,
-     using a running total for efficiency. */
-  min_idx = 0;
-  min_sum = cur_sum;
-  for (cur_idx = 1; cur_idx + order <= run_cnt; cur_idx++)
-    {
-      cur_sum -= casefile_get_case_cnt (runs[cur_idx - 1]);
-      cur_sum += casefile_get_case_cnt (runs[cur_idx + order - 1]);
-      if (cur_sum < min_sum)
-        {
-          min_sum = cur_sum;
-          min_idx = cur_idx;
-        }
-    }
-
-  return min_idx;
-}
-
-/* Merges the RUN_CNT initial runs specified in INPUT_FILES into a
-   new run, and returns the new run.
-   Returns a null pointer if an I/O error occurs. */
-static struct casefile *
-merge_once (struct external_sort *xsrt,
-            struct casefile **const input_files,
-            size_t run_cnt)
-{
-  struct run
-    {
-      struct casefile *file;
-      struct casereader *reader;
-      struct ccase ccase;
-    }
-  *runs;
-
-  struct casefile *output = NULL;
-  int i;
-
-  /* Open input files. */
-  runs = xnmalloc (run_cnt, sizeof *runs);
-  for (i = 0; i < run_cnt; i++) 
-    {
-      struct run *r = &runs[i];
-      r->file = input_files[i];
-      r->reader = casefile_get_destructive_reader (r->file);
-      if (!casereader_read_xfer (r->reader, &r->ccase))
-        {
-          run_cnt--;
-          i--;
-        }
-    }
-
-  /* Create output file. */
-  output = xsrt->factory->create_casefile (xsrt->factory, xsrt->value_cnt);
-  casefile_to_disk (output);
-
-  /* Merge. */
-  while (run_cnt > 0) 
-    {
-      struct run *min_run, *run;
-      
-      /* Find minimum. */
-      min_run = runs;
-      for (run = runs + 1; run < runs + run_cnt; run++)
-       if (compare_record (&run->ccase, &min_run->ccase, xsrt->criteria) < 0)
-          min_run = run;
-
-      /* Write minimum to output file. */
-      casefile_append_xfer (output, &min_run->ccase);
-
-      /* Read another case from minimum run. */
-      if (!casereader_read_xfer (min_run->reader, &min_run->ccase))
-        {
-          if (casefile_error (min_run->file) || casefile_error (output))
-            goto error;
-          casereader_destroy (min_run->reader);
-          casefile_destroy (min_run->file);
-
-          remove_element (runs, run_cnt, sizeof *runs, min_run - runs);
-          run_cnt--;
-        } 
-    }
-
-  if (!casefile_sleep (output))
-    goto error;
-  free (runs);
-
-  return output;
-
- error:
-  for (i = 0; i < run_cnt; i++) 
-    casefile_destroy (runs[i].file);
-  casefile_destroy (output);
-  free (runs);
-  return NULL;
+  const struct pqueue_record *a = a_;
+  const struct pqueue_record *b = b_;
+  const struct pqueue *pq = pq_;
+  int result = a->id < b->id ? -1 : a->id > b->id;
+  if (result == 0)
+    result = case_ordering_compare_cases (&a->c, &b->c, pq->ordering);
+  if (result == 0)
+    result = a->idx < b->idx ? -1 : a->idx > b->idx;
+  return -result;
 }
Index: merge/src/math/sort.h
===================================================================
--- merge.orig/src/math/sort.h  2007-06-05 09:16:11.000000000 -0700
+++ merge/src/math/sort.h       2007-06-05 09:18:06.000000000 -0700
@@ -1,5 +1,5 @@
 /* PSPP - computes sample statistics.
-   Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
+   Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc.
 
    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License as
@@ -16,57 +16,18 @@
    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
    02110-1301, USA. */
 
-#if !sort_h
-#define sort_h 1
+#ifndef MATH_SORT_H
+#define MATH_SORT_H 1
 
 #include <stddef.h>
 #include <stdbool.h>
 
-struct casereader;
-struct dictionary;
-struct variable;
-struct casefile_factory;
+struct case_ordering;
 
 extern int min_buffers ;
 extern int max_buffers ;
-extern bool allow_internal_sort ;
 
+struct casewriter *sort_create_writer (struct case_ordering *);
+struct casereader *sort_execute (struct casereader *, struct case_ordering *);
 
-/* Sort direction. */
-enum sort_direction
-  {
-    SRT_ASCEND,                        /* A, B, C, ..., X, Y, Z. */
-    SRT_DESCEND                        /* Z, Y, X, ..., C, B, A. */
-  };
-
-/* A sort criterion. */
-struct sort_criterion
-  {
-    int fv;                     /* Variable data index. */
-    int width;                  /* 0=numeric, otherwise string width. */
-    enum sort_direction dir;    /* Sort direction. */
-  };
-
-/* A set of sort criteria. */
-struct sort_criteria 
-  {
-    struct sort_criterion *crits;
-    size_t crit_cnt;
-  };
-
-
-void sort_destroy_criteria (struct sort_criteria *);
-
-struct casefile *sort_execute (struct casereader *,
-                               const struct sort_criteria *,
-                              struct casefile_factory *
-                              );
-
-struct dataset ;
-bool sort_active_file_in_place (struct dataset *ds, 
-                               const struct sort_criteria *);
-
-struct casefile *sort_active_file_to_casefile (struct dataset *ds, 
-                                              const struct sort_criteria *);
-
-#endif /* !sort_h */
+#endif /* math/sort.h */
Index: merge/src/ui/automake.mk
===================================================================
--- merge.orig/src/ui/automake.mk       2007-06-05 09:16:11.000000000 -0700
+++ merge/src/ui/automake.mk    2007-06-05 09:18:06.000000000 -0700
@@ -10,6 +10,4 @@
 
 src_ui_libuicommon_a_SOURCES = \
        src/ui/debugger.c \
-       src/ui/debugger.h \
-       src/ui/flexifile.c \
-       src/ui/flexifile.h
+       src/ui/debugger.h
Index: merge/src/ui/gui/automake.mk
===================================================================
--- merge.orig/src/ui/gui/automake.mk   2007-06-05 09:16:11.000000000 -0700
+++ merge/src/ui/gui/automake.mk        2007-06-05 09:18:06.000000000 -0700
@@ -84,8 +84,6 @@
        src/ui/gui/dialog-common.h \
        src/ui/gui/dict-display.c \
        src/ui/gui/dict-display.h \
-       src/ui/gui/flexifile-factory.h \
-       src/ui/gui/flexifile-factory.c \
        src/ui/gui/main.c \
        src/ui/gui/message-dialog.c \
        src/ui/gui/message-dialog.h \
Index: merge/src/ui/gui/helper.c
===================================================================
--- merge.orig/src/ui/gui/helper.c      2007-06-05 09:16:11.000000000 -0700
+++ merge/src/ui/gui/helper.c   2007-06-05 09:18:06.000000000 -0700
@@ -29,7 +29,6 @@
 #include <data/data-in.h>
 #include <data/data-out.h>
 #include <data/dictionary.h>
-#include <data/storage-stream.h>
 #include <libpspp/message.h>
 
 #include <libpspp/i18n.h>
@@ -171,7 +170,7 @@
 {
   struct lexer *lexer;
 
-  g_return_val_if_fail (proc_has_source (the_dataset), FALSE);
+  g_return_val_if_fail (proc_has_active_file (the_dataset), FALSE);
 
   lexer = lex_create (the_source_stream);
 
@@ -189,18 +188,10 @@
 
   lex_destroy (lexer);
 
-  /* The GUI must *always* have a data source, even if it's an empty one.
-     Therefore, we find that there is none, (for example NEW FILE was the last
-     item in the syntax) then we create a new one. */
-  if ( ! proc_has_source (the_dataset))
-    proc_set_source (the_dataset,
-                    storage_source_create (the_data_store->case_file->flexifile)
-                    );
-
   /* GUI syntax needs this implicit EXECUTE command at the end of
      every script.  Otherwise commands like GET could leave the GUI without
      a casefile. */
-  return procedure (the_dataset, NULL, NULL);
+  return proc_execute (the_dataset);
 }
 
 
Index: merge/src/ui/gui/missing-val-dialog.c
===================================================================
--- merge.orig/src/ui/gui/missing-val-dialog.c  2007-06-05 09:16:11.000000000 -0700
+++ merge/src/ui/gui/missing-val-dialog.c       2007-06-05 09:18:06.000000000 -0700
@@ -90,8 +90,8 @@
       gint nvals = 0;
       gint badvals = 0;
       gint i;
-      mv_clear (&dialog->mvl);
-      for (i = 0 ; i < 3 ; ++i )
+      mv_clear(&dialog->mvl);
+      for(i = 0 ; i < 3 ; ++i ) 
        {
          gchar *text =
            g_strdup (gtk_entry_get_text (GTK_ENTRY (dialog->mv[i])));
Index: merge/src/ui/gui/psppire-case-file.c
===================================================================
--- merge.orig/src/ui/gui/psppire-case-file.c   2007-06-05 09:16:11.000000000 -0700
+++ merge/src/ui/gui/psppire-case-file.c        2007-06-05 09:18:06.000000000 -0700
@@ -26,13 +26,14 @@
 #include <gtksheet/gtkextra-marshal.h>
 
 #include <data/case.h>
-#include <ui/flexifile.h>
-#include "flexifile-factory.h"
-#include <data/casefile.h>
 #include <data/data-in.h>
+#include <data/datasheet.h>
 #include <math/sort.h>
 #include <libpspp/misc.h>
 
+#include "xalloc.h"
+#include "xallocsa.h"
+
 /* --- prototypes --- */
 static void psppire_case_file_class_init       (PsppireCaseFileClass   *class);
 static void psppire_case_file_init     (PsppireCaseFile        *case_file);
@@ -132,8 +133,7 @@
 {
   PsppireCaseFile *cf = PSPPIRE_CASE_FILE (object);
 
-  if ( cf->flexifile)
-    casefile_destroy (cf->flexifile);
+  datasheet_destroy (cf->datasheet);
 
   G_OBJECT_CLASS (parent_class)->finalize (object);
 }
@@ -141,7 +141,7 @@
 static void
 psppire_case_file_init (PsppireCaseFile *cf)
 {
-  cf->flexifile = 0;
+  cf->datasheet = NULL;
 }
 
 
@@ -156,16 +156,16 @@
 {
   PsppireCaseFile *cf = g_object_new (G_TYPE_PSPPIRE_CASE_FILE, NULL);
 
-  cf->flexifile = flexifile_create (0);
+  cf->datasheet = datasheet_create (NULL);
 
   return cf;
 }
 
 
 void
-psppire_case_file_replace_flexifile (PsppireCaseFile *cf, struct flexifile *ff)
+psppire_case_file_replace_datasheet (PsppireCaseFile *cf, struct datasheet *ds)
 {
-  cf->flexifile = (struct casefile *) ff;
+  cf->datasheet = ds;
 }
 
 
@@ -173,16 +173,14 @@
 gboolean
 psppire_case_file_delete_cases (PsppireCaseFile *cf, gint n_cases, gint first)
 {
-  int result;
-
   g_return_val_if_fail (cf, FALSE);
-  g_return_val_if_fail (cf->flexifile, FALSE);
+  g_return_val_if_fail (cf->datasheet, FALSE);
 
-  result =  flexifile_delete_cases (FLEXIFILE (cf->flexifile), n_cases,  first);
+  datasheet_delete_rows (cf->datasheet, first, n_cases);
 
   g_signal_emit (cf, signals [CASES_DELETED], 0, n_cases, first);
 
-  return result;
+  return TRUE;
 }
 
 /* Insert case CC into the case file before POSN */
@@ -191,12 +189,14 @@
                              struct ccase *cc,
                              gint posn)
 {
+  struct ccase tmp;
   bool result ;
 
   g_return_val_if_fail (cf, FALSE);
-  g_return_val_if_fail (cf->flexifile, FALSE);
+  g_return_val_if_fail (cf->datasheet, FALSE);
 
-  result = flexifile_insert_case (FLEXIFILE (cf->flexifile), cc, posn);
+  case_clone (&tmp, cc);
+  result = datasheet_insert_rows (cf->datasheet, posn, &tmp, 1);
 
   if ( result )
     g_signal_emit (cf, signals [CASE_INSERTED], 0, posn);
@@ -212,15 +212,17 @@
 psppire_case_file_append_case (PsppireCaseFile *cf,
                              struct ccase *c)
 {
+  struct ccase tmp;
   bool result ;
   gint posn ;
 
   g_return_val_if_fail (cf, FALSE);
-  g_return_val_if_fail (cf->flexifile, FALSE);
+  g_return_val_if_fail (cf->datasheet, FALSE);
 
-  posn = casefile_get_case_cnt (cf->flexifile);
+  posn = datasheet_get_row_cnt (cf->datasheet);
 
-  result = casefile_append (cf->flexifile, c);
+  case_clone (&tmp, c);
+  result = datasheet_insert_rows (cf->datasheet, posn, &tmp, 1);
 
   g_signal_emit (cf, signals [CASE_INSERTED], 0, posn);
 
@@ -233,69 +235,68 @@
 {
   g_return_val_if_fail (cf, FALSE);
 
-  if ( ! cf->flexifile)
+  if ( ! cf->datasheet)
     return 0;
 
-  return casefile_get_case_cnt (cf->flexifile);
+  return datasheet_get_row_cnt (cf->datasheet);
 }
 
-/* Return the IDXth value from case CASENUM.
-   The return value must not be freed or written to
- */
-const union value *
-psppire_case_file_get_value (const PsppireCaseFile *cf, gint casenum, gint idx)
+/* Copies the IDXth value from case CASENUM into VALUE, which has
+   the given WIDTH.  If VALUE is null, memory for it is allocated
+   with malloc.  Returns the value if successful, NULL on failure. */
+union value *
+psppire_case_file_get_value (const PsppireCaseFile *cf,
+                             casenumber casenum, size_t idx,
+                             union value *value, int width)
 {
-  const union value *v;
-  struct ccase c;
-
-  g_return_val_if_fail (cf, NULL);
-  g_return_val_if_fail (cf->flexifile, NULL);
-
-  g_return_val_if_fail (idx < casefile_get_value_cnt (cf->flexifile), NULL);
+  bool allocated;
+  
+  g_return_val_if_fail (cf, NULL);
+  g_return_val_if_fail (cf->datasheet, NULL);
 
-  flexifile_get_case (FLEXIFILE (cf->flexifile), casenum, &c);
+  g_return_val_if_fail (idx < datasheet_get_column_cnt (cf->datasheet), NULL);
 
-  v = case_data_idx (&c, idx);
-  case_destroy (&c);
-
-  return v;
+  if (value == NULL) 
+    {
+      value = xnmalloc (value_cnt_from_width (width), sizeof *value);
+      allocated = true;
+    }
+  else
+    allocated = false;
+  if (!datasheet_get_value (cf->datasheet, casenum, idx, value, width))
+    {
+      if (allocated) 
+        free (value);
+      value = NULL;
+    }
+  return value;
 }
 
 void
 psppire_case_file_clear (PsppireCaseFile *cf)
 {
-  casefile_destroy (cf->flexifile);
-  cf->flexifile = 0;
+  datasheet_destroy (cf->datasheet);
+  cf->datasheet = NULL;
   g_signal_emit (cf, signals [CASES_DELETED], 0, 0, -1);
 }
 
-/* Set the IDXth value of case C to SYSMIS/EMPTY */
+/* Set the IDXth value of case CASENUM to V.
+   Returns true if successful, false on I/O error. */
 gboolean
 psppire_case_file_set_value (PsppireCaseFile *cf, gint casenum, gint idx,
                            union value *v, gint width)
 {
-  struct ccase cc ;
-  int bytes;
+  bool ok;
 
   g_return_val_if_fail (cf, FALSE);
-  g_return_val_if_fail (cf->flexifile, FALSE);
-
-  g_return_val_if_fail (idx < casefile_get_value_cnt (cf->flexifile), FALSE);
+  g_return_val_if_fail (cf->datasheet, FALSE);
 
-  if ( ! flexifile_get_case (FLEXIFILE (cf->flexifile), casenum, &cc) )
-    return FALSE;
+  g_return_val_if_fail (idx < datasheet_get_column_cnt (cf->datasheet), FALSE);
 
-  if ( width == 0 )
-    bytes = MAX_SHORT_STRING;
-  else
-    bytes = DIV_RND_UP (width, MAX_SHORT_STRING) * MAX_SHORT_STRING ;
-
-  /* Cast away const in flagrant abuse of the casefile */
-  memcpy ((union value *)case_data_idx (&cc, idx), v, bytes);
-
-  g_signal_emit (cf, signals [CASE_CHANGED], 0, casenum);
-
-  return TRUE;
+  ok = datasheet_put_value (cf->datasheet, casenum, idx, v, width);
+  if (ok)
+    g_signal_emit (cf, signals [CASE_CHANGED], 0, casenum);
+  return ok;
 }
 
 
@@ -305,49 +306,43 @@
 psppire_case_file_data_in (PsppireCaseFile *cf, gint casenum, gint idx,
                           struct substring input, const struct fmt_spec *fmt)
 {
-  struct ccase cc ;
+  union value *value;
+  int width;
+  bool ok;
 
   g_return_val_if_fail (cf, FALSE);
-  g_return_val_if_fail (cf->flexifile, FALSE);
+  g_return_val_if_fail (cf->datasheet, FALSE);
 
-  g_return_val_if_fail (idx < casefile_get_value_cnt (cf->flexifile), FALSE);
+  g_return_val_if_fail (idx < datasheet_get_column_cnt (cf->datasheet), FALSE);
 
-  if ( ! flexifile_get_case (FLEXIFILE (cf->flexifile), casenum, &cc) )
-    return FALSE;
+  width = fmt_var_width (fmt);
+  value = xallocsa (value_cnt_from_width (width) * sizeof *value);
+  ok = (datasheet_get_value (cf->datasheet, casenum, idx, value, width)
+        && data_in (input, fmt->type, 0, 0, value, width)
+        && datasheet_put_value (cf->datasheet, casenum, idx, value, width));
 
-  /* Cast away const in flagrant abuse of the casefile */
-  if (!data_in (input, fmt->type, 0, 0,
-                (union value *) case_data_idx (&cc, idx), fmt_var_width (fmt)))
-    g_warning ("Cant set value\n");
+  if (ok)
+    g_signal_emit (cf, signals [CASE_CHANGED], 0, casenum);
 
-  g_signal_emit (cf, signals [CASE_CHANGED], 0, casenum);
+  freesa (value);
 
-  return TRUE;
+  return ok;  /* False if the value could not be read, parsed, or stored. */
 }
 
 
 void
-psppire_case_file_sort (PsppireCaseFile *cf, const struct sort_criteria *sc)
+psppire_case_file_sort (PsppireCaseFile *cf, struct case_ordering *ordering)
 {
+  struct casereader *sorted_data;
   gint c;
 
-  struct casereader *reader = casefile_get_reader (cf->flexifile, NULL);
-  struct casefile *cfile;
-
-  struct casefile_factory *factory  = flexifile_factory_create ();
-
-  cfile = sort_execute (reader, sc, factory);
-
-  casefile_destroy (cf->flexifile);
-
-  cf->flexifile = cfile;
+  sorted_data = sort_execute (datasheet_make_reader (cf->datasheet), ordering);
+  cf->datasheet = datasheet_create (sorted_data);
 
   /* FIXME: Need to have a signal to change a range of cases, instead of
      calling a signal many times */
-  for ( c = 0 ; c < casefile_get_case_cnt (cf->flexifile) ; ++c )
+  for ( c = 0 ; c < datasheet_get_row_cnt (cf->datasheet) ; ++c )
     g_signal_emit (cf, signals [CASE_CHANGED], 0, c);
-
-  flexifile_factory_destroy (factory);
 }
 
 
@@ -357,16 +352,17 @@
 psppire_case_file_insert_values (PsppireCaseFile *cf,
                                 gint n_values, gint before)
 {
+  union value *values;
   g_return_val_if_fail (cf, FALSE);
 
-  if ( ! cf->flexifile )
-    {
-      cf->flexifile = flexifile_create (n_values);
+  if ( ! cf->datasheet )
+    cf->datasheet = datasheet_create (NULL);
 
-      return TRUE;
-    }
+  values = xcalloc (n_values, sizeof *values);
+  datasheet_insert_columns (cf->datasheet, values, n_values, before);
+  free (values);
 
-  return flexifile_resize (FLEXIFILE (cf->flexifile), n_values, before);
+  return TRUE;
 }
 
 /* Fills C with the CASENUMth case.
@@ -377,7 +373,7 @@
                           struct ccase *c)
 {
   g_return_val_if_fail (cf, FALSE);
-  g_return_val_if_fail (cf->flexifile, FALSE);
+  g_return_val_if_fail (cf->datasheet, FALSE);
 
-  return flexifile_get_case (FLEXIFILE (cf->flexifile), casenum, c);
+  return datasheet_get_row (cf->datasheet, casenum, c);
 }
Index: merge/src/ui/gui/psppire-case-file.h
===================================================================
--- merge.orig/src/ui/gui/psppire-case-file.h   2007-06-05 09:16:11.000000000 -0700
+++ merge/src/ui/gui/psppire-case-file.h        2007-06-05 09:18:06.000000000 -0700
@@ -26,6 +26,7 @@
 #include <glib.h>
 
 #include <libpspp/str.h>
+#include <data/case.h>
 
 
 
@@ -55,7 +56,7 @@
 {
   GObject             parent;
 
-  struct casefile *flexifile;
+  struct datasheet *datasheet;
 };
 
 
@@ -75,8 +76,9 @@
 gint psppire_case_file_get_case_count (const PsppireCaseFile *cf);
 
 
-const union value * psppire_case_file_get_value (const PsppireCaseFile *cf,
-                                             gint c, gint idx);
+union value * psppire_case_file_get_value (const PsppireCaseFile *cf,
+                                           casenumber, size_t idx,
+                                           union value *, int width);
 
 struct fmt_spec;
 
@@ -95,14 +97,14 @@
 
 gboolean psppire_case_file_insert_values (PsppireCaseFile *cf, gint n_values, gint before);
 
-struct sort_criteria;
-void psppire_case_file_sort (PsppireCaseFile *cf, const struct sort_criteria *);
+struct case_ordering;
+void psppire_case_file_sort (PsppireCaseFile *cf, struct case_ordering *);
 
 gboolean psppire_case_file_get_case (const PsppireCaseFile *cf, gint casenum,
                                    struct ccase *c);
 
-void psppire_case_file_replace_flexifile (PsppireCaseFile *,
-                                         struct flexifile *);
+void psppire_case_file_replace_datasheet (PsppireCaseFile *,
+                                          struct datasheet *);
 
 
 
Index: merge/src/ui/gui/psppire-data-store.c
===================================================================
--- merge.orig/src/ui/gui/psppire-data-store.c  2007-06-05 09:16:11.000000000 -0700
+++ merge/src/ui/gui/psppire-data-store.c       2007-06-05 09:18:06.000000000 -0700
@@ -25,8 +25,8 @@
 #define _(msgid) gettext (msgid)
 #define N_(msgid) msgid
 
-#include <data/casefile.h>
-#include <data/case.h>
+#include <data/casewriter.h>
+#include <data/datasheet.h>
 #include <data/data-out.h>
 #include <data/variable.h>
 
@@ -454,7 +454,7 @@
 
 
   /* Opportunity for optimisation exists here when creating a blank case */
-  val_cnt = casefile_get_value_cnt (ds->case_file->flexifile) ;
+  val_cnt = datasheet_get_column_cnt (ds->case_file->datasheet) ;
 
   case_create (&cc, val_cnt);
 
@@ -484,7 +484,7 @@
   char *text;
   const struct fmt_spec *fp ;
   const struct variable *pv ;
-  const union value *v ;
+  union value *v ;
   GString *s;
   PsppireDataStore *store = PSPPIRE_DATA_STORE (model);
 
@@ -505,19 +505,19 @@
 
   g_assert (idx >= 0);
 
-  v = psppire_case_file_get_value (store->case_file, row, idx);
-
+  v = psppire_case_file_get_value (store->case_file, row, idx, NULL,
+                                   var_get_width (pv));
+  
   g_return_val_if_fail (v, NULL);
 
   if ( store->show_labels)
     {
-      const struct val_labs * vl = var_get_value_labels (pv);
-
-      const gchar *label;
-      if ( (label = val_labs_find (vl, *v)) )
-       {
+      const gchar *label = var_lookup_value_label (pv, v);
+      if (label)
+        {
+          free (v);
          return pspp_locale_to_utf8 (label, -1, 0);
-       }
+        }
     }
 
   fp = var_get_write_format (pv);
@@ -539,6 +539,7 @@
 
   g_strchomp (text);
 
+  free (v);
   return text;
 }
 
@@ -649,7 +650,7 @@
     3 /* version */
   };
 
-  struct sfm_writer *writer ;
+  struct casewriter *writer;
 
   g_assert (handle);
 
@@ -664,15 +665,10 @@
   for (i = 0 ; i < psppire_case_file_get_case_count (store->case_file); ++i )
     {
       struct ccase c;
-
-      case_create (&c, var_cnt);
       psppire_case_file_get_case (store->case_file, i, &c);
-      sfm_write_case (writer, &c);
-
-      case_destroy (&c);
+      casewriter_write (writer, &c);
     }
-
-  sfm_close_writer (writer);
+  casewriter_destroy (writer);
 }
 
 
Index: merge/src/ui/gui/psppire.c
===================================================================
--- merge.orig/src/ui/gui/psppire.c     2007-06-05 09:16:11.000000000 -0700
+++ merge/src/ui/gui/psppire.c  2007-06-05 09:18:06.000000000 -0700
@@ -29,16 +29,15 @@
 #include "psppire.h"
 
 
+#include <data/casereader.h>
+#include <data/datasheet.h>
 #include <data/file-handle-def.h>
 #include <data/format.h>
-#include <data/storage-stream.h>
-#include <data/case-source.h>
 #include <data/settings.h>
 #include <data/file-name.h>
 #include <data/procedure.h>
 #include <libpspp/getl.h>
 #include <language/lexer/lexer.h>
-#include <ui/flexifile.h>
 #include <libpspp/version.h>
 
 #include <gtk/gtk.h>
@@ -50,7 +49,6 @@
 #include "data-sheet.h"
 #include "var-sheet.h"
 #include "message-dialog.h"
-#include "flexifile-factory.h"
 
 PsppireDataStore *the_data_store = 0;
 PsppireVarStore *the_var_store = 0;
@@ -68,28 +66,17 @@
 
 
 static void
-replace_flexifile (struct case_source *s)
+replace_casereader (struct casereader *s)
 {
-  if ( NULL == s )
-    psppire_case_file_replace_flexifile (the_data_store->case_file,
-                                        (struct flexifile *) flexifile_create (0));
-  else
-    {
-      if ( ! case_source_is_class (s, &storage_source_class))
-       return ;
-
-      psppire_case_file_replace_flexifile (the_data_store->case_file,
-                                          (struct flexifile *)
-                                          storage_source_get_casefile (s));
-    }
-}
-
+  struct datasheet *datasheet = datasheet_create (s);
 
+  psppire_case_file_replace_datasheet (the_data_store->case_file,
+                                       datasheet);
+}
 
 void
 initialize (void)
 {
-  struct casefile_factory *factory;
   PsppireDict *dictionary = 0;
 
   /* gtk_init messes with the locale.
@@ -105,14 +92,12 @@
   fmt_init ();
   settings_init ();
   fh_init ();
-  factory = flexifile_factory_create ();
   the_source_stream =
     create_source_stream (
                          fn_getenv_default ("STAT_INCLUDE_PATH", include_path)
                          );
 
-  the_dataset = create_dataset (factory,
-                               replace_flexifile,
+  the_dataset = create_dataset (replace_casereader,
                                replace_dictionary);
 
   message_dialog_init (the_source_stream);
@@ -127,12 +112,12 @@
   /* Create the model for the var_sheet */
   the_var_store = psppire_var_store_new (dictionary);
 
-
   the_data_store = psppire_data_store_new (dictionary);
 
-  proc_set_source (the_dataset,
-                  storage_source_create (the_data_store->case_file->flexifile)
-                  );
+
+  proc_set_active_file_data (the_dataset,
+                            datasheet_make_reader (the_data_store->case_file->datasheet));
+
 
   create_icon_factory ();
 
Index: merge/src/ui/gui/var-sheet.c
===================================================================
--- merge.orig/src/ui/gui/var-sheet.c   2007-06-05 09:16:11.000000000 -0700
+++ merge/src/ui/gui/var-sheet.c        2007-06-05 09:18:06.000000000 -0700
@@ -33,6 +33,9 @@
 
 #include <stdlib.h>
 #include <string.h>
+#if HAVE_LANGINFO_H
+#include <langinfo.h>
+#endif
 
 #include <data/value.h>
 
@@ -40,7 +43,6 @@
 #include <gtksheet/gsheet-hetero-column.h>
 #include <gtksheet/gsheet-uniform-row.h>
 
-#include "localcharset.h"
 #include "psppire-var-store.h"
 #include "helper.h"
 #include "psppire-dict.h"
@@ -416,7 +418,9 @@
                               gchar *string2,
                               gint int1, gint int2)
 {
+#if HAVE_LANGINFO_H
   gchar *codeset;
+#endif
   gint i;
   GtkWidget *sheet;
 
@@ -444,11 +448,12 @@
   gtk_sheet_set_model (GTK_SHEET (sheet), G_SHEET_MODEL (the_var_store));
 
 
+#if HAVE_LANGINFO_H
   /* Since this happens inside glade_xml_new, we must prevent strings from
    * being re-encoded twice */
   codeset = bind_textdomain_codeset (PACKAGE, 0);
-  bind_textdomain_codeset (PACKAGE, locale_charset ());
-
+  bind_textdomain_codeset (PACKAGE, nl_langinfo (CODESET));
+#endif
   for (i = 0 ; i < n_COLS ; ++i )
     {
       g_sheet_hetero_column_set_button_label (G_SHEET_HETERO_COLUMN (geo), i,
@@ -457,8 +462,9 @@
       g_sheet_hetero_column_set_width (G_SHEET_HETERO_COLUMN (geo), i,
                                               column_def[i].width);
     }
-
+#if HAVE_LANGINFO_H
   bind_textdomain_codeset (PACKAGE, codeset);
+#endif
 
   gtk_widget_show (sheet);
 
Index: merge/src/ui/terminal/main.c
===================================================================
--- merge.orig/src/ui/terminal/main.c   2007-06-05 09:16:11.000000000 -0700
+++ merge/src/ui/terminal/main.c        2007-06-05 09:18:06.000000000 -0700
@@ -27,7 +27,6 @@
 #include "progname.h"
 #include "read-line.h"
 
-#include <data/fastfile-factory.h>
 #include <data/dictionary.h>
 #include <data/file-handle-def.h>
 #include <libpspp/getl.h>
@@ -86,7 +81,6 @@
 int
 main (int argc, char **argv)
 {
-  struct casefile_factory *factory;
   signal (SIGABRT, bug_handler);
   signal (SIGSEGV, bug_handler);
   signal (SIGFPE, bug_handler);
@@ -111,9 +105,7 @@
   settings_init ();
   random_init ();
 
-  factory = fastfile_factory_create ();
-
-  the_dataset = create_dataset (factory, NULL, NULL);
+  the_dataset = create_dataset (NULL, NULL);
 
   if (parse_command_line (argc, argv, the_source_stream))
     {
Index: merge/tests/automake.mk
===================================================================
--- merge.orig/tests/automake.mk        2007-06-05 09:16:11.000000000 -0700
+++ merge/tests/automake.mk     2007-06-05 09:18:06.000000000 -0700
@@ -118,7 +118,6 @@
        tests/bugs/temp-freq.sh \
        tests/bugs/print-crash.sh \
        tests/bugs/keep-all.sh \
-       tests/xforms/casefile.sh \
        tests/xforms/recode.sh \
        tests/stats/descript-basic.sh \
        tests/stats/descript-missing.sh \
Index: merge/src/ui/gui/val-labs-dialog.h
===================================================================
--- merge.orig/src/ui/gui/val-labs-dialog.h     2007-06-05 09:16:11.000000000 -0700
+++ merge/src/ui/gui/val-labs-dialog.h  2007-06-05 09:18:06.000000000 -0700
@@ -29,6 +29,7 @@
 
 #include <gtk/gtk.h>
 #include <glade/glade.h>
+#include <data/variable.h>
 
 
 struct val_labs;

--





reply via email to

[Prev in Thread] Current Thread [Next in Thread]