bug-coreutils
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: SPAMX: Re: SPAMX: Re: Feature request: gzip/bzip support for split


From: Chandrakumar Muthaiah
Subject: Re: SPAMX: Re: SPAMX: Re: Feature request: gzip/bzip support for split
Date: Mon, 02 Feb 2009 18:04:09 -0500
User-agent: Thunderbird 2.0.0.19 (Windows/20081209)


Jim Meyering wrote:
I love it, that sounds like a good idea. I can make another patch if
no one has done it already.


Alfred M. Szmidt wrote:
   I would like to propose a feature that allows to gzip/bzip on its way
   out during the split and I am also including the patch for the same.

I think a better approach would be to add a --on-output-hook=PROGRAM
command, then one can call any arbitrary command when split outputs a
file.

Thanks for contributing.
Do you have a copyright assignment on file with the Free Software
Foundation?  If not, you'll need to start that process.
Details are here, as well as other important contribution guidelines:

    http://git.sv.gnu.org/cgit/coreutils.git/plain/HACKING
I do not have a copyright assignment on file; I will work on that.

Anyway, I have added support for --on-output-hook and --on-output-hook-suf,

and the patch is below.

--- split-orig.c    2007-03-18 17:36:43.000000000 -0400
+++ split.c    2009-02-02 17:56:24.000000000 -0500
@@ -75,6 +75,13 @@
   output file is opened. */
static bool verbose;

+/* Output post-processing: built-in gzip/bzip2, or a user-supplied hook.  */
+static int zipoutfile = 0;   /* 0 = none, 1 = gzip (-z), 2 = bzip2 (-j); indexes zipcmds/zsuffixes.  */
+static int outputhook = 0;   /* Set to 1 when -k/--on-output-hook is given.  */
+static char *outhook;   /* Argument of -k: the hook command.  */
+static int outputhooksuf = 0;   /* Set to 1 when -s/--on-output-hook-suf is given.  */
+static char *outhooksuf;   /* Argument of -s: suffix appended to each output file name.  */
+
/* For long options that have no equivalent short option, use a
   non-character as a pseudo short option, starting with CHAR_MAX + 1.  */
enum
@@ -82,6 +89,14 @@
  VERBOSE_OPTION = CHAR_MAX + 1
};

+struct strvars   /* One entry of a constant string table.  */
+{
+    const char *val;
+};
+/* Both tables are indexed by zipoutfile: 0 = none, 1 = gzip, 2 = bzip2.  */
+static struct strvars const zsuffixes[] = {{""},  {".gz"}, {".bz2"}};   /* Output file-name suffixes.  */
+static struct strvars const zipcmds[]   = {{""},  {"gzip"}, {"bzip2"}};   /* Commands run on the output.  */
+
static struct option const longopts[] =
{
  {"bytes", required_argument, NULL, 'b'},
@@ -90,6 +105,10 @@
  {"suffix-length", required_argument, NULL, 'a'},
  {"numeric-suffixes", no_argument, NULL, 'd'},
  {"verbose", no_argument, NULL, VERBOSE_OPTION},
+  {"gzip", no_argument, NULL, 'z'},
+  {"bzip2", no_argument, NULL, 'j'},
+  {"on-output-hook", required_argument, NULL, 'k'},
+  {"on-output-hook-suf", required_argument, NULL, 's'},
  {GETOPT_HELP_OPTION_DECL},
  {GETOPT_VERSION_OPTION_DECL},
  {NULL, 0, NULL, 0}
@@ -117,11 +136,15 @@
Mandatory arguments to long options are mandatory for short options too.\n\
"), stdout);
      fprintf (stdout, _("\
-  -a, --suffix-length=N   use suffixes of length N (default %d)\n\
-  -b, --bytes=SIZE        put SIZE bytes per output file\n\
- -C, --line-bytes=SIZE put at most SIZE bytes of lines per output file\n\
-  -d, --numeric-suffixes  use numeric suffixes instead of alphabetic\n\
-  -l, --lines=NUMBER      put NUMBER lines per output file\n\
+  -a, --suffix-length=N    use suffixes of length N (default %d)\n\
+  -b, --bytes=SIZE         put SIZE bytes per output file\n\
+ -C, --line-bytes=SIZE put at most SIZE bytes of lines per output file\n\
+  -d, --numeric-suffixes   use numeric suffixes instead of alphabetic\n\
+  -l, --lines=NUMBER       put NUMBER lines per output file\n\
+  -z, --gzip               gzip output files\n\
+  -j, --bzip2              bzip2 output files\n\
+  -k, --on-output-hook     custom output compression hook command \n\
+  -s, --on-output-hook-suf custom compresion output extension\n\
"), DEFAULT_SUFFIX_LENGTH);
      fputs (_("\
      --verbose           print a diagnostic to standard error just\n\
@@ -194,6 +217,164 @@
    }
}

+/* Pipe helper used when each output file is fed through a command
+   instead of being written directly.  */
+/* NOTE(review): the pre-patch code already used both macros below; confirm these definitions are needed.  */
+#define EXIT_FAILURE 1
+#define O_BINARY 0
+/* Run COMMAND via "/bin/sh -c" on one end of a pipe; return the other end, or -1 on failure.  */
+static int
+fdpopen ( const char *command, const char *mode)
+{
+    int parent_end, child_end;
+    int pipe_fds[2];
+    pid_t child_pid;
+
+    int do_read = 0;
+    int do_write = 0;
+    int do_cloexec = 0;
+
+    while (*mode != '\0')
+    {
+        switch (*mode++)
+        {
+            case 'r':   /* Parent reads what COMMAND writes to its stdout.  */
+                do_read = 1;
+                break;
+            case 'w':   /* Parent writes to COMMAND's stdin.  */
+                do_write = 1;
+                break;
+            case 'e':   /* Mark the returned descriptor close-on-exec.  */
+                do_cloexec = 1;
+                break;
+            default:   /* Unknown mode character.  */
+            errout:
+                errno = EINVAL;
+                return -1;
+        }
+    }
+
+    if ((do_read ^ do_write) == 0)   /* Exactly one of 'r' and 'w' is required.  */
+        goto errout;   /* Reuses the EINVAL return inside the switch above.  */
+
+    if (pipe (pipe_fds) < 0)
+        return -1;
+
+    if (do_read)
+    {
+        parent_end = pipe_fds[0];
+        child_end = pipe_fds[1];
+    }
+    else
+    {
+        parent_end = pipe_fds[1];
+        child_end = pipe_fds[0];
+    }
+    child_pid = fork ();
+
+    if (child_pid == 0)   /* Child process.  */
+    {
+ int child_std_end = do_read ? 1 : 0; /* Descriptor the pipe replaces: stdout (1) when reading, stdin (0) when writing.  */
+        close (parent_end);
+        /* Skip dup2 when the pipe end already is the standard descriptor.  */
+        if (child_end != child_std_end)
+         {
+             dup2 (child_end, child_std_end);   /* NOTE(review): return value is not checked.  */
+            close (child_end);
+        }
+
+        execl ("/bin/sh", "sh", "-c", command, (char *) 0);
+        _exit (127);   /* Reached only if execl failed.  */
+    }
+    /* Parent, or failed fork: the child's pipe end is not needed here.  */
+    close (child_end);
+    if (child_pid < 0)   /* fork failed.  */
+    {
+        close (parent_end);
+        return -1;
+    }
+    /* NOTE(review): child_pid is discarded, so callers cannot wait for COMMAND -- confirm intended.  */
+    if (do_cloexec)
+        fcntl (parent_end, F_SETFD, FD_CLOEXEC);
+
+     return parent_end;
+}
+
+/* Run "<cmd> > <outfile><suffix>" (e.g. 'gzip > /1/2/3/4/5/outputfile.gz')
+   through fdpopen and store the pipe's write end in output_desc.
+   NOTE(review): outfile reaches the shell unquoted -- confirm.  */
+static void
+new_fd_pipe (void)
+{
+    const char *zipcmd = zipcmds[zipoutfile].val;
+    const char *zsuf = zsuffixes[zipoutfile].val;
+
+    if (outputhook)   /* A user-supplied hook overrides the built-ins.  */
+    {
+        zipcmd = outhook;
+        zsuf = outhooksuf;
+    }
+
+    size_t outzlength = strlen (zipcmd);
+    size_t outlength = strlen (outfile);
+    size_t zsuflength = strlen (zsuf);
+    size_t tlength = outzlength + outlength + zsuflength + 3;
+
+    char *outfilez = xmalloc (tlength + 1);
+    char *ptrpos = outfilez;
+    memcpy (ptrpos, zipcmd, outzlength);
+    ptrpos += outzlength;
+    memcpy (ptrpos, " > ", 3);
+    ptrpos += 3;
+    memcpy (ptrpos, outfile, outlength);
+    ptrpos += outlength;
+    memcpy (ptrpos, zsuf, zsuflength);
+    outfilez[tlength] = '\0';   /* Terminate the command buffer, not outfile.  */
+
+    if (verbose)
+      fprintf (stderr, _("creating file %s\n"), quote (outfilez));
+
+    output_desc = fdpopen (outfilez, "we");
+    if (output_desc < 0)
+      error (EXIT_FAILURE, errno, "%s", outfilez);
+    free (outfilez);   /* The forked child has its own copy.  */
+}
+
+/* Close the current output (if any), advance to the next file name and
+   open it, via new_fd_pipe when a compressor or hook is in use.  */
+static void
+new_fd_file (void)
+{
+    if (output_desc >= 0)
+      if (close (output_desc) < 0)
+        error (EXIT_FAILURE, errno, "%s", outfile);
+
+    next_file_name ();
+
+    if (zipoutfile || outputhook)
+      {
+        new_fd_pipe ();
+        return;
+      }
+
+    if (verbose)
+      fprintf (stderr, _("creating file %s\n"), quote (outfile));
+
+    output_desc = open (outfile, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY,
+                        S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH);
+    if (output_desc < 0)
+      error (EXIT_FAILURE, errno, "%s", outfile);
+}
+
+/* Switch to the next output file when NEW_FILE_FLAG is true;
+   otherwise leave the current output descriptor untouched.  */
+static void
+new_fd (bool new_file_flag)
+{
+    if (new_file_flag)
+      new_fd_file ();
+}
+
/* Write BYTES bytes at BP to an output file.
   If NEW_FILE_FLAG is true, open the next output file.
   Otherwise add to the same output file already in use.  */
@@ -201,21 +382,7 @@
static void
cwrite (bool new_file_flag, const char *bp, size_t bytes)
{
-  if (new_file_flag)
-    {
-      if (output_desc >= 0 && close (output_desc) < 0)
-    error (EXIT_FAILURE, errno, "%s", outfile);
-
-      next_file_name ();
-      if (verbose)
-    fprintf (stderr, _("creating file %s\n"), quote (outfile));
-      output_desc = open (outfile,
-              O_WRONLY | O_CREAT | O_TRUNC | O_BINARY,
-              (S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP
-               | S_IROTH | S_IWOTH));
-      if (output_desc < 0)
-    error (EXIT_FAILURE, errno, "%s", outfile);
-    }
+    new_fd(new_file_flag);
  if (full_write (output_desc, bp, bytes) != bytes)
    error (EXIT_FAILURE, errno, "%s", outfile);
}
@@ -405,7 +572,7 @@
      /* This is the argv-index of the option we will read next.  */
      int this_optind = optind ? optind : 1;

-      c = getopt_long (argc, argv, "0123456789C:a:b:dl:", longopts, NULL);
+ c = getopt_long (argc, argv, "0123456789C:a:b:dl:k:s:zj", longopts, NULL);
      if (c == -1)
    break;

@@ -493,6 +660,24 @@
      suffix_alphabet = "0123456789";
      break;

+    case 'z':
+      zipoutfile = 1;
+      break;
+
+    case 'j':
+      zipoutfile = 2;
+      break;
+
+    case 'k':
+        outputhook = 1;
+        outhook = optarg;
+      break;
+
+    case 's':
+        outputhooksuf = 1;
+        outhooksuf = optarg;
+        break;
+
    case VERBOSE_OPTION:
      verbose = true;
      break;
@@ -506,6 +691,17 @@
    }
    }

+    if (outputhook && zipoutfile)
+      {
+ error (0, 0, _("Cannot use both gzip/bzip2 switches with on-output-hook"));
+        usage (EXIT_FAILURE);
+      }
+
+    if (outputhook && !outputhooksuf)
+    {
+ error (0, 0, _("Need a suffix for the split output files when used with on-output-hook"));
+        usage (EXIT_FAILURE);
+    }
  /* Handle default case.  */
  if (split_type == type_undef)
    {






reply via email to

[Prev in Thread] Current Thread [Next in Thread]