coreutils
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: Splitting search results from a "find -print0"


From: Markus Elfring
Subject: Re: Splitting search results from a "find -print0"
Date: Fri, 09 Jan 2015 20:53:23 +0100
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:31.0) Gecko/20100101 Thunderbird/31.3.0

>> Would you like to recommend any other software tools
>> which provide the desired functionality for splitting
>> of files which contain zero-terminated text lines?
> 
> If you don't mind a bit of glue work, you can achieve the same results
> with existing tools:
> 
> tr '\n\0' '\0\n' < orig > mod
> split mod mod.
> for each f in mod.*; do
>   tr '\n\0' '\0\n' < $f > orig.${f##*.}
> done

I have experimented a bit more with the Python programming language.

Would you like to try the following approach out?


import sys

def split(number, dir):
    """Distribute NUL-delimited records from standard input over output files.

    All of standard input is read as bytes and cut at every NUL byte.  The
    pieces are written to files named ``x<N>.part`` inside *dir*; pieces that
    share a file are joined with a NUL byte again.

    * ``number == 1``: the input is copied unchanged to ``x1.part``.
    * fewer pieces than files: pieces are dealt out round-robin over
      ``x0.part`` .. ``x<number-1>.part`` (surplus files are left empty).
    * otherwise: each file receives a contiguous run of
      ``len(pieces) // number`` pieces; ``x0.part`` additionally receives
      the remainder.

    Args:
        number: How many output files to create; must be at least 1.
        dir: Output directory, resolved with ``os.path.abspath``.

    Raises:
        ValueError: If *number* is smaller than 1.
        SystemExit: If more than one file was requested and the standard
            input contained no data.
    """
    import io
    import os

    if number < 1:
        raise ValueError("number of output files must be at least 1")

    # closefd=False: the descriptor is owned by sys.stdin, so it must not be
    # closed when this temporary raw reader goes away.
    with io.FileIO(sys.stdin.fileno(), closefd=False) as raw:
        data = raw.readall() or b""

    target = os.path.abspath(dir)

    if number == 1:
        # NOTE(review): this branch writes "x1.part" while the branches below
        # start numbering at "x0.part"; the name is kept for compatibility.
        with open(os.path.join(target, "x1.part"), "wb") as output:
            output.write(data)
        return

    # The data is bytes, so it has to be tested as bytes (a comparison with
    # the str "" is never true on Python 3).
    if not data:
        sys.exit("The standard input was empty.")

    delimiter = b"\0"
    # NOTE(review): input that ends with a NUL byte yields an empty final
    # piece here, which is counted and distributed like any other piece.
    parts = data.split(delimiter)
    count = len(parts)

    if count < number:
        # Round-robin: file i receives pieces i, i + number, ...
        chunks = [parts[index::number] for index in range(number)]
    else:
        portion, remainder = divmod(count, number)
        first_end = portion + remainder  # the first file absorbs the remainder
        chunks = [parts[:first_end]]
        chunks.extend(
            parts[start:start + portion]
            for start in range(first_end, count, portion)
        )

    for index, chunk in enumerate(chunks):
        with open(os.path.join(target, "x%d.part" % index), "wb") as output:
            output.write(delimiter.join(chunk))

def main():
    """Parse the command line and hand the settings over to ``split``.

    Recognised options:
        -g N, --groups=N          number of output files (default 2, minimum 1)
        -o DIR, --output-dir=DIR  directory for the output files (default "")

    Positional arguments are accepted by the parser but not used.

    Raises:
        SystemExit: If the standard input is unavailable, an option is
            unknown or lacks its argument, the group count is not an integer
            or is smaller than 1, or the output directory is empty.
    """
    import getopt

    if sys.__stdin__ is None:
        sys.exit("The standard input channel was not usable.")

    try:
        options, _ = getopt.gnu_getopt(
            sys.argv[1:], "g:o:", ["groups=", "output-dir="]
        )
    except getopt.GetoptError as ex:
        sys.exit(ex)

    od = ""
    g = 2

    for parameter, value in options:
        if parameter in ("-g", "--groups"):
            # A non-numeric count is a usage error, not a crash.
            try:
                g = int(value)
            except ValueError:
                sys.exit("invalid parameter value")

            if g < 1:
                sys.exit("invalid parameter value")
        elif parameter in ("-o", "--output-dir"):
            if not value:
                sys.exit("invalid specification for an output directory")
            od = value
        else:
            sys.exit(": ".join(["unhandled option", parameter]))

    split(g, od)

# Run the command-line interface only when this file is executed as a script.
if __name__ == "__main__":
   main()


Regards,
Markus




reply via email to

[Prev in Thread] Current Thread [Next in Thread]