diff -N -ur -x '*.in' -x Makefile -x configure -x dircolors.h -x groups -x '*~' -x wheel.h -x 'localedir.*' -x 'stamp-*' -x wheel-size.h -x alloca.h -x .deps -x charset.alias -x getdate.c -x ref-add.sed -x ref-del.sed -x po -x config.h -x config.log -x config.status -x coreutils.info -x autom4te.cache -x 'output.*' -x confdefs.h -x '.#*' -x CVS -x tests -x '*.1' -x config.hin -x '*.orig' -x '*.rej' -x version.texi coreutils-cvs-1-shred-isaac-split/ChangeLog coreutils-cvs-2-sort-with-isaac/ChangeLog --- coreutils-cvs-1-shred-isaac-split/ChangeLog 2005-11-24 20:10:35.000000000 +0000 +++ coreutils-cvs-2-sort-with-isaac/ChangeLog 2005-12-01 03:07:15.000000000 +0000 @@ -1,3 +1,22 @@ +2005-11-25 Frederik Eaton + + * src/shred.c: + * src/isaac.h: + * src/isaac.c: transfer ISAAC code (for cryptographic random + number generation) from shred.c to new files isaac.c, isaac.h + + * src/Makefile.am (shred_SOURCES): isaac.c is now a source of shred + + * src/Makefile.am (sort_SOURCES, sort_LDADD): changes to reflect + that sort uses ISAAC + + * src/sort.c (short_options, long_options, rand_state, HASH_SIZE, + HASH_WORDS, get_hash, keycompare, main): add options --random-sort + and --seed to implement a random shuffle + + * doc/coreutils.texi: document new --random-hash option, provide + an example + 2005-11-24 Jim Meyering * Version 6.0-cvs. diff -N -ur -x '*.in' -x Makefile -x configure -x dircolors.h -x groups -x '*~' -x wheel.h -x 'localedir.*' -x 'stamp-*' -x wheel-size.h -x alloca.h -x .deps -x charset.alias -x getdate.c -x ref-add.sed -x ref-del.sed -x po -x config.h -x config.log -x config.status -x coreutils.info -x autom4te.cache -x 'output.*' -x confdefs.h -x '.#*' -x CVS -x tests -x '*.1' -x config.hin -x '*.orig' -x '*.rej' -x version.texi coreutils-cvs-1-shred-isaac-split/doc/coreutils.texi coreutils-cvs-2-sort-with-isaac/doc/coreutils.texi --- coreutils-cvs-1-shred-isaac-split/doc/coreutils.texi 2005-11-24 20:10:37.000000000 +0000 +++ coreutils-cvs-2-sort-with-isaac/doc/coreutils.texi 2005-12-01 03:08:49.000000000 +0000 @@ -3396,6 +3396,15 @@ Reverse the result of comparison, so that lines with greater key values appear earlier in the output instead of later. address@hidden -R address@hidden --random-sort address@hidden -R address@hidden --random-sort address@hidden random sort + +Sort by random hash, i.e. perform a shuffle. This is done by hashing +the input keys and sorting based on the results. + @end table Other options are: @@ -3529,6 +3538,11 @@ reliably handle arbitrary file names (even those containing blanks or other special characters). address@hidden --seed @var{tempdir} address@hidden --seed address@hidden specify seed for random hash +Specify a seed for the @option{--random-sort} option. + @end table Historical (BSD and System V) implementations of @command{sort} have @@ -3695,6 +3709,20 @@ @c printf 'c\n\nb\n\na\n'|perl -0pe 's/\n\n/\n\0/g'|sort -z|perl -0pe 's/\0/\n/g' @c @end example address@hidden + +Shuffle a list of directories, but preserve the order of files within +each directory. (For instance, one could use this to generate a music +playlist in which albums are shuffled but the songs of each album are +played in order) + +Assumes that each file is nested two levels deep, i.e. that the paths +output by @command{find} are are of the form @samp{./a/b/c}. + address@hidden +find . -type f | ./sort -t/ -k1,3R -k4 address@hidden example + @end itemize diff -N -ur -x '*.in' -x Makefile -x configure -x dircolors.h -x groups -x '*~' -x wheel.h -x 'localedir.*' -x 'stamp-*' -x wheel-size.h -x alloca.h -x .deps -x charset.alias -x getdate.c -x ref-add.sed -x ref-del.sed -x po -x config.h -x config.log -x config.status -x coreutils.info -x autom4te.cache -x 'output.*' -x confdefs.h -x '.#*' -x CVS -x tests -x '*.1' -x config.hin -x '*.orig' -x '*.rej' -x version.texi coreutils-cvs-1-shred-isaac-split/src/Makefile.am coreutils-cvs-2-sort-with-isaac/src/Makefile.am --- coreutils-cvs-1-shred-isaac-split/src/Makefile.am 2005-11-26 22:48:20.000000000 +0000 +++ coreutils-cvs-2-sort-with-isaac/src/Makefile.am 2005-11-30 12:48:37.000000000 +0000 @@ -69,7 +69,7 @@ vdir_LDADD = $(LDADD) $(LIB_CLOCK_GETTIME) ## If necessary, add -lm to resolve use of pow in lib/strtod.c. -sort_LDADD = $(LDADD) $(POW_LIB) +sort_LDADD = $(LDADD) $(POW_LIB) $(LIB_GETHRXTIME) # for get_date and gettime date_LDADD = $(LDADD) $(LIB_CLOCK_GETTIME) @@ -167,6 +167,7 @@ chown_SOURCES = chown.c chown-core.c chgrp_SOURCES = chgrp.c chown-core.c shred_SOURCES = shred.c rand-isaac.c +sort_SOURCES = sort.c rand-isaac.c mv_SOURCES = mv.c copy.c cp-hash.c remove.c rm_SOURCES = rm.c remove.c diff -N -ur -x '*.in' -x Makefile -x configure -x dircolors.h -x groups -x '*~' -x wheel.h -x 'localedir.*' -x 'stamp-*' -x wheel-size.h -x alloca.h -x .deps -x charset.alias -x getdate.c -x ref-add.sed -x ref-del.sed -x po -x config.h -x config.log -x config.status -x coreutils.info -x autom4te.cache -x 'output.*' -x confdefs.h -x '.#*' -x CVS -x tests -x '*.1' -x config.hin -x '*.orig' -x '*.rej' -x version.texi coreutils-cvs-1-shred-isaac-split/src/sort.c coreutils-cvs-2-sort-with-isaac/src/sort.c --- coreutils-cvs-1-shred-isaac-split/src/sort.c 2005-10-07 19:48:28.000000000 +0100 +++ coreutils-cvs-2-sort-with-isaac/src/sort.c 2005-11-30 12:48:44.000000000 +0000 @@ -30,8 +30,10 @@ #include "error.h" #include "hard-locale.h" #include "inttostr.h" +#include "md5.h" #include "physmem.h" #include "posixver.h" +#include "rand-isaac.h" #include "quote.h" #include "stdlib--.h" #include "stdio--.h" @@ -147,6 +149,7 @@ bool numeric; /* Flag for numeric comparison. Handle strings of digits with optional decimal point, but no exponential notation. */ + bool random_hash; /* Shuffle by sorting on random hash of key */ bool general_numeric; /* Flag for general, numeric comparison. Handle numbers in exponential notation. */ bool month; /* Flag for comparison by month name. */ @@ -303,6 +306,7 @@ -i, --ignore-nonprinting consider only printable characters\n\ -M, --month-sort compare (unknown) < `JAN' < ... < `DEC'\n\ -n, --numeric-sort compare according to string numerical value\n\ + -R, --random-sort sort by random hash of keys\n\ -r, --reverse reverse the result of comparisons\n\ \n\ "), stdout); @@ -325,6 +329,7 @@ "), DEFAULT_TMPDIR); fputs (_("\ -z, --zero-terminated end lines with 0 byte, not newline\n\ + --seed use specified seed for random number generator\n\ "), stdout); fputs (HELP_OPTION_DESCRIPTION, stdout); fputs (VERSION_OPTION_DESCRIPTION, stdout); @@ -353,7 +358,11 @@ exit (status); } -static char const short_options[] = "-bcdfgik:mMno:rsS:t:T:uy:z"; +static char const short_options[] = "-bcdfgik:mMno:rRsS:t:T:uy:z"; +enum +{ + SEED_OPTION = CHAR_MAX + 1 +}; static struct option const long_options[] = { @@ -367,6 +376,7 @@ {"merge", no_argument, NULL, 'm'}, {"month-sort", no_argument, NULL, 'M'}, {"numeric-sort", no_argument, NULL, 'n'}, + {"random-sort", no_argument, NULL, 'R'}, {"output", required_argument, NULL, 'o'}, {"reverse", no_argument, NULL, 'r'}, {"stable", no_argument, NULL, 's'}, @@ -375,6 +385,7 @@ {"temporary-directory", required_argument, NULL, 'T'}, {"unique", no_argument, NULL, 'u'}, {"zero-terminated", no_argument, NULL, 'z'}, + {"seed", required_argument, NULL, SEED_OPTION}, {GETOPT_HELP_OPTION_DECL}, {GETOPT_VERSION_OPTION_DECL}, {NULL, 0, NULL, 0}, @@ -1152,6 +1163,28 @@ return 0; } +static struct isaac_state *rand_state; + +#define HASH_WORDS 1 +#define HASH_SIZE (HASH_WORDS * sizeof (uint32_t)) + +static void +get_hash (char const* text, size_t len, uint32_t resbuf[/*HASH_WORDS*/]) +{ + struct isaac_state s; + int i; + memcpy (&s, rand_state, sizeof s); + isaac_seed_data (&s, text, len); + /* we can call isaac_seed_finish multiple times, but should only + call isaac_seed_start once */ + isaac_seed_finish (&s); + /* getting an irand_state and drawing random numbers would be more + kosher here, but also slower. so we just peek at the ISAAC state + array instead */ + for (i = 0; i < HASH_WORDS; i++) + resbuf[i] = s.mm[ISAAC_WORDS - 1 - i]; +} + /* Compare two lines A and B trying every key in sequence until there are no more keys or a difference is found. */ @@ -1188,6 +1221,14 @@ (texta, textb)); *lima = savea, *limb = saveb; } + else if (key->random_hash) + { + uint32_t diga[HASH_SIZE]; + uint32_t digb[HASH_SIZE]; + get_hash (texta, lena, diga); + get_hash (textb, lenb, digb); + diff = memcmp (diga, digb, sizeof (diga)); + } else if (key->month) diff = getmonth (texta, lena) - getmonth (textb, lenb); /* Sorting like this may become slow, so in a simple locale the user @@ -1989,6 +2030,7 @@ { error (SORT_FAILURE, 0, _("%s: invalid field specification %s"), _(msgid), quote (spec)); + /* added to satisfy compiler for NORETURN: */ abort (); } @@ -2081,6 +2123,9 @@ case 'n': key->numeric = true; break; + case 'R': + key->random_hash = true; + break; case 'r': key->reverse = true; break; @@ -2109,6 +2154,8 @@ int c = 0; bool checkonly = false; bool mergeonly = false; + char const *randseed = 0; + bool need_rand = false; size_t nfiles = 0; bool posixly_correct = (getenv ("POSIXLY_CORRECT") != NULL); bool obsolete_usage = (posix2_version () < 200112); @@ -2187,7 +2234,7 @@ gkey.sword = gkey.eword = SIZE_MAX; gkey.ignore = NULL; gkey.translate = NULL; - gkey.numeric = gkey.general_numeric = gkey.month = gkey.reverse = false; + gkey.numeric = gkey.general_numeric = gkey.random_hash = gkey.month = gkey.reverse = false; gkey.skipsblanks = gkey.skipeblanks = false; files = xnmalloc (argc, sizeof *files); @@ -2263,6 +2310,7 @@ case 'M': case 'n': case 'r': + case 'R': { char str[2]; str[0] = c; @@ -2404,6 +2452,10 @@ eolchar = 0; break; + case SEED_OPTION: + randseed = xstrdup (optarg); + break; + case_GETOPT_HELP_CHAR; case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); @@ -2413,26 +2465,46 @@ } } + need_rand |= gkey.random_hash; /* Inheritance of global options to individual keys. */ for (key = keylist; key; key = key->next) - if (! (key->ignore || key->translate - || (key->skipsblanks | key->reverse - | key->skipeblanks | key->month | key->numeric - | key->general_numeric))) - { - key->ignore = gkey.ignore; - key->translate = gkey.translate; - key->skipsblanks = gkey.skipsblanks; - key->skipeblanks = gkey.skipeblanks; - key->month = gkey.month; - key->numeric = gkey.numeric; - key->general_numeric = gkey.general_numeric; - key->reverse = gkey.reverse; - } + { + need_rand |= key->random_hash; + if (! (key->ignore || key->translate + || (key->skipsblanks | key->reverse + | key->skipeblanks | key->month | key->numeric + | key->general_numeric + | key->random_hash))) + { + key->ignore = gkey.ignore; + key->translate = gkey.translate; + key->skipsblanks = gkey.skipsblanks; + key->skipeblanks = gkey.skipeblanks; + key->month = gkey.month; + key->numeric = gkey.numeric; + key->general_numeric = gkey.general_numeric; + key->random_hash = gkey.random_hash; + key->reverse = gkey.reverse; + } + } + + if (need_rand) + { + rand_state = xzalloc (sizeof *rand_state); + if (randseed) + { + isaac_seed_start (rand_state); + isaac_seed_data (rand_state, randseed, strlen (randseed)); + isaac_seed_finish (rand_state); + } + else + isaac_seed (rand_state); + } if (!keylist && (gkey.ignore || gkey.translate || (gkey.skipsblanks | gkey.skipeblanks | gkey.month - | gkey.numeric | gkey.general_numeric))) + | gkey.numeric | gkey.general_numeric + | gkey.random_hash))) insertkey (&gkey); reverse = gkey.reverse;