>From f82a746c8e8ca342273e31ccd49f6f75f14ee2e6 Mon Sep 17 00:00:00 2001 From: =?utf-8?q?P=C3=A1draig=20Brady?= Date: Tue, 11 May 2010 18:46:21 +0100 Subject: [PATCH] sort: --debug: output data independent key warnings * src/sort.c (usage): Mention --debug can output warnings to stderr. (default_key_compare): A new function refactored from main(), and now also called from the new key_warnings() function. (key_to_opts): A new function refactored from incompatible_options(), and now also called from the new key_warnings() function. (key_numeric): A new function refactored to test if key is numeric. (key_warnings): A new function to output warnings to stderr, about questionable use of various options. Currently it warns about zero length keys and ineffective global options. (incompatible_options): Refactor out key_to_opts() (main): Use key_init() to initialize gkey. Refactor out default_key_compare(). Call key_warnings() in debug mode. * doc/coreutils.texi (sort invocation): Mention that warnings are output by --debug. * tests/misc/sort-debug-warn: A new test for debug warnings. * tests/Makefile.am: Reference the new test. * NEWS: Mention the new feature --- NEWS | 2 +- doc/coreutils.texi | 1 + src/sort.c | 227 +++++++++++++++++++++++++++++++++----------- tests/Makefile.am | 1 + tests/misc/sort-debug-warn | 91 ++++++++++++++++++ 5 files changed, 264 insertions(+), 58 deletions(-) create mode 100755 tests/misc/sort-debug-warn diff --git a/NEWS b/NEWS index 4c2da67..5d7b81f 100644 --- a/NEWS +++ b/NEWS @@ -5,7 +5,7 @@ GNU coreutils NEWS -*- outline -*- ** New features sort now accepts the --debug option, to highlight the part of the - line significant in the sort. + line significant in the sort, and warn about questionable options. ** Changes in behavior diff --git a/doc/coreutils.texi b/doc/coreutils.texi index 6714ada..cd99bd0 100644 --- a/doc/coreutils.texi +++ b/doc/coreutils.texi @@ -3944,6 +3944,7 @@ of the line being used in the sort. @item --debug Highlight the portion of each line used for sorting. +Also issue warnings about questionable usage to stderr. @item address@hidden @opindex --batch-size diff --git a/src/sort.c b/src/sort.c index 38c5ed2..f2b1124 100644 --- a/src/sort.c +++ b/src/sort.c @@ -131,6 +131,9 @@ static bool hard_LC_COLLATE; static bool hard_LC_TIME; #endif +/* Nonzero if the obsolescent key option format is used. */ +static bool obsolete_used; + #define NONZERO(x) ((x) != 0) /* The kind of blanks for '-b' to skip in various options. */ @@ -375,7 +378,8 @@ Other options:\n\ -C, --check=quiet, --check=silent like -c, but do not report first bad line\n\ --compress-program=PROG compress temporaries with PROG;\n\ decompress them with PROG -d\n\ - --debug annotate the part of the line used to sort\n\ + --debug annotate the part of the line used to sort,\n\ + and warn about questionable usage to stderr\n\ --files0-from=F read input from the files specified by\n\ NUL-terminated names in file F;\n\ If F is - then read names from standard input\n\ @@ -2158,6 +2162,143 @@ debug_key (char const *sline, char const *sfield, char const *efield, mark_key (offset, width); } +/* Testing if a key is numeric is done in various places. */ + +static inline bool +key_numeric (struct keyfield const *key) +{ + return (key->numeric || key->general_numeric || key->human_numeric); +} + +/* Return whether sorting options specified for key. */ + +static bool +default_key_compare (struct keyfield const *key) +{ + return ! (key->ignore + || key->translate + || key->skipsblanks + || key->skipeblanks + || key_numeric (key) + || key->month + || key->version + || key->random + /* || key->reverse */ + ); +} + +/* Convert a key to the short options used to specify it. + The returned string must be freed. */ + +static char* +key_to_opts (struct keyfield const *key) +{ + /* The following is too big, but guaranteed to be "big enough". */ + char *opts = xstrdup (short_options); + char *p = opts; + + if (key->skipsblanks || key->skipeblanks) + *p++ = 'b';/* either disables global -b */ + if (key->ignore == nondictionary) + *p++ = 'd'; + if (key->ignore == nonprinting) + *p++ = 'i'; + if (key->translate) + *p++ = 'f'; + if (key->general_numeric) + *p++ = 'g'; + if (key->human_numeric) + *p++ = 'h'; + if (key->month) + *p++ = 'M'; + if (key->numeric) + *p++ = 'n'; + if (key->random) + *p++ = 'R'; + if (key->reverse) + *p++ = 'r'; + if (key->version) + *p++ = 'V'; + *p = '\0'; + + return opts; +} + +/* Output data independent key warnings to stderr. */ + +static void +key_warnings (struct keyfield const *gkey, bool gkey_only) +{ + struct keyfield const *key; + struct keyfield ugkey = *gkey; + size_t keynum = 1; + + if (obsolete_used) + error (0, 0, _("obsolescent key formats used. Consider using `-k'")); + + for (key = keylist; key; key = key->next, keynum++) + { + /* Warn about field specs that will never match. */ + if (key->sword != SIZE_MAX && key->eword < key->sword) + error (0, 0, _("key %zu has zero width and will be ignored"), keynum); + + /* Warn about significant leading blanks. */ + if (!gkey_only && tab == TAB_DEFAULT && !key->skipsblanks + && !key_numeric (key) && !key->month) + error (0, 0, _("leading blanks are significant in key %zu. Consider " + "also specifying `b'"), keynum); + + + /* Warn about numeric comparisons spanning fields, + as field delimiters could be interpreted as part + of the number (maybe only in other locales). */ + if (!gkey_only && key_numeric (key)) + { + size_t sword = key->sword + 1; + size_t eword = key->eword + 1; + if (!sword) + sword++; + if (sword != eword) + error (0, 0, _("key %zu is numeric and spans multiple fields"), + keynum); + } + + /* Flag global options not copied or specified in any key. */ + if (ugkey.ignore && (ugkey.ignore == key->ignore)) + ugkey.ignore = NULL; + if (ugkey.translate && (ugkey.translate == key->translate)) + ugkey.translate = NULL; + ugkey.skipsblanks &= !key->skipsblanks; + ugkey.skipeblanks &= !key->skipeblanks; + ugkey.month &= !key->month; + ugkey.numeric &= !key->numeric; + ugkey.general_numeric &= !key->general_numeric; + ugkey.human_numeric &= !key->human_numeric; + ugkey.random &= !key->random; + ugkey.version &= !key->version; + ugkey.reverse &= !key->reverse; + } + + /* Warn about ignored global options flagged above. + Note if gkey is the only one in the list, all flags are cleared. */ + if (!default_key_compare (&ugkey) + || (ugkey.reverse && (stable || unique) && keylist)) + { + bool ugkey_reverse = ugkey.reverse; + if (!(stable || unique)) + ugkey.reverse = false; + char *opts = key_to_opts (&ugkey); + error (0, 0, + ngettext ("option `-%s' is ignored", + "options `-%s' are ignored", + select_plural (strlen (opts))), opts); + free (opts); + ugkey.reverse = ugkey_reverse; + } + if (ugkey.reverse && !(stable || unique) && keylist) + error (0, 0, _("option `-r' only applies to last-resort comparison")); +} + /* Compare two lines A and B trying every key in sequence until there are no more keys or a difference is found. */ @@ -2193,7 +2334,7 @@ keycompare (const struct line *a, const struct line *b, bool show_debug) if (key->random) diff = compare_random (texta, lena, textb, lenb); - else if (key->numeric || key->general_numeric || key->human_numeric) + else if (key_numeric (key)) { char savea = *lima, saveb = *limb; char const* ea = lima; @@ -3235,36 +3376,18 @@ incompatible_options (char const *opts) static void check_ordering_compatibility (void) { - struct keyfield const *key; + struct keyfield *key; for (key = keylist; key; key = key->next) - if ((1 < (key->random + key->numeric + key->general_numeric + key->month - + key->version + !!key->ignore + key->human_numeric)) + if ((1 < (key->random + key_numeric (key) + key->month + key->version + + !!key->ignore)) || (key->random && key->translate)) { - /* The following is too big, but guaranteed to be "big enough". */ - char opts[sizeof short_options]; - char *p = opts; - if (key->ignore == nondictionary) - *p++ = 'd'; - if (key->translate) - *p++ = 'f'; - if (key->general_numeric) - *p++ = 'g'; - if (key->human_numeric) - *p++ = 'h'; - if (key->ignore == nonprinting) - *p++ = 'i'; - if (key->month) - *p++ = 'M'; - if (key->numeric) - *p++ = 'n'; - if (key->version) - *p++ = 'V'; - if (key->random) - *p++ = 'R'; - *p = '\0'; + /* Clear flags we're not interested in. */ + key->skipsblanks = key->skipeblanks = key->reverse = false; + char *opts = key_to_opts (key); incompatible_options (opts); + free (opts); } } @@ -3391,6 +3514,7 @@ main (int argc, char **argv) struct keyfield *key; struct keyfield key_buf; struct keyfield gkey; + bool gkey_only = false; char const *s; int c = 0; char checkonly = 0; @@ -3494,14 +3618,8 @@ main (int argc, char **argv) /* The signal mask is known, so it is safe to invoke exit_cleanup. */ atexit (exit_cleanup); - gkey.sword = gkey.eword = SIZE_MAX; - gkey.ignore = NULL; - gkey.translate = NULL; - gkey.numeric = gkey.general_numeric = gkey.human_numeric = false; - gkey.iec_present = -1; - gkey.random = gkey.version = false; - gkey.month = gkey.reverse = false; - gkey.skipsblanks = gkey.skipeblanks = false; + key_init (&gkey); + gkey.sword = SIZE_MAX; files = xnmalloc (argc, sizeof *files); @@ -3574,6 +3692,7 @@ main (int argc, char **argv) N_("stray character in field spec")); } insertkey (key); + obsolete_used = true; } } } @@ -3836,17 +3955,7 @@ main (int argc, char **argv) /* Inheritance of global options to individual keys. */ for (key = keylist; key; key = key->next) { - if (! (key->ignore - || key->translate - || (key->skipsblanks - || key->reverse - || key->skipeblanks - || key->month - || key->numeric - || key->version - || key->general_numeric - || key->human_numeric - || key->random))) + if (default_key_compare (key) && !key->reverse) { key->ignore = gkey.ignore; key->translate = gkey.translate; @@ -3856,25 +3965,17 @@ main (int argc, char **argv) key->numeric = gkey.numeric; key->general_numeric = gkey.general_numeric; key->human_numeric = gkey.human_numeric; + key->version = gkey.version; key->random = gkey.random; key->reverse = gkey.reverse; - key->version = gkey.version; } need_random |= key->random; } - if (!keylist && (gkey.ignore - || gkey.translate - || (gkey.skipsblanks - || gkey.skipeblanks - || gkey.month - || gkey.numeric - || gkey.general_numeric - || gkey.human_numeric - || gkey.random - || gkey.version))) + if (!keylist && !default_key_compare (&gkey)) { + gkey_only = true; insertkey (&gkey); need_random |= gkey.random; } @@ -3884,6 +3985,18 @@ main (int argc, char **argv) if (debug && outfile) error (SORT_FAILURE, 0, _("options -o and --debug are incompatible")); + if (debug) + { + /* Always output the locale in debug mode, since this + is such a common source of confusion. */ + if (hard_LC_COLLATE) + error (0, 0, _("using %s sorting rules"), + quote (setlocale (LC_COLLATE, NULL))); + else + error (0, 0, _("using simple byte comparison")); + key_warnings (&gkey, gkey_only); + } + reverse = gkey.reverse; if (need_random) diff --git a/tests/Makefile.am b/tests/Makefile.am index 46d388a..a732c63 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -225,6 +225,7 @@ TESTS = \ misc/sort-compress \ misc/sort-continue \ misc/sort-debug-keys \ + misc/sort-debug-warn \ misc/sort-files0-from \ misc/sort-float \ misc/sort-merge \ diff --git a/tests/misc/sort-debug-warn b/tests/misc/sort-debug-warn new file mode 100755 index 0000000..6da3d9b --- /dev/null +++ b/tests/misc/sort-debug-warn @@ -0,0 +1,91 @@ +#!/bin/sh +# Test warnings for sort options + +# Copyright (C) 2010 Free Software Foundation, Inc. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +if test "$VERBOSE" = yes; then + set -x + sort --version +fi + +. $srcdir/test-lib.sh + +cat <<\EOF > exp +sort: using simple byte comparison +sort: key 1 has zero width and will be ignored +sort: leading blanks are significant in key 1. Consider specifying `b' also +sort: using simple byte comparison +sort: options `-bghMRrV' are ignored +sort: using simple byte comparison +sort: options `-bghMRV' are ignored +sort: option `-r' only applies to last-resort comparison +sort: using simple byte comparison +sort: option `-r' only applies to last-resort comparison +sort: using simple byte comparison +sort: leading blanks are significant in key 2. Consider specifying `b' also +sort: options `-bg' are ignored +sort: using simple byte comparison +sort: using simple byte comparison +sort: option `-b' is ignored +sort: using simple byte comparison +sort: using simple byte comparison +sort: leading blanks are significant in key 1. Consider specifying `b' also +sort: using simple byte comparison +sort: leading blanks are significant in key 1. Consider specifying `b' also +sort: using simple byte comparison +sort: leading blanks are significant in key 1. Consider specifying `b' also +sort: option `-d' is ignored +sort: using simple byte comparison +sort: leading blanks are significant in key 1. Consider specifying `b' also +sort: option `-i' is ignored +sort: using simple byte comparison +sort: using simple byte comparison +sort: using simple byte comparison +EOF + +sort -s -k2,1 --debug /dev/null 2>>out +sort -s -rRVMhgb -k1,1n --debug /dev/null 2>>out +sort -rRVMhgb -k1,1n --debug /dev/null 2>>out +sort -r -k1,1n --debug /dev/null 2>>out +sort -gbr -k1,1n -k1,1r --debug /dev/null 2>>out +sort -b -k1b,1bn --debug /dev/null 2>>out # no warning +sort -b -k1,1bn --debug /dev/null 2>>out +sort -b -k1,1bn -k2b,2 --debug /dev/null 2>>out # no warning +sort -r -k1,1r --debug /dev/null 2>>out # no warning for redundant options +sort -i -k1,1i --debug /dev/null 2>>out # no warning +sort -d -k1,1b --debug /dev/null 2>>out +sort -i -k1,1d --debug /dev/null 2>>out +sort -r --debug /dev/null 2>>out #no warning +sort -rM --debug /dev/null 2>>out #no warning +sort -rM -k1,1 --debug /dev/null 2>>out #no warning + +compare out exp || fail=1 + +cat <<\EOF > exp +sort: using simple byte comparison +sort: Obsolescent key formats used. Consider using `-k' +sort: key 1 is numeric and spans multiple fields +sort: key 2 has zero width and will be ignored +sort: leading blanks are significant in key 2. Consider specifying `b' also +sort: option `-b' is ignored +sort: option `-r' only applies to last-resort comparison +EOF + +sort --debug -rb -k2n +2 -1b /dev/null 2>out + +compare out exp || fail=1 + +Exit $fail -- 1.6.2.5