gawk-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[SCM] gawk branch, master, updated. gawk-4.1.0-3898-ga28cbc8


From: Arnold Robbins
Subject: [SCM] gawk branch, master, updated. gawk-4.1.0-3898-ga28cbc8
Date: Sat, 22 Feb 2020 13:24:29 -0500 (EST)

This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "gawk".

The branch, master has been updated
       via  a28cbc85b64d5f8a3d318cea5c30bdb57338256c (commit)
      from  49a3b8595db2c6d265f3e6635e4deb7accff8ced (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://git.sv.gnu.org/cgit/gawk.git/commit/?id=a28cbc85b64d5f8a3d318cea5c30bdb57338256c

commit a28cbc85b64d5f8a3d318cea5c30bdb57338256c
Author: Arnold D. Robbins <address@hidden>
Date:   Sun Feb 2 20:31:21 2020 +0200

    Add long option support to getopt function.

diff --git a/awklib/eg/lib/getopt.awk b/awklib/eg/lib/getopt.awk
index 6b1f4c5..69944f3 100644
--- a/awklib/eg/lib/getopt.awk
+++ b/awklib/eg/lib/getopt.awk
@@ -1,9 +1,11 @@
 # getopt.awk --- Do C library getopt(3) function in awk
+#                Also supports long options.
 #
 # Arnold Robbins, address@hidden, Public Domain
 #
 # Initial version: March, 1991
 # Revised: May, 1993
+# Long options added by Greg Minshall, January 2020
 
 # External variables:
 #    Optind -- index in ARGV of first nonoption argument
@@ -14,14 +16,14 @@
 # Returns:
 #    -1     at end of options
 #    "?"    for unrecognized option
-#    <c>    a character representing the current option
+#    <s>    a string representing the current option
 
 # Private Data:
 #    _opti  -- index in multiflag option, e.g., -abc
-function getopt(argc, argv, options,    thisopt, i)
+function getopt(argc, argv, options, longopts,    thisopt, i, j)
 {
-    if (length(options) == 0)    # no options given
-        return -1
+    if (length(options) == 0 && length(longopts) == 0)
+        return -1                # no options given
 
     if (argv[Optind] == "--") {  # all done
         Optind++
@@ -31,36 +33,61 @@ function getopt(argc, argv, options,    thisopt, i)
         _opti = 0
         return -1
     }
-    if (_opti == 0)
-        _opti = 2
-    thisopt = substr(argv[Optind], _opti, 1)
-    Optopt = thisopt
-    i = index(options, thisopt)
-    if (i == 0) {
-        if (Opterr)
-            printf("%c -- invalid option\n", thisopt) > "/dev/stderr"
-        if (_opti >= length(argv[Optind])) {
+    if (argv[Optind] !~ /^--/) {        # if this is a short option
+        if (_opti == 0)
+            _opti = 2
+        thisopt = substr(argv[Optind], _opti, 1)
+        Optopt = thisopt
+        i = index(options, thisopt)
+        if (i == 0) {
+            if (Opterr)
+                printf("%c -- invalid option\n", thisopt) > "/dev/stderr"
+            if (_opti >= length(argv[Optind])) {
+                Optind++
+                _opti = 0
+            } else
+                _opti++
+            return "?"
+        }
+        if (substr(options, i + 1, 1) == ":") {
+            # get option argument
+            if (length(substr(argv[Optind], _opti + 1)) > 0)
+                Optarg = substr(argv[Optind], _opti + 1)
+            else
+                Optarg = argv[++Optind]
+            _opti = 0
+        } else
+            Optarg = ""
+        if (_opti == 0 || _opti >= length(argv[Optind])) {
             Optind++
             _opti = 0
         } else
             _opti++
-        return "?"
-    }
-    if (substr(options, i + 1, 1) == ":") {
-        # get option argument
-        if (length(substr(argv[Optind], _opti + 1)) > 0)
-            Optarg = substr(argv[Optind], _opti + 1)
+        return thisopt
+    } else {
+        j = index(argv[Optind], "=")
+        if (j > 0)
+            thisopt = substr(argv[Optind], 3, j - 3)
         else
-            Optarg = argv[++Optind]
-        _opti = 0
-    } else
-        Optarg = ""
-    if (_opti == 0 || _opti >= length(argv[Optind])) {
+            thisopt = substr(argv[Optind], 3)
+        Optopt = thisopt
+        i = match(longopts, "(^|,)" thisopt "($|[,:])")
+        if (i == 0) {
+            if (Opterr)
+                 printf("%s -- invalid option\n", thisopt) > "/dev/stderr"
+            Optind++
+            return "?"
+        }
+        if (substr(longopts, i+1+length(thisopt), 1) == ":") {
+            if (j > 0)
+                Optarg = substr(argv[Optind], j + 1)
+            else
+                Optarg = argv[++Optind]
+        } else
+            Optarg = ""
         Optind++
-        _opti = 0
-    } else
-        _opti++
-    return thisopt
+        return thisopt
+    }
 }
 BEGIN {
     Opterr = 1    # default is to diagnose
@@ -68,12 +95,13 @@ BEGIN {
 
     # test program
     if (_getopt_test) {
-        while ((_go_c = getopt(ARGC, ARGV, "ab:cd")) != -1)
-            printf("c = <%c>, Optarg = <%s>\n",
-                                       _go_c, Optarg)
+        _myshortopts = "ab:cd"
+        _mylongopts = "longa,longb:,otherc,otherd"
+
+        while ((_go_c = getopt(ARGC, ARGV, _myshortopts, _mylongopts)) != -1)
+            printf("c = <%s>, Optarg = <%s>\n", _go_c, Optarg)
         printf("non-option arguments:\n")
         for (; Optind < ARGC; Optind++)
-            printf("\tARGV[%d] = <%s>\n",
-                                    Optind, ARGV[Optind])
+            printf("\tARGV[%d] = <%s>\n", Optind, ARGV[Optind])
     }
 }
diff --git a/doc/ChangeLog b/doc/ChangeLog
index c826543..01ee092 100644
--- a/doc/ChangeLog
+++ b/doc/ChangeLog
@@ -1,3 +1,8 @@
+2020-02-02         Arnold D. Robbins     <address@hidden>
+
+       * gawktexi.in (Getopt Function): Add support for long options,
+       contributed by Greg Minshall <address@hidden>.
+
 2020-01-23         Arnold D. Robbins     <address@hidden>
 
        * gawktexi.in: Document arry sorting by value for FUNCTAB.
diff --git a/doc/gawk.info b/doc/gawk.info
index 999ee49..ed11747 100644
--- a/doc/gawk.info
+++ b/doc/gawk.info
@@ -16514,20 +16514,29 @@ command-line arguments for 'awk':
          ...
      }
 
+   The GNU project's version of the original Unix utilities popularized
+the use of long command line options.  For example, '--help' in addition
+to '-h'.  Arguments to long options are either provided as separate
+command line arguments ('--source 'PROGRAM-TEXT'') or separated from the
+option with an '=' sign ('--source='PROGRAM-TEXT'').
+
    As a side point, 'gawk' actually uses the GNU 'getopt_long()'
 function to process both normal and GNU-style long options (*note
 Options::).
 
    The abstraction provided by 'getopt()' is very useful and is quite
 handy in 'awk' programs as well.  Following is an 'awk' version of
-'getopt()'.  This function highlights one of the greatest weaknesses in
-'awk', which is that it is very poor at manipulating single characters.
-Repeated calls to 'substr()' are necessary for accessing individual
-characters (*note String Functions::).(1)
+'getopt()' that accepts both short and long options.
+
+   This function highlights one of the greatest weaknesses in 'awk',
+which is that it is very poor at manipulating single characters.  The
+function needs repeated calls to 'substr()' in order to access
+individual characters (*note String Functions::).(1)
 
    The discussion that follows walks through the code a bit at a time:
 
      # getopt.awk --- Do C library getopt(3) function in awk
+     #                Also supports long options.
 
      # External variables:
      #    Optind -- index in ARGV of first nonoption argument
@@ -16538,7 +16547,7 @@ characters (*note String Functions::).(1)
      # Returns:
      #    -1     at end of options
      #    "?"    for unrecognized option
-     #    <c>    a character representing the current option
+     #    <s>    a string representing the current option
 
      # Private Data:
      #    _opti  -- index in multiflag option, e.g., -abc
@@ -16550,13 +16559,13 @@ documentation is essential for any program, and particularly for library
 functions.
 
    The 'getopt()' function first checks that it was indeed called with a
-string of options (the 'options' parameter).  If 'options' has a zero
-length, 'getopt()' immediately returns -1:
+string of options (the 'options' parameter).  If both 'options' and
+'longopts' have a zero length, 'getopt()' immediately returns -1:
 
-     function getopt(argc, argv, options,    thisopt, i)
+     function getopt(argc, argv, options, longopts,    thisopt, i, j)
      {
-         if (length(options) == 0)    # no options given
-             return -1
+         if (length(options) == 0 && length(longopts) == 0)
+             return -1                # no options given
 
          if (argv[Optind] == "--") {  # all done
              Optind++
@@ -16569,30 +16578,34 @@ length, 'getopt()' immediately returns -1:
 
    The next thing to check for is the end of the options.  A '--' ends
 the command-line options, as does any command-line argument that does
-not begin with a '-'.  'Optind' is used to step through the array of
-command-line arguments; it retains its value across calls to 'getopt()',
-because it is a global variable.
-
-   The regular expression that is used, '/^-[^:[:space:]/', checks for a
-'-' followed by anything that is not whitespace and not a colon.  If the
-current command-line argument does not match this pattern, it is not an
-option, and it ends option processing.  Continuing on:
-
-         if (_opti == 0)
-             _opti = 2
-         thisopt = substr(argv[Optind], _opti, 1)
-         Optopt = thisopt
-         i = index(options, thisopt)
-         if (i == 0) {
-             if (Opterr)
-                 printf("%c -- invalid option\n", thisopt) > "/dev/stderr"
-             if (_opti >= length(argv[Optind])) {
-                 Optind++
-                 _opti = 0
-             } else
-                 _opti++
-             return "?"
-         }
+not begin with a '-' (unless it is an argument to a preceding option).
+'Optind' steps through the array of command-line arguments; it retains
+its value across calls to 'getopt()', because it is a global variable.
+
+   The regular expression '/^-[^:[:space:]]/' checks for a '-' followed
+by anything that is not whitespace and not a colon.  If the current
+command-line argument does not match this pattern, it is not an option,
+and it ends option processing.  Now, we check to see if we are
+processing a short (single letter) option, or a long option (indicated
+by two dashes, e.g., '--filename').  If it is a short option, we
+continue on:
+
+         if (argv[Optind] !~ /^--/) {        # if this is a short option
+             if (_opti == 0)
+                 _opti = 2
+             thisopt = substr(argv[Optind], _opti, 1)
+             Optopt = thisopt
+             i = index(options, thisopt)
+             if (i == 0) {
+                 if (Opterr)
+                     printf("%c -- invalid option\n", thisopt) > "/dev/stderr"
+                 if (_opti >= length(argv[Optind])) {
+                     Optind++
+                     _opti = 0
+                 } else
+                     _opti++
+                 return "?"
+             }
 
    The '_opti' variable tracks the position in the current command-line
 argument ('argv[Optind]').  If multiple options are grouped together
@@ -16620,15 +16633,15 @@ incremented.
 The main program can examine 'Optopt' if it needs to know what the
 invalid option letter actually is.  Continuing on:
 
-         if (substr(options, i + 1, 1) == ":") {
-             # get option argument
-             if (length(substr(argv[Optind], _opti + 1)) > 0)
-                 Optarg = substr(argv[Optind], _opti + 1)
-             else
-                 Optarg = argv[++Optind]
-             _opti = 0
-         } else
-             Optarg = ""
+             if (substr(options, i + 1, 1) == ":") {
+                 # get option argument
+                 if (length(substr(argv[Optind], _opti + 1)) > 0)
+                     Optarg = substr(argv[Optind], _opti + 1)
+                 else
+                     Optarg = argv[++Optind]
+                 _opti = 0
+             } else
+                 Optarg = ""
 
    If the option requires an argument, the option letter is followed by
 a colon in the 'options' string.  If there are remaining characters in
@@ -16638,20 +16651,79 @@ argument is used ('-xFOO' versus '-x FOO').  In either case, '_opti' is
 reset to zero, because there are no more characters left to examine in
 the current command-line argument.  Continuing:
 
-         if (_opti == 0 || _opti >= length(argv[Optind])) {
+             if (_opti == 0 || _opti >= length(argv[Optind])) {
+                 Optind++
+                 _opti = 0
+             } else
+                 _opti++
+             return thisopt
+
+   Finally, for a short option, if '_opti' is either zero or greater
+than the length of the current command-line argument, it means this
+element in 'argv' is through being processed, so 'Optind' is incremented
+to point to the next element in 'argv'.  If neither condition is true,
+then only '_opti' is incremented, so that the next option letter can be
+processed on the next call to 'getopt()'.
+
+   On the other hand, if the earlier test found that this was a long
+option, we take a different branch:
+
+         } else {
+             j = index(argv[Optind], "=")
+             if (j > 0)
+                 thisopt = substr(argv[Optind], 3, j - 3)
+             else
+                 thisopt = substr(argv[Optind], 3)
+             Optopt = thisopt
+
+   First, we search this option for a possible embedded equal sign, as
+the specification of long options allows an argument to an option
+'--someopt:' to be specified as '--someopt=answer' as well as
+'--someopt answer'.
+
+             i = match(longopts, "(^|,)" thisopt "($|[,:])")
+             if (i == 0) {
+                 if (Opterr)
+                      printf("%s -- invalid option\n", thisopt) > "/dev/stderr"
+                 Optind++
+                 return "?"
+             }
+
+   Next, we try to find the current option in 'longopts'.  The regular
+expression given to 'match()', '"(^|,)" thisopt "($|[,:])"', matches
+this option at the beginning of 'longopts', or at the beginning of a
+subsequent long option (the previous long option would have been
+terminated by a comma), and, in any case, either at the end of the
+'longopts' string ('$'), or followed by a comma (separating this option
+from a subsequent option) or a colon (indicating this long option takes
+an argument) ('[,:]').
+
+   Using this regular expression, we check to see if the current option
+might possibly be in 'longopts' (if 'longopts' is not specified, this
+test will also fail).  In case of an error, we possibly print an error
+message and then return '"?"'.  Continuing on:
+
+             if (substr(longopts, i+1+length(thisopt), 1) == ":") {
+                 if (j > 0)
+                     Optarg = substr(argv[Optind], j + 1)
+                 else
+                     Optarg = argv[++Optind]
+             } else
+                 Optarg = ""
+
+   We now check to see if this option takes an argument and, if so, we
+set 'Optarg' to the value of that argument (either a value after an
+equal sign specified on the command line, immediately adjoining the long
+option string, or as the next argument on the command line).
+
              Optind++
-             _opti = 0
-         } else
-             _opti++
-         return thisopt
+             return thisopt
+         }
      }
 
-   Finally, if '_opti' is either zero or greater than the length of the
-current command-line argument, it means this element in 'argv' is
-through being processed, so 'Optind' is incremented to point to the next
-element in 'argv'.  If neither condition is true, then only '_opti' is
-incremented, so that the next option letter can be processed on the next
-call to 'getopt()'.
+   We increase 'Optind' (which we already increased once if a required
+argument was taken from the next command-line argument), and return the
+long option (minus its leading dashes).
 
    The 'BEGIN' rule initializes both 'Opterr' and 'Optind' to one.
 'Opterr' is set to one, because the default behavior is for 'getopt()'
@@ -16665,18 +16737,19 @@ which is in 'ARGV[0]':
 
          # test program
          if (_getopt_test) {
-             while ((_go_c = getopt(ARGC, ARGV, "ab:cd")) != -1)
-                 printf("c = <%c>, Optarg = <%s>\n",
-                                            _go_c, Optarg)
+             _myshortopts = "ab:cd"
+             _mylongopts = "longa,longb:,otherc,otherd"
+
+             while ((_go_c = getopt(ARGC, ARGV, _myshortopts, _mylongopts)) != -1)
+                 printf("c = <%s>, Optarg = <%s>\n", _go_c, Optarg)
              printf("non-option arguments:\n")
              for (; Optind < ARGC; Optind++)
-                 printf("\tARGV[%d] = <%s>\n",
-                                         Optind, ARGV[Optind])
+                 printf("\tARGV[%d] = <%s>\n", Optind, ARGV[Optind])
          }
      }
 
    The rest of the 'BEGIN' rule is a simple test program.  Here are the
-results of two sample runs of the test program:
+results of some sample runs of the test program:
 
      $ awk -f getopt.awk -v _getopt_test=1 -- -a -cbARG bax -x
      -| c = <a>, Optarg = <>
@@ -16694,7 +16767,19 @@ results of two sample runs of the test program:
      -|         ARGV[4] = <xyz>
      -|         ARGV[5] = <abc>
 
-   In both runs, the first '--' terminates the arguments to 'awk', so
+     $ awk -f getopt.awk -v _getopt_test=1 -- -a \
+     > --longa -b xx --longb=foo=bar --otherd --otherc arg1 arg2
+     -| c = <a>, Optarg = <>
+     -| c = <longa>, Optarg = <>
+     -| c = <b>, Optarg = <xx>
+     -| c = <longb>, Optarg = <foo=bar>
+     -| c = <otherd>, Optarg = <>
+     -| c = <otherc>, Optarg = <>
+     -| non-option arguments:
+     -|        ARGV[8] = <arg1>
+     -|        ARGV[9] = <arg2>
+
+   In all the runs, the first '--' terminates the arguments to 'awk', so
 that it does not try to interpret the '-a', etc., as its own options.
 
      NOTE: After 'getopt()' is through, user-level code must clear out
@@ -35560,8 +35645,8 @@ Index
 * getlocaltime() user-defined function:  Getlocaltime Function.
                                                               (line  16)
 * getopt() function (C library):         Getopt Function.     (line  15)
-* getopt() user-defined function:        Getopt Function.     (line 108)
-* getopt() user-defined function <1>:    Getopt Function.     (line 134)
+* getopt() user-defined function:        Getopt Function.     (line 116)
+* getopt() user-defined function <1>:    Getopt Function.     (line 143)
 * getpwent() function (C library):       Passwd Functions.    (line  16)
 * getpwent() function (C library) <1>:   Passwd Functions.    (line 196)
 * getpwent() user-defined function:      Passwd Functions.    (line  16)
@@ -37007,8 +37092,8 @@ Index
 * user-defined, function, beginfile():   Filetrans Function.  (line  62)
 * user-defined, function, endfile():     Filetrans Function.  (line  62)
 * user-defined, function, rewind():      Rewind Function.     (line  15)
-* user-defined, function, getopt():      Getopt Function.     (line 108)
-* user-defined, function, getopt() <1>:  Getopt Function.     (line 134)
+* user-defined, function, getopt():      Getopt Function.     (line 116)
+* user-defined, function, getopt() <1>:  Getopt Function.     (line 143)
 * user-defined, function, getpwent():    Passwd Functions.    (line  16)
 * user-defined, function, _pw_init():    Passwd Functions.    (line 105)
 * user-defined, function, getpwnam():    Passwd Functions.    (line 180)
@@ -37446,285 +37531,285 @@ Ref: File Checking-Footnote-1680647
 Node: Empty Files680848
 Node: Ignoring Assigns682827
 Node: Getopt Function684377
-Ref: Getopt Function-Footnote-1695846
-Node: Passwd Functions696046
-Ref: Passwd Functions-Footnote-1704885
-Node: Group Functions704973
-Ref: Group Functions-Footnote-1712871
-Node: Walking Arrays713078
-Node: Library Functions Summary716086
-Node: Library Exercises717492
-Node: Sample Programs717957
-Node: Running Examples718727
-Node: Clones719455
-Node: Cut Program720679
-Node: Egrep Program730608
-Ref: Egrep Program-Footnote-1738120
-Node: Id Program738230
-Node: Split Program741910
-Ref: Split Program-Footnote-1745368
-Node: Tee Program745497
-Node: Uniq Program748287
-Node: Wc Program755908
-Ref: Wc Program-Footnote-1760163
-Node: Miscellaneous Programs760257
-Node: Dupword Program761470
-Node: Alarm Program763500
-Node: Translate Program768355
-Ref: Translate Program-Footnote-1772920
-Node: Labels Program773190
-Ref: Labels Program-Footnote-1776541
-Node: Word Sorting776625
-Node: History Sorting780697
-Node: Extract Program782922
-Node: Simple Sed790976
-Node: Igawk Program794050
-Ref: Igawk Program-Footnote-1808381
-Ref: Igawk Program-Footnote-2808583
-Ref: Igawk Program-Footnote-3808705
-Node: Anagram Program808820
-Node: Signature Program811882
-Node: Programs Summary813129
-Node: Programs Exercises814343
-Ref: Programs Exercises-Footnote-1818472
-Node: Advanced Features818563
-Node: Nondecimal Data820553
-Node: Array Sorting822144
-Node: Controlling Array Traversal822844
-Ref: Controlling Array Traversal-Footnote-1831212
-Node: Array Sorting Functions831330
-Ref: Array Sorting Functions-Footnote-1836421
-Node: Two-way I/O836617
-Ref: Two-way I/O-Footnote-1844338
-Ref: Two-way I/O-Footnote-2844525
-Node: TCP/IP Networking844607
-Node: Profiling847725
-Node: Advanced Features Summary856740
-Node: Internationalization858584
-Node: I18N and L10N860064
-Node: Explaining gettext860751
-Ref: Explaining gettext-Footnote-1866643
-Ref: Explaining gettext-Footnote-2866828
-Node: Programmer i18n866993
-Ref: Programmer i18n-Footnote-1871942
-Node: Translator i18n871991
-Node: String Extraction872785
-Ref: String Extraction-Footnote-1873917
-Node: Printf Ordering874003
-Ref: Printf Ordering-Footnote-1876789
-Node: I18N Portability876853
-Ref: I18N Portability-Footnote-1879309
-Node: I18N Example879372
-Ref: I18N Example-Footnote-1882647
-Ref: I18N Example-Footnote-2882720
-Node: Gawk I18N882829
-Node: I18N Summary883478
-Node: Debugger884819
-Node: Debugging885819
-Node: Debugging Concepts886260
-Node: Debugging Terms888069
-Node: Awk Debugging890644
-Ref: Awk Debugging-Footnote-1891589
-Node: Sample Debugging Session891721
-Node: Debugger Invocation892255
-Node: Finding The Bug893641
-Node: List of Debugger Commands900115
-Node: Breakpoint Control901448
-Node: Debugger Execution Control905142
-Node: Viewing And Changing Data908504
-Node: Execution Stack912045
-Node: Debugger Info913682
-Node: Miscellaneous Debugger Commands917753
-Node: Readline Support922815
-Node: Limitations923711
-Node: Debugging Summary926265
-Node: Namespaces927544
-Node: Global Namespace928655
-Node: Qualified Names930053
-Node: Default Namespace931052
-Node: Changing The Namespace931793
-Node: Naming Rules933407
-Node: Internal Name Management935255
-Node: Namespace Example936297
-Node: Namespace And Features938859
-Node: Namespace Summary940294
-Node: Arbitrary Precision Arithmetic941771
-Node: Computer Arithmetic943258
-Ref: table-numeric-ranges947024
-Ref: table-floating-point-ranges947517
-Ref: Computer Arithmetic-Footnote-1948175
-Node: Math Definitions948232
-Ref: table-ieee-formats951548
-Ref: Math Definitions-Footnote-1952151
-Node: MPFR features952256
-Node: FP Math Caution953974
-Ref: FP Math Caution-Footnote-1955046
-Node: Inexactness of computations955415
-Node: Inexact representation956375
-Node: Comparing FP Values957735
-Node: Errors accumulate958976
-Node: Getting Accuracy960409
-Node: Try To Round963119
-Node: Setting precision964018
-Ref: table-predefined-precision-strings964715
-Node: Setting the rounding mode966545
-Ref: table-gawk-rounding-modes966919
-Ref: Setting the rounding mode-Footnote-1970850
-Node: Arbitrary Precision Integers971029
-Ref: Arbitrary Precision Integers-Footnote-1974204
-Node: Checking for MPFR974353
-Node: POSIX Floating Point Problems975827
-Ref: POSIX Floating Point Problems-Footnote-1980112
-Node: Floating point summary980150
-Node: Dynamic Extensions982340
-Node: Extension Intro983893
-Node: Plugin License985159
-Node: Extension Mechanism Outline985956
-Ref: figure-load-extension986395
-Ref: figure-register-new-function987960
-Ref: figure-call-new-function989052
-Node: Extension API Description991114
-Node: Extension API Functions Introduction992756
-Ref: table-api-std-headers994592
-Node: General Data Types998457
-Ref: General Data Types-Footnote-11006818
-Node: Memory Allocation Functions1007117
-Ref: Memory Allocation Functions-Footnote-11011327
-Node: Constructor Functions1011426
-Node: Registration Functions1015012
-Node: Extension Functions1015697
-Node: Exit Callback Functions1021019
-Node: Extension Version String1022269
-Node: Input Parsers1022932
-Node: Output Wrappers1035653
-Node: Two-way processors1040165
-Node: Printing Messages1042430
-Ref: Printing Messages-Footnote-11043601
-Node: Updating ERRNO1043754
-Node: Requesting Values1044493
-Ref: table-value-types-returned1045230
-Node: Accessing Parameters1046166
-Node: Symbol Table Access1047401
-Node: Symbol table by name1047913
-Ref: Symbol table by name-Footnote-11050937
-Node: Symbol table by cookie1051065
-Ref: Symbol table by cookie-Footnote-11055250
-Node: Cached values1055314
-Ref: Cached values-Footnote-11058850
-Node: Array Manipulation1059003
-Ref: Array Manipulation-Footnote-11060094
-Node: Array Data Types1060131
-Ref: Array Data Types-Footnote-11062789
-Node: Array Functions1062881
-Node: Flattening Arrays1067379
-Node: Creating Arrays1074355
-Node: Redirection API1079122
-Node: Extension API Variables1081955
-Node: Extension Versioning1082666
-Ref: gawk-api-version1083095
-Node: Extension GMP/MPFR Versioning1084826
-Node: Extension API Informational Variables1086454
-Node: Extension API Boilerplate1087527
-Node: Changes from API V11091501
-Node: Finding Extensions1093073
-Node: Extension Example1093632
-Node: Internal File Description1094430
-Node: Internal File Ops1098510
-Ref: Internal File Ops-Footnote-11109860
-Node: Using Internal File Ops1110000
-Ref: Using Internal File Ops-Footnote-11112383
-Node: Extension Samples1112657
-Node: Extension Sample File Functions1114186
-Node: Extension Sample Fnmatch1121835
-Node: Extension Sample Fork1123322
-Node: Extension Sample Inplace1124540
-Node: Extension Sample Ord1128165
-Node: Extension Sample Readdir1129001
-Ref: table-readdir-file-types1129890
-Node: Extension Sample Revout1130957
-Node: Extension Sample Rev2way1131546
-Node: Extension Sample Read write array1132286
-Node: Extension Sample Readfile1134228
-Node: Extension Sample Time1135323
-Node: Extension Sample API Tests1137075
-Node: gawkextlib1137567
-Node: Extension summary1140485
-Node: Extension Exercises1144187
-Node: Language History1145429
-Node: V7/SVR3.11147085
-Node: SVR41149237
-Node: POSIX1150671
-Node: BTL1152051
-Node: POSIX/GNU1152780
-Node: Feature History1158558
-Node: Common Extensions1174751
-Node: Ranges and Locales1176034
-Ref: Ranges and Locales-Footnote-11180650
-Ref: Ranges and Locales-Footnote-21180677
-Ref: Ranges and Locales-Footnote-31180912
-Node: Contributors1181133
-Node: History summary1187086
-Node: Installation1188466
-Node: Gawk Distribution1189410
-Node: Getting1189894
-Node: Extracting1190857
-Node: Distribution contents1192495
-Node: Unix Installation1198975
-Node: Quick Installation1199657
-Node: Shell Startup Files1202071
-Node: Additional Configuration Options1203160
-Node: Configuration Philosophy1205475
-Node: Non-Unix Installation1207844
-Node: PC Installation1208304
-Node: PC Binary Installation1209142
-Node: PC Compiling1209577
-Node: PC Using1210694
-Node: Cygwin1214247
-Node: MSYS1215471
-Node: VMS Installation1215972
-Node: VMS Compilation1216763
-Ref: VMS Compilation-Footnote-11217992
-Node: VMS Dynamic Extensions1218050
-Node: VMS Installation Details1219735
-Node: VMS Running1221988
-Node: VMS GNV1226267
-Node: VMS Old Gawk1227002
-Node: Bugs1227473
-Node: Bug address1228136
-Node: Usenet1231118
-Node: Maintainers1232122
-Node: Other Versions1233383
-Node: Installation summary1240471
-Node: Notes1241673
-Node: Compatibility Mode1242467
-Node: Additions1243249
-Node: Accessing The Source1244174
-Node: Adding Code1245611
-Node: New Ports1251830
-Node: Derived Files1256205
-Ref: Derived Files-Footnote-11261865
-Ref: Derived Files-Footnote-21261900
-Ref: Derived Files-Footnote-31262498
-Node: Future Extensions1262612
-Node: Implementation Limitations1263270
-Node: Extension Design1264453
-Node: Old Extension Problems1265597
-Ref: Old Extension Problems-Footnote-11267115
-Node: Extension New Mechanism Goals1267172
-Ref: Extension New Mechanism Goals-Footnote-11270536
-Node: Extension Other Design Decisions1270725
-Node: Extension Future Growth1272838
-Node: Notes summary1273674
-Node: Basic Concepts1274832
-Node: Basic High Level1275513
-Ref: figure-general-flow1275795
-Ref: figure-process-flow1276480
-Ref: Basic High Level-Footnote-11279781
-Node: Basic Data Typing1279966
-Node: Glossary1283294
-Node: Copying1315132
-Node: GNU Free Documentation License1352675
-Node: Index1377795
+Ref: Getopt Function-Footnote-1699591
+Node: Passwd Functions699791
+Ref: Passwd Functions-Footnote-1708630
+Node: Group Functions708718
+Ref: Group Functions-Footnote-1716616
+Node: Walking Arrays716823
+Node: Library Functions Summary719831
+Node: Library Exercises721237
+Node: Sample Programs721702
+Node: Running Examples722472
+Node: Clones723200
+Node: Cut Program724424
+Node: Egrep Program734353
+Ref: Egrep Program-Footnote-1741865
+Node: Id Program741975
+Node: Split Program745655
+Ref: Split Program-Footnote-1749113
+Node: Tee Program749242
+Node: Uniq Program752032
+Node: Wc Program759653
+Ref: Wc Program-Footnote-1763908
+Node: Miscellaneous Programs764002
+Node: Dupword Program765215
+Node: Alarm Program767245
+Node: Translate Program772100
+Ref: Translate Program-Footnote-1776665
+Node: Labels Program776935
+Ref: Labels Program-Footnote-1780286
+Node: Word Sorting780370
+Node: History Sorting784442
+Node: Extract Program786667
+Node: Simple Sed794721
+Node: Igawk Program797795
+Ref: Igawk Program-Footnote-1812126
+Ref: Igawk Program-Footnote-2812328
+Ref: Igawk Program-Footnote-3812450
+Node: Anagram Program812565
+Node: Signature Program815627
+Node: Programs Summary816874
+Node: Programs Exercises818088
+Ref: Programs Exercises-Footnote-1822217
+Node: Advanced Features822308
+Node: Nondecimal Data824298
+Node: Array Sorting825889
+Node: Controlling Array Traversal826589
+Ref: Controlling Array Traversal-Footnote-1834957
+Node: Array Sorting Functions835075
+Ref: Array Sorting Functions-Footnote-1840166
+Node: Two-way I/O840362
+Ref: Two-way I/O-Footnote-1848083
+Ref: Two-way I/O-Footnote-2848270
+Node: TCP/IP Networking848352
+Node: Profiling851470
+Node: Advanced Features Summary860485
+Node: Internationalization862329
+Node: I18N and L10N863809
+Node: Explaining gettext864496
+Ref: Explaining gettext-Footnote-1870388
+Ref: Explaining gettext-Footnote-2870573
+Node: Programmer i18n870738
+Ref: Programmer i18n-Footnote-1875687
+Node: Translator i18n875736
+Node: String Extraction876530
+Ref: String Extraction-Footnote-1877662
+Node: Printf Ordering877748
+Ref: Printf Ordering-Footnote-1880534
+Node: I18N Portability880598
+Ref: I18N Portability-Footnote-1883054
+Node: I18N Example883117
+Ref: I18N Example-Footnote-1886392
+Ref: I18N Example-Footnote-2886465
+Node: Gawk I18N886574
+Node: I18N Summary887223
+Node: Debugger888564
+Node: Debugging889564
+Node: Debugging Concepts890005
+Node: Debugging Terms891814
+Node: Awk Debugging894389
+Ref: Awk Debugging-Footnote-1895334
+Node: Sample Debugging Session895466
+Node: Debugger Invocation896000
+Node: Finding The Bug897386
+Node: List of Debugger Commands903860
+Node: Breakpoint Control905193
+Node: Debugger Execution Control908887
+Node: Viewing And Changing Data912249
+Node: Execution Stack915790
+Node: Debugger Info917427
+Node: Miscellaneous Debugger Commands921498
+Node: Readline Support926560
+Node: Limitations927456
+Node: Debugging Summary930010
+Node: Namespaces931289
+Node: Global Namespace932400
+Node: Qualified Names933798
+Node: Default Namespace934797
+Node: Changing The Namespace935538
+Node: Naming Rules937152
+Node: Internal Name Management939000
+Node: Namespace Example940042
+Node: Namespace And Features942604
+Node: Namespace Summary944039
+Node: Arbitrary Precision Arithmetic945516
+Node: Computer Arithmetic947003
+Ref: table-numeric-ranges950769
+Ref: table-floating-point-ranges951262
+Ref: Computer Arithmetic-Footnote-1951920
+Node: Math Definitions951977
+Ref: table-ieee-formats955293
+Ref: Math Definitions-Footnote-1955896
+Node: MPFR features956001
+Node: FP Math Caution957719
+Ref: FP Math Caution-Footnote-1958791
+Node: Inexactness of computations959160
+Node: Inexact representation960120
+Node: Comparing FP Values961480
+Node: Errors accumulate962721
+Node: Getting Accuracy964154
+Node: Try To Round966864
+Node: Setting precision967763
+Ref: table-predefined-precision-strings968460
+Node: Setting the rounding mode970290
+Ref: table-gawk-rounding-modes970664
+Ref: Setting the rounding mode-Footnote-1974595
+Node: Arbitrary Precision Integers974774
+Ref: Arbitrary Precision Integers-Footnote-1977949
+Node: Checking for MPFR978098
+Node: POSIX Floating Point Problems979572
+Ref: POSIX Floating Point Problems-Footnote-1983857
+Node: Floating point summary983895
+Node: Dynamic Extensions986085
+Node: Extension Intro987638
+Node: Plugin License988904
+Node: Extension Mechanism Outline989701
+Ref: figure-load-extension990140
+Ref: figure-register-new-function991705
+Ref: figure-call-new-function992797
+Node: Extension API Description994859
+Node: Extension API Functions Introduction996501
+Ref: table-api-std-headers998337
+Node: General Data Types1002202
+Ref: General Data Types-Footnote-11010563
+Node: Memory Allocation Functions1010862
+Ref: Memory Allocation Functions-Footnote-11015072
+Node: Constructor Functions1015171
+Node: Registration Functions1018757
+Node: Extension Functions1019442
+Node: Exit Callback Functions1024764
+Node: Extension Version String1026014
+Node: Input Parsers1026677
+Node: Output Wrappers1039398
+Node: Two-way processors1043910
+Node: Printing Messages1046175
+Ref: Printing Messages-Footnote-11047346
+Node: Updating ERRNO1047499
+Node: Requesting Values1048238
+Ref: table-value-types-returned1048975
+Node: Accessing Parameters1049911
+Node: Symbol Table Access1051146
+Node: Symbol table by name1051658
+Ref: Symbol table by name-Footnote-11054682
+Node: Symbol table by cookie1054810
+Ref: Symbol table by cookie-Footnote-11058995
+Node: Cached values1059059
+Ref: Cached values-Footnote-11062595
+Node: Array Manipulation1062748
+Ref: Array Manipulation-Footnote-11063839
+Node: Array Data Types1063876
+Ref: Array Data Types-Footnote-11066534
+Node: Array Functions1066626
+Node: Flattening Arrays1071124
+Node: Creating Arrays1078100
+Node: Redirection API1082867
+Node: Extension API Variables1085700
+Node: Extension Versioning1086411
+Ref: gawk-api-version1086840
+Node: Extension GMP/MPFR Versioning1088571
+Node: Extension API Informational Variables1090199
+Node: Extension API Boilerplate1091272
+Node: Changes from API V11095246
+Node: Finding Extensions1096818
+Node: Extension Example1097377
+Node: Internal File Description1098175
+Node: Internal File Ops1102255
+Ref: Internal File Ops-Footnote-11113605
+Node: Using Internal File Ops1113745
+Ref: Using Internal File Ops-Footnote-11116128
+Node: Extension Samples1116402
+Node: Extension Sample File Functions1117931
+Node: Extension Sample Fnmatch1125580
+Node: Extension Sample Fork1127067
+Node: Extension Sample Inplace1128285
+Node: Extension Sample Ord1131910
+Node: Extension Sample Readdir1132746
+Ref: table-readdir-file-types1133635
+Node: Extension Sample Revout1134702
+Node: Extension Sample Rev2way1135291
+Node: Extension Sample Read write array1136031
+Node: Extension Sample Readfile1137973
+Node: Extension Sample Time1139068
+Node: Extension Sample API Tests1140820
+Node: gawkextlib1141312
+Node: Extension summary1144230
+Node: Extension Exercises1147932
+Node: Language History1149174
+Node: V7/SVR3.11150830
+Node: SVR41152982
+Node: POSIX1154416
+Node: BTL1155796
+Node: POSIX/GNU1156525
+Node: Feature History1162303
+Node: Common Extensions1178496
+Node: Ranges and Locales1179779
+Ref: Ranges and Locales-Footnote-11184395
+Ref: Ranges and Locales-Footnote-21184422
+Ref: Ranges and Locales-Footnote-31184657
+Node: Contributors1184878
+Node: History summary1190831
+Node: Installation1192211
+Node: Gawk Distribution1193155
+Node: Getting1193639
+Node: Extracting1194602
+Node: Distribution contents1196240
+Node: Unix Installation1202720
+Node: Quick Installation1203402
+Node: Shell Startup Files1205816
+Node: Additional Configuration Options1206905
+Node: Configuration Philosophy1209220
+Node: Non-Unix Installation1211589
+Node: PC Installation1212049
+Node: PC Binary Installation1212887
+Node: PC Compiling1213322
+Node: PC Using1214439
+Node: Cygwin1217992
+Node: MSYS1219216
+Node: VMS Installation1219717
+Node: VMS Compilation1220508
+Ref: VMS Compilation-Footnote-11221737
+Node: VMS Dynamic Extensions1221795
+Node: VMS Installation Details1223480
+Node: VMS Running1225733
+Node: VMS GNV1230012
+Node: VMS Old Gawk1230747
+Node: Bugs1231218
+Node: Bug address1231881
+Node: Usenet1234863
+Node: Maintainers1235867
+Node: Other Versions1237128
+Node: Installation summary1244216
+Node: Notes1245418
+Node: Compatibility Mode1246212
+Node: Additions1246994
+Node: Accessing The Source1247919
+Node: Adding Code1249356
+Node: New Ports1255575
+Node: Derived Files1259950
+Ref: Derived Files-Footnote-11265610
+Ref: Derived Files-Footnote-21265645
+Ref: Derived Files-Footnote-31266243
+Node: Future Extensions1266357
+Node: Implementation Limitations1267015
+Node: Extension Design1268198
+Node: Old Extension Problems1269342
+Ref: Old Extension Problems-Footnote-11270860
+Node: Extension New Mechanism Goals1270917
+Ref: Extension New Mechanism Goals-Footnote-11274281
+Node: Extension Other Design Decisions1274470
+Node: Extension Future Growth1276583
+Node: Notes summary1277419
+Node: Basic Concepts1278577
+Node: Basic High Level1279258
+Ref: figure-general-flow1279540
+Ref: figure-process-flow1280225
+Ref: Basic High Level-Footnote-11283526
+Node: Basic Data Typing1283711
+Node: Glossary1287039
+Node: Copying1318877
+Node: GNU Free Documentation License1356420
+Node: Index1381540
 
 End Tag Table
 
diff --git a/doc/gawk.texi b/doc/gawk.texi
index 1a41f88..d646175 100644
--- a/doc/gawk.texi
+++ b/doc/gawk.texi
@@ -23443,16 +23443,25 @@ main(int argc, char *argv[])
 @}
 @end example
 
+The GNU project's versions of the original Unix utilities popularized
+the use of long command line options; for example, @option{--help}
+in addition to @option{-h}. Arguments to long options are either provided
+as separate command line arguments (@samp{--source '@var{program-text}'})
+or separated from the option with an @samp{=} sign
+(@samp{--source='@var{program-text}'}).
+
 As a side point, @command{gawk} actually uses the GNU @code{getopt_long()}
 function to process both normal and GNU-style long options
 (@pxref{Options}).
 
 The abstraction provided by @code{getopt()} is very useful and is quite
 handy in @command{awk} programs as well.  Following is an @command{awk}
-version of @code{getopt()}.  This function highlights one of the
+version of @code{getopt()} that accepts both short and long options.
+
+This function highlights one of the
 greatest weaknesses in @command{awk}, which is that it is very poor at
-manipulating single characters.  Repeated calls to @code{substr()} are
-necessary for accessing individual characters
+manipulating single characters.  The function needs repeated calls to
+@code{substr()} in order to access individual characters
 (@pxref{String Functions}).@footnote{This
 function was written before @command{gawk} acquired the ability to
 split strings into single characters using @code{""} as the separator.
@@ -23465,6 +23474,7 @@ The discussion that follows walks through the code a 
bit at a time:
 @example
 @c file eg/lib/getopt.awk
 # getopt.awk --- Do C library getopt(3) function in awk
+#                Also supports long options.
 @c endfile
 @ignore
 @c file eg/lib/getopt.awk
@@ -23473,6 +23483,7 @@ The discussion that follows walks through the code a 
bit at a time:
 #
 # Initial version: March, 1991
 # Revised: May, 1993
+# Long options added by Greg Minshall, January 2020
 @c endfile
 @end ignore
 @c file eg/lib/getopt.awk
@@ -23486,7 +23497,7 @@ The discussion that follows walks through the code a 
bit at a time:
 # Returns:
 #    -1     at end of options
 #    "?"    for unrecognized option
-#    <c>    a character representing the current option
+#    <s>    a string representing the current option
 
 # Private Data:
 #    _opti  -- index in multiflag option, e.g., -abc
@@ -23500,17 +23511,18 @@ are ``private'' to this library function.  Such 
documentation is essential
 for any program, and particularly for library functions.
 
 The @code{getopt()} function first checks that it was indeed called with
-a string of options (the @code{options} parameter).  If @code{options}
-has a zero length, @code{getopt()} immediately returns @minus{}1:
+a string of options (the @code{options} parameter).  If both
+@code{options} and @code{longopts} have a zero length,
+@code{getopt()} immediately returns @minus{}1:
 
 @cindex @code{getopt()} user-defined function
 @cindex user-defined @subentry function @subentry @code{getopt()}
 @example
 @c file eg/lib/getopt.awk
-function getopt(argc, argv, options,    thisopt, i)
+function getopt(argc, argv, options, longopts,    thisopt, i, j)
 @{
-    if (length(options) == 0)    # no options given
-        return -1
+    if (length(options) == 0 && length(longopts) == 0)
+        return -1                # no options given
 
 @group
     if (argv[Optind] == "--") @{  # all done
@@ -23527,33 +23539,39 @@ function getopt(argc, argv, options,    thisopt, i)
 
 The next thing to check for is the end of the options.  A @option{--}
 ends the command-line options, as does any command-line argument that
-does not begin with a @samp{-}.  @code{Optind} is used to step through
+does not begin with a @samp{-} (unless it is an argument to a preceding
+option).  @code{Optind} steps through
 the array of command-line arguments; it retains its value across calls
 to @code{getopt()}, because it is a global variable.
 
-The regular expression that is used, @code{@w{/^-[^:[:space:]/}},
+The regular expression @code{@w{/^-[^:[:space:]]/}}
 checks for a @samp{-} followed by anything
 that is not whitespace and not a colon.
 If the current command-line argument does not match this pattern,
-it is not an option, and it ends option processing. Continuing on:
+it is not an option, and it ends option processing.
+Now, we
+check to see if we are processing a short (single letter) option, or a
+long option (indicated by two dashes, e.g., @samp{--filename}).  If it
+is a short option, we continue on:
 
 @example
 @c file eg/lib/getopt.awk
-    if (_opti == 0)
-        _opti = 2
-    thisopt = substr(argv[Optind], _opti, 1)
-    Optopt = thisopt
-    i = index(options, thisopt)
-    if (i == 0) @{
-        if (Opterr)
-            printf("%c -- invalid option\n", thisopt) > "/dev/stderr"
-        if (_opti >= length(argv[Optind])) @{
-            Optind++
-            _opti = 0
-        @} else
-            _opti++
-        return "?"
-    @}
+    if (argv[Optind] !~ /^--/) @{        # if this is a short option
+        if (_opti == 0)
+            _opti = 2
+        thisopt = substr(argv[Optind], _opti, 1)
+        Optopt = thisopt
+        i = index(options, thisopt)
+        if (i == 0) @{
+            if (Opterr)
+                printf("%c -- invalid option\n", thisopt) > "/dev/stderr"
+            if (_opti >= length(argv[Optind])) @{
+                Optind++
+                _opti = 0
+            @} else
+                _opti++
+            return "?"
+        @}
 @c endfile
 @end example
 
@@ -23586,15 +23604,15 @@ invalid option letter actually is. Continuing on:
 
 @example
 @c file eg/lib/getopt.awk
-    if (substr(options, i + 1, 1) == ":") @{
-        # get option argument
-        if (length(substr(argv[Optind], _opti + 1)) > 0)
-            Optarg = substr(argv[Optind], _opti + 1)
-        else
-            Optarg = argv[++Optind]
-        _opti = 0
-    @} else
-        Optarg = ""
+        if (substr(options, i + 1, 1) == ":") @{
+            # get option argument
+            if (length(substr(argv[Optind], _opti + 1)) > 0)
+                Optarg = substr(argv[Optind], _opti + 1)
+            else
+                Optarg = argv[++Optind]
+            _opti = 0
+        @} else
+            Optarg = ""
 @c endfile
 @end example
 
@@ -23608,22 +23626,97 @@ examine in the current command-line argument. 
Continuing:
 
 @example
 @c file eg/lib/getopt.awk
-    if (_opti == 0 || _opti >= length(argv[Optind])) @{
+        if (_opti == 0 || _opti >= length(argv[Optind])) @{
+            Optind++
+            _opti = 0
+        @} else
+            _opti++
+        return thisopt
+@c endfile
+@end example
+
+Finally, for a short option, if @code{_opti} is either zero or at least
+the length of the current command-line argument, it means this
+element in @code{argv} is through being processed, so @code{Optind} is
+incremented to point to the next element in @code{argv}.  If neither
+condition is true, then only @code{_opti} is incremented, so that the
+next option letter can be processed on the next call to @code{getopt()}.
+
+On the other hand, if the earlier test found that this was a long
+option, we take a different branch:
+
+@example
+@c file eg/lib/getopt.awk
+    @} else @{
+        j = index(argv[Optind], "=")
+        if (j > 0)
+            thisopt = substr(argv[Optind], 3, j - 3)
+        else
+            thisopt = substr(argv[Optind], 3)
+        Optopt = thisopt
+@c endfile
+@end example
+
+First, we search this option for a possible embedded equal sign, as the
+specification of long options allows an argument to an option
+@samp{--someopt:} to be specified as @samp{--someopt=answer} as well as
+@samp{@w{--someopt answer}}.
+
+@example
+@c file eg/lib/getopt.awk
+        i = match(longopts, "(^|,)" thisopt "($|[,:])")
+        if (i == 0) @{
+            if (Opterr)
+                 printf("%s -- invalid option\n", thisopt) > "/dev/stderr"
+            Optind++
+            return "?"
+        @}
+@c endfile
+@end example
+
+Next, we try to find the current option in @code{longopts}.  The regular
+expression given to @code{match()}, @code{@w{"(^|,)" thisopt "($|[,:])"}},
+matches this option at the beginning of @code{longopts}, or at the
+beginning of a subsequent long option (the previous long option would
+have been terminated by a comma), and, in any case, either at the end of
+the @code{longopts} string (@samp{$}), or followed by a comma
+(separating this option from a subsequent option) or a colon (indicating
+this long option takes an argument) (@samp{@w{[,:]}}).
+
+Using this regular expression, we check to see if the current option
+might possibly be in @code{longopts} (if @code{longopts} is not
+specified, this test will also fail).  In case of an error, we possibly
+print an error message and then return @code{"?"}. Continuing on:
+
+@example
+@c file eg/lib/getopt.awk
+        if (substr(longopts, i+1+length(thisopt), 1) == ":") @{
+            if (j > 0)
+                Optarg = substr(argv[Optind], j + 1)
+            else
+                Optarg = argv[++Optind]
+        @} else
+            Optarg = ""
+@c endfile
+@end example
+
+We now check to see if this option takes an argument and, if so, we set
+@code{Optarg} to the value of that argument (either a value after an
+equal sign specified on the command line, immediately adjoining the long
+option string, or as the next argument on the command line).
+
+@example
+@c file eg/lib/getopt.awk
         Optind++
-        _opti = 0
-    @} else
-        _opti++
-    return thisopt
+        return thisopt
+    @}
 @}
 @c endfile
 @end example
 
-Finally, if @code{_opti} is either zero or greater than the length of the
-current command-line argument, it means this element in @code{argv} is
-through being processed, so @code{Optind} is incremented to point to the
-next element in @code{argv}.  If neither condition is true, then only
-@code{_opti} is incremented, so that the next option letter can be processed
-on the next call to @code{getopt()}.
+We increase @code{Optind} (which we already increased once if a required
+argument was given as a separate command-line argument), and return the
+long option (minus its leading dashes).
 
 The @code{BEGIN} rule initializes both @code{Opterr} and @code{Optind} to one.
 @code{Opterr} is set to one, because the default behavior is for 
@code{getopt()}
@@ -23639,20 +23732,21 @@ BEGIN @{
 
     # test program
     if (_getopt_test) @{
-        while ((_go_c = getopt(ARGC, ARGV, "ab:cd")) != -1)
-            printf("c = <%c>, Optarg = <%s>\n",
-                                       _go_c, Optarg)
+        _myshortopts = "ab:cd"
+        _mylongopts = "longa,longb:,otherc,otherd"
+
+        while ((_go_c = getopt(ARGC, ARGV, _myshortopts, _mylongopts)) != -1)
+            printf("c = <%s>, Optarg = <%s>\n", _go_c, Optarg)
         printf("non-option arguments:\n")
         for (; Optind < ARGC; Optind++)
-            printf("\tARGV[%d] = <%s>\n",
-                                    Optind, ARGV[Optind])
+            printf("\tARGV[%d] = <%s>\n", Optind, ARGV[Optind])
     @}
 @}
 @c endfile
 @end example
 
 The rest of the @code{BEGIN} rule is a simple test program.  Here are the
-results of two sample runs of the test program:
+results of some sample runs of the test program:
 
 @example
 $ @kbd{awk -f getopt.awk -v _getopt_test=1 -- -a -cbARG bax -x}
@@ -23670,9 +23764,21 @@ $ @kbd{awk -f getopt.awk -v _getopt_test=1 -- -a -x -- 
xyz abc}
 @print{} non-option arguments:
 @print{}         ARGV[4] = <xyz>
 @print{}         ARGV[5] = <abc>
+
+$ @kbd{awk -f getopt.awk -v _getopt_test=1 -- -a \}
+> @kbd{--longa -b xx --longb=foo=bar --otherd --otherc arg1 arg2}
+@print{} c = <a>, Optarg = <>
+@print{} c = <longa>, Optarg = <>
+@print{} c = <b>, Optarg = <xx>
+@print{} c = <longb>, Optarg = <foo=bar>
+@print{} c = <otherd>, Optarg = <>
+@print{} c = <otherc>, Optarg = <>
+@print{} non-option arguments:
+@print{}       ARGV[8] = <arg1>
+@print{}       ARGV[9] = <arg2>
 @end example
 
-In both runs, the first @option{--} terminates the arguments to
+In all the runs, the first @option{--} terminates the arguments to
 @command{awk}, so that it does not try to interpret the @option{-a},
 etc., as its own options.
 
diff --git a/doc/gawktexi.in b/doc/gawktexi.in
index fe2cc17..2d4409b 100644
--- a/doc/gawktexi.in
+++ b/doc/gawktexi.in
@@ -22453,16 +22453,25 @@ main(int argc, char *argv[])
 @}
 @end example
 
+The GNU project's versions of the original Unix utilities popularized
+the use of long command line options; for example, @option{--help}
+in addition to @option{-h}. Arguments to long options are either provided
+as separate command line arguments (@samp{--source '@var{program-text}'})
+or separated from the option with an @samp{=} sign
+(@samp{--source='@var{program-text}'}).
+
 As a side point, @command{gawk} actually uses the GNU @code{getopt_long()}
 function to process both normal and GNU-style long options
 (@pxref{Options}).
 
 The abstraction provided by @code{getopt()} is very useful and is quite
 handy in @command{awk} programs as well.  Following is an @command{awk}
-version of @code{getopt()}.  This function highlights one of the
+version of @code{getopt()} that accepts both short and long options.
+
+This function highlights one of the
 greatest weaknesses in @command{awk}, which is that it is very poor at
-manipulating single characters.  Repeated calls to @code{substr()} are
-necessary for accessing individual characters
+manipulating single characters.  The function needs repeated calls to
+@code{substr()} in order to access individual characters
 (@pxref{String Functions}).@footnote{This
 function was written before @command{gawk} acquired the ability to
 split strings into single characters using @code{""} as the separator.
@@ -22475,6 +22484,7 @@ The discussion that follows walks through the code a 
bit at a time:
 @example
 @c file eg/lib/getopt.awk
 # getopt.awk --- Do C library getopt(3) function in awk
+#                Also supports long options.
 @c endfile
 @ignore
 @c file eg/lib/getopt.awk
@@ -22483,6 +22493,7 @@ The discussion that follows walks through the code a 
bit at a time:
 #
 # Initial version: March, 1991
 # Revised: May, 1993
+# Long options added by Greg Minshall, January 2020
 @c endfile
 @end ignore
 @c file eg/lib/getopt.awk
@@ -22496,7 +22507,7 @@ The discussion that follows walks through the code a 
bit at a time:
 # Returns:
 #    -1     at end of options
 #    "?"    for unrecognized option
-#    <c>    a character representing the current option
+#    <s>    a string representing the current option
 
 # Private Data:
 #    _opti  -- index in multiflag option, e.g., -abc
@@ -22510,17 +22521,18 @@ are ``private'' to this library function.  Such 
documentation is essential
 for any program, and particularly for library functions.
 
 The @code{getopt()} function first checks that it was indeed called with
-a string of options (the @code{options} parameter).  If @code{options}
-has a zero length, @code{getopt()} immediately returns @minus{}1:
+a string of options (the @code{options} parameter).  If both
+@code{options} and @code{longopts} have a zero length,
+@code{getopt()} immediately returns @minus{}1:
 
 @cindex @code{getopt()} user-defined function
 @cindex user-defined @subentry function @subentry @code{getopt()}
 @example
 @c file eg/lib/getopt.awk
-function getopt(argc, argv, options,    thisopt, i)
+function getopt(argc, argv, options, longopts,    thisopt, i, j)
 @{
-    if (length(options) == 0)    # no options given
-        return -1
+    if (length(options) == 0 && length(longopts) == 0)
+        return -1                # no options given
 
 @group
     if (argv[Optind] == "--") @{  # all done
@@ -22537,33 +22549,39 @@ function getopt(argc, argv, options,    thisopt, i)
 
 The next thing to check for is the end of the options.  A @option{--}
 ends the command-line options, as does any command-line argument that
-does not begin with a @samp{-}.  @code{Optind} is used to step through
+does not begin with a @samp{-} (unless it is an argument to a preceding
+option).  @code{Optind} steps through
 the array of command-line arguments; it retains its value across calls
 to @code{getopt()}, because it is a global variable.
 
-The regular expression that is used, @code{@w{/^-[^:[:space:]/}},
+The regular expression @code{@w{/^-[^:[:space:]]/}}
 checks for a @samp{-} followed by anything
 that is not whitespace and not a colon.
 If the current command-line argument does not match this pattern,
-it is not an option, and it ends option processing. Continuing on:
+it is not an option, and it ends option processing.
+Now, we
+check to see if we are processing a short (single letter) option, or a
+long option (indicated by two dashes, e.g., @samp{--filename}).  If it
+is a short option, we continue on:
 
 @example
 @c file eg/lib/getopt.awk
-    if (_opti == 0)
-        _opti = 2
-    thisopt = substr(argv[Optind], _opti, 1)
-    Optopt = thisopt
-    i = index(options, thisopt)
-    if (i == 0) @{
-        if (Opterr)
-            printf("%c -- invalid option\n", thisopt) > "/dev/stderr"
-        if (_opti >= length(argv[Optind])) @{
-            Optind++
-            _opti = 0
-        @} else
-            _opti++
-        return "?"
-    @}
+    if (argv[Optind] !~ /^--/) @{        # if this is a short option
+        if (_opti == 0)
+            _opti = 2
+        thisopt = substr(argv[Optind], _opti, 1)
+        Optopt = thisopt
+        i = index(options, thisopt)
+        if (i == 0) @{
+            if (Opterr)
+                printf("%c -- invalid option\n", thisopt) > "/dev/stderr"
+            if (_opti >= length(argv[Optind])) @{
+                Optind++
+                _opti = 0
+            @} else
+                _opti++
+            return "?"
+        @}
 @c endfile
 @end example
 
@@ -22596,15 +22614,15 @@ invalid option letter actually is. Continuing on:
 
 @example
 @c file eg/lib/getopt.awk
-    if (substr(options, i + 1, 1) == ":") @{
-        # get option argument
-        if (length(substr(argv[Optind], _opti + 1)) > 0)
-            Optarg = substr(argv[Optind], _opti + 1)
-        else
-            Optarg = argv[++Optind]
-        _opti = 0
-    @} else
-        Optarg = ""
+        if (substr(options, i + 1, 1) == ":") @{
+            # get option argument
+            if (length(substr(argv[Optind], _opti + 1)) > 0)
+                Optarg = substr(argv[Optind], _opti + 1)
+            else
+                Optarg = argv[++Optind]
+            _opti = 0
+        @} else
+            Optarg = ""
 @c endfile
 @end example
 
@@ -22618,22 +22636,97 @@ examine in the current command-line argument. 
Continuing:
 
 @example
 @c file eg/lib/getopt.awk
-    if (_opti == 0 || _opti >= length(argv[Optind])) @{
+        if (_opti == 0 || _opti >= length(argv[Optind])) @{
+            Optind++
+            _opti = 0
+        @} else
+            _opti++
+        return thisopt
+@c endfile
+@end example
+
+Finally, for a short option, if @code{_opti} is either zero or at least
+the length of the current command-line argument, it means this
+element in @code{argv} is through being processed, so @code{Optind} is
+incremented to point to the next element in @code{argv}.  If neither
+condition is true, then only @code{_opti} is incremented, so that the
+next option letter can be processed on the next call to @code{getopt()}.
+
+On the other hand, if the earlier test found that this was a long
+option, we take a different branch:
+
+@example
+@c file eg/lib/getopt.awk
+    @} else @{
+        j = index(argv[Optind], "=")
+        if (j > 0)
+            thisopt = substr(argv[Optind], 3, j - 3)
+        else
+            thisopt = substr(argv[Optind], 3)
+        Optopt = thisopt
+@c endfile
+@end example
+
+First, we search this option for a possible embedded equal sign, as the
+specification of long options allows an argument to an option
+@samp{--someopt:} to be specified as @samp{--someopt=answer} as well as
+@samp{@w{--someopt answer}}.
+
+@example
+@c file eg/lib/getopt.awk
+        i = match(longopts, "(^|,)" thisopt "($|[,:])")
+        if (i == 0) @{
+            if (Opterr)
+                 printf("%s -- invalid option\n", thisopt) > "/dev/stderr"
+            Optind++
+            return "?"
+        @}
+@c endfile
+@end example
+
+Next, we try to find the current option in @code{longopts}.  The regular
+expression given to @code{match()}, @code{@w{"(^|,)" thisopt "($|[,:])"}},
+matches this option at the beginning of @code{longopts}, or at the
+beginning of a subsequent long option (the previous long option would
+have been terminated by a comma), and, in any case, either at the end of
+the @code{longopts} string (@samp{$}), or followed by a comma
+(separating this option from a subsequent option) or a colon (indicating
+this long option takes an argument) (@samp{@w{[,:]}}).
+
+Using this regular expression, we check to see if the current option
+might possibly be in @code{longopts} (if @code{longopts} is not
+specified, this test will also fail).  In case of an error, we possibly
+print an error message and then return @code{"?"}. Continuing on:
+
+@example
+@c file eg/lib/getopt.awk
+        if (substr(longopts, i+1+length(thisopt), 1) == ":") @{
+            if (j > 0)
+                Optarg = substr(argv[Optind], j + 1)
+            else
+                Optarg = argv[++Optind]
+        @} else
+            Optarg = ""
+@c endfile
+@end example
+
+We now check to see if this option takes an argument and, if so, we set
+@code{Optarg} to the value of that argument (either a value after an
+equal sign specified on the command line, immediately adjoining the long
+option string, or as the next argument on the command line).
+
+@example
+@c file eg/lib/getopt.awk
         Optind++
-        _opti = 0
-    @} else
-        _opti++
-    return thisopt
+        return thisopt
+    @}
 @}
 @c endfile
 @end example
 
-Finally, if @code{_opti} is either zero or greater than the length of the
-current command-line argument, it means this element in @code{argv} is
-through being processed, so @code{Optind} is incremented to point to the
-next element in @code{argv}.  If neither condition is true, then only
-@code{_opti} is incremented, so that the next option letter can be processed
-on the next call to @code{getopt()}.
+We increase @code{Optind} (which we already increased once if a required
+argument was given as a separate command-line argument), and return the
+long option (minus its leading dashes).
 
 The @code{BEGIN} rule initializes both @code{Opterr} and @code{Optind} to one.
 @code{Opterr} is set to one, because the default behavior is for 
@code{getopt()}
@@ -22649,20 +22742,21 @@ BEGIN @{
 
     # test program
     if (_getopt_test) @{
-        while ((_go_c = getopt(ARGC, ARGV, "ab:cd")) != -1)
-            printf("c = <%c>, Optarg = <%s>\n",
-                                       _go_c, Optarg)
+        _myshortopts = "ab:cd"
+        _mylongopts = "longa,longb:,otherc,otherd"
+
+        while ((_go_c = getopt(ARGC, ARGV, _myshortopts, _mylongopts)) != -1)
+            printf("c = <%s>, Optarg = <%s>\n", _go_c, Optarg)
         printf("non-option arguments:\n")
         for (; Optind < ARGC; Optind++)
-            printf("\tARGV[%d] = <%s>\n",
-                                    Optind, ARGV[Optind])
+            printf("\tARGV[%d] = <%s>\n", Optind, ARGV[Optind])
     @}
 @}
 @c endfile
 @end example
 
 The rest of the @code{BEGIN} rule is a simple test program.  Here are the
-results of two sample runs of the test program:
+results of some sample runs of the test program:
 
 @example
 $ @kbd{awk -f getopt.awk -v _getopt_test=1 -- -a -cbARG bax -x}
@@ -22680,9 +22774,21 @@ $ @kbd{awk -f getopt.awk -v _getopt_test=1 -- -a -x -- 
xyz abc}
 @print{} non-option arguments:
 @print{}         ARGV[4] = <xyz>
 @print{}         ARGV[5] = <abc>
+
+$ @kbd{awk -f getopt.awk -v _getopt_test=1 -- -a \}
+> @kbd{--longa -b xx --longb=foo=bar --otherd --otherc arg1 arg2}
+@print{} c = <a>, Optarg = <>
+@print{} c = <longa>, Optarg = <>
+@print{} c = <b>, Optarg = <xx>
+@print{} c = <longb>, Optarg = <foo=bar>
+@print{} c = <otherd>, Optarg = <>
+@print{} c = <otherc>, Optarg = <>
+@print{} non-option arguments:
+@print{}       ARGV[8] = <arg1>
+@print{}       ARGV[9] = <arg2>
 @end example
 
-In both runs, the first @option{--} terminates the arguments to
+In all the runs, the first @option{--} terminates the arguments to
 @command{awk}, so that it does not try to interpret the @option{-a},
 etc., as its own options.
 

-----------------------------------------------------------------------

Summary of changes:
 awklib/eg/lib/getopt.awk |  94 ++++--
 doc/ChangeLog            |   5 +
 doc/gawk.info            | 773 ++++++++++++++++++++++++++---------------------
 doc/gawk.texi            | 214 +++++++++----
 doc/gawktexi.in          | 214 +++++++++----
 5 files changed, 815 insertions(+), 485 deletions(-)


hooks/post-receive
-- 
gawk



reply via email to

[Prev in Thread] Current Thread [Next in Thread]