gawk-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[gawk-diffs] [SCM] gawk branch, gawk_performance, updated. 9f4b6c25cd7ef


From: John Haque
Subject: [gawk-diffs] [SCM] gawk branch, gawk_performance, updated. 9f4b6c25cd7ef4532bb9ec2e53026bf10a44708c
Date: Sat, 20 Aug 2011 14:16:33 +0000

This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "gawk".

The branch, gawk_performance has been updated
       via  9f4b6c25cd7ef4532bb9ec2e53026bf10a44708c (commit)
      from  dae23ec25804903a6c2b7094a2ebd5541e92bb88 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://git.sv.gnu.org/cgit/gawk.git/commit/?id=9f4b6c25cd7ef4532bb9ec2e53026bf10a44708c

commit 9f4b6c25cd7ef4532bb9ec2e53026bf10a44708c
Author: john haque <address@hidden>
Date:   Sat Aug 20 09:01:47 2011 -0500

    Add the new files.

diff --git a/cint_array.c b/cint_array.c
new file mode 100644
index 0000000..e7eb09f
--- /dev/null
+++ b/cint_array.c
@@ -0,0 +1,1225 @@
+/*
+ * cint_array.c - routines for arrays of (mostly) consecutive positive integer 
indices.
+ */
+
+/* 
+ * Copyright (C) 1986, 1988, 1989, 1991-2011 the Free Software Foundation, Inc.
+ * 
+ * This file is part of GAWK, the GNU implementation of the
+ * AWK Programming Language.
+ * 
+ * GAWK is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ * 
+ * GAWK is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, 
USA
+ */
+
+#include "awk.h"
+
+extern FILE *output_fp;
+extern void indent(int indent_level);
+extern NODE **is_integer(NODE *symbol, NODE *subs);
+
+/*
+ * NHAT         ---  maximum size of a leaf array (2^NHAT).
+ * THRESHOLD    ---  Maximum capacity waste; THRESHOLD >= 2^(NHAT + 1).
+ */
+
+static int NHAT = 10; 
+static long THRESHOLD;
+
+/* What is the optimium NHAT ? timing results suggest that 10 is a good choice,
+ * although differences aren't that significant for > 10.
+ */
+
+
+static NODE **cint_array_init(NODE *symbol, NODE *subs);
+static NODE **is_uinteger(NODE *symbol, NODE *subs);
+static NODE **cint_lookup(NODE *symbol, NODE *subs);
+static NODE **cint_exists(NODE *symbol, NODE *subs);
+static NODE **cint_clear(NODE *symbol, NODE *subs);
+static NODE **cint_remove(NODE *symbol, NODE *subs);
+static NODE **cint_list(NODE *symbol, NODE *t);
+static NODE **cint_copy(NODE *symbol, NODE *newsymb);
+static NODE **cint_dump(NODE *symbol, NODE *ndump);
+#ifdef ARRAYDEBUG
+static NODE **cint_option(NODE *opt, NODE *val);
+static void cint_print(NODE *symbol);
+#endif
+
+array_ptr cint_array_func[] = {
+       cint_array_init,
+       is_uinteger,
+       cint_lookup,
+       cint_exists,
+       cint_clear,
+       cint_remove,
+       cint_list,
+       cint_copy,
+       cint_dump,
+#ifdef ARRAYDEBUG
+       cint_option,
+#endif
+};
+
+static inline int cint_hash(long k);
+static inline NODE **cint_find(NODE *symbol, long k, int h1);
+
+static inline NODE *make_node(NODETYPE type);
+
+static NODE **tree_lookup(NODE *symbol, NODE *tree, long k, int m, long base);
+static NODE **tree_exists(NODE *tree, long k);
+static void tree_clear(NODE *tree);
+static int tree_remove(NODE *symbol, NODE *tree, long k);
+static void tree_copy(NODE *newsymb, NODE *tree, NODE *newtree);
+static long tree_list(NODE *tree, NODE **list, unsigned int flags);
+static inline NODE **tree_find(NODE *tree, long k, int i);
+static void tree_info(NODE *tree, NODE *ndump, const char *aname);
+static size_t tree_kilobytes(NODE *tree);
+#ifdef ARRAYDEBUG
+static void tree_print(NODE *tree, size_t bi, int indent_level);
+#endif
+
+static inline NODE **leaf_lookup(NODE *symbol, NODE *array, long k, long size, 
long base);
+static inline NODE **leaf_exists(NODE *array, long k);
+static void leaf_clear(NODE *array);
+static int leaf_remove(NODE *symbol, NODE *array, long k);
+static void leaf_copy(NODE *newsymb, NODE *array, NODE *newarray);
+static long leaf_list(NODE *array, NODE **list, unsigned int flags);
+static void leaf_info(NODE *array, NODE *ndump, const char *aname);
+#ifdef ARRAYDEBUG
+static void leaf_print(NODE *array, size_t bi, int indent_level);
+#endif
+
+/* powers of 2 table upto 2^30 */ 
+static const long power_two_table[] = {
+       1, 2, 4, 8, 16, 32, 64,
+       128, 256, 512, 1024, 2048, 4096,
+       8192, 16384, 32768, 65536, 131072, 262144,
+       524288, 1048576, 2097152, 4194304, 8388608, 16777216,
+       33554432, 67108864, 134217728, 268435456, 536870912, 1073741824
+};
+
+
+#define ISUINT(a, s)   ((((s)->flags & NUMINT) != 0 || is_integer(a, s) != 
NULL) \
+                                    && (s)->numbr >= 0)
+
+/*
+ * To store 2^n integers, allocate top-level array of size n, elements
+ * of which are 1-Dimensional (leaf-array) of geometrically increasing
+ * size (power of 2).   
+ *
+ *  [0]   -->  [ 0 ]
+ *  [1]   -->  [ 1 ]
+ *  |2|   -->  [ 2 | 3 ]
+ *  |3|   -->  [ 4 | 5 | 6 | 7 ]
+ *  |.|
+ *  |k|   -->  [ 2^(k - 1)| ...  | 2^k - 1 ]
+ *  ...
+ *
+ * For a given integer n (> 0), the leaf-array is at 1 + floor(log2(n)). 
+ *
+ * The idea for the geometrically increasing array sizes is from:
+ *     Fast Functional Lists, Hash-Lists, Deques and Variable Length Arrays.
+ *     Bagwell, Phil (2002).
+ *     http://infoscience.epfl.ch/record/64410/files/techlists.pdf
+ *
+ * Disadvantage:
+ * Worst case memory waste > 99% and will happen when each of the
+ * leaf arrays contains only a single element. Even with consecutive
+ * integers, memory waste can be as high as 50%.
+ *
+ * Solution: Hashed Array Trees (HATs).
+ *
+ */
+
+/* cint_array_init ---  check relevant environment variables */
+
+static NODE **
+cint_array_init(NODE *symbol ATTRIBUTE_UNUSED, NODE *subs ATTRIBUTE_UNUSED)
+{
+       long newval;
+
+       if ((newval = getenv_long("NHAT")) > 1 && newval < INT32_BIT)
+               NHAT = newval;
+       THRESHOLD = power_two_table[NHAT + 1];
+       return (NODE **) ! NULL;
+}
+
+
+/* is_uinteger --- test if the subscript is an integer >= 0 */
+
+NODE **
+is_uinteger(NODE *symbol, NODE *subs)
+{
+       if (is_integer(symbol, subs) != NULL && subs->numbr >= 0)
+               return (NODE **) ! NULL;
+       return NULL;
+}
+
+
+/* cint_lookup --- Find the subscript in the array; Install it if it isn't 
there. */
+
+static NODE **
+cint_lookup(NODE *symbol, NODE *subs)
+{
+       NODE **lhs;
+       long k;
+       int h1 = -1, m, li;
+       NODE *tn, *xn;
+       long cint_size, capacity;
+
+       k = -1;
+       if (ISUINT(symbol, subs)) {
+               k = subs->numbr;        /* k >= 0 */
+               h1 = cint_hash(k);      /* h1 >= NHAT */
+               if ((lhs = cint_find(symbol, k, h1)) != NULL)
+                       return lhs;
+       }
+       xn = symbol->xarray;
+       if (xn != NULL && (lhs = xn->aexists(xn, subs)) != NULL)
+               return lhs;
+
+       /* It's not there, install it */
+
+       if (k < 0)
+               goto xinstall;
+
+       m = h1 - 1;     /* m >= (NHAT- 1) */
+
+       /* Estimate capacity upper bound.
+        * capacity upper bound = current capacity + leaf array size.
+        */
+       li = m > NHAT ? m : NHAT;
+       while (li >= NHAT) {
+               /* leaf-array of a HAT */
+               li = (li + 1) / 2;
+       }
+       capacity = symbol->array_capacity + power_two_table[li];
+
+       cint_size = (xn == NULL) ? symbol->table_size
+                               : (symbol->table_size - xn->table_size);
+       assert(cint_size >= 0);
+       if ((capacity - cint_size) > THRESHOLD)
+               goto xinstall;
+
+       if (symbol->nodes == NULL) {
+               symbol->array_capacity = 0;
+               assert(symbol->table_size == 0);
+
+               /* nodes[0] .. nodes[NHAT- 1] not used */
+               emalloc(symbol->nodes, NODE **, INT32_BIT * sizeof(NODE *), 
"cint_lookup");
+               memset(symbol->nodes, '\0', INT32_BIT * sizeof(NODE *));
+       }
+
+       symbol->table_size++;   /* one more element in array */
+
+       tn = symbol->nodes[h1];
+       if (tn == NULL) {
+               tn = make_node(Node_array_tree);
+               symbol->nodes[h1] = tn;
+       }
+
+       if (m < NHAT)
+               return tree_lookup(symbol, tn, k, NHAT, 0);
+       return tree_lookup(symbol, tn, k, m, power_two_table[m]);
+
+xinstall:
+
+       symbol->table_size++;
+       if (xn == NULL) {
+               extern array_ptr int_array_func[];
+               extern array_ptr str_array_func[];
+
+               xn = symbol->xarray = make_array();
+               xn->vname = symbol->vname;      /* shallow copy */
+
+               /* Avoid using assoc_lookup(xn, subs) which may lead
+                * to infinite recursion.
+                */
+
+               if (is_integer(xn, subs))
+                       xn->array_funcs = int_array_func;
+               else
+                       xn->array_funcs = str_array_func;
+               xn->flags |= XARRAY;
+       }
+       return xn->alookup(xn, subs);
+}
+
+
+/* cint_exists --- test whether an index is in the array or not. */
+
+static NODE **
+cint_exists(NODE *symbol, NODE *subs)
+{
+       NODE *xn;
+
+       if (ISUINT(symbol, subs)) {
+               long k = subs->numbr;
+               NODE **lhs;
+               if ((lhs = cint_find(symbol, k, cint_hash(k))) != NULL)
+                       return lhs;
+       }
+       if ((xn = symbol->xarray) == NULL)
+               return NULL;
+       return xn->aexists(xn, subs);
+}
+
+
+/* cint_clear --- flush all the values in symbol[] */
+
+static NODE **
+cint_clear(NODE *symbol, NODE *subs ATTRIBUTE_UNUSED)
+{
+       size_t i;
+       NODE *tn;
+
+       assert(symbol->nodes != NULL);
+
+       if (symbol->xarray != NULL) {
+               NODE *xn = symbol->xarray;
+               assoc_clear(xn);
+               freenode(xn);
+               symbol->xarray = NULL;
+       }
+
+       for (i = NHAT; i < INT32_BIT; i++) {
+               tn = symbol->nodes[i];
+               if (tn != NULL) {
+                       tree_clear(tn);
+                       freenode(tn);
+               }
+       }
+
+       efree(symbol->nodes);
+       init_array(symbol);     /* re-initialize symbol */
+       return NULL;
+}
+
+
+/* cint_remove --- remove an index from the array */
+
+static NODE **
+cint_remove(NODE *symbol, NODE *subs)
+{
+       long k;
+       int h1;
+       NODE *tn, *xn = symbol->xarray;
+
+       assert(symbol->nodes != NULL);
+       if (! ISUINT(symbol, subs))
+               goto xremove;
+
+       k = subs->numbr;
+       h1 = cint_hash(k);
+       tn = symbol->nodes[h1];
+       if (tn == NULL || ! tree_remove(symbol, tn, k))
+               goto xremove;
+
+       if (tn->table_size == 0) {
+               freenode(tn);
+               symbol->nodes[h1] = NULL;
+       }
+
+       symbol->table_size--;
+
+       if (xn == NULL && symbol->table_size == 0) {
+               efree(symbol->nodes);
+               init_array(symbol);     /* re-initialize array 'symbol' */
+       } else if(xn != NULL && symbol->table_size == xn->table_size) {
+               /* promote xn to symbol */
+               xn->flags &= ~XARRAY;
+               xn->parent_array = symbol->parent_array;
+               efree(symbol->nodes);
+               *symbol = *xn;
+               freenode(xn);
+       }
+
+       return (NODE **) ! NULL;
+
+xremove:
+       xn = symbol->xarray;
+       if (xn == NULL || xn->aremove(xn, subs) == NULL)
+               return NULL;
+       if (xn->table_size == 0) {
+               freenode(xn);
+               symbol->xarray = NULL;
+       }
+       symbol->table_size--;
+       assert(symbol->table_size > 0);
+
+       return (NODE **) ! NULL;
+}
+
+
+/* cint_copy --- duplicate input array "symbol" */
+
+static NODE **
+cint_copy(NODE *symbol, NODE *newsymb)
+{
+       NODE **old, **new;
+       size_t i;
+
+       assert(symbol->nodes != NULL);
+
+       /* allocate new table */
+       emalloc(new, NODE **, INT32_BIT * sizeof(NODE *), "cint_copy");
+       memset(new, '\0', INT32_BIT * sizeof(NODE *));
+
+       old = symbol->nodes;
+       for (i = NHAT; i < INT32_BIT; i++) {
+               if (old[i] == NULL)
+                       continue;
+               new[i] = make_node(Node_array_tree); 
+               tree_copy(newsymb, old[i], new[i]);
+       }
+
+       if (symbol->xarray != NULL) {
+               NODE *xn, *n;
+               xn = symbol->xarray;
+               n = make_array();
+               n->vname = newsymb->vname;
+               (void) xn->acopy(xn, n);
+               newsymb->xarray = n;
+       } else
+               newsymb->xarray = NULL;
+
+       newsymb->nodes = new;
+       newsymb->table_size = symbol->table_size;
+       newsymb->array_capacity = symbol->array_capacity;
+       newsymb->flags = symbol->flags;
+
+       return NULL;
+}
+
+
+/* cint_list --- return a list of items */
+
+static NODE**
+cint_list(NODE *symbol, NODE *t)
+{
+       NODE **list = NULL;
+       NODE *tn, *xn;
+       unsigned long k = 0, num_elems, list_size;
+       size_t j, ja, jd;
+       int elem_size = 1;
+
+       num_elems = symbol->table_size;
+       if (num_elems == 0)
+               return NULL;
+
+       if ((t->flags & (AINDEX|AVALUE|ADELETE)) == (AINDEX|ADELETE))
+               num_elems = 1;
+
+       if ((t->flags & (AINDEX|AVALUE)) == (AINDEX|AVALUE))
+               elem_size = 2;
+       list_size = num_elems * elem_size;
+
+       if (symbol->xarray != NULL) {
+               xn = symbol->xarray;
+               list = xn->alist(xn, t);
+               assert(list != NULL);
+               t->flags &= ~(AASC|ADESC);
+               if (num_elems == 1 || num_elems == xn->table_size)
+                       return list;
+               erealloc(list, NODE **, list_size * sizeof(NODE *), 
"cint_list");
+               k = elem_size * xn->table_size;
+       } else
+               emalloc(list, NODE **, list_size * sizeof(NODE *), "cint_list");
+
+
+       if ((t->flags & AINUM) == 0)    /* not sorting by "index num" */
+               t->flags &= ~(AASC|ADESC);
+
+       /* populate it with index in ascending or descending order */
+
+       for (ja = NHAT, jd = INT32_BIT - 1; ja < INT32_BIT && jd >= NHAT; ) {
+               j = (t->flags & ADESC) ? jd-- : ja++;
+               tn = symbol->nodes[j];
+               if (tn == NULL)
+                       continue;
+               k += tree_list(tn, list + k, t->flags);
+               if (k >= list_size)
+                       return list;
+       }
+       return list;
+}
+
+
+/* cint_dump --- dump array info */
+
+static NODE **
+cint_dump(NODE *symbol, NODE *ndump)
+{
+       NODE *tn, *xn = NULL;
+       int indent_level;
+       size_t i;
+       long cint_size = 0, int_size = 0;
+       AWKNUM kb = 0;
+       extern AWKNUM int_kilobytes(NODE *symbol);
+
+       indent_level = ndump->alevel;
+
+       if (symbol->xarray != NULL) {
+               xn = symbol->xarray;
+               int_size = xn->table_size;      /* FIXME -- can be int_array or 
str_array */
+       }
+       cint_size = symbol->table_size - int_size;
+       
+       if ((symbol->flags & XARRAY) == 0)
+               fprintf(output_fp, "%s `%s'\n",
+                       (symbol->parent_array == NULL) ? "array" : "sub-array",
+                       array_vname(symbol));
+       indent_level++;
+       indent(indent_level);
+       fprintf(output_fp, "array_func: cint_array_func\n");
+       if (symbol->flags != 0) {
+               indent(indent_level);
+               fprintf(output_fp, "flags: %s\n", flags2str(symbol->flags));
+       }
+       indent(indent_level);
+       fprintf(output_fp, "NHAT: %d\n", NHAT);
+       indent(indent_level);
+       fprintf(output_fp, "THRESHOLD: %ld\n", THRESHOLD);
+       indent(indent_level);
+       fprintf(output_fp, "table_size: %ld (total), %ld (cint), %ld (int + 
str)\n",
+                               symbol->table_size, cint_size, int_size);
+       indent(indent_level);
+       fprintf(output_fp, "array_capacity: %lu\n", (unsigned long) 
symbol->array_capacity);
+       indent(indent_level);
+       fprintf(output_fp, "Load Factor: %.2g\n", (AWKNUM) cint_size / 
symbol->array_capacity);
+
+       for (i = NHAT; i < INT32_BIT; i++) {
+               tn = symbol->nodes[i];
+               if (tn == NULL)
+                       continue;
+               /* Node_array_tree  + HAT */
+               kb += (sizeof(NODE) + tree_kilobytes(tn)) / 1024.0;
+       }
+       kb += (INT32_BIT * sizeof(NODE *)) / 1024.0;    /* symbol->nodes */     
+       kb += (symbol->array_capacity * sizeof(NODE *)) / 1024.0;       /* 
value nodes in Node_array_leaf(s) */
+       if (xn != NULL)
+               kb += int_kilobytes(xn);        /* FIXME: can be str_array or 
int_array ? */
+       indent(indent_level);
+       fprintf(output_fp, "memory: %.2g kB (total)\n", kb);
+
+       /* dump elements */
+
+       if (ndump->adepth >= 0) {
+               const char *aname;
+
+               fprintf(output_fp, "\n");
+               aname = make_aname(symbol);
+               for (i = NHAT; i < INT32_BIT; i++) {
+                       tn = symbol->nodes[i];
+                       if (tn != NULL)
+                               tree_info(tn, ndump, aname);
+               }
+       }
+
+       if (xn != NULL) {
+               fprintf(output_fp, "\n");
+               xn->adump(xn, ndump);
+       }
+
+#ifdef ARRAYDEBUG
+       if (ndump->adepth < -999)
+               cint_print(symbol);
+#endif
+
+       return NULL;
+}
+
+
+/* cint_hash --- locate the HAT for a given number 'k' */
+
+static inline int
+cint_hash(long k)
+{
+       uint32_t num, r, shift;
+
+       assert(k >= 0);
+       if (k == 0)
+               return NHAT;
+       num = k;
+
+       /* Find the Floor(log base 2 of 32-bit integer) */
+
+       /* Warren Jr., Henry S. (2002). Hacker's Delight.
+        * Addison Wesley. pp. pp. 215. ISBN 978-0201914658.
+        *
+        *      r = 0;
+        *      if (num >= 1<<16) { num >>= 16; r += 16; }
+        *      if (num >= 1<< 8) { num >>=  8; r +=  8; }
+        *      if (num >= 1<< 4) { num >>=  4; r +=  4; }
+        *      if (num >= 1<< 2) { num >>=  2; r +=  2; }
+        *      if (num >= 1<< 1) {             r +=  1; }
+        */
+
+
+       /* Slightly different code copied from:
+        *
+        * http://www-graphics.stanford.edu/~seander/bithacks.html
+        * Bit Twiddling Hacks
+        * By Sean Eron Anderson
+        * address@hidden
+        * Individually, the code snippets here are in the public domain
+        * (unless otherwise noted) — feel free to use them however you 
please.
+        * The aggregate collection and descriptions are © 1997-2005
+        * Sean Eron Anderson. The code and descriptions are distributed in the
+        * hope that they will be useful, but WITHOUT ANY WARRANTY and without
+        * even the implied warranty of merchantability or fitness for a 
particular
+        * purpose.  
+        *
+        */
+
+       r = (num > 0xFFFF) << 4; num >>= r;
+       shift = (num > 0xFF) << 3; num >>= shift; r |= shift;
+       shift = (num > 0x0F) << 2; num >>= shift; r |= shift;
+       shift = (num > 0x03) << 1; num >>= shift; r |= shift;
+       r |= (num >> 1);
+
+       /* We use a single HAT for 0 <= num < 2^NHAT */
+       if (r < NHAT)
+               return NHAT;
+
+       return (1 + r);
+}
+
+
+/* cint_find --- locate the integer subscript */
+
+static inline NODE **
+cint_find(NODE *symbol, long k, int h1)
+{
+       NODE *tn;
+
+       if (symbol->nodes == NULL || (tn = symbol->nodes[h1]) == NULL)
+               return NULL;
+       return tree_exists(tn, k);
+}
+
+
+#ifdef ARRAYDEBUG
+
+static NODE **
+cint_option(NODE *opt, NODE *val)
+{
+       NODE *tmp;
+       NODE **ret = (NODE **) ! NULL;
+
+       tmp = force_string(opt);
+       (void) force_number(val);
+       if (STREQ(tmp->stptr, "NHAT"))
+               NHAT = (int) val->numbr;
+       else
+               ret = NULL;
+       return ret;
+}
+
+
+/* cint_print --- print structural info */
+
+static void
+cint_print(NODE *symbol)
+{
+       NODE *tn;
+       size_t i;
+
+       fprintf(output_fp, "I[%4lu:%-4lu]\n", (unsigned long) INT32_BIT,
+                               (unsigned long) symbol->table_size);
+       for (i = NHAT; i < INT32_BIT; i++) {
+               tn = symbol->nodes[i];
+               if (tn == NULL)
+                       continue;
+               tree_print(tn, i, 1);
+       }
+}
+
+#endif
+
+
+/*------------------------ Hashed Array Trees -----------------------------*/
+
+/*
+ * HATs: Hashed Array Trees
+ * Fast variable-length arrays
+ * Edward Sitarski
+ * http://www.drdobbs.com/architecture-and-design/184409965
+ *
+ *  HAT has a top-level array containing a power of two
+ *  number of leaf arrays. All leaf arrays are the same size as the
+ *  top-level array. A full HAT can hold n^2 elements,
+ *  where n (some power of 2) is the size of each leaf array.
+ *  [i/n][i & (n - 1)] locates the `i th' element in a HAT.
+ *
+ */
+
+/*
+ *  A half HAT is defined here as a HAT with a top-level array of size n^2/2
+ *  and holds the first n^2/2 elements.
+ * 
+ *   1. 2^8 elements can be stored in a full HAT of size 2^4.
+ *   2. 2^9 elements can be stored in a half HAT of size 2^5.     
+ *   3. When the number of elements is some power of 2, it
+ *      can be stored in a full or a half HAT.
+ *   4. When the number of elements is some power of 2, it
+ *      can be stored in a HAT (full or half) with HATs as leaf elements
+ *      (full or half),  and so on (e.g. 2^8 elements in a HAT of size 2^4 
(top-level
+ *      array dimension) with each leaf array being a HAT of size 2^2). 
+ *
+ *  IMPLEMENTATION DETAILS:
+ *    1. A HAT of 2^12 elements needs 2^6 house-keeping NODEs
+ *       of Node_array_leaf.
+ *
+ *    2. A HAT of HATS of 2^12 elements needs
+ *       2^6 * (1 Node_array_tree + 2^3 Node_array_leaf)
+ *       ~ 2^9 house-keeping NODEs.
+ *
+ *    3. When a leaf array (or leaf HAT) becomes empty, the memory
+ *       is deallocated, and when there is no leaf array (or leaf HAT) left,
+ *       the HAT is deleted.
+ *
+ *    4. A HAT stores the base (first) element, and locates the leaf array/HAT
+ *       for the `i th' element using integer division
+ *       (i - base)/n where n is the size of the top-level array.
+ *
+ */
+
+/* make_node --- initialize a NODE */
+
+static inline NODE *
+make_node(NODETYPE type)
+{
+       NODE *n;
+       getnode(n);
+       memset(n, '\0', sizeof(NODE));
+       n->type = type;
+       return n;
+}
+
+
+/* tree_lookup --- Find an integer subscript in a HAT; Install it if it isn't 
there */
+
+static NODE **
+tree_lookup(NODE *symbol, NODE *tree, long k, int m, long base)
+{
+       NODE **lhs;
+       NODE *tn;
+       int i, n;
+       size_t size;
+       long num = k;
+
+       /*
+        * HAT size (size of Top & Leaf array) = 2^n
+        * where n = Floor ((m + 1)/2). For an odd value of m,
+        * only the first half of the HAT is needed.
+        */
+
+       n = (m + 1) / 2;
+       
+       if (tree->table_size == 0) {
+               size_t actual_size;
+               NODE **table;
+
+               assert(tree->nodes == NULL);
+
+               /* initialize top-level array */
+               size = actual_size = power_two_table[n];
+               tree->array_base = base;
+               tree->array_size = size;
+               tree->table_size = 0;   /* # of elements in the array */
+               if (n > m/2) {
+                       /* only first half of the array used */
+                       actual_size /= 2;
+                       tree->flags |= HALFHAT;
+               }
+               emalloc(table, NODE **, actual_size * sizeof(NODE *), 
"tree_lookup");
+               memset(table, '\0', actual_size * sizeof(NODE *));
+               tree->nodes = table;
+       } else
+               size = tree->array_size;
+
+       num -= tree->array_base;
+       i = num / size; /* top-level array index */
+       assert(i >= 0);
+
+       if ((lhs = tree_find(tree, k, i)) != NULL)
+               return lhs;
+
+       /* It's not there, install it */
+
+       tree->table_size++;
+       base += (size * i);
+       tn = tree->nodes[i];
+       if (n > NHAT) {
+               if (tn == NULL)
+                       tn = tree->nodes[i] = make_node(Node_array_tree);
+               return tree_lookup(symbol, tn, k, n, base);
+       } else {
+               if (tn == NULL)
+                       tn = tree->nodes[i] = make_node(Node_array_leaf);
+               return leaf_lookup(symbol, tn, k, size, base);
+       }
+}
+
+
+/* tree_exists --- test whether integer subscript `k' exists or not */
+
+static NODE **
+tree_exists(NODE *tree, long k)
+{
+       int i;
+       NODE *tn;
+
+       i = (k - tree->array_base) / tree->array_size;
+       assert(i >= 0);
+       tn = tree->nodes[i];
+       if (tn == NULL)
+               return NULL;
+       if (tn->type == Node_array_tree)
+               return tree_exists(tn, k);
+       return leaf_exists(tn, k);
+}
+
+/* tree_clear --- flush all the values */
+
+static void
+tree_clear(NODE *tree)
+{
+       NODE *tn;
+       size_t  j, hsize;
+
+       hsize = tree->array_size;
+       if ((tree->flags & HALFHAT) != 0)
+               hsize /= 2;
+
+       for (j = 0; j < hsize; j++) {
+               tn = tree->nodes[j];
+               if (tn == NULL)
+                       continue;
+               if (tn->type == Node_array_tree)
+                       tree_clear(tn);
+               else
+                       leaf_clear(tn);
+               freenode(tn);
+       }
+
+       efree(tree->nodes);
+       memset(tree, '\0', sizeof(NODE));
+       tree->type = Node_array_tree;
+}
+
+
+/* tree_remove --- If the integer subscript is in the HAT, remove it */
+
+static int
+tree_remove(NODE *symbol, NODE *tree, long k)
+{
+       int i;
+       NODE *tn;
+
+       i = (k - tree->array_base) / tree->array_size;
+       assert(i >= 0);
+       tn = tree->nodes[i];
+       if (tn == NULL)
+               return FALSE;
+
+       if (tn->type == Node_array_tree
+                       && ! tree_remove(symbol, tn, k))
+               return FALSE;
+       else if (tn->type == Node_array_leaf
+                       && ! leaf_remove(symbol, tn, k))
+               return FALSE;
+
+       if (tn->table_size == 0) {
+               freenode(tn);
+               tree->nodes[i] = NULL;
+       }
+
+       /* one less item in array */
+       if (--tree->table_size == 0) {
+               efree(tree->nodes);
+               memset(tree, '\0', sizeof(NODE));
+               tree->type = Node_array_tree;
+       }
+       return TRUE;
+}
+
+
+/* tree_find --- locate an interger subscript in the HAT */
+
+static inline NODE **
+tree_find(NODE *tree, long k, int i)
+{
+       NODE *tn;
+
+       assert(tree->nodes != NULL);
+       tn = tree->nodes[i];
+       if (tn != NULL) {
+               if (tn->type == Node_array_tree)
+                       return tree_exists(tn, k);
+               return leaf_exists(tn, k);
+       }
+       return NULL;
+}
+
+
+/* tree_list --- return a list of items in the HAT */
+
+static long
+tree_list(NODE *tree, NODE **list, unsigned int flags)
+{
+       NODE *tn;
+       size_t j, cj, hsize;
+       long k = 0;
+
+       assert(list != NULL);
+
+       hsize = tree->array_size;
+       if ((tree->flags & HALFHAT) != 0)
+               hsize /= 2;
+
+       for (j = 0; j < hsize; j++) {
+               cj = (flags & ADESC) ? (hsize - 1 - j) : j;
+               tn = tree->nodes[cj];
+               if (tn == NULL)
+                       continue;
+               if (tn->type == Node_array_tree)
+                       k += tree_list(tn, list + k, flags);
+               else
+                       k += leaf_list(tn, list + k, flags);
+               if ((flags & ADELETE) != 0 && k >= 1)
+                       return k;
+       }
+       return k;
+}
+
+
+/* tree_copy --- duplicate a HAT */
+
+static void
+tree_copy(NODE *newsymb, NODE *tree, NODE *newtree)
+{ 
+       NODE **old, **new;
+       size_t j, hsize;
+
+       hsize = tree->array_size;
+       if ((tree->flags & HALFHAT) != 0)
+               hsize /= 2;
+
+       emalloc(new, NODE **, hsize * sizeof(NODE *), "tree_copy");
+       memset(new, '\0', hsize * sizeof(NODE *));
+       newtree->nodes = new;
+       newtree->array_base = tree->array_base;
+       newtree->array_size = tree->array_size;
+       newtree->table_size = tree->table_size;
+       newtree->flags = tree->flags;
+
+       old = tree->nodes;
+       for (j = 0; j < hsize; j++) {
+               if (old[j] == NULL)
+                       continue;
+               if (old[j]->type == Node_array_tree) {
+                       new[j] = make_node(Node_array_tree);
+                       tree_copy(newsymb, old[j], new[j]);
+               } else {
+                       new[j] = make_node(Node_array_leaf);
+                       leaf_copy(newsymb, old[j], new[j]);
+               }
+       }
+}
+
+
+/* tree_info --- print index, value info */
+
+static void
+tree_info(NODE *tree, NODE *ndump, const char *aname)
+{
+       NODE *tn;
+       size_t j, hsize;
+
+       hsize = tree->array_size;
+       if ((tree->flags & HALFHAT) != 0)
+               hsize /= 2;
+
+       for (j = 0; j < hsize; j++) {
+               tn = tree->nodes[j];
+               if (tn == NULL)
+                       continue;
+               if (tn->type == Node_array_tree)
+                       tree_info(tn, ndump, aname);
+               else
+                       leaf_info(tn, ndump, aname);
+       }
+}
+
+
+/* tree_kilobytes --- calculate memory consumption of a HAT */
+
+static size_t
+tree_kilobytes(NODE *tree)
+{
+       NODE *tn;
+       size_t j, hsize;
+       size_t sz = 0;
+
+       hsize = tree->array_size;
+       if ((tree->flags & HALFHAT) != 0)
+               hsize /= 2;
+       for (j = 0; j < hsize; j++) {
+               tn = tree->nodes[j];
+               if (tn == NULL)
+                       continue;
+               sz += sizeof(NODE);     /* Node_array_tree or Node_array_leaf */
+               if (tn->type == Node_array_tree)
+                       sz += tree_kilobytes(tn);
+       }
+       sz += hsize * sizeof(NODE *);   /* tree->nodes */
+       return sz;
+}
+
+#ifdef ARRAYDEBUG
+
+/* tree_print --- print the HAT structures */
+
+static void
+tree_print(NODE *tree, size_t bi, int indent_level)
+{
+       NODE *tn;
+       size_t j, hsize;
+
+       indent(indent_level);
+
+       hsize = tree->array_size;
+       if ((tree->flags & HALFHAT) != 0)
+               hsize /= 2;
+       fprintf(output_fp, "%4d:%s[%4lu:%-4lu]\n", bi,
+                       (tree->flags & HALFHAT) ? "HH" : "H",
+                       (unsigned long) hsize, (unsigned long) 
tree->table_size);
+
+       for (j = 0; j < hsize; j++) {
+               tn = tree->nodes[j];
+               if (tn == NULL)
+                       continue;
+               if (tn->type == Node_array_tree)
+                       tree_print(tn, j, indent_level + 1);
+               else
+                       leaf_print(tn, j, indent_level + 1);
+       }
+}
+#endif
+
+/*--------------------- leaf (linear 1-D) array --------------------*/
+
+/* leaf_lookup --- find an integer subscript in the array; Install it if
+       it isn't there.
+*/
+
+static inline NODE **
+leaf_lookup(NODE *symbol, NODE *array, long k, long size, long base)
+{
+       NODE **lhs;
+
+       if (array->nodes == NULL) {
+               array->table_size = 0;  /* sanity */
+               array->array_size = size;
+               array->array_base = base;
+               emalloc(array->nodes, NODE **, size * sizeof(NODE *), 
"leaf_lookup");
+               memset(array->nodes, '\0', size * sizeof(NODE *));
+               symbol->array_capacity += size;
+       }
+
+       lhs = array->nodes + (k - base); /* leaf element */
+       if (*lhs == NULL) {
+               array->table_size++;    /* one more element in leaf array */
+               *lhs = dupnode(Nnull_string);
+       }
+       return lhs;
+}
+
+
+/* leaf_exists --- check if the array contains an integer subscript */ 
+
+static inline NODE **
+leaf_exists(NODE *array, long k)
+{
+       NODE **lhs;
+       lhs = array->nodes + (k - array->array_base); 
+       return (*lhs != NULL) ? lhs : NULL;
+}
+
+
+/* leaf_clear --- flush all values in the array */
+
+static void
+leaf_clear(NODE *array)
+{
+       long i, size = array->array_size;
+       NODE *r;
+
+       for (i = 0; i < size; i++) {
+               r = array->nodes[i];
+               if (r == NULL)
+                       continue;
+               if (r->type == Node_var_array) {
+                       assoc_clear(r);         /* recursively clear all 
sub-arrays */
+                       efree(r->vname);                        
+                       freenode(r);
+               } else
+                       unref(r);
+       }
+       efree(array->nodes);
+       array->nodes = NULL;
+       array->array_size = array->table_size = 0;
+}
+
+
+/* leaf_remove --- remove an integer subscript from the array */
+
+static int
+leaf_remove(NODE *symbol, NODE *array, long k)
+{
+       NODE **lhs;
+
+       lhs = array->nodes + (k - array->array_base); 
+       if (*lhs == NULL)
+               return FALSE;
+       *lhs = NULL;
+       if (--array->table_size == 0) {
+               efree(array->nodes);
+               array->nodes = NULL;
+               symbol->array_capacity -= array->array_size;
+               array->array_size = 0;  /* sanity */
+       }
+       return TRUE;
+}
+
+
+/* leaf_copy --- duplicate a leaf array */
+
+static void
+leaf_copy(NODE *newsymb, NODE *array, NODE *newarray)
+{
+       NODE **old, **new;
+       long size, i;
+
+       size = array->array_size;
+       emalloc(new, NODE **, size * sizeof(NODE *), "leaf_copy");
+       memset(new, '\0', size * sizeof(NODE *));
+       newarray->nodes = new;
+       newarray->array_size = size;
+       newarray->array_base = array->array_base;
+       newarray->flags = array->flags;
+       newarray->table_size = array->table_size;
+
+       old = array->nodes;
+       for (i = 0; i < size; i++) {
+               if (old[i] == NULL)
+                       continue;
+               if (old[i]->type == Node_val)
+                       new[i] = dupnode(old[i]);
+               else {
+                       NODE *r;
+                       r = make_array();
+                       r->vname = estrdup(old[i]->vname, 
strlen(old[i]->vname));
+                       r->parent_array = newsymb;
+                       new[i] = assoc_copy(old[i], r);
+               }
+       }
+}
+
+
+/* leaf_list --- return a list of items */
+
+static long
+leaf_list(NODE *array, NODE **list, unsigned int flags)
+{
+       NODE *r, *subs;
+       long num, i, ci, k = 0;
+       long size = array->array_size;
+       static char buf[100];
+
+       for (i = 0; i < size; i++) {
+               ci = (flags & ADESC) ? (size - 1 - i) : i;
+               r = array->nodes[ci];
+               if (r == NULL)
+                       continue;
+
+               /* index */
+               num = array->array_base + ci;
+               if (flags & AISTR) {
+                       sprintf(buf, "%ld", num); 
+                       subs = make_string(buf, strlen(buf));
+                       subs->numbr = num;
+                       subs->flags |= (NUMCUR|NUMINT);
+               } else {
+                       subs = make_number((AWKNUM) num);
+                       subs->flags |= (INTIND|NUMINT);
+               }
+               list[k++] = subs;
+
+               /* value */
+               if (flags & AVALUE) {
+                       if (r->type == Node_val) {
+                               if ((flags & AVNUM) != 0)
+                                       (void) force_number(r);
+                               else if ((flags & AVSTR) != 0)
+                                       r = force_string(r);
+                       }
+                       list[k++] = r;
+               }
+               if ((flags & ADELETE) != 0 && k >= 1)
+                       return k;
+       }
+
+       return k;
+}
+
+
+/* leaf_info --- print index, value info */
+
+static void
+leaf_info(NODE *array, NODE *ndump, const char *aname)
+{
+       NODE *subs, *val;
+       size_t i, size;
+
+       size = array->array_size;
+
+       subs = make_number((AWKNUM) 0.0);
+       subs->flags |= (INTIND|NUMINT);
+       for (i = 0; i < size; i++) {
+               val = array->nodes[i];
+               if (val == NULL)
+                       continue;
+               subs->numbr = array->array_base + i;
+               assoc_info(subs, val, ndump, aname);
+       }
+       unref(subs);
+}
+
+#ifdef ARRAYDEBUG
+
+/* leaf_print --- print the leaf-array structure */
+
+
+static void
+leaf_print(NODE *array, size_t bi, int indent_level)
+{
+       indent(indent_level);
+       fprintf(output_fp, "%4d:L[%4lu:%-4lu]\n", bi,
+                       (unsigned long) array->array_size,
+                       (unsigned long) array->table_size);
+}
+#endif
diff --git a/int_array.c b/int_array.c
new file mode 100644
index 0000000..913e154
--- /dev/null
+++ b/int_array.c
@@ -0,0 +1,833 @@
+/*
+ * int_array.c - routines for arrays of integer indices.
+ */
+
+/* 
+ * Copyright (C) 1986, 1988, 1989, 1991-2011 the Free Software Foundation, Inc.
+ * 
+ * This file is part of GAWK, the GNU implementation of the
+ * AWK Programming Language.
+ * 
+ * GAWK is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ * 
+ * GAWK is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, 
USA
+ */
+
+#include "awk.h"
+
+extern FILE *output_fp;
+extern void indent(int indent_level);
+extern NODE **is_integer(NODE *symbol, NODE *subs);
+
+static size_t INT_CHAIN_MAX = 2;
+
+static NODE **int_array_init(NODE *symbol, NODE *subs);
+static NODE **int_lookup(NODE *symbol, NODE *subs);
+static NODE **int_exists(NODE *symbol, NODE *subs);
+static NODE **int_clear(NODE *symbol, NODE *subs);
+static NODE **int_remove(NODE *symbol, NODE *subs);
+static NODE **int_list(NODE *symbol, NODE *t);
+static NODE **int_copy(NODE *symbol, NODE *newsymb);
+static NODE **int_dump(NODE *symbol, NODE *ndump);
+
+#ifdef ARRAYDEBUG
+static NODE **int_option(NODE *opt, NODE *val);
+#endif
+
+static uint32_t int_hash(uint32_t k, uint32_t hsize);
+static inline NODE **int_find(NODE *symbol, long k, uint32_t hash1);
+static NODE **int_insert(NODE *symbol, long k, uint32_t hash1);
+static void grow_int_table(NODE *symbol);
+
+array_ptr int_array_func[] = {
+       int_array_init,
+       is_integer,
+       int_lookup,
+       int_exists,
+       int_clear,
+       int_remove,
+       int_list,
+       int_copy,
+       int_dump,
+#ifdef ARRAYDEBUG
+       int_option,
+#endif
+};
+
+
+/* int_array_init --- check relevant environment variables */
+
+static NODE **
+int_array_init(NODE *symbol ATTRIBUTE_UNUSED, NODE *subs ATTRIBUTE_UNUSED)
+{
+       long newval;
+
+       if ((newval = getenv_long("INT_CHAIN_MAX")) > 0)
+               INT_CHAIN_MAX = newval;
+       return (NODE **) ! NULL;
+}
+
+
+/* is_integer --- check if subscript is an integer */
+
+NODE **
+is_integer(NODE *symbol, NODE *subs)
+{
+       long l;
+       AWKNUM d;
+
+       if (subs == Nnull_string)
+               return NULL;
+
+       if ((subs->flags & NUMINT) != 0)
+               return (NODE **) ! NULL;
+
+       if ((subs->flags & NUMBER) != 0) {
+               d = subs->numbr;
+               if (d <= INT32_MAX && d >= INT32_MIN && d == (int32_t) d) {
+                       subs->flags |= NUMINT;
+                       return (NODE **) ! NULL;
+               }
+               return NULL;
+       }
+
+       /* a[3]=1; print "3" in a    -- TRUE
+        * a[3]=1; print "+3" in a   -- FALSE
+        * a[3]=1; print "03" in a   -- FALSE
+        * a[-3]=1; print "-3" in a  -- TRUE
+        */
+
+       if ((subs->flags & (STRING|STRCUR)) != 0) {
+               char *cp = subs->stptr, *cpend, *ptr;
+               char save;
+               size_t len = subs->stlen;
+
+               if (len == 0 || (! isdigit((unsigned char) *cp) && *cp != '-'))
+                       return NULL;
+               if (len > 1 && 
+                       ((*cp == '0')           /* "00", "011" .. */
+                               || (*cp == '-' && *(cp + 1) == '0')     /* 
"-0", "-011" .. */
+                       )
+               )
+                       return NULL;
+               if (len == 1 && *cp != '-') {   /* single digit */
+                       subs->numbr = (long) (*cp - '0');
+                       if ((subs->flags & MAYBE_NUM) != 0) {
+                               subs->flags &= ~MAYBE_NUM;
+                               subs->flags |= NUMBER;
+                       }
+                       subs->flags |= (NUMCUR|NUMINT);
+                       return (NODE **) ! NULL;
+               }
+
+               cpend = cp + len;
+               save = *cpend;
+               *cpend = '\0';
+
+               errno = 0;
+               l = strtol(cp, & ptr, 10);
+               *cpend = save;
+               if (errno != 0 || ptr != cpend)
+                       return NULL;
+               subs->numbr = l;
+               if ((subs->flags & MAYBE_NUM) != 0) {
+                       subs->flags &= ~MAYBE_NUM;
+                       subs->flags |= NUMBER;
+               }
+               subs->flags |= NUMCUR;
+               if (l <= INT32_MAX && l >= INT32_MIN) {
+                       subs->flags |= NUMINT;
+                       return (NODE **) ! NULL;
+               }
+       }
+       return NULL;
+}
+
+
+/* int_lookup --- Find SYMBOL[SUBS] in the assoc array.  Install it with value 
""
+ * if it isn't there. Returns a pointer ala get_lhs to where its value is 
stored.
+ */
+
+static NODE **
+int_lookup(NODE *symbol, NODE *subs)
+{
+       uint32_t hash1;
+       long k;
+       unsigned long size;
+       NODE **lhs;
+       NODE *xn;
+
+       /* N.B: symbol->table_size is the total # of non-integers 
(symbol->xarray)
+        *      and integer elements. Also, symbol->xarray must have at least 
one
+        *      item in it, and can not exist if there are no integer elements.
+        *      In that case, symbol->xarray is promoted to 'symbol' (See 
int_remove).
+        */
+
+
+       if (! is_integer(symbol, subs)) {
+               xn = symbol->xarray; 
+               if (xn == NULL) {
+                       xn = symbol->xarray = make_array();
+                       xn->vname = symbol->vname;      /* shallow copy */
+                       xn->flags |= XARRAY;
+               } else if ((lhs = xn->aexists(xn, subs)) != NULL)
+                       return lhs;
+               symbol->table_size++;
+               return assoc_lookup(xn, subs);
+       }
+
+       k = subs->numbr;
+       if (symbol->buckets == NULL)
+               grow_int_table(symbol);
+
+       hash1 = int_hash(k, symbol->array_size);
+       if ((lhs = int_find(symbol, k, hash1)) != NULL)
+               return lhs;
+       
+       /* It's not there, install it */
+       
+       symbol->table_size++;
+
+       /* first see if we would need to grow the array, before installing */
+       size = symbol->table_size;
+       if ((xn = symbol->xarray) != NULL)
+               size -= xn->table_size;
+
+       if ((symbol->flags & ARRAYMAXED) == 0
+                   && (size / symbol->array_size) > INT_CHAIN_MAX) {
+               grow_int_table(symbol);
+               /* have to recompute hash value for new size */
+               hash1 = int_hash(k, symbol->array_size);
+       }
+       
+       return int_insert(symbol, k, hash1);
+}
+
+
+/* int_exists --- test whether the array element symbol[subs] exists or not,
+ *     return pointer to value if it does.
+ */
+
+static NODE **
+int_exists(NODE *symbol, NODE *subs)
+{
+       long k;
+       uint32_t hash1;
+
+       if (! is_integer(symbol, subs)) {
+               NODE *xn = symbol->xarray;
+               if (xn == NULL)
+                       return NULL;
+               return xn->aexists(xn, subs);
+       }
+       if (symbol->buckets == NULL)
+               return NULL;
+
+       k = subs->numbr;
+       hash1 = int_hash(k, symbol->array_size);
+       return int_find(symbol, k, hash1);
+}
+
+/* int_clear --- flush all the values in symbol[] */
+
+static NODE **
+int_clear(NODE *symbol, NODE *subs ATTRIBUTE_UNUSED)
+{
+       unsigned long i;
+       int j;
+       BUCKET *b, *next;
+       NODE *r;
+
+       if (symbol->xarray != NULL) {
+               NODE *xn = symbol->xarray;
+               assoc_clear(xn);
+               freenode(xn);
+               symbol->xarray = NULL;
+       }
+
+       for (i = 0; i < symbol->array_size; i++) {
+               for (b = symbol->buckets[i]; b != NULL; b = next) {
+                       next = b->ainext;
+                       for (j = 0; j < b->aicount; j++) {
+                               r = b->aivalue[j];
+                               if (r->type == Node_var_array) {
+                                       assoc_clear(r); /* recursively clear 
all sub-arrays */
+                                       efree(r->vname);                        
+                                       freenode(r);
+                               } else
+                                       unref(r);
+                       }
+                       freebucket(b);
+               }
+               symbol->buckets[i] = NULL;
+       }
+       if (symbol->buckets != NULL)
+               efree(symbol->buckets);
+       init_array(symbol);     /* re-initialize symbol */
+       symbol->flags &= ~ARRAYMAXED;
+       return NULL;
+}
+
+
+/* int_remove --- If SUBS is already in the table, remove it. */
+
+static NODE **
+int_remove(NODE *symbol, NODE *subs)
+{
+       uint32_t hash1;
+       BUCKET *b, *prev = NULL;
+       long k;
+       int i;
+       NODE *xn = symbol->xarray;
+
+       assert(symbol->buckets != NULL);
+
+       if (! is_integer(symbol, subs)) {
+               if (xn == NULL || xn->aremove(xn, subs) == NULL)
+                       return NULL;
+               if (xn->table_size == 0) {
+                       freenode(xn);
+                       symbol->xarray = NULL;
+               }
+               symbol->table_size--;
+               assert(symbol->table_size > 0);
+               return (NODE **) ! NULL;
+       }
+
+       k = subs->numbr;
+       hash1 = int_hash(k, symbol->array_size);
+
+       for (b = symbol->buckets[hash1]; b != NULL; prev = b, b = b->ainext) {
+               for (i = 0; i < b->aicount; i++) {
+                       if (k != b->ainum[i])
+                               continue;
+
+                       /* item found */
+                       if (i == 0 && b->aicount == 2) {
+                               /* removing the 1st item; move 2nd item from 
position 1 to 0 */
+
+                               b->ainum[0] = b->ainum[1];
+                               b->aivalue[0] = b->aivalue[1];
+                       } /* else
+                               removing the only item or the 2nd item */
+
+                       goto removed;
+               }
+       }
+
+       if (b == NULL)  /* item not in array */
+               return NULL;
+
+removed:
+       b->aicount--;
+
+       if (b->aicount == 0) {
+               /* detach bucket */
+               if (prev != NULL)
+                       prev->ainext = b->ainext;
+               else
+                       symbol->buckets[hash1] = b->ainext;
+
+               /* delete bucket */
+               freebucket(b);
+       } else if (b != symbol->buckets[hash1]) {
+               BUCKET *head = symbol->buckets[hash1];
+
+               assert(b->aicount == 1);
+               /* move the last element from head
+                * to bucket to make it full.
+                */
+               i = --head->aicount;    /* head has one less element */
+               b->ainum[1] = head->ainum[i];
+               b->aivalue[1] = head->aivalue[i];
+               b->aicount++;   /* bucket has one more element */
+               if (i == 0) {
+                       /* head is now empty; delete head */
+                       symbol->buckets[hash1] = head->ainext;
+                       freebucket(head);
+               }
+       } /* else
+               do nothing */
+
+       symbol->table_size--;
+       if (xn == NULL && symbol->table_size == 0) {
+               efree(symbol->buckets);
+               init_array(symbol);     /* re-initialize array 'symbol' */
+               symbol->flags &= ~ARRAYMAXED;
+       } else if (xn != NULL && symbol->table_size == xn->table_size) {
+               /* promote xn (str_array) to symbol */
+               xn->flags &= ~XARRAY;
+               xn->parent_array = symbol->parent_array;
+               efree(symbol->buckets);
+               *symbol = *xn;
+               freenode(xn);
+       }
+
+       return (NODE **) ! NULL;        /* return success */
+}
+
+
+/* int_copy --- duplicate input array "symbol" */
+
+static NODE **
+int_copy(NODE *symbol, NODE *newsymb)
+{
+       BUCKET **old, **new, **pnew;
+       BUCKET *chain, *newchain;
+       int j;
+       unsigned long i, cursize;
+
+       assert(symbol->buckets != NULL);
+
+       /* find the current hash size */
+       cursize = symbol->array_size;
+       
+       /* allocate new table */
+       emalloc(new, BUCKET **, cursize * sizeof(BUCKET *), "int_copy");
+       memset(new, '\0', cursize * sizeof(BUCKET *));
+
+       old = symbol->buckets;
+
+       for (i = 0; i < cursize; i++) {
+               for (chain = old[i], pnew = & new[i]; chain != NULL;
+                               chain = chain->ainext
+               ) {
+                       getbucket(newchain);
+                       newchain->aicount = chain->aicount;
+                       for (j = 0; j < chain->aicount; j++) {
+                               NODE *oldval;
+
+                               /*
+                                * copy the corresponding key and
+                                * value from the original input list
+                                */
+                               newchain->ainum[j] = chain->ainum[j];
+
+                               oldval = chain->aivalue[j];
+                               if (oldval->type == Node_val)
+                                       newchain->aivalue[j] = dupnode(oldval);
+                               else {
+                                       NODE *r;
+                                       r = make_array();
+                                       r->vname = estrdup(oldval->vname, 
strlen(oldval->vname));
+                                       r->parent_array = newsymb;
+                                       newchain->aivalue[j] = 
assoc_copy(oldval, r);
+                               }
+                       }
+
+                       *pnew = newchain;
+                       pnew = & newchain->ainext;
+               }
+       }       
+
+       if (symbol->xarray != NULL) {
+               NODE *xn, *n;
+               xn = symbol->xarray;
+               n = make_array();
+               n->vname = newsymb->vname;      /* shallow copy */
+               (void) xn->acopy(xn, n);
+               newsymb->xarray = n;
+       } else
+               newsymb->xarray = NULL;
+
+       newsymb->table_size = symbol->table_size;
+       newsymb->buckets = new;
+       newsymb->array_size = cursize;
+       newsymb->flags = symbol->flags;
+
+       return NULL;
+}
+
+
+/* int_list --- return a list of array items */
+
+static NODE**
+int_list(NODE *symbol, NODE *t)
+{
+       NODE **list = NULL;
+       unsigned long num_elems, list_size, i, k = 0;
+       BUCKET *b;
+       NODE *r, *subs, *xn;
+       int j, elem_size = 1;
+       long num;
+       static char buf[100];
+
+       assert(symbol->table_size > 0);
+
+       num_elems = symbol->table_size;
+       if ((t->flags & (AINDEX|AVALUE|ADELETE)) == (AINDEX|ADELETE))
+               num_elems = 1;
+
+       if ((t->flags & (AINDEX|AVALUE)) == (AINDEX|AVALUE))
+               elem_size = 2;
+       list_size = elem_size * num_elems;
+       
+       if (symbol->xarray != NULL) {
+               xn = symbol->xarray;
+               list = xn->alist(xn, t);
+               assert(list != NULL);
+               if (num_elems == 1 || num_elems == xn->table_size)
+                       return list;
+               erealloc(list, NODE **, list_size * sizeof(NODE *), "int_list");
+               k = elem_size * xn->table_size;
+       } else
+               emalloc(list, NODE **, list_size * sizeof(NODE *), "int_list");
+
+       /* populate it */
+
+       for (i = 0; i < symbol->array_size; i++) {
+               for (b = symbol->buckets[i]; b != NULL; b = b->ainext) {
+                       for (j = 0; j < b->aicount; j++) {
+                               /* index */
+                               num = b->ainum[j];
+                               if (t->flags & AISTR) {
+                                       sprintf(buf, "%ld", num); 
+                                       subs = make_string(buf, strlen(buf));
+                                       subs->numbr = num;
+                                       subs->flags |= (NUMCUR|NUMINT);
+                               } else {
+                                       subs = make_number((AWKNUM) num);
+                                       subs->flags |= (INTIND|NUMINT);
+                               }
+                               list[k++] = subs;
+
+                               /* value */
+                               if (t->flags & AVALUE) {
+                                       r = b->aivalue[j];
+                                       if (r->type == Node_val) {
+                                               if ((t->flags & AVNUM) != 0)
+                                                       (void) force_number(r);
+                                               else if ((t->flags & AVSTR) != 
0)
+                                                       r = force_string(r);
+                                       }
+                                       list[k++] = r;
+                               }
+
+                               if (k >= list_size)
+                                       return list;
+                       }
+               }
+       }
+       return list;
+}
+
+
+/* int_kilobytes --- calculate memory consumption of the assoc array */
+
+AWKNUM
+int_kilobytes(NODE *symbol)
+{
+       unsigned long i, bucket_cnt = 0;
+       BUCKET *b;
+       AWKNUM kb;
+       extern AWKNUM str_kilobytes(NODE *symbol);
+
+       for (i = 0; i < symbol->array_size; i++) {
+               for (b = symbol->buckets[i]; b != NULL; b = b->ainext)
+                       bucket_cnt++;
+       }
+       kb = (((AWKNUM) bucket_cnt) * sizeof (BUCKET) + 
+                       ((AWKNUM) symbol->array_size) * sizeof (BUCKET *)) / 
1024.0;
+
+       if (symbol->xarray != NULL)
+               kb += str_kilobytes(symbol->xarray);
+
+       return kb;
+}
+
+
+/* int_dump --- dump array info */
+
+static NODE **
+int_dump(NODE *symbol, NODE *ndump)
+{
+#define HCNT   31
+
+       int indent_level;
+       BUCKET *b;
+       NODE *xn = NULL;
+       unsigned long str_size = 0, int_size = 0;
+       unsigned long i;
+       size_t j, bucket_cnt;
+       static size_t hash_dist[HCNT + 1];
+
+       indent_level = ndump->alevel;
+
+       if (symbol->xarray != NULL) {
+               xn = symbol->xarray;
+               str_size = xn->table_size;
+       }
+       int_size = symbol->table_size - str_size;
+
+       if ((symbol->flags & XARRAY) == 0)
+               fprintf(output_fp, "%s `%s'\n",
+                               (symbol->parent_array == NULL) ? "array" : 
"sub-array",
+                               array_vname(symbol));
+
+       indent_level++;
+       indent(indent_level);
+       fprintf(output_fp, "array_func: int_array_func\n");
+       if (symbol->flags != 0) {
+               indent(indent_level);
+               fprintf(output_fp, "flags: %s\n", flags2str(symbol->flags));
+       }
+       indent(indent_level);
+       fprintf(output_fp, "INT_CHAIN_MAX: %d\n", INT_CHAIN_MAX);
+       indent(indent_level);
+       fprintf(output_fp, "array_size: %lu (int)\n", (unsigned long) 
symbol->array_size);
+       indent(indent_level);
+       fprintf(output_fp, "table_size: %lu (total), %lu (int), %lu (str)\n",
+                       (unsigned long) symbol->table_size, int_size, str_size);
+       indent(indent_level);
+       fprintf(output_fp, "Avg # of items per chain (int): %.2g\n",
+                       ((AWKNUM) int_size) / symbol->array_size);
+
+       indent(indent_level);
+       fprintf(output_fp, "memory: %.2g kB (total)\n", int_kilobytes(symbol));
+
+       /* hash value distribution */
+
+       memset(hash_dist, '\0', (HCNT + 1) * sizeof(size_t));
+       for (i = 0; i < symbol->array_size; i++) {
+               bucket_cnt = 0;
+               for (b = symbol->buckets[i]; b != NULL; b = b->ainext)
+                       bucket_cnt += b->aicount;
+               if (bucket_cnt >= HCNT)
+                       bucket_cnt = HCNT;
+               hash_dist[bucket_cnt]++;
+       }
+
+       indent(indent_level);
+       fprintf(output_fp, "Hash distribution:\n");
+       indent_level++;
+       for (j = 0; j <= HCNT; j++) {
+               if (hash_dist[j] > 0) {
+                       indent(indent_level);
+                       if (j == HCNT)
+                               fprintf(output_fp, "[>=%lu]:%lu\n",
+                                       (unsigned long) HCNT, (unsigned long) 
hash_dist[j]);
+                       else
+                               fprintf(output_fp, "[%lu]:%lu\n",
+                                       (unsigned long) j, (unsigned long) 
hash_dist[j]);
+               }
+       }
+       indent_level--;
+
+       /* dump elements */
+
+       if (ndump->adepth >= 0) {
+               NODE *subs;
+               const char *aname;
+
+               fprintf(output_fp, "\n");
+
+               aname = make_aname(symbol);
+               subs = make_number((AWKNUM) 0);
+               subs->flags |= (INTIND|NUMINT);
+
+               for (i = 0; i < symbol->array_size; i++) {
+                       for (b = symbol->buckets[i]; b != NULL; b = b->ainext) {
+                               for (j = 0; j < b->aicount; j++) {
+                                       subs->numbr = b->ainum[j];
+                                       assoc_info(subs, b->aivalue[j], ndump, 
aname);
+                               }
+                       }
+               }
+               unref(subs);
+       }
+
+       if (xn != NULL) {
+               fprintf(output_fp, "\n");
+               xn->adump(xn, ndump);
+       }
+
+       return NULL;
+
+#undef HCNT
+}
+
+
+/* int_hash --- calculate the hash function of the integer subs */
+
+static uint32_t
+int_hash(uint32_t k, uint32_t hsize)
+{
+
+/*
+ * Bob Jenkins
+ * http://burtleburtle.net/bob/hash/integer.html
+ */
+
+#if 0
+       /* 6-shifts vs 7-shifts below */
+       k = (k+0x7ed55d16) + (k<<12);
+       k = (k^0xc761c23c) ^ (k>>19);
+       k = (k+0x165667b1) + (k<<5);
+       k = (k+0xd3a2646c) ^ (k<<9);
+       k = (k+0xfd7046c5) + (k<<3);
+       k = (k^0xb55a4f09) ^ (k>>16);
+#endif
+
+       k -= (k << 6);
+       k ^= (k >> 17);
+       k -= (k << 9);
+       k ^= (k << 4);
+       k -= (k << 3);
+       k ^= (k << 10);
+       k ^= (k >> 15);
+
+       if (k >= hsize)
+               k %= hsize;
+       return k;
+}
+
+/* assoc_find --- locate symbol[subs] */
+
+static inline NODE **
+int_find(NODE *symbol, long k, uint32_t hash1)
+{
+       BUCKET *b;
+       int i;
+
+       assert(symbol->buckets != NULL);
+       for (b = symbol->buckets[hash1]; b != NULL; b = b->ainext) {
+               for (i = 0; i < b->aicount; i++) {
+                       if (b->ainum[i] == k)
+                               return (b->aivalue + i);
+               }
+       }
+       return NULL;
+}
+
+
+/* int_insert --- install subs in the assoc array */
+
+static NODE **
+int_insert(NODE *symbol, long k, uint32_t hash1)
+{
+       BUCKET *b;
+       int i;
+
+       b = symbol->buckets[hash1];
+
+       /* Only the first bucket in the chain can be partially full,
+        * but is never empty.
+        */ 
+
+       if (b == NULL || (i = b->aicount) == 2) {
+               getbucket(b);
+               b->aicount = 0;
+               b->ainext = symbol->buckets[hash1];
+               symbol->buckets[hash1] = b;
+               i = 0;
+       }
+
+       b->ainum[i] = k;
+       b->aivalue[i] = dupnode(Nnull_string);
+       b->aicount++;
+       return & b->aivalue[i];
+}
+
+
+/* grow_int_table --- grow the hash table */
+
+static void
+grow_int_table(NODE *symbol)
+{
+       BUCKET **old, **new;
+       BUCKET *chain, *next;
+       int i, j;
+       unsigned long oldsize, newsize, k;
+
+       /*
+        * This is an array of primes. We grow the table by an order of
+        * magnitude each time (not just doubling) so that growing is a
+        * rare operation. We expect, on average, that it won't happen
+        * more than twice.  The final size is also chosen to be small
+        * enough so that MS-DOG mallocs can handle it. When things are
+        * very large (> 8K), we just double more or less, instead of
+        * just jumping from 8K to 64K.
+        */
+
+       static const unsigned long sizes[] = {
+               13, 127, 1021, 8191, 16381, 32749, 65497,
+               131101, 262147, 524309, 1048583, 2097169,
+               4194319, 8388617, 16777259, 33554467, 
+               67108879, 134217757, 268435459, 536870923,
+               1073741827
+       };
+
+       /* find next biggest hash size */
+       newsize = oldsize = symbol->array_size;
+
+       for (i = 0, j = sizeof(sizes)/sizeof(sizes[0]); i < j; i++) {
+               if (oldsize < sizes[i]) {
+                       newsize = sizes[i];
+                       break;
+               }
+       }
+       if (newsize == oldsize) {       /* table already at max (!) */
+               symbol->flags |= ARRAYMAXED;
+               return;
+       }
+
+       /* allocate new table */
+       emalloc(new, BUCKET **, newsize * sizeof(BUCKET *), "grow_int_table");
+       memset(new, '\0', newsize * sizeof(BUCKET *));
+
+       old = symbol->buckets;
+       symbol->buckets = new;
+       symbol->array_size = newsize;
+
+       /* brand new hash table */
+       if (old == NULL)
+               return;         /* DO NOT initialize symbol->table_size */
+
+       /* old hash table there, move stuff to new, free old */
+       /* note that symbol->table_size does not change if an old array. */
+
+       for (k = 0; k < oldsize; k++) {
+               long num;
+               for (chain = old[k]; chain != NULL; chain = next) {
+                       for (i = 0; i < chain->aicount; i++) {
+                               num = chain->ainum[i];
+                               *int_insert(symbol, num, int_hash(num, 
newsize)) = chain->aivalue[i];
+                       }
+                       next = chain->ainext;
+                       freebucket(chain);
+               }
+       }
+       efree(old);
+}
+
+
+#ifdef ARRAYDEBUG
+
+static NODE **
+int_option(NODE *opt, NODE *val)
+{
+       int newval;
+       NODE *tmp;
+       NODE **ret = (NODE **) ! NULL;
+
+       tmp = force_string(opt);
+       (void) force_number(val);
+       if (STREQ(tmp->stptr, "INT_CHAIN_MAX")) {
+               newval = (int) val->numbr;
+               if (newval > 0)         
+                       INT_CHAIN_MAX = newval;
+       } else
+               ret = NULL;
+       return ret;
+}
+#endif
diff --git a/str_array.c b/str_array.c
new file mode 100644
index 0000000..330280a
--- /dev/null
+++ b/str_array.c
@@ -0,0 +1,759 @@
+/*
+ * str_array.c - routines for associative arrays of string indices.
+ */
+
+/* 
+ * Copyright (C) 1986, 1988, 1989, 1991-2011 the Free Software Foundation, Inc.
+ * 
+ * This file is part of GAWK, the GNU implementation of the
+ * AWK Programming Language.
+ * 
+ * GAWK is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ * 
+ * GAWK is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, 
USA
+ */
+
+#include "awk.h"
+
+/*
+ * Tree walks (``for (iggy in foo)'') and array deletions use expensive
+ * linear searching.  So what we do is start out with small arrays and
+ * grow them as needed, so that our arrays are hopefully small enough,
+ * most of the time, that they're pretty full and we're not looking at
+ * wasted space.
+ *
+ * The decision is made to grow the array if the average chain length is
+ * ``too big''. This is defined as the total number of entries in the table
+ * divided by the size of the array being greater than some constant.
+ *
+ * 11/2002: We make the constant a variable, so that it can be tweaked
+ * via environment variable.
+ * 11/2002: Modern machines are bigger, cut this down from 10.
+ */
+
+static size_t STR_CHAIN_MAX = 2;
+
+extern FILE *output_fp;
+extern void indent(int indent_level);
+
+static NODE **str_array_init(NODE *symbol, NODE *subs);
+static NODE **str_lookup(NODE *symbol, NODE *subs);
+static NODE **str_exists(NODE *symbol, NODE *subs);
+static NODE **str_clear(NODE *symbol, NODE *subs);
+static NODE **str_remove(NODE *symbol, NODE *subs);
+static NODE **str_list(NODE *symbol, NODE *subs);
+static NODE **str_copy(NODE *symbol, NODE *newsymb);
+static NODE **str_dump(NODE *symbol, NODE *ndump);
+
+#ifdef ARRAYDEBUG
+static NODE **str_option(NODE *opt, NODE *val);
+#endif
+
+
+array_ptr str_array_func[] = {
+       str_array_init,
+       (array_ptr) 0,
+       str_lookup,
+       str_exists,
+       str_clear,
+       str_remove,
+       str_list,
+       str_copy,
+       str_dump,
+#ifdef ARRAYDEBUG
+       str_option
+#endif
+};
+
+static inline NODE **str_find(NODE *symbol, NODE *s1, size_t code1, unsigned 
long hash1);
+static void grow_table(NODE *symbol);
+
+static unsigned long gst_hash_string(const char *str, size_t len, unsigned 
long hsize, size_t *code);
+static unsigned long scramble(unsigned long x);
+static unsigned long awk_hash(const char *s, size_t len, unsigned long hsize, 
size_t *code);
+
+unsigned long (*hash)(const char *s, size_t len, unsigned long hsize, size_t 
*code) = awk_hash;
+
+
+/* str_array_init --- check relevant environment variables */
+
+static NODE **
+str_array_init(NODE *symbol ATTRIBUTE_UNUSED, NODE *subs ATTRIBUTE_UNUSED)
+{
+       long newval;
+       const char *val;
+
+       if ((newval = getenv_long("STR_CHAIN_MAX")) > 0)
+               STR_CHAIN_MAX = newval;
+       if ((val = getenv("AWK_HASH")) != NULL && strcmp(val, "gst") == 0)
+               hash = gst_hash_string;
+       return (NODE **) ! NULL;
+}
+
+
+/*
+ * assoc_lookup:
+ * Find SYMBOL[SUBS] in the assoc array.  Install it with value "" if it
+ * isn't there. Returns a pointer ala get_lhs to where its value is stored.
+ *
+ * SYMBOL is the address of the node (or other pointer) being dereferenced.
+ * SUBS is a number or string used as the subscript. 
+ */
+
+static NODE **
+str_lookup(NODE *symbol, NODE *subs)
+{
+       unsigned long hash1;
+       NODE **lhs;
+       BUCKET *b;
+       size_t code1;
+
+       subs = force_string(subs);
+
+       if (symbol->buckets == NULL)
+               grow_table(symbol);
+       hash1 = hash(subs->stptr, subs->stlen,
+                       (unsigned long) symbol->array_size, & code1);
+       if ((lhs = str_find(symbol, subs, code1, hash1)) != NULL)
+               return lhs;
+
+       /* It's not there, install it. */
+       /* first see if we would need to grow the array, before installing */
+
+       symbol->table_size++;
+       if ((symbol->flags & ARRAYMAXED) == 0
+                       && (symbol->table_size / symbol->array_size) > 
STR_CHAIN_MAX) {
+               grow_table(symbol);
+               /* have to recompute hash value for new size */
+               hash1 = code1 % (unsigned long) symbol->array_size;
+       }
+
+       if (subs->stfmt != -1) {
+               NODE *tmp;
+
+               /*
+                * Need to freeze this string value --- it must never
+                * change, no matter what happens to the value
+                * that created it or to CONVFMT, etc.; So, get
+                * a private copy.
+                */
+
+               tmp = make_string(subs->stptr, subs->stlen);
+
+               /*
+               * Set the numeric value for the index if it's  available. Useful
+               * for numeric sorting by index.  Do this only if the numeric
+               * value is available, instead of all the time, since doing it
+               * all the time is a big performance hit for something that may
+               * never be used.
+               */
+
+               if (subs->flags & NUMCUR) {
+                       tmp->numbr = subs->numbr;
+                       tmp->flags |= NUMCUR;
+               }
+               subs = tmp;
+       } else {
+               /* string value already "frozen" */     
+
+               subs = dupnode(subs);
+       }
+
+       getbucket(b);
+       b->ahnext = symbol->buckets[hash1];
+       symbol->buckets[hash1] = b;
+       b->ahname = subs;
+       b->ahname_str = subs->stptr;
+       b->ahname_len = subs->stlen;
+       b->ahvalue = dupnode(Nnull_string);
+       b->ahcode = code1;
+       return & (b->ahvalue);
+}
+
+/* str_exists --- test whether the array element symbol[subs] exists or not,
+ *             return pointer to value if it does.
+ */
+
+static NODE **
+str_exists(NODE *symbol, NODE *subs)
+{
+       NODE **lhs;
+       unsigned long hash1;
+       size_t code1;
+
+       assert(symbol->table_size > 0);
+
+       subs = force_string(subs);
+       hash1 = hash(subs->stptr, subs->stlen, (unsigned long) 
symbol->array_size, & code1);
+       lhs = str_find(symbol, subs, code1, hash1);
+       return lhs;
+}
+
+/* str_clear --- flush all the values in symbol[] */
+
+static NODE **
+str_clear(NODE *symbol, NODE *subs ATTRIBUTE_UNUSED)
+{
+       unsigned long i;
+       BUCKET *b, *next;
+       NODE *r;
+
+       for (i = 0; i < symbol->array_size; i++) {
+               for (b = symbol->buckets[i]; b != NULL; b = next) {
+                       next = b->ahnext;
+                       r = b->ahvalue;
+                       if (r->type == Node_var_array) {
+                               assoc_clear(r); /* recursively clear all 
sub-arrays */
+                               efree(r->vname);                        
+                               freenode(r);
+                       } else
+                               unref(r);
+                       unref(b->ahname);
+                       freebucket(b);
+               }
+               symbol->buckets[i] = NULL;
+       }
+
+       if (symbol->buckets != NULL)
+               efree(symbol->buckets);
+       init_array(symbol);     /* re-initialize symbol */
+       symbol->flags &= ~ARRAYMAXED;
+       return NULL;
+}
+
+
+/* str_remove --- If SUBS is already in the table, remove it. */
+
+static NODE **
+str_remove(NODE *symbol, NODE *subs)
+{
+       unsigned long hash1;
+       BUCKET *b, *prev;
+       NODE *s2;
+       size_t s1_len;
+
+       assert(symbol->table_size > 0);
+
+       s2 = force_string(subs);
+       hash1 = hash(s2->stptr, s2->stlen, (unsigned long) symbol->array_size, 
NULL);
+
+       for (b = symbol->buckets[hash1], prev = NULL; b != NULL;
+                               prev = b, b = b->ahnext) {
+
+               /* Array indexes are strings; compare as such, always! */
+               s1_len = b->ahname_len;
+
+               if (s1_len != s2->stlen)
+                       continue;
+               if (s1_len == 0         /* "" is a valid index */
+                           || memcmp(b->ahname_str, s2->stptr, s1_len) == 0) {
+                       /* item found */
+
+                       unref(b->ahname);
+                       if (prev != NULL)
+                               prev->ahnext = b->ahnext;
+                       else
+                               symbol->buckets[hash1] = b->ahnext;
+
+                       /* delete bucket */
+                       freebucket(b);
+
+                       /* one less element in array */
+                       if (--symbol->table_size == 0) {
+                               if (symbol->buckets != NULL)
+                                       efree(symbol->buckets);
+                               init_array(symbol);     /* re-initialize symbol 
*/
+                               symbol->flags &= ~ARRAYMAXED;
+                       }
+
+                       return (NODE **) ! NULL;        /* return success */
+               }
+       }
+
+       return NULL;
+}
+
+
+/* str_copy --- duplicate input array "symbol" */
+
+static NODE **
+str_copy(NODE *symbol, NODE *newsymb)
+{
+       BUCKET **old, **new, **pnew;
+       BUCKET *chain, *newchain;
+       unsigned long cursize, i;
+       
+       assert(symbol->table_size > 0);
+
+       /* find the current hash size */
+       cursize = symbol->array_size;
+
+       /* allocate new table */
+       emalloc(new, BUCKET **, cursize * sizeof(BUCKET *), "str_copy");
+       memset(new, '\0', cursize * sizeof(BUCKET *));
+
+       old = symbol->buckets;
+
+       for (i = 0; i < cursize; i++) {
+               for (chain = old[i], pnew = & new[i]; chain != NULL;
+                               chain = chain->ahnext
+               ) {
+                       NODE *oldval, *newsubs;
+
+                       getbucket(newchain);
+
+                       /*
+                        * copy the corresponding name and
+                        * value from the original input list
+                        */
+
+                       newsubs = newchain->ahname = dupnode(chain->ahname);
+                       newchain->ahname_str = newsubs->stptr;
+                       newchain->ahname_len = newsubs->stlen;
+
+                       oldval = chain->ahvalue;
+                       if (oldval->type == Node_val)
+                               newchain->ahvalue = dupnode(oldval);
+                       else {
+                               NODE *r;
+
+                               r = make_array();
+                               r->vname = estrdup(oldval->vname, 
strlen(oldval->vname));
+                               r->parent_array = newsymb;
+                               newchain->ahvalue = assoc_copy(oldval, r);
+                       }
+                       newchain->ahcode = chain->ahcode;
+
+                       *pnew = newchain;
+                       pnew = & newchain->ahnext;
+               }
+       }       
+
+       newsymb->table_size = symbol->table_size;
+       newsymb->buckets = new;
+       newsymb->array_size = cursize;
+       newsymb->flags = symbol->flags;
+       return NULL;
+}
+
+
+/* str_list --- return a list of array items */
+
+static NODE**
+str_list(NODE *symbol, NODE *t)
+{
+       NODE **list;
+       NODE *subs, *val;
+       BUCKET *b;
+       unsigned long num_elems, list_size, i, k = 0;
+       int elem_size = 1;
+
+       assert(symbol->table_size > 0);
+
+       if ((t->flags & (AINDEX|AVALUE)) == (AINDEX|AVALUE))
+               elem_size = 2;
+
+       /* allocate space for array */
+       num_elems = symbol->table_size;
+       if ((t->flags & (AINDEX|AVALUE|ADELETE)) == (AINDEX|ADELETE))
+               num_elems = 1;
+       list_size =  elem_size * num_elems;
+       
+       emalloc(list, NODE **, list_size * sizeof(NODE *), "str_list");
+ 
+       /* populate it */
+
+       for (i = 0; i < symbol->array_size; i++) {
+               for (b = symbol->buckets[i]; b != NULL; b = b->ahnext) {
+                       /* index */
+                       subs = b->ahname;
+                       if (t->flags & AINUM)
+                               (void) force_number(subs);
+                       list[k++] = dupnode(subs);
+
+                       /* value */
+                       if (t->flags & AVALUE) {
+                               val = b->ahvalue;
+                               if (val->type == Node_val) {
+                                       if ((t->flags & AVNUM) != 0)
+                                               (void) force_number(val);
+                                       else if ((t->flags & AVSTR) != 0)
+                                               val = force_string(val);
+                               }
+                               list[k++] = val;
+                       }
+                       if (k >= list_size)
+                               return list;
+               }                       
+       }
+       return list;
+}
+
+
+/* str_kilobytes --- calculate memory consumption of the assoc array */
+
+AWKNUM
+str_kilobytes(NODE *symbol)
+{
+       unsigned long bucket_cnt;
+       AWKNUM kb;
+
+       bucket_cnt = symbol->table_size;
+
+       /* This does not include extra memory for indices with stfmt != -1 */
+       kb = (((AWKNUM) bucket_cnt) * sizeof (BUCKET) + 
+               ((AWKNUM) symbol->array_size) * sizeof (BUCKET *)) / 1024.0;
+       return kb;
+}
+
+
+/* str_dump --- dump array info */
+
+static NODE **
+str_dump(NODE *symbol, NODE *ndump)
+{
+#define HCNT   31
+
+       int indent_level;
+       unsigned long i, bucket_cnt;
+       BUCKET *b;
+       static size_t hash_dist[HCNT + 1];
+
+       indent_level = ndump->alevel;
+
+       if ((symbol->flags & XARRAY) == 0)
+               fprintf(output_fp, "%s `%s'\n",
+                               (symbol->parent_array == NULL) ? "array" : 
"sub-array",
+                               array_vname(symbol));
+       indent_level++;
+       indent(indent_level);
+       fprintf(output_fp, "array_func: str_array_func\n");
+       if (symbol->flags != 0) {
+               indent(indent_level);
+               fprintf(output_fp, "flags: %s\n", flags2str(symbol->flags));
+       }
+       indent(indent_level);
+       fprintf(output_fp, "STR_CHAIN_MAX: %d\n", STR_CHAIN_MAX);
+       indent(indent_level);
+       fprintf(output_fp, "array_size: %lu\n", (unsigned long) 
symbol->array_size);
+       indent(indent_level);
+       fprintf(output_fp, "table_size: %lu\n", (unsigned long) 
symbol->table_size);
+       indent(indent_level);
+       fprintf(output_fp, "Avg # of items per chain: %.2g\n",
+                               ((AWKNUM) symbol->table_size) / 
symbol->array_size);
+
+       indent(indent_level);
+       fprintf(output_fp, "memory: %.2g kB\n", str_kilobytes(symbol));
+
+       /* hash value distribution */
+
+       memset(hash_dist, '\0', (HCNT + 1) * sizeof(size_t));
+       for (i = 0; i < symbol->array_size; i++) {
+               bucket_cnt = 0;
+               for (b = symbol->buckets[i]; b != NULL; b = b->ahnext)
+                       bucket_cnt++;
+               if (bucket_cnt >= HCNT)
+                       bucket_cnt = HCNT;
+               hash_dist[bucket_cnt]++;
+       }
+
+       indent(indent_level);
+       fprintf(output_fp, "Hash distribution:\n");
+       indent_level++;
+       for (i = 0; i <= HCNT; i++) {
+               if (hash_dist[i] > 0) {
+                       indent(indent_level);
+                       if (i == HCNT)
+                               fprintf(output_fp, "[>=%lu]:%lu\n",
+                                       (unsigned long) HCNT, (unsigned long) 
hash_dist[i]);
+                       else
+                               fprintf(output_fp, "[%lu]:%lu\n",
+                                       (unsigned long) i, (unsigned long) 
hash_dist[i]);
+               }
+       }
+       indent_level--;
+
+       /* dump elements */
+
+       if (ndump->adepth >= 0) {
+               const char *aname;
+
+               fprintf(output_fp, "\n");
+               aname = make_aname(symbol);
+               for (i = 0; i < symbol->array_size; i++) {
+                       for (b = symbol->buckets[i]; b != NULL; b = b->ahnext)
+                               assoc_info(b->ahname, b->ahvalue, ndump, aname);
+               }
+       }
+
+       return NULL;
+
+#undef HCNT
+}      
+
+
+/* awk_hash --- calculate the hash function of the string in subs */
+
+static unsigned long
+awk_hash(const char *s, size_t len, unsigned long hsize, size_t *code)
+{
+       unsigned long h = 0;
+       unsigned long htmp;
+
+       /*
+        * Ozan Yigit's original sdbm hash, copied from Margo Seltzers
+        * db package.
+        *
+        * This is INCREDIBLY ugly, but fast.  We break the string up into
+        * 8 byte units.  On the first time through the loop we get the
+        * "leftover bytes" (strlen % 8).  On every other iteration, we
+        * perform 8 HASHC's so we handle all 8 bytes.  Essentially, this
+        * saves us 7 cmp & branch instructions.  If this routine is
+        * heavily used enough, it's worth the ugly coding.
+        */
+
+       /*
+        * Even more speed:
+        * #define HASHC   h = *s++ + 65599 * h
+        * Because 65599 = pow(2, 6) + pow(2, 16) - 1 we multiply by shifts
+        *
+        * 4/2011: Force the results to 32 bits, to get the same
+        * result on both 32- and 64-bit systems. This may be a
+        * bad idea.
+        */
+#define HASHC   htmp = (h << 6);  \
+               h = *s++ + htmp + (htmp << 10) - h ; \
+               htmp &= 0xFFFFFFFF; \
+               h &= 0xFFFFFFFF
+
+       h = 0;
+
+       /* "Duff's Device" */
+       if (len > 0) {
+               size_t loop = (len + 8 - 1) >> 3;
+
+               switch (len & (8 - 1)) {
+               case 0:
+                       do {    /* All fall throughs */
+                               HASHC;
+               case 7:         HASHC;
+               case 6:         HASHC;
+               case 5:         HASHC;
+               case 4:         HASHC;
+               case 3:         HASHC;
+               case 2:         HASHC;
+               case 1:         HASHC;
+                       } while (--loop);
+               }
+       }
+
+       if (code != NULL)
+               *code = h;
+
+       if (h >= hsize)
+               h %= hsize;
+       return h;
+}
+
+
+/* str_find --- locate symbol[subs] */
+
+static inline NODE **
+str_find(NODE *symbol, NODE *s1, size_t code1, unsigned long hash1)
+{
+       BUCKET *b;
+       size_t s2_len;
+
+       for (b = symbol->buckets[hash1]; b != NULL; b = b->ahnext) {
+               /*
+                * This used to use cmp_nodes() here.  That's wrong.
+                * Array indexes are strings; compare as such, always!
+                */
+               s2_len = b->ahname_len;
+
+               if (code1 == b->ahcode
+                       && s1->stlen == s2_len
+                       && (s2_len == 0         /* "" is a valid index */
+                               || memcmp(s1->stptr, b->ahname_str, s2_len) == 
0)
+               )
+                       return & (b->ahvalue);
+       }
+       return NULL;
+}
+
+
+/* grow_table --- grow a hash table */
+
+static void
+grow_table(NODE *symbol)
+{
+       BUCKET **old, **new;
+       BUCKET *chain, *next;
+       int i, j;
+       unsigned long oldsize, newsize, k;
+       unsigned long hash1;
+
+       /*
+        * This is an array of primes. We grow the table by an order of
+        * magnitude each time (not just doubling) so that growing is a
+        * rare operation. We expect, on average, that it won't happen
+        * more than twice.  The final size is also chosen to be small
+        * enough so that MS-DOG mallocs can handle it. When things are
+        * very large (> 8K), we just double more or less, instead of
+        * just jumping from 8K to 64K.
+        */
+ 
+       static const unsigned long sizes[] = {
+               13, 127, 1021, 8191, 16381, 32749, 65497,
+               131101, 262147, 524309, 1048583, 2097169,
+               4194319, 8388617, 16777259, 33554467, 
+               67108879, 134217757, 268435459, 536870923,
+               1073741827
+       };
+
+       /* find next biggest hash size */
+       newsize = oldsize = symbol->array_size;
+
+       for (i = 0, j = sizeof(sizes)/sizeof(sizes[0]); i < j; i++) {
+               if (oldsize < sizes[i]) {
+                       newsize = sizes[i];
+                       break;
+               }
+       }
+       if (newsize == oldsize) {       /* table already at max (!) */
+               symbol->flags |= ARRAYMAXED;
+               return;
+       }
+
+       /* allocate new table */
+       emalloc(new, BUCKET **, newsize * sizeof(BUCKET *), "grow_table");
+       memset(new, '\0', newsize * sizeof(BUCKET *));
+
+       old = symbol->buckets;
+       symbol->buckets = new;
+       symbol->array_size = newsize;
+
+       /* brand new hash table, set things up and return */
+       if (old == NULL) {
+               symbol->table_size = 0;
+               return;
+       }
+
+       /* old hash table there, move stuff to new, free old */
+
+       /*
+        * note that symbol->table_size does not change if an old array,
+        * and is explicitly set to 0 if a new one.
+        */
+
+       for (k = 0; k < oldsize; k++) {
+               for (chain = old[k]; chain != NULL; chain = next) {
+                       next = chain->ahnext;
+                       hash1 = chain->ahcode % newsize;
+
+                       /* remove from old list, add to new */
+                       chain->ahnext = new[hash1];
+                       new[hash1] = chain;
+               }
+       }
+       efree(old);
+}
+
+
+#ifdef ARRAYDEBUG
+
+static NODE **
+str_option(NODE *opt, NODE *val)
+{
+       int newval;
+       NODE *tmp;
+       NODE **ret = (NODE **) ! NULL;
+
+       tmp = force_string(opt);
+       (void) force_number(val);
+       if (STREQ(tmp->stptr, "STR_CHAIN_MAX")) {
+               newval = (int) val->numbr;
+               if (newval > 0)         
+                       STR_CHAIN_MAX = newval;
+       } else
+               ret = NULL;
+       return ret;
+}
+#endif
+
+
+/*
+From address@hidden  Mon Oct 28 16:05:26 2002
+Date: Mon, 28 Oct 2002 13:33:03 +0100
+From: Paolo Bonzini <address@hidden>
+To: address@hidden
+Subject: Hash function
+Message-ID: <address@hidden>
+
+Here is the hash function I'm using in GNU Smalltalk.  The scrambling is
+needed if you use powers of two as the table sizes.  If you use primes it
+is not needed.
+
+To use double-hashing with power-of-two size, you should use the
+_gst_hash_string(str, len) as the primary hash and
+scramble(_gst_hash_string (str, len)) | 1 as the secondary hash.
+
+Paolo
+
+*/
+/*
+ * ADR: Slightly modified to work w/in the context of gawk.
+ */
+
+static unsigned long
+gst_hash_string(const char *str, size_t len, unsigned long hsize, size_t *code)
+{
+       unsigned long hashVal = 1497032417;    /* arbitrary value */
+       unsigned long ret;
+
+       while (len--) {
+               hashVal += *str++;
+               hashVal += (hashVal << 10);
+               hashVal ^= (hashVal >> 6);
+       }
+
+       ret = scramble(hashVal);
+
+       if (code != NULL)
+               *code = ret;
+
+       if (ret >= hsize)
+               ret %= hsize;
+
+       return ret;
+}
+
+static unsigned long
+scramble(unsigned long x)
+{
+       if (sizeof(long) == 4) {
+               int y = ~x;
+
+               x += (y << 10) | (y >> 22);
+               x += (x << 6)  | (x >> 26);
+               x -= (x << 16) | (x >> 16);
+       } else {
+               x ^= (~x) >> 31;
+               x += (x << 21) | (x >> 11);
+               x += (x << 5) | (x >> 27);
+               x += (x << 27) | (x >> 5);
+               x += (x << 31);
+       }
+
+       return x;
+}
diff --git a/symbol.c b/symbol.c
new file mode 100644
index 0000000..a1c598e
--- /dev/null
+++ b/symbol.c
@@ -0,0 +1,693 @@
+#include "awk.h"
+
+extern SRCFILE *srcfiles;
+extern INSTRUCTION *rule_list;
+
+#define HASHSIZE       1021
+
+static NODE *variables[HASHSIZE];
+static int func_count; /* total number of functions */
+static int var_count;  /* total number of global variables and functions */
+
+static NODE *symbol_list;
+static void (*install_func)(NODE *) = NULL;
+static NODE *make_symbol(char *name, NODETYPE type);
+static NODE *install(char *name, NODE *hp, NODETYPE type);
+static void free_bcpool(INSTRUCTION *pl);
+
+static AWK_CONTEXT *curr_ctxt = NULL;
+static int ctxt_level;
+
+
+/*
+ * install_symbol:
+ * Install a global name in the symbol table, even if it is already there.
+ * Caller must check against redefinition if that is desired. 
+ */
+
+NODE *
+install_symbol(char *name, NODETYPE type)
+{
+       return install(name, NULL, type);
+}
+
+
+/* lookup --- find the most recent global or param node for name
+ *     installed by install_symbol
+ */
+
+NODE *
+lookup(const char *name)
+{
+       NODE *hp;
+       size_t len;
+       int hash1;
+
+       len = strlen(name);
+       hash1 = hash(name, len, (unsigned long) HASHSIZE, NULL);
+       for (hp = variables[hash1]; hp != NULL; hp = hp->hnext) {
+               if (hp->hlength == len && strncmp(hp->hname, name, len) == 0)
+                       return hp->hvalue;
+       }
+       return NULL;
+}
+
+/* make_params --- allocate function parameters for the symbol table */
+
+NODE *
+make_params(char **pnames, int pcount)
+{
+       NODE *hp, *parms;
+       int i;
+       
+       if (pcount <= 0 || pnames == NULL)
+               return NULL;
+
+       emalloc(parms, NODE *, pcount * sizeof(NODE), "make_params");
+       memset(parms, '\0', pcount * sizeof(NODE));
+
+       for (i = 0, hp = parms; i < pcount; i++, hp++) {
+               hp->type = Node_param_list;
+               hp->hname = pnames[i];  /* shadows pname and vname */
+               hp->hlength = strlen(pnames[i]);
+               hp->param_cnt = i;
+               hp->hvalue = hp;        /* points to itself */
+       }
+
+       return parms;
+}
+
+/* install_params --- install function parameters into the symbol table */
+
+void
+install_params(NODE *func)
+{
+       int i, pcount;
+       NODE *parms;
+
+       if (func == NULL)
+               return;
+       assert(func->type == Node_func);
+       if ((pcount = func->param_cnt) <= 0
+                       || (parms = func->fparms) == NULL
+       )
+               return;
+       for (i = 0; i < pcount; i++)
+               (void) install(NULL, parms + i, Node_param_list);
+}
+
+
+/*
+ * remove_params --- remove function parameters out of the symbol table.
+ */
+
+void
+remove_params(NODE *func)
+{
+       NODE *parms, *p, *prev, *n;
+       int i, pcount, hash1;
+
+       if (func == NULL)
+               return;
+       assert(func->type == Node_func);
+       if ((pcount = func->param_cnt) <= 0
+                       || (parms = func->fparms) == NULL
+       )
+               return;
+
+       for (i = pcount - 1; i >= 0; i--) {
+               p = parms + i;
+               hash1 = p->hcode;
+               if (hash1 < 0 || hash1 >= HASHSIZE)
+                       continue;
+               for (prev = NULL, n = variables[hash1]; n != NULL;
+                                       prev = n, n = n->hnext) {
+                       if (n == p)
+                               break;
+               }
+               if (n == NULL)
+                       continue;
+               if (prev == NULL)
+                       variables[hash1] = n->hnext;    /* param at the head of 
the chain */
+               else
+                       prev->hnext = n->hnext;         /* param not at the 
head */ 
+       }
+}
+
+
+/* remove_symbol --- remove a symbol from the symbol table */
+
+NODE *
+remove_symbol(NODE *r)
+{
+       NODE *prev, *hp;
+       int hash1;
+       
+       hash1 = hash(r->vname, strlen(r->vname), (unsigned long) HASHSIZE, 
NULL);
+       for (prev = NULL, hp = variables[hash1]; hp != NULL;
+                               prev = hp, hp = hp->hnext) {
+               if (hp->hvalue == r)
+                       break;
+       }
+
+       if (hp == NULL)
+               return NULL;
+       assert(hp->hcode == hash1);
+
+       if (prev == NULL)
+               variables[hash1] = hp->hnext;   /* symbol at the head of chain 
*/
+       else
+               prev->hnext = hp->hnext;        /* symbol not at the head */
+
+       if (r->type == Node_param_list)
+               return r;       /* r == hp */
+       if (r->type == Node_func)
+               func_count--;
+       if (r->type != Node_ext_func)
+               var_count--;
+       freenode(hp);
+       return r;
+}
+
+
+/* destroy_symbol --- remove a symbol from symbol table
+*      and free all associated memory.
+*/
+
+void
+destroy_symbol(NODE *r)
+{
+       r = remove_symbol(r);
+       if (r == NULL)
+               return;
+
+       switch (r->type) {
+       case Node_func:
+               if (r->param_cnt > 0) {
+                       NODE *n;
+                       int i;
+                       int pcount = r->param_cnt;
+                               
+                       /* function parameters of type Node_param_list */       
                        
+                       for (i = 0; i < pcount; i++) {
+                               n = r->fparms + i;
+                               efree(n->param);
+                       }               
+                       efree(r->fparms);
+               }
+               break;
+
+       case Node_ext_func:
+               bcfree(r->code_ptr);
+               break;
+
+       case Node_var_array:
+               assoc_clear(r);
+               break;
+
+       case Node_var: 
+               unref(r->var_value);
+               break;
+
+       default:
+               /* Node_param_list -- YYABORT */
+               return;
+       }
+
+       efree(r->vname);
+       freenode(r);
+}
+
+
+/* make_symbol --- allocates a global symbol for the symbol table. */
+
+static NODE *
+make_symbol(char *name, NODETYPE type)
+{
+       NODE *hp, *r;
+
+       getnode(hp);
+       hp->type = Node_hashnode;
+       hp->hlength = strlen(name);
+       hp->hname = name;
+       getnode(r);
+       memset(r, '\0', sizeof(NODE));
+       hp->hvalue = r;
+       if (type == Node_var_array)
+               init_array(r);
+       else if (type == Node_var)
+               r->var_value = dupnode(Nnull_string);
+       r->vname = name;
+       r->type = type;
+       return hp;
+}
+
+/* install --- install a global name or function parameter in the symbol table 
*/
+
+static NODE *
+install(char *name, NODE *hp, NODETYPE type)
+{
+       int hash1;
+       NODE *r;
+
+       if (hp == NULL) {
+               /* global symbol */
+               hp = make_symbol(name, type);
+               if (type == Node_func)
+                       func_count++;
+               if (type != Node_ext_func)
+                       var_count++;    /* total, includes Node_func */
+       }
+
+       r = hp->hvalue;
+       hash1 = hash(hp->hname, hp->hlength, (unsigned long) HASHSIZE, NULL);
+       hp->hcode = hash1;
+       hp->hnext = variables[hash1];
+       variables[hash1] = hp;
+
+       if (install_func)
+               (*install_func)(r);
+
+       return r;
+}
+
+
+/* comp_symbol --- compare two (variable or function) names */
+
+static int
+comp_symbol(const void *v1, const void *v2)
+{
+       const NODE *const *npp1, *const *npp2;
+       const NODE *n1, *n2;
+
+       npp1 = (const NODE *const *) v1;
+       npp2 = (const NODE *const *) v2;
+       n1 = *npp1;
+       n2 = *npp2;
+
+       return strcmp(n1->vname, n2->vname);
+}
+
+
+typedef enum { FUNCTION = 1, VARIABLE } SYMBOL_TYPE;
+
+/* get_symbols --- return a list of optionally sorted symbols */
+ 
+static NODE **
+get_symbols(SYMBOL_TYPE what, int sort)
+{
+       int i;
+       NODE **table;
+       NODE *hp, *r;
+       long j, count = 0;
+
+       if (what == FUNCTION)
+               count = func_count;
+       else    /* if (what == VARIABLE) */
+               count = var_count;
+
+       emalloc(table, NODE **, (count + 1) * sizeof(NODE *), "symbol_list");
+       if (what == VARIABLE)
+               update_global_values();
+
+       for (i = j = 0; i < HASHSIZE; i++)
+               for (hp = variables[i]; hp != NULL; hp = hp->hnext) {
+                       if (hp->type != Node_hashnode)
+                               continue;
+                       r = hp->hvalue;
+                       if (r->type == Node_ext_func)
+                               continue;
+                       if (what == FUNCTION && r->type == Node_func)
+                               table[j++] = r;
+                       else if (what == VARIABLE)
+                               table[j++] = r;
+               }
+
+       if (sort && count > 1)
+               qsort(table, count, sizeof(NODE *), comp_symbol);       /* 
Shazzam! */
+       table[count] = NULL; /* null terminate the list */
+       return table;
+}
+
+
+/* variable_list --- list of global variables */
+
+NODE **
+variable_list()
+{
+       return get_symbols(VARIABLE, TRUE);
+}
+
+/* function_list --- list of functions */
+
+NODE **
+function_list(int sort)
+{
+       return get_symbols(FUNCTION, sort);
+}
+
+/* print_vars --- print names and values of global variables */ 
+
+void
+print_vars(NODE **table, int (*print_func)(FILE *, const char *, ...), FILE 
*fp)
+{
+       int i;
+       NODE *r;
+
+       assert(table != NULL);
+
+       for (i = 0; (r = table[i]) != NULL; i++) {
+               if (r->type == Node_func || r->type == Node_ext_func)
+                       continue;
+               print_func(fp, "%s: ", r->vname);
+               if (r->type == Node_var_array)
+                       print_func(fp, "array, %ld elements\n", r->table_size);
+               else if (r->type == Node_var_new)
+                       print_func(fp, "untyped variable\n");
+               else if (r->type == Node_var)
+                       valinfo(r->var_value, print_func, fp);
+       }
+}
+
+
+/* foreach_func --- execute given function for each awk function in table. */
+
+int
+foreach_func(NODE **table, int (*pfunc)(INSTRUCTION *, void *), void *data)
+{
+       int i;
+       NODE *r;
+       int ret = 0;
+
+       assert(table != NULL);
+
+       for (i = 0; (r = table[i]) != NULL; i++) {
+               if ((ret = pfunc(r->code_ptr, data)) != 0)
+                       break;
+       }
+       return ret;
+}
+
+/* release_all_vars --- free all variable memory */
+
+void
+release_all_vars()
+{
+       int i;
+       NODE *hp, *r, *next;
+
+       for (i = 0; i < HASHSIZE; i++)
+               for (hp = variables[i]; hp != NULL; hp = next) {
+                       next = hp->hnext;
+                       if (hp->type != Node_hashnode)
+                               continue;
+                       r = hp->hvalue;
+                       if (r->type == Node_func || r->type == Node_ext_func)
+                               continue;
+                       if (r->type == Node_var_array)
+                               assoc_clear(r);
+                       else if (r->type == Node_var)
+                               unref(r->var_value);
+                       efree(r->vname);
+                       freenode(r);
+                       freenode(hp);
+               }
+}
+
+
+/* append_symbol --- append symbol to the list of symbols
+ *     installed in the symbol table.
+ */
+
+void
+append_symbol(NODE *r)
+{
+       NODE *hp;
+
+       getnode(hp);
+       hp->lnode = r;
+       hp->rnode = symbol_list->rnode;
+       symbol_list->rnode = hp;
+}
+
+/* release_symbol --- free symbol list and optionally remove symbol from 
symbol table */
+
+void
+release_symbols(NODE *symlist, int keep_globals)
+{
+       NODE *hp, *next;
+
+       for (hp = symlist->rnode; hp != NULL; hp = next) {
+               if (! keep_globals) {
+                       /* destroys globals, function, and params
+                        * if still in symbol table
+                        */
+                       destroy_symbol(hp->lnode);
+               }
+               next = hp->rnode;
+               freenode(hp);
+       }
+       symlist->rnode = NULL;
+}
+
+#define pool_size      d.dl
+#define freei          x.xi
+static INSTRUCTION *pool_list;
+
+/* INSTR_CHUNK must be > largest code size (3) */
+#define INSTR_CHUNK 127
+
+/* bcfree --- deallocate instruction */
+
+void
+bcfree(INSTRUCTION *cp)
+{
+       cp->opcode = 0;
+       cp->nexti = pool_list->freei;
+       pool_list->freei = cp;
+}      
+
+/* bcalloc --- allocate a new instruction */
+
+INSTRUCTION *
+bcalloc(OPCODE op, int size, int srcline)
+{
+       INSTRUCTION *cp;
+
+       if (size > 1) {
+               /* wide instructions Op_rule, Op_func_call .. */
+               emalloc(cp, INSTRUCTION *, (size + 1) * sizeof(INSTRUCTION), 
"bcalloc");
+               cp->pool_size = size;
+               cp->nexti = pool_list->nexti;
+               pool_list->nexti = cp++;
+       } else {
+               INSTRUCTION *pool;
+
+               pool = pool_list->freei;
+               if (pool == NULL) {
+                       INSTRUCTION *last;
+                       emalloc(cp, INSTRUCTION *, (INSTR_CHUNK + 1) * 
sizeof(INSTRUCTION), "bcalloc");
+
+                       cp->pool_size = INSTR_CHUNK;
+                       cp->nexti = pool_list->nexti;
+                       pool_list->nexti = cp;
+                       pool = ++cp;
+                       last = &pool[INSTR_CHUNK - 1];
+                       for (; cp <= last; cp++) {
+                               cp->opcode = 0;
+                               cp->nexti = cp + 1;
+                       }
+                       --cp;
+                       cp->nexti = NULL;
+               }
+               cp = pool;
+               pool_list->freei = cp->nexti;
+       }
+
+       memset(cp, 0, size * sizeof(INSTRUCTION));
+       cp->opcode = op;
+       cp->source_line = srcline;
+       return cp;
+}
+
+/* new_context --- create a new execution context. */
+
+AWK_CONTEXT *
+new_context()
+{
+       AWK_CONTEXT *ctxt;
+
+       emalloc(ctxt, AWK_CONTEXT *, sizeof(AWK_CONTEXT), "new_context");
+       memset(ctxt, 0, sizeof(AWK_CONTEXT));
+       ctxt->srcfiles.next = ctxt->srcfiles.prev = & ctxt->srcfiles;
+       ctxt->rule_list.opcode = Op_list;
+       ctxt->rule_list.lasti = & ctxt->rule_list;
+       return ctxt;
+}
+
+/* set_context --- change current execution context. */
+
+static void
+set_context(AWK_CONTEXT *ctxt)
+{
+       pool_list = & ctxt->pools;
+       symbol_list = & ctxt->symbols;
+       srcfiles = & ctxt->srcfiles;
+       rule_list = & ctxt->rule_list;
+       install_func = ctxt->install_func;
+       curr_ctxt = ctxt;
+}
+
+/*
+ * push_context:
+ *
+ * Switch to the given context after saving the current one. The set
+ * of active execution contexts forms a stack; the global or main context
+ * is at the bottom of the stack.
+ */
+
+void
+push_context(AWK_CONTEXT *ctxt)
+{
+       ctxt->prev = curr_ctxt;
+       /* save current source and sourceline */
+       if (curr_ctxt != NULL) {
+               curr_ctxt->sourceline = sourceline;
+               curr_ctxt->source = source;
+       }
+       sourceline = 0;
+       source = NULL;
+       set_context(ctxt);
+       ctxt_level++;
+}
+
+/* pop_context --- switch to previous execution context. */ 
+
+void
+pop_context()
+{
+       AWK_CONTEXT *ctxt;
+
+       assert(curr_ctxt != NULL);
+       if (curr_ctxt->prev == NULL)
+               fatal(_("can not pop main context"));
+       ctxt = curr_ctxt->prev;
+       /* restore source and sourceline */
+       sourceline = ctxt->sourceline;
+       source = ctxt->source;
+       set_context(ctxt);
+       ctxt_level--;
+}
+
+/* in_main_context --- are we in the main context ? */
+
+int
+in_main_context()
+{
+       assert(ctxt_level > 0);
+       return (ctxt_level == 1);
+}
+
+/* free_context --- free context structure and related data. */ 
+
+void
+free_context(AWK_CONTEXT *ctxt, int keep_globals)
+{
+       SRCFILE *s, *sn;
+
+       if (ctxt == NULL)
+               return;
+
+       assert(curr_ctxt != ctxt);
+
+       /* free all code including function codes */
+
+       free_bcpool(& ctxt->pools);
+
+       /* free symbols */
+
+       release_symbols(& ctxt->symbols, keep_globals);
+
+       /* free srcfiles */
+
+       for (s = & ctxt->srcfiles; s != & ctxt->srcfiles; s = sn) {
+               sn = s->next;
+               if (s->stype != SRC_CMDLINE && s->stype != SRC_STDIN)
+                       efree(s->fullpath);
+               efree(s->src);
+               efree(s);
+       }
+
+       efree(ctxt);
+}
+
+/* free_bc_internal --- free internal memory of an instruction. */ 
+
+static void
+free_bc_internal(INSTRUCTION *cp)
+{
+       NODE *m;
+
+       switch(cp->opcode) {
+       case Op_func_call:
+               if (cp->func_name != NULL)
+                       efree(cp->func_name);
+               break;
+       case Op_push_re:
+       case Op_match_rec:
+       case Op_match:
+       case Op_nomatch:
+               m = cp->memory;
+               if (m->re_reg != NULL)
+                       refree(m->re_reg);
+               if (m->re_exp != NULL)
+                       unref(m->re_exp);
+               if (m->re_text != NULL)
+                       unref(m->re_text);
+               freenode(m);
+               break;                  
+       case Op_token:
+               /* token lost during error recovery in yyparse */
+               if (cp->lextok != NULL)
+                       efree(cp->lextok);
+               break;
+       case Op_push_i:
+               m = cp->memory;
+               unref(m);
+               break;
+       case Op_store_var:
+               m = cp->initval;
+               if (m != NULL)
+                       unref(m);
+               break;
+       case Op_illegal:
+               cant_happen();
+       default:
+               break;  
+       }
+}
+
+/* free_bcpool --- free list of instruction memory pools */
+
+static void
+free_bcpool(INSTRUCTION *pl)
+{
+       INSTRUCTION *pool, *tmp;
+
+       for (pool = pl->nexti; pool != NULL; pool = tmp) {
+               INSTRUCTION *cp, *last;
+               long psiz;
+               psiz = pool->pool_size;
+               if (psiz == INSTR_CHUNK)
+                       last = pool + psiz;
+               else
+                       last = pool + 1;
+               for (cp = pool + 1; cp <= last ; cp++) {
+                       if (cp->opcode != 0)
+                               free_bc_internal(cp);
+               }
+               tmp = pool->nexti;
+               efree(pool);
+       }
+       memset(pl, 0, sizeof(INSTRUCTION));
+}
diff --git a/symbol.h b/symbol.h
new file mode 100644
index 0000000..d5c2d85
--- /dev/null
+++ b/symbol.h
@@ -0,0 +1,6 @@
+extern NODE *install_symbol(char *name, NODETYPE type);
+extern NODE *lookup(const char *name);
+extern void install_params(NODE *paramtab, int pcount);
+extern void remove_params(NODE *paramtab, int pcount);
+extern NODE *remove_symbol(char *name);
+extern void destroy_symbol(char *name);

-----------------------------------------------------------------------

Summary of changes:
 cint_array.c | 1225 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 int_array.c  |  833 +++++++++++++++++++++++++++++++++++++++
 str_array.c  |  759 ++++++++++++++++++++++++++++++++++++
 symbol.c     |  693 +++++++++++++++++++++++++++++++++
 symbol.h     |    6 +
 5 files changed, 3516 insertions(+), 0 deletions(-)
 create mode 100644 cint_array.c
 create mode 100644 int_array.c
 create mode 100644 str_array.c
 create mode 100644 symbol.c
 create mode 100644 symbol.h


hooks/post-receive
-- 
gawk



reply via email to

[Prev in Thread] Current Thread [Next in Thread]