[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: memory leak in gawk 3.1.0
From: |
Aharon Robbins |
Subject: |
Re: memory leak in gawk 3.1.0 |
Date: |
Tue, 14 May 2002 17:24:44 +0300 |
This note is for the list and its archives. Tony received a
preliminary version of this fix.
> Date: Thu, 2 May 2002 01:56:14 -0700
> From: Tony Leneis <address@hidden>
> To: address@hidden
> Subject: memory leak in gawk 3.1.0
>
> There's a rather nasty memory leak in gawk 3.1.0 when a next statement
> is called from within a for (index in array) loop. For example, this gawk
> program eventually consumes over 100 megs after processing around 2 million
> lines:
>
> BEGIN {
> x[1] = 0
> }
> {
> for (i in x)
> next
> }
>
> I assume the for (i in x) construct must allocate some memory that isn't
> freed when the loop is exited via the next statement.
>
> -Tony
>
> --
> e-mail: address@hidden Computerized Vehicle Registration
> phone : 503 402-3531 2525 SW 1st Ave, Suite 450
> FAX : 503 294-1526 Portland, OR 97201-4760
This is indeed a bug. Below is a patch relative to the current
official version, 3.1.1, that fixes the leak.
Arnold
---------------------------------------------------------------------
*** ../gawk-3.1.1/eval.c Tue Apr 16 14:41:14 2002
--- eval.c Wed May 8 10:40:16 2002
***************
*** 33,38 ****
--- 33,42 ----
static NODE *op_assign P((NODE *tree));
static NODE *func_call P((NODE *name, NODE *arg_list));
static NODE *match_op P((NODE *tree));
+ static int forloops_active P((void));
+ static void pop_forloop P((void));
+ static void pop_all_forloops P((void));
+ static void push_forloop P((char *vname, NODE **elems, size_t nelems));
static void push_args P((int count, NODE *arglist, NODE **oldstack,
char *func_name, char **varnames));
static void pop_fcall_stack P((void));
***************
*** 369,376 ****
}
break;
case TAG_CONTINUE: /* NEXT statement */
return 1;
! case TAG_BREAK:
return 0;
default:
cant_happen();
--- 373,388 ----
}
break;
case TAG_CONTINUE: /* NEXT statement */
+ if (forloops_active())
+ pop_all_forloops();
+ if (in_function())
+ pop_fcall_stack();
return 1;
! case TAG_BREAK: /* EXIT statement */
! if (forloops_active())
! pop_all_forloops();
! if (in_function())
! pop_fcall_stack();
return 0;
default:
cant_happen();
***************
*** 509,514 ****
--- 521,527 ----
qsort(list, num_elems, sizeof(NODE *), comp_func); /*
shazzam! */
/* now we can run the loop */
+ push_forloop(array->vname, list, num_elems);
PUSH_BINDING(loop_tag_stack, loop_tag, loop_tag_valid);
lhs = get_lhs(tree->hakvar, &after_assign, FALSE);
***************
*** 536,551 ****
done:
RESTORE_BINDING(loop_tag_stack, loop_tag, loop_tag_valid);
if (do_lint && num_elems != array->table_size)
lintwarn(_("for loop: array `%s' changed size from %d
to %d during loop execution"),
array->vname, num_elems,
array->table_size);
- for (i = 0; i < num_elems; i++)
- unref(list[i]);
-
- free(list);
-
if (retval == 1)
return 1;
break;
--- 549,560 ----
done:
RESTORE_BINDING(loop_tag_stack, loop_tag, loop_tag_valid);
+ pop_forloop();
if (do_lint && num_elems != array->table_size)
lintwarn(_("for loop: array `%s' changed size from %d
to %d during loop execution"),
array->vname, num_elems,
array->table_size);
if (retval == 1)
return 1;
break;
***************
*** 567,574 ****
}
if (! do_traditional || do_posix)
fatal(_("`break' outside a loop is not
allowed"));
- if (in_function())
- pop_fcall_stack();
longjmp(rule_tag, TAG_CONTINUE);
} else
longjmp(loop_tag, TAG_BREAK);
--- 576,581 ----
***************
*** 590,597 ****
}
if (! do_traditional || do_posix)
fatal(_("`continue' outside a loop is not
allowed"));
- if (in_function())
- pop_fcall_stack();
longjmp(rule_tag, TAG_CONTINUE);
} else
longjmp(loop_tag, TAG_CONTINUE);
--- 597,602 ----
***************
*** 623,632 ****
else if (in_end_rule)
fatal(_("`next' cannot be called from an END rule"));
! /* could add a lint check here */
! if (in_function())
! pop_fcall_stack();
!
longjmp(rule_tag, TAG_CONTINUE);
break;
--- 628,634 ----
else if (in_end_rule)
fatal(_("`next' cannot be called from an END rule"));
! /* could add a lint check here for in a loop or function */
longjmp(rule_tag, TAG_CONTINUE);
break;
***************
*** 637,643 ****
else if (in_end_rule)
fatal(_("`nextfile' cannot be called from an END
rule"));
! /* could add a lint check here */
if (in_function())
pop_fcall_stack();
--- 639,652 ----
else if (in_end_rule)
fatal(_("`nextfile' cannot be called from an END
rule"));
! /* could add a lint check here for in a loop or function */
! /*
! * Have to do this cleanup here, since we don't longjump
! * back to the main awk rule loop (rule_tag).
! */
! if (forloops_active())
! pop_all_forloops();
!
if (in_function())
pop_fcall_stack();
***************
*** 1277,1288 ****
return *lhs;
}
static struct fcall {
! char *fname;
! unsigned long count;
! NODE *arglist;
! NODE **prevstack;
! NODE **stack;
} *fcall_list = NULL;
static long fcall_list_size = 0;
--- 1286,1391 ----
return *lhs;
}
+ /*
+ * Avoiding memory leaks is difficult. In paticular, any of `next',
+ * `nextfile', `break' or `continue' (when not in a loop), can longjmp
+ * out to the outermost level. This leaks memory if it happens in a
+ * called function. It also leaks memory if it happens in a
+ * `for (iggy in foo)' loop, since such loops malloc an array of the
+ * current array indices to loop over, which provides stability.
+ *
+ * The following code takes care of these problems. First comes the
+ * array-loop management code. This can be a stack of arrays being looped
+ * on at any one time. This stack serves for both mainline code and
+ * function body code. As each loop starts and finishes, it pushes its
+ * info onto this stack and off of it; whether the loop is in a function
+ * body or not isn't relevant.
+ *
+ * Since the list of indices is created using dupnode(), when popping
+ * this stack it should be safe to unref() things, and then memory
+ * will get finally released when the function call stack is popped.
+ * This means that the loop_stack should be popped first upon a `next'.
+ */
+
+ static struct loop_info {
+ char *vname; /* variable name, for debugging */
+ NODE **elems; /* list of indices */
+ size_t nelems; /* how many there are */
+ } *loop_stack = NULL;
+ size_t nloops = 0; /* how many slots there are in the stack */
+ size_t nloops_active = 0; /* how many loops are actively stacked */
+
+
+ /* forloops_active --- return true if there are loops that need popping */
+
+ static int
+ forloops_active()
+ {
+ return nloops > 0;
+ }
+
+ /* pop_forloop --- pop one for loop off the stack */
+
+ static void
+ pop_forloop()
+ {
+ int i, curloop;
+ struct loop_info *loop;
+
+ assert(nloops_active > 0);
+
+ curloop = --nloops_active; /* 0-based indexing */
+ loop = & loop_stack[curloop];
+
+ for (i = 0; i < loop->nelems; i++)
+ unref(loop->elems[i]);
+
+ free(loop->elems);
+
+ loop->elems = NULL;
+ loop->vname = NULL;
+ loop->nelems = 0;
+ }
+
+ /* pop_forloops --- pop the for loops stack all the way */
+
+ static void
+ pop_all_forloops()
+ {
+ while (nloops_active > 0)
+ pop_forloop(); /* decrements nloops_active for us */
+ }
+
+ /* push_forloop --- add a single for loop to the stack */
+
+ static void
+ push_forloop(char *vname, NODE **elems, size_t nelems)
+ {
+ #define NLOOPS 4 /* seems like a good guess */
+ if (loop_stack == NULL) {
+ /* allocate stack, set vars */
+ nloops = NLOOPS;
+ emalloc(loop_stack, struct loop_info *, nloops * sizeof(struct
loop_info),
+ "push_forloop");
+ } else if (nloops_active == nloops) {
+ /* grow stack, set vars */
+ nloops *= 2;
+ erealloc(loop_stack, struct loop_info *, nloops * sizeof(struct
loop_info),
+ "push_forloop");
+ }
+
+ loop_stack[nloops_active].vname = vname;
+ loop_stack[nloops_active].elems = elems;
+ loop_stack[nloops_active].nelems = nelems;
+ nloops_active++;
+ }
+
static struct fcall {
! char *fname; /* function name */
! unsigned long count; /* how many args */
! NODE *arglist; /* list thereof */
! NODE **prevstack; /* function stack frame of previous function */
! NODE **stack; /* function stack frame of current function */
} *fcall_list = NULL;
static long fcall_list_size = 0;