From 623a2f43593093b3fb8cde9472bf5ecec652b6d3 Mon Sep 17 00:00:00 2001 From: Bo Borgerson Date: Tue, 22 Apr 2008 16:19:58 -0400 Subject: [PATCH] Improve memory management in join * src/join.c (struct seq): Use a (struct line **) for `lines' rather than one long (struct line *). This allows individual lines to be swapped out if necessary. (reset_line): Get a line ready for new input. (init_linep): Create a new line and assign it to the the pointer passed in. (spareline[2]): Hold a spare line for each input file. (free_spareline): Clean up. (get_line): Take a (struct line **) instead of a (struct line *). If the line to be overwritten is the previous line for the current file then swap it out for the spare. (join): Accomodate new structure of SEQs and new parameters to get_line; Don't free stale lines until the end -- they're re-usable now. (dup_line): Removed. Signed-off-by: Bo Borgerson --- src/join.c | 169 +++++++++++++++++++++++++++++++---------------------------- 1 files changed, 89 insertions(+), 80 deletions(-) diff --git a/src/join.c b/src/join.c index b8a0011..56eabc5 100644 --- a/src/join.c +++ b/src/join.c @@ -40,6 +40,12 @@ #define join system_join +#define SWAPLINES(a, b) do { \ + struct line *tmp = a; \ + a = b; \ + b = tmp; \ +} while (0); + /* An element of the list identifying which fields to print for each output line. */ struct outlist @@ -76,14 +82,19 @@ struct seq { size_t count; /* Elements used in `lines'. */ size_t alloc; /* Elements allocated in `lines'. */ - struct line *lines; + struct line **lines; }; /* The name this program was run with. */ char *program_name; /* The previous line read from each file. */ -static struct line *prevline[2]; +static struct line *prevline[2] = {NULL, NULL}; + +/* This provides an extra line buffer for each file. We need these if we + try to read two consecutive lines into the same buffer, since we don't + want to overwrite the previous buffer before we check order. */ +static struct line *spareline[2] = {NULL, NULL}; /* True if the LC_COLLATE locale is hard. */ static bool hard_LC_COLLATE; @@ -260,33 +271,6 @@ xfields (struct line *line) extract_field (line, ptr, lim - ptr); } -static struct line * -dup_line (const struct line *old) -{ - struct line *newline = xmalloc (sizeof *newline); - size_t i; - - /* Duplicate the buffer. */ - initbuffer (&newline->buf); - newline->buf.buffer = xmalloc (old->buf.size); - newline->buf.size = old->buf.size; - memcpy (newline->buf.buffer, old->buf.buffer, old->buf.length); - newline->buf.length = old->buf.length; - - /* Duplicate the field positions. */ - newline->fields = xnmalloc (old->nfields_allocated, sizeof *newline->fields); - newline->nfields = old->nfields; - newline->nfields_allocated = old->nfields_allocated; - - for (i = 0; i < old->nfields; i++) - { - newline->fields[i].len = old->fields[i].len; - newline->fields[i].beg = newline->buf.buffer + (old->fields[i].beg - - old->buf.buffer); - } - return newline; -} - static void freeline (struct line *line) { @@ -393,49 +377,69 @@ check_order (const struct line *prev, } } +static inline void +reset_line (struct line *line) +{ + line->nfields = 0; +} + +static struct line * +init_linep (struct line **linep) +{ + struct line *line = xmalloc (sizeof *line); + memset (line, '\0', sizeof *line); + *linep = line; + return line; +} + /* Read a line from FP into LINE and split it into fields. Return true if successful. */ static bool -get_line (FILE *fp, struct line *line, int which) +get_line (FILE *fp, struct line **linep, int which) { - initbuffer (&line->buf); + struct line *line = *linep; + + if (line == prevline[which - 1]) + { + SWAPLINES (line, spareline[which - 1]); + *linep = line; + } + + if (line) + reset_line (line); + else + line = init_linep (linep); if (! readlinebuffer (&line->buf, fp)) { if (ferror (fp)) error (EXIT_FAILURE, errno, _("read error")); - free (line->buf.buffer); - line->buf.buffer = NULL; + freeline (line); return false; } - line->nfields_allocated = 0; - line->nfields = 0; - line->fields = NULL; xfields (line); if (prevline[which - 1]) - { - check_order (prevline[which - 1], line, which); - freeline (prevline[which - 1]); - free (prevline[which - 1]); - } - prevline[which - 1] = dup_line (line); + check_order (prevline[which - 1], line, which); + + prevline[which - 1] = line; return true; } static void -free_prevline (void) +free_spareline (void) { size_t i; - for (i = 0; i < ARRAY_CARDINALITY (prevline); i++) + for (i = 0; i < ARRAY_CARDINALITY (spareline); i++) { - if (prevline[i]) - freeline (prevline[i]); - free (prevline[i]); - prevline[i] = NULL; + if (spareline[i]) + { + freeline (spareline[i]); + free (spareline[i]); + } } } @@ -453,7 +457,12 @@ static bool getseq (FILE *fp, struct seq *seq, int whichfile) { if (seq->count == seq->alloc) - seq->lines = X2NREALLOC (seq->lines, &seq->alloc); + { + size_t i; + seq->lines = X2NREALLOC (seq->lines, &seq->alloc); + for (i = seq->count; i < seq->alloc; i++) + seq->lines[i] = NULL; + } if (get_line (fp, &seq->lines[seq->count], whichfile)) { @@ -469,10 +478,8 @@ static bool advance_seq (FILE *fp, struct seq *seq, bool first, int whichfile) { if (first) - { - freeline (&seq->lines[0]); - seq->count = 0; - } + seq->count = 0; + return getseq (fp, seq, whichfile); } @@ -480,9 +487,13 @@ static void delseq (struct seq *seq) { size_t i; - for (i = 0; i < seq->count; i++) - if (seq->lines[i].buf.buffer) - freeline (&seq->lines[i]); + for (i = 0; i < seq->alloc; i++) + if (seq->lines[i]) + { + if (seq->lines[i]->buf.buffer) + freeline (seq->lines[i]); + free (seq->lines[i]); + } free (seq->lines); } @@ -595,10 +606,12 @@ static void join (FILE *fp1, FILE *fp2) { struct seq seq1, seq2; - struct line line; + struct line **linep = xmalloc (sizeof *linep); int diff; bool eof1, eof2, checktail; + *linep = NULL; + /* Read the first line of each file. */ initseq (&seq1); getseq (fp1, &seq1, 1); @@ -608,12 +621,12 @@ join (FILE *fp1, FILE *fp2) while (seq1.count && seq2.count) { size_t i; - diff = keycmp (&seq1.lines[0], &seq2.lines[0], + diff = keycmp (seq1.lines[0], seq2.lines[0], join_field_1, join_field_2); if (diff < 0) { if (print_unpairables_1) - prjoin (&seq1.lines[0], &uni_blank); + prjoin (seq1.lines[0], &uni_blank); advance_seq (fp1, &seq1, true, 1); seen_unpairable = true; continue; @@ -621,7 +634,7 @@ join (FILE *fp1, FILE *fp2) if (diff > 0) { if (print_unpairables_2) - prjoin (&uni_blank, &seq2.lines[0]); + prjoin (&uni_blank, seq2.lines[0]); advance_seq (fp2, &seq2, true, 2); seen_unpairable = true; continue; @@ -637,7 +650,7 @@ join (FILE *fp1, FILE *fp2) ++seq1.count; break; } - while (!keycmp (&seq1.lines[seq1.count - 1], &seq2.lines[0], + while (!keycmp (seq1.lines[seq1.count - 1], seq2.lines[0], join_field_1, join_field_2)); /* Keep reading lines from file2 as long as they continue to @@ -650,7 +663,7 @@ join (FILE *fp1, FILE *fp2) ++seq2.count; break; } - while (!keycmp (&seq1.lines[0], &seq2.lines[seq2.count - 1], + while (!keycmp (seq1.lines[0], seq2.lines[seq2.count - 1], join_field_1, join_field_2)); if (print_pairables) @@ -659,25 +672,21 @@ join (FILE *fp1, FILE *fp2) { size_t j; for (j = 0; j < seq2.count - 1; ++j) - prjoin (&seq1.lines[i], &seq2.lines[j]); + prjoin (seq1.lines[i], seq2.lines[j]); } } - for (i = 0; i < seq1.count - 1; ++i) - freeline (&seq1.lines[i]); if (!eof1) { - seq1.lines[0] = seq1.lines[seq1.count - 1]; + SWAPLINES (seq1.lines[0], seq1.lines[seq1.count - 1]); seq1.count = 1; } else seq1.count = 0; - for (i = 0; i < seq2.count - 1; ++i) - freeline (&seq2.lines[i]); if (!eof2) { - seq2.lines[0] = seq2.lines[seq2.count - 1]; + SWAPLINES (seq2.lines[0], seq2.lines[seq2.count - 1]); seq2.count = 1; } else @@ -697,14 +706,12 @@ join (FILE *fp1, FILE *fp2) if ((print_unpairables_1 || checktail) && seq1.count) { if (print_unpairables_1) - prjoin (&seq1.lines[0], &uni_blank); - freeline (&seq1.lines[0]); + prjoin (seq1.lines[0], &uni_blank); seen_unpairable = true; - while (get_line (fp1, &line, 1)) + while (get_line (fp1, linep, 1)) { if (print_unpairables_1) - prjoin (&line, &uni_blank); - freeline (&line); + prjoin (*linep, &uni_blank); if (issued_disorder_warning[0] && !print_unpairables_1) break; } @@ -713,19 +720,21 @@ join (FILE *fp1, FILE *fp2) if ((print_unpairables_2 || checktail) && seq2.count) { if (print_unpairables_2) - prjoin (&uni_blank, &seq2.lines[0]); - freeline (&seq2.lines[0]); + prjoin (&uni_blank, seq2.lines[0]); seen_unpairable = true; - while (get_line (fp2, &line, 2)) + while (get_line (fp2, linep, 2)) { if (print_unpairables_2) - prjoin (&uni_blank, &line); - freeline (&line); + prjoin (&uni_blank, *linep); if (issued_disorder_warning[1] && !print_unpairables_2) break; } } + if (*linep) + free (*linep); + + free (linep); delseq (&seq1); delseq (&seq2); } @@ -941,7 +950,7 @@ main (int argc, char **argv) hard_LC_COLLATE = hard_locale (LC_COLLATE); atexit (close_stdout); - atexit (free_prevline); + atexit (free_spareline); print_pairables = true; seen_unpairable = false; -- 1.5.2.5