From b041f8bc3986d1adcba8d2ea1c48f37c435fcc62 Mon Sep 17 00:00:00 2001 From: Jim Meyering Date: Mon, 18 Nov 2013 17:53:33 -0800 Subject: [PATCH] dfa: avoid undefined behavior of "1 << 31" * src/dfa.c (charclass): Change type from "int" to "unsigned int". (tstbit): Rather than shifting "1" left to form a mask, shift the LHS bits the right and use "1" as the mask. Also, return bool, rather than "int". (setbit, clrbit, dfastate): Don't shift "1" (aka (int)1) left by 31 bits. Instead, use "1U" as the operand, to avoid undefined behavior. Spotted by gcc's new -fsanitize=undefined. Co-authored-by: Paul Eggert --- src/dfa.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/dfa.c b/src/dfa.c index 92c410e..f196b8a 100644 --- a/src/dfa.c +++ b/src/dfa.c @@ -88,7 +88,7 @@ #define CHARCLASS_INTS ((NOTCHAR + INTBITS - 1) / INTBITS) /* Sets of unsigned characters are stored as bit vectors in arrays of ints. */ -typedef int charclass[CHARCLASS_INTS]; +typedef unsigned int charclass[CHARCLASS_INTS]; /* Convert a possibly-signed character to an unsigned character. This is a bit safer than casting to unsigned char, since it catches some type @@ -547,22 +547,22 @@ prtok (token t) /* Stuff pertaining to charclasses. */ -static int +static bool tstbit (unsigned int b, charclass const c) { - return c[b / INTBITS] & 1 << b % INTBITS; + return c[b / INTBITS] >> b % INTBITS & 1; } static void setbit (unsigned int b, charclass c) { - c[b / INTBITS] |= 1 << b % INTBITS; + c[b / INTBITS] |= 1U << b % INTBITS; } static void clrbit (unsigned int b, charclass c) { - c[b / INTBITS] &= ~(1 << b % INTBITS); + c[b / INTBITS] &= ~(1U << b % INTBITS); } static void @@ -2738,7 +2738,7 @@ dfastate (state_num s, struct dfa *d, state_num trans[]) /* Set the transitions for each character in the current label. */ for (j = 0; j < CHARCLASS_INTS; ++j) for (k = 0; k < INTBITS; ++k) - if (labels[i][j] & 1 << k) + if (labels[i][j] & 1U << k) { int c = j * INTBITS + k; -- 1.8.4.rc0.11.g35f5eaa