[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [PATCH v2 0/5] Speed up uNN_chr and uNN_strchr with Boyer-Moore algo
From: |
Bruno Haible |
Subject: |
Re: [PATCH v2 0/5] Speed up uNN_chr and uNN_strchr with Boyer-Moore algorithm |
Date: |
Sat, 31 Jul 2010 21:35:41 +0200 |
User-agent: |
KMail/1.9.9 |
Hi Paolo,
> I pushed it now, thanks for the review!
After I added some more unit tests for u8_strchr, I see crashes and assertion
failures:
1)
mem = (UNIT *) (page_boundary - 2 * sizeof (UNIT));
mem[0] = 0x50;
mem[1] = 0;
ASSERT (u8_strchr (mem, 0x123) == NULL); /* crashes */
2)
mem = (UNIT *) (page_boundary - 3 * sizeof (UNIT));
mem[0] = 0x50;
mem[1] = 0x50;
mem[2] = 0;
ASSERT (u8_strchr (mem, 0x123) == NULL); /* assertion failed */
3)
mem = (UNIT *) (page_boundary - 3 * sizeof (UNIT));
mem[0] = 0x50;
mem[1] = 0x50;
mem[2] = 0;
ASSERT (u8_strchr (mem, 0x3456) == NULL); /* crashes */
4)
mem = (UNIT *) (page_boundary - 4 * sizeof (UNIT));
mem[0] = 0x50;
mem[1] = 0x50;
mem[2] = 0x50;
mem[3] = 0;
ASSERT (u8_strchr (mem, 0x23456) == NULL); /* crashes */
I'm applying the two attached patches: Bug fixes, then comments and tiny
optimizations.
2010-07-31 Bruno Haible <address@hidden>
unistr/u8-strchr: Fix several bugs.
* lib/unistr/u8-strchr.c (u8_strchr): Don't search beyond the end of
the string. When not found, return NULL, not a pointer near the end.
--- lib/unistr/u8-strchr.c.orig Sat Jul 31 21:26:45 2010
+++ lib/unistr/u8-strchr.c Sat Jul 31 21:22:34 2010
@@ -62,7 +62,7 @@
switch (u8_uctomb_aux (c, uc, 6))
{
case 2:
- if (*s == 0)
+ if (*s == 0 || s[1] == 0)
break;
{
uint8_t c0 = c[0];
@@ -96,11 +96,11 @@
if (s[1] == 0)
break;
}
- return (uint8_t *) s;
}
+ break;
case 3:
- if (*s == 0 || s[1] == 0)
+ if (*s == 0 || s[1] == 0 || s[2] == 0)
break;
{
uint8_t c0 = c[0];
@@ -147,7 +147,7 @@
}
case 4:
- if (*s == 0 || s[1] == 0 || s[2] == 0)
+ if (*s == 0 || s[1] == 0 || s[2] == 0 || s[3] == 0)
break;
{
uint8_t c0 = c[0];
2010-07-31 Bruno Haible <address@hidden>
unistr/u8-chr, unistr/u8-strchr: Optimize and add comments.
* lib/unistr/u8-chr.c (u8_chr): Add comments. Remove a useless test at
the beginning of the loop.
* lib/unistr/u8-strchr.c (u8_strchr): Add comments. Don't fall through
cases in 'switch' statement.
--- lib/unistr/u8-chr.c.orig Sat Jul 31 21:29:46 2010
+++ lib/unistr/u8-chr.c Sat Jul 31 21:29:14 2010
@@ -70,14 +70,17 @@
uint8_t c1 = c[1];
const uint8_t *end = s + n - 1;
- while (s < end)
+ do
{
+ /* Here s < end.
+ Test whether s[0..1] == { c0, c1 }. */
uint8_t s1 = s[1];
if (s1 == c1)
{
if (*s == c0)
return (uint8_t *) s;
else
+ /* Skip the search at s + 1, because s[1] = c1 < c0. */
s += 2;
}
else
@@ -85,9 +88,11 @@
if (s1 == c0)
s++;
else
+ /* Skip the search at s + 1, because s[1] != c0. */
s += 2;
}
}
+ while (s < end);
break;
}
@@ -104,14 +109,19 @@
else
skip = 3;
- while (s < end)
+ do
{
+ /* Here s < end.
+ Test whether s[0..2] == { c0, c1, c2 }. */
uint8_t s2 = s[2];
if (s2 == c2)
{
if (s[1] == c1 && *s == c0)
return (uint8_t *) s;
else
+ /* If c2 != c1:
+ Skip the search at s + 1, because s[2] == c2 != c1.
+ Skip the search at s + 2, because s[2] == c2 < c0. */
s += skip;
}
else
@@ -119,11 +129,15 @@
if (s2 == c1)
s++;
else if (s2 == c0)
+ /* Skip the search at s + 1, because s[2] != c1. */
s += 2;
else
+ /* Skip the search at s + 1, because s[2] != c1.
+ Skip the search at s + 2, because s[2] != c0. */
s += 3;
}
}
+ while (s < end);
break;
}
@@ -143,14 +157,21 @@
else
skip = 4;
- while (s < end)
+ do
{
+ /* Here s < end.
+ Test whether s[0..3] == { c0, c1, c2, c3 }. */
uint8_t s3 = s[3];
if (s3 == c3)
{
if (s[2] == c2 && s[1] == c1 && *s == c0)
return (uint8_t *) s;
else
+ /* If c3 != c2:
+ Skip the search at s + 1, because s[3] == c3 != c2.
+ If c3 != c1:
+ Skip the search at s + 2, because s[3] == c3 != c1.
+ Skip the search at s + 3, because s[3] == c3 < c0. */
s += skip;
}
else
@@ -158,13 +179,20 @@
if (s3 == c2)
s++;
else if (s3 == c1)
+ /* Skip the search at s + 1, because s[3] != c2. */
s += 2;
else if (s3 == c0)
+ /* Skip the search at s + 1, because s[3] != c2.
+ Skip the search at s + 2, because s[3] != c1. */
s += 3;
else
+ /* Skip the search at s + 1, because s[3] != c2.
+ Skip the search at s + 2, because s[3] != c1.
+ Skip the search at s + 3, because s[3] != c0. */
s += 4;
}
}
+ while (s < end);
break;
}
}
--- lib/unistr/u8-strchr.c.orig Sat Jul 31 21:29:46 2010
+++ lib/unistr/u8-strchr.c Sat Jul 31 21:29:14 2010
@@ -67,15 +67,19 @@
{
uint8_t c0 = c[0];
uint8_t c1 = c[1];
+ /* Search for { c0, c1 }. */
uint8_t s1 = s[1];
for (;;)
{
+ /* Here s[0] != 0, s[1] != 0.
+ Test whether s[0..1] == { c0, c1 }. */
if (s1 == c1)
{
if (*s == c0)
return (uint8_t *) s;
else
+ /* Skip the search at s + 1, because s[1] = c1 < c0. */
goto case2_skip2;
}
else
@@ -83,6 +87,7 @@
if (s1 == c0)
goto case2_skip1;
else
+ /* Skip the search at s + 1, because s[1] != c0. */
goto case2_skip2;
}
case2_skip2:
@@ -106,26 +111,36 @@
uint8_t c0 = c[0];
uint8_t c1 = c[1];
uint8_t c2 = c[2];
+ /* Search for { c0, c1, c2 }. */
uint8_t s2 = s[2];
for (;;)
{
+ /* Here s[0] != 0, s[1] != 0, s[2] != 0.
+ Test whether s[0..2] == { c0, c1, c2 }. */
if (s2 == c2)
{
if (s[1] == c1 && *s == c0)
return (uint8_t *) s;
- else if (c2 == c1)
- goto case3_skip1;
else
- goto case3_skip3;
+ /* If c2 != c1:
+ Skip the search at s + 1, because s[2] == c2 != c1.
+ Skip the search at s + 2, because s[2] == c2 < c0. */
+ if (c2 == c1)
+ goto case3_skip1;
+ else
+ goto case3_skip3;
}
else
{
if (s2 == c1)
goto case3_skip1;
else if (s2 == c0)
+ /* Skip the search at s + 1, because s[2] != c1. */
goto case3_skip2;
else
+ /* Skip the search at s + 1, because s[2] != c1.
+ Skip the search at s + 2, because s[2] != c0. */
goto case3_skip3;
}
case3_skip3:
@@ -145,6 +160,7 @@
break;
}
}
+ break;
case 4:
if (*s == 0 || s[1] == 0 || s[2] == 0 || s[3] == 0)
@@ -154,30 +170,45 @@
uint8_t c1 = c[1];
uint8_t c2 = c[2];
uint8_t c3 = c[3];
+ /* Search for { c0, c1, c2, c3 }. */
uint8_t s3 = s[3];
for (;;)
{
+ /* Here s[0] != 0, s[1] != 0, s[2] != 0, s[3] != 0.
+ Test whether s[0..3] == { c0, c1, c2, c3 }. */
if (s3 == c3)
{
if (s[2] == c2 && s[1] == c1 && *s == c0)
return (uint8_t *) s;
- else if (c3 == c2)
- goto case4_skip1;
- else if (c3 == c1)
- goto case4_skip2;
else
- goto case4_skip4;
+ /* If c3 != c2:
+ Skip the search at s + 1, because s[3] == c3 != c2.
+ If c3 != c1:
+ Skip the search at s + 2, because s[3] == c3 != c1.
+ Skip the search at s + 3, because s[3] == c3 < c0. */
+ if (c3 == c2)
+ goto case4_skip1;
+ else if (c3 == c1)
+ goto case4_skip2;
+ else
+ goto case4_skip4;
}
else
{
if (s3 == c2)
goto case4_skip1;
else if (s3 == c1)
+ /* Skip the search at s + 1, because s[3] != c2. */
goto case4_skip2;
else if (s3 == c0)
+ /* Skip the search at s + 1, because s[3] != c2.
+ Skip the search at s + 2, because s[3] != c1. */
goto case4_skip3;
else
+ /* Skip the search at s + 1, because s[3] != c2.
+ Skip the search at s + 2, because s[3] != c1.
+ Skip the search at s + 3, because s[3] != c0. */
goto case4_skip4;
}
case4_skip4:
@@ -202,6 +233,7 @@
break;
}
}
+ break;
}
return NULL;
- Re: [PATCH v2 0/5] Speed up uNN_chr and uNN_strchr with Boyer-Moore algorithm, (continued)
- Re: [PATCH v2 0/5] Speed up uNN_chr and uNN_strchr with Boyer-Moore algorithm, Bruno Haible, 2010/07/31
- guarantees of u8_mbtouc/u8_strmbtouc (was Re: [PATCH v2 0/5] Speed up uNN_chr and uNN_strchr with Boyer-Moore algorithm), Paolo Bonzini, 2010/07/29
- Re: guarantees of u8_mbtouc/u8_strmbtouc, Bruno Haible, 2010/07/31
- Re: guarantees of u8_mbtouc/u8_strmbtouc, Paolo Bonzini, 2010/07/31
- Re: guarantees of u8_mbtouc/u8_strmbtouc, Bruno Haible, 2010/07/31
- Message not available
- Re: [PATCH v2 0/5] Speed up uNN_chr and uNN_strchr with Boyer-Moore algorithm, Paolo Bonzini, 2010/07/29
- Re: speed up u8_strstr, Bruno Haible, 2010/07/31
- Re: speed up u8_strstr, Bruno Haible, 2010/07/31
- Re: [PATCH v2 0/5] Speed up uNN_chr and uNN_strchr with Boyer-Moore algorithm, Bruno Haible, 2010/07/28
- Re: [PATCH v2 0/5] Speed up uNN_chr and uNN_strchr with Boyer-Moore algorithm, Paolo Bonzini, 2010/07/28
- Re: [PATCH v2 0/5] Speed up uNN_chr and uNN_strchr with Boyer-Moore algorithm,
Bruno Haible <=
- Re: [PATCH v2 0/5] Speed up uNN_chr and uNN_strchr with Boyer-Moore algorithm, Paolo Bonzini, 2010/07/31
Re: [PATCH v2 0/5] Speed up uNN_chr and uNN_strchr with Boyer-Moore algorithm, Pádraig Brady, 2010/07/23