nmh-workers
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Nmh-workers] pick character classes


From: Paul Fox
Subject: Re: [Nmh-workers] pick character classes
Date: Wed, 29 Mar 2006 15:27:56 -0500

 > > well, this certainly explains my sense of deja vu.  apparently
 > > i wrote that patch.  and then i rewrote it this afternoon.  doh! 
 > > i'm such a dolt.  :-)
 > > 
 > > how do i get at the patch from that page?  i'm sure it's better than
 > > what i did today.  :-)

indeed, i seem to have done a better job of fixing the problem
last may than i did yesterday.  i hope my boss doesn't find out
that a) the quality of my work has been declining, and b) i can't
remember what i worked on less than a year ago!

anyway, here's a corrected, updated patch.  it:
    - adds range support to character classes
    - fixes out-of-bounds references to the case-folding array
        when doing case comparisons for 8-bit message text
    - makes character classes as case tolerant as the rest
        of the pick regexp machine.  (where lowercase chars in
        patterns should match uppercase in the text.)
    - fixes an uninitialized pointer warning, which might actually have
        been a real bug

(the reason this feature is important to me, btw, is that i
want to use pick arguments like this:
        -or -subj '[?-?][?-?].*[?-?][?-?].*[?-?][?-?]' \
        -or -from '[?-?][?-?].*[?-?][?-?].*[?-?][?-?]' \

to do blind deletion of messages in my "probably spam" folder
that really are certainly spam:  i get no legitimate mail that
would have multiple sequences of binary characters in either the
subject or the from.  so if such a message has been classified as
spam, it almost certainly _is_ spam.  this makes false positive
spam hits quite a bit easier to spot, by reducing the overall
volume.)

it occurs to me that since i'm now a project member and was able
to update the project homepage, i could probably also apply this
patch myself.  it works for me, but i'd really rather someone
else reviewed and/or tested it first...

paul

--- picksbr.c.orig      Wed Mar 29 08:35:39 2006
+++ picksbr.c   Wed Mar 29 09:05:16 2006
@@ -81,7 +81,7 @@
 #define        STAR    01
 
 #define LBSIZE  1024
-#define        ESIZE   256
+#define        ESIZE   1024
 
 
 static char linebuf[LBSIZE + 1];
@@ -104,6 +104,23 @@
        0150,0151,0152,0153,0154,0155,0156,0157,
        0160,0161,0162,0163,0164,0165,0166,0167,
        0170,0171,0172,0173,0174,0175,0176,0177,
+
+       0200,0201,0202,0203,0204,0205,0206,0207,
+       0210,0211,0212,0213,0214,0215,0216,0217,
+       0220,0221,0222,0223,0224,0225,0226,0227,
+       0230,0231,0232,0233,0234,0235,0236,0237,
+       0240,0241,0242,0243,0244,0245,0246,0247,
+       0250,0251,0252,0253,0254,0255,0256,0257,
+       0260,0261,0262,0263,0264,0265,0266,0267,
+       0270,0271,0272,0273,0274,0275,0276,0277,
+       0300,0301,0302,0303,0304,0305,0306,0307,
+       0310,0311,0312,0313,0314,0315,0316,0317,
+       0320,0321,0322,0323,0324,0325,0326,0327,
+       0330,0331,0332,0333,0334,0335,0336,0337,
+       0340,0341,0342,0343,0344,0345,0346,0347,
+       0350,0351,0352,0353,0354,0355,0356,0357,
+       0360,0361,0362,0363,0364,0365,0366,0367,
+       0370,0371,0372,0373,0374,0375,0376,0377,
 };
 
 /*
@@ -557,7 +574,7 @@
 {
     register int c;
     int cclcnt;
-    register char *ep, *dp, *sp, *lastep;
+    register char *ep, *dp, *sp, *lastep = 0;
 
     dp = (ep = n->n_expbuf) + sizeof n->n_expbuf;
     sp = astr;
@@ -601,11 +618,25 @@
                    c = *sp++;
                    ep[-2] = NCCL;
                }
-               do {
+               if (c == '-') {
                    *ep++ = c;
                    cclcnt++;
-                   if (c == '\0' || ep >= dp)
-                       goto cerror;
+                   c = *sp++;
+               }
+               do {
+                   if (c == '-' && *sp != '\0' && *sp != ']') {
+                       for (c = ep[-1]+1; c < *sp; c++) {
+                           *ep++ = c;
+                           cclcnt++;
+                           if (c == '\0' || ep >= dp)
+                               goto cerror;
+                       }
+                   } else {
+                       *ep++ = c;
+                       cclcnt++;
+                       if (c == '\0' || ep >= dp)
+                           goto cerror;
+                   }
                } while ((c = *sp++) != ']');
                lastep[1] = cclcnt;
                continue;
@@ -799,7 +830,7 @@
 
     n = *set++;
     while (--n)
-       if (*set++ == c)
+       if (*set++ == c || set[-1] == cc[(unsigned char)c])
            return (af);
 
     return (!af);

=---------------------
 paul fox, address@hidden (arlington, ma, where it's 59.0 degrees)




reply via email to

[Prev in Thread] Current Thread [Next in Thread]