gnunet-svn
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[GNUnet-SVN] r27315 - gnunet/src/regex


From: gnunet
Subject: [GNUnet-SVN] r27315 - gnunet/src/regex
Date: Tue, 28 May 2013 01:35:17 +0200

Author: bartpolot
Date: 2013-05-28 01:35:17 +0200 (Tue, 28 May 2013)
New Revision: 27315

Modified:
   gnunet/src/regex/regex_test_lib.c
Log:
Fix regexes with accepting common prefix, optimize regex prefix lengths

Modified: gnunet/src/regex/regex_test_lib.c
===================================================================
--- gnunet/src/regex/regex_test_lib.c   2013-05-27 17:28:55 UTC (rev 27314)
+++ gnunet/src/regex/regex_test_lib.c   2013-05-27 23:35:17 UTC (rev 27315)
@@ -60,29 +60,30 @@
   char *s;
 };
 
-// static void
-// space (int n)
-// {
-//   int i;
-//   for (i = 0; i < n; i++)
-//     printf ("  ");
-// }
-// 
-// static void
-// debugctx (struct RegexCombineCtx *ctx, int level)
-// {
-//   struct RegexCombineCtx *p;
-//   space (level);
-//   if (NULL != ctx->s)
-//     printf ("'%s'\n", ctx->s);
-//   else
-//     printf ("NULL\n");
-//   for (p = ctx->head; NULL != p; p = p->next)
-//   {
-//     debugctx (p, level + 1);
-//   }
-// }
+/*
+static void
+space (int n)
+{
+  int i;
+  for (i = 0; i < n; i++)
+    printf ("  ");
+}
 
+static void
+debugctx (struct RegexCombineCtx *ctx, int level)
+{
+  struct RegexCombineCtx *p;
+  space (level);
+  if (NULL != ctx->s)
+    printf ("'%s'\n", ctx->s);
+  else
+    printf ("NULL\n");
+  for (p = ctx->head; NULL != p; p = p->next)
+  {
+    debugctx (p, level + 1);
+  }
+}
+*/
 
 /**
  * Extract a string from all prefix-combined regexes.
@@ -110,7 +111,9 @@
     s = regex_combine (p);
     GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "  total '%s'\n", s);
     if (strlen(s) == 0)
+    {
       opt = GNUNET_YES;
+    }
     else
     {
       GNUNET_asprintf (&tmp, "%s%s|", regex, s);
@@ -136,7 +139,7 @@
   if (NULL != ctx->s)
   {
     if (opt)
-      GNUNET_asprintf (&s, "%s[%s]", ctx->s, regex);
+      GNUNET_asprintf (&s, "%s(%s)?", ctx->s, regex);
     else
       GNUNET_asprintf (&s, "%s(%s)", ctx->s, regex);
     GNUNET_free (regex);
@@ -149,6 +152,68 @@
 
 
 /**
+ * Get the number of matching characters on the prefix of both strings.
+ * 
+ * @param s1 String 1.
+ * @param s2 String 2.
+ * 
+ * @return Number of characters of matching prefix.
+ */
+static unsigned int
+get_prefix_length (const char *s1, const char *s2)
+{
+  unsigned int l1;
+  unsigned int l2;
+  unsigned int limit;
+  unsigned int i;
+
+  l1 = strlen (s1);
+  l2 = strlen (s2);
+  limit = l1 > l2 ? l2 : l1;
+
+  for (i = 1; i <= limit; i++)
+  {
+    if (0 != strncmp (s1, s2, i))
+      return i - 1;
+  }
+  return limit;
+}
+
+
+/**
+ * Return the child context with the longest prefix match with the regex.
+ * Usually only one child will match, search all just in case.
+ * 
+ * @param ctx Context whose children to search.
+ * @param regex String to match.
+ * 
+ * @return Child with the longest prefix, NULL if no child matches.
+ */
+static struct RegexCombineCtx *
+get_longest_prefix (struct RegexCombineCtx *ctx, const char *regex)
+{
+  struct RegexCombineCtx *p;
+  struct RegexCombineCtx *best;
+  unsigned int l;
+  unsigned int best_l;
+
+  best_l = 0;
+  best = NULL;
+  for (p = ctx->head; NULL != p; p = p->next)
+  {
+    l = get_prefix_length (p->s, regex);
+    if (l > best_l)
+    {
+      GNUNET_break (0 == best_l);
+      best = p;
+      best_l = l;
+    }
+  }
+  return best;
+}
+
+
+/**
  * Add a single regex to a context, combining with exisiting regex by-prefix.
  *
  * @param ctx Context with 0 or more regexes.
@@ -158,42 +223,55 @@
 regex_add (struct RegexCombineCtx *ctx, const char *regex)
 {
   struct RegexCombineCtx *p;
-  const char *rest;
+  struct RegexCombineCtx *newctx;
+  unsigned int prefix_l;
+  const char *rest_r;
+  const char *rest_s;
+  size_t len;
 
-  rest = &regex[1];
-  for (p = ctx->head; NULL != p; p = p->next)
+  if (0 == strlen (regex))
+    return;
+
+  p = get_longest_prefix (ctx, regex);
+  if (NULL != p) /* There is some prefix match, reduce regex and try again */
   {
-    if (p->s[0] == regex[0])
+    prefix_l = get_prefix_length (p->s, regex);
+    rest_s = &p->s[prefix_l];
+    rest_r = &regex[prefix_l];
+    GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "chosen '%s' [%u]\n", p->s, prefix_l);
+    GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "prefix r '%.*s'\n", prefix_l, p->s);
+    GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "rest r '%s'\n", rest_r);
+    GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "rest s '%s'\n", rest_s);
+    len = strlen (p->s);
+    if (prefix_l < len) /* only partial match, split existing state */
     {
-      if (1 == strlen(p->s))
-      {
-        GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "common char %s\n", p->s);
-        GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "adding %s\n", rest);
-        regex_add (p, rest);
-      }
-      else
-      {
-        struct RegexCombineCtx *newctx;
-        newctx = GNUNET_malloc (sizeof (struct RegexCombineCtx));
-        newctx->s = GNUNET_strdup (&p->s[1]);
-        GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " p has now %s\n", p->s);
-        GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " p will have %.1s\n", p->s);
-        GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " regex is %s\n", regex);
-        GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " new has now %s\n", newctx->s);
-        GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " rest is now %s\n", rest);
-        p->s[1] = '\0'; /* dont realloc */
-        GNUNET_CONTAINER_DLL_insert (p->head, p->tail, newctx);
-        regex_add (p, rest);
-      }
-      return;
+      newctx = GNUNET_malloc (sizeof (struct RegexCombineCtx));
+      newctx->head = p->head;
+      newctx->tail = p->tail;
+      newctx->s = GNUNET_malloc(len - prefix_l + 1);
+      strncpy (newctx->s, rest_s, len - prefix_l + 1);
+
+      p->head = newctx;
+      p->tail = newctx;
+      p->s[prefix_l] = '\0';
     }
+    regex_add (p, rest_r);
+    return;
   }
-  GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " no  match\n");
+  /* There is no prefix match, add new */
+  if (NULL == ctx->head && NULL != ctx->s)
+  {
+    /* this was the end before, add empty string */
+    newctx = GNUNET_malloc (sizeof (struct RegexCombineCtx));
+    newctx->s = GNUNET_strdup ("");
+    GNUNET_CONTAINER_DLL_insert (ctx->head, ctx->tail, newctx);
+  }
+  GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " no match\n");
   GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " new state %s\n", regex);
   GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " under %s\n", ctx->s);
-  p = GNUNET_malloc (sizeof (struct RegexCombineCtx));
-  p->s = GNUNET_strdup (regex);
-  GNUNET_CONTAINER_DLL_insert (ctx->head, ctx->tail, p);
+  newctx = GNUNET_malloc (sizeof (struct RegexCombineCtx));
+  newctx->s = GNUNET_strdup (regex);
+  GNUNET_CONTAINER_DLL_insert (ctx->head, ctx->tail, newctx);
 }
 
 
@@ -240,9 +318,10 @@
     current = regexes[i];
     GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Regex %u: %s\n", i, current);
     regex_add (ctx, current);
+    /* debugctx (ctx, 0); */
   }
   GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "\nCombining...\n");
-  //debugctx (ctx, 0);
+  /* debugctx (ctx, 0); */
 
   combined = regex_combine (ctx);
 




reply via email to

[Prev in Thread] Current Thread [Next in Thread]