gnunet-svn
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[GNUnet-SVN] r22429 - gnunet/src/regex


From: gnunet
Subject: [GNUnet-SVN] r22429 - gnunet/src/regex
Date: Mon, 2 Jul 2012 14:22:37 +0200

Author: szengel
Date: 2012-07-02 14:22:37 +0200 (Mon, 02 Jul 2012)
New Revision: 22429

Modified:
   gnunet/src/regex/regex.c
   gnunet/src/regex/test_regex_eval_api.c
   gnunet/src/regex/test_regex_iterate_api.c
   gnunet/src/regex/test_regex_proofs.c
Log:
regex bugfixes


Modified: gnunet/src/regex/regex.c
===================================================================
--- gnunet/src/regex/regex.c    2012-07-02 12:14:49 UTC (rev 22428)
+++ gnunet/src/regex/regex.c    2012-07-02 12:22:37 UTC (rev 22429)
@@ -503,7 +503,7 @@
     }
   }
 
-  if (is_dup)
+  if (GNUNET_YES == is_dup)
     return;
 
   // sort transitions by label
@@ -734,21 +734,41 @@
 {
   struct GNUNET_REGEX_State *s_check;
   struct Transition *t_check;
+  struct Transition *t;
+  struct Transition *t_next;
   char *new_name;
+  int is_dup;
 
   GNUNET_assert (NULL != ctx && NULL != a && NULL != s1 && NULL != s2);
 
   if (s1 == s2)
     return;
 
-  // 1. Make all transitions pointing to s2 point to s1
+  // 1. Make all transitions pointing to s2 point to s1, unless this transition
+  // does not already exists, if it already exists remove transition.
   for (s_check = a->states_head; NULL != s_check; s_check = s_check->next)
   {
-    for (t_check = s_check->transitions_head; NULL != t_check;
-         t_check = t_check->next)
+    for (t_check = s_check->transitions_head; NULL != t_check; t_check = 
t_next)
     {
+      t_next = t_check->next;
+
       if (s2 == t_check->to_state)
-        t_check->to_state = s1;
+      {
+        is_dup = GNUNET_NO;
+        for (t = t_check->from_state->transitions_head; NULL != t; t = t->next)
+        {
+          if (t->to_state == s1 && t_check->label == t->label)
+            is_dup = GNUNET_YES;
+        }
+        if (GNUNET_NO == is_dup)
+          t_check->to_state = s1;
+        else
+        {
+          GNUNET_CONTAINER_DLL_remove (t_check->from_state->transitions_head,
+                                       t_check->from_state->transitions_tail,
+                                       t_check);
+        }
+      }
     }
   }
 
@@ -1015,22 +1035,23 @@
   states[count] = s;
 }
 
-
-/**
- * create proofs for all states in the given automaton. Implementation of the
- * algorithm descriped in chapter 3.2.1 of "Automata Theory, Languages, and
- * Computation 3rd Edition" by Hopcroft, Motwani and Ullman.
- *
- * @param a automaton.
+/** 
+ * Construct the regular expression given the inductive step,
+ * $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^*
+ * R^{(k-1)}_{kj}, and simplify the resulting expression saved in R_cur_ij.
+ * 
+ * @param R_last_ij value of  $R^{(k-1)_{ij}.
+ * @param R_last_ik value of  $R^{(k-1)_{ik}.
+ * @param R_last_kk value of  $R^{(k-1)_{kk}.
+ * @param R_last_kj value of  $R^{(k-1)_{kj}.
+ * @param R_cur_ij result for this inductive step is saved in R_cur_ij, 
R_cur_ij
+ *                 is expected to be NULL when called!
  */
 static void
-automaton_create_proofs (struct GNUNET_REGEX_Automaton *a)
+automaton_create_proofs_simplify (char *R_last_ij, char *R_last_ik,
+                                  char *R_last_kk, char *R_last_kj,
+                                  char **R_cur_ij)
 {
-  unsigned int n = a->state_count;
-  struct GNUNET_REGEX_State *states[n];
-  char *R_last[n][n];
-  char *R_cur[n][n];
-  struct Transition *t;
   char *R_cur_l;
   char *R_cur_r;
   char *temp_a;
@@ -1039,11 +1060,7 @@
   char *R_temp_ik;
   char *R_temp_kj;
   char *R_temp_kk;
-  char *complete_regex;
-  unsigned int i;
-  unsigned int j;
-  unsigned int k;
-  unsigned int cnt;
+
   int eps_check;
   int ij_ik_cmp;
   int ij_kj_cmp;
@@ -1053,10 +1070,382 @@
   int kk_kj_cmp;
   int clean_ik_kk_cmp;
   int clean_kk_kj_cmp;
+  unsigned int cnt;
+
   size_t length;
   size_t length_l;
   size_t length_r;
 
+  GNUNET_assert (NULL == *R_cur_ij && NULL != R_cur_ij);
+
+  // $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* 
R^{(k-1)}_{kj}
+  // R_last == R^{(k-1)}, R_cur == R^{(k)}
+  // R_cur_ij = R_cur_l | R_cur_r
+  // R_cur_l == R^{(k-1)}_{ij}
+  // R_cur_r == R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj}
+
+  if ((NULL == R_last_ij) && ((NULL == R_last_ik) || (NULL == R_last_kk) ||    
 /* technically cannot happen, but looks saner */
+                              (NULL == R_last_kj)))
+  {
+    /* R^{(k)}_{ij} = N | N */
+    *R_cur_ij = NULL;
+    return;
+  }
+
+  if ((NULL == R_last_ik) || (NULL == R_last_kk) ||     /* technically cannot 
happen, but looks saner */
+      (NULL == R_last_kj))
+  {
+    /*  R^{(k)}_{ij} = R^{(k-1)}_{ij} | N */
+    *R_cur_ij = GNUNET_strdup (R_last_ij);
+    return;
+  }
+
+  // $R^{(k)}_{ij} = N | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj} OR
+  // $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* 
R^{(k-1)}_{kj}
+
+  /* *R_cur_ij = NULL; */
+  R_cur_r = NULL;
+  R_cur_l = NULL;
+
+  // cache results from strcmp, we might need these many times
+  ij_kj_cmp = nullstrcmp (R_last_ij, R_last_kj);
+  ij_ik_cmp = nullstrcmp (R_last_ij, R_last_ik);
+  ik_kk_cmp = nullstrcmp (R_last_ik, R_last_kk);
+  //ik_kj_cmp = nullstrcmp (R_last_ik, R_last_kj);
+  kk_kj_cmp = nullstrcmp (R_last_kk, R_last_kj);
+
+  // Assign R_temp_(ik|kk|kj) to R_last[][] and remove epsilon as well
+  // as parentheses, so we can better compare the contents
+  R_temp_ik = remove_parentheses (remove_epsilon (R_last_ik));
+  R_temp_kk = remove_parentheses (remove_epsilon (R_last_kk));
+  R_temp_kj = remove_parentheses (remove_epsilon (R_last_kj));
+
+  clean_ik_kk_cmp = nullstrcmp (R_last_ik, R_temp_kk);
+  clean_kk_kj_cmp = nullstrcmp (R_temp_kk, R_last_kj);
+
+  // construct R_cur_l (and, if necessary R_cur_r)
+  if (NULL != R_last_ij)
+  {
+    // Assign R_temp_ij to R_last_ij and remove epsilon as well
+    // as parentheses, so we can better compare the contents
+    R_temp_ij = remove_parentheses (remove_epsilon (R_last_ij));
+
+    if (0 == strcmp (R_temp_ij, R_temp_ik) && 0 == strcmp (R_temp_ik, 
R_temp_kk)
+        && 0 == strcmp (R_temp_kk, R_temp_kj))
+    {
+      if (0 == strlen (R_temp_ij))
+      {
+        R_cur_r = GNUNET_strdup ("");
+      }
+      else if ((0 == strncmp (R_last_ij, "(|", 2)) ||
+               (0 == strncmp (R_last_ik, "(|", 2) &&
+                0 == strncmp (R_last_kj, "(|", 2)))
+      {
+        // a|(e|a)a*(e|a) = a*
+        // a|(e|a)(e|a)*(e|a) = a*
+        // (e|a)|aa*a = a*
+        // (e|a)|aa*(e|a) = a*
+        // (e|a)|(e|a)a*a = a*
+        // (e|a)|(e|a)a*(e|a) = a*
+        // (e|a)|(e|a)(e|a)*(e|a) = a*
+        if (GNUNET_YES == needs_parentheses (R_temp_ij))
+          GNUNET_asprintf (&R_cur_r, "(%s)*", R_temp_ij);
+        else
+          GNUNET_asprintf (&R_cur_r, "%s*", R_temp_ij);
+      }
+      else
+      {
+        // a|aa*a = a+
+        // a|(e|a)a*a = a+
+        // a|aa*(e|a) = a+
+        // a|(e|a)(e|a)*a = a+
+        // a|a(e|a)*(e|a) = a+
+        if (GNUNET_YES == needs_parentheses (R_temp_ij))
+          GNUNET_asprintf (&R_cur_r, "(%s)+", R_temp_ij);
+        else
+          GNUNET_asprintf (&R_cur_r, "%s+", R_temp_ij);
+      }
+    }
+    else if (0 == ij_ik_cmp && 0 == clean_kk_kj_cmp && 0 != clean_ik_kk_cmp)
+    {
+      // a|ab*b = ab*
+      if (strlen (R_last_kk) < 1)
+        R_cur_r = GNUNET_strdup (R_last_ij);
+      else if (GNUNET_YES == needs_parentheses (R_temp_kk))
+        GNUNET_asprintf (&R_cur_r, "%s(%s)*", R_last_ij, R_temp_kk);
+      else
+        GNUNET_asprintf (&R_cur_r, "%s%s*", R_last_ij, R_last_kk);
+
+      R_cur_l = NULL;
+    }
+    else if (0 == ij_kj_cmp && 0 == clean_ik_kk_cmp && 0 != clean_kk_kj_cmp)
+    {
+      // a|bb*a = b*a
+      if (strlen (R_last_kk) < 1)
+        R_cur_r = GNUNET_strdup (R_last_kj);
+      else if (GNUNET_YES == needs_parentheses (R_temp_kk))
+        GNUNET_asprintf (&R_cur_r, "(%s)*%s", R_temp_kk, R_last_kj);
+      else
+        GNUNET_asprintf (&R_cur_r, "%s*%s", R_temp_kk, R_last_kj);
+
+      R_cur_l = NULL;
+    }
+    else if (0 == ij_ik_cmp && 0 == kk_kj_cmp && !has_epsilon (R_last_ij) &&
+             has_epsilon (R_last_kk))
+    {
+      // a|a(e|b)*(e|b) = a|ab* = a|a|ab|abb|abbb|... = ab*
+      if (needs_parentheses (R_temp_kk))
+        GNUNET_asprintf (&R_cur_r, "%s(%s)*", R_last_ij, R_temp_kk);
+      else
+        GNUNET_asprintf (&R_cur_r, "%s%s*", R_last_ij, R_temp_kk);
+
+      R_cur_l = NULL;
+    }
+    else if (0 == ij_kj_cmp && 0 == ik_kk_cmp && !has_epsilon (R_last_ij) &&
+             has_epsilon (R_last_kk))
+    {
+      // a|(e|b)(e|b)*a = a|b*a = a|a|ba|bba|bbba|...  = b*a
+      if (needs_parentheses (R_temp_kk))
+        GNUNET_asprintf (&R_cur_r, "(%s)*%s", R_temp_kk, R_last_ij);
+      else
+        GNUNET_asprintf (&R_cur_r, "%s*%s", R_temp_kk, R_last_ij);
+
+      R_cur_l = NULL;
+    }
+    else
+    {
+      temp_a = (NULL == R_last_ij) ? NULL : GNUNET_strdup (R_last_ij);
+      temp_a = remove_parentheses (temp_a);
+      R_cur_l = temp_a;
+    }
+
+    GNUNET_free_non_null (R_temp_ij);
+  }
+  else
+  {
+    // we have no left side
+    R_cur_l = NULL;
+  }
+
+  // construct R_cur_r, if not already constructed
+  if (NULL == R_cur_r)
+  {
+    length = strlen (R_temp_kk) - strlen (R_last_ik);
+
+    // a(ba)*bx = (ab)+x
+    if (length > 0 && NULL != R_last_kk && 0 < strlen (R_last_kk) &&
+        NULL != R_last_kj && 0 < strlen (R_last_kj) && NULL != R_last_ik &&
+        0 < strlen (R_last_ik) && 0 == strkcmp (R_temp_kk, R_last_ik, length) 
&&
+        0 == strncmp (R_temp_kk, R_last_kj, length))
+    {
+      temp_a = GNUNET_malloc (length + 1);
+      temp_b = GNUNET_malloc ((strlen (R_last_kj) - length) + 1);
+
+      length_l = 0;
+      length_r = 0;
+
+      for (cnt = 0; cnt < strlen (R_last_kj); cnt++)
+      {
+        if (cnt < length)
+        {
+          temp_a[length_l] = R_last_kj[cnt];
+          length_l++;
+        }
+        else
+        {
+          temp_b[length_r] = R_last_kj[cnt];
+          length_r++;
+        }
+      }
+      temp_a[length_l] = '\0';
+      temp_b[length_r] = '\0';
+
+      // e|(ab)+ = (ab)*
+      if (NULL != R_cur_l && 0 == strlen (R_cur_l) && 0 == strlen (temp_b))
+      {
+        GNUNET_asprintf (&R_cur_r, "(%s%s)*", R_last_ik, temp_a);
+        GNUNET_free (R_cur_l);
+        R_cur_l = NULL;
+      }
+      else
+      {
+        GNUNET_asprintf (&R_cur_r, "(%s%s)+%s", R_last_ik, temp_a, temp_b);
+      }
+      GNUNET_free (temp_a);
+      GNUNET_free (temp_b);
+    }
+    else if (0 == strcmp (R_temp_ik, R_temp_kk) &&
+             0 == strcmp (R_temp_kk, R_temp_kj))
+    {
+      // (e|a)a*(e|a) = a*
+      // (e|a)(e|a)*(e|a) = a*
+      if (has_epsilon (R_last_ik) && has_epsilon (R_last_kj))
+      {
+        if (needs_parentheses (R_temp_kk))
+          GNUNET_asprintf (&R_cur_r, "(%s)*", R_temp_kk);
+        else
+          GNUNET_asprintf (&R_cur_r, "%s*", R_temp_kk);
+      }
+      // aa*a = a+a
+      else if (0 == clean_ik_kk_cmp && 0 == clean_kk_kj_cmp &&
+               !has_epsilon (R_last_ik))
+      {
+        if (needs_parentheses (R_temp_kk))
+          GNUNET_asprintf (&R_cur_r, "(%s)+%s", R_temp_kk, R_temp_kk);
+        else
+          GNUNET_asprintf (&R_cur_r, "(%s)+%s", R_temp_kk, R_temp_kk);
+      }
+      // (e|a)a*a = a+
+      // aa*(e|a) = a+
+      // a(e|a)*(e|a) = a+
+      // (e|a)a*a = a+
+      else
+      {
+        eps_check =
+            (has_epsilon (R_last_ik) + has_epsilon (R_last_kk) +
+             has_epsilon (R_last_kj));
+
+        if (eps_check == 1)
+        {
+          if (needs_parentheses (R_temp_kk))
+            GNUNET_asprintf (&R_cur_r, "(%s)+", R_temp_kk);
+          else
+            GNUNET_asprintf (&R_cur_r, "%s+", R_temp_kk);
+        }
+      }
+    }
+    // aa*b = a+b
+    // (e|a)(e|a)*b = a*b
+    else if (0 == strcmp (R_temp_ik, R_temp_kk))
+    {
+      if (has_epsilon (R_last_ik))
+      {
+        if (needs_parentheses (R_temp_kk))
+          GNUNET_asprintf (&R_cur_r, "(%s)*%s", R_temp_kk, R_last_kj);
+        else
+          GNUNET_asprintf (&R_cur_r, "%s*%s", R_temp_kk, R_last_kj);
+      }
+      else
+      {
+        if (needs_parentheses (R_temp_kk))
+          GNUNET_asprintf (&R_cur_r, "(%s)+%s", R_temp_kk, R_last_kj);
+        else
+          GNUNET_asprintf (&R_cur_r, "%s+%s", R_temp_kk, R_last_kj);
+      }
+    }
+    // ba*a = ba+
+    // b(e|a)*(e|a) = ba*
+    else if (0 == strcmp (R_temp_kk, R_temp_kj))
+    {
+      if (has_epsilon (R_last_kj))
+      {
+        if (needs_parentheses (R_temp_kk))
+          GNUNET_asprintf (&R_cur_r, "%s(%s)*", R_last_ik, R_temp_kk);
+        else
+          GNUNET_asprintf (&R_cur_r, "%s%s*", R_last_ik, R_temp_kk);
+      }
+      else
+      {
+        if (needs_parentheses (R_temp_kk))
+          GNUNET_asprintf (&R_cur_r, "(%s)+%s", R_last_ik, R_temp_kk);
+        else
+          GNUNET_asprintf (&R_cur_r, "%s+%s", R_last_ik, R_temp_kk);
+      }
+    }
+    else
+    {
+      if (strlen (R_temp_kk) > 0)
+      {
+        if (needs_parentheses (R_temp_kk))
+        {
+          GNUNET_asprintf (&R_cur_r, "%s(%s)*%s", R_last_ik, R_temp_kk,
+                           R_last_kj);
+        }
+        else
+        {
+          GNUNET_asprintf (&R_cur_r, "%s%s*%s", R_last_ik, R_temp_kk,
+                           R_last_kj);
+        }
+      }
+      else
+      {
+        GNUNET_asprintf (&R_cur_r, "%s%s", R_last_ik, R_last_kj);
+      }
+    }
+  }
+
+  /* GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "R_cur_l: %s\n", R_cur_l); */
+  /* GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "R_cur_r: %s\n", R_cur_r); */
+
+  if (NULL == R_cur_l && NULL == R_cur_r)
+  {
+    *R_cur_ij = NULL;
+    return;
+  }
+
+  if (NULL != R_cur_l && NULL == R_cur_r)
+  {
+    *R_cur_ij = GNUNET_strdup (R_cur_l);
+    return;
+  }
+
+  if (NULL == R_cur_l && NULL != R_cur_r)
+  {
+    *R_cur_ij = GNUNET_strdup (R_cur_r);
+    return;
+  }
+
+  if (R_cur_l[0] == 'C' || R_cur_r[0] == 'C')
+    GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "R_cur_l: %s R_cur_r: %s\n", R_cur_l,
+                R_cur_r);
+
+
+  if (0 == nullstrcmp (R_cur_l, R_cur_r))
+  {
+    *R_cur_ij = GNUNET_strdup (R_cur_l);
+    GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, ">>>>>>>>>>>>>> %s == %s\n", R_cur_l,
+                R_cur_r);
+    return;
+  }
+
+  /* else */
+  /* { */
+  /*   GNUNET_log (GNUNET_ERROR_TYPE_ERROR, ">>>>>>>>>>>>>> %s != %s\n", 
R_cur_l, R_cur_r); */
+  /* } */
+
+  GNUNET_asprintf (R_cur_ij, "(%s|%s)", R_cur_l, R_cur_r);
+
+  GNUNET_free_non_null (R_cur_l);
+  GNUNET_free_non_null (R_cur_r);
+
+  GNUNET_free_non_null (R_temp_ik);
+  GNUNET_free_non_null (R_temp_kk);
+  GNUNET_free_non_null (R_temp_kj);
+
+}
+
+/**
+ * create proofs for all states in the given automaton. Implementation of the
+ * algorithm descriped in chapter 3.2.1 of "Automata Theory, Languages, and
+ * Computation 3rd Edition" by Hopcroft, Motwani and Ullman.
+ *
+ * @param a automaton.
+ */
+static void
+automaton_create_proofs (struct GNUNET_REGEX_Automaton *a)
+{
+  unsigned int n = a->state_count;
+  struct GNUNET_REGEX_State *states[n];
+  char *R_last[n][n];
+  char *R_cur[n][n];
+  char *temp;
+  struct Transition *t;
+  char *complete_regex;
+  unsigned int i;
+  unsigned int j;
+  unsigned int k;
+
+
   /* create depth-first numbering of the states, initializes 'state' */
   automaton_traverse (a, &number_states, states);
 
@@ -1075,31 +1464,29 @@
         GNUNET_asprintf (&R_last[i][j], "%c", t->label);
       else
       {
-        temp_a = R_last[i][j];
+        temp = R_last[i][j];
         GNUNET_asprintf (&R_last[i][j], "%s|%c", R_last[i][j], t->label);
-        GNUNET_free (temp_a);
+        GNUNET_free (temp);
       }
     }
     if (NULL == R_last[i][i])
       GNUNET_asprintf (&R_last[i][i], "");
     else
     {
-      temp_a = R_last[i][i];
+      temp = R_last[i][i];
       GNUNET_asprintf (&R_last[i][i], "(|%s)", R_last[i][i]);
-      GNUNET_free (temp_a);
+      GNUNET_free (temp);
     }
   }
   for (i = 0; i < n; i++)
     for (j = 0; j < n; j++)
       if (needs_parentheses (R_last[i][j]))
       {
-        temp_a = R_last[i][j];
+        temp = R_last[i][j];
         GNUNET_asprintf (&R_last[i][j], "(%s)", R_last[i][j]);
-        GNUNET_free (temp_a);
+        GNUNET_free (temp);
       }
 
-  // TODO: clean up and fix the induction part
-
   /* Compute regular expressions of length "k" between each pair of states per 
induction */
   for (k = 0; k < n; k++)
   {
@@ -1110,341 +1497,11 @@
         // Basis for the recursion:
         // $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} 
)^* R^{(k-1)}_{kj}
         // R_last == R^{(k-1)}, R_cur == R^{(k)}
-        // With: R_cur[i][j] = R_cur_l | R_cur_r
-        // R_cur_l == R^{(k-1)}_{ij}
-        // R_cur_r == R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj}
 
-        if ((NULL == R_last[i][j]) && ((NULL == R_last[i][k]) || (NULL == 
R_last[k][k]) ||      /* technically cannot happen, but looks saner */
-                                       (NULL == R_last[k][j])))
-        {
-          /*  R^{(k)}_{ij} = N | N */
-          /* R_cur[i][j] is already NULL */
-          continue;
-        }
-        if ((NULL == R_last[i][k]) || (NULL == R_last[k][k]) || /* technically 
cannot happen, but looks saner */
-            (NULL == R_last[k][j]))
-        {
-          /*  R^{(k)}_{ij} = R^{(k-1)}_{ij} | N */
-          R_cur[i][j] = GNUNET_strdup (R_last[i][j]);
-          continue;
-        }
-        // $R^{(k)}_{ij} = N | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* 
R^{(k-1)}_{kj} OR
-        // $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} 
)^* R^{(k-1)}_{kj}
-
-        R_cur[i][j] = NULL;
-        R_cur_r = NULL;
-        R_cur_l = NULL;
-
-        // cache results from strcmp, we might need these many times
-        ij_kj_cmp = nullstrcmp (R_last[i][j], R_last[k][j]);
-        ij_ik_cmp = nullstrcmp (R_last[i][j], R_last[i][k]);
-        ik_kk_cmp = nullstrcmp (R_last[i][k], R_last[k][k]);
-        //ik_kj_cmp = nullstrcmp (R_last[i][k], R_last[k][j]);
-        kk_kj_cmp = nullstrcmp (R_last[k][k], R_last[k][j]);
-
-        // Assign R_temp_(ik|kk|kj) to R_last[][] and remove epsilon as well
-        // as parentheses, so we can better compare the contents
-        R_temp_ik = remove_parentheses (remove_epsilon (R_last[i][k]));
-        R_temp_kk = remove_parentheses (remove_epsilon (R_last[k][k]));
-        R_temp_kj = remove_parentheses (remove_epsilon (R_last[k][j]));
-
-        clean_ik_kk_cmp = nullstrcmp (R_last[i][k], R_temp_kk);
-        clean_kk_kj_cmp = nullstrcmp (R_temp_kk, R_last[k][j]);
-
-        // construct R_cur_l (and, if necessary R_cur_r)
-        if (NULL != R_last[i][j])
-        {
-          // Assign R_temp_ij to R_last[i][j] and remove epsilon as well
-          // as parentheses, so we can better compare the contents
-          R_temp_ij = remove_parentheses (remove_epsilon (R_last[i][j]));
-
-          if (0 == strcmp (R_temp_ij, R_temp_ik) &&
-              0 == strcmp (R_temp_ik, R_temp_kk) &&
-              0 == strcmp (R_temp_kk, R_temp_kj))
-          {
-            if (0 == strlen (R_temp_ij))
-            {
-              R_cur_r = GNUNET_strdup ("");
-            }
-            else if ((0 == strncmp (R_last[i][j], "(|", 2)) ||
-                     (0 == strncmp (R_last[i][k], "(|", 2) &&
-                      0 == strncmp (R_last[k][j], "(|", 2)))
-            {
-              // a|(e|a)a*(e|a) = a*
-              // a|(e|a)(e|a)*(e|a) = a*
-              // (e|a)|aa*a = a*
-              // (e|a)|aa*(e|a) = a*
-              // (e|a)|(e|a)a*a = a*
-              // (e|a)|(e|a)a*(e|a) = a*
-              // (e|a)|(e|a)(e|a)*(e|a) = a*
-              if (GNUNET_YES == needs_parentheses (R_temp_ij))
-                GNUNET_asprintf (&R_cur_r, "(%s)*", R_temp_ij);
-              else
-                GNUNET_asprintf (&R_cur_r, "%s*", R_temp_ij);
-            }
-            else
-            {
-              // a|aa*a = a+
-              // a|(e|a)a*a = a+
-              // a|aa*(e|a) = a+
-              // a|(e|a)(e|a)*a = a+
-              // a|a(e|a)*(e|a) = a+
-              if (GNUNET_YES == needs_parentheses (R_temp_ij))
-                GNUNET_asprintf (&R_cur_r, "(%s)+", R_temp_ij);
-              else
-                GNUNET_asprintf (&R_cur_r, "%s+", R_temp_ij);
-            }
-          }
-          else if (0 == ij_ik_cmp && 0 == clean_kk_kj_cmp &&
-                   0 != clean_ik_kk_cmp)
-          {
-            // a|ab*b = ab*
-            if (strlen (R_last[k][k]) < 1)
-              R_cur_r = GNUNET_strdup (R_last[i][j]);
-            else if (GNUNET_YES == needs_parentheses (R_temp_kk))
-              GNUNET_asprintf (&R_cur_r, "%s(%s)*", R_last[i][j], R_temp_kk);
-            else
-              GNUNET_asprintf (&R_cur_r, "%s%s*", R_last[i][j], R_last[k][k]);
-
-            R_cur_l = NULL;
-          }
-          else if (0 == ij_kj_cmp && 0 == clean_ik_kk_cmp &&
-                   0 != clean_kk_kj_cmp)
-          {
-            // a|bb*a = b*a
-            if (strlen (R_last[k][k]) < 1)
-              R_cur_r = GNUNET_strdup (R_last[k][j]);
-            else if (GNUNET_YES == needs_parentheses (R_temp_kk))
-              GNUNET_asprintf (&R_cur_r, "(%s)*%s", R_temp_kk, R_last[k][j]);
-            else
-              GNUNET_asprintf (&R_cur_r, "%s*%s", R_temp_kk, R_last[k][j]);
-
-            R_cur_l = NULL;
-          }
-          else if (0 == ij_ik_cmp && 0 == kk_kj_cmp &&
-                   !has_epsilon (R_last[i][j]) && has_epsilon (R_last[k][k]))
-          {
-            // a|a(e|b)*(e|b) = a|ab* = a|a|ab|abb|abbb|... = ab*
-            if (needs_parentheses (R_temp_kk))
-              GNUNET_asprintf (&R_cur_r, "%s(%s)*", R_last[i][j], R_temp_kk);
-            else
-              GNUNET_asprintf (&R_cur_r, "%s%s*", R_last[i][j], R_temp_kk);
-
-            R_cur_l = NULL;
-          }
-          else if (0 == ij_kj_cmp && 0 == ik_kk_cmp &&
-                   !has_epsilon (R_last[i][j]) && has_epsilon (R_last[k][k]))
-          {
-            // a|(e|b)(e|b)*a = a|b*a = a|a|ba|bba|bbba|...  = b*a
-            if (needs_parentheses (R_temp_kk))
-              GNUNET_asprintf (&R_cur_r, "(%s)*%s", R_temp_kk, R_last[i][j]);
-            else
-              GNUNET_asprintf (&R_cur_r, "%s*%s", R_temp_kk, R_last[i][j]);
-
-            R_cur_l = NULL;
-          }
-          else
-          {
-            temp_a =
-                (NULL == R_last[i][j]) ? NULL : GNUNET_strdup (R_last[i][j]);
-            temp_a = remove_parentheses (temp_a);
-            R_cur_l = temp_a;
-          }
-
-          GNUNET_free_non_null (R_temp_ij);
-        }
-        else
-        {
-          // we have no left side
-          R_cur_l = NULL;
-        }
-
-        // construct R_cur_r, if not already constructed
-        if (NULL == R_cur_r)
-        {
-          length = strlen (R_temp_kk) - strlen (R_last[i][k]);
-
-          // a(ba)*bx = (ab)+x
-          if (length > 0 && NULL != R_last[k][k] && 0 < strlen (R_last[k][k]) 
&&
-              NULL != R_last[k][j] && 0 < strlen (R_last[k][j]) &&
-              NULL != R_last[i][k] && 0 < strlen (R_last[i][k]) &&
-              0 == strkcmp (R_temp_kk, R_last[i][k], length) &&
-              0 == strncmp (R_temp_kk, R_last[k][j], length))
-          {
-            temp_a = GNUNET_malloc (length + 1);
-            temp_b = GNUNET_malloc ((strlen (R_last[k][j]) - length) + 1);
-
-            length_l = 0;
-            length_r = 0;
-
-            for (cnt = 0; cnt < strlen (R_last[k][j]); cnt++)
-            {
-              if (cnt < length)
-              {
-                temp_a[length_l] = R_last[k][j][cnt];
-                length_l++;
-              }
-              else
-              {
-                temp_b[length_r] = R_last[k][j][cnt];
-                length_r++;
-              }
-            }
-            temp_a[length_l] = '\0';
-            temp_b[length_r] = '\0';
-
-            // e|(ab)+ = (ab)*
-            if (NULL != R_cur_l && 0 == strlen (R_cur_l) &&
-                0 == strlen (temp_b))
-            {
-              GNUNET_asprintf (&R_cur_r, "(%s%s)*", R_last[i][k], temp_a);
-              GNUNET_free (R_cur_l);
-              R_cur_l = NULL;
-            }
-            else
-            {
-              GNUNET_asprintf (&R_cur_r, "(%s%s)+%s", R_last[i][k], temp_a,
-                               temp_b);
-            }
-            GNUNET_free (temp_a);
-            GNUNET_free (temp_b);
-          }
-          else if (0 == strcmp (R_temp_ik, R_temp_kk) &&
-                   0 == strcmp (R_temp_kk, R_temp_kj))
-          {
-            // (e|a)a*(e|a) = a*
-            // (e|a)(e|a)*(e|a) = a*
-            if (has_epsilon (R_last[i][k]) && has_epsilon (R_last[k][j]))
-            {
-              if (needs_parentheses (R_temp_kk))
-                GNUNET_asprintf (&R_cur_r, "(%s)*", R_temp_kk);
-              else
-                GNUNET_asprintf (&R_cur_r, "%s*", R_temp_kk);
-            }
-            // aa*a = a+a
-            else if (0 == clean_ik_kk_cmp && 0 == clean_kk_kj_cmp &&
-                     !has_epsilon (R_last[i][k]))
-            {
-              if (needs_parentheses (R_temp_kk))
-                GNUNET_asprintf (&R_cur_r, "(%s)+%s", R_temp_kk, R_temp_kk);
-              else
-                GNUNET_asprintf (&R_cur_r, "(%s)+%s", R_temp_kk, R_temp_kk);
-            }
-            // (e|a)a*a = a+
-            // aa*(e|a) = a+
-            // a(e|a)*(e|a) = a+
-            // (e|a)a*a = a+
-            else
-            {
-              eps_check =
-                  (has_epsilon (R_last[i][k]) + has_epsilon (R_last[k][k]) +
-                   has_epsilon (R_last[k][j]));
-
-              if (eps_check == 1)
-              {
-                if (needs_parentheses (R_temp_kk))
-                  GNUNET_asprintf (&R_cur_r, "(%s)+", R_temp_kk);
-                else
-                  GNUNET_asprintf (&R_cur_r, "%s+", R_temp_kk);
-              }
-            }
-          }
-          // aa*b = a+b
-          // (e|a)(e|a)*b = a*b
-          else if (0 == strcmp (R_temp_ik, R_temp_kk))
-          {
-            if (has_epsilon (R_last[i][k]))
-            {
-              if (needs_parentheses (R_temp_kk))
-                GNUNET_asprintf (&R_cur_r, "(%s)*%s", R_temp_kk, R_last[k][j]);
-              else
-                GNUNET_asprintf (&R_cur_r, "%s*%s", R_temp_kk, R_last[k][j]);
-            }
-            else
-            {
-              if (needs_parentheses (R_temp_kk))
-                GNUNET_asprintf (&R_cur_r, "(%s)+%s", R_temp_kk, R_last[k][j]);
-              else
-                GNUNET_asprintf (&R_cur_r, "%s+%s", R_temp_kk, R_last[k][j]);
-            }
-          }
-          // ba*a = ba+
-          // b(e|a)*(e|a) = ba*
-          else if (0 == strcmp (R_temp_kk, R_temp_kj))
-          {
-            if (has_epsilon (R_last[k][j]))
-            {
-              if (needs_parentheses (R_temp_kk))
-                GNUNET_asprintf (&R_cur_r, "%s(%s)*", R_last[i][k], R_temp_kk);
-              else
-                GNUNET_asprintf (&R_cur_r, "%s%s*", R_last[i][k], R_temp_kk);
-            }
-            else
-            {
-              if (needs_parentheses (R_temp_kk))
-                GNUNET_asprintf (&R_cur_r, "(%s)+%s", R_last[i][k], R_temp_kk);
-              else
-                GNUNET_asprintf (&R_cur_r, "%s+%s", R_last[i][k], R_temp_kk);
-            }
-          }
-          else
-          {
-            if (strlen (R_temp_kk) > 0)
-            {
-              if (needs_parentheses (R_temp_kk))
-              {
-                GNUNET_asprintf (&R_cur_r, "%s(%s)*%s", R_last[i][k], 
R_temp_kk,
-                                 R_last[k][j]);
-              }
-              else
-              {
-                GNUNET_asprintf (&R_cur_r, "%s%s*%s", R_last[i][k], R_temp_kk,
-                                 R_last[k][j]);
-              }
-            }
-            else
-            {
-              GNUNET_asprintf (&R_cur_r, "%s%s", R_last[i][k], R_last[k][j]);
-            }
-          }
-        }
-
-        /* GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "R_cur_l: %s\n", R_cur_l); */
-        /* GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "R_cur_r: %s\n", R_cur_r); */
-
-        // putting it all together
-        if (NULL != R_cur_l && NULL != R_cur_r)
-        {
-          // a|a = a
-          if (0 == strcmp (R_cur_l, R_cur_r))
-          {
-            R_cur[i][j] = GNUNET_strdup (R_cur_l);
-          }
-          // R_cur_l | R_cur_r
-          else
-          {
-            GNUNET_asprintf (&R_cur[i][j], "(%s|%s)", R_cur_l, R_cur_r);
-          }
-        }
-        else if (NULL != R_cur_l)
-        {
-          R_cur[i][j] = GNUNET_strdup (R_cur_l);
-        }
-        else if (NULL != R_cur_r)
-        {
-          R_cur[i][j] = GNUNET_strdup (R_cur_r);
-        }
-        else
-        {
-          R_cur[i][j] = NULL;
-        }
-
-        GNUNET_free_non_null (R_cur_l);
-        GNUNET_free_non_null (R_cur_r);
-
-        GNUNET_free_non_null (R_temp_ik);
-        GNUNET_free_non_null (R_temp_kk);
-        GNUNET_free_non_null (R_temp_kj);
+        // Create R_cur[i][j] and simplify the expression
+        automaton_create_proofs_simplify (R_last[i][j], R_last[i][k],
+                                          R_last[k][k], R_last[k][j],
+                                          &R_cur[i][j]);
       }
     }
 
@@ -1478,14 +1535,16 @@
     if (states[i]->accepting)
     {
       if (NULL == complete_regex && 0 < strlen (R_last[a->start->proof_id][i]))
+      {
         GNUNET_asprintf (&complete_regex, "%s", R_last[a->start->proof_id][i]);
+      }
       else if (NULL != R_last[a->start->proof_id][i] &&
                0 < strlen (R_last[a->start->proof_id][i]))
       {
-        temp_a = complete_regex;
+        temp = complete_regex;
         GNUNET_asprintf (&complete_regex, "%s|%s", complete_regex,
                          R_last[a->start->proof_id][i]);
-        GNUNET_free (temp_a);
+        GNUNET_free (temp);
       }
     }
   }
@@ -1524,8 +1583,6 @@
   int len = 0;
   struct GNUNET_REGEX_State *cstate;
   struct Transition *ctran;
-  int insert = 1;
-  struct Transition *t;
   unsigned int i;
 
   s = GNUNET_malloc (sizeof (struct GNUNET_REGEX_State));
@@ -1573,21 +1630,7 @@
     for (ctran = cstate->transitions_head; NULL != ctran; ctran = ctran->next)
     {
       if (0 != ctran->label)
-      {
-        insert = 1;
-
-        for (t = s->transitions_head; NULL != t; t = t->next)
-        {
-          if (t->label == ctran->label)
-          {
-            insert = 0;
-            break;
-          }
-        }
-
-        if (insert)
-          state_add_transition (ctx, s, ctran->label, NULL);
-      }
+        state_add_transition (ctx, s, ctran->label, NULL);
     }
 
     // If the nfa_states contain an accepting state, the new dfa state is also

Modified: gnunet/src/regex/test_regex_eval_api.c
===================================================================
--- gnunet/src/regex/test_regex_eval_api.c      2012-07-02 12:14:49 UTC (rev 
22428)
+++ gnunet/src/regex/test_regex_eval_api.c      2012-07-02 12:22:37 UTC (rev 
22429)
@@ -58,7 +58,7 @@
 test_random (unsigned int rx_length, unsigned int max_str_len,
              unsigned int str_count)
 {
-  int i;
+  unsigned int i;
   char *rand_rx;
   char *matching_str;
   int eval;
@@ -114,6 +114,11 @@
     eval_check = regexec (&rx, matching_str, 1, matchptr, 0);
     regfree (&rx);
 
+    // We only want to match the whole string, because that's what our DFA 
does, too.
+    if (eval_check == 0 &&
+        (matchptr[0].rm_so != 0 || matchptr[0].rm_eo != strlen (matching_str)))
+      eval_check = 1;
+
     // Match canonical regex
     if (0 != regcomp (&rx, canonical_regex, REG_EXTENDED))
     {
@@ -127,11 +132,6 @@
     regfree (&rx);
     GNUNET_free (canonical_regex);
 
-    // We only want to match the whole string, because that's what our DFA 
does, too.
-    if (eval_check == 0 &&
-        (matchptr[0].rm_so != 0 || matchptr[0].rm_eo != strlen (matching_str)))
-      eval_check = 1;
-
     // compare result
     if (eval_check != eval)
     {
@@ -198,11 +198,11 @@
       result = 1;
       regerror (eval_check, rx, error, sizeof error);
       GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
-                  "Unexpected result:\nregex: %s\nstring: %s\nexpected result: 
%i\n"
+                  "Unexpected result:\nregex: %s\ncanonical_regex: %s\nstring: 
%s\nexpected result: %i\n"
                   "gnunet regex: %i\nglibc regex: %i\nglibc error: %s\nrm_so: 
%i\nrm_eo: %i\n\n",
-                  rxstr->regex, rxstr->strings[i], rxstr->expected_results[i],
-                  eval, eval_check, error, matchptr[0].rm_so,
-                  matchptr[0].rm_eo);
+                  rxstr->regex, GNUNET_REGEX_get_canonical_regex (a),
+                  rxstr->strings[i], rxstr->expected_results[i], eval,
+                  eval_check, error, matchptr[0].rm_so, matchptr[0].rm_eo);
     }
   }
   return result;
@@ -298,6 +298,7 @@
     check_dfa += test_automaton (a, &rx, &rxstr[i]);
     check_proof = GNUNET_strdup (GNUNET_REGEX_get_canonical_regex (a));
     GNUNET_REGEX_automaton_destroy (a);
+
     a = GNUNET_REGEX_construct_dfa (check_proof, strlen (check_proof));
     check_dfa += test_automaton (a, &rx, &rxstr[i]);
     GNUNET_REGEX_automaton_destroy (a);

Modified: gnunet/src/regex/test_regex_iterate_api.c
===================================================================
--- gnunet/src/regex/test_regex_iterate_api.c   2012-07-02 12:14:49 UTC (rev 
22428)
+++ gnunet/src/regex/test_regex_iterate_api.c   2012-07-02 12:22:37 UTC (rev 
22429)
@@ -32,7 +32,7 @@
               int accepting, unsigned int num_edges,
               const struct GNUNET_REGEX_Edge *edges)
 {
-  int i;
+  unsigned int i;
 
   GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Iterating... (accepting: %i)\n",
               accepting);

Modified: gnunet/src/regex/test_regex_proofs.c
===================================================================
--- gnunet/src/regex/test_regex_proofs.c        2012-07-02 12:14:49 UTC (rev 
22428)
+++ gnunet/src/regex/test_regex_proofs.c        2012-07-02 12:22:37 UTC (rev 
22429)
@@ -106,39 +106,48 @@
   unsigned int i;
   unsigned int error;
 
-  const char *regex[4] = {
+  const char *regex[6] = {
     "a|aa*a",
     "a+",
     "a*",
-    "a*a*"
+    "a*a*",
+    "(F*C|WfPf|y+F*C)",
+    "y*F*C|WfPf",
+    /* "2?jA?e?D*K", */
+    /* "((j|2j)K|(j|2j)AK|(j|2j)(D|e|(j|2j)A(D|e))D*K)", */
+    /* "((j|2j)K|(j|2j)(D|e|((j|2j)j|(j|2j)2j)A(D|e))D*K|(j|2j)AK)", */
   };
 
-  char *canonical_regex;
-  struct GNUNET_REGEX_Automaton *dfa;
+  const char *canon_rx1;
+  const char *canon_rx2;
+  struct GNUNET_REGEX_Automaton *dfa1;
+  struct GNUNET_REGEX_Automaton *dfa2;
 
   error = 0;
 
-  for (i = 0; i < 4; i += 2)
+  for (i = 0; i < 6; i += 2)
   {
-    dfa = GNUNET_REGEX_construct_dfa (regex[i], strlen (regex[i]));
-    canonical_regex = GNUNET_strdup (GNUNET_REGEX_get_canonical_regex (dfa));
-    GNUNET_REGEX_automaton_destroy (dfa);
+    dfa1 = GNUNET_REGEX_construct_dfa (regex[i], strlen (regex[i]));
+    dfa2 = GNUNET_REGEX_construct_dfa (regex[i + 1], strlen (regex[i + 1]));
 
-    dfa = GNUNET_REGEX_construct_dfa (regex[i + 1], strlen (regex[i + 1]));
-    error +=
-        (0 ==
-         strcmp (canonical_regex,
-                 GNUNET_REGEX_get_canonical_regex (dfa))) ? 0 : 1;
+    canon_rx1 = GNUNET_REGEX_get_canonical_regex (dfa1);
+    canon_rx2 = GNUNET_REGEX_get_canonical_regex (dfa2);
 
+    error += (0 == strcmp (canon_rx1, canon_rx2)) ? 0 : 1;
+
     if (error > 0)
     {
       GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
-                  "Comparing canonical regex of %s with %s failed.\n", 
regex[i],
-                  regex[i + 1]);
+                  "Comparing canonical regex failed:\nrx1: %s\ncrx1: %s\nrx2: 
%s\ncrx2: %s\n",
+                  regex[i], canon_rx1, regex[i + 1], canon_rx2);
+
+      /* Save the graphs of the two conflicting DFAs */
+      /* GNUNET_REGEX_automaton_save_graph (dfa1, "proofs_fail_dfa1.dot"); */
+      /* GNUNET_REGEX_automaton_save_graph (dfa2, "proofs_fail_dfa2.dot"); */
     }
 
-    GNUNET_free (canonical_regex);
-    GNUNET_REGEX_automaton_destroy (dfa);
+    GNUNET_REGEX_automaton_destroy (dfa1);
+    GNUNET_REGEX_automaton_destroy (dfa2);
   }
 
   return error;
@@ -161,7 +170,7 @@
   error = 0;
 
   error += test_proofs_static ();
-//  error += test_proofs_random (100, 10);
+  /* error += test_proofs_random (10000, 10); */
 
   return error;
 }




reply via email to

[Prev in Thread] Current Thread [Next in Thread]