gnunet-svn
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[GNUnet-SVN] r20979 - in gnunet/src: include regex


From: gnunet
Subject: [GNUnet-SVN] r20979 - in gnunet/src: include regex
Date: Fri, 13 Apr 2012 16:41:33 +0200

Author: szengel
Date: 2012-04-13 16:41:33 +0200 (Fri, 13 Apr 2012)
New Revision: 20979

Modified:
   gnunet/src/include/gnunet_regex_lib.h
   gnunet/src/regex/regex.c
   gnunet/src/regex/test_regex.c
Log:
- SCC detection
- Graphviz scc coloration
- optimizations
- api


Modified: gnunet/src/include/gnunet_regex_lib.h
===================================================================
--- gnunet/src/include/gnunet_regex_lib.h       2012-04-13 08:51:53 UTC (rev 
20978)
+++ gnunet/src/include/gnunet_regex_lib.h       2012-04-13 14:41:33 UTC (rev 
20979)
@@ -38,63 +38,108 @@
 #endif
 
 /**
- * Automaton (NFA/DFA) representation
+ * Automaton (NFA/DFA) representation.
  */
 struct GNUNET_REGEX_Automaton;
 
 /**
+ * State representation.
+ */
+struct GNUNET_REGEX_State;
+
+/**
  * Construct an NFA by parsing the regex string of length 'len'.
  *
- * @param regex regular expression string
- * @param len length of the string
+ * @param regex regular expression string.
+ * @param len length of the string.
  *
- * @return NFA, needs to be freed using GNUNET_REGEX_destroy_automaton
+ * @return NFA, needs to be freed using GNUNET_REGEX_destroy_automaton.
  */
 struct GNUNET_REGEX_Automaton *
 GNUNET_REGEX_construct_nfa (const char *regex, const size_t len);
 
 /**
- * Construct DFA for the given 'regex' of length 'len'
+ * Construct DFA for the given 'regex' of length 'len'.
  *
- * @param regex regular expression string
- * @param len length of the regular expression
+ * @param regex regular expression string.
+ * @param len length of the regular expression.
  *
- * @return DFA, needs to be freed using GNUNET_REGEX_destroy_automaton
+ * @return DFA, needs to be freed using GNUNET_REGEX_destroy_automaton.
  */
 struct GNUNET_REGEX_Automaton *
 GNUNET_REGEX_construct_dfa (const char *regex, const size_t len);
 
 /**
- * Free the memory allocated by constructing the GNUNET_REGEX_Automaton
+ * Free the memory allocated by constructing the GNUNET_REGEX_Automaton.
  * data structure.
  *
- * @param a automaton to be destroyed
+ * @param a automaton to be destroyed.
  */
 void
 GNUNET_REGEX_automaton_destroy (struct GNUNET_REGEX_Automaton *a);
 
 /**
- * Save the given automaton as a GraphViz dot file
+ * Save the given automaton as a GraphViz dot file.
  *
- * @param a the automaton to be saved
- * @param filename where to save the file
+ * @param a the automaton to be saved.
+ * @param filename where to save the file.
  */
 void
 GNUNET_REGEX_automaton_save_graph (struct GNUNET_REGEX_Automaton *a,
                                    const char *filename);
 
 /**
- * Evaluates the given 'string' against the given compiled regex
+ * Evaluates the given 'string' against the given compiled regex.
  *
- * @param a automaton
- * @param string string to check
+ * @param a automaton.
+ * @param string string to check.
  *
- * @return 0 if string matches, non 0 otherwise
+ * @return 0 if string matches, non 0 otherwise.
  */
 int
-GNUNET_REGEX_eval (struct GNUNET_REGEX_Automaton *a, 
+GNUNET_REGEX_eval (struct GNUNET_REGEX_Automaton *a,
                    const char *string);
 
+
+/**
+ * Get the starting state of the given automaton 'a'.
+ *
+ * @param a automaton.
+ *
+ * @return starting state.
+ */
+struct GNUNET_REGEX_State *
+GNUNET_REGEX_automaton_get_start (struct GNUNET_REGEX_Automaton *a);
+
+/**
+ * Get the next states, starting from states 's'.
+ *
+ * @param a automaton.
+ * @param s states.
+ * @param count number of states given in 's'. Will contain number of
+ *              states that were returned upon return.
+ *
+ * @return next states, 'count' will contain the number of states.
+ */
+struct GNUNET_REGEX_State **
+GNUNET_REGEX_automaton_states_get_next (struct GNUNET_REGEX_Automaton *a,
+                                        struct GNUNET_REGEX_State **s,
+                                        unsigned int *count);
+
+/**
+ * Hash a set of states.
+ *
+ * @param a automaton.
+ * @param s states.
+ * @param count number of states.
+ *
+ * @return hash.
+ */
+struct GNUNET_HashCode
+GNUNET_REGEX_automaton_states_hash (struct GNUNET_REGEX_Automaton *a,
+                                    struct GNUNET_REGEX_State **s,
+                                    unsigned int count);
+
 #if 0                           /* keep Emacsens' auto-indent happy */
 {
 #endif
@@ -104,3 +149,4 @@
 
 /* end of gnunet_regex_lib.h */
 #endif
+

Modified: gnunet/src/regex/regex.c
===================================================================
--- gnunet/src/regex/regex.c    2012-04-13 08:51:53 UTC (rev 20978)
+++ gnunet/src/regex/regex.c    2012-04-13 14:41:33 UTC (rev 20979)
@@ -44,6 +44,11 @@
   unsigned int transition_id;
 
   /**
+   * Unique SCC (Strongly Connected Component) id.
+   */
+  unsigned int scc_id;
+
+  /**
    * DLL of GNUNET_REGEX_Automaton's used as a stack.
    */
   struct GNUNET_REGEX_Automaton *stack_head;
@@ -84,12 +89,12 @@
    * itself consists of one or more NFAs linked
    * together.
    */
-  struct State *start;
+  struct GNUNET_REGEX_State *start;
 
   /**
    * End state of the automaton.
    */
-  struct State *end;
+  struct GNUNET_REGEX_State *end;
 
   /**
    * Number of states in the automaton.
@@ -99,12 +104,12 @@
   /**
    * DLL of states.
    */
-  struct State *states_head;
+  struct GNUNET_REGEX_State *states_head;
 
   /**
    * DLL of states
    */
-  struct State *states_tail;
+  struct GNUNET_REGEX_State *states_tail;
 
   /**
    * Type of the automaton.
@@ -115,17 +120,17 @@
 /**
  * A state. Can be used in DFA and NFA automatons.
  */
-struct State
+struct GNUNET_REGEX_State
 {
   /**
    * This is a linked list.
    */
-  struct State *prev;
+  struct GNUNET_REGEX_State *prev;
 
   /**
    * This is a linked list.
    */
-  struct State *next;
+  struct GNUNET_REGEX_State *next;
 
   /**
    * Unique state id.
@@ -145,6 +150,28 @@
   int marked;
 
   /**
+   * Marking the state as contained. This is used for checking,
+   * if the state is contained in a set in constant time
+   */
+  int contained;
+
+  /**
+   * Marking the state as part of an SCC (Strongly Connected Component).
+   * All states with the same scc_id are part of the same SCC.
+   */
+  unsigned int scc_id;
+
+  /**
+   * Used for SCC detection.
+   */
+  int index;
+
+  /**
+   * Used for SCC detection.
+   */
+  int lowlink;
+
+  /**
    * Human readable name of the automaton. Used for debugging
    * and graph creation.
    */
@@ -169,7 +196,7 @@
    * Set of states on which this state is based on. Used when
    * creating a DFA out of several NFA states.
    */
-  struct StateSet *nfa_set;
+  struct GNUNET_REGEX_StateSet *nfa_set;
 };
 
 /**
@@ -202,23 +229,23 @@
   /**
    * State to which this transition leads.
    */
-  struct State *to_state;
+  struct GNUNET_REGEX_State *to_state;
 
   /**
    * State from which this transition origins.
    */
-  struct State *from_state;
+  struct GNUNET_REGEX_State *from_state;
 };
 
 /**
  * Set of states.
  */
-struct StateSet
+struct GNUNET_REGEX_StateSet
 {
   /**
    * Array of states.
    */
-  struct State **states;
+  struct GNUNET_REGEX_State **states;
 
   /**
    * Length of the 'states' array.
@@ -226,37 +253,18 @@
   unsigned int len;
 };
 
-/**
- * Initialize a new context
- *
- * @param ctx context
- */
 static void
-GNUNET_REGEX_context_init (struct GNUNET_REGEX_Context *ctx)
+debug_print_state (struct GNUNET_REGEX_State *s)
 {
-  if (NULL == ctx)
-  {
-    GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Context was NULL!");
-    return;
-  }
-  ctx->state_id = 0;
-  ctx->transition_id = 0;
-  ctx->stack_head = NULL;
-  ctx->stack_tail = NULL;
-}
-
-static void
-debug_print_state (struct State *s)
-{
   GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
-              "State %i: %s marked: %i accepting: %i\n", s->id, s->name,
-              s->marked, s->accepting);
+              "State %i: %s marked: %i accepting: %i scc_id: %i\n", s->id,
+              s->name, s->marked, s->accepting, s->scc_id);
 }
 
 static void
-debug_print_states (struct StateSet *sset)
+debug_print_states (struct GNUNET_REGEX_StateSet *sset)
 {
-  struct State *s;
+  struct GNUNET_REGEX_State *s;
   int i;
 
   for (i = 0; i < sset->len; i++)
@@ -267,7 +275,7 @@
 }
 
 static void
-debug_print_transitions (struct State *s)
+debug_print_transitions (struct GNUNET_REGEX_State *s)
 {
   struct Transition *t;
   char *state;
@@ -291,6 +299,96 @@
 }
 
 /**
+ * Recursive function doing DFS with 'v' as a start, detecting all SCCs
+ * inside the subgraph reachable from 'v'. Used with scc_tarjan function
+ * to detect all SCCs inside an automaton.
+ *
+ * @param ctx context
+ * @param v start vertex
+ * @param index current index
+ * @param stack stack for saving all SCCs
+ * @param stack_size current size of the stack
+ */
+static void
+scc_tarjan_strongconnect (struct GNUNET_REGEX_Context *ctx,
+                          struct GNUNET_REGEX_State *v, int *index,
+                          struct GNUNET_REGEX_State **stack,
+                          unsigned int *stack_size)
+{
+  struct GNUNET_REGEX_State *w;
+  struct Transition *t;
+
+  v->index = *index;
+  v->lowlink = *index;
+  (*index)++;
+  stack[(*stack_size)++] = v;
+  v->contained = 1;
+
+  for (t = v->transitions_head; NULL != t; t = t->next)
+  {
+    w = t->to_state;
+    if (NULL != w && w->index < 0)
+    {
+      scc_tarjan_strongconnect (ctx, w, index, stack, stack_size);
+      v->lowlink = (v->lowlink > w->lowlink) ? w->lowlink : v->lowlink;
+    }
+    else if (0 != w->contained)
+      v->lowlink = (v->lowlink > w->index) ? w->index : v->lowlink;
+  }
+
+  if (v->lowlink == v->index)
+  {
+    w = stack[--(*stack_size)];
+    w->contained = 0;
+
+    if (v != w)
+    {
+      ctx->scc_id++;
+      while (v != w)
+      {
+        w->scc_id = ctx->scc_id;
+        w = stack[--(*stack_size)];
+        w->contained = 0;
+      }
+      w->scc_id = ctx->scc_id;
+    }
+  }
+}
+
+/**
+ * Detect all SCCs (Strongly Connected Components) inside the given automaton.
+ * SCCs will be marked using the scc_id on each state.
+ *
+ * @param ctx context
+ * @param a automaton
+ */
+static void
+scc_tarjan (struct GNUNET_REGEX_Context *ctx, struct GNUNET_REGEX_Automaton *a)
+{
+  unsigned int i;
+  int index;
+  struct GNUNET_REGEX_State *v;
+  struct GNUNET_REGEX_State *stack[a->state_count];
+  unsigned int stack_size;
+
+  for (v = a->states_head; NULL != v; v = v->next)
+  {
+    v->contained = 0;
+    v->index = -1;
+    v->lowlink = -1;
+  }
+
+  stack_size = 0;
+  index = 0;
+
+  for (i = 0, v = a->states_head; NULL != v; v = v->next)
+  {
+    if (v->index < 0)
+      scc_tarjan_strongconnect (ctx, v, &index, stack, &stack_size);
+  }
+}
+
+/**
  * Compare two states. Used for sorting.
  *
  * @param a first state
@@ -303,11 +401,11 @@
 static int
 state_compare (const void *a, const void *b)
 {
-  struct State **s1;
-  struct State **s2;
+  struct GNUNET_REGEX_State **s1;
+  struct GNUNET_REGEX_State **s2;
 
-  s1 = (struct State **) a;
-  s2 = (struct State **) b;
+  s1 = (struct GNUNET_REGEX_State **) a;
+  s2 = (struct GNUNET_REGEX_State **) b;
 
   return (*s1)->id - (*s2)->id;
 }
@@ -324,7 +422,8 @@
  *         less than, equal to, or greater than the second.
  */
 static int
-state_set_compare (struct StateSet *sset1, struct StateSet *sset2)
+state_set_compare (struct GNUNET_REGEX_StateSet *sset1,
+                   struct GNUNET_REGEX_StateSet *sset2)
 {
   int result;
   int i;
@@ -345,35 +444,12 @@
 }
 
 /**
- * Checks if 'elem' is contained in 'set'
- *
- * @param set set of states
- * @param elem state
- *
- * @return GNUNET_YES if 'set' contains 'elem, GNUNET_NO otherwise
- */
-static int
-state_set_contains (struct StateSet *set, struct State *elem)
-{
-  struct State *s;
-  int i;
-
-  for (i = 0; i < set->len; i++)
-  {
-    s = set->states[i];
-    if (0 == memcmp (s, elem, sizeof (struct State)))
-      return GNUNET_YES;
-  }
-  return GNUNET_NO;
-}
-
-/**
  * Clears the given StateSet 'set'
  *
  * @param set set to be cleared
  */
 static void
-state_set_clear (struct StateSet *set)
+state_set_clear (struct GNUNET_REGEX_StateSet *set)
 {
   if (NULL != set)
   {
@@ -393,8 +469,8 @@
  */
 static void
 state_add_transition (struct GNUNET_REGEX_Context *ctx,
-                      struct State *from_state, const char literal,
-                      struct State *to_state)
+                      struct GNUNET_REGEX_State *from_state, const char 
literal,
+                      struct GNUNET_REGEX_State *to_state)
 {
   struct Transition *t;
 
@@ -441,7 +517,7 @@
  * @param s state that should be destroyed
  */
 static void
-automaton_destroy_state (struct State *s)
+automaton_destroy_state (struct GNUNET_REGEX_State *s)
 {
   struct Transition *t;
   struct Transition *next_t;
@@ -474,10 +550,11 @@
  * @param s state to remove
  */
 static void
-automaton_remove_state (struct GNUNET_REGEX_Automaton *a, struct State *s)
+automaton_remove_state (struct GNUNET_REGEX_Automaton *a,
+                        struct GNUNET_REGEX_State *s)
 {
-  struct State *ss;
-  struct State *s_check;
+  struct GNUNET_REGEX_State *ss;
+  struct GNUNET_REGEX_State *s_check;
   struct Transition *t_check;
 
   if (NULL == a || NULL == s)
@@ -516,10 +593,11 @@
  */
 static void
 automaton_merge_states (struct GNUNET_REGEX_Context *ctx,
-                        struct GNUNET_REGEX_Automaton *a, struct State *s1,
-                        struct State *s2)
+                        struct GNUNET_REGEX_Automaton *a,
+                        struct GNUNET_REGEX_State *s1,
+                        struct GNUNET_REGEX_State *s2)
 {
-  struct State *s_check;
+  struct GNUNET_REGEX_State *s_check;
   struct Transition *t_check;
   struct Transition *t;
   char *new_name;
@@ -573,7 +651,8 @@
  * @param s state that should be added
  */
 static void
-automaton_add_state (struct GNUNET_REGEX_Automaton *a, struct State *s)
+automaton_add_state (struct GNUNET_REGEX_Automaton *a,
+                     struct GNUNET_REGEX_State *s)
 {
   GNUNET_CONTAINER_DLL_insert (a->states_head, a->states_tail, s);
   a->state_count++;
@@ -588,23 +667,28 @@
  *
  * @return new DFA state
  */
-static struct State *
-dfa_state_create (struct GNUNET_REGEX_Context *ctx, struct StateSet 
*nfa_states)
+static struct GNUNET_REGEX_State *
+dfa_state_create (struct GNUNET_REGEX_Context *ctx,
+                  struct GNUNET_REGEX_StateSet *nfa_states)
 {
-  struct State *s;
+  struct GNUNET_REGEX_State *s;
   char *name;
   int len = 0;
-  struct State *cstate;
+  struct GNUNET_REGEX_State *cstate;
   struct Transition *ctran;
   int insert = 1;
   struct Transition *t;
   int i;
 
-  s = GNUNET_malloc (sizeof (struct State));
+  s = GNUNET_malloc (sizeof (struct GNUNET_REGEX_State));
   s->id = ctx->state_id++;
   s->accepting = 0;
   s->marked = 0;
   s->name = NULL;
+  s->scc_id = 0;
+  s->index = -1;
+  s->lowlink = -1;
+  s->contained = 0;
 
   if (NULL == nfa_states)
   {
@@ -676,11 +760,11 @@
  *
  * @return new state or NULL, if transition on literal not possible
  */
-static struct State *
-dfa_move (struct State *s, const char literal)
+static struct GNUNET_REGEX_State *
+dfa_move (struct GNUNET_REGEX_State *s, const char literal)
 {
   struct Transition *t;
-  struct State *new_s;
+  struct GNUNET_REGEX_State *new_s;
 
   if (NULL == s)
     return NULL;
@@ -708,10 +792,10 @@
 static void
 dfa_remove_unreachable_states (struct GNUNET_REGEX_Automaton *a)
 {
-  struct State *stack[a->state_count * a->state_count];
+  struct GNUNET_REGEX_State *stack[a->state_count * a->state_count];
   int stack_len;
-  struct State *s;
-  struct State *s_next;
+  struct GNUNET_REGEX_State *s;
+  struct GNUNET_REGEX_State *s_next;
   struct Transition *t;
 
   stack_len = 0;
@@ -755,7 +839,7 @@
 static void
 dfa_remove_dead_states (struct GNUNET_REGEX_Automaton *a)
 {
-  struct State *s;
+  struct GNUNET_REGEX_State *s;
   struct Transition *t;
   int dead;
 
@@ -796,8 +880,8 @@
 {
   int i;
   int table[a->state_count][a->state_count];
-  struct State *s1;
-  struct State *s2;
+  struct GNUNET_REGEX_State *s1;
+  struct GNUNET_REGEX_State *s2;
   struct Transition *t1;
   struct Transition *t2;
   int change;
@@ -854,7 +938,7 @@
     }
   }
 
-  struct State *s2_next;
+  struct GNUNET_REGEX_State *s2_next;
 
   for (i = 0, s1 = a->states_head; NULL != s1; s1 = s1->next)
   {
@@ -902,7 +986,8 @@
  * @return new NFA fragment
  */
 static struct GNUNET_REGEX_Automaton *
-nfa_fragment_create (struct State *start, struct State *end)
+nfa_fragment_create (struct GNUNET_REGEX_State *start,
+                     struct GNUNET_REGEX_State *end)
 {
   struct GNUNET_REGEX_Automaton *n;
 
@@ -932,10 +1017,11 @@
  * @param states_tail tail of the DLL of states
  */
 static void
-nfa_add_states (struct GNUNET_REGEX_Automaton *n, struct State *states_head,
-                struct State *states_tail)
+nfa_add_states (struct GNUNET_REGEX_Automaton *n,
+                struct GNUNET_REGEX_State *states_head,
+                struct GNUNET_REGEX_State *states_tail)
 {
-  struct State *s;
+  struct GNUNET_REGEX_State *s;
 
   if (NULL == n || NULL == states_head)
   {
@@ -968,15 +1054,19 @@
  *
  * @return new NFA state
  */
-static struct State *
+static struct GNUNET_REGEX_State *
 nfa_state_create (struct GNUNET_REGEX_Context *ctx, int accepting)
 {
-  struct State *s;
+  struct GNUNET_REGEX_State *s;
 
-  s = GNUNET_malloc (sizeof (struct State));
+  s = GNUNET_malloc (sizeof (struct GNUNET_REGEX_State));
   s->id = ctx->state_id++;
   s->accepting = accepting;
   s->marked = 0;
+  s->contained = 0;
+  s->index = -1;
+  s->lowlink = -1;
+  s->scc_id = 0;
   s->name = NULL;
   GNUNET_asprintf (&s->name, "s%i", s->id);
 
@@ -986,27 +1076,32 @@
 /**
  * Calculates the NFA closure set for the given state.
  *
+ * @param nfa the NFA containing 's'
  * @param s starting point state
  * @param literal transitioning literal on which to base the closure on,
  *                pass 0 for epsilon transition
  *
  * @return sorted nfa closure on 'literal' (epsilon closure if 'literal' is 0)
  */
-static struct StateSet *
-nfa_closure_create (struct State *s, const char literal)
+static struct GNUNET_REGEX_StateSet *
+nfa_closure_create (struct GNUNET_REGEX_Automaton *nfa,
+                    struct GNUNET_REGEX_State *s, const char literal)
 {
-  struct StateSet *cls;
-  struct StateSet *cls_check;
-  struct State *clsstate;
-  struct State *currentstate;
+  struct GNUNET_REGEX_StateSet *cls;
+  struct GNUNET_REGEX_StateSet *cls_check;
+  struct GNUNET_REGEX_State *clsstate;
+  struct GNUNET_REGEX_State *currentstate;
   struct Transition *ctran;
 
   if (NULL == s)
     return NULL;
 
-  cls = GNUNET_malloc (sizeof (struct StateSet));
-  cls_check = GNUNET_malloc (sizeof (struct StateSet));
+  cls = GNUNET_malloc (sizeof (struct GNUNET_REGEX_StateSet));
+  cls_check = GNUNET_malloc (sizeof (struct GNUNET_REGEX_StateSet));
 
+  for (clsstate = nfa->states_head; NULL != clsstate; clsstate = 
clsstate->next)
+    clsstate->contained = 0;
+
   // Add start state to closure only for epsilon closure
   if (0 == literal)
     GNUNET_array_append (cls->states, cls->len, s);
@@ -1024,11 +1119,11 @@
       {
         clsstate = ctran->to_state;
 
-        if (NULL != clsstate &&
-            GNUNET_YES != state_set_contains (cls, clsstate))
+        if (NULL != clsstate && 0 == clsstate->contained)
         {
           GNUNET_array_append (cls->states, cls->len, clsstate);
           GNUNET_array_append (cls_check->states, cls_check->len, clsstate);
+          clsstate->contained = 1;
         }
       }
     }
@@ -1037,7 +1132,8 @@
   GNUNET_free (cls_check);
 
   if (cls->len > 1)
-    qsort (cls->states, cls->len, sizeof (struct State *), state_compare);
+    qsort (cls->states, cls->len, sizeof (struct GNUNET_REGEX_State *),
+           state_compare);
 
   return cls;
 }
@@ -1045,18 +1141,21 @@
 /**
  * Calculates the closure set for the given set of states.
  *
+ * @param nfa the NFA containing 's'
  * @param states list of states on which to base the closure on
  * @param literal transitioning literal for which to base the closure on,
  *                pass 0 for epsilon transition
  *
  * @return sorted nfa closure on 'literal' (epsilon closure if 'literal' is 0)
  */
-static struct StateSet *
-nfa_closure_set_create (struct StateSet *states, const char literal)
+static struct GNUNET_REGEX_StateSet *
+nfa_closure_set_create (struct GNUNET_REGEX_Automaton *nfa,
+                        struct GNUNET_REGEX_StateSet *states,
+                        const char literal)
 {
-  struct State *s;
-  struct StateSet *sset;
-  struct StateSet *cls;
+  struct GNUNET_REGEX_State *s;
+  struct GNUNET_REGEX_StateSet *sset;
+  struct GNUNET_REGEX_StateSet *cls;
   int i;
   int j;
   int k;
@@ -1065,12 +1164,12 @@
   if (NULL == states)
     return NULL;
 
-  cls = GNUNET_malloc (sizeof (struct StateSet));
+  cls = GNUNET_malloc (sizeof (struct GNUNET_REGEX_StateSet));
 
   for (i = 0; i < states->len; i++)
   {
     s = states->states[i];
-    sset = nfa_closure_create (s, literal);
+    sset = nfa_closure_create (nfa, s, literal);
 
     for (j = 0; j < sset->len; j++)
     {
@@ -1090,7 +1189,8 @@
   }
 
   if (cls->len > 1)
-    qsort (cls->states, cls->len, sizeof (struct State *), state_compare);
+    qsort (cls->states, cls->len, sizeof (struct GNUNET_REGEX_State *),
+           state_compare);
 
   return cls;
 }
@@ -1137,8 +1237,8 @@
 {
   struct GNUNET_REGEX_Automaton *a;
   struct GNUNET_REGEX_Automaton *new;
-  struct State *start;
-  struct State *end;
+  struct GNUNET_REGEX_State *start;
+  struct GNUNET_REGEX_State *end;
 
   a = ctx->stack_tail;
   GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, a);
@@ -1196,8 +1296,8 @@
 {
   struct GNUNET_REGEX_Automaton *a;
   struct GNUNET_REGEX_Automaton *new;
-  struct State *start;
-  struct State *end;
+  struct GNUNET_REGEX_State *start;
+  struct GNUNET_REGEX_State *end;
 
   a = ctx->stack_tail;
   GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, a);
@@ -1237,8 +1337,8 @@
   struct GNUNET_REGEX_Automaton *a;
   struct GNUNET_REGEX_Automaton *b;
   struct GNUNET_REGEX_Automaton *new;
-  struct State *start;
-  struct State *end;
+  struct GNUNET_REGEX_State *start;
+  struct GNUNET_REGEX_State *end;
 
   b = ctx->stack_tail;
   GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, b);
@@ -1276,8 +1376,8 @@
 nfa_add_literal (struct GNUNET_REGEX_Context *ctx, const char lit)
 {
   struct GNUNET_REGEX_Automaton *n;
-  struct State *start;
-  struct State *end;
+  struct GNUNET_REGEX_State *start;
+  struct GNUNET_REGEX_State *end;
 
   GNUNET_assert (NULL != ctx);
 
@@ -1290,6 +1390,26 @@
 }
 
 /**
+ * Initialize a new context
+ *
+ * @param ctx context
+ */
+static void
+GNUNET_REGEX_context_init (struct GNUNET_REGEX_Context *ctx)
+{
+  if (NULL == ctx)
+  {
+    GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Context was NULL!");
+    return;
+  }
+  ctx->state_id = 0;
+  ctx->transition_id = 0;
+  ctx->scc_id = 0;
+  ctx->stack_head = NULL;
+  ctx->stack_tail = NULL;
+}
+
+/**
  * Construct an NFA by parsing the regex string of length 'len'.
  *
  * @param regex regular expression string
@@ -1451,31 +1571,6 @@
 }
 
 /**
- * Free the memory allocated by constructing the GNUNET_REGEX_Automaton
- * data structure.
- *
- * @param a automaton to be destroyed
- */
-void
-GNUNET_REGEX_automaton_destroy (struct GNUNET_REGEX_Automaton *a)
-{
-  struct State *s;
-  struct State *next_state;
-
-  if (NULL == a)
-    return;
-
-  for (s = a->states_head; NULL != s;)
-  {
-    next_state = s->next;
-    automaton_destroy_state (s);
-    s = next_state;
-  }
-
-  GNUNET_free (a);
-}
-
-/**
  * Construct DFA for the given 'regex' of length 'len'
  *
  * @param regex regular expression string
@@ -1489,14 +1584,14 @@
   struct GNUNET_REGEX_Context ctx;
   struct GNUNET_REGEX_Automaton *dfa;
   struct GNUNET_REGEX_Automaton *nfa;
-  struct StateSet *tmp;
-  struct StateSet *nfa_set;
-  struct StateSet *dfa_stack;
+  struct GNUNET_REGEX_StateSet *tmp;
+  struct GNUNET_REGEX_StateSet *nfa_set;
+  struct GNUNET_REGEX_StateSet *dfa_stack;
   struct Transition *ctran;
-  struct State *dfa_state;
-  struct State *new_dfa_state;
-  struct State *state_contains;
-  struct State *state_iter;
+  struct GNUNET_REGEX_State *dfa_state;
+  struct GNUNET_REGEX_State *new_dfa_state;
+  struct GNUNET_REGEX_State *state_contains;
+  struct GNUNET_REGEX_State *state_iter;
 
   GNUNET_REGEX_context_init (&ctx);
 
@@ -1514,8 +1609,8 @@
   dfa->type = DFA;
 
   // Create DFA start state from epsilon closure
-  dfa_stack = GNUNET_malloc (sizeof (struct StateSet));
-  nfa_set = nfa_closure_create (nfa->start, 0);
+  dfa_stack = GNUNET_malloc (sizeof (struct GNUNET_REGEX_StateSet));
+  nfa_set = nfa_closure_create (nfa, nfa->start, 0);
   dfa->start = dfa_state_create (&ctx, nfa_set);
   automaton_add_state (dfa, dfa->start);
   GNUNET_array_append (dfa_stack->states, dfa_stack->len, dfa->start);
@@ -1531,8 +1626,8 @@
     {
       if (0 != ctran->literal && NULL == ctran->to_state)
       {
-        tmp = nfa_closure_set_create (dfa_state->nfa_set, ctran->literal);
-        nfa_set = nfa_closure_set_create (tmp, 0);
+        tmp = nfa_closure_set_create (nfa, dfa_state->nfa_set, ctran->literal);
+        nfa_set = nfa_closure_set_create (nfa, tmp, 0);
         state_set_clear (tmp);
         new_dfa_state = dfa_state_create (&ctx, nfa_set);
         state_contains = NULL;
@@ -1564,11 +1659,37 @@
   GNUNET_REGEX_automaton_destroy (nfa);
 
   dfa_minimize (&ctx, dfa);
+  scc_tarjan (&ctx, dfa);
 
   return dfa;
 }
 
 /**
+ * Free the memory allocated by constructing the GNUNET_REGEX_Automaton
+ * data structure.
+ *
+ * @param a automaton to be destroyed
+ */
+void
+GNUNET_REGEX_automaton_destroy (struct GNUNET_REGEX_Automaton *a)
+{
+  struct GNUNET_REGEX_State *s;
+  struct GNUNET_REGEX_State *next_state;
+
+  if (NULL == a)
+    return;
+
+  for (s = a->states_head; NULL != s;)
+  {
+    next_state = s->next;
+    automaton_destroy_state (s);
+    s = next_state;
+  }
+
+  GNUNET_free (a);
+}
+
+/**
  * Save the given automaton as a GraphViz dot file
  *
  * @param a the automaton to be saved
@@ -1578,7 +1699,7 @@
 GNUNET_REGEX_automaton_save_graph (struct GNUNET_REGEX_Automaton *a,
                                    const char *filename)
 {
-  struct State *s;
+  struct GNUNET_REGEX_State *s;
   struct Transition *ctran;
   char *s_acc = NULL;
   char *s_tran = NULL;
@@ -1614,10 +1735,19 @@
   {
     if (s->accepting)
     {
-      GNUNET_asprintf (&s_acc, "\"%s\" [shape=doublecircle];\n", s->name);
+      GNUNET_asprintf (&s_acc,
+                       "\"%s\" [shape=doublecircle, color=\"0.%i 0.8 
0.95\"];\n",
+                       s->name, s->scc_id);
       fwrite (s_acc, strlen (s_acc), 1, p);
       GNUNET_free (s_acc);
     }
+    else
+    {
+      GNUNET_asprintf (&s_acc, "\"%s\" [color=\"0.%i 0.8 0.95\"];\n", s->name,
+                       s->scc_id);
+      fwrite (s_acc, strlen (s_acc), 1, p);
+      GNUNET_free (s_acc);
+    }
 
     s->marked = 1;
 
@@ -1633,13 +1763,16 @@
 
       if (ctran->literal == 0)
       {
-        GNUNET_asprintf (&s_tran, "\"%s\" -> \"%s\" [label = \"epsilon\"];\n",
-                         s->name, ctran->to_state->name);
+        GNUNET_asprintf (&s_tran,
+                         "\"%s\" -> \"%s\" [label = \"epsilon\", color=\"0.%i 
0.8 0.95\"];\n",
+                         s->name, ctran->to_state->name, s->scc_id);
       }
       else
       {
-        GNUNET_asprintf (&s_tran, "\"%s\" -> \"%s\" [label = \"%c\"];\n",
-                         s->name, ctran->to_state->name, ctran->literal);
+        GNUNET_asprintf (&s_tran,
+                         "\"%s\" -> \"%s\" [label = \"%c\", color=\"0.%i 0.8 
0.95\"];\n",
+                         s->name, ctran->to_state->name, ctran->literal,
+                         s->scc_id);
       }
 
       fwrite (s_tran, strlen (s_tran), 1, p);
@@ -1664,7 +1797,7 @@
 evaluate_dfa (struct GNUNET_REGEX_Automaton *a, const char *string)
 {
   const char *strp;
-  struct State *s;
+  struct GNUNET_REGEX_State *s;
 
   if (DFA != a->type)
   {
@@ -1700,9 +1833,9 @@
 evaluate_nfa (struct GNUNET_REGEX_Automaton *a, const char *string)
 {
   const char *strp;
-  struct State *s;
-  struct StateSet *sset;
-  struct StateSet *new_sset;
+  struct GNUNET_REGEX_State *s;
+  struct GNUNET_REGEX_StateSet *sset;
+  struct GNUNET_REGEX_StateSet *new_sset;
   int i;
   int result;
 
@@ -1715,13 +1848,13 @@
 
   result = 1;
   strp = string;
-  sset = nfa_closure_create (a->start, 0);
+  sset = nfa_closure_create (a, a->start, 0);
 
   for (strp = string; NULL != strp && *strp; strp++)
   {
-    new_sset = nfa_closure_set_create (sset, *strp);
+    new_sset = nfa_closure_set_create (a, sset, *strp);
     state_set_clear (sset);
-    sset = nfa_closure_set_create (new_sset, 0);
+    sset = nfa_closure_set_create (a, new_sset, 0);
     state_set_clear (new_sset);
   }
 
@@ -1769,3 +1902,74 @@
 
   return result;
 }
+
+/**
+ * Get the starting state of the given automaton 'a'.
+ *
+ * @param a automaton.
+ *
+ * @return starting state.
+ */
+struct GNUNET_REGEX_State *
+GNUNET_REGEX_automaton_get_start (struct GNUNET_REGEX_Automaton *a)
+{
+  return a->start;
+}
+
+/**
+ * Get the next states, starting from states 's'.
+ *
+ * @param a automaton.
+ * @param s states.
+ * @param count number of states given in 's'. Will contain number of
+ *              states that were returned upon return.
+ *
+ * @return next states, 'count' will contain the number of states.
+ */
+struct GNUNET_REGEX_State **
+GNUNET_REGEX_automaton_states_get_next (struct GNUNET_REGEX_Automaton *a,
+                                        struct GNUNET_REGEX_State **s,
+                                        unsigned int *count)
+{
+  struct GNUNET_REGEX_State *states[a->state_count];
+  struct GNUNET_REGEX_State *state;
+  struct Transition *t;
+  unsigned int cnt;
+  int i;
+
+
+  for (cnt = 0, i = 0; i < *count; i++)
+  {
+    state = s[i];
+    for (t = state->transitions_head; NULL != t; t = t->next)
+    {
+      if (NULL != t && NULL != t->to_state)
+      {
+        states[cnt++] = t->to_state;
+      }
+    }
+  }
+
+  *count = cnt;
+
+  return NULL;
+}
+
+/**
+ * Hash a set of states.
+ *
+ * @param a automaton.
+ * @param s states.
+ * @param count number of states.
+ *
+ * @return hash.
+ */
+struct GNUNET_HashCode
+GNUNET_REGEX_automaton_states_hash (struct GNUNET_REGEX_Automaton *a,
+                                    struct GNUNET_REGEX_State **s,
+                                    unsigned int count)
+{
+  struct GNUNET_HashCode hash;
+
+  return hash;
+}

Modified: gnunet/src/regex/test_regex.c
===================================================================
--- gnunet/src/regex/test_regex.c       2012-04-13 08:51:53 UTC (rev 20978)
+++ gnunet/src/regex/test_regex.c       2012-04-13 14:41:33 UTC (rev 20979)
@@ -249,6 +249,9 @@
   int check_rand;
 
   struct Regex_String_Pair rxstr[4] = {
+    {"ab?(abcd)?", 5,
+     {"ababcd", "abab", "aabcd", "a", "abb"},
+     {match, nomatch, match, match, nomatch}},
     {"ab(c|d)+c*(a(b|c)d)+", 5,
      {"abcdcdcdcdddddabd", "abcd", 
"abcddddddccccccccccccccccccccccccabdacdabd",
       "abccccca", "abcdcdcdccdabdabd"},
@@ -259,10 +262,7 @@
      {nomatch, nomatch, nomatch, nomatch, nomatch}},
     
{"k|a+X*y+c|Q*e|p|R|Z*K*y*R+w|Y*6+n+h*k*w+V*F|W*B*e*g|N+V|t+L|P*j*3*9+X*h*J|J*6|b|E*i*f*R+S|Z|R|Y*Z|g*",
 1,
      {"kaXycQepRZKyRwY6nhkwVFWBegNVtLPj39XhJJ6bEifRSZRYZg"},
-     {nomatch}},
-    {"ab?(abcd)?", 5,
-     {"ababcd", "abab", "aabcd", "a", "abb"},
-     {match, nomatch, match, match, nomatch}}
+     {nomatch}}
   };
 
   check_nfa = 0;




reply via email to

[Prev in Thread] Current Thread [Next in Thread]