[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[GNUnet-SVN] r22478 - gnunet/src/regex
From: |
gnunet |
Subject: |
[GNUnet-SVN] r22478 - gnunet/src/regex |
Date: |
Wed, 4 Jul 2012 15:54:43 +0200 |
Author: szengel
Date: 2012-07-04 15:54:43 +0200 (Wed, 04 Jul 2012)
New Revision: 22478
Added:
gnunet/src/regex/regex_graph.c
Modified:
gnunet/src/regex/Makefile.am
gnunet/src/regex/regex.c
gnunet/src/regex/regex_internal.h
gnunet/src/regex/test_regex_eval_api.c
gnunet/src/regex/test_regex_proofs.c
Log:
Summary: regex cleanup and bugfixes
Author: szengel
Modified: gnunet/src/regex/Makefile.am
===================================================================
--- gnunet/src/regex/Makefile.am 2012-07-04 13:49:54 UTC (rev 22477)
+++ gnunet/src/regex/Makefile.am 2012-07-04 13:54:43 UTC (rev 22478)
@@ -12,7 +12,7 @@
libgnunetregex_la_SOURCES = \
regex_internal.h regex.c \
- regex_random.c
+ regex_graph.c regex_random.c
libgnunetregex_la_LIBADD = -lm \
$(top_builddir)/src/util/libgnunetutil.la
libgnunetregex_la_LDFLAGS = \
Modified: gnunet/src/regex/regex.c
===================================================================
--- gnunet/src/regex/regex.c 2012-07-04 13:49:54 UTC (rev 22477)
+++ gnunet/src/regex/regex.c 2012-07-04 13:54:43 UTC (rev 22478)
@@ -28,11 +28,13 @@
#include "gnunet_regex_lib.h"
#include "regex_internal.h"
+
/**
* Constant for how many bits the initial string regex should have.
*/
#define INITIAL_BITS 10
+
/**
* Context that contains an id counter for states and transitions as well as a
* DLL of automatons used as a stack for NFA construction.
@@ -60,213 +62,8 @@
struct GNUNET_REGEX_Automaton *stack_tail;
};
-/**
- * Type of an automaton.
- */
-enum GNUNET_REGEX_AutomatonType
-{
- NFA,
- DFA
-};
/**
- * Automaton representation.
- */
-struct GNUNET_REGEX_Automaton
-{
- /**
- * Linked list of NFAs used for partial NFA creation.
- */
- struct GNUNET_REGEX_Automaton *prev;
-
- /**
- * Linked list of NFAs used for partial NFA creation.
- */
- struct GNUNET_REGEX_Automaton *next;
-
- /**
- * First state of the automaton. This is mainly used for constructing an NFA,
- * where each NFA itself consists of one or more NFAs linked together.
- */
- struct GNUNET_REGEX_State *start;
-
- /**
- * End state of the partial NFA. This is undefined for DFAs
- */
- struct GNUNET_REGEX_State *end;
-
- /**
- * Number of states in the automaton.
- */
- unsigned int state_count;
-
- /**
- * DLL of states.
- */
- struct GNUNET_REGEX_State *states_head;
-
- /**
- * DLL of states
- */
- struct GNUNET_REGEX_State *states_tail;
-
- /**
- * Type of the automaton.
- */
- enum GNUNET_REGEX_AutomatonType type;
-
- /**
- * Regex
- */
- char *regex;
-
- /**
- * Canonical regex (result of RX->NFA->DFA->RX)
- */
- char *canonical_regex;
-};
-
-/**
- * A state. Can be used in DFA and NFA automatons.
- */
-struct GNUNET_REGEX_State
-{
- /**
- * This is a linked list.
- */
- struct GNUNET_REGEX_State *prev;
-
- /**
- * This is a linked list.
- */
- struct GNUNET_REGEX_State *next;
-
- /**
- * Unique state id.
- */
- unsigned int id;
-
- /**
- * If this is an accepting state or not.
- */
- int accepting;
-
- /**
- * Marking of the state. This is used for marking all visited states when
- * traversing all states of an automaton and for cases where the state id
- * cannot be used (dfa minimization).
- */
- int marked;
-
- /**
- * Marking the state as contained. This is used for checking, if the state is
- * contained in a set in constant time
- */
- int contained;
-
- /**
- * Marking the state as part of an SCC (Strongly Connected Component). All
- * states with the same scc_id are part of the same SCC. scc_id is 0, if
state
- * is not a part of any SCC.
- */
- unsigned int scc_id;
-
- /**
- * Used for SCC detection.
- */
- int index;
-
- /**
- * Used for SCC detection.
- */
- int lowlink;
-
- /**
- * Human readable name of the automaton. Used for debugging and graph
- * creation.
- */
- char *name;
-
- /**
- * Hash of the state.
- */
- struct GNUNET_HashCode hash;
-
- /**
- * State ID for proof creation.
- */
- unsigned int proof_id;
-
- /**
- * Proof for this state.
- */
- char *proof;
-
- /**
- * Number of transitions from this state to other states.
- */
- unsigned int transition_count;
-
- /**
- * DLL of transitions.
- */
- struct Transition *transitions_head;
-
- /**
- * DLL of transitions.
- */
- struct Transition *transitions_tail;
-
- /**
- * Set of states on which this state is based on. Used when creating a DFA
out
- * of several NFA states.
- */
- struct GNUNET_REGEX_StateSet *nfa_set;
-};
-
-/**
- * Transition between two states. Each state can have 0-n transitions. If
label
- * is 0, this is considered to be an epsilon transition.
- */
-struct Transition
-{
- /**
- * This is a linked list.
- */
- struct Transition *prev;
-
- /**
- * This is a linked list.
- */
- struct Transition *next;
-
- /**
- * Unique id of this transition.
- */
- unsigned int id;
-
- /**
- * Label for this transition. This is basically the edge label for the graph.
- */
- char label;
-
- /**
- * State to which this transition leads.
- */
- struct GNUNET_REGEX_State *to_state;
-
- /**
- * State from which this transition origins.
- */
- struct GNUNET_REGEX_State *from_state;
-
- /**
- * Mark this transition. For example when reversing the automaton.
- */
- int mark;
-};
-
-/**
* Set of states.
*/
struct GNUNET_REGEX_StateSet
@@ -282,6 +79,7 @@
unsigned int len;
};
+
/*
* Debug helper functions
*/
@@ -294,6 +92,7 @@
void
debug_print_transitions (struct GNUNET_REGEX_State *s);
+
/**
* Print information of the given state 's'.
*
@@ -318,6 +117,7 @@
debug_print_transitions (s);
}
+
/**
* Print debug information for all states contained in the automaton 'a'.
*
@@ -332,13 +132,14 @@
debug_print_state (s);
}
+
/**
* Print debug information for given transition 't'.
*
* @param t transition for which to print debug info.
*/
void
-debug_print_transition (struct Transition *t)
+debug_print_transition (struct GNUNET_REGEX_Transition *t)
{
char *to_state;
char *from_state;
@@ -366,10 +167,11 @@
t->id, from_state, label, to_state);
}
+
void
debug_print_transitions (struct GNUNET_REGEX_State *s)
{
- struct Transition *t;
+ struct GNUNET_REGEX_Transition *t;
for (t = s->transitions_head; NULL != t; t = t->next)
debug_print_transition (t);
@@ -377,97 +179,6 @@
/**
- * Recursive function doing DFS with 'v' as a start, detecting all SCCs inside
- * the subgraph reachable from 'v'. Used with scc_tarjan function to detect all
- * SCCs inside an automaton.
- *
- * @param scc_counter counter for numbering the sccs
- * @param v start vertex
- * @param index current index
- * @param stack stack for saving all SCCs
- * @param stack_size current size of the stack
- */
-static void
-scc_tarjan_strongconnect (unsigned int *scc_counter,
- struct GNUNET_REGEX_State *v, unsigned int *index,
- struct GNUNET_REGEX_State **stack,
- unsigned int *stack_size)
-{
- struct GNUNET_REGEX_State *w;
- struct Transition *t;
-
- v->index = *index;
- v->lowlink = *index;
- (*index)++;
- stack[(*stack_size)++] = v;
- v->contained = 1;
-
- for (t = v->transitions_head; NULL != t; t = t->next)
- {
- w = t->to_state;
- if (NULL != w && w->index < 0)
- {
- scc_tarjan_strongconnect (scc_counter, w, index, stack, stack_size);
- v->lowlink = (v->lowlink > w->lowlink) ? w->lowlink : v->lowlink;
- }
- else if (0 != w->contained)
- v->lowlink = (v->lowlink > w->index) ? w->index : v->lowlink;
- }
-
- if (v->lowlink == v->index)
- {
- w = stack[--(*stack_size)];
- w->contained = 0;
-
- if (v != w)
- {
- (*scc_counter)++;
- while (v != w)
- {
- w->scc_id = *scc_counter;
- w = stack[--(*stack_size)];
- w->contained = 0;
- }
- w->scc_id = *scc_counter;
- }
- }
-}
-
-
-/**
- * Detect all SCCs (Strongly Connected Components) inside the given automaton.
- * SCCs will be marked using the scc_id on each state.
- *
- * @param a the automaton for which SCCs should be computed and assigned.
- */
-static void
-scc_tarjan (struct GNUNET_REGEX_Automaton *a)
-{
- unsigned int index;
- unsigned int scc_counter;
- struct GNUNET_REGEX_State *v;
- struct GNUNET_REGEX_State *stack[a->state_count];
- unsigned int stack_size;
-
- for (v = a->states_head; NULL != v; v = v->next)
- {
- v->contained = 0;
- v->index = -1;
- v->lowlink = -1;
- }
-
- stack_size = 0;
- index = 0;
- scc_counter = 0;
-
- for (v = a->states_head; NULL != v; v = v->next)
- {
- if (v->index < 0)
- scc_tarjan_strongconnect (&scc_counter, v, &index, stack, &stack_size);
- }
-}
-
-/**
* Adds a transition from one state to another on 'label'. Does not add
* duplicate states.
*
@@ -482,8 +193,8 @@
struct GNUNET_REGEX_State *to_state)
{
int is_dup;
- struct Transition *t;
- struct Transition *oth;
+ struct GNUNET_REGEX_Transition *t;
+ struct GNUNET_REGEX_Transition *oth;
if (NULL == from_state)
{
@@ -513,7 +224,7 @@
break;
}
- t = GNUNET_malloc (sizeof (struct Transition));
+ t = GNUNET_malloc (sizeof (struct GNUNET_REGEX_Transition));
t->id = ctx->transition_id++;
t->label = label;
t->to_state = to_state;
@@ -525,14 +236,16 @@
from_state->transitions_tail, oth, t);
}
-/**
+
+/**
* Remove a 'transition' from 'state'.
- *
+ *
* @param state state from which the to-be-removed transition originates.
* @param transition transition that should be removed from state 'state'.
*/
static void
-state_remove_transition (struct GNUNET_REGEX_State *state, struct Transition
*transition)
+state_remove_transition (struct GNUNET_REGEX_State *state,
+ struct GNUNET_REGEX_Transition *transition)
{
if (NULL == state || NULL == transition)
return;
@@ -541,10 +254,12 @@
return;
state->transition_count--;
- GNUNET_CONTAINER_DLL_remove (state->transitions_head,
state->transitions_tail, transition);
+ GNUNET_CONTAINER_DLL_remove (state->transitions_head,
state->transitions_tail,
+ transition);
GNUNET_free (transition);
}
+
/**
* Compare two states. Used for sorting.
*
@@ -567,6 +282,7 @@
return (*s1)->id - (*s2)->id;
}
+
/**
* Get all edges leaving state 's'.
*
@@ -578,7 +294,7 @@
static unsigned int
state_get_edges (struct GNUNET_REGEX_State *s, struct GNUNET_REGEX_Edge *edges)
{
- struct Transition *t;
+ struct GNUNET_REGEX_Transition *t;
unsigned int count;
if (NULL == s)
@@ -598,6 +314,7 @@
return count;
}
+
/**
* Compare to state sets by comparing the id's of the states that are contained
* in each set. Both sets are expected to be sorted by id!
@@ -631,6 +348,7 @@
return result;
}
+
/**
* Clears the given StateSet 'set'
*
@@ -646,6 +364,7 @@
}
}
+
/**
* Clears an automaton fragment. Does not destroy the states inside the
* automaton.
@@ -666,6 +385,7 @@
GNUNET_free (a);
}
+
/**
* Frees the memory used by State 's'
*
@@ -674,8 +394,8 @@
static void
automaton_destroy_state (struct GNUNET_REGEX_State *s)
{
- struct Transition *t;
- struct Transition *next_t;
+ struct GNUNET_REGEX_Transition *t;
+ struct GNUNET_REGEX_Transition *next_t;
if (NULL == s)
return;
@@ -695,6 +415,7 @@
GNUNET_free (s);
}
+
/**
* Remove a state from the given automaton 'a'. Always use this function when
* altering the states of an automaton. Will also remove all transitions
leading
@@ -709,7 +430,7 @@
{
struct GNUNET_REGEX_State *ss;
struct GNUNET_REGEX_State *s_check;
- struct Transition *t_check;
+ struct GNUNET_REGEX_Transition *t_check;
if (NULL == a || NULL == s)
return;
@@ -737,6 +458,7 @@
automaton_destroy_state (ss);
}
+
/**
* Merge two states into one. Will merge 's1' and 's2' into 's1' and destroy
* 's2'.
@@ -753,9 +475,9 @@
struct GNUNET_REGEX_State *s2)
{
struct GNUNET_REGEX_State *s_check;
- struct Transition *t_check;
- struct Transition *t;
- struct Transition *t_next;
+ struct GNUNET_REGEX_Transition *t_check;
+ struct GNUNET_REGEX_Transition *t;
+ struct GNUNET_REGEX_Transition *t_next;
char *new_name;
int is_dup;
@@ -806,6 +528,7 @@
automaton_destroy_state (s2);
}
+
/**
* Add a state to the automaton 'a', always use this function to alter the
* states DLL of the automaton.
@@ -821,15 +544,6 @@
a->state_count++;
}
-/**
- * Function that is called with each state, when traversing an automaton.
- *
- * @param cls closure.
- * @param count current count of the state, from 0 to a->state_count -1.
- * @param s state.
- */
-typedef void (*GNUNET_REGEX_traverse_action) (void *cls, unsigned int count,
- struct GNUNET_REGEX_State * s);
/**
* Depth-first traversal of all states that are reachable from state 's'.
Expects the states to
@@ -845,7 +559,7 @@
automaton_state_traverse (struct GNUNET_REGEX_State *s, unsigned int *count,
GNUNET_REGEX_traverse_action action, void
*action_cls)
{
- struct Transition *t;
+ struct GNUNET_REGEX_Transition *t;
if (GNUNET_NO != s->marked)
return;
@@ -866,9 +580,10 @@
* @param action action to be performed on each state.
* @param action_cls closure for action
*/
-static void
-automaton_traverse (struct GNUNET_REGEX_Automaton *a,
- GNUNET_REGEX_traverse_action action, void *action_cls)
+void
+GNUNET_REGEX_automaton_traverse (struct GNUNET_REGEX_Automaton *a,
+ GNUNET_REGEX_traverse_action action,
+ void *action_cls)
{
unsigned int count;
struct GNUNET_REGEX_State *s;
@@ -884,9 +599,6 @@
* Check if the given string 'str' needs parentheses around it when
* using it to generate a regex.
*
- * Currently only tests for first and last characters being '()' respectively.
- * FIXME: What about "(ab)|(cd)"?
- *
* @param str string
*
* @return GNUNET_YES if parentheses are needed, GNUNET_NO otherwise
@@ -932,12 +644,9 @@
/**
* Remove parentheses surrounding string 'str'.
- * Example: "(a)" becomes "a".
+ * Example: "(a)" becomes "a", "(a|b)|(a|c)" stays the same.
* You need to GNUNET_free the returned string.
*
- * Currently only tests for first and last characters being '()' respectively.
- * FIXME: What about "(ab)|(cd)"?
- *
* @param str string, free'd or re-used by this function, can be NULL
*
* @return string without surrounding parentheses, string 'str' if no preceding
@@ -947,12 +656,18 @@
remove_parentheses (char *str)
{
size_t slen;
+ const char *pos;
if ((NULL == str) || ('(' != str[0]) ||
(str[(slen = strlen (str)) - 1] != ')'))
return str;
- memmove (str, &str[1], slen - 2);
- str[slen - 2] = '\0';
+
+ pos = strchr (&str[1], ')');
+ if (pos == &str[slen - 1])
+ {
+ memmove (str, &str[1], slen - 2);
+ str[slen - 2] = '\0';
+ }
return str;
}
@@ -999,6 +714,7 @@
return GNUNET_strdup (str);
}
+
/**
* Compare 'str1', starting from position 'k', with whole 'str2'
*
@@ -1034,8 +750,9 @@
return strcmp (str1, str2);
}
+
/**
- * Helper function used as 'action' in 'automaton_traverse' function to create
+ * Helper function used as 'action' in 'GNUNET_REGEX_automaton_traverse'
function to create
* the depth-first numbering of the states.
*
* @param cls states array.
@@ -1051,11 +768,12 @@
states[count] = s;
}
-/**
+
+/**
* Construct the regular expression given the inductive step,
* $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^*
* R^{(k-1)}_{kj}, and simplify the resulting expression saved in R_cur_ij.
- *
+ *
* @param R_last_ij value of $R^{(k-1)_{ij}.
* @param R_last_ik value of $R^{(k-1)_{ik}.
* @param R_last_kk value of $R^{(k-1)_{kk}.
@@ -1390,7 +1108,7 @@
GNUNET_free_non_null (R_temp_ik);
GNUNET_free_non_null (R_temp_kk);
GNUNET_free_non_null (R_temp_kj);
-
+
if (NULL == R_cur_l && NULL == R_cur_r)
{
*R_cur_ij = NULL;
@@ -1417,10 +1135,12 @@
}
GNUNET_asprintf (R_cur_ij, "(%s|%s)", R_cur_l, R_cur_r);
+
GNUNET_free (R_cur_l);
GNUNET_free (R_cur_r);
}
+
/**
* create proofs for all states in the given automaton. Implementation of the
* algorithm descriped in chapter 3.2.1 of "Automata Theory, Languages, and
@@ -1436,7 +1156,7 @@
char *R_last[n][n];
char *R_cur[n][n];
char *temp;
- struct Transition *t;
+ struct GNUNET_REGEX_Transition *t;
char *complete_regex;
unsigned int i;
unsigned int j;
@@ -1444,7 +1164,7 @@
/* create depth-first numbering of the states, initializes 'state' */
- automaton_traverse (a, &number_states, states);
+ GNUNET_REGEX_automaton_traverse (a, &number_states, states);
/* Compute regular expressions of length "1" between each pair of states */
for (i = 0; i < n; i++)
@@ -1547,13 +1267,6 @@
}
a->canonical_regex = complete_regex;
- GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
- "---------------------------------------------\n");
- GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Regex: %s\n", a->regex);
- GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Complete Regex: %s\n", complete_regex);
- GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
- "---------------------------------------------\n");
-
// cleanup
for (i = 0; i < n; i++)
{
@@ -1562,6 +1275,7 @@
}
}
+
/**
* Creates a new DFA state based on a set of NFA states. Needs to be freed
using
* automaton_destroy_state.
@@ -1579,7 +1293,7 @@
char *name;
int len = 0;
struct GNUNET_REGEX_State *cstate;
- struct Transition *ctran;
+ struct GNUNET_REGEX_Transition *ctran;
unsigned int i;
s = GNUNET_malloc (sizeof (struct GNUNET_REGEX_State));
@@ -1641,6 +1355,7 @@
return s;
}
+
/**
* Move from the given state 's' to the next state on transition 'label'
*
@@ -1652,7 +1367,7 @@
static struct GNUNET_REGEX_State *
dfa_move (struct GNUNET_REGEX_State *s, const char label)
{
- struct Transition *t;
+ struct GNUNET_REGEX_Transition *t;
struct GNUNET_REGEX_State *new_s;
if (NULL == s)
@@ -1672,6 +1387,7 @@
return new_s;
}
+
/**
* Remove all unreachable states from DFA 'a'. Unreachable states are those
* states that are not reachable from the starting state.
@@ -1689,7 +1405,7 @@
s->marked = GNUNET_NO;
// 2. traverse dfa from start state and mark all visited states
- automaton_traverse (a, NULL, NULL);
+ GNUNET_REGEX_automaton_traverse (a, NULL, NULL);
// 3. delete all states that were not visited
for (s = a->states_head; NULL != s; s = s_next)
@@ -1700,6 +1416,7 @@
}
}
+
/**
* Remove all dead states from the DFA 'a'. Dead states are those states that
do
* not transition to any other state but themselfes.
@@ -1710,7 +1427,7 @@
dfa_remove_dead_states (struct GNUNET_REGEX_Automaton *a)
{
struct GNUNET_REGEX_State *s;
- struct Transition *t;
+ struct GNUNET_REGEX_Transition *t;
int dead;
GNUNET_assert (DFA == a->type);
@@ -1738,6 +1455,7 @@
}
}
+
/**
* Merge all non distinguishable states in the DFA 'a'
*
@@ -1752,8 +1470,8 @@
int table[a->state_count][a->state_count];
struct GNUNET_REGEX_State *s1;
struct GNUNET_REGEX_State *s2;
- struct Transition *t1;
- struct Transition *t2;
+ struct GNUNET_REGEX_Transition *t1;
+ struct GNUNET_REGEX_Transition *t2;
struct GNUNET_REGEX_State *s1_next;
struct GNUNET_REGEX_State *s2_next;
int change;
@@ -1832,6 +1550,7 @@
}
}
+
/**
* Minimize the given DFA 'a' by removing all unreachable states, removing all
* dead states and merging all non distinguishable states
@@ -1858,6 +1577,7 @@
dfa_merge_nondistinguishable_states (ctx, a);
}
+
/**
* Creates a new NFA fragment. Needs to be cleared using
* automaton_fragment_clear.
@@ -1891,6 +1611,7 @@
return n;
}
+
/**
* Adds a list of states to the given automaton 'n'.
*
@@ -1928,6 +1649,7 @@
n->state_count++;
}
+
/**
* Creates a new NFA state. Needs to be freed using automaton_destroy_state.
*
@@ -1955,6 +1677,7 @@
return s;
}
+
/**
* Calculates the NFA closure set for the given state.
*
@@ -1973,7 +1696,7 @@
struct GNUNET_REGEX_StateSet *cls_check;
struct GNUNET_REGEX_State *clsstate;
struct GNUNET_REGEX_State *currentstate;
- struct Transition *ctran;
+ struct GNUNET_REGEX_Transition *ctran;
if (NULL == s)
return NULL;
@@ -2021,6 +1744,7 @@
return cls;
}
+
/**
* Calculates the closure set for the given set of states.
*
@@ -2077,6 +1801,7 @@
return cls;
}
+
/**
* Pops two NFA fragments (a, b) from the stack and concatenates them (ab)
*
@@ -2109,6 +1834,7 @@
GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, new);
}
+
/**
* Pops a NFA fragment from the stack (a) and adds a new fragment (a*)
*
@@ -2150,6 +1876,7 @@
GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, new);
}
+
/**
* Pops an NFA fragment (a) from the stack and adds a new fragment (a+)
*
@@ -2168,6 +1895,7 @@
GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, a);
}
+
/**
* Pops an NFA fragment (a) from the stack and adds a new fragment (a?)
*
@@ -2207,6 +1935,7 @@
GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, new);
}
+
/**
* Pops two NFA fragments (a, b) from the stack and adds a new NFA fragment
that
* alternates between a and b (a|b)
@@ -2248,6 +1977,7 @@
GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, new);
}
+
/**
* Adds a new nfa fragment to the stack
*
@@ -2271,6 +2001,7 @@
GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, n);
}
+
/**
* Initialize a new context
*
@@ -2290,6 +2021,7 @@
ctx->stack_tail = NULL;
}
+
/**
* Construct an NFA by parsing the regex string of length 'len'.
*
@@ -2452,6 +2184,7 @@
return NULL;
}
+
/**
* Create DFA states based on given 'nfa' and starting with 'dfa_state'.
*
@@ -2467,7 +2200,7 @@
struct GNUNET_REGEX_Automaton *dfa,
struct GNUNET_REGEX_State *dfa_state)
{
- struct Transition *ctran;
+ struct GNUNET_REGEX_Transition *ctran;
struct GNUNET_REGEX_State *state_iter;
struct GNUNET_REGEX_State *new_dfa_state;
struct GNUNET_REGEX_State *state_contains;
@@ -2505,6 +2238,7 @@
}
}
+
/**
* Construct DFA for the given 'regex' of length 'len'
*
@@ -2555,6 +2289,7 @@
return dfa;
}
+
/**
* Free the memory allocated by constructing the GNUNET_REGEX_Automaton data
* structure.
@@ -2583,134 +2318,8 @@
GNUNET_free (a);
}
-/**
- * Save a state to an open file pointer. cls is expected to be a file pointer
to
- * an open file. Used only in conjunction with
- * GNUNET_REGEX_automaton_save_graph.
- *
- * @param cls file pointer.
- * @param count current count of the state, not used.
- * @param s state.
- */
-void
-GNUNET_REGEX_automaton_save_graph_step (void *cls, unsigned int count,
- struct GNUNET_REGEX_State *s)
-{
- FILE *p;
- struct Transition *ctran;
- char *s_acc = NULL;
- char *s_tran = NULL;
- p = cls;
-
- if (s->accepting)
- {
- GNUNET_asprintf (&s_acc,
- "\"%s(%i)\" [shape=doublecircle, color=\"0.%i 0.8
0.95\"];\n",
- s->name, s->proof_id, s->scc_id);
- }
- else
- {
- GNUNET_asprintf (&s_acc, "\"%s(%i)\" [color=\"0.%i 0.8 0.95\"];\n",
s->name,
- s->proof_id, s->scc_id);
- }
-
- if (NULL == s_acc)
- {
- GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not print state %s\n",
s->name);
- return;
- }
- fwrite (s_acc, strlen (s_acc), 1, p);
- GNUNET_free (s_acc);
- s_acc = NULL;
-
- for (ctran = s->transitions_head; NULL != ctran; ctran = ctran->next)
- {
- if (NULL == ctran->to_state)
- {
- GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
- "Transition from State %i has no state for transitioning\n",
- s->id);
- continue;
- }
-
- if (ctran->label == 0)
- {
- GNUNET_asprintf (&s_tran,
- "\"%s(%i)\" -> \"%s(%i)\" [label = \"epsilon\",
color=\"0.%i 0.8 0.95\"];\n",
- s->name, s->proof_id, ctran->to_state->name,
- ctran->to_state->proof_id, s->scc_id);
- }
- else
- {
- GNUNET_asprintf (&s_tran,
- "\"%s(%i)\" -> \"%s(%i)\" [label = \"%c\", color=\"0.%i
0.8 0.95\"];\n",
- s->name, s->proof_id, ctran->to_state->name,
- ctran->to_state->proof_id, ctran->label, s->scc_id);
- }
-
- if (NULL == s_tran)
- {
- GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not print state %s\n",
- s->name);
- return;
- }
-
- fwrite (s_tran, strlen (s_tran), 1, p);
- GNUNET_free (s_tran);
- s_tran = NULL;
- }
-}
-
/**
- * Save the given automaton as a GraphViz dot file
- *
- * @param a the automaton to be saved
- * @param filename where to save the file
- */
-void
-GNUNET_REGEX_automaton_save_graph (struct GNUNET_REGEX_Automaton *a,
- const char *filename)
-{
- char *start;
- char *end;
- FILE *p;
-
- if (NULL == a)
- {
- GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not print NFA, was NULL!");
- return;
- }
-
- if (NULL == filename || strlen (filename) < 1)
- {
- GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "No Filename given!");
- return;
- }
-
- p = fopen (filename, "w");
-
- if (NULL == p)
- {
- GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not open file for writing: %s",
- filename);
- return;
- }
-
- /* First add the SCCs to the automaton, so we can color them nicely */
- scc_tarjan (a);
-
- start = "digraph G {\nrankdir=LR\n";
- fwrite (start, strlen (start), 1, p);
-
- automaton_traverse (a, &GNUNET_REGEX_automaton_save_graph_step, p);
-
- end = "\n}\n";
- fwrite (end, strlen (end), 1, p);
- fclose (p);
-}
-
-/**
* Evaluates the given string using the given DFA automaton
*
* @param a automaton, type must be DFA
@@ -2750,6 +2359,7 @@
return 1;
}
+
/**
* Evaluates the given string using the given NFA automaton
*
@@ -2805,6 +2415,7 @@
return result;
}
+
/**
* Evaluates the given 'string' against the given compiled regex
*
@@ -2857,6 +2468,7 @@
return a->canonical_regex;
}
+
/**
* Get the first key for the given 'input_string'. This hashes the first x bits
* of the 'input_strings'.
@@ -2887,6 +2499,7 @@
return size;
}
+
/**
* Check if the given 'proof' matches the given 'key'.
*
@@ -2901,6 +2514,7 @@
return GNUNET_OK;
}
+
/**
* Iterate over all edges helper function starting from state 's', calling
* iterator on for each edge.
@@ -2913,7 +2527,7 @@
iterate_edge (struct GNUNET_REGEX_State *s, GNUNET_REGEX_KeyIterator iterator,
void *iterator_cls)
{
- struct Transition *t;
+ struct GNUNET_REGEX_Transition *t;
struct GNUNET_REGEX_Edge edges[s->transition_count];
unsigned int num_edges;
@@ -2930,6 +2544,7 @@
}
}
+
/**
* Iterate over all edges starting from start state of automaton 'a'. Calling
* iterator for each edge.
Added: gnunet/src/regex/regex_graph.c
===================================================================
--- gnunet/src/regex/regex_graph.c (rev 0)
+++ gnunet/src/regex/regex_graph.c 2012-07-04 13:54:43 UTC (rev 22478)
@@ -0,0 +1,248 @@
+/*
+ This file is part of GNUnet
+ (C) 2012 Christian Grothoff (and other contributing authors)
+
+ GNUnet is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GNUnet is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GNUnet; see the file COPYING. If not, write to the
+ Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA.
+*/
+/**
+ * @file src/regex/regex_graph.c
+ * @brief functions for creating .dot graphs from regexes
+ * @author Maximilian Szengel
+ */
+#include "platform.h"
+#include "gnunet_regex_lib.h"
+#include "regex_internal.h"
+
+/**
+ * Recursive function doing DFS with 'v' as a start, detecting all SCCs inside
+ * the subgraph reachable from 'v'. Used with scc_tarjan function to detect all
+ * SCCs inside an automaton.
+ *
+ * @param scc_counter counter for numbering the sccs
+ * @param v start vertex
+ * @param index current index
+ * @param stack stack for saving all SCCs
+ * @param stack_size current size of the stack
+ */
+static void
+scc_tarjan_strongconnect (unsigned int *scc_counter,
+ struct GNUNET_REGEX_State *v, unsigned int *index,
+ struct GNUNET_REGEX_State **stack,
+ unsigned int *stack_size)
+{
+ struct GNUNET_REGEX_State *w;
+ struct GNUNET_REGEX_Transition *t;
+
+ v->index = *index;
+ v->lowlink = *index;
+ (*index)++;
+ stack[(*stack_size)++] = v;
+ v->contained = 1;
+
+ for (t = v->transitions_head; NULL != t; t = t->next)
+ {
+ w = t->to_state;
+ if (NULL != w && w->index < 0)
+ {
+ scc_tarjan_strongconnect (scc_counter, w, index, stack, stack_size);
+ v->lowlink = (v->lowlink > w->lowlink) ? w->lowlink : v->lowlink;
+ }
+ else if (0 != w->contained)
+ v->lowlink = (v->lowlink > w->index) ? w->index : v->lowlink;
+ }
+
+ if (v->lowlink == v->index)
+ {
+ w = stack[--(*stack_size)];
+ w->contained = 0;
+
+ if (v != w)
+ {
+ (*scc_counter)++;
+ while (v != w)
+ {
+ w->scc_id = *scc_counter;
+ w = stack[--(*stack_size)];
+ w->contained = 0;
+ }
+ w->scc_id = *scc_counter;
+ }
+ }
+}
+
+
+/**
+ * Detect all SCCs (Strongly Connected Components) inside the given automaton.
+ * SCCs will be marked using the scc_id on each state.
+ *
+ * @param a the automaton for which SCCs should be computed and assigned.
+ */
+static void
+scc_tarjan (struct GNUNET_REGEX_Automaton *a)
+{
+ unsigned int index;
+ unsigned int scc_counter;
+ struct GNUNET_REGEX_State *v;
+ struct GNUNET_REGEX_State *stack[a->state_count];
+ unsigned int stack_size;
+
+ for (v = a->states_head; NULL != v; v = v->next)
+ {
+ v->contained = 0;
+ v->index = -1;
+ v->lowlink = -1;
+ }
+
+ stack_size = 0;
+ index = 0;
+ scc_counter = 0;
+
+ for (v = a->states_head; NULL != v; v = v->next)
+ {
+ if (v->index < 0)
+ scc_tarjan_strongconnect (&scc_counter, v, &index, stack, &stack_size);
+ }
+}
+
+
+/**
+ * Save a state to an open file pointer. cls is expected to be a file pointer
to
+ * an open file. Used only in conjunction with
+ * GNUNET_REGEX_automaton_save_graph.
+ *
+ * @param cls file pointer.
+ * @param count current count of the state, not used.
+ * @param s state.
+ */
+void
+GNUNET_REGEX_automaton_save_graph_step (void *cls, unsigned int count,
+ struct GNUNET_REGEX_State *s)
+{
+ FILE *p;
+ struct GNUNET_REGEX_Transition *ctran;
+ char *s_acc = NULL;
+ char *s_tran = NULL;
+
+ p = cls;
+
+ if (s->accepting)
+ {
+ GNUNET_asprintf (&s_acc,
+ "\"%s(%i)\" [shape=doublecircle, color=\"0.%i 0.8
0.95\"];\n",
+ s->name, s->proof_id, s->scc_id);
+ }
+ else
+ {
+ GNUNET_asprintf (&s_acc, "\"%s(%i)\" [color=\"0.%i 0.8 0.95\"];\n",
s->name,
+ s->proof_id, s->scc_id);
+ }
+
+ if (NULL == s_acc)
+ {
+ GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not print state %s\n",
s->name);
+ return;
+ }
+ fwrite (s_acc, strlen (s_acc), 1, p);
+ GNUNET_free (s_acc);
+ s_acc = NULL;
+
+ for (ctran = s->transitions_head; NULL != ctran; ctran = ctran->next)
+ {
+ if (NULL == ctran->to_state)
+ {
+ GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
+ "Transition from State %i has no state for transitioning\n",
+ s->id);
+ continue;
+ }
+
+ if (ctran->label == 0)
+ {
+ GNUNET_asprintf (&s_tran,
+ "\"%s(%i)\" -> \"%s(%i)\" [label = \"epsilon\",
color=\"0.%i 0.8 0.95\"];\n",
+ s->name, s->proof_id, ctran->to_state->name,
+ ctran->to_state->proof_id, s->scc_id);
+ }
+ else
+ {
+ GNUNET_asprintf (&s_tran,
+ "\"%s(%i)\" -> \"%s(%i)\" [label = \"%c\", color=\"0.%i
0.8 0.95\"];\n",
+ s->name, s->proof_id, ctran->to_state->name,
+ ctran->to_state->proof_id, ctran->label, s->scc_id);
+ }
+
+ if (NULL == s_tran)
+ {
+ GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not print state %s\n",
+ s->name);
+ return;
+ }
+
+ fwrite (s_tran, strlen (s_tran), 1, p);
+ GNUNET_free (s_tran);
+ s_tran = NULL;
+ }
+}
+
+
+/**
+ * Save the given automaton as a GraphViz dot file
+ *
+ * @param a the automaton to be saved
+ * @param filename where to save the file
+ */
+void
+GNUNET_REGEX_automaton_save_graph (struct GNUNET_REGEX_Automaton *a,
+ const char *filename)
+{
+ char *start;
+ char *end;
+ FILE *p;
+
+ if (NULL == a)
+ {
+ GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not print NFA, was NULL!");
+ return;
+ }
+
+ if (NULL == filename || strlen (filename) < 1)
+ {
+ GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "No Filename given!");
+ return;
+ }
+
+ p = fopen (filename, "w");
+
+ if (NULL == p)
+ {
+ GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not open file for writing: %s",
+ filename);
+ return;
+ }
+
+ /* First add the SCCs to the automaton, so we can color them nicely */
+ scc_tarjan (a);
+
+ start = "digraph G {\nrankdir=LR\n";
+ fwrite (start, strlen (start), 1, p);
+
+ GNUNET_REGEX_automaton_traverse (a, &GNUNET_REGEX_automaton_save_graph_step,
+ p);
+
+ end = "\n}\n";
+ fwrite (end, strlen (end), 1, p);
+ fclose (p);
+}
Modified: gnunet/src/regex/regex_internal.h
===================================================================
--- gnunet/src/regex/regex_internal.h 2012-07-04 13:49:54 UTC (rev 22477)
+++ gnunet/src/regex/regex_internal.h 2012-07-04 13:54:43 UTC (rev 22478)
@@ -43,6 +43,241 @@
/**
+ * Transition between two states. Each state can have 0-n transitions. If
label
+ * is 0, this is considered to be an epsilon transition.
+ */
+struct GNUNET_REGEX_Transition
+{
+ /**
+ * This is a linked list.
+ */
+ struct GNUNET_REGEX_Transition *prev;
+
+ /**
+ * This is a linked list.
+ */
+ struct GNUNET_REGEX_Transition *next;
+
+ /**
+ * Unique id of this transition.
+ */
+ unsigned int id;
+
+ /**
+ * Label for this transition. This is basically the edge label for the graph.
+ */
+ char label;
+
+ /**
+ * State to which this transition leads.
+ */
+ struct GNUNET_REGEX_State *to_state;
+
+ /**
+ * State from which this transition origins.
+ */
+ struct GNUNET_REGEX_State *from_state;
+
+ /**
+ * Mark this transition. For example when reversing the automaton.
+ */
+ int mark;
+};
+
+
+/**
+ * A state. Can be used in DFA and NFA automatons.
+ */
+struct GNUNET_REGEX_State
+{
+ /**
+ * This is a linked list.
+ */
+ struct GNUNET_REGEX_State *prev;
+
+ /**
+ * This is a linked list.
+ */
+ struct GNUNET_REGEX_State *next;
+
+ /**
+ * Unique state id.
+ */
+ unsigned int id;
+
+ /**
+ * If this is an accepting state or not.
+ */
+ int accepting;
+
+ /**
+ * Marking of the state. This is used for marking all visited states when
+ * traversing all states of an automaton and for cases where the state id
+ * cannot be used (dfa minimization).
+ */
+ int marked;
+
+ /**
+ * Marking the state as contained. This is used for checking, if the state is
+ * contained in a set in constant time
+ */
+ int contained;
+
+ /**
+ * Marking the state as part of an SCC (Strongly Connected Component). All
+ * states with the same scc_id are part of the same SCC. scc_id is 0, if
state
+ * is not a part of any SCC.
+ */
+ unsigned int scc_id;
+
+ /**
+ * Used for SCC detection.
+ */
+ int index;
+
+ /**
+ * Used for SCC detection.
+ */
+ int lowlink;
+
+ /**
+ * Human readable name of the automaton. Used for debugging and graph
+ * creation.
+ */
+ char *name;
+
+ /**
+ * Hash of the state.
+ */
+ struct GNUNET_HashCode hash;
+
+ /**
+ * State ID for proof creation.
+ */
+ unsigned int proof_id;
+
+ /**
+ * Proof for this state.
+ */
+ char *proof;
+
+ /**
+ * Number of transitions from this state to other states.
+ */
+ unsigned int transition_count;
+
+ /**
+ * DLL of transitions.
+ */
+ struct GNUNET_REGEX_Transition *transitions_head;
+
+ /**
+ * DLL of transitions.
+ */
+ struct GNUNET_REGEX_Transition *transitions_tail;
+
+ /**
+ * Set of states on which this state is based on. Used when creating a DFA
out
+ * of several NFA states.
+ */
+ struct GNUNET_REGEX_StateSet *nfa_set;
+};
+
+
+/**
+ * Type of an automaton.
+ */
+enum GNUNET_REGEX_AutomatonType
+{
+ NFA,
+ DFA
+};
+
+
+/**
+ * Automaton representation.
+ */
+struct GNUNET_REGEX_Automaton
+{
+ /**
+ * Linked list of NFAs used for partial NFA creation.
+ */
+ struct GNUNET_REGEX_Automaton *prev;
+
+ /**
+ * Linked list of NFAs used for partial NFA creation.
+ */
+ struct GNUNET_REGEX_Automaton *next;
+
+ /**
+ * First state of the automaton. This is mainly used for constructing an NFA,
+ * where each NFA itself consists of one or more NFAs linked together.
+ */
+ struct GNUNET_REGEX_State *start;
+
+ /**
+ * End state of the partial NFA. This is undefined for DFAs
+ */
+ struct GNUNET_REGEX_State *end;
+
+ /**
+ * Number of states in the automaton.
+ */
+ unsigned int state_count;
+
+ /**
+ * DLL of states.
+ */
+ struct GNUNET_REGEX_State *states_head;
+
+ /**
+ * DLL of states
+ */
+ struct GNUNET_REGEX_State *states_tail;
+
+ /**
+ * Type of the automaton.
+ */
+ enum GNUNET_REGEX_AutomatonType type;
+
+ /**
+ * Regex
+ */
+ char *regex;
+
+ /**
+ * Canonical regex (result of RX->NFA->DFA->RX)
+ */
+ char *canonical_regex;
+};
+
+
+/**
+ * Function that is called with each state, when traversing an automaton.
+ *
+ * @param cls closure.
+ * @param count current count of the state, from 0 to a->state_count -1.
+ * @param s state.
+ */
+typedef void (*GNUNET_REGEX_traverse_action) (void *cls, unsigned int count,
+ struct GNUNET_REGEX_State * s);
+
+
+/**
+ * Traverses the given automaton from it's start state, visiting all reachable
+ * states and calling 'action' on each one of them.
+ *
+ * @param a automaton.
+ * @param action action to be performed on each state.
+ * @param action_cls closure for action
+ */
+void
+GNUNET_REGEX_automaton_traverse (struct GNUNET_REGEX_Automaton *a,
+ GNUNET_REGEX_traverse_action action,
+ void *action_cls);
+
+
+/**
* Get the canonical regex of the given automaton.
* When constructing the automaton a proof is computed for each state,
* consisting of the regular expression leading to this state. A complete
Modified: gnunet/src/regex/test_regex_eval_api.c
===================================================================
--- gnunet/src/regex/test_regex_eval_api.c 2012-07-04 13:49:54 UTC (rev
22477)
+++ gnunet/src/regex/test_regex_eval_api.c 2012-07-04 13:54:43 UTC (rev
22478)
@@ -227,7 +227,7 @@
int check_rand;
char *check_proof;
- struct Regex_String_Pair rxstr[14] = {
+ struct Regex_String_Pair rxstr[16] = {
{"ab?(abcd)?", 5,
{"ababcd", "abab", "aabcd", "a", "abb"},
{match, nomatch, match, match, nomatch}},
@@ -270,6 +270,12 @@
{"(ab|c)+", 7,
{"", "ab", "c", "abc", "ababcc", "acc", "abac"},
{nomatch, match, match, match, match, nomatch, nomatch}},
+ {"((j|2j)K|(j|2j)AK|(j|2j)(D|e|(j|2j)A(D|e))D*K)", 1,
+ {"", "2j2jADK", "j2jADK"},
+ {nomatch, match, match}},
+ {"((j|2j)K|(j|2j)(D|e|((j|2j)j|(j|2j)2j)A(D|e))D*K|(j|2j)AK)", 2,
+ {"", "2j2jjADK", "j2jADK"},
+ {nomatch, match, match}},
{"ab(c|d)+c*(a(b|c)d)+", 1,
{"abacd"},
{nomatch}}
@@ -279,7 +285,7 @@
check_dfa = 0;
check_rand = 0;
- for (i = 0; i < 14; i++)
+ for (i = 0; i < 16; i++)
{
if (0 != regcomp (&rx, rxstr[i].regex, REG_EXTENDED))
{
Modified: gnunet/src/regex/test_regex_proofs.c
===================================================================
--- gnunet/src/regex/test_regex_proofs.c 2012-07-04 13:49:54 UTC (rev
22477)
+++ gnunet/src/regex/test_regex_proofs.c 2012-07-04 13:54:43 UTC (rev
22478)
@@ -106,16 +106,15 @@
unsigned int i;
unsigned int error;
- const char *regex[6] = {
+ const char *regex[8] = {
"a|aa*a",
"a+",
"a*",
"a*a*",
"(F*C|WfPf|y+F*C)",
"y*F*C|WfPf",
- /* "2?jA?e?D*K", */
- /* "((j|2j)K|(j|2j)AK|(j|2j)(D|e|(j|2j)A(D|e))D*K)", */
- /* "((j|2j)K|(j|2j)(D|e|((j|2j)j|(j|2j)2j)A(D|e))D*K|(j|2j)AK)", */
+ "((a|b)c|(a|b)(d|(a|b)e))",
+ "((a|b)(c|d)|(a|b)(a|b)e)"
};
const char *canon_rx1;
@@ -125,7 +124,7 @@
error = 0;
- for (i = 0; i < 6; i += 2)
+ for (i = 0; i < 8; i += 2)
{
dfa1 = GNUNET_REGEX_construct_dfa (regex[i], strlen (regex[i]));
dfa2 = GNUNET_REGEX_construct_dfa (regex[i + 1], strlen (regex[i + 1]));
@@ -138,12 +137,8 @@
if (error > 0)
{
GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
- "Comparing canonical regex failed:\nrx1: %s\ncrx1: %s\nrx2:
%s\ncrx2: %s\n",
+ "Comparing canonical regex
failed:\nrx1:\t%s\ncrx1:\t%s\nrx2:\t%s\ncrx2:\t%s\n",
regex[i], canon_rx1, regex[i + 1], canon_rx2);
-
- /* Save the graphs of the two conflicting DFAs */
- /* GNUNET_REGEX_automaton_save_graph (dfa1, "proofs_fail_dfa1.dot"); */
- /* GNUNET_REGEX_automaton_save_graph (dfa2, "proofs_fail_dfa2.dot"); */
}
GNUNET_REGEX_automaton_destroy (dfa1);
@@ -170,7 +165,7 @@
error = 0;
error += test_proofs_static ();
- /* error += test_proofs_random (10000, 10); */
+ error += test_proofs_random (100, 30);
return error;
}
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [GNUnet-SVN] r22478 - gnunet/src/regex,
gnunet <=