[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Eliot-dev] Changes to eliot/dic/dic_search.c [antoine-1]
From: |
eliot-dev |
Subject: |
[Eliot-dev] Changes to eliot/dic/dic_search.c [antoine-1] |
Date: |
Sun, 23 Oct 2005 13:13:59 -0400 |
Index: eliot/dic/dic_search.c
diff -u /dev/null eliot/dic/dic_search.c:1.12.2.1
--- /dev/null Sun Oct 23 17:13:59 2005
+++ eliot/dic/dic_search.c Sun Oct 23 17:13:56 2005
@@ -0,0 +1,561 @@
+/* Eliot */
+/* Copyright (C) 1999 Antoine Fraboulet */
+/* */
+/* This file is part of Eliot. */
+/* */
+/* Eliot is free software; you can redistribute it and/or modify */
+/* it under the terms of the GNU General Public License as published by */
+/* the Free Software Foundation; either version 2 of the License, or */
+/* (at your option) any later version. */
+/* */
+/* Elit is distributed in the hope that it will be useful, */
+/* but WITHOUT ANY WARRANTY; without even the implied warranty of */
+/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */
+/* GNU General Public License for more details. */
+/* */
+/* You should have received a copy of the GNU General Public License */
+/* along with this program; if not, write to the Free Software */
+/* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
+
+<<<<<<< dic_search.c
+/* $Id: dic_search.c,v 1.12.2.1 2005/10/23 17:13:56 afrab Exp $ */
+=======
+/*
+ * $Id: dic_search.c,v 1.12.2.1 2005/10/23 17:13:56 afrab Exp $
+ */
+>>>>>>> 1.12
+
+/**
+ * \file dic_search.c
+ * \brief Dictionary lookup functions
+ * \author Antoine Fraboulet
+ * \date 2002
+ */
+
+#include <ctype.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "dic_internals.h"
+#include "dic.h"
+#include "regexp.h"
+#include "dic_search.h"
+#include "libdic_a-er.h" /* generated by bison */
+#include "scanner.h" /* generated by flex */
+#include "automaton.h"
+
+/*
+ * shut down the compiler
+ */
+static int yy_init_globals (yyscan_t yyscanner )
+{
+ yy_init_globals(yyscanner);
+ return 0;
+}
+
+/**
+ * Dic_seel_edgeptr
+ * walk the dictionary until the end of the word
+ * @param dic : dictionnary
+ * @param s : current pointer to letters
+ * @param eptr : current edge in the dawg
+ */
+static Dawg_edge*
+Dic_seek_edgeptr(const Dictionary dic, const char* s, Dawg_edge *eptr)
+{
+ if (*s)
+ {
+ Dawg_edge *p = dic->dawg + eptr->ptr;
+ do {
+ if (p->chr == (unsigned)(*s & DIC_CHAR_MASK))
+ return Dic_seek_edgeptr (dic,s + 1, p);
+ } while (!(*p++).last);
+ return dic->dawg;
+ }
+ else
+ return eptr;
+}
+
+
+/**
+ * Dic_search_word : direct application of Dic_seek_edgeptr
+ * @param dic : dictionary
+ * @param word : word to lookup
+ * @result 0 not a valid word, 1 ok
+ */
+
+int
+Dic_search_word(const Dictionary dic, const char* word)
+{
+ Dawg_edge *e;
+ e = Dic_seek_edgeptr(dic,word,dic->dawg + dic->root);
+ return e->term;
+}
+
+
+/**
+ * global variables for Dic_search_word_by_len :
+ *
+ * a pointer to the structure is passed as a parameter
+ * so that all the search_* variables appear to the functions
+ * as global but the code remains re-entrant.
+ * Should be better to change the algorithm ...
+ */
+
+struct params_7plus1_t {
+ Dictionary search_dic;
+ int search_len;
+ int search_wordlistlen;
+ int search_wordlistlenmax;
+ char search_wordtst[DIC_WORD_MAX];
+ char search_letters[DIC_LETTERS];
+ char (*search_wordlist)[RES_7PL1_MAX][DIC_WORD_MAX];
+};
+
+static void
+Dic_search_word_by_len(struct params_7plus1_t *params, int i, Dawg_edge
*edgeptr)
+{
+ /* depth first search in the dictionary */
+ do {
+ /* we use a static array and not a real list so we have to stop if
+ * the array is full */
+ if (params->search_wordlistlen >= params->search_wordlistlenmax)
+ break;
+
+ /* the test is false only when reach the end-node */
+ if (edgeptr->chr)
+ {
+
+ /* is the letter available in search_letters */
+ if (params->search_letters[edgeptr->chr])
+ {
+ params->search_wordtst[i] = edgeptr->chr + 'A' - 1;
+ params->search_letters[edgeptr->chr] --;
+ if (i == params->search_len)
+ {
+ if ((edgeptr->term)
+ /* && (params->search_wordlistlen <
params->search_wordlistlenmax) */)
+
strcpy((*params->search_wordlist)[params->search_wordlistlen++],params->search_wordtst);
+ }
+ else /* if (params->search_wordlistlen <
params->search_wordlistlenmax) */
+ {
+ Dic_search_word_by_len(params,i + 1, params->search_dic->dawg +
edgeptr->ptr);
+ }
+ params->search_letters[edgeptr->chr] ++;
+ params->search_wordtst[i] = '\0';
+ }
+
+ /* the letter is of course available if we have a joker available */
+ if (params->search_letters[0])
+ {
+ params->search_wordtst[i] = edgeptr->chr + 'a' - 1;
+ params->search_letters[0] --;
+ if (i == params->search_len)
+ {
+ if ((edgeptr->term)
+ /* && (params->search_wordlistlen <
params->search_wordlistlenmax) */)
+
strcpy((*(params->search_wordlist))[params->search_wordlistlen++],params->search_wordtst);
+ }
+ else /* if (params->search_wordlistlen <
params->search_wordlistlenmax) */
+ {
+ Dic_search_word_by_len(params,i + 1,params->search_dic->dawg +
edgeptr->ptr);
+ }
+ params->search_letters[0] ++;
+ params->search_wordtst[i] = '\0';
+ }
+ }
+ } while (! (*edgeptr++).last);
+}
+
+void
+Dic_search_7pl1(const Dictionary dic, const char* rack,
+ char buff[DIC_LETTERS][RES_7PL1_MAX][DIC_WORD_MAX],
+ int joker)
+{
+ int i,j,wordlen;
+ const char* r = rack;
+ struct params_7plus1_t params;
+ Dawg_edge *root_edge;
+
+ for(i=0; i < DIC_LETTERS; i++)
+ for(j=0; j < RES_7PL1_MAX; j++)
+ buff[i][j][0] = '\0';
+
+ for(i=0; i<DIC_LETTERS; i++)
+ params.search_letters[i] = 0;
+
+ if (dic == NULL || rack == NULL)
+ return;
+
+ /*
+ * the letters are verified and changed to the dic internal
+ * representation (*r & DIC_CHAR_MASK)
+ */
+ for(wordlen=0; wordlen < DIC_WORD_MAX && *r; r++)
+ {
+ if (isalpha(*r))
+ {
+ params.search_letters[(int)*r & DIC_CHAR_MASK]++;
+ wordlen++;
+ }
+ else if (*r == '?')
+ {
+ if (joker)
+ {
+ params.search_letters[0]++;
+ wordlen++;
+ }
+ else
+ {
+ strncpy(buff[0][0],"** joker **",DIC_WORD_MAX);
+ return;
+ }
+ }
+ }
+
+ if (wordlen < 1)
+ return;
+
+ root_edge = dic->dawg + (dic->dawg[dic->root].ptr);
+
+ params.search_dic = dic;
+ params.search_wordlistlenmax = RES_7PL1_MAX;
+
+ /* search for all the words that can be done with the letters */
+ params.search_len = wordlen - 1;
+ params.search_wordtst[wordlen]='\0';
+ params.search_wordlist = & buff[0];
+ params.search_wordlistlen = 0;
+ Dic_search_word_by_len(¶ms,0,root_edge);
+
+ /* search for all the words that can be done with the letters +1 */
+ params.search_len = wordlen;
+ params.search_wordtst[wordlen + 1]='\0';
+ for(i='a'; i <= 'z'; i++)
+ {
+ params.search_letters[i & DIC_CHAR_MASK]++;
+
+ params.search_wordlist = & buff[i & DIC_CHAR_MASK];
+ params.search_wordlistlen = 0;
+ Dic_search_word_by_len(¶ms,0,root_edge);
+
+ params.search_letters[i & DIC_CHAR_MASK]--;
+ }
+}
+
+/****************************************/
+/****************************************/
+
+void
+Dic_search_Racc(const Dictionary dic, const char* word,
+ char wordlist[RES_RACC_MAX][DIC_WORD_MAX])
+{
+ /* search_racc will try to add a letter in front and at the end of a word */
+
+ int i,wordlistlen;
+ Dawg_edge *edge;
+ char wordtst[DIC_WORD_MAX];
+
+ for(i=0; i < RES_RACC_MAX; i++)
+ wordlist[i][0] = 0;
+
+ if (dic == NULL || wordlist == NULL)
+ return;
+
+ /* let's try for the front */
+ wordlistlen = 0;
+ strcpy(wordtst+1,word);
+ for(i='a'; i <= 'z'; i++)
+ {
+ wordtst[0] = i;
+ if (Dic_search_word(dic,wordtst) && wordlistlen < RES_RACC_MAX)
+ strcpy(wordlist[wordlistlen++],wordtst);
+ }
+
+ /* add a letter at the end */
+ for(i=0; word[i]; i++)
+ wordtst[i] = word[i];
+
+ wordtst[i ] = '\0';
+ wordtst[i+1] = '\0';
+
+ edge = Dic_seek_edgeptr(dic,word,dic->dawg + dic->root);
+
+ /* points to what the next letter can be */
+ edge = dic->dawg + edge->ptr;
+
+ if (edge != dic->dawg)
+ {
+ do {
+ if (edge->term && wordlistlen < RES_RACC_MAX)
+ {
+ wordtst[i] = edge->chr + 'a' - 1;
+ strcpy(wordlist[wordlistlen++],wordtst);
+ }
+ } while (!(*edge++).last);
+ }
+}
+
+/****************************************/
+/****************************************/
+
+
+void
+Dic_search_Benj(const Dictionary dic, const char* word,
+ char wordlist[RES_BENJ_MAX][DIC_WORD_MAX])
+{
+ int i,wordlistlen;
+ char wordtst[DIC_WORD_MAX];
+ Dawg_edge *edge0,*edge1,*edge2,*edgetst;
+
+ for(i=0; i < RES_BENJ_MAX; i++)
+ wordlist[i][0] = 0;
+
+ if (dic == NULL || word == NULL)
+ return;
+
+ wordlistlen = 0;
+
+ strcpy(wordtst+3,word);
+ edge0 = dic->dawg + (dic->dawg[dic->root].ptr);
+ do {
+ wordtst[0] = edge0->chr + 'a' - 1;
+ edge1 = dic->dawg + edge0->ptr;
+ do {
+ wordtst[1] = edge1->chr + 'a' - 1;
+ edge2 = dic->dawg + edge1->ptr;
+ do {
+ wordtst[2] = edge2->chr + 'a' - 1;
+ edgetst = Dic_seek_edgeptr(dic,word,edge2);
+ if (edgetst->term && wordlistlen < RES_BENJ_MAX)
+ strcpy(wordlist[wordlistlen++],wordtst);
+ } while (!(*edge2++).last);
+ } while (!(*edge1++).last);
+ } while (!(*edge0++).last);
+}
+
+
+/****************************************/
+/****************************************/
+
+struct params_cross_t {
+ Dictionary dic;
+ int wordlen;
+ int wordlistlen;
+ int wordlistlenmax;
+ char mask[DIC_WORD_MAX];
+};
+
+
+void
+Dic_search_cross_rec(struct params_cross_t *params,
+ char wordlist[RES_CROS_MAX][DIC_WORD_MAX],
+ Dawg_edge *edgeptr)
+{
+ Dawg_edge *current = params->dic->dawg + edgeptr->ptr;
+
+ if (params->mask[params->wordlen] == '\0' && edgeptr->term)
+ {
+ if (params->wordlistlen < params->wordlistlenmax)
+ strcpy(wordlist[params->wordlistlen++],params->mask);
+ }
+ else if (params->mask[params->wordlen] == '.')
+ {
+ do
+ {
+ params->mask[params->wordlen] = current->chr + 'a' - 1;
+ params->wordlen ++;
+ Dic_search_cross_rec(params,wordlist,current);
+ params->wordlen --;
+ params->mask[params->wordlen] = '.';
+ }
+ while (!(*current++).last);
+ }
+ else
+ {
+ do
+ {
+ if (current->chr == (unsigned int)(params->mask[params->wordlen] &
DIC_CHAR_MASK))
+ {
+ params->wordlen ++;
+ Dic_search_cross_rec(params,wordlist,current);
+ params->wordlen --;
+ break;
+ }
+ }
+ while (!(*current++).last);
+ }
+}
+
+
+
+void
+Dic_search_Cros(const Dictionary dic, const char* mask,
+ char wordlist[RES_CROS_MAX][DIC_WORD_MAX])
+{
+ int i;
+ struct params_cross_t params;
+
+ for(i=0; i < RES_CROS_MAX; i++)
+ wordlist[i][0] = 0;
+
+ if (dic == NULL || mask == NULL)
+ return;
+
+ for(i=0; i < DIC_WORD_MAX && mask[i]; i++)
+ {
+ if (isalpha(mask[i]))
+ params.mask[i] = (mask[i] & DIC_CHAR_MASK) + 'A' - 1;
+ else
+ params.mask[i] = '.';
+ }
+ params.mask[i] = '\0';
+
+ params.dic = dic;
+ params.wordlen = 0;
+ params.wordlistlen = 0;
+ params.wordlistlenmax = RES_CROS_MAX;
+ Dic_search_cross_rec(¶ms, wordlist, dic->dawg + dic->root);
+}
+
+/****************************************/
+/****************************************/
+
+struct params_regexp_t {
+ Dictionary dic;
+ int minlength;
+ int maxlength;
+ automaton automaton;
+ struct search_RegE_list_t *charlist;
+ char word[DIC_WORD_MAX];
+ int wordlen;
+ int wordlistlen;
+ int wordlistlenmax;
+};
+
+void
+Dic_search_regexp_rec(struct params_regexp_t *params,
+ int state,
+ Dawg_edge *edgeptr,
+ char wordlist[RES_REGE_MAX][DIC_WORD_MAX])
+{
+ int next_state;
+ Dawg_edge *current;
+ /* if we have a valid word we store it */
+ if (automaton_get_accept(params->automaton,state) && edgeptr->term)
+ {
+ int l = strlen(params->word);
+ if (params->wordlistlen < params->wordlistlenmax &&
+ params->minlength <= l &&
+ params->maxlength >= l)
+ {
+ strcpy(wordlist[params->wordlistlen++],params->word);
+ }
+ }
+ /* we now drive the search by exploring the dictionary */
+ current = params->dic->dawg + edgeptr->ptr;
+ do {
+ /* the current letter is current->chr */
+ next_state =
automaton_get_next_state(params->automaton,state,current->chr);
+ /* 1 : the letter appears in the automaton as is */
+ if (next_state)
+ {
+ params->word[params->wordlen] = current->chr + 'a' - 1;
+ params->wordlen ++;
+ Dic_search_regexp_rec(params,next_state,current,wordlist);
+ params->wordlen --;
+ params->word[params->wordlen] = '\0';
+ }
+ } while (!(*current++).last);
+}
+
+
+ /**
+ * function prototype for parser generated by bison
+ */
+int regexpparse(yyscan_t scanner, NODE** root,
+ struct search_RegE_list_t *list,
+ struct regexp_error_report_t *err);
+
+void
+Dic_search_RegE(const Dictionary dic, const char* re,
+ char wordlist[RES_REGE_MAX][DIC_WORD_MAX],
+ struct search_RegE_list_t *list)
+{
+ int i,p,n,value;
+ int ptl[REGEXP_MAX+1];
+ int PS [REGEXP_MAX+1];
+ NODE* root;
+ yyscan_t scanner;
+ YY_BUFFER_STATE buf;
+ automaton a;
+ char stringbuf[250];
+ struct params_regexp_t params;
+ struct regexp_error_report_t report;
+
+ /* init */
+ for(i=0; i < RES_REGE_MAX; i++)
+ wordlist[i][0] = 0;
+
+ if (dic == NULL || re == NULL)
+ return;
+
+ /* (expr)# */
+ sprintf(stringbuf,"(%s)#",re);
+ for(i=0; i < REGEXP_MAX; i++)
+ {
+ PS[i] = 0;
+ ptl[i] = 0;
+ }
+
+ report.pos1 = 0;
+ report.pos2 = 0;
+ report.msg[0] = '\0';
+
+ /* parsing */
+ regexplex_init( &scanner );
+ buf = regexp_scan_string( stringbuf, scanner );
+ root = NULL;
+ value = regexpparse( scanner , &root, list, &report);
+ regexp_delete_buffer(buf,scanner);
+ regexplex_destroy( scanner );
+
+ if (value)
+ {
+#ifdef DEBUG_FLEX_IS_BROKEN
+ fprintf(stderr,"parser error at pos %d - %d : %s\n",
+ report.pos1, report.pos2, report.msg);
+#endif
+ regexp_delete_tree(root);
+ return ;
+ }
+
+ n = 1;
+ p = 1;
+ regexp_parcours(root, &p, &n, ptl);
+ PS [0] = p - 1;
+ ptl[0] = p - 1;
+
+ regexp_possuivante(root,PS);
+
+ if ((a = automaton_build(root->PP,ptl,PS,list)) != NULL)
+ {
+ params.dic = dic;
+ params.minlength = list->minlength;
+ params.maxlength = list->maxlength;
+ params.automaton = a;
+ params.charlist = list;
+ memset(params.word,'\0',sizeof(params.word));
+ params.wordlen = 0;
+ params.wordlistlen = 0;
+ params.wordlistlenmax = RES_REGE_MAX;
+ Dic_search_regexp_rec(¶ms, automaton_get_init(a), dic->dawg +
dic->root, wordlist);
+
+ automaton_delete(a);
+ }
+ regexp_delete_tree(root);
+}
+
+/****************************************/
+/****************************************/
+
- [Eliot-dev] Changes to eliot/dic/dic_search.c [antoine-1],
eliot-dev <=