pspp-dev
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [pre-lexer-2 1/4] command: Factor command name matching out of command.c.


From: John Darrington
Subject: Re: [pre-lexer-2 1/4] command: Factor command name matching out of command.c.
Date: Sat, 25 Sep 2010 06:23:19 +0000
User-agent: Mutt/1.5.18 (2008-05-17)

I've no doubt this patch is an improvement.
However, I'm worried about how this is going to work with non-ASCII encodings.
For example, some recent syntax files that I've seen have UTF-8 "hard"
(non-breaking) spaces (0xc2 0xa0) instead of the normal ' '.
I think the lines:
 /* Skip whitespace. */
  ss_ltrim (s, ss_cstr (" \t\n\r\f\v"));

will fail in this case.

On Fri, Sep 24, 2010 at 09:17:51PM -0700, Ben Pfaff wrote:
     Making command parsing into a library will allow other code to use
     the same functionality, which will be useful later in the lexer.
     It also simplifies parsing command names and allows us to easily
     add tests for command name parsing.
     ---
      src/language/command.c                   |  385 
+++++-------------------------
      src/language/lexer/automake.mk           |    5 +-
      src/language/lexer/command-name.c        |  167 +++++++++++++
      src/language/lexer/command-name.h        |   45 ++++
      tests/automake.mk                        |   15 +-
      tests/language/lexer/command-name-test.c |  149 ++++++++++++
      tests/language/lexer/command-name.at     |  230 ++++++++++++++++++
      7 files changed, 675 insertions(+), 321 deletions(-)
      create mode 100644 src/language/lexer/command-name.c
      create mode 100644 src/language/lexer/command-name.h
      create mode 100644 tests/language/lexer/command-name-test.c
      create mode 100644 tests/language/lexer/command-name.at
     
     diff --git a/src/language/command.c b/src/language/command.c
     index 9f90db9..f72755e 100644
     --- a/src/language/command.c
     +++ b/src/language/command.c
     @@ -28,6 +28,7 @@
      #include "data/procedure.h"
      #include "data/settings.h"
      #include "data/variable.h"
     +#include "language/lexer/command-name.h"
      #include "language/lexer/lexer.h"
      #include "language/prompt.h"
      #include "libpspp/assertion.h"
     @@ -115,7 +116,6 @@ static const size_t command_cnt = sizeof commands / 
sizeof *commands;
      
      static bool in_correct_state (const struct command *, enum cmd_state);
      static bool report_state_mismatch (const struct command *, enum 
cmd_state);
     -static const struct command *find_command (const char *name);
      static void set_completion_state (enum cmd_state);
      
      /* Command parser. */
     @@ -240,257 +240,23 @@ do_parse_command (struct lexer *lexer,
        return result;
      }
      
     -static size_t
     -match_strings (const char *a, size_t a_len,
     -               const char *b, size_t b_len)
     -{
     -  size_t match_len = 0;
     -
     -  while (a_len > 0 && b_len > 0)
     -    {
     -      /* Mismatch always returns zero. */
     -      if (toupper ((unsigned char) *a++) != toupper ((unsigned char) 
*b++))
     -        return 0;
     -
     -      /* Advance. */
     -      a_len--;
     -      b_len--;
     -      match_len++;
     -    }
     -
     -  return match_len;
     -}
     -
     -/* Returns the first character in the first word in STRING,
     -   storing the word's length in *WORD_LEN.  If no words remain,
     -   returns a null pointer and stores 0 in *WORD_LEN.  Words are
     -   sequences of alphanumeric characters or single
     -   non-alphanumeric characters.  Words are delimited by
     -   spaces. */
     -static const char *
     -find_word (const char *string, size_t *word_len)
     -{
     -  /* Skip whitespace and asterisks. */
     -  while (isspace ((unsigned char) *string))
     -    string++;
     -
     -  /* End of string? */
     -  if (*string == '\0')
     -    {
     -      *word_len = 0;
     -      return NULL;
     -    }
     -
     -  /* Special one-character word? */
     -  if (!isalnum ((unsigned char) *string))
     -    {
     -      *word_len = 1;
     -      return string;
     -    }
     -
     -  /* Alphanumeric word. */
     -  *word_len = 1;
     -  while (isalnum ((unsigned char) string[*word_len]))
     -    (*word_len)++;
     -
     -  return string;
     -}
     -
     -/* Returns true if strings A and B can be confused based on
     -   their first three letters. */
     -static bool
     -conflicting_3char_prefixes (const char *a, const char *b)
     -{
     -  size_t aw_len, bw_len;
     -  const char *aw, *bw;
     -
     -  aw = find_word (a, &aw_len);
     -  bw = find_word (b, &bw_len);
     -  assert (aw != NULL && bw != NULL);
     -
     -  /* Words that are the same don't conflict. */
     -  if (aw_len == bw_len && !buf_compare_case (aw, bw, aw_len))
     -    return false;
     -
     -  /* Words that are otherwise the same in the first three letters
     -     do conflict. */
     -  return ((aw_len > 3 && bw_len > 3)
     -          || (aw_len == 3 && bw_len > 3)
     -          || (bw_len == 3 && aw_len > 3)) && !buf_compare_case (aw, bw, 
3);
     -}
     -
     -/* Returns true if CMD can be confused with another command
     -   based on the first three letters of its first word. */
     -static bool
     -conflicting_3char_prefix_command (const struct command *cmd)
     -{
     -  assert (cmd >= commands && cmd < commands + command_cnt);
     -
     -  return ((cmd > commands
     -           && conflicting_3char_prefixes (cmd[-1].name, cmd[0].name))
     -          || (cmd < commands + command_cnt
     -              && conflicting_3char_prefixes (cmd[0].name, cmd[1].name)));
     -}
     -
     -/* Ways that a set of words can match a command name. */
     -enum command_match
     -  {
     -    MISMATCH,           /* Not a match. */
     -    PARTIAL_MATCH,      /* The words begin the command name. */
     -    COMPLETE_MATCH      /* The words are the command name. */
     -  };
     -
     -/* Figures out how well the WORD_CNT words in WORDS match CMD,
     -   and returns the appropriate enum value.  If WORDS are a
     -   partial match for CMD and the next word in CMD is a dash, then
     -   *DASH_POSSIBLE is set to 1 if DASH_POSSIBLE is non-null;
     -   otherwise, *DASH_POSSIBLE is unchanged. */
     -static enum command_match
     -cmd_match_words (const struct command *cmd,
     -                 char *const words[], size_t word_cnt,
     -                 int *dash_possible)
     -{
     -  const char *word;
     -  size_t word_len;
     -  size_t word_idx;
     -
     -  for (word = find_word (cmd->name, &word_len), word_idx = 0;
     -       word != NULL && word_idx < word_cnt;
     -       word = find_word (word + word_len, &word_len), word_idx++)
     -    if (word_len != strlen (words[word_idx])
     -        || buf_compare_case (word, words[word_idx], word_len))
     -      {
     -        size_t match_chars = match_strings (word, word_len,
     -                                            words[word_idx],
     -                                            strlen (words[word_idx]));
     -        if (match_chars == 0)
     -          {
     -            /* Mismatch. */
     -            return MISMATCH;
     -          }
     -        else if (match_chars == 1 || match_chars == 2)
     -          {
     -            /* One- and two-character abbreviations are not
     -               acceptable. */
     -            return MISMATCH;
     -          }
     -        else if (match_chars == 3)
     -          {
     -            /* Three-character abbreviations are acceptable
     -               in the first word of a command if there are
     -               no name conflicts.  They are always
     -               acceptable after the first word. */
     -            if (word_idx == 0 && conflicting_3char_prefix_command (cmd))
     -              return MISMATCH;
     -          }
     -        else /* match_chars > 3 */
     -          {
     -            /* Four-character and longer abbreviations are
     -               always acceptable.  */
     -          }
     -      }
     -
     -  if (word == NULL && word_idx == word_cnt)
     -    {
     -      /* cmd->name = "FOO BAR", words[] = {"FOO", "BAR"}. */
     -      return COMPLETE_MATCH;
     -    }
     -  else if (word == NULL)
     -    {
     -      /* cmd->name = "FOO BAR", words[] = {"FOO", "BAR", "BAZ"}. */
     -      return MISMATCH;
     -    }
     -  else
     -    {
     -      /* cmd->name = "FOO BAR BAZ", words[] = {"FOO", "BAR"}. */
     -      if (word[0] == '-' && dash_possible != NULL)
     -        *dash_possible = 1;
     -      return PARTIAL_MATCH;
     -    }
     -}
     -
     -/* Returns the number of commands for which the WORD_CNT words in
     -   WORDS are a partial or complete match.  If some partial match
     -   has a dash as the next word, then *DASH_POSSIBLE is set to 1,
     -   otherwise it is set to 0. */
      static int
     -count_matching_commands (char *const words[], size_t word_cnt,
     -                         int *dash_possible)
     +find_best_match (struct substring s, const struct command **matchp)
      {
        const struct command *cmd;
     -  int cmd_match_count;
     +  struct command_matcher cm;
     +  int missing_words;
      
     -  cmd_match_count = 0;
     -  *dash_possible = 0;
     -  for (cmd = commands; cmd < commands + command_cnt; cmd++)
     -    if (cmd_match_words (cmd, words, word_cnt, dash_possible) != MISMATCH)
     -      cmd_match_count++;
     +  command_matcher_init (&cm, s);
     +  for (cmd = commands; cmd < &commands[command_cnt]; cmd++)
     +    command_matcher_add (&cm, ss_cstr (cmd->name), CONST_CAST (void *, 
cmd));
      
     -  return cmd_match_count;
     -}
     -
     -/* Returns the command for which the WORD_CNT words in WORDS are
     -   a complete match.  Returns a null pointer if no such command
     -   exists. */
     -static const struct command *
     -get_complete_match (char *const words[], size_t word_cnt)
     -{
     -  const struct command *cmd;
     +  *matchp = command_matcher_get_match (&cm);
     +  missing_words = command_matcher_get_missing_words (&cm);
      
     -  for (cmd = commands; cmd < commands + command_cnt; cmd++)
     -    if (cmd_match_words (cmd, words, word_cnt, NULL) == COMPLETE_MATCH)
     -      return cmd;
     +  command_matcher_destroy (&cm);
      
     -  return NULL;
     -}
     -
     -/* Returns the command with the given exact NAME.
     -   Aborts if no such command exists. */
     -static const struct command *
     -find_command (const char *name)
     -{
     -  const struct command *cmd;
     -
     -  for (cmd = commands; cmd < commands + command_cnt; cmd++)
     -    if (!strcmp (cmd->name, name))
     -      return cmd;
     -  NOT_REACHED ();
     -}
     -
     -/* Frees the WORD_CNT words in WORDS. */
     -static void
     -free_words (char *words[], size_t word_cnt)
     -{
     -  size_t idx;
     -
     -  for (idx = 0; idx < word_cnt; idx++)
     -    free (words[idx]);
     -}
     -
     -/* Flags an error that the command whose name is given by the
     -   WORD_CNT words in WORDS is unknown. */
     -static void
     -unknown_command_error (struct lexer *lexer, char *const words[], size_t 
word_cnt)
     -{
     -  if (word_cnt == 0)
     -    lex_error (lexer, _("expecting command name"));
     -  else
     -    {
     -      struct string s;
     -      size_t i;
     -
     -      ds_init_empty (&s);
     -      for (i = 0; i < word_cnt; i++)
     -        {
     -          if (i != 0)
     -            ds_put_char (&s, ' ');
     -          ds_put_cstr (&s, words[i]);
     -        }
     -
     -      msg (SE, _("Unknown command %s."), ds_cstr (&s));
     -
     -      ds_destroy (&s);
     -    }
     +  return missing_words;
      }
      
      /* Parse the command name and return a pointer to the corresponding
     @@ -499,93 +265,74 @@ unknown_command_error (struct lexer *lexer, char 
*const words[], size_t word_cnt
      static const struct command *
      parse_command_name (struct lexer *lexer)
      {
     -  char *words[16];
     -  int word_cnt;
     -  int complete_word_cnt;
     -  int dash_possible;
     -
     -  if (lex_token (lexer) == T_EXP ||
     -            lex_token (lexer) == '*' || lex_token (lexer) == '[')
     -    return find_command ("COMMENT");
     -
     -  dash_possible = 0;
     -  word_cnt = complete_word_cnt = 0;
     -  while (lex_token (lexer) == T_ID || (dash_possible && lex_token (lexer) 
== '-'))
     -    {
     -      int cmd_match_cnt;
     +  const struct command *command;
     +  int missing_words;
     +  struct string s;
      
     -      assert (word_cnt < sizeof words / sizeof *words);
     -      if (lex_token (lexer) == T_ID)
     -        {
     -          words[word_cnt] = ds_xstrdup (lex_tokstr (lexer));
     -          str_uppercase (words[word_cnt]);
     -        }
     -      else if (lex_token (lexer) == '-')
     -        words[word_cnt] = xstrdup ("-");
     -      word_cnt++;
     +  if (lex_token (lexer) == T_EXP
     +      || lex_token (lexer) == '*'
     +      || lex_token (lexer) == '[')
     +    {
     +      static const struct command c = { S_ANY, 0, "COMMENT", cmd_comment 
};
     +      return &c;
     +    }
      
     -      cmd_match_cnt = count_matching_commands (words, word_cnt,
     -                                               &dash_possible);
     -      if (cmd_match_cnt == 0)
     -        break;
     -      else if (cmd_match_cnt == 1)
     +  command = NULL;
     +  missing_words = 0;
     +  ds_init_empty (&s);
     +  for (;;)
     +    {
     +      if (lex_token (lexer) == '-')
     +        ds_put_char (&s, '-');
     +      else if (lex_token (lexer) == T_ID)
              {
     -          const struct command *command = get_complete_match (words, 
word_cnt);
     -          if (command != NULL)
     -            {
     -              if (!(command->flags & F_KEEP_FINAL_TOKEN))
     -                lex_get (lexer);
     -              free_words (words, word_cnt);
     -              return command;
     -            }
     +          if (!ds_is_empty (&s) && ds_last (&s) != '-')
     +            ds_put_char (&s, ' ');
     +          ds_put_cstr (&s, lex_tokid (lexer));
              }
     -      else /* cmd_match_cnt > 1 */
     +      else if (lex_is_integer (lexer) && lex_integer (lexer) >= 0)
              {
     -          /* Do we have a complete command name so far? */
     -          if (get_complete_match (words, word_cnt) != NULL)
     -            complete_word_cnt = word_cnt;
     +          if (!ds_is_empty (&s) && ds_last (&s) != '-')
     +            ds_put_char (&s, ' ');
     +          ds_put_format (&s, "%ld", lex_integer (lexer));
              }
     +      else
     +        break;
     +
     +      missing_words = find_best_match (ds_ss (&s), &command);
     +      if (missing_words <= 0)
     +        break;
     +
            lex_get (lexer);
          }
      
     -  /* If we saw a complete command name earlier, drop back to
     -     it. */
     -  if (complete_word_cnt)
     +  if (command == NULL && missing_words > 0)
          {
     -      int pushback_word_cnt;
     -      const struct command *command;
     -
     -      /* Get the command. */
     -      command = get_complete_match (words, complete_word_cnt);
     -      assert (command != NULL);
     -
     -      /* Figure out how many words we want to keep.
     -         We normally want to swallow the entire command. */
     -      pushback_word_cnt = complete_word_cnt + 1;
     -      if (command->flags & F_KEEP_FINAL_TOKEN)
     -        pushback_word_cnt--;
     -
     -      /* FIXME: We only support one-token pushback. */
     -      assert (pushback_word_cnt + 1 >= word_cnt);
     -
     -      while (word_cnt > pushback_word_cnt)
     -        {
     -          word_cnt--;
     -          if (strcmp (words[word_cnt], "-"))
     -            lex_put_back_id (lexer, words[word_cnt]);
     -          else
     -            lex_put_back (lexer, '-');
     -          free (words[word_cnt]);
     -        }
     +      ds_put_cstr (&s, " .");
     +      missing_words = find_best_match (ds_ss (&s), &command);
     +      ds_truncate (&s, ds_length (&s) - 2);
     +    }
      
     -      free_words (words, word_cnt);
     -      return command;
     +  if (command == NULL)
     +    {
     +      if (ds_is_empty (&s))
     +        lex_error (lexer, _("expecting command name"));
     +      else
     +        msg (SE, _("Unknown command \"%s\"."), ds_cstr (&s));
     +    }
     +  else if (missing_words == 0)
     +    {
     +      if (!(command->flags & F_KEEP_FINAL_TOKEN))
     +        lex_get (lexer);
     +    }
     +  else if (missing_words < 0)
     +    {
     +      assert (missing_words == -1);
     +      assert (!(command->flags & F_KEEP_FINAL_TOKEN));
          }
      
     -  /* We didn't get a valid command name. */
     -  unknown_command_error (lexer, words, word_cnt);
     -  free_words (words, word_cnt);
     -  return NULL;
     +  ds_destroy (&s);
     +  return command;
      }
      
      /* Returns true if COMMAND is allowed in STATE,
     diff --git a/src/language/lexer/automake.mk 
b/src/language/lexer/automake.mk
     index aff3f2a..71f6b41 100644
     --- a/src/language/lexer/automake.mk
     +++ b/src/language/lexer/automake.mk
     @@ -2,7 +2,10 @@
      
      
      language_lexer_sources = \
     -  src/language/lexer/lexer.c  src/language/lexer/lexer.h \
     +  src/language/lexer/command-name.c \
     +  src/language/lexer/command-name.h \
     +  src/language/lexer/lexer.c \
     +  src/language/lexer/lexer.h \
        src/language/lexer/subcommand-list.c  \
        src/language/lexer/subcommand-list.h \
        src/language/lexer/format-parser.c \
     diff --git a/src/language/lexer/command-name.c 
b/src/language/lexer/command-name.c
     new file mode 100644
     index 0000000..fedcf96
     --- /dev/null
     +++ b/src/language/lexer/command-name.c
     @@ -0,0 +1,167 @@
     +/* PSPP - a program for statistical analysis.
     +   Copyright (C) 2010 Free Software Foundation, Inc.
     +
     +   This program is free software: you can redistribute it and/or modify
     +   it under the terms of the GNU General Public License as published by
     +   the Free Software Foundation, either version 3 of the License, or
     +   (at your option) any later version.
     +
     +   This program is distributed in the hope that it will be useful,
     +   but WITHOUT ANY WARRANTY; without even the implied warranty of
     +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     +   GNU General Public License for more details.
     +
     +   You should have received a copy of the GNU General Public License
     +   along with this program.  If not, see <http://www.gnu.org/licenses/>. 
*/
     +
     +#include <config.h>
     +
     +#include "language/lexer/command-name.h"
     +
     +#include <assert.h>
     +#include <limits.h>
     +
     +#include "data/identifier.h"
     +
     +#include "gl/c-ctype.h"
     +
     +/* Finds the first word in S.  If a word is found, stores it in *WORD,
     +   removes it from the front of S, and returns true.  If no words remain,
     +   stores an empty substring in *WORD and returns false.
     +
     +   A word is a sequence of digits, a letter possibly followed by a sequence of
     +   letters or digits, or one character of another type.  Words may be delimited
     +   by spaces. */
     +static bool
     +find_word (struct substring *s, struct substring *word)
     +{
     +  size_t n;
     +
     +  /* Skip whitespace. */
     +  ss_ltrim (s, ss_cstr (" \t\n\r\f\v"));
     +  if (ss_is_empty (*s))
     +    {
     +      *word = ss_empty ();
     +      return false;
     +    }
     +
     +  n = 1;
     +  if (lex_is_id1 (s->string[0]))
     +    {
     +      while (n < s->length && lex_is_idn (s->string[n]))
     +        n++;
     +    }
     +  else if (c_isdigit (s->string[0]))
     +    {
     +      while (n < s->length && c_isdigit (s->string[n]))
     +        n++;
     +    }
     +  ss_get_chars (s, n, word);
     +  return true;
     +}
     +
     +static int
     +count_words (struct substring s)
     +{
     +  int n;
     +
     +  for (n = 0; ; n++)
     +    {
     +      struct substring word;
     +
     +      if (!find_word (&s, &word))
     +        return n;
     +    }
     +}
     +
     +bool
     +command_match (struct substring command, struct substring string,
     +               bool *exact, int *missing_words)
     +{
     +  *exact = true;
     +  for (;;)
     +    {
     +      struct substring cw, sw;
     +      int match;
     +
     +      if (!find_word (&command, &cw))
     +        {
     +          *missing_words = -count_words (string);
     +          return true;
     +        }
     +      else if (!find_word (&string, &sw))
     +        {
     +          *missing_words = 1 + count_words (command);
     +          return true;
     +        }
     +
     +      match = lex_id_match (cw, sw);
     +      if (sw.length < cw.length)
     +        *exact = false;
     +      if (match == 0)
     +        return false;
     +    }
     +}
     +
     +void
     +command_matcher_init (struct command_matcher *cm, struct substring string)
     +{
     +  cm->string = string;
     +  cm->extensible = false;
     +  cm->exact_match = NULL;
     +  cm->n_matches = 0;
     +  cm->match = NULL;
     +  cm->match_missing_words = 0;
     +}
     +
     +void
     +command_matcher_destroy (struct command_matcher *cm UNUSED)
     +{
     +  /* Nothing to do. */
     +}
     +
     +void
     +command_matcher_add (struct command_matcher *cm, struct substring command,
     +                     void *aux)
     +{
     +  int missing_words;
     +  bool exact;
     +
     +  assert (aux != NULL);
     +  if (command_match (command, cm->string, &exact, &missing_words))
     +    {
     +      if (missing_words > 0)
     +        cm->extensible = true;
     +      else if (exact && missing_words == 0)
     +        cm->exact_match = aux;
     +      else
     +        {
     +          if (missing_words > cm->match_missing_words)
     +            cm->n_matches = 0;
     +
     +          if (missing_words >= cm->match_missing_words || cm->n_matches 
== 0)
     +            {
     +              cm->n_matches++;
     +              cm->match = aux;
     +              cm->match_missing_words = missing_words;
     +            }
     +        }
     +    }
     +}
     +
     +void *
     +command_matcher_get_match (const struct command_matcher *cm)
     +{
     +  return (cm->extensible ? NULL
     +          : cm->exact_match != NULL ? cm->exact_match
     +          : cm->n_matches == 1 ? cm->match
     +          : NULL);
     +}
     +
     +int
     +command_matcher_get_missing_words (const struct command_matcher *cm)
     +{
     +  return (cm->extensible ? 1
     +          : cm->exact_match != NULL ? 0
     +          : cm->match_missing_words);
     +}
     diff --git a/src/language/lexer/command-name.h 
b/src/language/lexer/command-name.h
     new file mode 100644
     index 0000000..98269d5
     --- /dev/null
     +++ b/src/language/lexer/command-name.h
     @@ -0,0 +1,45 @@
     +/* PSPP - a program for statistical analysis.
     +   Copyright (C) 2010 Free Software Foundation, Inc.
     +
     +   This program is free software: you can redistribute it and/or modify
     +   it under the terms of the GNU General Public License as published by
     +   the Free Software Foundation, either version 3 of the License, or
     +   (at your option) any later version.
     +
     +   This program is distributed in the hope that it will be useful,
     +   but WITHOUT ANY WARRANTY; without even the implied warranty of
     +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     +   GNU General Public License for more details.
     +
     +   You should have received a copy of the GNU General Public License
     +   along with this program.  If not, see <http://www.gnu.org/licenses/>. 
*/
     +
     +#ifndef CMDTABLE_H
     +#define CMDTABLE_H 1
     +
     +#include <stdbool.h>
     +#include "libpspp/str.h"
     +
     +bool command_match (struct substring command, struct substring string,
     +                    bool *exact, int *missing_words);
     +
     +struct command_matcher
     +  {
     +    struct substring string;
     +    bool extensible;
     +    void *exact_match;
     +    int n_matches;
     +    void *match;
     +    int match_missing_words;
     +  };
     +
     +void command_matcher_init (struct command_matcher *, struct substring 
string);
     +void command_matcher_destroy (struct command_matcher *);
     +
     +void command_matcher_add (struct command_matcher *, struct substring 
command,
     +                          void *aux);
     +
     +void *command_matcher_get_match (const struct command_matcher *);
     +int command_matcher_get_missing_words (const struct command_matcher *);
     +
     +#endif /* cmdtable.h */
     diff --git a/tests/automake.mk b/tests/automake.mk
     index 83bbeeb..cc115e5 100644
     --- a/tests/automake.mk
     +++ b/tests/automake.mk
     @@ -169,6 +169,7 @@ check_PROGRAMS += \
        $(nodist_TESTS) \
        tests/data/datasheet-test \
        tests/formats/inexactify \
     +  tests/language/lexer/command-name-test \
        tests/libpspp/i18n-test \
        tests/libpspp/sparse-xarray-test \
        tests/output/render-test
     @@ -331,6 +332,17 @@ tests_dissect_sysfile_SOURCES = \
      tests_dissect_sysfile_LDADD = gl/libgl.la $(LIBINTL) 
      tests_dissect_sysfile_CPPFLAGS = $(AM_CPPFLAGS) -DINSTALLDIR=\"$(bindir)\"
      
     +check_PROGRAMS += tests/language/lexer/command-name-test
     +tests_language_lexer_command_name_test_SOURCES = \
     +  src/data/identifier.c \
     +  src/language/lexer/command-name.c \
     +  tests/language/lexer/command-name-test.c
     +tests_language_lexer_command_name_test_LDADD = \
     +  src/libpspp/libpspp.la \
     +  gl/libgl.la \
     +  $(LIBINTL) 
     +tests_language_lexer_command_name_test_CFLAGS = $(AM_CFLAGS)
     +
      check_PROGRAMS += tests/output/render-test
      tests_output_render_test_SOURCES = tests/output/render-test.c
      tests_output_render_test_LDADD = \
     @@ -406,6 +418,7 @@ TESTSUITE_AT = \
        tests/language/dictionary/attributes.at \
        tests/language/dictionary/mrsets.at \
        tests/language/expressions/evaluate.at \
     +  tests/language/lexer/command-name.at \
        tests/language/stats/aggregate.at \
        tests/language/stats/autorecode.at \
        tests/language/stats/crosstabs.at \
     @@ -436,7 +449,7 @@ $(srcdir)/tests/testsuite.at: tests/testsuite.in 
Makefile
      
      CHECK_LOCAL += tests_check
      tests_check: tests/atconfig tests/atlocal $(TESTSUITE)
     -  $(SHELL) '$(TESTSUITE)' -C tests 
AUTOTEST_PATH=tests/libpspp:tests/output:src/ui/terminal $(TESTSUITEFLAGS)
     +  $(SHELL) '$(TESTSUITE)' -C tests 
AUTOTEST_PATH=tests/language/lexer:tests/libpspp:tests/output:src/ui/terminal 
$(TESTSUITEFLAGS)
      
      CLEAN_LOCAL += tests_clean
      tests_clean:
     diff --git a/tests/language/lexer/command-name-test.c 
b/tests/language/lexer/command-name-test.c
     new file mode 100644
     index 0000000..d63b8a2
     --- /dev/null
     +++ b/tests/language/lexer/command-name-test.c
     @@ -0,0 +1,149 @@
     +/* PSPP - a program for statistical analysis.
     +   Copyright (C) 2010 Free Software Foundation, Inc.
     +
     +   This program is free software: you can redistribute it and/or modify
     +   it under the terms of the GNU General Public License as published by
     +   the Free Software Foundation, either version 3 of the License, or
     +   (at your option) any later version.
     +
     +   This program is distributed in the hope that it will be useful,
     +   but WITHOUT ANY WARRANTY; without even the implied warranty of
     +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     +   GNU General Public License for more details.
     +
     +   You should have received a copy of the GNU General Public License
     +   along with this program.  If not, see <http://www.gnu.org/licenses/>. 
*/
     +
     +#include <config.h>
     +
     +#include <ctype.h>
     +#include <errno.h>
     +#include <getopt.h>
     +#include <limits.h>
     +#include <stdio.h>
     +#include <stdlib.h>
     +#include <string.h>
     +
     +#include "libpspp/assertion.h"
     +#include "libpspp/compiler.h"
     +#include "language/lexer/command-name.h"
     +
     +#include "gl/error.h"
     +#include "gl/progname.h"
     +
     +static char **commands, **strings;
     +static size_t n_commands, n_strings;
     +
     +static void parse_options (int argc, char **argv);
     +static void usage (void) NO_RETURN;
     +
     +int
     +main (int argc, char *argv[])
     +{
     +  size_t i;
     +
     +  set_program_name (argv[0]);
     +  parse_options (argc, argv);
     +
     +  for (i = 0; i < n_strings; i++)
     +    {
     +      const char *string = strings[i];
     +      struct command_matcher cm;
     +      const char *best;
     +      size_t j;
     +
     +      if (i > 0)
     +        putchar ('\n');
     +      printf ("string=\"%s\":\n", string);
     +      for (j = 0; j < n_commands; j++)
     +        {
     +          const char *command = commands[j];
     +          int missing_words;
     +          bool match, exact;
     +
     +          match = command_match (ss_cstr (command), ss_cstr (string),
     +                                 &exact, &missing_words);
     +          printf ("\tcommand=\"%s\" match=%s",
     +                  command, match ? "yes" : "no");
     +          if (match)
     +            printf (" exact=%s missing_words=%d",
     +                    exact ? "yes" : "no", missing_words);
     +          putchar ('\n');
     +        }
     +
     +      command_matcher_init (&cm, ss_cstr (string));
     +      for (j = 0; j < n_commands; j++)
     +        command_matcher_add (&cm, ss_cstr (commands[j]), commands[j]);
     +      best = command_matcher_get_match (&cm);
     +      printf ("match: %s, missing_words=%d\n",
     +              best ? best : "none", command_matcher_get_missing_words 
(&cm));
     +      command_matcher_destroy (&cm);
     +    }
     +
     +  return 0;
     +}
     +
     +static void
     +parse_options (int argc, char **argv)
     +{
     +  int breakpoint;
     +
     +  for (;;)
     +    {
     +      static const struct option options[] =
     +        {
     +          {"help", no_argument, NULL, 'h'},
     +          {NULL, 0, NULL, 0},
     +        };
     +
     +      int c = getopt_long (argc, argv, "h", options, NULL);
     +      if (c == -1)
     +        break;
     +
     +      switch (c)
     +        {
     +        case 'h':
     +          usage ();
     +
     +        case 0:
     +          break;
     +
     +        case '?':
     +          exit (EXIT_FAILURE);
     +          break;
     +
     +        default:
     +          NOT_REACHED ();
     +        }
     +
     +    }
     +
     +  for (breakpoint = optind; ; breakpoint++)
     +    if (breakpoint >= argc)
     +      error (1, 0, "missing ',' on command line; use --help for help");
     +    else if (!strcmp (argv[breakpoint], ","))
     +      break;
     +
     +  commands = &argv[optind];
     +  n_commands = breakpoint - optind;
     +
     +  strings = &argv[breakpoint + 1];
     +  n_strings = argc - (breakpoint + 1);
     +
     +  if (n_commands == 0 || n_strings == 0)
     +    error (1, 0, "must specify at least one command and one string; "
     +           "use --help for help");
     +}
     +
     +static void
     +usage (void)
     +{
     +  printf ("\
     +%s, to match PSPP command names\n\
     +usage: %s [OPTIONS] COMMAND... , STRING...\n\
     +\n\
     +Options:\n\
     +  -h, --help          print this help message\n",
     +          program_name, program_name);
     +  exit (EXIT_SUCCESS);
     +}
     diff --git a/tests/language/lexer/command-name.at b/tests/language/lexer/command-name.at
     new file mode 100644
     index 0000000..c083516
     --- /dev/null
     +++ b/tests/language/lexer/command-name.at
     @@ -0,0 +1,230 @@
     +AT_BANNER([command name matching])
     +
     +AT_SETUP([single words])
     +AT_KEYWORDS([command name matching])
     +AT_CHECK([command-name-test DESCRIPTIVES , DESCRIPTIVESX DESCRIPTIVES descr Des DEX DE '' 'DESCRIPTIVES MORE'],
     +  [0], [dnl
     +string="DESCRIPTIVESX":
     +  command="DESCRIPTIVES" match=no
     +match: none, missing_words=0
     +
     +string="DESCRIPTIVES":
     +  command="DESCRIPTIVES" match=yes exact=yes missing_words=0
     +match: DESCRIPTIVES, missing_words=0
     +
     +string="descr":
     +  command="DESCRIPTIVES" match=yes exact=no missing_words=0
     +match: DESCRIPTIVES, missing_words=0
     +
     +string="Des":
     +  command="DESCRIPTIVES" match=yes exact=no missing_words=0
     +match: DESCRIPTIVES, missing_words=0
     +
     +string="DEX":
     +  command="DESCRIPTIVES" match=no
     +match: none, missing_words=0
     +
     +string="DE":
     +  command="DESCRIPTIVES" match=no
     +match: none, missing_words=0
     +
     +string="":
     +  command="DESCRIPTIVES" match=yes exact=yes missing_words=1
     +match: none, missing_words=1
     +
     +string="DESCRIPTIVES MORE":
     +  command="DESCRIPTIVES" match=yes exact=yes missing_words=-1
     +match: DESCRIPTIVES, missing_words=-1
     +])
     +AT_CLEANUP
     +
     +AT_SETUP([two words without prefix match])
     +AT_KEYWORDS([command name matching])
     +AT_CHECK([command-name-test 'DO IF' 'DO REPEAT' , 'DO IF' 'DO REPEAT' 'DO REP' 'DO OTHER' 'D IF' 'DO I' DO],
     +  [0], [dnl
     +string="DO IF":
     +  command="DO IF" match=yes exact=yes missing_words=0
     +  command="DO REPEAT" match=no
     +match: DO IF, missing_words=0
     +
     +string="DO REPEAT":
     +  command="DO IF" match=no
     +  command="DO REPEAT" match=yes exact=yes missing_words=0
     +match: DO REPEAT, missing_words=0
     +
     +string="DO REP":
     +  command="DO IF" match=no
     +  command="DO REPEAT" match=yes exact=no missing_words=0
     +match: DO REPEAT, missing_words=0
     +
     +string="DO OTHER":
     +  command="DO IF" match=no
     +  command="DO REPEAT" match=no
     +match: none, missing_words=0
     +
     +string="D IF":
     +  command="DO IF" match=no
     +  command="DO REPEAT" match=no
     +match: none, missing_words=0
     +
     +string="DO I":
     +  command="DO IF" match=no
     +  command="DO REPEAT" match=no
     +match: none, missing_words=0
     +
     +string="DO":
     +  command="DO IF" match=yes exact=yes missing_words=1
     +  command="DO REPEAT" match=yes exact=yes missing_words=1
     +match: none, missing_words=1
     +])
     +AT_CLEANUP
     +
     +AT_SETUP([two words with prefix match])
     +AT_KEYWORDS([command name matching])
     +AT_CHECK([command-name-test GET 'GET DATA' , GET 'GET TYPE' 'GET DAT' 'GET DATA'],
     +  [0], [dnl
     +string="GET":
     +  command="GET" match=yes exact=yes missing_words=0
     +  command="GET DATA" match=yes exact=yes missing_words=1
     +match: none, missing_words=1
     +
     +string="GET TYPE":
     +  command="GET" match=yes exact=yes missing_words=-1
     +  command="GET DATA" match=no
     +match: GET, missing_words=-1
     +
     +string="GET DAT":
     +  command="GET" match=yes exact=yes missing_words=-1
     +  command="GET DATA" match=yes exact=no missing_words=0
     +match: GET DATA, missing_words=0
     +
     +string="GET DATA":
     +  command="GET" match=yes exact=yes missing_words=-1
     +  command="GET DATA" match=yes exact=yes missing_words=0
     +match: GET DATA, missing_words=0
     +])
     +AT_CLEANUP
     +
     +AT_SETUP([ambiguous single-word names])
     +AT_KEYWORDS([command name matching])
     +AT_CHECK([command-name-test CASEPLOT CASESTOVARS , CAS Case CaseP CaseS], [0],
     +  [dnl
     +string="CAS":
     +  command="CASEPLOT" match=yes exact=no missing_words=0
     +  command="CASESTOVARS" match=yes exact=no missing_words=0
     +match: none, missing_words=0
     +
     +string="Case":
     +  command="CASEPLOT" match=yes exact=no missing_words=0
     +  command="CASESTOVARS" match=yes exact=no missing_words=0
     +match: none, missing_words=0
     +
     +string="CaseP":
     +  command="CASEPLOT" match=yes exact=no missing_words=0
     +  command="CASESTOVARS" match=no
     +match: CASEPLOT, missing_words=0
     +
     +string="CaseS":
     +  command="CASEPLOT" match=no
     +  command="CASESTOVARS" match=yes exact=no missing_words=0
     +match: CASESTOVARS, missing_words=0
     +])
     +AT_CLEANUP
     +
     +AT_SETUP([ambiguous two-word names])
     +AT_KEYWORDS([command name matching])
     +AT_CHECK([command-name-test VARCOMP VARSTOCASES 'VARIABLE ATTRIBUTE' , VAR VARC VARS VARI 'VAR ATT'],
     +  [0], [dnl
     +string="VAR":
     +  command="VARCOMP" match=yes exact=no missing_words=0
     +  command="VARSTOCASES" match=yes exact=no missing_words=0
     +  command="VARIABLE ATTRIBUTE" match=yes exact=no missing_words=1
     +match: none, missing_words=1
     +
     +string="VARC":
     +  command="VARCOMP" match=yes exact=no missing_words=0
     +  command="VARSTOCASES" match=no
     +  command="VARIABLE ATTRIBUTE" match=no
     +match: VARCOMP, missing_words=0
     +
     +string="VARS":
     +  command="VARCOMP" match=no
     +  command="VARSTOCASES" match=yes exact=no missing_words=0
     +  command="VARIABLE ATTRIBUTE" match=no
     +match: VARSTOCASES, missing_words=0
     +
     +string="VARI":
     +  command="VARCOMP" match=no
     +  command="VARSTOCASES" match=no
     +  command="VARIABLE ATTRIBUTE" match=yes exact=no missing_words=1
     +match: none, missing_words=1
     +
     +string="VAR ATT":
     +  command="VARCOMP" match=yes exact=no missing_words=-1
     +  command="VARSTOCASES" match=yes exact=no missing_words=-1
     +  command="VARIABLE ATTRIBUTE" match=yes exact=no missing_words=0
     +match: VARIABLE ATTRIBUTE, missing_words=0
     +])
     +AT_CLEANUP
     +
     +AT_SETUP([numbers and punctuation])
     +AT_KEYWORDS([command name matching])
     +AT_CHECK([command-name-test T-TEST 2SLS LIST , T-TEST 'T - Test' 2SLS '2 SLS' List],
     +  [0], [dnl
     +string="T-TEST":
     +  command="T-TEST" match=yes exact=yes missing_words=0
     +  command="2SLS" match=no
     +  command="LIST" match=no
     +match: T-TEST, missing_words=0
     +
     +string="T - Test":
     +  command="T-TEST" match=yes exact=yes missing_words=0
     +  command="2SLS" match=no
     +  command="LIST" match=no
     +match: T-TEST, missing_words=0
     +
     +string="2SLS":
     +  command="T-TEST" match=no
     +  command="2SLS" match=yes exact=yes missing_words=0
     +  command="LIST" match=no
     +match: 2SLS, missing_words=0
     +
     +string="2 SLS":
     +  command="T-TEST" match=no
     +  command="2SLS" match=yes exact=yes missing_words=0
     +  command="LIST" match=no
     +match: 2SLS, missing_words=0
     +
     +string="List":
     +  command="T-TEST" match=no
     +  command="2SLS" match=no
     +  command="LIST" match=yes exact=yes missing_words=0
     +match: LIST, missing_words=0
     +])
     +AT_CLEANUP
     +
     +AT_SETUP([off by more than one word])
     +AT_KEYWORDS([command name matching])
     +AT_CHECK([command-name-test 'a b c' , a 'a b' 'a b c' 'a b c d' 'a b c d e'],
     +  [0], [dnl
     +string="a":
     +  command="a b c" match=yes exact=yes missing_words=2
     +match: none, missing_words=1
     +
     +string="a b":
     +  command="a b c" match=yes exact=yes missing_words=1
     +match: none, missing_words=1
     +
     +string="a b c":
     +  command="a b c" match=yes exact=yes missing_words=0
     +match: a b c, missing_words=0
     +
     +string="a b c d":
     +  command="a b c" match=yes exact=yes missing_words=-1
     +match: a b c, missing_words=-1
     +
     +string="a b c d e":
     +  command="a b c" match=yes exact=yes missing_words=-2
     +match: a b c, missing_words=-2
     +])
     +AT_CLEANUP
     -- 
     1.7.1
     
     
     _______________________________________________
     pspp-dev mailing list
     address@hidden
     http://lists.gnu.org/mailman/listinfo/pspp-dev

-- 
PGP Public key ID: 1024D/2DE827B3 
fingerprint = 8797 A26D 0854 2EAB 0285  A290 8A67 719C 2DE8 27B3
See http://pgp.mit.edu or any PGP keyserver for public key.


Attachment: signature.asc
Description: Digital signature


reply via email to

[Prev in Thread] Current Thread [Next in Thread]