Index: configure.ac
===================================================================
--- configure.ac (revision 1011)
+++ configure.ac (working copy)
@@ -162,6 +162,8 @@
fi
fi
+m4_include([m4/ax_path_lib_pcre.m4]) AX_PATH_LIB_PCRE([])
+
# check if rdtsc (read CPU cycle counter is available.
# This is expected only on Intel CPUs
AC_MSG_CHECKING([whether CPU has rdtsc (read CPU cycle counter) opcode])
Index: m4/ax_path_lib_pcre.m4
===================================================================
--- m4/ax_path_lib_pcre.m4 (nonexistent)
+++ m4/ax_path_lib_pcre.m4 (working copy)
@@ -0,0 +1,90 @@
+# ===========================================================================
+# https://www.gnu.org/software/autoconf-archive/ax_path_lib_pcre.html
+# ===========================================================================
+#
+# SYNOPSIS
+#
+# AX_PATH_LIB_PCRE [(A/NA)]
+#
+# DESCRIPTION
+#
+# check for pcre lib and set PCRE_LIBS and PCRE_CFLAGS accordingly.
+#
+# also provide --with-pcre option that may point to the $prefix of the
+# pcre installation - the macro will check $pcre/include and $pcre/lib to
+# contain the necessary files.
+#
+# the usual two ACTION-IF-FOUND / ACTION-IF-NOT-FOUND are supported and
+# they can take advantage of the LIBS/CFLAGS additions.
+#
+# LICENSE
+#
+# Copyright (c) 2008 Guido U. Draheim
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+# Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+# As a special exception, the respective Autoconf Macro's copyright owner
+# gives unlimited permission to copy, distribute and modify the configure
+# scripts that are the output of Autoconf when processing the Macro. You
+# need not follow the terms of the GNU General Public License when using
+# or distributing such scripts, even though portions of the text of the
+# Macro appear in them. The GNU General Public License (GPL) does govern
+# all other use of the material that constitutes the Autoconf Macro.
+#
+# This special exception to the GPL applies to versions of the Autoconf
+# Macro released by the Autoconf Archive. When you make and distribute a
+# modified version of the Autoconf Macro, you may extend this special
+# exception to the GPL to apply to your modified version as well.
+
+#serial 8
+
+AC_DEFUN([AX_PATH_LIB_PCRE],[dnl
+AC_MSG_CHECKING([lib pcre])
+AC_ARG_WITH(pcre,
+[ --with-pcre[[=prefix]] compile xmlpcre part (via libpcre check)],,
+  with_pcre="yes")
+if test ".$with_pcre" = ".no" ; then
+  AC_MSG_RESULT([disabled])
+  m4_ifval($2,$2)
+else
+  AC_MSG_RESULT([(testing)])
+  AC_CHECK_LIB(pcre2-32, pcre2_compile_32)
+  # The cache variable uses the shell-safe library name: pcre2-32 becomes pcre2_32.
+  if test "$ac_cv_lib_pcre2_32_pcre2_compile_32" = "yes" ; then
+    PCRE_LIBS="-lpcre2-32"
+    AC_MSG_CHECKING([lib pcre])
+    AC_MSG_RESULT([$PCRE_LIBS])
+    m4_ifval($1,$1)
+  else
+    OLDLDFLAGS="$LDFLAGS" ; LDFLAGS="$LDFLAGS -L$with_pcre/lib"
+    OLDCPPFLAGS="$CPPFLAGS" ; CPPFLAGS="$CPPFLAGS -I$with_pcre/include"
+    AC_CHECK_LIB(pcre2-32, pcre2_match_data_create_from_pattern_32)
+    CPPFLAGS="$OLDCPPFLAGS" ; LDFLAGS="$OLDLDFLAGS"
+    if test "$ac_cv_lib_pcre2_32_pcre2_match_data_create_from_pattern_32" = "yes" ; then
+      AC_MSG_RESULT(.setting PCRE_LIBS -L$with_pcre/lib -lpcre2-32)
+      PCRE_LIBS="-L$with_pcre/lib -lpcre2-32"
+      test -d "$with_pcre/include" && PCRE_CFLAGS="-I$with_pcre/include"
+      AC_MSG_CHECKING([lib pcre])
+      AC_MSG_RESULT([$PCRE_LIBS])
+      m4_ifval($1,$1)
+    else
+      AC_MSG_CHECKING([lib pcre])
+      AC_MSG_RESULT([no (WARNING)])
+      m4_ifval($2,$2)
+    fi
+  fi
+fi
+AC_SUBST([PCRE_LIBS])
+AC_SUBST([PCRE_CFLAGS])
+])
Index: src/Id.cc
===================================================================
--- src/Id.cc (revision 1011)
+++ src/Id.cc (working copy)
@@ -37,6 +37,7 @@
#include "QuadFunction.hh"
#include "Quad_DLX.hh"
#include "Quad_FX.hh"
+#include "Quad_RE.hh"
#include "Quad_SQL.hh"
#include "Quad_SVx.hh"
#include "Quad_TF.hh"
Index: src/Id.def
===================================================================
--- src/Id.def (revision 1011)
+++ src/Id.def (working copy)
@@ -201,6 +201,7 @@
qf( SVS , "竡百VS" , )
qv( SYL , "竡百YL" , )
pp( USER_SYMBOL , --- , )
+qf( RE , "竡紐E" , )
pp( STOP_LINE , --- , )
qf( STOP , "竡百TOP" , )
qf( SQL , "竡百QL" , )
Index: src/Makefile.am
===================================================================
--- src/Makefile.am (revision 1011)
+++ src/Makefile.am (working copy)
@@ -86,10 +86,12 @@
Quad_DLX.cc Quad_DLX.hh \
Quad_FIO.cc Quad_FIO.hh \
Quad_FX.cc Quad_FX.hh \
+Quad_RE.cc Quad_RE.hh \
Quad_RL.cc Quad_RL.hh \
Quad_SQL.cc Quad_SQL.hh \
Quad_SVx.cc Quad_SVx.hh \
Quad_TF.cc Quad_TF.hh \
+Regexp.cc Regexp.hh \
Parallel.cc Parallel.hh \
Performance.cc Performance.def Performance.hh \
RealCell.cc RealCell.hh \
Index: src/QuadFunction.cc
===================================================================
--- src/QuadFunction.cc (revision 1011)
+++ src/QuadFunction.cc (working copy)
@@ -36,6 +36,7 @@
#include "PrintOperator.hh"
#include "QuadFunction.hh"
#include "Quad_FX.hh"
+#include "Quad_RE.hh"
#include "Quad_SQL.hh"
#include "Quad_TF.hh"
#include "Tokenizer.hh"
Index: src/Quad_RE.cc
===================================================================
--- src/Quad_RE.cc (nonexistent)
+++ src/Quad_RE.cc (working copy)
@@ -0,0 +1,187 @@
+#include "Quad_RE.hh"
+#include "Workspace.hh"
+#include "PointerCell.hh"
+
+#include "Regexp.hh"
+
+/// Decoded form of a regexp flag string.
+class Flags
+{
+    int flags;               ///< accumulated PCRE2 compile options
+    bool error_on_no_match;  ///< set by flag letter E
+    bool result_bitmap;      ///< set by flag letter B
+
+public:
+    Flags(const UCS_string &flags_string);
+    int get_compflags() const { return flags; }
+    bool get_error_on_no_match() const { return error_on_no_match; }
+    bool get_result_bitmap() const { return result_bitmap; }
+};
+
+/// Decode \b flags_string: i, s, x, m select PCRE2_CASELESS, PCRE2_DOTALL, PCRE2_EXTENDED and
+/// PCRE2_MULTILINE; E and B set error_on_no_match and result_bitmap; any other letter is a VALUE ERROR.
+Flags::Flags(const UCS_string &flags_string) : flags(0), error_on_no_match(false), result_bitmap(false)
+{
+    UCS_string::iterator i = flags_string.begin();
+    while(i.more()) {
+        const Unicode ch = i.next();
+        switch(static_cast<int>(ch)) {
+        case 'i':
+            flags |= PCRE2_CASELESS;
+            break;
+        case 's':
+            flags |= PCRE2_DOTALL;
+            break;
+        case 'x':
+            flags |= PCRE2_EXTENDED;
+            break;
+        case 'm':
+            flags |= PCRE2_MULTILINE;
+            break;
+        case 'E':
+            error_on_no_match = true;
+            break;
+        case 'B':
+            result_bitmap = true;
+            break;
+        default:
+            MORE_ERROR() << "Unknown regexp flag: " << ch;
+            VALUE_ERROR;
+        }
+    }
+}
+
+
+Quad_RE Quad_RE::_fun; // the instance of this system function
+Quad_RE *Quad_RE::fun = &Quad_RE::_fun; // points to _fun, which is defined on the line above
+
+Quad_RE::Quad_RE() : QuadFunction(TOK_Quad_RE) // passes TOK_Quad_RE to the QuadFunction base
+{
+}
+
+Token Quad_RE::eval_AB(Value_P A, Value_P B) // call without an X argument
+{
+ return eval_AXB(A, Str0(LOC), B); // Str0(LOC) takes the place of X
+}
+
+/// Match \b regexp against \b matched and build the APL result.
+/// With flag B: an integer vector as long as \b matched that holds 0 outside
+/// matches and the 1-based number of the match inside each non-empty match.
+/// Without B: the text of the first match, or the vector of its captured
+/// groups if num_matches() is above 1; no match returns Idx0(LOC), or raises
+/// DOMAIN ERROR when flag E is set. \b result is not used by this function.
+static Value_P fill_regex_results(Value_P &result, const Regexp &regexp, const Flags &flags, const UCS_string &matched)
+{
+    if(flags.get_result_bitmap()) {
+        // collect the [from, to) offsets of all non-empty matches first
+        vector<pair<ShapeItem, ShapeItem> > results;
+        const ShapeItem length = matched.size();
+        ShapeItem pos = 0;
+        while(pos < length) {
+            unique_ptr<RegexpMatch> match(regexp.match(matched, static_cast<PCRE2_SIZE>(pos)));
+            if(!match->is_match()) break;
+            const PCRE2_SIZE *ovector = match->get_ovector();
+            const ShapeItem from = ovector[0];
+            const ShapeItem to = ovector[1];
+            if(to > from) {
+                results.push_back(pair<ShapeItem, ShapeItem>(from, to));
+            }
+            // An empty match consumes no input, so resuming at its end would
+            // repeat it forever; step one character ahead in that case.
+            pos = (to > pos) ? to : pos + 1;
+        }
+
+        Shape shape(length);
+        Value_P result_value(shape, LOC);
+        ShapeItem w = 0;
+        int match_id = 1;
+        for(vector<pair<ShapeItem, ShapeItem> >::iterator i = results.begin() ; i != results.end() ; i++) {
+            while(w < i->first) {
+                new (result_value->next_ravel()) IntCell(0);
+                w++;
+            }
+            while(w < i->second) {
+                new (result_value->next_ravel()) IntCell(match_id);
+                w++;
+            }
+            match_id++;
+        }
+        while(w < length) {
+            new (result_value->next_ravel()) IntCell(0);
+            w++;
+        }
+        result_value->check_value(LOC);
+        return result_value;
+    }
+    unique_ptr<RegexpMatch> match(regexp.match(matched, 0));
+    if(!match->is_match()) {
+        if(!flags.get_error_on_no_match()) {
+            return Idx0(LOC);
+        }
+        MORE_ERROR() << "No match";
+        DOMAIN_ERROR;
+    }
+    if(match->num_matches() == 1) {
+        return Value_P(match->matched_string(), LOC);
+    }
+    vector<UCS_string> strings = match->matched_string_list();
+    Shape shape(strings.size());
+    Value_P result_value(shape, LOC);
+    for(vector<UCS_string>::iterator i = strings.begin() ; i != strings.end() ; i++) {
+        Value_P field_value(*i, LOC);
+        field_value->check_value(LOC);
+        new (result_value->next_ravel()) PointerCell(field_value, result_value.getref());
+    }
+    result_value->check_value(LOC);
+    return result_value;
+}
+
+/// Match the regular expression A, compiled with the flag letters in X,
+/// against B. A rank 0 B gives Idx0(LOC); a character string B gives its
+/// match result; for any other B each item is matched on its own and the
+/// results are returned nested in a value with the shape of B.
+Token
+Quad_RE::eval_AXB(const Value_P A, const Value_P X, const Value_P B)
+{
+    if(!A->is_char_string()) {
+        MORE_ERROR() << "Regexp argument must be a string value";
+        VALUE_ERROR;
+    }
+    Flags flags(X->get_UCS_ravel());
+    Regexp regexp(A->get_UCS_ravel(), flags.get_compflags());
+
+    const Shape &shape = B->get_shape();
+    if(shape.get_rank() == 0) return Token(TOK_APL_VALUE1, Idx0(LOC));
+
+    if(B->is_char_string()) {
+        // result is named in its own initializer; fill_regex_results() ignores that parameter
+        Value_P result = fill_regex_results(result, regexp, flags, B->get_UCS_ravel());
+        return Token(TOK_APL_VALUE1, result);
+    }
+
+    // every item of B must be a character string and gets its own result
+    Value_P result(shape, LOC);
+    for(ShapeItem item = 0 ; item < shape.get_volume() ; item++) {
+        Value_P value = B->get_ravel(item).to_value(LOC);
+        if(!value->is_char_string()) {
+            MORE_ERROR() << "Cell does not contain a string";
+            DOMAIN_ERROR;
+        }
+        Value_P result_value = fill_regex_results(result, regexp, flags, value->get_UCS_ravel());
+        new (result->next_ravel()) PointerCell(result_value, result.getref());
+    }
+    result->check_value(LOC);
+    return Token(TOK_APL_VALUE1, result);
+}
+
+Token
+Quad_RE::eval_B(Value_P B) // call with B only: not supported
+{
+ VALENCE_ERROR;
+}
+
+Token
+Quad_RE::eval_XB(Value_P X, Value_P B) // call with X and B but no A: not supported
+{
+ VALENCE_ERROR;
+}
Index: src/Quad_RE.hh
===================================================================
--- src/Quad_RE.hh (nonexistent)
+++ src/Quad_RE.hh (working copy)
@@ -0,0 +1,54 @@
+/*
+ This file is part of GNU APL, a free implementation of the
+ ISO/IEC Standard 13751, "Programming Language APL, Extended"
+
+ Copyright (C) 2008-2016 Dr. Jürgen Sauermann
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __Quad_RE_DEFINED__
+#define __Quad_RE_DEFINED__
+
+#include "QuadFunction.hh"
+#include "Value.hh"
+#include "Simple_string.hh"
+
+/// The regular expression system function (token TOK_Quad_RE).
+class Quad_RE : public QuadFunction
+{
+public:
+ /// Constructor.
+ Quad_RE();
+
+ static Quad_RE * fun; ///< Pointer to the instance _fun.
+ static Quad_RE _fun; ///< The instance of this function.
+
+protected:
+ /// overloaded Function::eval_AB(): match without flag letters.
+ Token eval_AB(const Value_P A, const Value_P B);
+
+ /// overloaded Function::eval_AXB(): match with the flag letters in X.
+ Token eval_AXB(const Value_P A, const Value_P X, const Value_P B);
+
+ /// overloaded Function::eval_B(): not supported,
+ /// raises VALENCE ERROR.
+ Token eval_B(Value_P B);
+
+ /// overloaded Function::eval_XB(): not supported,
+ /// raises VALENCE ERROR.
+ Token eval_XB(Value_P X, Value_P B);
+};
+
+#endif
Index: src/Regexp.cc
===================================================================
--- src/Regexp.cc (nonexistent)
+++ src/Regexp.cc (working copy)
@@ -0,0 +1,115 @@
+#include "Workspace.hh"
+#include "Regexp.hh"
+
+/// Copy the characters of \b string into a new[]-allocated array of PCRE2
+/// 32-bit code units. No terminator is appended, and the caller has to
+/// delete[] the returned array.
+static const PCRE2_UCHAR32 *ucs_to_codepoints(const UCS_string &string)
+{
+    const int length = string.size();
+    PCRE2_UCHAR32 *const units = new PCRE2_UCHAR32[length];
+    PCRE2_UCHAR32 *out = units;
+    for(UCS_string::iterator it = string.begin() ; it.more() ; ) *out++ = it.next();
+    return units;
+}
+
+/// Build a UCS_string from the 0-terminated PCRE2 code unit array \b buf.
+static UCS_string make_ucs_string(PCRE2_UCHAR32 *buf)
+{
+    UCS_string result;
+    for(const PCRE2_UCHAR32 *p = buf ; *p != 0 ; p++) {
+        result.append(static_cast<Unicode>(*p));
+    }
+    return result;
+}
+
+/// Match \b code against \b matched, searching from offset \b start. A negative
+/// pcre2_match_32() result is stored as is and leaves ovector NULL.
+RegexpMatch::RegexpMatch(pcre2_code *code, const UCS_string &matched, PCRE2_SIZE start)
+    : ovector(NULL), matched_ucs(ucs_to_codepoints(matched)),
+      match_data(pcre2_match_data_create_from_pattern_32(code, NULL)), match_result(0)
+{
+    match_result = pcre2_match_32(code, matched_ucs, matched.size(), start, 0, match_data, NULL);
+    if(match_result > 0) ovector = pcre2_get_ovector_pointer_32(match_data);
+    if(match_result != 0) return;
+    // FIXME may leave by throwing, in which case the destructor does not run:
+    // release both buffers first and clear the pointers.
+    pcre2_match_data_free(match_data); match_data = NULL;
+    delete[] matched_ucs; matched_ucs = NULL;
+    MORE_ERROR() << "Match buffer too small";
+    FIXME;
+}
+
+RegexpMatch::~RegexpMatch() // releases the subject copy and the PCRE2 match data
+{
+ delete[] matched_ucs; // allocated with new[] by ucs_to_codepoints()
+ pcre2_match_data_free(match_data);
+}
+
+bool RegexpMatch::is_match() const
+{
+ return match_result > 0; // match_result holds the value returned by pcre2_match_32()
+}
+
+/// Return match_result; a negative (failed) result is reported via FIXME first.
+int RegexpMatch::num_matches() const
+{
+    if(match_result >= 0) return match_result;
+    MORE_ERROR() << "Attempt to call num_matches without matches";
+    FIXME;
+    return match_result;
+}
+
+/// Return the text of the whole match (ovector pair 0); requires is_match().
+UCS_string RegexpMatch::matched_string() const
+{
+    const PCRE2_UCHAR32 *first = matched_ucs + ovector[0];
+    return UCS_string(reinterpret_cast<const Unicode *>(first), ovector[1] - ovector[0]);
+}
+
+/// Text of capture groups 1 .. match_result-1; a group with PCRE2_UNSET offsets gives an empty string.
+vector<UCS_string> RegexpMatch::matched_string_list() const
+{
+    vector<UCS_string> result;
+    for(int i = 1 ; i < match_result ; i++) {
+        const PCRE2_SIZE start = ovector[i * 2], end = ovector[i * 2 + 1];
+        if(start == PCRE2_UNSET) result.push_back(UCS_string());
+        else result.push_back(UCS_string(reinterpret_cast<const Unicode *>(matched_ucs + start), end - start));
+    }
+    return result;
+}
+
+/// Compile \b pattern with the PCRE2 options in \b flags (PCRE2_NO_UTF_CHECK is always
+/// added); on failure raise VALUE ERROR with the PCRE2 message in MORE_ERROR.
+Regexp::Regexp(const UCS_string &pattern, int flags)
+{
+    const PCRE2_UCHAR32 *pattern_ucs = ucs_to_codepoints(pattern);
+    int error_code;
+    PCRE2_SIZE error_offset;
+    code = pcre2_compile_32(pattern_ucs, pattern.size(), PCRE2_NO_UTF_CHECK | flags, &error_code, &error_offset, NULL);
+    delete[] pattern_ucs;
+    if(code != NULL) return;
+
+    PCRE2_UCHAR32 buf[256] = { 0 };
+    // the length argument is a count of code units, not of bytes
+    pcre2_get_error_message_32(error_code, buf, sizeof(buf) / sizeof(buf[0]));
+    MORE_ERROR() << "Error compiling regex at offset: " << error_offset << ": " << make_ucs_string(buf);
+    VALUE_ERROR;
+}
+
+Regexp::~Regexp() // releases the compiled pattern
+{
+ pcre2_code_free(code);
+}
+
+RegexpMatch *Regexp::match(const UCS_string &match, PCRE2_SIZE size) const
+{
+ return new RegexpMatch(code, match, size); // size is passed on as the start offset; the result is allocated with new
+}
+
+int Regexp::expression_count() const // PCRE2_INFO_CAPTURECOUNT of the compiled pattern
+{
+    uint32_t result = 0; // defined value even if pcre2_pattern_info() fails
+    pcre2_pattern_info(code, PCRE2_INFO_CAPTURECOUNT, &result);
+    return result;
+}
Index: src/Regexp.hh
===================================================================
--- src/Regexp.hh (nonexistent)
+++ src/Regexp.hh (working copy)
@@ -0,0 +1,40 @@
+#ifndef __Regexp__DEFINED__
+#define __Regexp__DEFINED__
+
+#include "UCS_string.hh"
+#include <memory>
+#include <vector>
+#define PCRE2_CODE_UNIT_WIDTH 32
+#include <pcre2.h>
+
+class RegexpMatch // result of one pcre2_match_32() call; owns match_data and matched_ucs
+{
+public:
+    RegexpMatch(pcre2_code *code, const UCS_string &, PCRE2_SIZE start);
+    virtual ~RegexpMatch();
+    bool is_match() const;                           ///< true if match_result > 0
+    int num_matches() const;                         ///< match_result of a successful match
+    UCS_string matched_string() const;               ///< text of ovector pair 0
+    const PCRE2_SIZE *get_ovector() const { return ovector; }
+    vector<UCS_string> matched_string_list() const;  ///< text of ovector pairs 1 and up
+
+private:
+    PCRE2_SIZE *ovector;               ///< offset pairs inside match_data, NULL without a match
+    const PCRE2_UCHAR32 *matched_ucs;  ///< new[]-allocated copy of the subject
+    pcre2_match_data *match_data;      ///< freed in the destructor
+    int match_result;                  ///< return value of pcre2_match_32()
+};
+
+class Regexp // a compiled PCRE2 pattern; owns code
+{
+public:
+ Regexp(const UCS_string &pattern, int flags); ///< compile pattern with the PCRE2 options in flags
+ virtual ~Regexp(); ///< frees code
+ RegexpMatch *match(const UCS_string &match, PCRE2_SIZE size) const; ///< size is passed on as the start offset
+ int expression_count() const; ///< PCRE2_INFO_CAPTURECOUNT of the pattern
+
+private:
+ pcre2_code *code; ///< result of pcre2_compile_32()
+};
+
+#endif
Index: src/SystemVariable.def
===================================================================
--- src/SystemVariable.def (revision 1011)
+++ src/SystemVariable.def (working copy)
@@ -73,6 +73,7 @@
sf_def(Quad_NA, "NA", "Name Association" )
sf_def(Quad_NC, "NC", "Name Class" )
sf_def(Quad_NL, "NL", "Name List" )
+ sf_def(Quad_RE, "RE", "Regular expression" )
sf_def(Quad_SI, "SI", "State Indicator" )
sf_def(Quad_SQL, "SQL", "SQL functions" )
sf_def(Quad_SVC, "SVC", "Shared Variable Control" )
@@ -86,6 +87,3 @@
sf_def(Quad_UCS, "UCS", "Universal Char Set (Unicode)" )
# undef sf_def
#endif
-
-
-
Index: src/Token.def
===================================================================
--- src/Token.def (revision 1011)
+++ src/Token.def (working copy)
@@ -116,6 +116,7 @@
TD(TOK_Quad_EC , TC_FUN1 , TV_FUN , ID::Quad_EC )
TD(TOK_Quad_ENV , TC_FUN1 , TV_FUN , ID::Quad_ENV )
TD(TOK_Quad_EX , TC_FUN1 , TV_FUN , ID::Quad_EX )
+TD(TOK_Quad_RE , TC_FUN2 , TV_FUN , ID::Quad_RE )
TD(TOK_Quad_SQL , TC_FUN2 , TV_FUN , ID::Quad_SQL )
TD(TOK_Quad_SVQ , TC_FUN1 , TV_FUN , ID::Quad_SVQ )
TD(TOK_Quad_SVR , TC_FUN1 , TV_FUN , ID::Quad_SVR )
Index: src/Workspace.hh
===================================================================
--- src/Workspace.hh (revision 1011)
+++ src/Workspace.hh (working copy)
@@ -28,6 +28,7 @@
#include "Quad_CR.hh"
#include "Quad_DLX.hh"
#include "Quad_FIO.hh"
+#include "Quad_RE.hh"
#include "Quad_RL.hh"
#include "Quad_SVx.hh"
#include "ScalarFunction.hh"