>From 80b45f78026f1cda7f3ca17fd1bf226c6f6710fe Mon Sep 17 00:00:00 2001 From: Mario Domenech Goulart Date: Mon, 8 Sep 2014 21:54:24 -0300 Subject: [PATCH] Remove srfi-13 --- LICENSE | 60 +- README | 1 - chicken-bug.scm | 4 +- chicken-install.scm | 10 +- chicken-profile.scm | 4 +- chicken-uninstall.scm | 9 +- csc.scm | 12 +- defaults.make | 2 +- distribution/manifest | 6 - eval.scm | 2 +- manual/Modules | 1 - manual/Supported language | 1 - manual/Unit srfi-13 | 1351 ---------------------- manual/Unit srfi-14 | 2 +- manual/Unit srfi-4 | 2 +- manual/Unit utils | 2 +- rules.make | 4 +- scripts/compile-all | 2 +- scripts/mini-salmonella.scm | 2 +- setup-api.scm | 11 +- setup-download.scm | 49 +- setup.defaults | 2 +- srfi-13.import.scm | 133 --- srfi-13.scm | 2065 ---------------------------------- tests/reexport-m1.scm | 4 +- tests/reverser/tags/1.0/reverser.scm | 11 +- tests/reverser/tags/1.1/reverser.scm | 11 +- tests/runtests.bat | 4 - tests/runtests.sh | 5 +- tests/srfi-13-tests.scm | 776 ------------- types.db | 177 +-- utils.scm | 7 +- 32 files changed, 106 insertions(+), 4626 deletions(-) delete mode 100644 manual/Unit srfi-13 delete mode 100644 srfi-13.import.scm delete mode 100644 srfi-13.scm delete mode 100644 tests/srfi-13-tests.scm diff --git a/LICENSE b/LICENSE index ec57517..158bdb4 100644 --- a/LICENSE +++ b/LICENSE @@ -118,7 +118,7 @@ srfi-1.scm: this code as long as you do not remove this copyright notice or hold me liable for its use. Please send bug reports to address@hidden -srfi-13.scm, srfi-14.scm: +srfi-14.scm: Copyright (c) 1988-1994 Massachusetts Institute of Technology. Copyright (c) 1988-1995 Massachusetts Institute of Technology @@ -151,33 +151,6 @@ srfi-13.scm, srfi-14.scm: promotional, or sales literature without prior written consent from MIT in each case. -srfi-13.scm: - - Copyright (c) 1998, 1999, 2000 Olin Shivers. All rights reserved. - All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - 1. Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - 2. Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - 3. The name of the authors may not be used to endorse or promote products - derived from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR - IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT, - INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - === Public domain / unencumbered Since we would still like to acknowledge all the useful contributions @@ -254,34 +227,3 @@ tests/r4rstest.scm: Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA; or view http://swissnet.ai.mit.edu/~jaffer/GPL.html - -tests/srfi-13-tests.scm: - - Copyright (c) 2000-2003 Shiro Kawai, All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - 1. Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - 3. Neither the name of the authors nor the names of its contributors - may be used to endorse or promote products derived from this - software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED - TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/README b/README index ec1d6e4..8a90bb5 100644 --- a/README +++ b/README @@ -299,7 +299,6 @@ | | |-- setup-download.import.so | | |-- setup-download.so | | |-- srfi-1.import.so - | | |-- srfi-13.import.so | | |-- srfi-14.import.so | | |-- srfi-18.import.so | | |-- srfi-4.import.so diff --git a/chicken-bug.scm b/chicken-bug.scm index f02219c..ef2bd2b 100644 --- a/chicken-bug.scm +++ b/chicken-bug.scm @@ -24,7 +24,7 @@ ; POSSIBILITY OF SUCH DAMAGE. -(require-extension srfi-13 posix tcp data-structures utils extras) +(require-extension posix tcp data-structures utils extras) (define-constant +bug-report-file+ "chicken-bug-report.~a-~a-~a") @@ -101,7 +101,7 @@ EOF (let loop ((data '())) (let ((ln (read-line))) (cond ((or (eof-object? ln) (string=? "." ln)) - (string-concatenate-reverse data) ) + (string-intersperse (reverse data) "")) (else (loop (cons ln data))))))) (define (justify n) diff --git a/chicken-install.scm b/chicken-install.scm index 2ef6ef4..6021469 100644 --- a/chicken-install.scm +++ b/chicken-install.scm @@ -25,12 +25,12 @@ (require-library setup-download setup-api) -(require-library srfi-1 posix data-structures utils irregex ports extras srfi-13 files) +(require-library srfi-1 posix data-structures utils irregex ports extras files) (module main () (import scheme chicken srfi-1 posix data-structures utils irregex ports extras - srfi-13 files) + files) (import setup-download setup-api) (import foreign) @@ -47,7 +47,6 @@ "ports.import.so" "files.import.so" "posix.import.so" - "srfi-13.import.so" "srfi-69.import.so" "extras.import.so" "srfi-14.import.so" @@ -370,7 +369,7 @@ (next))))))) (define (make-replace-extension-question e+d+v upgrade) - (string-concatenate + (string-intersperse (append (list "The following installed extensions are outdated, because `" (car e+d+v) @@ -392,7 +391,8 @@ " -> " (cdr e) ")" #\newline) ))) upgrade) - '("\nDo you want to replace the existing extensions?")))) + '("\nDo you want to replace the existing extensions?")) + "")) (define (override-version egg) (let ((name (string->symbol (if (pair? egg) (car egg) egg)))) diff --git a/chicken-profile.scm b/chicken-profile.scm index f38b257..46a8637 100644 --- a/chicken-profile.scm +++ b/chicken-profile.scm @@ -28,7 +28,7 @@ (declare (block) (uses srfi-1 - srfi-13 + data-structures srfi-69 posix utils)) @@ -225,7 +225,7 @@ EOF (list 0 0 0 0 0) (cons headers data))]) (define (print-row row) - (print (string-join (map format-string row column-widths alignments) spacer))) + (print (string-intersperse (map format-string row column-widths alignments) spacer))) (print-row headers) (print (make-string (+ (reduce + 0 column-widths) (* spacing (- (length alignments) 1))) diff --git a/chicken-uninstall.scm b/chicken-uninstall.scm index 9f19907..b008e93 100644 --- a/chicken-uninstall.scm +++ b/chicken-uninstall.scm @@ -26,14 +26,14 @@ (require-library setup-api - srfi-1 posix data-structures utils ports irregex srfi-13 files) + srfi-1 posix data-structures utils ports irregex files) (module main () (import scheme chicken foreign) (import setup-api) - (import srfi-1 posix data-structures utils ports irregex srfi-13 files) + (import srfi-1 posix data-structures utils ports irregex files) (define-foreign-variable C_TARGET_LIB_HOME c-string) (define-foreign-variable C_BINARY_VERSION int) @@ -70,11 +70,12 @@ (fini 1) (signal ex)) (yes-or-no? - (string-concatenate + (string-intersperse (append '("About to delete the following extensions:\n\n") (map (cut string-append " " <> "\n") eggs) - '("\nDo you want to proceed?"))) + '("\nDo you want to proceed?")) + "") default: "no" abort: (abort-setup)))) diff --git a/csc.scm b/csc.scm index 59556aa..d38cc27 100644 --- a/csc.scm +++ b/csc.scm @@ -27,7 +27,7 @@ (declare (block) - (uses data-structures ports srfi-1 srfi-13 utils files extras)) + (uses data-structures ports srfi-1 utils files extras)) (define-foreign-variable INSTALL_BIN_HOME c-string "C_INSTALL_BIN_HOME") (define-foreign-variable INSTALL_CC c-string "C_INSTALL_CC") @@ -1031,6 +1031,16 @@ EOF (string-append "\"" (string-translate* s '(("\"" . "\\\""))) "\"") s) ) ) +;; Simpler replacement for SRFI-13's string-any +(define (string-any criteria s) + (let ((end (string-length s))) + (let lp ((i 0)) + (let ((c (string-ref s i)) + (i1 (+ i 1))) + (if (= i1 end) (criteria c) + (or (criteria c) + (lp i1))))))) + (define (quote-option x) (cond ((string-any (cut char=? #\" <>) x) x) ((string-any (lambda (c) diff --git a/defaults.make b/defaults.make index 25b176f..589306c 100644 --- a/defaults.make +++ b/defaults.make @@ -275,7 +275,7 @@ CHICKEN_INSTALL_PROGRAM = $(PROGRAM_PREFIX)chicken-install$(PROGRAM_SUFFIX) CHICKEN_UNINSTALL_PROGRAM = $(PROGRAM_PREFIX)chicken-uninstall$(PROGRAM_SUFFIX) CHICKEN_STATUS_PROGRAM = $(PROGRAM_PREFIX)chicken-status$(PROGRAM_SUFFIX) CHICKEN_BUG_PROGRAM = $(PROGRAM_PREFIX)chicken-bug$(PROGRAM_SUFFIX) -IMPORT_LIBRARIES = chicken lolevel srfi-1 srfi-4 data-structures ports files posix srfi-13 srfi-69 extras srfi-14 tcp foreign srfi-18 utils csi irregex +IMPORT_LIBRARIES = chicken lolevel srfi-1 srfi-4 data-structures ports files posix srfi-69 extras srfi-14 tcp foreign srfi-18 utils csi irregex IMPORT_LIBRARIES += setup-api setup-download ifdef STATICBUILD diff --git a/distribution/manifest b/distribution/manifest index 1188267..6f5747d 100644 --- a/distribution/manifest +++ b/distribution/manifest @@ -39,7 +39,6 @@ profiler.c scheduler.c srfi-69.c srfi-1.c -srfi-13.c srfi-14.c srfi-18.c srfi-4.c @@ -95,7 +94,6 @@ runtime.c scheduler.scm srfi-69.scm srfi-1.scm -srfi-13.scm srfi-14.scm srfi-18.scm srfi-4.scm @@ -126,7 +124,6 @@ tests/runtests.sh tests/runtests.bat tests/runbench.sh tests/srfi-4-tests.scm -tests/srfi-13-tests.scm tests/srfi-18-signal-test.scm tests/srfi-14-tests.scm tests/srfi-45-tests.scm @@ -267,7 +264,6 @@ srfi-1.import.scm srfi-4.import.scm data-structures.import.scm posix.import.scm -srfi-13.import.scm srfi-69.import.scm extras.import.scm irregex.import.scm @@ -280,7 +276,6 @@ srfi-1.import.c srfi-4.import.c data-structures.import.c posix.import.c -srfi-13.import.c srfi-69.import.c extras.import.c irregex.import.c @@ -361,7 +356,6 @@ manual-html/Unit ports.html manual-html/Unit posix.html manual-html/Unit irregex.html manual-html/Unit srfi-1.html -manual-html/Unit srfi-13.html manual-html/Unit srfi-14.html manual-html/Unit srfi-18.html manual-html/Unit srfi-4.html diff --git a/eval.scm b/eval.scm index 3c4777d..cfb0194 100644 --- a/eval.scm +++ b/eval.scm @@ -61,7 +61,7 @@ (define-foreign-variable install-lib-name c-string "C_INSTALL_LIB_NAME") (define ##sys#core-library-modules - '(extras lolevel utils files tcp irregex posix srfi-1 srfi-4 srfi-13 + '(extras lolevel utils files tcp irregex posix srfi-1 srfi-4 srfi-14 srfi-18 srfi-69 data-structures ports)) (define ##sys#core-syntax-modules diff --git a/manual/Modules b/manual/Modules index b4048fc..9a2b95c 100644 --- a/manual/Modules +++ b/manual/Modules @@ -280,7 +280,6 @@ Everything from the {{library}}, {{eval}} and {{expand}} library units. [module] regex [module] srfi-1 [module] srfi-4 - [module] srfi-13 [module] srfi-14 [module] srfi-18 [module] srfi-69 diff --git a/manual/Supported language b/manual/Supported language index 7905da0..849a26b 100644 --- a/manual/Supported language +++ b/manual/Supported language @@ -23,7 +23,6 @@ * [[Unit irregex]] Regular expressions * [[Unit srfi-1]] List Library * [[Unit srfi-4]] Homogeneous numeric vectors -* [[Unit srfi-13]] String library * [[Unit srfi-14]] Character set library * [[Unit srfi-18]] multithreading * [[Unit srfi-69]] Hashtable Library diff --git a/manual/Unit srfi-13 b/manual/Unit srfi-13 deleted file mode 100644 index 4ba0c97..0000000 --- a/manual/Unit srfi-13 +++ /dev/null @@ -1,1351 +0,0 @@ -[[tags: manual]] - -== Unit srfi-13 - -SRFI 13 (string library). Certain procedures contained in this SRFI, -such as {{string-append}}, are identical to R5RS versions and are -omitted from this document. For full documentation, see the -[[http://srfi.schemers.org/srfi-13/srfi-13.html|original SRFI-13 -document]]. - -On systems that support dynamic loading, the {{srfi-13}} unit can -be made available in the CHICKEN interpreter ({{csi}}) by entering - - -(require-extension srfi-13) - - -The {{string-hash}} and {{string-hash-ci}} procedures are -not provided in this library unit. [[Unit srfi-69]] has -compatible definitions. - -[[toc:]] - -== Notes - -=== Strings are code-point sequences - -This SRFI considers strings simply to be a sequence of "code points" or -character encodings. Operations such as comparison or reversal are always -done code point by code point. - -CHICKEN's native strings are simple byte sequences (not Unicode code points). -Comparison or reversal is done byte-wise. If Unicode semantics are -desired, see the [[/egg/utf8|utf8]] egg. - -=== Case mapping and case-folding - -Upper- and lower-casing characters is complex in super-ASCII encodings. -SRFI 13 makes no attempt to deal with these issues; it uses a simple 1-1 -locale- and context-independent case-mapping, specifically Unicode's 1-1 -case-mappings given in [[ftp://ftp.unicode.org/Public/UNIDATA/UnicodeData.txt]]. - -On CHICKEN, case-mapping is restricted to operate on ASCII characters. - -=== String equality & string normalisation - -SRFI 13 string equality is simply based upon comparing the encoding -values used for the characters. On CHICKEN, strings are compared -byte-wise. - -=== String inequality - -SRFI 13 string ordering is strictly based upon a -character-by-character comparison of the values used for representing -the string. - -=== Naming conventions - -* Procedures whose names end in "-ci" are case-insensitive variants. -* Procedures whose names end in "!" are side-effecting variants. What values these procedures return is usually not specified. -* The order of common parameters is consistent across the different procedures. -* Left/right/both directionality: Procedures that have left/right directional variants use the following convention: - - - - - -
DirectionSuffix
left-to-right''none''
right-to-left{{-right}}
both{{-both}}
- -=== Shared storage - -CHICKEN does not currently have shared-text substrings, nor does its -implementation of SRFI 13 routines ever return one of the -strings that was passed in as a parameter, as is allowed by the -specification. - -On the other hand, the functionality is present to allow one to write -efficient code ''without'' shared-text substrings. You can write -efficient code that works by passing around start/end ranges indexing -into a string instead of simply building a shared-text substring. - -== Procedure Specification - -In the following procedure specifications: - - -* An S parameter is a string. -* A CHAR parameter is a character. -* START and END parameters are half-open string indices specifying a substring within a string parameter; when optional, they default to 0 and the length of the string, respectively. When specified, it must be the case that 0 <= START <= END <= {{(string-length S)}}, for the corresponding parameter S. They typically restrict a procedure's action to the indicated substring. -* A PRED parameter is a unary character predicate procedure, returning a true/false value when applied to a character. -* A CHAR/CHAR-SET/PRED parameter is a value used to select/search for a character in a string. If it is a character, it is used in an equality test; if it is a character set, it is used as a membership test; if it is a procedure, it is applied to the characters as a test predicate. -* An I parameter is an exact non-negative integer specifying an index into a string. -* LEN and NCHARS parameters are exact non-negative integers specifying a length of a string or some number of characters. -* An OBJ parameter may be any value at all. - -Passing values to procedures with these parameters that do not satisfy -these types is an error. - -Parameters given in square brackets are optional. Unless otherwise noted in -the text describing the procedure, any prefix of these optional parameters -may be supplied, from zero arguments to the full list. When a procedure -returns multiple values, this is shown by listing the return values in -square brackets, as well. So, for example, the procedure with signature - - - halts? F [X INIT-STORE] -> [BOOLEAN INTEGER] - -would take one (F), two (F, X) or three (F, X, INIT-STORE) input -parameters, and return two values, a boolean and an integer. - -A parameter followed by "{{...}}" means zero-or-more elements. So the -procedure with the signature - - - sum-squares X ... -> NUMBER - -takes zero or more arguments (X ...), while the procedure with signature - - - spell-check DOC DICT_1 DICT_2 ... -> STRING-LIST - - -takes two required parameters (DOC and DICT_1) and zero or more optional -parameters (DICT_2 ...). - -If a procedure is said to return "unspecified," this means that nothing -at all is said about what the procedure returns. Such a procedure is not -even required to be consistent from call to call. It is simply required to -return a value (or values) that may be passed to a command continuation, -''e.g.'' as the value of an expression appearing as a non-terminal -subform of a {{begin}} expression. Note that in R5RS, this restricts such -a procedure to returning a single value; non-R5RS systems may not even -provide this restriction. - - -=== Main procedures - -==== Predicates - -(string-null? s) -> boolean
- -Is S the empty string? - -(string-every char/char-set/pred s [start end]) -> value
-(string-any char/char-set/pred s [start end]) -> value
- -Checks to see if the given criteria is true of every / any character in S, -proceeding from left (index START) to right (index END). - -If CHAR/CHAR-SET/PRED is a character, it is tested for equality with the -elements of S. - -If CHAR/CHAR-SET/PRED is a character set, the elements of S are tested for -membership in the set. - -If CHAR/CHAR-SET/PRED is a predicate procedure, it is applied to the -elements of S. The predicate is "witness-generating:" - - -* If {{string-any}} returns true, the returned true value is the one produced by the application of the predicate. -* If {{string-every}} returns true, the returned true value is the one produced by the final application of the predicate to S[END-1]. If {{string-every}} is applied to an empty sequence of characters, it simply returns {{#t}}. - -If {{string-every}} or {{string-any}} apply the predicate to the final -element of the selected sequence (''i.e.'', S[END-1]), that final -application is a tail call. - -The names of these procedures do not end with a question mark -- this is to -indicate that, in the predicate case, they do not return a simple boolean -({{#t}} or {{#f}}), but a general value. - - -==== Constructors - -(string-tabulate proc len) -> string
- -PROC is an integer->char procedure. Construct a string of size LEN by -applying PROC to each index to produce the corresponding string element. -The order in which PROC is applied to the indices is not specified. - - -==== List & string conversion - -(string->list s [start end]) -> char-list
- -{{string->list}} is extended from the R5RS definition to take optional -START/END arguments. - -(reverse-list->string char-list) -> string
- -An efficient implementation of {{(compose list->string reverse)}}: - - - (reverse-list->string '(#\a #\B #\c)) -> "cBa" - -This is a common idiom in the epilog of string-processing loops -that accumulate an answer in a reverse-order list. (See also -{{string-concatenate-reverse}} for the "chunked" variant.) - -(string-join string-list [delimiter grammar]) -> string
- -This procedure is a simple unparser --- it pastes strings together using -the delimiter string. - -The GRAMMAR argument is a symbol that determines how the delimiter is used, -and defaults to {{'infix}}. - - -* {{'infix}} means an infix or separator grammar: insert the delimiter between list elements. An empty list will produce an empty string -- note, however, that parsing an empty string with an infix or separator grammar is ambiguous. Is it an empty list, or a list of one element, the empty string? -* {{'strict-infix}} means the same as {{'infix}}, but will raise an error if given an empty list. -* {{'suffix}} means a suffix or terminator grammar: insert the delimiter after every list element. This grammar has no ambiguities. -* {{'prefix}} means a prefix grammar: insert the delimiter before every list element. This grammar has no ambiguities. - -The delimiter is the string used to delimit elements; it defaults to a -single space " ". - - - (string-join '("foo" "bar" "baz") ":") => "foo:bar:baz" - (string-join '("foo" "bar" "baz") ":" 'suffix) => "foo:bar:baz:" - - ;; Infix grammar is ambiguous wrt empty list vs. empty string, - (string-join '() ":") => "" - (string-join '("") ":") => "" - - ;; but suffix & prefix grammars are not. - (string-join '() ":" 'suffix) => "" - (string-join '("") ":" 'suffix) => ":" - - - -==== Selection - -(string-copy s [start end]) -> string
-(substring/shared s start [end]) -> string
- -[R5RS+] {{substring/shared}} returns a string whose contents are the -characters of S beginning with index START (inclusive) and ending with -index END (exclusive). It differs from the R5RS {{substring}} in two ways: - - -* The END parameter is optional, not required. -* {{substring/shared}} may return a value that shares memory with S or is {{eq?}} to S. - -{{string-copy}} is extended from its R5RS definition by the addition of its -optional START/END parameters. In contrast to {{substring/shared}}, it is -guaranteed to produce a freshly-allocated string. - -Use {{string-copy}} when you want to indicate explicitly in your code that -you wish to allocate new storage; use {{substring/shared}} when you don't -care if you get a fresh copy or share storage with the original string. - - - (string-copy "Beta substitution") => "Beta substitution" - (string-copy "Beta substitution" 1 10) - => "eta subst" - (string-copy "Beta substitution" 5) => "substitution" - -(string-copy! target tstart s [start end]) -> unspecified
- -Copy the sequence of characters from index range [START,END) in string -S to string TARGET, beginning at index TSTART. The characters are copied -left-to-right or right-to-left as needed -- the copy is guaranteed to work, -even if TARGET and S are the same string. - -It is an error if the copy operation runs off the end of the target string, -''e.g.'' - - - (string-copy! (string-copy "Microsoft") 0 - "Regional Microsoft Operating Companies") => ''error'' - -(string-take s nchars) -> string
-(string-drop s nchars) -> string
-(string-take-right s nchars) -> string
-(string-drop-right s nchars) -> string
- -{{string-take}} returns the first NCHARS of S; {{string-drop}} returns all -but the first NCHARS of S. {{string-take-right}} returns the last NCHARS -of S; {{string-drop-right}} returns all but the last NCHARS of S. If these -procedures produce the entire string, they may return either S or a copy of -S; in some implementations, proper substrings may share memory with S. - - - (string-take "Pete Szilagyi" 6) => "Pete S" - (string-drop "Pete Szilagyi" 6) => "zilagyi" - - (string-take-right "Beta rules" 5) => "rules" - (string-drop-right "Beta rules" 5) => "Beta " - -It is an error to take or drop more characters than are in the string: - - - (string-take "foo" 37) => ''error'' - - -(string-pad s len [char start end]) -> string
-(string-pad-right s len [char start end]) -> string
- -Build a string of length LEN comprised of S padded on the left (right) by -as many occurrences of the character CHAR as needed. If S has more than LEN -chars, it is truncated on the left (right) to length LEN. CHAR defaults to -#\space. - -If LEN <= END-START, the returned value is allowed to share storage with S, -or be exactly S (if LEN = END-START). - - - (string-pad "325" 5) => " 325" - (string-pad "71325" 5) => "71325" - (string-pad "8871325" 5) => "71325" - -(string-trim s [char/char-set/pred start end]) -> string
-(string-trim-right s [char/char-set/pred start end]) -> string
-(string-trim-both s [char/char-set/pred start end]) -> string
- -Trim S by skipping over all characters on the left / on the right / on both -sides that satisfy the second parameter CHAR/CHAR-SET/PRED: - - -* if it is a character CHAR, characters equal to CHAR are trimmed; -* if it is a char set CS, characters contained in CS are trimmed; -* if it is a predicate PRED, it is a test predicate that is applied to the characters in S; a character causing it to return true is skipped. - -CHAR/CHAR-SET/PRED defaults to the character set {{char-set:whitespace}} -defined in SRFI 14. - -If no trimming occurs, these functions may return either S or a copy of S; -in some implementations, proper substrings may share memory with S. - - - (string-trim-both " The outlook wasn't brilliant, \n\r") - => "The outlook wasn't brilliant," - - -==== Modification - -(string-fill! s char [start end]) -> unspecified
- -[R5RS+] Stores CHAR in every element of S. - -{{string-fill!}} is extended from the R5RS definition to take optional -START/END arguments. - - -==== Comparison - -(string-compare s1 s2 proc< proc= proc> [start1 end1 start2 end2]) -> values
-(string-compare-ci s1 s2 proc< proc= proc> [start1 end1 start2 end2]) -> values
- -Apply PROC<, PROC=, or PROC> to the mismatch index, depending upon whether -S1 is less than, equal to, or greater than S2. The "mismatch index" is the -largest index I such that for every 0 <= J < I, S1[J] = S2[J] -- that is, I -is the first position that doesn't match. - -{{string-compare-ci}} is the case-insensitive variant. Case-insensitive -comparison is done by case-folding characters with the operation - - - (char-downcase (char-upcase C)) - -where the two case-mapping operations are assumed to be 1-1, locale- and -context-insensitive, and compatible with the 1-1 case mappings specified by -Unicode's UnicodeData.txt table: - -[[ftp://ftp.unicode.org/Public/UNIDATA/UnicodeData.txt]] - -The optional start/end indices restrict the comparison to the indicated -substrings of S1 and S2. The mismatch index is always an index into S1; -in the case of PROC=, it is always END1; we observe the protocol in this -redundant case for uniformity. - - - (string-compare "The cat in the hat" "abcdefgh" - values values values - 4 6 ; Select "ca" - 2 4) ; & "cd" - => 5 ; Index of S1's "a" - -Comparison is simply done on individual code-points of the string. True -text collation is not handled by this SRFI. - -(string= s1 s2 [start1 end1 start2 end2]) -> boolean
-(string<> s1 s2 [start1 end1 start2 end2]) -> boolean
-(string< s1 s2 [start1 end1 start2 end2]) -> boolean
-(string> s1 s2 [start1 end1 start2 end2]) -> boolean
-(string<= s1 s2 [start1 end1 start2 end2]) -> boolean
-(string>= s1 s2 [start1 end1 start2 end2]) -> boolean
- -These procedures are the lexicographic extensions to strings of the -corresponding orderings on characters. For example, {{string<}} is the -lexicographic ordering on strings induced by the ordering {{char(string-ci= s1 s2 [start1 end1 start2 end2]) -> boolean
-(string-ci<> s1 s2 [start1 end1 start2 end2]) -> boolean
-(string-ci< s1 s2 [start1 end1 start2 end2]) -> boolean
-(string-ci> s1 s2 [start1 end1 start2 end2]) -> boolean
-(string-ci<= s1 s2 [start1 end1 start2 end2]) -> boolean
-(string-ci>= s1 s2 [start1 end1 start2 end2]) -> boolean
- -Case-insensitive variants. - -Case-insensitive comparison is done by case-folding characters with the -operation - - - (char-downcase (char-upcase C)) - -where the two case-mapping operations are assumed to be 1-1, locale- and -context-insensitive, and compatible with the 1-1 case mappings specified by -Unicode's UnicodeData.txt table: - -[[ftp://ftp.unicode.org/Public/UNIDATA/UnicodeData.txt]] - -(string-hash s [bound start end]) -> integer
-(string-hash-ci s [bound start end]) -> integer
- -Compute a hash value for the string S. BOUND is a non-negative exact -integer specifying the range of the hash function. A positive value -restricts the return value to the range [0,BOUND). - -If BOUND is either zero or not given, the implementation may use an -implementation-specific default value, chosen to be as large as is -efficiently practical. For instance, the default range might be chosen for -a given implementation to map all strings into the range of integers that -can be represented with a single machine word. - -The optional start/end indices restrict the hash operation to the indicated -substring of S. - -{{string-hash-ci}} is the case-insensitive variant. Case-insensitive -comparison is done by case-folding characters with the operation - - - (char-downcase (char-upcase C)) - -where the two case-mapping operations are assumed to be 1-1, locale- and -context-insensitive, and compatible with the 1-1 case mappings specified by -Unicode's UnicodeData.txt table: - -[[ftp://ftp.unicode.org/Public/UNIDATA/UnicodeData.txt]] - -Invariants: - - - (<= 0 (string-hash s b) (- b 1)) ; When B > 0. - (string= s1 s2) => (= (string-hash s1 b) (string-hash s2 b)) - (string-ci= s1 s2) => (= (string-hash-ci s1 b) (string-hash-ci s2 b)) - -A legal but nonetheless discouraged implementation: - - - (define (string-hash s . other-args) 1) - (define (string-hash-ci s . other-args) 1) - -Rationale: allowing the user to specify an explicit bound simplifies user -code by removing the mod operation that typically accompanies every hash -computation, and also may allow the implementation of the hash function to -exploit a reduced range to efficiently compute the hash value. ''E.g.'', -for small bounds, the hash function may be computed in a fashion such -that intermediate values never overflow into bignum integers, allowing -the implementor to provide a fixnum-specific "fast path" for computing the -common cases very rapidly. - - -==== Prefixes & suffixes - -(string-prefix-length s1 s2 [start1 end1 start2 end2]) -> integer
-(string-suffix-length s1 s2 [start1 end1 start2 end2]) -> integer
-(string-prefix-length-ci s1 s2 [start1 end1 start2 end2]) -> integer
-(string-suffix-length-ci s1 s2 [start1 end1 start2 end2]) -> integer
- -Return the length of the longest common prefix/suffix of the two strings. -For prefixes, this is equivalent to the "mismatch index" for the strings -(modulo the STARTi index offsets). - -The optional start/end indices restrict the comparison to the indicated -substrings of S1 and S2. - -{{string-prefix-length-ci}} and {{string-suffix-length-ci}} are the -case-insensitive variants. Case-insensitive comparison is done by -case-folding characters with the operation - - - (char-downcase (char-upcase c)) - -where the two case-mapping operations are assumed to be 1-1, locale- and -context-insensitive, and compatible with the 1-1 case mappings specified by -Unicode's UnicodeData.txt table: - -[[ftp://ftp.unicode.org/Public/UNIDATA/UnicodeData.txt]] - -Comparison is simply done on individual code-points of the string. - -(string-prefix? s1 s2 [start1 end1 start2 end2]) -> boolean
-(string-suffix? s1 s2 [start1 end1 start2 end2]) -> boolean
-(string-prefix-ci? s1 s2 [start1 end1 start2 end2]) -> boolean
-(string-suffix-ci? s1 s2 [start1 end1 start2 end2]) -> boolean
- -Is S1 a prefix/suffix of S2? - -The optional start/end indices restrict the comparison to the indicated -substrings of S1 and S2. - -{{string-prefix-ci?}} and {{string-suffix-ci?}} are the case-insensitive -variants. Case-insensitive comparison is done by case-folding characters -with the operation - - - (char-downcase (char-upcase c)) - -where the two case-mapping operations are assumed to be 1-1, locale- and -context-insensitive, and compatible with the 1-1 case mappings specified by -Unicode's UnicodeData.txt table: - -[[ftp://ftp.unicode.org/Public/UNIDATA/UnicodeData.txt]] - -Comparison is simply done on individual code-points of the string. - - -==== Searching - -(string-index s char/char-set/pred [start end]) -> integer or #f
-(string-index-right s char/char-set/pred [start end]) -> integer or #f
-(string-skip s char/char-set/pred [start end]) -> integer or #f
-(string-skip-right s char/char-set/pred [start end]) -> integer or #f
- -{{string-index}} ({{string-index-right}}) searches through the string -from the left (right), returning the index of the first occurrence of a -character which - - -* equals CHAR/CHAR-SET/PRED (if it is a character); -* is in CHAR/CHAR-SET/PRED (if it is a character set); -* satisfies the predicate CHAR/CHAR-SET/PRED (if it is a procedure). - -If no match is found, the functions return false. - -The START and END parameters specify the beginning and end indices of the -search; the search includes the start index, but not the end index. Be -careful of "fencepost" considerations: when searching right-to-left, the -first index considered is - -END-1 - -whereas when searching left-to-right, the first index considered is - -START - -That is, the start/end indices describe a same half-open interval -[START,END) in these procedures that they do in all the other SRFI 13 -procedures. - -The skip functions are similar, but use the complement of the criteria: -they search for the first char that ''doesn't'' satisfy the test. ''E.g.'', -to skip over initial whitespace, say - - - (cond ((string-skip s char-set:whitespace) => - - (lambda (i) ...)) ; s[i] is not whitespace. - ...) - -(string-count s char/char-set/pred [start end]) -> integer
- -Return a count of the number of characters in S that satisfy the -CHAR/CHAR-SET/PRED argument. If this argument is a procedure, it is applied -to the character as a predicate; if it is a character set, the character -is tested for membership; if it is a character, it is used in an equality -test. - -(string-contains s1 s2 [start1 end1 start2 end2]) -> integer or false
-(string-contains-ci s1 s2 [start1 end1 start2 end2]) -> integer or false
- -Does string S1 contain string S2? - -Return the index in S1 where S2 occurs as a substring, or false. The -optional start/end indices restrict the operation to the indicated -substrings. - -The returned index is in the range [START1,END1). A successful match must -lie entirely in the [START1,END1) range of S1. - - - (string-contains "eek -- what a geek." "ee" - 12 18) ; Searches "a geek" - => 15 - -{{string-contains-ci}} is the case-insensitive variant. Case-insensitive -comparison is done by case-folding characters with the operation - - - (char-downcase (char-upcase C)) - -where the two case-mapping operations are assumed to be 1-1, locale- and -context-insensitive, and compatible with the 1-1 case mappings specified by -Unicode's UnicodeData.txt table: - -[[ftp://ftp.unicode.org/Public/UNIDATA/UnicodeData.txt]] - -Comparison is simply done on individual code-points of the string. - -The names of these procedures do not end with a question mark -- this is -to indicate that they do not return a simple boolean ({{#t}} or {{#f}}). -Rather, they return either false ({{#f}}) or an exact non-negative integer. - - -==== Alphabetic case mapping - -(string-titlecase s [start end]) -> string
-(string-titlecase! s [start end]) -> unspecified
- -For every character C in the selected range of S, if C is preceded by a -cased character, it is downcased; otherwise it is titlecased. - -{{string-titlecase}} returns the result string and does not alter its S -parameter. {{string-titlecase!}} is the in-place side-effecting variant. - - - (string-titlecase "--capitalize tHIS sentence.") => - "--Capitalize This Sentence." - - (string-titlecase "see Spot run. see Nix run.") => - "See Spot Run. See Nix Run." - - (string-titlecase "3com makes routers.") => - "3Com Makes Routers." - -Note that if a START index is specified, then the character preceding -S[START] has no effect on the titlecase decision for character S[START]: - - - (string-titlecase "greasy fried chicken" 2) => "Easy Fried Chicken" - -Titlecase and cased information must be compatible with the Unicode -specification. - -(string-upcase s [start end]) -> string
-(string-upcase! s [start end]) -> unspecified
-(string-downcase s [start end]) -> string
-(string-downcase! s [start end]) -> unspecified
- -Raise or lower the case of the alphabetic characters in the string. - -{{string-upcase}} and {{string-downcase}} return the result string and do -not alter their S parameter. {{string-upcase!}} and {{string-downcase!}} -are the in-place side-effecting variants. - -These procedures use the locale- and context-insensitive 1-1 case mappings -defined by Unicode's UnicodeData.txt table: - -[[ftp://ftp.unicode.org/Public/UNIDATA/UnicodeData.txt]] - - -==== Reverse & append - -(string-reverse s [start end]) -> string
-(string-reverse! s [start end]) -> unspecified
- -Reverse the string. - -{{string-reverse}} returns the result string and does not alter its S -parameter. {{string-reverse!}} is the in-place side-effecting variant. - - - (string-reverse "Able was I ere I saw elba.") - => ".able was I ere I saw elbA" - - ;;; In-place rotate-left, the Bell Labs way: - (lambda (s i) - (let ((i (modulo i (string-length s)))) - (string-reverse! s 0 i) - (string-reverse! s i) - (string-reverse! s))) - -Unicode note: Reversing a string simply reverses the sequence of -code-points it contains. So a zero-width accent character A coming -''after'' a base character B in string S would come out ''before'' B in the -reversed result. - -(string-concatenate string-list) -> string
- -Append the elements of {{string-list}} together into a single string. -Guaranteed to return a freshly allocated string. - -Note that the {{(apply string-append STRING-LIST)}} idiom is not robust for -long lists of strings, as some Scheme implementations limit the number of -arguments that may be passed to an n-ary procedure. - -(string-concatenate/shared string-list) -> string
-(string-append/shared s_1 ...) -> string
- -These two procedures are variants of {{string-concatenate}} and -{{string-append}} that are permitted to return results that share storage -with their parameters. In particular, if {{string-append/shared}} is -applied to just one argument, it may return exactly that argument, whereas -{{string-append}} is required to allocate a fresh string. - -(string-concatenate-reverse string-list [final-string end]) -> string
-(string-concatenate-reverse/shared string-list [final-string end]) -> string
- -With no optional arguments, these functions are equivalent to - - - (string-concatenate (reverse STRING-LIST)) - -and - - - (string-concatenate/shared (reverse STRING-LIST)) - -respectively. - -If the optional argument FINAL-STRING is specified, it is consed onto -the beginning of STRING-LIST before performing the list-reverse and -string-concatenate operations. - -If the optional argument END is given, only the first END characters of -FINAL-STRING are added to the string list, thus producing - - - (string-concatenate - (reverse (cons (substring/shared FINAL-STRING 0 END) - STRING-LIST))) - - -''E.g.'' - - - (string-concatenate-reverse '(" must be" "Hello, I") " going.XXXX" 7) - => "Hello, I must be going." - -This procedure is useful in the construction of procedures that accumulate -character data into lists of string buffers, and wish to convert the -accumulated data into a single string when done. - -Unicode note: Reversing a string simply reverses the sequence of -code-points it contains. So a zero-width accent character AC coming -''after'' a base character BC in string S would come out ''before'' BC in -the reversed result. - - -==== Fold, unfold & map - -(string-map proc s [start end]) -> string
-(string-map! proc s [start end]) -> unspecified
- -PROC is a char->char procedure; it is mapped over S. - -{{string-map}} returns the result string and does not alter its S -parameter. {{string-map!}} is the in-place side-effecting variant. - -Note: The order in which PROC is applied to the elements of S is not -specified. - -(string-fold kons knil s [start end]) -> value
-(string-fold-right kons knil s [start end]) -> value
- -These are the fundamental iterators for strings. - -The left-fold operator maps the KONS procedure across the string from left -to right - - - (... (KONS S[2] (KONS S[1] (KONS S[0] KNIL)))) - - -In other words, {{string-fold}} obeys the (tail) recursion - - - (string-fold KONS KNIL S START END) = - (string-fold KONS (KONS S[START] KNIL) START+1 END) - - -The right-fold operator maps the KONS procedure across the string from -right to left - - - (KONS S[0] (... (KONS S[END-3] (KONS S[END-2] (KONS S[END-1] KNIL))))) - - -obeying the (tail) recursion - - - (string-fold-right KONS KNIL S START END) = - (string-fold-right KONS (KONS S[END-1] KNIL) START END-1) - - -Examples: - - - ;;; Convert a string to a list of chars. - (string-fold-right cons '() s) - - ;;; Count the number of lower-case characters in a string. - (string-fold (lambda (c count) - (if (char-lower-case? c) - (+ count 1) - count)) - 0 - s) - - ;;; Double every backslash character in S. - (let* ((ans-len (string-fold (lambda (c sum) - (+ sum (if (char=? c #\\) 2 1))) - 0 s)) - (ans (make-string ans-len))) - (string-fold (lambda (c i) - (let ((i (if (char=? c #\\) - (begin (string-set! ans i #\\) (+ i 1)) - i))) - (string-set! ans i c) - (+ i 1))) - 0 s) - ans) - -The right-fold combinator is sometimes called a "catamorphism." - -(string-unfold p f g seed [base make-final]) -> string
- -This is a fundamental constructor for strings. - - -* G is used to generate a series of "seed" values from the initial seed: SEED, (G SEED), (G^2 SEED), (G^3 SEED), ... -* P tells us when to stop -- when it returns true when applied to one of these seed values. -* F maps each seed value to the corresponding character in the result string. These chars are assembled into the string in a left-to-right order. -* BASE is the optional initial/leftmost portion of the constructed string; it defaults to the empty string "". -* MAKE-FINAL is applied to the terminal seed value (on which P returns true) to produce the final/rightmost portion of the constructed string. It defaults to {{(lambda (x) "")}}. - -More precisely, the following (simple, inefficient) definitions hold: - - - ;;; Iterative - (define (string-unfold p f g seed base make-final) - (let lp ((seed seed) (ans base)) - (if (p seed) - (string-append ans (make-final seed)) - (lp (g seed) (string-append ans (string (f seed))))))) - - ;;; Recursive - (define (string-unfold p f g seed base make-final) - (string-append base - (let recur ((seed seed)) - (if (p seed) (make-final seed) - (string-append (string (f seed)) - (recur (g seed))))))) - -{{string-unfold}} is a fairly powerful string constructor -- you can use it -to convert a list to a string, read a port into a string, reverse a string, -copy a string, and so forth. Examples: - - - (port->string p) = (string-unfold eof-object? values - (lambda (x) (read-char p)) - (read-char p)) - - (list->string lis) = (string-unfold null? car cdr lis) - - (string-tabulate f size) = (string-unfold (lambda (i) (= i size)) f add1 0) - -To map F over a list LIS, producing a string: - - - (string-unfold null? (compose f car) cdr lis) - -Interested functional programmers may enjoy noting that -{{string-fold-right}} and {{string-unfold}} are in some sense inverses. -That is, given operations KNULL?, KAR, KDR, KONS, and KNIL satisfying - - - (KONS (KAR x) (KDR x)) = x and (KNULL? KNIL) = #t - -then - - - (string-fold-right KONS KNIL (string-unfold KNULL? KAR KDR X)) = X - - -and - - - (string-unfold KNULL? KAR KDR (string-fold-right KONS KNIL S)) = S. - - -The final string constructed does not share storage with either BASE or the -value produced by MAKE-FINAL. - -This combinator sometimes is called an "anamorphism." - -Note: implementations should take care that runtime stack limits do not -cause overflow when constructing large (''e.g.'', megabyte) strings with -{{string-unfold}}. - -(string-unfold-right p f g seed [base make-final]) -> string
- -This is a fundamental constructor for strings. - - -* G is used to generate a series of "seed" values from the initial seed: SEED, (G SEED), (G^2 SEED), (G^3 SEED), ... -* P tells us when to stop -- when it returns true when applied to one of these seed values. -* F maps each seed value to the corresponding character in the result string. These chars are assembled into the string in a right-to-left order. -* BASE is the optional initial/rightmost portion of the constructed string; it defaults to the empty string "". -* MAKE-FINAL is applied to the terminal seed value (on which P returns true) to produce the final/leftmost portion of the constructed string. It defaults to {{(lambda (x) "")}}. - -More precisely, the following (simple, inefficient) definitions hold: - - - ;;; Iterative - (define (string-unfold-right p f g seed base make-final) - (let lp ((seed seed) (ans base)) - (if (p seed) - (string-append (make-final seed) ans) - (lp (g seed) (string-append (string (f seed)) ans))))) - - ;;; Recursive - (define (string-unfold-right p f g seed base make-final) - (string-append (let recur ((seed seed)) - (if (p seed) (make-final seed) - (string-append (recur (g seed)) - (string (f seed))))) - base)) - -Interested functional programmers may enjoy noting that {{string-fold}} -and {{string-unfold-right}} are in some sense inverses. That is, given -operations KNULL?, KAR, KDR, KONS, and KNIL satisfying - -{{(KONS (KAR X) (KDR X))}} = X and {{(KNULL? KNIL)}} = #t - -then - - - (string-fold KONS KNIL (string-unfold-right KNULL? KAR KDR X)) = X - - -and - - - (string-unfold-right KNULL? KAR KDR (string-fold KONS KNIL S)) = S. - - -The final string constructed does not share storage with either BASE or the -value produced by MAKE-FINAL. - -Note: implementations should take care that runtime stack limits do not -cause overflow when constructing large (''e.g.'', megabyte) strings with -{{string-unfold-right.}} - -(string-for-each proc s [start end]) -> unspecified
- -Apply PROC to each character in S. {{string-for-each}} is required to -iterate from START to END in increasing order. - -(string-for-each-index proc s [start end]) -> unspecified
- -Apply PROC to each index of S, in order. The optional START/END pairs -restrict the endpoints of the loop. This is simply a method of looping over -a string that is guaranteed to be safe and correct. Example: - - - (let* ((len (string-length s)) - (ans (make-string len))) - (string-for-each-index - (lambda (i) (string-set! ans (- len i) (string-ref s i))) - s) - ans) - - -==== Replicate & rotate - -(xsubstring s from [to start end]) -> string
- -This is the "extended substring" procedure that implements replicated -copying of a substring of some string. - -S is a string; START and END are optional arguments that demarcate a -substring of S, defaulting to 0 and the length of S (''i.e.'', the whole -string). Replicate this substring up and down index space, in both the -positive and negative directions. For example, if S = "abcdefg", START=3, -and END=6, then we have the conceptual bidirectionally-infinite string - - - - -
...defdefdefdefdefdefd...
...-9-8-7-6-5-4-3-2-10+1+2+3+4+5+6+7+8+9...
- -{{xsubstring}} returns the substring of this string beginning at index -FROM, and ending at TO (which defaults to FROM+(END-START)). - -You can use {{xsubstring}} to perform a variety of tasks: - - -* To rotate a string left: {{(xsubstring "abcdef" 2)}} => {{"cdefab"}} -* To rotate a string right: {{(xsubstring "abcdef" -2)}} => {{"efabcd"}} -* To replicate a string: {{(xsubstring "abc" 0 7)}} => {{"abcabca"}} - -Note that - - -* The FROM/TO indices give a half-open range -- the characters from index FROM up to, but not including, index TO. -* The FROM/TO indices are not in terms of the index space for string S. They are in terms of the replicated index space of the substring defined by S, START, and END. - -It is an error if START=END -- although this is allowed by special -dispensation when FROM=TO. - -(string-xcopy! target tstart s sfrom [sto start end]) -> unspecified
- -Exactly the same as {{xsubstring,}} but the extracted text is written into -the string TARGET starting at index TSTART. This operation is not defined -if {{(eq? TARGET S)}} or these two arguments share storage -- you cannot -copy a string on top of itself. - - -==== Miscellaneous: insertion, parsing - -(string-replace s1 s2 start1 end1 [start2 end2]) -> string
- -Returns - - - (string-append (substring/shared S1 0 START1) - (substring/shared S2 START2 END2) - (substring/shared S1 END1 (string-length S1))) - - -That is, the segment of characters in S1 from START1 to END1 is replaced by -the segment of characters in S2 from START2 to END2. If START1=END1, this -simply splices the S2 characters into S1 at the specified index. - -Examples: - - - (string-replace "The TCL programmer endured daily ridicule." - "another miserable perl drone" 4 7 8 22 ) => - "The miserable perl programmer endured daily ridicule." - - (string-replace "It's easy to code it up in Scheme." "lots of fun" 5 9) => - "It's lots of fun to code it up in Scheme." - - (define (string-insert s i t) (string-replace s t i i)) - - (string-insert "It's easy to code it up in Scheme." 5 "really ") => - "It's really easy to code it up in Scheme." - -(string-tokenize s [token-set start end]) -> list
- -Split the string S into a list of substrings, where each substring is a -maximal non-empty contiguous sequence of characters from the character set -TOKEN-SET. - - -* TOKEN-SET defaults to {{char-set:graphic}} (see SRFI 14 for more on character sets and {{char-set:graphic}}). -* If START or END indices are provided, they restrict {{string-tokenize}} to operating on the indicated substring of S. - -This function provides a minimal parsing facility for simple applications. -More sophisticated parsers that handle quoting and backslash effects can -easily be constructed using regular-expression systems; be careful not to -use {{string-tokenize}} in contexts where more serious parsing is needed. - - - (string-tokenize "Help make programs run, run, RUN!") => - ("Help" "make" "programs" "run," "run," "RUN!") - - -==== Filtering & deleting - -(string-filter char/char-set/pred s [start end]) -> string
-(string-delete char/char-set/pred s [start end]) -> string
- -Filter the string S, retaining only those characters that satisfy / do not -satisfy the CHAR/CHAR-SET/PRED argument. If this argument is a procedure, -it is applied to the character as a predicate; if it is a char-set, the -character is tested for membership; if it is a character, it is used in an -equality test. - -If the string is unaltered by the filtering operation, these functions may -return either S or a copy of S. - - -=== Low-level procedures - -The following procedures are useful for writing other string-processing -functions. In a Scheme system that has a module or package system, these -procedures should be contained in a module named "string-lib-internals". - - -==== Start/end optional-argument parsing & checking utilities - -(string-parse-start+end proc s args) -> [rest start end]
-(string-parse-final-start+end proc s args) -> [start end]
- -{{string-parse-start+end}} may be used to parse a pair of optional -START/END arguments from an argument list, defaulting them to 0 and the -length of some string S, respectively. Let the length of string S be SLEN. - - -* If ARGS = (), the function returns {{(values '() 0 SLEN)}} -* If ARGS = (I), I is checked to ensure it is an exact integer, and that 0 <= i <= SLEN. Returns {{(values (cdr ARGS) I SLEN)}}. -* If ARGS = {{(I J ...)}}, I and J are checked to ensure they are exact integers, and that 0 <= I <= J <= SLEN. Returns {{(values (cddr ARGS) I J)}}. - -If any of the checks fail, an error condition is raised, and PROC is used -as part of the error condition -- it should be the client procedure whose -argument list {{string-parse-start+end}} is parsing. - -{{string-parse-final-start+end}} is exactly the same, except that the ARGS -list passed to it is required to be of length two or less; if it is longer, -an error condition is raised. It may be used when the optional START/END -parameters are final arguments to the procedure. - -Note that in all cases, these functions ensure that S is a string (by -necessity, since all cases apply {{string-length}} to S either to default -END or to bounds-check it). - -(let-string-start+end (start end [rest]) proc-exp s-exp args-exp body ...) -> value(s)
- -[Syntax] Syntactic sugar for an application of {{string-parse-start+end}} -or {{string-parse-final-start+end.}} - -If a REST variable is given, the form is equivalent to - - - (call-with-values - (lambda () (string-parse-start+end PROC-EXP S-EXP ARGS-EXP)) - (lambda (REST START END) BODY ...)) - - -If no REST variable is given, the form is equivalent to - - - (call-with-values - (lambda () (string-parse-final-start+end PROC-EXP S-EXP ARGS-EXP)) - (lambda (START END) BODY ...)) - - -(check-substring-spec proc s start end) -> unspecified
-(substring-spec-ok? s start end) -> boolean
- -Check values S, START and END to ensure they specify a valid substring. -This means that S is a string, START and END are exact integers, and 0 <= -START <= END <= {{(string-length S)}} - -If the values are not proper - - -* {{check-substring-spec}} raises an error condition. PROC is used as part of the error condition, and should be the procedure whose parameters we are checking. -* {{substring-spec-ok?}} returns false. - -Otherwise, {{substring-spec-ok?}} returns true, and -{{check-substring-spec}} simply returns (what it returns is not specified). - - -==== Knuth-Morris-Pratt searching - -The Knuth-Morris-Pratt string-search algorithm is a method of rapidly -scanning a sequence of text for the occurrence of some fixed string. It has -the advantage of never requiring backtracking -- hence, it is useful for -searching not just strings, but also other sequences of text that do not -support backtracking or random-access, such as input ports. These routines -package up the initialisation and searching phases of the algorithm for -general use. They also support searching through sequences of text that -arrive in buffered chunks, in that intermediate search state can be -carried across applications of the search loop from the end of one buffer -application to the next. - -A second critical property of KMP search is that it requires the allocation -of auxiliary memory proportional to the length of the pattern, but -''constant'' in the size of the character type. Alternate searching -algorithms frequently require the construction of a table with an entry for -every possible character -- which can be prohibitively expensive in a 16- -or 32-bit character representation. - -(make-kmp-restart-vector s [c= start end]) -> integer-vector
- -Build a Knuth-Morris-Pratt "restart vector," which is useful for quickly -searching character sequences for the occurrence of string S (or the -substring of S demarcated by the optional START/END parameters, if -provided). C= is a character-equality function used to construct the -restart vector. It defaults to {{char=?}}; use {{char-ci=?}} instead for -case-folded string search. - -The definition of the restart vector RV for string S is: If we have matched -chars 0..I-1 of S against some search string SS, and S[I] doesn't match -SS[K], then reset I := RV[I], and try again to match SS[K]. If RV[I] = -1, -then punt SS[K] completely, and move on to SS[K+1] and S[0]. - -In other words, if you have matched the first I chars of S, but the I+1'th -char doesn't match, RV[I] tells you what the next-longest prefix of S is -that you have matched. - -The following string-search function shows how a restart vector is used to -search. Note the attractive feature of the search process: it is "on line," -that is, it never needs to back up and reconsider previously seen data. It -simply consumes characters one-at-a-time until declaring a complete match -or reaching the end of the sequence. Thus, it can be easily adapted to -search other character sequences (such as ports) that do not provide random -access to their contents. - - - (define (find-substring pattern source start end) - (let ((plen (string-length pattern)) - (rv (make-kmp-restart-vector pattern))) - - ;; The search loop. SJ & PJ are redundant state. - (let lp ((si start) (pi 0) - (sj (- end start)) ; (- end si) -- how many chars left. - (pj plen)) ; (- plen pi) -- how many chars left. - - (if (= pi plen) (- si plen) ; Win. - - (and (<= pj sj) ; Lose. - - (if (char=? (string-ref source si) ; Test. - (string-ref pattern pi)) - (lp (+ 1 si) (+ 1 pi) (- sj 1) (- pj 1)) ; Advance. - - (let ((pi (vector-ref rv pi))) ; Retreat. - (if (= pi -1) - (lp (+ si 1) 0 (- sj 1) plen) ; Punt. - (lp si pi sj (- plen pi)))))))))) - -The optional START/END parameters restrict the restart vector to the -indicated substring of PAT; RV is END - START elements long. If START > -0, then RV is offset by START elements from PAT. That is, RV[I] describes -pattern element PAT[I + START]. Elements of RV are themselves indices that -range just over [0, END-START), ''not'' [START, END). - -Rationale: the actual value of RV is "position independent" -- it does -not depend on where in the PAT string the pattern occurs, but only on the -actual characters comprising the pattern. - -(kmp-step pat rv c i c= p-start) -> integer
- -This function encapsulates the work performed by one step of the KMP string -search; it can be used to scan strings, input ports, or other on-line -character sources for fixed strings. - -PAT is the non-empty string specifying the text for which we are searching. -RV is the Knuth-Morris-Pratt restart vector for the pattern, as constructed -by {{make-kmp-restart-vector.}} The pattern begins at PAT[P-START], and -is {{(string-length RV)}} characters long. C= is the character-equality -function used to construct the restart vector, typically {{char=?}} or -{{char-ci=?}}. - -Suppose the pattern is N characters in length: PAT[P-START, P-START + -N). We have already matched I characters: PAT[P-START, P-START + I). -(P-START is typically zero.) C is the next character in the input stream. -{{kmp-step}} returns the new I value -- that is, how much of the pattern -we have matched, ''including'' character C. When I reaches N, the entire -pattern has been matched. - -Thus a typical search loop looks like this: - - - (let lp ((i 0)) - (or (= i n) ; Win -- #t - (and (not (end-of-stream)) ; Lose -- #f - (lp (kmp-step pat rv (get-next-character) i char=? 0))))) - -Example: - - - ;; Read chars from IPORT until we find string PAT or hit EOF. - (define (port-skip pat iport) - (let* ((rv (make-kmp-restart-vector pat)) - (patlen (string-length pat))) - (let lp ((i 0) (nchars 0)) - (if (= i patlen) nchars ; Win -- nchars skipped - (let ((c (read-char iport))) - (if (eof-object? c) c ; Fail -- EOF - (lp (kmp-step pat rv c i char=? 0) ; Continue - (+ nchars 1)))))))) - -This procedure could be defined as follows: - - - (define (kmp-step pat rv c i c= p-start) - (let lp ((i i)) - (if (c= c (string-ref pat (+ i p-start))) ; Match => - (+ i 1) ; Done. - (let ((i (vector-ref rv i))) ; Back up in PAT. - (if (= i -1) 0 ; Can't back up more. - (lp i))))))) ; Keep going. - -Rationale: this procedure takes no optional arguments because it is -intended as an inner-loop primitive and we do not want any run-time penalty -for optional-argument parsing and defaulting, nor do we wish barriers to -procedure integration/inlining. - -(string-kmp-partial-search pat rv s i [c= p-start s-start s-end]) -> integer
- -Applies {{kmp-step}} across S; optional S-START/S-END bounds parameters -restrict search to a substring of S. The pattern is {{(vector-length RV)}} -characters long; optional P-START index indicates non-zero start of pattern -in PAT. - -Suppose PLEN = {{(vector-length RV)}} is the length of the pattern. I is an -integer index into the pattern (that is, 0 <= I < PLEN) indicating how much -of the pattern has already been matched. (This means the pattern must be -non-empty -- PLEN > 0.) - - -* On success, returns -J, where J is the index in S bounding the ''end'' of the pattern -- ''e.g.'', a value that could be used as the END parameter in a call to {{substring/shared}}. -* On continue, returns the current search state I' (an index into RV) when the search reached the end of the string. This is a non-negative integer. - -Hence: - - -* A negative return value indicates success, and says where in the string the match occurred. -* A non-negative return value provides the I to use for continued search in a following string. - -This utility is designed to allow searching for occurrences of a fixed -string that might extend across multiple buffers of text. This is why, -for example, we do not provide the index of the ''start'' of the match on -success -- it may have occurred in a previous buffer. - -To search a character sequence that arrives in "chunks," write a loop of -this form: - - - (let lp ((i 0)) - (and (not (end-of-data?)) ; Lose -- return #f. - (let* ((buf (get-next-chunk)) ; Get or fill up the buffer. - (i (string-kmp-partial-search pat rv buf i))) - (if (< i 0) (- i) ; Win -- return end index. - (lp i))))) ; Keep looking. - -Modulo start/end optional-argument parsing, this procedure could be defined -as follows: - - - (define (string-kmp-partial-search pat rv s i c= p-start s-start s-end) - (let ((patlen (vector-length rv))) - (let lp ((si s-start) ; An index into S. - (vi i)) ; An index into RV. - (cond ((= vi patlen) (- si)) ; Win. - ((= si end) vi) ; Ran off the end. - (else (lp (+ si 1) ; Match s[si] & loop. - (kmp-step pat rv (string-ref s si) - vi c= p-start))))))) - ----- - -Previous: [[Unit srfi-4]] - -Next: [[Unit srfi-14]] diff --git a/manual/Unit srfi-14 b/manual/Unit srfi-14 index 14d0763..f2d2995 100644 --- a/manual/Unit srfi-14 +++ b/manual/Unit srfi-14 @@ -935,6 +935,6 @@ The ASCII blank characters are the first two characters above -- horizontal tab and space. Latin-1 adds the no-break space. --- -Previous: [[Unit srfi-13]] +Previous: [[Unit srfi-4]] Next: [[Unit srfi-18]] diff --git a/manual/Unit srfi-4 b/manual/Unit srfi-4 index f2c263b..dfb37c4 100644 --- a/manual/Unit srfi-4 +++ b/manual/Unit srfi-4 @@ -323,4 +323,4 @@ undefined. --- Previous: [[Unit srfi-1]] -Next: [[Unit srfi-13]] +Next: [[Unit srfi-14]] diff --git a/manual/Unit utils b/manual/Unit utils index 8c1df37..ea4f730 100644 --- a/manual/Unit utils +++ b/manual/Unit utils @@ -8,7 +8,7 @@ This unit contains a "grab bag" of procedures without a good home, and which don't have to be available by default (as compared to the [[Unit extras|extras]] unit). -This unit uses the {{extras}} and {{srfi-13}} units. +This unit uses the {{extras}} unit. === Executing shell commands with formatstring and error checking diff --git a/rules.make b/rules.make index 19e620f..5936e4f 100644 --- a/rules.make +++ b/rules.make @@ -36,7 +36,7 @@ VPATH=$(SRCDIR) SETUP_API_OBJECTS_1 = setup-api setup-download LIBCHICKEN_SCHEME_OBJECTS_1 = \ - library eval data-structures ports files extras lolevel utils tcp srfi-1 srfi-4 srfi-13 \ + library eval data-structures ports files extras lolevel utils tcp srfi-1 srfi-4 \ srfi-14 srfi-18 srfi-69 $(POSIXFILE) irregex scheduler \ profiler stub expand modules chicken-syntax chicken-ffi-syntax build-version LIBCHICKEN_OBJECTS_1 = $(LIBCHICKEN_SCHEME_OBJECTS_1) runtime @@ -555,8 +555,6 @@ srfi-1.c: $(SRCDIR)srfi-1.scm $(SRCDIR)common-declarations.scm $(bootstrap-lib) srfi-4.c: $(SRCDIR)srfi-4.scm $(SRCDIR)common-declarations.scm $(bootstrap-lib) -srfi-13.c: $(SRCDIR)srfi-13.scm $(SRCDIR)common-declarations.scm - $(bootstrap-lib) srfi-14.c: $(SRCDIR)srfi-14.scm $(SRCDIR)common-declarations.scm $(bootstrap-lib) srfi-18.c: $(SRCDIR)srfi-18.scm $(SRCDIR)common-declarations.scm diff --git a/scripts/compile-all b/scripts/compile-all index c8aa16e..2be1b48 100755 --- a/scripts/compile-all +++ b/scripts/compile-all @@ -12,7 +12,7 @@ library_options="-optimize-level 2 -include-path . -include-path ./ -inline -ign compiler="$1" shift -for x in library eval data-structures ports files extras lolevel utils tcp srfi-1 srfi-4 srfi-13 srfi-14 srfi-18 srfi-69 posixunix posixwin irregex scheduler profiler stub expand modules chicken-syntax chicken-ffi-syntax build-version; do +for x in library eval data-structures ports files extras lolevel utils tcp srfi-1 srfi-4 srfi-14 srfi-18 srfi-69 posixunix posixwin irregex scheduler profiler stub expand modules chicken-syntax chicken-ffi-syntax build-version; do $compiler $x.scm $library_options -output-file /tmp/xxx.c "$@" done diff --git a/scripts/mini-salmonella.scm b/scripts/mini-salmonella.scm index ff0ef8b..6257a7e 100644 --- a/scripts/mini-salmonella.scm +++ b/scripts/mini-salmonella.scm @@ -4,7 +4,7 @@ (module mini-salmonella () (import scheme chicken) -(use posix files extras data-structures srfi-1 setup-api srfi-13 utils) +(use posix files extras data-structures srfi-1 setup-api utils) (define (usage code) (print "usage: mini-salmonella [-h] [-test] [-debug] [-download] [-trunk] EGGDIR [PREFIX]") diff --git a/setup-api.scm b/setup-api.scm index 1532a0f..cefa31b 100644 --- a/setup-api.scm +++ b/setup-api.scm @@ -24,7 +24,7 @@ ; POSSIBILITY OF SUCH DAMAGE. -(require-library srfi-1 irregex utils posix srfi-13 extras ports data-structures files) +(require-library srfi-1 irregex utils posix extras ports data-structures files) ; This code is partially quite messy and the API is not overly consistent, ; mainly because it has grown "organically" while the old chicken-setup program @@ -67,7 +67,7 @@ (import scheme chicken foreign irregex utils posix ports extras data-structures - srfi-1 srfi-13 files) + srfi-1 files) ;;; Constants, variables and parameters @@ -233,6 +233,11 @@ (and-let* ((tp (runtime-prefix))) (make-pathname tp fname))) +;; Simpler replacement for SRFI-13's string-prefix? +(define (string-prefix? prefix s) + (let ((pos (substring-index prefix s))) + (and pos (zero? pos)))) + (define (fixpath prg) (cond ((string=? prg "csc") (string-intersperse @@ -589,7 +594,7 @@ (define (extension-version #!optional defver) (let ([ver (cadr (extension-name-and-version))]) - (if (string-null? ver) + (if (equal? ver "") (and defver (->string defver)) ver ) ) ) diff --git a/setup-download.scm b/setup-download.scm index 1e70990..66145ac 100644 --- a/setup-download.scm +++ b/setup-download.scm @@ -24,8 +24,8 @@ ; POSSIBILITY OF SUCH DAMAGE. -(require-library extras irregex posix utils setup-api srfi-1 data-structures tcp srfi-13 - files) +(require-library extras irregex posix utils setup-api srfi-1 data-structures tcp + srfi-14 files) (module setup-download (retrieve-extension @@ -38,7 +38,7 @@ temporary-directory) (import scheme chicken foreign) - (import extras irregex posix utils srfi-1 data-structures tcp srfi-13 srfi-14 files + (import extras irregex posix utils srfi-1 data-structures tcp srfi-14 files setup-api) (define-constant +default-tcp-connect-timeout+ 30000) ; 30 seconds @@ -82,15 +82,24 @@ (when version (warning "extension has no such version - using default" egg version)) ) (define (list-eggs/local dir) - (string-concatenate (map (cut string-append <> "\n") (directory dir))) ) + (string-intersperse (map (cut string-append <> "\n") (directory dir)) "") ) (define (list-egg-versions/local name dir) (let ((eggdir (make-pathname dir (string-append name "/tags")))) (cond ((directory-exists? eggdir) - (string-concatenate - (map (cut string-append <> "\n") (directory eggdir)))) + (string-intersperse + (map (cut string-append <> "\n") (directory eggdir)) + "")) (else "unknown\n")))) + ;; Simpler replacement for SRFI-13's string-suffix? + (define (string-suffix? suffix s) + (let ((len-s (string-length s)) + (len-suffix (string-length suffix))) + (and (not (< len-s len-suffix)) + (string=? suffix + (substring s (fx- len-s len-suffix)))))) + (define (locate-egg/local egg dir #!optional version destination clean) (let* ((eggdir (make-pathname dir egg)) (tagdir (make-pathname eggdir "tags")) @@ -162,9 +171,10 @@ [parg (if password (string-append "--password='" password "'") "")]) (let ([cmd (make-svn-ls-cmd uarg parg repo)]) (d "listing extension directory ...~% ~a~%" cmd) - (string-concatenate + (string-intersperse (map (lambda (s) (string-append (string-chomp s "/") "\n")) - (with-input-from-pipe cmd read-lines))) ) ) ) + (with-input-from-pipe cmd read-lines)) + "")))) (define (list-egg-versions/svn name repo #!optional username password) (let* ((uarg (if username (string-append "--username='" username "'") "")) @@ -173,9 +183,10 @@ (input (with-input-from-pipe cmd read-lines))) (if (null? input) "unknown\n" - (string-concatenate + (string-intersperse (map (lambda (s) (string-append (string-chomp s "/") "\n")) - (with-input-from-pipe cmd read-lines))) ) )) + (with-input-from-pipe cmd read-lines)) + "")))) (define (locate-egg/svn egg repo #!optional version destination username password) (let* ([uarg (if username (string-append "--username='" username "'") "")] @@ -326,7 +337,7 @@ (network-failure "invalid response from server" h1))) (let loop () (let ([ln (read-line in)]) - (unless (string-null? ln) + (unless (equal? ln "") (when (match-chunked-transfer-encoding ln) (set! chunked #t)) (d "~a~%" ln) (loop) ) ) ) ) @@ -337,6 +348,14 @@ (set! in (open-input-string data))) ) ) (values in out))) + ;; Simpler replacement for SRFI-13's string-every + (define (string-every criteria s) + (let ((end (string-length s))) + (let lp ((i 0)) + (or (fx>= i end) + (and (char-set-contains? criteria (string-ref s i)) + (lp (fx+ i 1))))))) + (define (http-retrieve-files in out dest) (d "reading files ...~%") (let ((version #f)) @@ -415,18 +434,20 @@ (error "invalid response from server - please try again")) ((zero? size) (d "~%") - (string-concatenate-reverse data)) + (string-intersperse (reverse data) "")) (else (let ([chunk (read-string size in)]) (d ".") (read-line in) (get-chunks (cons chunk data)) ) ) ) ) )) - (define slashes (char-set #\\ #\/)) + (define slashes '("\\" "/")) (define (valid-extension-name? name) (and (not (member name '("" ".." "."))) - (not (string-index name slashes)))) + (not (any (lambda (slash) + (substring-index slash name)) + slashes)))) (define (check-egg-name name) (unless (valid-extension-name? name) diff --git a/setup.defaults b/setup.defaults index 3a14103..88638dd 100644 --- a/setup.defaults +++ b/setup.defaults @@ -30,7 +30,7 @@ (data-structures extras files foreign irregex lolevel ports tcp utils posix irregex setup-api setup-download - srfi-1 srfi-4 srfi-13 srfi-14 srfi-18 srfi-69 + srfi-1 srfi-4 srfi-14 srfi-18 srfi-69 ->) ) diff --git a/srfi-13.import.scm b/srfi-13.import.scm deleted file mode 100644 index 1dde448..0000000 --- a/srfi-13.import.scm +++ /dev/null @@ -1,133 +0,0 @@ -;;;; srfi-13.import.scm - import library for "srfi-13" module -; -; Copyright (c) 2008-2014, The CHICKEN Team -; All rights reserved. -; -; Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following -; conditions are met: -; -; Redistributions of source code must retain the above copyright notice, this list of conditions and the following -; disclaimer. -; Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following -; disclaimer in the documentation and/or other materials provided with the distribution. -; Neither the name of the author nor the names of its contributors may be used to endorse or promote -; products derived from this software without specific prior written permission. -; -; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS -; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY -; AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR -; CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -; CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -; SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR -; OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -; POSSIBILITY OF SUCH DAMAGE. - - -(##sys#register-primitive-module - 'srfi-13 - '(check-substring-spec - kmp-step - make-kmp-restart-vector - reverse-list->string - string->list - string-any - string-append/shared - string-ci< - string-ci<= - string-ci<> - string-ci= - string-ci> - string-ci>= - string-compare - string-compare-ci - string-concatenate - string-concatenate-reverse - string-concatenate-reverse/shared - string-concatenate/shared - string-contains - string-contains-ci - string-copy - string-copy! - string-count - string-delete - string-downcase - string-downcase! - string-drop - string-drop-right - string-every - string-fill! - string-filter - string-fold - string-fold-right - string-for-each - string-for-each-index - string-index - string-index-right - string-join - string-kmp-partial-search - string-map - string-map! - string-null? - string-pad - string-pad-right - string-parse-final-start+end - string-parse-start+end - string-prefix-ci? - string-prefix-length - string-prefix-length-ci - string-prefix? - string-replace - string-reverse - string-reverse! - string-skip - string-skip-right - string-suffix-ci? - string-suffix-length - string-suffix-length-ci - string-suffix? - string-tabulate - string-take - string-take-right - string-titlecase - string-titlecase! - string-tokenize - string-trim - string-trim-both - string-trim-right - string-unfold - string-unfold-right - string-upcase - string-upcase! - string-xcopy! - string< - string<= - string<> - string= - string> - string>= - substring-spec-ok? - substring/shared - xsubstring) - `((let-string-start+end - () - ,(##sys#ensure-transformer - (##sys#er-transformer - (lambda (form r c) - (##sys#check-syntax 'let-string-start+end form '(_ _ _ _ _ . _)) - (let ((s-e-r (cadr form)) - (proc (caddr form)) - (s-exp (cadddr form)) - (args-exp (car (cddddr form))) - (body (cdr (cddddr form))) - (%receive (r 'receive)) - (%string-parse-start+end (r 'string-parse-start+end)) - (%string-parse-final-start+end (r 'string-parse-final-start+end))) - (if (pair? (cddr s-e-r)) - `(,%receive (,(caddr s-e-r) ,(car s-e-r) ,(cadr s-e-r)) - (,%string-parse-start+end ,proc ,s-exp ,args-exp) - ,@body) - `(,%receive ,s-e-r - (,%string-parse-final-start+end ,proc ,s-exp ,args-exp) - ,@body) ) ))) - 'let-string-start+end)))) diff --git a/srfi-13.scm b/srfi-13.scm deleted file mode 100644 index dec54b2..0000000 --- a/srfi-13.scm +++ /dev/null @@ -1,2065 +0,0 @@ -;;;; srfi-13.scm - Shivers' reference implementation of SRFI-13 - - -(declare - (unit srfi-13) - (uses srfi-14) - (fixnum) - (hide %string-prefix? %string-hash %finish-string-concatenate-reverse %string-suffix-length %string-prefix-length - %string-map %string-copy! %string-compare %substring/shared %string-suffix? %multispan-repcopy! - %string-prefix-length-ci %string-suffix-length-ci %string-prefix-ci? %string-suffix-ci? - ##srfi13#traverse - %string-titlecase! %string-map! %string-compare-ci ##srfi13#string-fill!) - (not standard-bindings string-copy string->list string-fill!) - (disable-interrupts) ) - -(include "common-declarations.scm") - -(register-feature! 'srfi-13) - - -(define-inline (char-cased? c) (char-alphabetic? c)) -(define-inline (char-titlecase c) (char-upcase c)) - - -;;; SRFI 13 string library reference implementation -*- Scheme -*- -;;; Olin Shivers 5/2000 -;;; -;;; Copyright (c) 1988-1994 Massachusetts Institute of Technology. -;;; Copyright (c) 1998, 1999, 2000 Olin Shivers. All rights reserved. -;;; The details of the copyrights appear at the end of the file. Short -;;; summary: BSD-style open source. - -;;; Exports: -;;; string-map string-map! -;;; string-fold string-unfold -;;; string-fold-right string-unfold-right -;;; string-tabulate string-for-each string-for-each-index -;;; string-every string-any -;;; string-hash string-hash-ci -;;; string-compare string-compare-ci -;;; string= string< string> string<= string>= string<> -;;; string-ci= string-ci< string-ci> string-ci<= string-ci>= string-ci<> -;;; string-downcase string-upcase string-titlecase -;;; string-downcase! string-upcase! string-titlecase! -;;; string-take string-take-right -;;; string-drop string-drop-right -;;; string-pad string-pad-right -;;; string-trim string-trim-right string-trim-both -;;; string-filter string-delete -;;; string-index string-index-right -;;; string-skip string-skip-right -;;; string-count -;;; string-prefix-length string-prefix-length-ci -;;; string-suffix-length string-suffix-length-ci -;;; string-prefix? string-prefix-ci? -;;; string-suffix? string-suffix-ci? -;;; string-contains string-contains-ci -;;; string-copy! substring/shared -;;; string-reverse string-reverse! reverse-list->string -;;; string-concatenate string-concatenate/shared string-concatenate-reverse -;;; string-append/shared -;;; xsubstring string-xcopy! -;;; string-null? -;;; string-join -;;; string-tokenize -;;; string-replace -;;; -;;; R5RS extended: -;;; string->list string-copy string-fill! -;;; -;;; R5RS re-exports: -;;; string? make-string string-length string-ref string-set! -;;; -;;; R5RS re-exports (also defined here but commented-out): -;;; string string-append list->string -;;; -;;; Low-level routines: -;;; make-kmp-restart-vector string-kmp-partial-search kmp-step -;;; string-parse-start+end -;;; string-parse-final-start+end -;;; let-string-start+end -;;; check-substring-spec -;;; substring-spec-ok? - -;;; Imports -;;; This is a fairly large library. While it was written for portability, you -;;; must be aware of its dependencies in order to run it in a given scheme -;;; implementation. Here is a complete list of the dependencies it has and the -;;; assumptions it makes beyond stock R5RS Scheme: -;;; -;;; This code has the following non-R5RS dependencies: -;;; - (RECEIVE (var ...) mv-exp body ...) multiple-value binding macro; -;;; -;;; - Various imports from the char-set library for the routines that can -;;; take char-set arguments; -;;; -;;; - An n-ary ERROR procedure; -;;; -;;; - BITWISE-AND for the hash functions; -;;; -;;; - A simple CHECK-ARG procedure for checking parameter values; it is -;;; (lambda (pred val proc) -;;; (if (pred val) val (error "Bad arg" val pred proc))) -;;; -;;; - :OPTIONAL and LET-OPTIONALS* macros for parsing, defaulting & -;;; type-checking optional parameters from a rest argument; -;;; -;;; - CHAR-CASED? and CHAR-TITLECASE for the STRING-TITLECASE & -;;; STRING-TITLECASE! procedures. The former returns true iff a character is -;;; one that has case distinctions; in ASCII it returns true on a-z and A-Z. -;;; CHAR-TITLECASE is analagous to CHAR-UPCASE and CHAR-DOWNCASE. In ASCII & -;;; Latin-1, it is the same as CHAR-UPCASE. -;;; -;;; The code depends upon a small set of core string primitives from R5RS: -;;; MAKE-STRING STRING-REF STRING-SET! STRING? STRING-LENGTH SUBSTRING -;;; (Actually, SUBSTRING is not a primitive, but we assume that an -;;; implementation's native version is probably faster than one we could -;;; define, so we import it from R5RS.) -;;; -;;; The code depends upon a small set of R5RS character primitives: -;;; char? char=? char-ci=? charinteger (for the hash functions) -;;; -;;; We assume the following: -;;; - CHAR-DOWNCASE o CHAR-UPCASE = CHAR-DOWNCASE -;;; - CHAR-CI=? is equivalent to -;;; (lambda (c1 c2) (char=? (char-downcase (char-upcase c1)) -;;; (char-downcase (char-upcase c2)))) -;;; - CHAR-UPCASE, CHAR-DOWNCASE and CHAR-TITLECASE are locale-insensitive -;;; and consistent with Unicode's 1-1 char-mapping spec. -;;; These things are typically true, but if not, you would need to modify -;;; the case-mapping and case-insensitive routines. - -;;; Enough introductory blather. On to the source code. (But see the end of -;;; the file for further notes on porting & performance tuning.) - - -;;; Support for START/END substring specs -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -(define-syntax let-string-start+end2 - (syntax-rules () - ((_ (s-e1 s-e2 s-e3 s-e4) proc s1 s2 args . body) - (let ((procv proc)) - (let-string-start+end - (s-e1 s-e2 rest) procv s1 args - (let-string-start+end - (s-e3 s-e4) procv s2 rest - . body) ) ) ) ) ) - -(define-syntax let-string-start+end - (er-macro-transformer - (lambda (form r c) - (##sys#check-syntax 'let-string-start+end form '(_ _ _ _ _ . _)) - (let ((s-e-r (cadr form)) - (proc (caddr form)) - (s-exp (cadddr form)) - (args-exp (car (cddddr form))) - (body (cdr (cddddr form))) - (%receive (r 'receive)) - (%string-parse-start+end (r 'string-parse-start+end)) - (%string-parse-final-start+end (r 'string-parse-final-start+end))) - (if (pair? (cddr s-e-r)) - `(,%receive (,(caddr s-e-r) ,(car s-e-r) ,(cadr s-e-r)) - (,%string-parse-start+end ,proc ,s-exp ,args-exp) - ,@body) - `(,%receive ,s-e-r - (,%string-parse-final-start+end ,proc ,s-exp ,args-exp) - ,@body) ) )))) - - -;;; Returns three values: rest start end - -(define (string-parse-start+end proc s args) - (##sys#check-string s 'string-parse-start+end) - (let ((slen (string-length s))) - (if (pair? args) - - (let ((start (car args)) - (args (cdr args))) -; (if (and (integer? start) (exact? start) (>= start 0)) - (if (and (fixnum? start) (>= start 0)) - (receive (end args) - (if (pair? args) - (let ((end (car args)) - (args (cdr args))) -; (if (and (integer? end) (exact? end) (<= end slen)) - (if (and (fixnum? end) (<= end slen)) - (values end args) - (##sys#error 'string-parse-start+end "Illegal substring END spec" proc end s))) - (values slen args)) - (if (<= start end) (values args start end) - (##sys#error 'string-parse-start+end "Illegal substring START/END spec" - proc start end s))) - (##sys#error 'string-parse-start+end "Illegal substring START spec" proc start s))) - - (values '() 0 slen)))) - -(define (string-parse-final-start+end proc s args) - (receive (rest start end) (string-parse-start+end proc s args) - (if (pair? rest) (##sys#error 'string-parse-final-start+end "Extra arguments to procedure" proc rest) - (values start end)))) - -(define (substring-spec-ok? s start end) - (and (string? s) -; (integer? start) -; (exact? start) -; (integer? end) -; (exact? end) - (fixnum? start) - (fixnum? end) - (<= 0 start) - (<= start end) - (<= end (string-length s)))) - -(define (check-substring-spec proc s start end) - (if (not (substring-spec-ok? s start end)) - (##sys#error 'check-substring-spec "Illegal substring spec." proc s start end))) - - -;;; Defined by R5RS, so commented out here. -;(define (string . chars) -; (let* ((len (length chars)) -; (ans (make-string len))) -; (do ((i 0 (+ i 1)) -; (chars chars (cdr chars))) -; ((>= i len)) -; (string-set! ans i (car chars))) -; ans)) -; -;(define (string . chars) (string-unfold null? car cdr chars)) - - - -;;; substring/shared S START [END] -;;; string-copy S [START END] -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -;;; All this goop is just arg parsing & checking surrounding a call to the -;;; actual primitive, %SUBSTRING/SHARED. - -(define (substring/shared s start . maybe-end) -; (check-arg string? s substring/shared) - (let ((slen (string-length s))) -; (check-arg (lambda (start) (and (integer? start) (exact? start) (<= 0 start))) -; start substring/shared) - (let ([n (optional maybe-end slen)]) - (##sys#check-exact n 'substring/shared) - (check-substring-spec 'substring/shared s start n) - (%substring/shared s start n) ) ) ) -#| - (%substring/shared s start - (:optional maybe-end slen - (lambda (end) (and (integer? end) - (exact? end) - (<= start end) - (<= end slen))))))) -|# - -;;; Split out so that other routines in this library can avoid arg-parsing -;;; overhead for END parameter. -(define (%substring/shared s start end) - (if (and (zero? start) (= end (string-length s))) s - (##sys#substring s start end))) - -(define (string-copy s . maybe-start+end) - (let-string-start+end (start end) string-copy s maybe-start+end - (##sys#substring s start end))) - -;This library uses the R5RS SUBSTRING, but doesn't export it. -;Here is a definition, just for completeness. -;(define (substring s start end) -; (check-substring-spec substring s start end) -; (let* ((slen (- end start)) -; (ans (make-string slen))) -; (do ((i 0 (+ i 1)) -; (j start (+ j 1))) -; ((>= i slen) ans) -; (string-set! ans i (string-ref s j))))) - -;;; Basic iterators and other higher-order abstractions -;;; (string-map proc s [start end]) -;;; (string-map! proc s [start end]) -;;; (string-fold kons knil s [start end]) -;;; (string-fold-right kons knil s [start end]) -;;; (string-unfold p f g seed [base make-final]) -;;; (string-unfold-right p f g seed [base make-final]) -;;; (string-for-each proc s [start end]) -;;; (string-for-each-index proc s [start end]) -;;; (string-every char-set/char/pred s [start end]) -;;; (string-any char-set/char/pred s [start end]) -;;; (string-tabulate len proc) -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;; You want compiler support for high-level transforms on fold and unfold ops. -;;; You'd at least like a lot of inlining for clients of these procedures. -;;; Don't hold your breath. - -;;; Shut up, Olin (flw) - -(define (string-map proc s . maybe-start+end) -; (check-arg procedure? proc string-map) - (let-string-start+end (start end) string-map s maybe-start+end - (%string-map proc s start end))) - -(define (%string-map proc s start end) ; Internal utility - (let* ((len (- end start)) - (ans (make-string len))) - (do ((i 0 (+ i 1)) - (j start (+ j 1))) - ((>= i len)) - (string-set! ans i (proc (string-ref s j)))) - ans)) - -(define (string-map! proc s . maybe-start+end) -; (check-arg procedure? proc string-map!) - (let-string-start+end (start end) string-map! s maybe-start+end - (%string-map! proc s start end))) - -(define (%string-map! proc s start end) - (do ((i start (+ i 1))) - ((>= i end) s) - (string-set! s i (proc (string-ref s i))))) - -(define (string-fold kons knil s . maybe-start+end) -; (check-arg procedure? kons string-fold) - (let-string-start+end (start end) string-fold s maybe-start+end - (let lp ((v knil) (i start)) - (if (< i end) (lp (kons (string-ref s i) v) (+ i 1)) - v)))) - -(define (string-fold-right kons knil s . maybe-start+end) -; (check-arg procedure? kons string-fold-right) - (let-string-start+end (start end) string-fold-right s maybe-start+end - (let lp ((v knil) (i (- end 1))) - (if (>= i start) (lp (kons (string-ref s i) v) (- i 1)) - v)))) - -;;; (string-unfold p f g seed [base make-final]) -;;; This is the fundamental constructor for strings. -;;; - G is used to generate a series of "seed" values from the initial seed: -;;; SEED, (G SEED), (G^2 SEED), (G^3 SEED), ... -;;; - P tells us when to stop -- when it returns true when applied to one -;;; of these seed values. -;;; - F maps each seed value to the corresponding character -;;; in the result string. These chars are assembled into the -;;; string in a left-to-right order. -;;; - BASE is the optional initial/leftmost portion of the constructed string; -;;; it defaults to the empty string "". -;;; - MAKE-FINAL is applied to the terminal seed value (on which P returns -;;; true) to produce the final/rightmost portion of the constructed string. -;;; It defaults to (LAMBDA (X) ""). -;;; -;;; In other words, the following (simple, inefficient) definition holds: -;;; (define (string-unfold p f g seed base make-final) -;;; (string-append base -;;; (let recur ((seed seed)) -;;; (if (p seed) (make-final seed) -;;; (string-append (string (f seed)) -;;; (recur (g seed))))))) -;;; -;;; STRING-UNFOLD is a fairly powerful constructor -- you can use it to -;;; reverse a string, copy a string, convert a list to a string, read -;;; a port into a string, and so forth. Examples: -;;; (port->string port) = -;;; (string-unfold (compose eof-object? peek-char) -;;; read-char values port) -;;; -;;; (list->string lis) = (string-unfold null? car cdr lis) -;;; -;;; (tabulate-string f size) = (string-unfold (lambda (i) (= i size)) f add1 0) - -;;; A problem with the following simple formulation is that it pushes one -;;; stack frame for every char in the result string -- an issue if you are -;;; using it to read a 100kchar string. So we don't use it -- but I include -;;; it to give a clear, straightforward description of what the function -;;; does. - -;(define (string-unfold p f g seed base make-final) -; (let ((ans (let recur ((seed seed) (i (string-length base))) -; (if (p seed) -; (let* ((final (make-final seed)) -; (ans (make-string (+ i (string-length final))))) -; (string-copy! ans i final) -; ans) -; -; (let* ((c (f seed)) -; (s (recur (g seed) (+ i 1)))) -; (string-set! s i c) -; s))))) -; (string-copy! ans 0 base) -; ans)) - -;;; The strategy is to allocate a series of chunks into which we stash the -;;; chars as we generate them. Chunk size goes up in powers of two starting -;;; with 40 and levelling out at 4k, i.e. -;;; 40 40 80 160 320 640 1280 2560 4096 4096 4096 4096 4096... -;;; This should work pretty well for short strings, 1-line (80 char) strings, -;;; and longer ones. When done, we allocate an answer string and copy the -;;; chars over from the chunk buffers. - -(define (string-unfold p f g seed . base+make-final) -; (check-arg procedure? p string-unfold) -; (check-arg procedure? f string-unfold) -; (check-arg procedure? g string-unfold) - (let-optionals* base+make-final - ((base "") ; (string? base)) - (make-final (lambda (x) ""))) ;(procedure? make-final))) - (let lp ((chunks '()) ; Previously filled chunks - (nchars 0) ; Number of chars in CHUNKS - (chunk (make-string 40)) ; Current chunk into which we write - (chunk-len 40) - (i 0) ; Number of chars written into CHUNK - (seed seed)) - (let lp2 ((i i) (seed seed)) - (if (not (p seed)) - (let ((c (f seed)) - (seed (g seed))) - (if (< i chunk-len) - (begin (string-set! chunk i c) - (lp2 (+ i 1) seed)) - - (let* ((nchars2 (+ chunk-len nchars)) - (chunk-len2 (min 4096 nchars2)) - (new-chunk (make-string chunk-len2))) - (string-set! new-chunk 0 c) - (lp (cons chunk chunks) (+ nchars chunk-len) - new-chunk chunk-len2 1 seed)))) - - ;; We're done. Make the answer string & install the bits. - (let* ((final (make-final seed)) - (flen (string-length final)) - (base-len (string-length base)) - (j (+ base-len nchars i)) - (ans (make-string (+ j flen)))) - (%string-copy! ans j final 0 flen) ; Install FINAL. - (let ((j (- j i))) - (%string-copy! ans j chunk 0 i) ; Install CHUNK[0,I). - (let lp ((j j) (chunks chunks)) ; Install CHUNKS. - (if (pair? chunks) - (let* ((chunk (car chunks)) - (chunks (cdr chunks)) - (chunk-len (string-length chunk)) - (j (- j chunk-len))) - (%string-copy! ans j chunk 0 chunk-len) - (lp j chunks))))) - (%string-copy! ans 0 base 0 base-len) ; Install BASE. - ans)))))) - -(define (string-unfold-right p f g seed . base+make-final) - (let-optionals* base+make-final - ((base ""); (string? base)) - (make-final (lambda (x) ""))); (procedure? make-final))) - (let lp ((chunks '()) ; Previously filled chunks - (nchars 0) ; Number of chars in CHUNKS - (chunk (make-string 40)) ; Current chunk into which we write - (chunk-len 40) - (i 40) ; Number of chars available in CHUNK - (seed seed)) - (let lp2 ((i i) (seed seed)) ; Fill up CHUNK from right - (if (not (p seed)) ; to left. - (let ((c (f seed)) - (seed (g seed))) - (if (> i 0) - (let ((i (- i 1))) - (string-set! chunk i c) - (lp2 i seed)) - - (let* ((nchars2 (+ chunk-len nchars)) - (chunk-len2 (min 4096 nchars2)) - (new-chunk (make-string chunk-len2)) - (i (- chunk-len2 1))) - (string-set! new-chunk i c) - (lp (cons chunk chunks) (+ nchars chunk-len) - new-chunk chunk-len2 i seed)))) - - ;; We're done. Make the answer string & install the bits. - (let* ((final (make-final seed)) - (flen (string-length final)) - (base-len (string-length base)) - (chunk-used (- chunk-len i)) - (j (+ base-len nchars chunk-used)) - (ans (make-string (+ j flen)))) - (%string-copy! ans 0 final 0 flen) ; Install FINAL. - (%string-copy! ans flen chunk i chunk-len); Install CHUNK[I,). - (let lp ((j (+ flen chunk-used)) ; Install CHUNKS. - (chunks chunks)) - (if (pair? chunks) - (let* ((chunk (car chunks)) - (chunks (cdr chunks)) - (chunk-len (string-length chunk))) - (%string-copy! ans j chunk 0 chunk-len) - (lp (+ j chunk-len) chunks)) - (%string-copy! ans j base 0 base-len))); Install BASE. - ans)))))) - - -(define (string-for-each proc s . maybe-start+end) -; (check-arg procedure? proc string-for-each) - (let-string-start+end (start end) string-for-each s maybe-start+end - (let lp ((i start)) - (if (< i end) - (begin (proc (string-ref s i)) - (lp (+ i 1))))))) - -(define (string-for-each-index proc s . maybe-start+end) -; (check-arg procedure? proc string-for-each-index) - (let-string-start+end (start end) string-for-each-index s maybe-start+end - (let lp ((i start)) - (if (< i end) (begin (proc i) (lp (+ i 1))))))) - -(define (string-every criteria s . maybe-start+end) - (let-string-start+end (start end) string-every s maybe-start+end - (cond ((char? criteria) - (let lp ((i start)) - (or (>= i end) - (and (char=? criteria (string-ref s i)) - (lp (+ i 1)))))) - - ((char-set? criteria) - (let lp ((i start)) - (or (>= i end) - (and (char-set-contains? criteria (string-ref s i)) - (lp (+ i 1)))))) - - ((procedure? criteria) ; Slightly funky loop so that - (or (= start end) ; final (PRED S[END-1]) call - (let lp ((i start)) ; is a tail call. - (let ((c (string-ref s i)) - (i1 (+ i 1))) - (if (= i1 end) (criteria c) ; Tail call. - (and (criteria c) (lp i1))))))) - - (else (##sys#error 'string-every "Second param is neither char-set, char, or predicate procedure." - string-every criteria))))) - - -(define (string-any criteria s . maybe-start+end) - (let-string-start+end (start end) string-any s maybe-start+end - (cond ((char? criteria) - (let lp ((i start)) - (and (< i end) - (or (char=? criteria (string-ref s i)) - (lp (+ i 1)))))) - - ((char-set? criteria) - (let lp ((i start)) - (and (< i end) - (or (char-set-contains? criteria (string-ref s i)) - (lp (+ i 1)))))) - - ((procedure? criteria) ; Slightly funky loop so that - (and (< start end) ; final (PRED S[END-1]) call - (let lp ((i start)) ; is a tail call. - (let ((c (string-ref s i)) - (i1 (+ i 1))) - (if (= i1 end) (criteria c) ; Tail call - (or (criteria c) (lp i1))))))) - - (else (##sys#error 'string-any "Second param is neither char-set, char, or predicate procedure." - string-any criteria))))) - - -(define (string-tabulate proc len) -; (check-arg procedure? proc string-tabulate) -; (check-arg (lambda (val) (and (integer? val) (exact? val) (<= 0 val))) -; len string-tabulate) - (##sys#check-exact len 'string-tabulate) - (let ((s (make-string len))) - (do ((i (- len 1) (- i 1))) - ((< i 0)) - (string-set! s i (proc i))) - s)) - - - -;;; string-prefix-length[-ci] s1 s2 [start1 end1 start2 end2] -;;; string-suffix-length[-ci] s1 s2 [start1 end1 start2 end2] -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;; Find the length of the common prefix/suffix. -;;; It is not required that the two substrings passed be of equal length. -;;; This was microcode in MIT Scheme -- a very tightly bummed primitive. -;;; %STRING-PREFIX-LENGTH is the core routine of all string-comparisons, -;;; so should be as tense as possible. - -(define (%string-prefix-length s1 start1 end1 s2 start2 end2) - (let* ((delta (min (- end1 start1) (- end2 start2))) - (end1 (+ start1 delta))) - - (if (and (eq? s1 s2) (= start1 start2)) ; EQ fast path - delta - - (let lp ((i start1) (j start2)) ; Regular path - (if (or (>= i end1) - (not (char=? (string-ref s1 i) - (string-ref s2 j)))) - (- i start1) - (lp (+ i 1) (+ j 1))))))) - -(define (%string-suffix-length s1 start1 end1 s2 start2 end2) - (let* ((delta (min (- end1 start1) (- end2 start2))) - (start1 (- end1 delta))) - - (if (and (eq? s1 s2) (= end1 end2)) ; EQ fast path - delta - - (let lp ((i (- end1 1)) (j (- end2 1))) ; Regular path - (if (or (< i start1) - (not (char=? (string-ref s1 i) - (string-ref s2 j)))) - (- (- end1 i) 1) - (lp (- i 1) (- j 1))))))) - -(define (%string-prefix-length-ci s1 start1 end1 s2 start2 end2) - (let* ((delta (min (- end1 start1) (- end2 start2))) - (end1 (+ start1 delta))) - - (if (and (eq? s1 s2) (= start1 start2)) ; EQ fast path - delta - - (let lp ((i start1) (j start2)) ; Regular path - (if (or (>= i end1) - (not (char-ci=? (string-ref s1 i) - (string-ref s2 j)))) - (- i start1) - (lp (+ i 1) (+ j 1))))))) - -(define (%string-suffix-length-ci s1 start1 end1 s2 start2 end2) - (let* ((delta (min (- end1 start1) (- end2 start2))) - (start1 (- end1 delta))) - - (if (and (eq? s1 s2) (= end1 end2)) ; EQ fast path - delta - - (let lp ((i (- end1 1)) (j (- end2 1))) ; Regular path - (if (or (< i start1) - (not (char-ci=? (string-ref s1 i) - (string-ref s2 j)))) - (- (- end1 i) 1) - (lp (- i 1) (- j 1))))))) - - -(define (string-prefix-length s1 s2 . maybe-starts+ends) - (let-string-start+end2 (start1 end1 start2 end2) - string-prefix-length s1 s2 maybe-starts+ends - (%string-prefix-length s1 start1 end1 s2 start2 end2))) - -(define (string-suffix-length s1 s2 . maybe-starts+ends) - (let-string-start+end2 (start1 end1 start2 end2) - string-suffix-length s1 s2 maybe-starts+ends - (%string-suffix-length s1 start1 end1 s2 start2 end2))) - -(define (string-prefix-length-ci s1 s2 . maybe-starts+ends) - (let-string-start+end2 (start1 end1 start2 end2) - string-prefix-length-ci s1 s2 maybe-starts+ends - (%string-prefix-length-ci s1 start1 end1 s2 start2 end2))) - -(define (string-suffix-length-ci s1 s2 . maybe-starts+ends) - (let-string-start+end2 (start1 end1 start2 end2) - string-suffix-length-ci s1 s2 maybe-starts+ends - (%string-suffix-length-ci s1 start1 end1 s2 start2 end2))) - - -;;; string-prefix? s1 s2 [start1 end1 start2 end2] -;;; string-suffix? s1 s2 [start1 end1 start2 end2] -;;; string-prefix-ci? s1 s2 [start1 end1 start2 end2] -;;; string-suffix-ci? s1 s2 [start1 end1 start2 end2] -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;; These are all simple derivatives of the previous counting funs. - -(define (string-prefix? s1 s2 . maybe-starts+ends) - (let-string-start+end2 (start1 end1 start2 end2) - string-prefix? s1 s2 maybe-starts+ends - (%string-prefix? s1 start1 end1 s2 start2 end2))) - -(define (string-suffix? s1 s2 . maybe-starts+ends) - (let-string-start+end2 (start1 end1 start2 end2) - string-suffix? s1 s2 maybe-starts+ends - (%string-suffix? s1 start1 end1 s2 start2 end2))) - -(define (string-prefix-ci? s1 s2 . maybe-starts+ends) - (let-string-start+end2 (start1 end1 start2 end2) - string-prefix-ci? s1 s2 maybe-starts+ends - (%string-prefix-ci? s1 start1 end1 s2 start2 end2))) - -(define (string-suffix-ci? s1 s2 . maybe-starts+ends) - (let-string-start+end2 (start1 end1 start2 end2) - string-suffix-ci? s1 s2 maybe-starts+ends - (%string-suffix-ci? s1 start1 end1 s2 start2 end2))) - - -;;; Here are the internal routines that do the real work. - -(define (%string-prefix? s1 start1 end1 s2 start2 end2) - (let ((len1 (- end1 start1))) - (and (<= len1 (- end2 start2)) ; Quick check - (= (%string-prefix-length s1 start1 end1 - s2 start2 end2) - len1)))) - -(define (%string-suffix? s1 start1 end1 s2 start2 end2) - (let ((len1 (- end1 start1))) - (and (<= len1 (- end2 start2)) ; Quick check - (= len1 (%string-suffix-length s1 start1 end1 - s2 start2 end2))))) - -(define (%string-prefix-ci? s1 start1 end1 s2 start2 end2) - (let ((len1 (- end1 start1))) - (and (<= len1 (- end2 start2)) ; Quick check - (= len1 (%string-prefix-length-ci s1 start1 end1 - s2 start2 end2))))) - -(define (%string-suffix-ci? s1 start1 end1 s2 start2 end2) - (let ((len1 (- end1 start1))) - (and (<= len1 (- end2 start2)) ; Quick check - (= len1 (%string-suffix-length-ci s1 start1 end1 - s2 start2 end2))))) - - -;;; string-compare s1 s2 proc< proc= proc> [start1 end1 start2 end2] -;;; string-compare-ci s1 s2 proc< proc= proc> [start1 end1 start2 end2] -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;; Primitive string-comparison functions. -;;; Continuation order is different from MIT Scheme. -;;; Continuations are applied to s1's mismatch index; -;;; in the case of equality, this is END1. - -(define (%string-compare s1 start1 end1 s2 start2 end2 - proc< proc= proc>) - (let ((size1 (- end1 start1)) - (size2 (- end2 start2))) - (let ((match (%string-prefix-length s1 start1 end1 s2 start2 end2))) - (if (= match size1) - ((if (= match size2) proc= proc<) end1) - ((if (= match size2) - proc> - (if (char)) - (+ match start1)))))) - -(define (%string-compare-ci s1 start1 end1 s2 start2 end2 - proc< proc= proc>) - (let ((size1 (- end1 start1)) - (size2 (- end2 start2))) - (let ((match (%string-prefix-length-ci s1 start1 end1 s2 start2 end2))) - (if (= match size1) - ((if (= match size2) proc= proc<) end1) - ((if (= match size2) proc> - (if (char-ci)) - (+ start1 match)))))) - -(define (string-compare s1 s2 proc< proc= proc> . maybe-starts+ends) -; (check-arg procedure? proc< string-compare) -; (check-arg procedure? proc= string-compare) -; (check-arg procedure? proc> string-compare) - (let-string-start+end2 (start1 end1 start2 end2) - string-compare s1 s2 maybe-starts+ends - (%string-compare s1 start1 end1 s2 start2 end2 proc< proc= proc>))) - -(define (string-compare-ci s1 s2 proc< proc= proc> . maybe-starts+ends) -; (check-arg procedure? proc< string-compare-ci) -; (check-arg procedure? proc= string-compare-ci) -; (check-arg procedure? proc> string-compare-ci) - (let-string-start+end2 (start1 end1 start2 end2) - string-compare-ci s1 s2 maybe-starts+ends - (%string-compare-ci s1 start1 end1 s2 start2 end2 proc< proc= proc>))) - - - -;;; string= string<> string-ci= string-ci<> -;;; string< string> string-ci< string-ci> -;;; string<= string>= string-ci<= string-ci>= -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;; Simple definitions in terms of the previous comparison funs. -;;; I sure hope the %STRING-COMPARE calls get integrated. - -(define (string= s1 s2 . maybe-starts+ends) - (let-string-start+end2 (start1 end1 start2 end2) - string= s1 s2 maybe-starts+ends - (and (= (- end1 start1) (- end2 start2)) ; Quick filter - (or (and (eq? s1 s2) (= start1 start2)) ; Fast path - (%string-compare s1 start1 end1 s2 start2 end2 ; Real test - (lambda (i) #f) - (lambda (i) (if i #t #f)) - (lambda (i) #f)))))) - -(define (string<> s1 s2 . maybe-starts+ends) - (let-string-start+end2 (start1 end1 start2 end2) - string<> s1 s2 maybe-starts+ends - (or (not (= (- end1 start1) (- end2 start2))) ; Fast path - (and (not (and (eq? s1 s2) (= start1 start2))) ; Quick filter - (%string-compare s1 start1 end1 s2 start2 end2 ; Real test - (lambda (i) (if i #t #f)) - (lambda (i) #f) - (lambda (i) (if i #t #f))))))) - -(define (string< s1 s2 . maybe-starts+ends) - (let-string-start+end2 (start1 end1 start2 end2) - string< s1 s2 maybe-starts+ends - (if (and (eq? s1 s2) (= start1 start2)) ; Fast path - (< end1 end2) - - (%string-compare s1 start1 end1 s2 start2 end2 ; Real test - (lambda (i) (if i #t #f)) - (lambda (i) #f) - (lambda (i) #f))))) - -(define (string> s1 s2 . maybe-starts+ends) - (let-string-start+end2 (start1 end1 start2 end2) - string> s1 s2 maybe-starts+ends - (if (and (eq? s1 s2) (= start1 start2)) ; Fast path - (> end1 end2) - - (%string-compare s1 start1 end1 s2 start2 end2 ; Real test - (lambda (i) #f) - (lambda (i) #f) - (lambda (i) (if i #t #f)))))) - -(define (string<= s1 s2 . maybe-starts+ends) - (let-string-start+end2 (start1 end1 start2 end2) - string<= s1 s2 maybe-starts+ends - (if (and (eq? s1 s2) (= start1 start2)) ; Fast path - (<= end1 end2) - - (%string-compare s1 start1 end1 s2 start2 end2 ; Real test - (lambda (i) (if i #t #f)) - (lambda (i) (if i #t #f)) - (lambda (i) #f))))) - -(define (string>= s1 s2 . maybe-starts+ends) - (let-string-start+end2 (start1 end1 start2 end2) - string>= s1 s2 maybe-starts+ends - (if (and (eq? s1 s2) (= start1 start2)) ; Fast path - (>= end1 end2) - - (%string-compare s1 start1 end1 s2 start2 end2 ; Real test - (lambda (i) #f) - (lambda (i) (if i #t #f)) - (lambda (i) (if i #t #f)))))) - -(define (string-ci= s1 s2 . maybe-starts+ends) - (let-string-start+end2 (start1 end1 start2 end2) - string-ci= s1 s2 maybe-starts+ends - (and (= (- end1 start1) (- end2 start2)) ; Quick filter - (or (and (eq? s1 s2) (= start1 start2)) ; Fast path - (%string-compare-ci s1 start1 end1 s2 start2 end2 ; Real test - (lambda (i) #f) - (lambda (i) (if i #t #f)) - (lambda (i) #f)))))) - -(define (string-ci<> s1 s2 . maybe-starts+ends) - (let-string-start+end2 (start1 end1 start2 end2) - string-ci<> s1 s2 maybe-starts+ends - (or (not (= (- end1 start1) (- end2 start2))) ; Fast path - (and (not (and (eq? s1 s2) (= start1 start2))) ; Quick filter - (%string-compare-ci s1 start1 end1 s2 start2 end2 ; Real test - (lambda (i) (if i #t #f)) - (lambda (i) #f) - (lambda (i) (if i #t #f))))))) - -(define (string-ci< s1 s2 . maybe-starts+ends) - (let-string-start+end2 (start1 end1 start2 end2) - string-ci< s1 s2 maybe-starts+ends - (if (and (eq? s1 s2) (= start1 start2)) ; Fast path - (< end1 end2) - - (%string-compare-ci s1 start1 end1 s2 start2 end2 ; Real test - (lambda (i) (if i #t #f)) - (lambda (i) #f) - (lambda (i) #f))))) - -(define (string-ci> s1 s2 . maybe-starts+ends) - (let-string-start+end2 (start1 end1 start2 end2) - string-ci> s1 s2 maybe-starts+ends - (if (and (eq? s1 s2) (= start1 start2)) ; Fast path - (> end1 end2) - - (%string-compare-ci s1 start1 end1 s2 start2 end2 ; Real test - (lambda (i) #f) - (lambda (i) #f) - (lambda (i) (if i #t #f)))))) - -(define (string-ci<= s1 s2 . maybe-starts+ends) - (let-string-start+end2 (start1 end1 start2 end2) - string-ci<= s1 s2 maybe-starts+ends - (if (and (eq? s1 s2) (= start1 start2)) ; Fast path - (<= end1 end2) - - (%string-compare-ci s1 start1 end1 s2 start2 end2 ; Real test - (lambda (i) (if i #t #f)) - (lambda (i) (if i #t #f)) - (lambda (i) #f))))) - -(define (string-ci>= s1 s2 . maybe-starts+ends) - (let-string-start+end2 (start1 end1 start2 end2) - string-ci>= s1 s2 maybe-starts+ends - (if (and (eq? s1 s2) (= start1 start2)) ; Fast path - (>= end1 end2) - - (%string-compare-ci s1 start1 end1 s2 start2 end2 ; Real test - (lambda (i) #f) - (lambda (i) (if i #t #f)) - (lambda (i) (if i #t #f)))))) - - -;;; Hash -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;; Compute (c + 37 c + 37^2 c + ...) modulo BOUND. -;;; If you keep BOUND small enough, the intermediate calculations will -;;; always be fixnums. How small is dependent on the underlying Scheme system; -;;; we use a default BOUND of 2^22 = 4194304, which should hack it in -;;; Schemes that give you at least 29 signed bits for fixnums. The core -;;; calculation that you don't want to overflow is, worst case, -;;; (+ 65535 (* 37 (- bound 1))) -;;; where 65535 is the max character code. Choose the default BOUND to be the -;;; biggest power of two that won't cause this expression to fixnum overflow, -;;; and everything will be copacetic. - -(define (%string-hash s char->int bound start end) - (let ((iref (lambda (s i) (char->int (string-ref s i)))) - ;; Compute a 111...1 mask that will cover BOUND-1: - (mask (let lp ((i #x10000)) ; Let's skip first 16 iterations, eh? - (if (>= i bound) (- i 1) (lp (+ i i)))))) - (let lp ((i start) (ans 0)) - (if (>= i end) (modulo ans bound) - (lp (+ i 1) (fxand mask (+ (* 37 ans) (iref s i)))))))) - -(define (string-hash s . maybe-bound+start+end) - (let-optionals* maybe-bound+start+end ((bound 4194304); (and (integer? bound) - ; (exact? bound) - ; (<= 0 bound))) - rest) - (if (zero? bound) (set! bound 4194304)) - (##sys#check-exact bound 'string-hash) - (let-string-start+end (start end) string-hash s rest - (%string-hash s char->integer bound start end)))) - -(define (string-hash-ci s . maybe-bound+start+end) - (let-optionals* maybe-bound+start+end ((bound 4194304) ;(and (integer? bound) - ; (exact? bound) - ; (<= 0 bound))) - rest) - (if (zero? bound) (set! bound 4194304)) - (##sys#check-exact bound 'string-hash-ci) - (let-string-start+end (start end) string-hash-ci s rest - (%string-hash s (lambda (c) (char->integer (char-downcase c))) - bound start end)))) - -;;; Case hacking -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;; string-upcase s [start end] -;;; string-upcase! s [start end] -;;; string-downcase s [start end] -;;; string-downcase! s [start end] -;;; -;;; string-titlecase s [start end] -;;; string-titlecase! s [start end] -;;; Capitalize every contiguous alpha sequence: capitalise -;;; first char, lowercase rest. - -(define (string-upcase s . maybe-start+end) - (let-string-start+end (start end) string-upcase s maybe-start+end - (%string-map char-upcase s start end))) - -(define (string-upcase! s . maybe-start+end) - (let-string-start+end (start end) string-upcase! s maybe-start+end - (%string-map! char-upcase s start end))) - -(define (string-downcase s . maybe-start+end) - (let-string-start+end (start end) string-downcase s maybe-start+end - (%string-map char-downcase s start end))) - -(define (string-downcase! s . maybe-start+end) - (let-string-start+end (start end) string-downcase! s maybe-start+end - (%string-map! char-downcase s start end))) - -(define (%string-titlecase! s start end) - (let lp ((i start)) - (cond ((string-index s char-cased? i end) => - (lambda (i) - (string-set! s i (char-titlecase (string-ref s i))) - (let ((i1 (+ i 1))) - (cond ((string-skip s char-cased? i1 end) => - (lambda (j) - (string-downcase! s i1 j) - (lp (+ j 1)))) - (else (string-downcase! s i1 end))))))))) - -(define (string-titlecase! s . maybe-start+end) - (let-string-start+end (start end) string-titlecase! s maybe-start+end - (%string-titlecase! s start end))) - -(define (string-titlecase s . maybe-start+end) - (let-string-start+end (start end) string-titlecase! s maybe-start+end - (let ((ans (##sys#substring s start end))) - (%string-titlecase! ans 0 (- end start)) - ans))) - - -;;; Cutting & pasting strings -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;; string-take string nchars -;;; string-drop string nchars -;;; -;;; string-take-right string nchars -;;; string-drop-right string nchars -;;; -;;; string-pad string k [char start end] -;;; string-pad-right string k [char start end] -;;; -;;; string-trim string [char/char-set/pred start end] -;;; string-trim-right string [char/char-set/pred start end] -;;; string-trim-both string [char/char-set/pred start end] -;;; -;;; These trimmers invert the char-set meaning from MIT Scheme -- you -;;; say what you want to trim. - -(define (string-take s n) -; (check-arg string? s string-take) -; (check-arg (lambda (val) (and (integer? n) (exact? n) -; (<= 0 n (string-length s)))) -; n string-take) - (##sys#check-string s 'string-take) - (##sys#check-range n 0 (fx+ 1 (##sys#size s)) 'string-take) - (%substring/shared s 0 n)) - -(define (string-take-right s n) -; (check-arg string? s string-take-right) - (##sys#check-string s 'string-take-right) - (##sys#check-range n 0 (fx+ 1 (##sys#size s)) 'string-take-right) - (let ((len (##sys#size s))) -; (check-arg (lambda (val) (and (integer? n) (exact? n) (<= 0 n len))) -; n string-take-right) - (%substring/shared s (- len n) len))) - -(define (string-drop s n) -; (check-arg string? s string-drop) - (##sys#check-string s 'string-drop) - (##sys#check-range n 0 (fx+ 1 (##sys#size s)) 'string-drop) - (let ((len (##sys#size s))) -; (check-arg (lambda (val) (and (integer? n) (exact? n) (<= 0 n len))) -; n string-drop) - (%substring/shared s n len))) - -(define (string-drop-right s n) -; (check-arg string? s string-drop-right) - (##sys#check-string s 'string-drop-right) - (##sys#check-range n 0 (fx+ 1 (##sys#size s)) 'string-drop-right) - (let ((len (##sys#size s))) -; (check-arg (lambda (val) (and (integer? n) (exact? n) (<= 0 n len))) -; n string-drop-right) - (%substring/shared s 0 (- len n)))) - - -(define (string-trim s . criteria+start+end) - (let-optionals* criteria+start+end ((criteria char-set:whitespace) rest) - (let-string-start+end (start end) string-trim s rest - (cond ((string-skip s criteria start end) => - (lambda (i) (%substring/shared s i end))) - (else ""))))) - -(define (string-trim-right s . criteria+start+end) - (let-optionals* criteria+start+end ((criteria char-set:whitespace) rest) - (let-string-start+end (start end) string-trim-right s rest - (cond ((string-skip-right s criteria start end) => - (lambda (i) (%substring/shared s start (+ 1 i)))) - (else ""))))) - -(define (string-trim-both s . criteria+start+end) - (let-optionals* criteria+start+end ((criteria char-set:whitespace) rest) - (let-string-start+end (start end) string-trim-both s rest - (cond ((string-skip s criteria start end) => - (lambda (i) - (%substring/shared s i (+ 1 (string-skip-right s criteria i end))))) - (else ""))))) - - -(define (string-pad-right s n . char+start+end) - (##sys#check-exact n 'string-pad-right) - (let-optionals* char+start+end ((char #\space) rest) ; (char? char)) rest) - (let-string-start+end (start end) string-pad-right s rest - (let ((len (- end start))) - (if (<= n len) - (%substring/shared s start (+ start n)) - (let ((ans (make-string n char))) - (%string-copy! ans 0 s start end) - ans)))))) - -(define (string-pad s n . char+start+end) - (##sys#check-exact n 'string-pad) - (let-optionals* char+start+end ((char #\space) rest) ; (char? char)) rest) - (let-string-start+end (start end) string-pad s rest - (let ((len (- end start))) - (if (<= n len) - (%substring/shared s (- end n) end) - (let ((ans (make-string n char))) - (%string-copy! ans (- n len) s start end) - ans)))))) - - - -;;; Filtering strings -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;; string-delete char/char-set/pred string [start end] -;;; string-filter char/char-set/pred string [start end] -;;; -;;; If the criteria is a char or char-set, we scan the string twice with -;;; string-fold -- once to determine the length of the result string, -;;; and once to do the filtered copy. -;;; If the criteria is a predicate, we don't do this double-scan strategy, -;;; because the predicate might have side-effects or be very expensive to -;;; compute. So we preallocate a temp buffer pessimistically, and only do -;;; one scan over S. This is likely to be faster and more space-efficient -;;; than consing a list. - -(define (string-delete criteria s . maybe-start+end) - (let-string-start+end (start end) string-delete s maybe-start+end - (if (procedure? criteria) - (let* ((slen (- end start)) - (temp (make-string slen)) - (ans-len (string-fold (lambda (c i) - (if (criteria c) i - (begin (string-set! temp i c) - (+ i 1)))) - 0 s start end))) - (if (= ans-len slen) temp (##sys#substring temp 0 ans-len))) - - (let* ((cset (cond ((char-set? criteria) criteria) - ((char? criteria) (char-set criteria)) - (else (##sys#error 'string-delete "string-delete criteria not predicate, char or char-set" criteria)))) - (len (string-fold (lambda (c i) (if (char-set-contains? cset c) - i - (+ i 1))) - 0 s start end)) - (ans (make-string len))) - (string-fold (lambda (c i) (if (char-set-contains? cset c) - i - (begin (string-set! ans i c) - (+ i 1)))) - 0 s start end) - ans)))) - -(define (string-filter criteria s . maybe-start+end) - (let-string-start+end (start end) string-filter s maybe-start+end - (if (procedure? criteria) - (let* ((slen (- end start)) - (temp (make-string slen)) - (ans-len (string-fold (lambda (c i) - (if (criteria c) - (begin (string-set! temp i c) - (+ i 1)) - i)) - 0 s start end))) - (if (= ans-len slen) temp (##sys#substring temp 0 ans-len))) - - (let* ((cset (cond ((char-set? criteria) criteria) - ((char? criteria) (char-set criteria)) - (else (##sys#error 'string-filter "string-delete criteria not predicate, char or char-set" criteria)))) - - (len (string-fold (lambda (c i) (if (char-set-contains? cset c) - (+ i 1) - i)) - 0 s start end)) - (ans (make-string len))) - (string-fold (lambda (c i) (if (char-set-contains? cset c) - (begin (string-set! ans i c) - (+ i 1)) - i)) - 0 s start end) - ans)))) - - -;;; String search -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;; string-index string char/char-set/pred [start end] -;;; string-index-right string char/char-set/pred [start end] -;;; string-skip string char/char-set/pred [start end] -;;; string-skip-right string char/char-set/pred [start end] -;;; string-count char/char-set/pred string [start end] -;;; There's a lot of replicated code here for efficiency. -;;; For example, the char/char-set/pred discrimination has -;;; been lifted above the inner loop of each proc. - -(define (string-index str criteria . maybe-start+end) - (let-string-start+end (start end) string-index str maybe-start+end - (cond ((char? criteria) - (let lp ((i start)) - (and (< i end) - (if (char=? criteria (string-ref str i)) i - (lp (+ i 1)))))) - ((char-set? criteria) - (let lp ((i start)) - (and (< i end) - (if (char-set-contains? criteria (string-ref str i)) i - (lp (+ i 1)))))) - ((procedure? criteria) - (let lp ((i start)) - (and (< i end) - (if (criteria (string-ref str i)) i - (lp (+ i 1)))))) - (else (##sys#error 'string-index "Second param is neither char-set, char, or predicate procedure." - string-index criteria))))) - -(define (string-index-right str criteria . maybe-start+end) - (let-string-start+end (start end) string-index-right str maybe-start+end - (cond ((char? criteria) - (let lp ((i (- end 1))) - (and (>= i start) - (if (char=? criteria (string-ref str i)) i - (lp (- i 1)))))) - ((char-set? criteria) - (let lp ((i (- end 1))) - (and (>= i start) - (if (char-set-contains? criteria (string-ref str i)) i - (lp (- i 1)))))) - ((procedure? criteria) - (let lp ((i (- end 1))) - (and (>= i start) - (if (criteria (string-ref str i)) i - (lp (- i 1)))))) - (else (##sys#error 'string-index-right "Second param is neither char-set, char, or predicate procedure." - string-index-right criteria))))) - -(define (string-skip str criteria . maybe-start+end) - (let-string-start+end (start end) string-skip str maybe-start+end - (cond ((char? criteria) - (let lp ((i start)) - (and (< i end) - (if (char=? criteria (string-ref str i)) - (lp (+ i 1)) - i)))) - ((char-set? criteria) - (let lp ((i start)) - (and (< i end) - (if (char-set-contains? criteria (string-ref str i)) - (lp (+ i 1)) - i)))) - ((procedure? criteria) - (let lp ((i start)) - (and (< i end) - (if (criteria (string-ref str i)) (lp (+ i 1)) - i)))) - (else (##sys#error 'string-skip "Second param is neither char-set, char, or predicate procedure." - string-skip criteria))))) - -(define (string-skip-right str criteria . maybe-start+end) - (let-string-start+end (start end) string-skip-right str maybe-start+end - (cond ((char? criteria) - (let lp ((i (- end 1))) - (and (>= i start) - (if (char=? criteria (string-ref str i)) - (lp (- i 1)) - i)))) - ((char-set? criteria) - (let lp ((i (- end 1))) - (and (>= i start) - (if (char-set-contains? criteria (string-ref str i)) - (lp (- i 1)) - i)))) - ((procedure? criteria) - (let lp ((i (- end 1))) - (and (>= i start) - (if (criteria (string-ref str i)) (lp (- i 1)) - i)))) - (else (##sys#error 'string-skip-right "CRITERIA param is neither char-set or char." - string-skip-right criteria))))) - - -; [felix] Boooh! original code had "s" and "criteria" in the wrong order: - -(define (string-count s criteria . maybe-start+end) - (let-string-start+end (start end) string-count s maybe-start+end - (cond ((char? criteria) - (do ((i start (+ i 1)) - (count 0 (if (char=? criteria (string-ref s i)) - (+ count 1) - count))) - ((>= i end) count))) - - ((char-set? criteria) - (do ((i start (+ i 1)) - (count 0 (if (char-set-contains? criteria (string-ref s i)) - (+ count 1) - count))) - ((>= i end) count))) - - ((procedure? criteria) - (do ((i start (+ i 1)) - (count 0 (if (criteria (string-ref s i)) (+ count 1) count))) - ((>= i end) count))) - - (else (##sys#error 'string-count "CRITERIA param is neither char-set or char." - string-count criteria))))) - - - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;; string-fill! string char [start end] -;;; -;;; string-copy! to tstart from [fstart fend] -;;; Guaranteed to work, even if s1 eq s2. - -(define (string-fill! s char . maybe-start+end) -; (check-arg char? char string-fill!) - (let-string-start+end (start end) string-fill! s maybe-start+end - (do ((i (- end 1) (- i 1))) - ((< i start)) - (string-set! s i char)))) - -(define (string-copy! to tstart from . maybe-fstart+fend) - (let-string-start+end (fstart fend) string-copy! from maybe-fstart+fend -; (check-arg integer? tstart string-copy!) - (##sys#check-exact tstart 'string-copy!) - (check-substring-spec string-copy! to tstart (+ tstart (- fend fstart))) - (%string-copy! to tstart from fstart fend))) - -;;; Library-internal routine -(define (%string-copy! to tstart from fstart fend) - (##core#inline "C_substring_copy" from to fstart fend tstart)) - - -;;; Returns starting-position in STRING or #f if not true. -;;; This implementation is slow & simple. It is useful as a "spec" or for -;;; comparison testing with fancier implementations. -;;; See below for fast KMP version. - -(define (string-contains string substring . maybe-starts+ends) - (let-string-start+end2 (start1 end1 start2 end2) - string-contains string substring maybe-starts+ends - (let* ((len (fx- end2 start2)) - (i-bound (fx- end1 len))) - (let lp ((i start1)) - (and (fx<= i i-bound) - (if (string= string substring i (fx+ i len) start2 end2) - i - (lp (fx+ i 1)))))))) - -(define (string-contains-ci string substring . maybe-starts+ends) - (let-string-start+end2 (start1 end1 start2 end2) - string-contains string substring maybe-starts+ends - (let* ((len (fx- end2 start2)) - (i-bound (fx- end1 len))) - (let lp ((i start1)) - (and (fx<= i i-bound) - (if (string-ci= string substring i (fx+ i len) start2 end2) - i - (lp (fx+ i 1)))))))) - - -;;; Searching for an occurrence of a substring -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -; this is completely broken and was probably never tested. Thanks, Olin! (flw) - - -;;; Knuth-Morris-Pratt string searching -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;; See -;;; "Fast pattern matching in strings" -;;; SIAM J. Computing 6(2):323-350 1977 -;;; D. E. Knuth, J. H. Morris and V. R. Pratt -;;; also described in -;;; "Pattern matching in strings" -;;; Alfred V. Aho -;;; Formal Language Theory - Perspectives and Open Problems -;;; Ronald V. Brook (editor) -;;; This algorithm is O(m + n) where m and n are the -;;; lengths of the pattern and string respectively - - -;;; (make-kmp-restart-vector pattern [c= start end]) -> integer-vector -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;; Compute the KMP restart vector RV for string PATTERN. If -;;; we have matched chars 0..i-1 of PATTERN against a search string S, and -;;; PATTERN[i] doesn't match S[k], then reset i := RV[i], and try again to -;;; match S[k]. If RV[i] = -1, then punt S[k] completely, and move on to -;;; S[k+1] and PATTERN[0] -- no possible match of PAT[0..i] contains S[k]. -;;; -;;; In other words, if you have matched the first i chars of PATTERN, but -;;; the i+1'th char doesn't match, RV[i] tells you what the next-longest -;;; prefix of PATTERN is that you have matched. -;;; -;;; - C= (default CHAR=?) is used to compare characters for equality. -;;; Pass in CHAR-CI=? for case-folded string search. -;;; -;;; - START & END restrict the pattern to the indicated substring; the -;;; returned vector will be of length END - START. The numbers stored -;;; in the vector will be values in the range [0,END-START) -- that is, -;;; they are valid indices into the restart vector; you have to add START -;;; to them to use them as indices into PATTERN. -;;; -;;; I've split this out as a separate function in case other constant-string -;;; searchers might want to use it. -;;; -;;; E.g.: -;;; a b d a b x -;;; #(-1 0 0 -1 1 2) - -(define (make-kmp-restart-vector pattern . maybe-c=+start+end) - (let-optionals* maybe-c=+start+end - ((c= char=?) rest) ; (procedure? c=)) - (receive (rest2 start end) (string-parse-start+end make-kmp-restart-vector pattern rest) - (let* ((rvlen (- end start)) - (rv (make-vector rvlen -1))) - (if (> rvlen 0) - (let ((rvlen-1 (- rvlen 1)) - (c0 (string-ref pattern start))) - - ;; Here's the main loop. We have set rv[0] ... rv[i]. - ;; K = I + START -- it is the corresponding index into PATTERN. - (let lp1 ((i 0) (j -1) (k start)) - (if (< i rvlen-1) - - ;; lp2 invariant: - ;; pat[(k-j) .. k-1] matches pat[start .. start+j-1] - ;; or j = -1. - (let lp2 ((j j)) - - (cond ((= j -1) - (let ((i1 (+ i 1)) - (ck+1 (string-ref pattern (add1 k)))) - (vector-set! rv i1 (if (c= ck+1 c0) -1 0)) - (lp1 i1 0 (+ k 1)))) - - ;; pat[(k-j) .. k] matches pat[start..start+j]. - ((c= (string-ref pattern k) - (string-ref pattern (+ j start))) - (let* ((i1 (+ 1 i)) - (j1 (+ 1 j))) - (vector-set! rv i1 j1) - (lp1 i1 j1 (+ k 1)))) - - (else (lp2 (vector-ref rv j))))))))) - rv)))) - - -;;; We've matched I chars from PAT. C is the next char from the search string. -;;; Return the new I after handling C. -;;; -;;; The pattern is (VECTOR-LENGTH RV) chars long, beginning at index PAT-START -;;; in PAT (PAT-START is usually 0). The I chars of the pattern we've matched -;;; are -;;; PAT[PAT-START .. PAT-START + I]. -;;; -;;; It's *not* an oversight that there is no friendly error checking or -;;; defaulting of arguments. This is a low-level, inner-loop procedure -;;; that we want integrated/inlined into the point of call. - -(define (kmp-step pat rv c i c= p-start) - (let lp ((i i)) - (if (c= c (string-ref pat (+ i p-start))) ; Match => - (+ i 1) ; Done. - (let ((i (vector-ref rv i))) ; Back up in PAT. - (if (= i -1) 0 ; Can't back up further. - (lp i)))))) ; Keep trying for match. - -;;; Zip through S[start,end), looking for a match of PAT. Assume we've -;;; already matched the first I chars of PAT when we commence at S[start]. -;;; - <0: If we find a match *ending* at index J, return -J. -;;; - >=0: If we get to the end of the S[start,end) span without finding -;;; a complete match, return the number of chars from PAT we'd matched -;;; when we ran off the end. -;;; -;;; This is useful for searching *across* buffers -- that is, when your -;;; input comes in chunks of text. We hand-integrate the KMP-STEP loop -;;; for speed. - -(define (string-kmp-partial-search pat rv s i . c=+p-start+s-start+s-end) -; (check-arg vector? rv string-kmp-partial-search) - (let-optionals* c=+p-start+s-start+s-end - ((c= char=?) ; (procedure? c=)) - (p-start 0) rest) ; (and (integer? p-start) (exact? p-start) (<= 0 p-start))) - (receive (rest2 s-start s-end) (string-parse-start+end string-kmp-partial-search s rest) - ;; Enough prelude. Here's the actual code. - (let ((patlen (vector-length rv))) - (let lp ((si s-start) ; An index into S. - (vi i)) ; An index into RV. - (cond ((= vi patlen) (- si)) ; Win. - ((= si s-end) vi) ; Ran off the end. - (else ; Match s[si] & loop. - (let ((c (string-ref s si))) - (lp (+ si 1) - (let lp2 ((vi vi)) ; This is just KMP-STEP. - (if (c= c (string-ref pat (+ vi p-start))) - (+ vi 1) - (let ((vi (vector-ref rv vi))) - (if (= vi -1) 0 - (lp2 vi))))))))))))) ) - - -;;; Misc -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;; (string-null? s) -;;; (string-reverse s [start end]) -;;; (string-reverse! s [start end]) -;;; (reverse-list->string clist) -;;; (string->list s [start end]) - -(define (string-null? s) (##core#inline "C_i_string_null_p" s)) - -(define (string-reverse s . maybe-start+end) - (let-string-start+end (start end) string-reverse s maybe-start+end - (let* ((len (- end start)) - (ans (make-string len))) - (do ((i start (+ i 1)) - (j (- len 1) (- j 1))) - ((< j 0)) - (string-set! ans j (string-ref s i))) - ans))) - -(define (string-reverse! s . maybe-start+end) - (let-string-start+end (start end) string-reverse! s maybe-start+end - (do ((i (- end 1) (- i 1)) - (j start (+ j 1))) - ((<= i j)) - (let ((ci (string-ref s i))) - (string-set! s i (string-ref s j)) - (string-set! s j ci))))) - - -#| this is already available in library.scm: - -(define (reverse-list->string clist) - (let* ((len (length clist)) - (s (make-string len))) - (do ((i (- len 1) (- i 1)) (clist clist (cdr clist))) - ((not (pair? clist))) - (string-set! s i (car clist))) - s)) -|# - - -;(define (string->list s . maybe-start+end) -; (apply string-fold-right cons '() s maybe-start+end)) - -(define (string->list s . maybe-start+end) - (let-string-start+end (start end) string->list s maybe-start+end - (do ((i (- end 1) (- i 1)) - (ans '() (cons (string-ref s i) ans))) - ((< i start) ans)))) - -;;; Defined by R5RS, so commented out here. -;(define (list->string lis) (string-unfold null? car cdr lis)) - - -;;; string-concatenate string-list -> string -;;; string-concatenate/shared string-list -> string -;;; string-append/shared s ... -> string -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;; STRING-APPEND/SHARED has license to return a string that shares storage -;;; with any of its arguments. In particular, if there is only one non-empty -;;; string amongst its parameters, it is permitted to return that string as -;;; its result. STRING-APPEND, by contrast, always allocates new storage. -;;; -;;; STRING-CONCATENATE & STRING-CONCATENATE/SHARED are passed a list of -;;; strings, which they concatenate into a result string. STRING-CONCATENATE -;;; always allocates a fresh string; STRING-CONCATENATE/SHARED may (or may -;;; not) return a result that shares storage with any of its arguments. In -;;; particular, if it is applied to a singleton list, it is permitted to -;;; return the car of that list as its value. - -(define (string-append/shared . strings) (string-concatenate/shared strings)) - -(define (string-concatenate/shared strings) - (let lp ((strings strings) (nchars 0) (first #f)) - (cond ((pair? strings) ; Scan the args, add up total - (let* ((string (car strings)) ; length, remember 1st - (tail (cdr strings)) ; non-empty string. - (slen (string-length string))) - (if (zero? slen) - (lp tail nchars first) - (lp tail (+ nchars slen) (or first strings))))) - - ((zero? nchars) "") - - ;; Just one non-empty string! Return it. - ((= nchars (string-length (car first))) (car first)) - - (else (let ((ans (make-string nchars))) - (let lp ((strings first) (i 0)) - (if (pair? strings) - (let* ((s (car strings)) - (slen (string-length s))) - (%string-copy! ans i s 0 slen) - (lp (cdr strings) (+ i slen))))) - ans))))) - - -; Alas, Scheme 48's APPLY blows up if you have many, many arguments. -;(define (string-concatenate strings) (apply string-append strings)) - -;;; Here it is written out. I avoid using REDUCE to add up string lengths -;;; to avoid non-R5RS dependencies. -(define (string-concatenate strings) - (let* ((total (do ((strings strings (cdr strings)) - (i 0 (+ i (string-length (car strings))))) - ((not (pair? strings)) i))) - (ans (make-string total))) - (let lp ((i 0) (strings strings)) - (if (pair? strings) - (let* ((s (car strings)) - (slen (string-length s))) - (%string-copy! ans i s 0 slen) - (lp (+ i slen) (cdr strings))))) - ans)) - - -;;; Defined by R5RS, so commented out here. -;(define (string-append . strings) (string-concatenate strings)) - -;;; string-concatenate-reverse string-list [final-string end] -> string -;;; string-concatenate-reverse/shared string-list [final-string end] -> string -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;; Return -;;; (string-concatenate -;;; (reverse -;;; (cons (substring final-string 0 end) string-list))) - -(define (string-concatenate-reverse string-list . maybe-final+end) - (let-optionals* maybe-final+end ((final ""); (string? final)) - (end (string-length final)) ) -; (and (integer? end) -; (exact? end) -; (<= 0 end (string-length final))))) - (##sys#check-exact end 'string-concatenate-reverse) - (let ((len (let lp ((sum 0) (lis string-list)) - (if (pair? lis) - (lp (+ sum (string-length (car lis))) (cdr lis)) - sum)))) - - (%finish-string-concatenate-reverse len string-list final end)))) - -(define (string-concatenate-reverse/shared string-list . maybe-final+end) - (let-optionals* maybe-final+end ((final ""); (string? final)) - (end (string-length final))) -; (and (integer? end) -; (exact? end) -; (<= 0 end (string-length final))))) - (##sys#check-exact end 'string-concatenate-reverse/shared) - ;; Add up the lengths of all the strings in STRING-LIST; also get a - ;; pointer NZLIST into STRING-LIST showing where the first non-zero-length - ;; string starts. - (let lp ((len 0) (nzlist #f) (lis string-list)) - (if (pair? lis) - (let ((slen (string-length (car lis)))) - (lp (+ len slen) - (if (or nzlist (zero? slen)) nzlist lis) - (cdr lis))) - - (cond ((zero? len) (substring/shared final 0 end)) - - ;; LEN > 0, so NZLIST is non-empty. - - ((and (zero? end) (= len (string-length (car nzlist)))) - (car nzlist)) - - (else (%finish-string-concatenate-reverse len nzlist final end))))))) - -(define (%finish-string-concatenate-reverse len string-list final end) - (let ((ans (make-string (+ end len)))) - (%string-copy! ans len final 0 end) - (let lp ((i len) (lis string-list)) - (if (pair? lis) - (let* ((s (car lis)) - (lis (cdr lis)) - (slen (string-length s)) - (i (- i slen))) - (%string-copy! ans i s 0 slen) - (lp i lis)))) - ans)) - - - - -;;; string-replace s1 s2 start1 end1 [start2 end2] -> string -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;; Replace S1[START1,END1) with S2[START2,END2). - -(define (string-replace s1 s2 start1 end1 . maybe-start+end) - (check-substring-spec string-replace s1 start1 end1) - (let-string-start+end (start2 end2) string-replace s2 maybe-start+end - (let* ((slen1 (string-length s1)) - (sublen2 (- end2 start2)) - (alen (+ (- slen1 (- end1 start1)) sublen2)) - (ans (make-string alen))) - (%string-copy! ans 0 s1 0 start1) - (%string-copy! ans start1 s2 start2 end2) - (%string-copy! ans (+ start1 sublen2) s1 end1 slen1) - ans))) - - -;;; string-tokenize s [token-set start end] -> list -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;; Break S up into a list of token strings, where a token is a maximal -;;; non-empty contiguous sequence of chars belonging to TOKEN-SET. -;;; (string-tokenize "hello, world") => ("hello," "world") - -(define (string-tokenize s . token-chars+start+end) - (let-optionals* token-chars+start+end - ((token-chars char-set:graphic) rest) ; (char-set? token-chars)) rest) - (let-string-start+end (start end) string-tokenize s rest - (let lp ((i end) (ans '())) - (cond ((and (< start i) (string-index-right s token-chars start i)) => - (lambda (tend-1) - (let ((tend (+ 1 tend-1))) - (cond ((string-skip-right s token-chars start tend-1) => - (lambda (tstart-1) - (lp tstart-1 - (cons (##sys#substring s (+ 1 tstart-1) tend) - ans)))) - (else (cons (##sys#substring s start tend) ans)))))) - (else ans)))))) - - -;;; xsubstring s from [to start end] -> string -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;; S is a string; START and END are optional arguments that demarcate -;;; a substring of S, defaulting to 0 and the length of S (e.g., the whole -;;; string). Replicate this substring up and down index space, in both the -;; positive and negative directions. For example, if S = "abcdefg", START=3, -;;; and END=6, then we have the conceptual bidirectionally-infinite string -;;; ... d e f d e f d e f d e f d e f d e f d e f ... -;;; ... -9 -8 -7 -6 -5 -4 -3 -2 -1 0 1 2 3 4 5 6 7 8 9 ... -;;; XSUBSTRING returns the substring of this string beginning at index FROM, -;;; and ending at TO (which defaults to FROM+(END-START)). -;;; -;;; You can use XSUBSTRING in many ways: -;;; - To rotate a string left: (xsubstring "abcdef" 2) => "cdefab" -;;; - To rotate a string right: (xsubstring "abcdef" -2) => "efabcd" -;;; - To replicate a string: (xsubstring "abc" 0 7) => "abcabca" -;;; -;;; Note that -;;; - The FROM/TO indices give a half-open range -- the characters from -;;; index FROM up to, but not including index TO. -;;; - The FROM/TO indices are not in terms of the index space for string S. -;;; They are in terms of the replicated index space of the substring -;;; defined by S, START, and END. -;;; -;;; It is an error if START=END -- although this is allowed by special -;;; dispensation when FROM=TO. - -(define (xsubstring s from . maybe-to+start+end) -; (check-arg (lambda (val) (and (integer? val) (exact? val))) -; from xsubstring) - (##sys#check-exact from 'xsubstring) - (receive (to start end) - (if (pair? maybe-to+start+end) - (let-string-start+end (start end) xsubstring s (cdr maybe-to+start+end) - (let ((to (car maybe-to+start+end))) -; (check-arg (lambda (val) (and (integer? val) -; (exact? val) -; (<= from val))) -; to xsubstring) - (##sys#check-exact to 'xsubstring) - (values to start end))) -; (let ((slen (string-length (check-arg string? s xsubstring)))) - (let ((slen (string-length s))) - (values (+ from slen) 0 slen))) - (let ((slen (- end start)) - (anslen (- to from))) - (cond ((zero? anslen) "") - ((zero? slen) (##sys#error 'xsubstring "Cannot replicate empty (sub)string" - xsubstring s from to start end)) - - ((= 1 slen) ; Fast path for 1-char replication. - (make-string anslen (string-ref s start))) - - ;; CHICKEN compiles this file with (declare (fixnum)), so - ;; flonum operations are not reliable. Since this clause - ;; just provides a shorter path to avoid calling - ;; %multispan-repcopy!, we comment it out and leave the - ;; fixnum declaration. - ;; - ;; Selected text falls entirely within one span. - ;; ((= (floor (/ from slen)) (floor (/ to slen))) - ;; (##sys#substring s (+ start (modulo from slen)) - ;; (+ start (modulo to slen)))) - - ;; Selected text requires multiple spans. - (else (let ((ans (make-string anslen))) - (%multispan-repcopy! ans 0 s from to start end) - ans)))))) - - -;;; string-xcopy! target tstart s sfrom [sto start end] -> unspecific -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;; Exactly the same as xsubstring, but the extracted text is written -;;; into the string TARGET starting at index TSTART. -;;; This operation is not defined if (EQ? TARGET S) -- you cannot copy -;;; a string on top of itself. - -(define ##srfi13#string-fill! string-fill!) ; or we use std-binding. - -(define (string-xcopy! target tstart s sfrom . maybe-sto+start+end) -; (check-arg (lambda (val) (and (integer? val) (exact? val))) -; sfrom string-xcopy!) - (##sys#check-exact sfrom 'string-xcopy!) - (receive (sto start end) - (if (pair? maybe-sto+start+end) - (let-string-start+end (start end) string-xcopy! s (cdr maybe-sto+start+end) - (let ((sto (car maybe-sto+start+end))) -; (check-arg (lambda (val) (and (integer? val) (exact? val))) -; sto string-xcopy!) - (##sys#check-exact sto 'string-xcopy!) - (values sto start end))) - (let ((slen (string-length s))) - (values (+ sfrom slen) 0 slen))) - - (let* ((tocopy (- sto sfrom)) - (tend (+ tstart tocopy)) - (slen (- end start))) - (check-substring-spec string-xcopy! target tstart tend) - (cond ((zero? tocopy)) - ((zero? slen) (##sys#error 'string-xcopy! "Cannot replicate empty (sub)string" - string-xcopy! - target tstart s sfrom sto start end)) - - ((= 1 slen) ; Fast path for 1-char replication. - (##srfi13#string-fill! target (string-ref s start) tstart tend)) - - ;; CHICKEN compiles this file with (declare (fixnum)), so - ;; flonum operations are not reliable. Since this clause - ;; just provides a shorter path to avoid calling - ;; %multispan-repcopy!, we comment it out and leave the - ;; fixnum declaration. - ;; - ;; Selected text falls entirely within one span. - ;; ((= (floor (/ sfrom slen)) (floor (/ sto slen))) - ;; (%string-copy! target tstart s - ;; (+ start (modulo sfrom slen)) - ;; (+ start (modulo sto slen)))) - - ;; Multi-span copy. - (else (%multispan-repcopy! target tstart s sfrom sto start end)))))) - -;;; This is the core copying loop for XSUBSTRING and STRING-XCOPY! -;;; Internal -- not exported, no careful arg checking. -(define (%multispan-repcopy! target tstart s sfrom sto start end) - (let* ((slen (- end start)) - (i0 (+ start (modulo sfrom slen))) - (total-chars (- sto sfrom))) - - ;; Copy the partial span @ the beginning - (%string-copy! target tstart s i0 end) - - (let* ((ncopied (- end i0)) ; We've copied this many. - (nleft (- total-chars ncopied)) ; # chars left to copy. - (nspans (quotient nleft slen))) ; # whole spans to copy - - ;; Copy the whole spans in the middle. - (do ((i (+ tstart ncopied) (+ i slen)) ; Current target index. - (nspans nspans (- nspans 1))) ; # spans to copy - ((zero? nspans) - ;; Copy the partial-span @ the end & we're done. - (%string-copy! target i s start (+ start (- total-chars (- i tstart))))) - - (%string-copy! target i s start end))))); Copy a whole span. - - - -;;; (string-join string-list [delimiter grammar]) => string -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;; Paste strings together using the delimiter string. -;;; -;;; (join-strings '("foo" "bar" "baz") ":") => "foo:bar:baz" -;;; -;;; DELIMITER defaults to a single space " " -;;; GRAMMAR is one of the symbols {prefix, infix, strict-infix, suffix} -;;; and defaults to 'infix. -;;; -;;; I could rewrite this more efficiently -- precompute the length of the -;;; answer string, then allocate & fill it in iteratively. Using -;;; STRING-CONCATENATE is less efficient. - -(define (string-join strings . delim+grammar) - (let-optionals* delim+grammar ((delim " ") ; (string? delim)) - (grammar 'infix)) - (let ((buildit (lambda (lis final) - (let recur ((lis lis)) - (if (pair? lis) - (cons delim (cons (car lis) (recur (cdr lis)))) - final))))) - - (cond ((pair? strings) - (string-concatenate - (case grammar - - ((infix strict-infix) - (cons (car strings) (buildit (cdr strings) '()))) - - ((prefix) (buildit strings '())) - - ((suffix) - (cons (car strings) (buildit (cdr strings) (list delim)))) - - (else (##sys#error 'string-join "Illegal join grammar" - grammar string-join))))) - - ((not (null? strings)) - (##sys#error 'string-join "STRINGS parameter not list." strings string-join)) - - ;; STRINGS is () - - ((eq? grammar 'strict-infix) - (##sys#error 'string-join "Empty list cannot be joined with STRICT-INFIX grammar." - string-join)) - - (else ""))))) ; Special-cased for infix grammar. - - -;;; Porting & performance-tuning notes -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;; See the section at the beginning of this file on external dependencies. -;;; -;;; The biggest issue with respect to porting is the LET-OPTIONALS* macro. -;;; There are many, many optional arguments in this library; the complexity -;;; of parsing, defaulting & type-testing these parameters is handled with the -;;; aid of this macro. There are about 15 uses of LET-OPTIONALS*. You can -;;; rewrite the uses, port the hairy macro definition (which is implemented -;;; using a Clinger-Rees low-level explicit-renaming macro system), or port -;;; the simple, high-level definition, which is less efficient. -;;; -;;; There is a fair amount of argument checking. This is, strictly speaking, -;;; unnecessary -- the actual body of the procedures will blow up if, say, a -;;; START/END index is improper. However, the error message will not be as -;;; good as if the error were caught at the "higher level." Also, a very, very -;;; smart Scheme compiler may be able to exploit having the type checks done -;;; early, so that the actual body of the procedures can assume proper values. -;;; This isn't likely; this kind of compiler technology isn't common any -;;; longer. -;;; -;;; The overhead of optional-argument parsing is irritating. The optional -;;; arguments must be consed into a rest list on entry, and then parsed out. -;;; Function call should be a matter of a few register moves and a jump; it -;;; should not involve heap allocation! Your Scheme system may have a superior -;;; non-R5RS optional-argument system that can eliminate this overhead. If so, -;;; then this is a prime candidate for optimising these procedures, -;;; *especially* the many optional START/END index parameters. -;;; -;;; Note that optional arguments are also a barrier to procedure integration. -;;; If your Scheme system permits you to specify alternate entry points -;;; for a call when the number of optional arguments is known in a manner -;;; that enables inlining/integration, this can provide performance -;;; improvements. -;;; -;;; There is enough *explicit* error checking that *all* string-index -;;; operations should *never* produce a bounds error. Period. Feel like -;;; living dangerously? *Big* performance win to be had by replacing -;;; STRING-REF's and STRING-SET!'s with unsafe equivalents in the loops. -;;; Similarly, fixnum-specific operators can speed up the arithmetic done on -;;; the index values in the inner loops. The only arguments that are not -;;; completely error checked are -;;; - string lists (complete checking requires time proportional to the -;;; length of the list) -;;; - procedure arguments, such as char->char maps & predicates. -;;; There is no way to check the range & domain of procedures in Scheme. -;;; Procedures that take these parameters cannot fully check their -;;; arguments. But all other types to all other procedures are fully -;;; checked. -;;; -;;; This does open up the alternate possibility of simply *removing* these -;;; checks, and letting the safe primitives raise the errors. On a dumb -;;; Scheme system, this would provide speed (by eliminating the redundant -;;; error checks) at the cost of error-message clarity. -;;; -;;; See the comments preceding the hash function code for notes on tuning -;;; the default bound so that the code never overflows your implementation's -;;; fixnum size into bignum calculation. -;;; -;;; In an interpreted Scheme, some of these procedures, or the internal -;;; routines with % prefixes, are excellent candidates for being rewritten -;;; in C. Consider STRING-HASH, %STRING-COMPARE, the -;;; %STRING-{SUF,PRE}FIX-LENGTH routines, STRING-COPY!, STRING-INDEX & -;;; STRING-SKIP (char-set & char cases), SUBSTRING and SUBSTRING/SHARED, -;;; %KMP-SEARCH, and %MULTISPAN-REPCOPY!. -;;; -;;; It would also be nice to have the ability to mark some of these -;;; routines as candidates for inlining/integration. -;;; -;;; All the %-prefixed routines in this source code are written -;;; to be called internally to this library. They do *not* perform -;;; friendly error checks on the inputs; they assume everything is -;;; proper. They also do not take optional arguments. These two properties -;;; save calling overhead and enable procedure integration -- but they -;;; are not appropriate for exported routines. - - -;;; Copyright details -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;; The prefix/suffix and comparison routines in this code had (extremely -;;; distant) origins in MIT Scheme's string lib, and was substantially -;;; reworked by Olin Shivers (address@hidden) 9/98. As such, it is -;;; covered by MIT Scheme's open source copyright. See below for details. -;;; -;;; The KMP string-search code was influenced by implementations written -;;; by Stephen Bevan, Brian Dehneyer and Will Fitzgerald. However, this -;;; version was written from scratch by myself. - -;;; I guessed that much. (flw) - -;;; -;;; The remainder of this code was written from scratch by myself for scsh. -;;; The scsh copyright is a BSD-style open source copyright. See below for -;;; details. -;;; -Olin Shivers - -;;; MIT Scheme copyright terms -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;; This material was developed by the Scheme project at the Massachusetts -;;; Institute of Technology, Department of Electrical Engineering and -;;; Computer Science. Permission to copy and modify this software, to -;;; redistribute either the original software or a modified version, and -;;; to use this software for any purpose is granted, subject to the -;;; following restrictions and understandings. -;;; -;;; 1. Any copy made of this software must include this copyright notice -;;; in full. -;;; -;;; 2. Users of this software agree to make their best efforts (a) to -;;; return to the MIT Scheme project any improvements or extensions that -;;; they make, so that these may be included in future releases; and (b) -;;; to inform MIT of noteworthy uses of this software. -;;; -;;; 3. All materials developed as a consequence of the use of this -;;; software shall duly acknowledge such use, in accordance with the usual -;;; standards of acknowledging credit in academic research. -;;; -;;; 4. MIT has made no warrantee or representation that the operation of -;;; this software will be error-free, and MIT is under no obligation to -;;; provide any services, by way of maintenance, update, or otherwise. -;;; -;;; 5. In conjunction with products arising from the use of this material, -;;; there shall be no use of the name of the Massachusetts Institute of -;;; Technology nor of any adaptation thereof in any advertising, -;;; promotional, or sales literature without prior written consent from -;;; MIT in each case. - -;;; Scsh copyright terms -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;; All rights reserved. -;;; -;;; Redistribution and use in source and binary forms, with or without -;;; modification, are permitted provided that the following conditions -;;; are met: -;;; 1. Redistributions of source code must retain the above copyright -;;; notice, this list of conditions and the following disclaimer. -;;; 2. Redistributions in binary form must reproduce the above copyright -;;; notice, this list of conditions and the following disclaimer in the -;;; documentation and/or other materials provided with the distribution. -;;; 3. The name of the authors may not be used to endorse or promote products -;;; derived from this software without specific prior written permission. -;;; -;;; THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR -;;; IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -;;; OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. -;;; IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT, -;;; INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT -;;; NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -;;; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -;;; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -;;; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF -;;; THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/tests/reexport-m1.scm b/tests/reexport-m1.scm index 96ac9bc..0253877 100644 --- a/tests/reexport-m1.scm +++ b/tests/reexport-m1.scm @@ -2,5 +2,5 @@ (module reexport-m1 () (import scheme chicken) - (require-library srfi-1 srfi-13) - (reexport (only srfi-1 cons*) srfi-13)) + (require-library srfi-1 srfi-69) + (reexport (only srfi-1 cons*) srfi-69)) diff --git a/tests/reverser/tags/1.0/reverser.scm b/tests/reverser/tags/1.0/reverser.scm index 4159bb8..130b7df 100644 --- a/tests/reverser/tags/1.0/reverser.scm +++ b/tests/reverser/tags/1.0/reverser.scm @@ -1,7 +1,16 @@ (module reverser * (import scheme chicken) - (use srfi-13) (define rev-version 1.0) + + (define (string-reverse s) + (let* ((len (string-length s)) + (ans (make-string len))) + (do ((i 0 (+ i 1)) + (j (- len 1) (- j 1))) + ((< j 0)) + (string-set! ans j (string-ref s i))) + ans)) + (define (rev x) (cond ((string? x) (string-reverse x)) ((symbol? x) (string->symbol (rev (symbol->string x)))) diff --git a/tests/reverser/tags/1.1/reverser.scm b/tests/reverser/tags/1.1/reverser.scm index 9815b7d..ceb1932 100644 --- a/tests/reverser/tags/1.1/reverser.scm +++ b/tests/reverser/tags/1.1/reverser.scm @@ -1,7 +1,16 @@ (module reverser * (import scheme chicken) - (use srfi-13) (define rev-version 1.1) + + (define (string-reverse s) + (let* ((len (string-length s)) + (ans (make-string len))) + (do ((i 0 (+ i 1)) + (j (- len 1) (- j 1))) + ((< j 0)) + (string-set! ans j (string-ref s i))) + ans)) + (define (rev x) (cond ((string? x) (string-reverse x)) ((symbol? x) (string->symbol (rev (symbol->string x)))) diff --git a/tests/runtests.bat b/tests/runtests.bat index b037cb7..c7f0f2b 100644 --- a/tests/runtests.bat +++ b/tests/runtests.bat @@ -370,10 +370,6 @@ echo ======================================== srfi-4 tests ... %interpret% -s srfi-4-tests.scm if errorlevel 1 exit /b 1 -echo ======================================== srfi-13 tests ... -%interpret% -s srfi-13-tests.scm -if errorlevel 1 exit /b 1 - echo ======================================== srfi-14 tests ... %compile% srfi-14-tests.scm if errorlevel 1 exit /b 1 diff --git a/tests/runtests.sh b/tests/runtests.sh index 5b6f83c..1237f82 100755 --- a/tests/runtests.sh +++ b/tests/runtests.sh @@ -39,7 +39,7 @@ for x in setup-api.so setup-api.import.so setup-download.so \ setup-download.import.so chicken.import.so lolevel.import.so \ srfi-1.import.so srfi-4.import.so data-structures.import.so \ ports.import.so files.import.so posix.import.so \ - srfi-13.import.so srfi-69.import.so extras.import.so \ + srfi-69.import.so extras.import.so \ irregex.import.so srfi-14.import.so tcp.import.so \ foreign.import.so srfi-18.import.so \ utils.import.so csi.import.so irregex.import.so types.db; do @@ -309,9 +309,6 @@ $compile numbers-string-conversion-tests.scm echo "======================================== srfi-4 tests ..." $interpret -s srfi-4-tests.scm -echo "======================================== srfi-13 tests ..." -$interpret -s srfi-13-tests.scm - echo "======================================== srfi-14 tests ..." $compile srfi-14-tests.scm ./a.out diff --git a/tests/srfi-13-tests.scm b/tests/srfi-13-tests.scm deleted file mode 100644 index bc32885..0000000 --- a/tests/srfi-13-tests.scm +++ /dev/null @@ -1,776 +0,0 @@ -(define (fill text) - (let* ((len (string-length text)) - (max-text-len 60) - (last-col 70) - (text (if (> len max-text-len) - (begin - (set! len max-text-len) - (substring text 0 max-text-len)) - text))) - (string-append text (make-string (- last-col len) #\.)))) - -(define-syntax test - (syntax-rules () - ((_ comment expect form) - (begin - (display (fill (or comment ""))) - (cond ((equal? expect form) - (display "[ok]")) - (else - (display "[fail]") - (newline) - (exit 13))) - (newline) - (flush-output))))) - -(define-syntax test-assert - (syntax-rules () - ((_ comment form) - (test comment #t (and form #t))))) - -(use srfi-13) - -; Tests for SRFI-13 as implemented by the Gauche scheme system. -;; -;; Copyright (c) 2000-2003 Shiro Kawai, All rights reserved. -;; -;; Redistribution and use in source and binary forms, with or without -;; modification, are permitted provided that the following conditions -;; are met: -;; -;; 1. Redistributions of source code must retain the above copyright -;; notice, this list of conditions and the following disclaimer. -;; -;; 2. Redistributions in binary form must reproduce the above copyright -;; notice, this list of conditions and the following disclaimer in the -;; documentation and/or other materials provided with the distribution. -;; -;; 3. Neither the name of the authors nor the names of its contributors -;; may be used to endorse or promote products derived from this -;; software without specific prior written permission. -;; -;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -;; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -;; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -;; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -;; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -;; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED -;; TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -;; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -;; -;; See http://sourceforge.net/projects/gauche/ - -(test "string-null?" #f (string-null? "abc")) -(test "string-null?" #t (string-null? "")) -(test "string-every" #t (string-every #\a "")) -(test "string-every" #t (string-every #\a "aaaa")) -(test "string-every" #f (string-every #\a "aaba")) -(test "string-every" #t (string-every char-set:lower-case "aaba")) -(test "string-every" #f (string-every char-set:lower-case "aAba")) -(test "string-every" #t (string-every char-set:lower-case "")) -(test "string-every" #t (string-every (lambda (x) (char-ci=? x #\a)) "aAaA")) -(test "string-every" #f (string-every (lambda (x) (char-ci=? x #\a)) "aAbA")) -(test "string-every" (char->integer #\A) - (string-every (lambda (x) (char->integer x)) "aAbA")) -(test "string-every" #t - (string-every (lambda (x) (error "hoge")) "")) -(test "string-any" #t (string-any #\a "aaaa")) -(test "string-any" #f (string-any #\a "Abcd")) -(test "string-any" #f (string-any #\a "")) -(test "string-any" #t (string-any char-set:lower-case "ABcD")) -(test "string-any" #f (string-any char-set:lower-case "ABCD")) -(test "string-any" #f (string-any char-set:lower-case "")) -(test "string-any" #t (string-any (lambda (x) (char-ci=? x #\a)) "CAaA")) -(test "string-any" #f (string-any (lambda (x) (char-ci=? x #\a)) "ZBRC")) -(test "string-any" #f (string-any (lambda (x) (char-ci=? x #\a)) "")) -(test "string-any" (char->integer #\a) - (string-any (lambda (x) (char->integer x)) "aAbA")) -(test "string-tabulate" "0123456789" - (string-tabulate (lambda (code) - (integer->char (+ code (char->integer #\0)))) - 10)) -(test "string-tabulate" "" - (string-tabulate (lambda (code) - (integer->char (+ code (char->integer #\0)))) - 0)) -(test "reverse-list->string" "cBa" - (reverse-list->string '(#\a #\B #\c))) -(test "reverse-list->string" "" - (reverse-list->string '())) -; string-join : Gauche builtin. -(test "substring/shared" "cde" (substring/shared "abcde" 2)) -(test "substring/shared" "cd" (substring/shared "abcde" 2 4)) -(test "string-copy!" "abCDEfg" - (let ((x (string-copy "abcdefg"))) - (string-copy! x 2 "CDE") - x)) -(test "string-copy!" "abCDEfg" - (let ((x (string-copy "abcdefg"))) - (string-copy! x 2 "ZABCDE" 3) - x)) -(test "string-copy!" "abCDEfg" - (let ((x (string-copy "abcdefg"))) - (string-copy! x 2 "ZABCDEFG" 3 6) - x)) - -;; From Guile. Thanks to Mark H Weaver. -(test "string-copy!: overlapping src and dest, moving right" - "aabce" - (let ((str (string-copy "abcde"))) - (string-copy! str 1 str 0 3) str)) - -(test "string-copy!: overlapping src and dest, moving left" - "bcdde" - (let ((str (string-copy "abcde"))) - (string-copy! str 0 str 1 4) str)) - -(test "string-take" "Pete S" (string-take "Pete Szilagyi" 6)) -(test "string-take" "" (string-take "Pete Szilagyi" 0)) -(test "string-take" "Pete Szilagyi" (string-take "Pete Szilagyi" 13)) -(test "string-drop" "zilagyi" (string-drop "Pete Szilagyi" 6)) -(test "string-drop" "Pete Szilagyi" (string-drop "Pete Szilagyi" 0)) -(test "string-drop" "" (string-drop "Pete Szilagyi" 13)) - -(test "string-take-right" "rules" (string-take-right "Beta rules" 5)) -(test "string-take-right" "" (string-take-right "Beta rules" 0)) -(test "string-take-right" "Beta rules" (string-take-right "Beta rules" 10)) -(test "string-drop-right" "Beta " (string-drop-right "Beta rules" 5)) -(test "string-drop-right" "Beta rules" (string-drop-right "Beta rules" 0)) -(test "string-drop-right" "" (string-drop-right "Beta rules" 10)) - -(test "string-pad" " 325" (string-pad "325" 5)) -(test "string-pad" "71325" (string-pad "71325" 5)) -(test "string-pad" "71325" (string-pad "8871325" 5)) -(test "string-pad" "~~325" (string-pad "325" 5 #\~)) -(test "string-pad" "~~~25" (string-pad "325" 5 #\~ 1)) -(test "string-pad" "~~~~2" (string-pad "325" 5 #\~ 1 2)) -(test "string-pad-right" "325 " (string-pad-right "325" 5)) -(test "string-pad-right" "71325" (string-pad-right "71325" 5)) -(test "string-pad-right" "88713" (string-pad-right "8871325" 5)) -(test "string-pad-right" "325~~" (string-pad-right "325" 5 #\~)) -(test "string-pad-right" "25~~~" (string-pad-right "325" 5 #\~ 1)) -(test "string-pad-right" "2~~~~" (string-pad-right "325" 5 #\~ 1 2)) - -(test "string-trim" "a b c d \n" - (string-trim " \t a b c d \n")) -(test "string-trim" "\t a b c d \n" - (string-trim " \t a b c d \n" #\space)) -(test "string-trim" "a b c d \n" - (string-trim "4358948a b c d \n" char-set:digit)) - -(test "string-trim-right" " \t a b c d" - (string-trim-right " \t a b c d \n")) -(test "string-trim-right" " \t a b c d " - (string-trim-right " \t a b c d \n" (char-set #\newline))) -(test "string-trim-right" "349853a b c d" - (string-trim-right "349853a b c d03490" char-set:digit)) - -(test "string-trim-both" "a b c d" - (string-trim-both " \t a b c d \n")) -(test "string-trim-both" " \t a b c d " - (string-trim-both " \t a b c d \n" (char-set #\newline))) -(test "string-trim-both" "a b c d" - (string-trim-both "349853a b c d03490" char-set:digit)) - -;; string-fill - in string.scm - -(test "string-compare" 5 - (string-compare "The cat in the hat" "abcdefgh" - values values values - 4 6 2 4)) -(test "string-compare-ci" 5 - (string-compare-ci "The cat in the hat" "ABCDEFGH" - values values values - 4 6 2 4)) - -;; TODO: bunch of string= families - -(test "string-prefix-length" 5 - (string-prefix-length "cancaNCAM" "cancancan")) -(test "string-prefix-length-ci" 8 - (string-prefix-length-ci "cancaNCAM" "cancancan")) -(test "string-suffix-length" 2 - (string-suffix-length "CanCan" "cankancan")) -(test "string-suffix-length-ci" 5 - (string-suffix-length-ci "CanCan" "cankancan")) - -(test "string-prefix?" #t (string-prefix? "abcd" "abcdefg")) -(test "string-prefix?" #f (string-prefix? "abcf" "abcdefg")) -(test "string-prefix-ci?" #t (string-prefix-ci? "abcd" "aBCDEfg")) -(test "string-prefix-ci?" #f (string-prefix-ci? "abcf" "aBCDEfg")) -(test "string-suffix?" #t (string-suffix? "defg" "abcdefg")) -(test "string-suffix?" #f (string-suffix? "aefg" "abcdefg")) -(test "string-suffix-ci?" #t (string-suffix-ci? "defg" "aBCDEfg")) -(test "string-suffix-ci?" #f (string-suffix-ci? "aefg" "aBCDEfg")) - -(test "string-index #1" 4 - (string-index "abcd:efgh:ijkl" #\:)) -(test "string-index #2" 4 - (string-index "abcd:efgh;ijkl" (char-set-complement char-set:letter))) -(test "string-index #3" #f - (string-index "abcd:efgh;ijkl" char-set:digit)) -(test "string-index #4" 9 - (string-index "abcd:efgh:ijkl" #\: 5)) -(test "string-index-right #1" 4 - (string-index-right "abcd:efgh;ijkl" #\:)) -(test "string-index-right #2" 9 - (string-index-right "abcd:efgh;ijkl" (char-set-complement char-set:letter))) -(test "string-index-right #3" #f - (string-index-right "abcd:efgh;ijkl" char-set:digit)) -(test "string-index-right #4" 4 - (string-index-right "abcd:efgh;ijkl" (char-set-complement char-set:letter) 2 5)) - -(test "string-count #1" 2 - (string-count "abc def\tghi jkl" #\space)) -(test "string-count #2" 3 - (string-count "abc def\tghi jkl" char-set:whitespace)) -(test "string-count #3" 2 - (string-count "abc def\tghi jkl" char-set:whitespace 4)) -(test "string-count #4" 1 - (string-count "abc def\tghi jkl" char-set:whitespace 4 9)) -(test "string-contains" 3 - (string-contains "Ma mere l'oye" "mer")) -(test "string-contains" #f - (string-contains "Ma mere l'oye" "Mer")) -(test "string-contains-ci" 3 - (string-contains-ci "Ma mere l'oye" "Mer")) -(test "string-contains-ci" #f - (string-contains-ci "Ma mere l'oye" "Meer")) - -(test "string-titlecase" "--Capitalize This Sentence." - (string-titlecase "--capitalize tHIS sentence.")) -(test "string-titlecase" "3Com Makes Routers." - (string-titlecase "3com makes routers.")) -(test "string-titlecase!" "alSo Whatever" - (let ((s (string-copy "also whatever"))) - (string-titlecase! s 2 9) - s)) - -(test "string-upcase" "SPEAK LOUDLY" - (string-upcase "speak loudly")) -(test "string-upcase" "PEAK" - (string-upcase "speak loudly" 1 5)) -(test "string-upcase!" "sPEAK loudly" - (let ((s (string-copy "speak loudly"))) - (string-upcase! s 1 5) - s)) - -(test "string-downcase" "speak softly" - (string-downcase "SPEAK SOFTLY")) -(test "string-downcase" "peak" - (string-downcase "SPEAK SOFTLY" 1 5)) -(test "string-downcase!" "Speak SOFTLY" - (let ((s (string-copy "SPEAK SOFTLY"))) - (string-downcase! s 1 5) - s)) - -(test "string-reverse" "nomel on nolem on" - (string-reverse "no melon no lemon")) -(test "string-reverse" "nomel on" - (string-reverse "no melon no lemon" 9)) -(test "string-reverse" "on" - (string-reverse "no melon no lemon" 9 11)) -(test "string-reverse!" "nomel on nolem on" - (let ((s (string-copy "no melon no lemon"))) - (string-reverse! s) s)) -(test "string-reverse!" "no melon nomel on" - (let ((s (string-copy "no melon no lemon"))) - (string-reverse! s 9) s)) -(test "string-reverse!" "no melon on lemon" - (let ((s (string-copy "no melon no lemon"))) - (string-reverse! s 9 11) s)) - -(test "string-append" #f - (let ((s "test")) (eq? s (string-append s)))) -(test "string-concatenate" #f - (let ((s "test")) (eq? s (string-concatenate (list s))))) -(test "string-concatenate" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" - (string-concatenate - '("A" "B" "C" "D" "E" "F" "G" "H" - "I" "J" "K" "L" "M" "N" "O" "P" - "Q" "R" "S" "T" "U" "V" "W" "X" "Y" "Z" - "a" "b" "c" "d" "e" "f" "g" "h" - "i" "j" "k" "l" "m" "n" "o" "p" - "q" "r" "s" "t" "u" "v" "w" "x" "y" "z"))) -(test "string-concatenate/shared" "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" - (string-concatenate/shared - '("A" "B" "C" "D" "E" "F" "G" "H" - "I" "J" "K" "L" "M" "N" "O" "P" - "Q" "R" "S" "T" "U" "V" "W" "X" "Y" "Z" - "a" "b" "c" "d" "e" "f" "g" "h" - "i" "j" "k" "l" "m" "n" "o" "p" - "q" "r" "s" "t" "u" "v" "w" "x" "y" "z"))) -(test "string-concatenate-reverse" "zyxwvutsrqponmlkjihgfedcbaZYXWVUTSRQPONMLKJIHGFEDCBA" - (string-concatenate-reverse - '("A" "B" "C" "D" "E" "F" "G" "H" - "I" "J" "K" "L" "M" "N" "O" "P" - "Q" "R" "S" "T" "U" "V" "W" "X" "Y" "Z" - "a" "b" "c" "d" "e" "f" "g" "h" - "i" "j" "k" "l" "m" "n" "o" "p" - "q" "r" "s" "t" "u" "v" "w" "x" "y" "z"))) -(test "string-concatenate-reverse" #f - (let ((s "test")) - (eq? s (string-concatenate-reverse (list s))))) -(test "string-concatenate-reverse/shared" "zyxwvutsrqponmlkjihgfedcbaZYXWVUTSRQPONMLKJIHGFEDCBA" - (string-concatenate-reverse/shared - '("A" "B" "C" "D" "E" "F" "G" "H" - "I" "J" "K" "L" "M" "N" "O" "P" - "Q" "R" "S" "T" "U" "V" "W" "X" "Y" "Z" - "a" "b" "c" "d" "e" "f" "g" "h" - "i" "j" "k" "l" "m" "n" "o" "p" - "q" "r" "s" "t" "u" "v" "w" "x" "y" "z"))) - -(test "string-map" "svool" - (string-map (lambda (c) - (integer->char (- 219 (char->integer c)))) - "hello")) -(test "string-map" "vool" - (string-map (lambda (c) - (integer->char (- 219 (char->integer c)))) - "hello" 1)) -(test "string-map" "vo" - (string-map (lambda (c) - (integer->char (- 219 (char->integer c)))) - "hello" 1 3)) -(test "string-map!" "svool" - (let ((s (string-copy "hello"))) - (string-map! (lambda (c) - (integer->char (- 219 (char->integer c)))) - s) - s)) -(test "string-map!" "hvool" - (let ((s (string-copy "hello"))) - (string-map! (lambda (c) - (integer->char (- 219 (char->integer c)))) - s 1) - s)) -(test "string-map!" "hvolo" - (let ((s (string-copy "hello"))) - (string-map! (lambda (c) - (integer->char (- 219 (char->integer c)))) - s 1 3) - s)) - -(test "string-fold" '(#\o #\l #\l #\e #\h . #t) - (string-fold cons #t "hello")) -(test "string-fold" '(#\l #\e . #t) - (string-fold cons #t "hello" 1 3)) -(test "string-fold-right" '(#\h #\e #\l #\l #\o . #t) - (string-fold-right cons #t "hello")) -(test "string-fold-right" '(#\e #\l . #t) - (string-fold-right cons #t "hello" 1 3)) - -(test "string-unfold" "hello" - (string-unfold null? car cdr '(#\h #\e #\l #\l #\o))) -(test "string-unfold" "hi hello" - (string-unfold null? car cdr '(#\h #\e #\l #\l #\o) "hi ")) -(test "string-unfold" "hi hello ho" - (string-unfold null? car cdr - '(#\h #\e #\l #\l #\o) "hi " - (lambda (x) " ho"))) - -(test "string-unfold-right" "olleh" - (string-unfold-right null? car cdr '(#\h #\e #\l #\l #\o))) -(test "string-unfold-right" "olleh hi" - (string-unfold-right null? car cdr '(#\h #\e #\l #\l #\o) " hi")) -(test "string-unfold-right" "ho olleh hi" - (string-unfold-right null? car cdr - '(#\h #\e #\l #\l #\o) " hi" - (lambda (x) "ho "))) - -(test "string-for-each" "CLtL" - (let ((out (open-output-string)) - (prev #f)) - (string-for-each (lambda (c) - (if (or (not prev) - (char-whitespace? prev)) - (write-char c out)) - (set! prev c)) - "Common Lisp, the Language") - - (get-output-string out))) -(test "string-for-each" "oLtL" - (let ((out (open-output-string)) - (prev #f)) - (string-for-each (lambda (c) - (if (or (not prev) - (char-whitespace? prev)) - (write-char c out)) - (set! prev c)) - "Common Lisp, the Language" 1) - (get-output-string out))) -(test "string-for-each" "oL" - (let ((out (open-output-string)) - (prev #f)) - (string-for-each (lambda (c) - (if (or (not prev) - (char-whitespace? prev)) - (write-char c out)) - (set! prev c)) - "Common Lisp, the Language" 1 10) - (get-output-string out))) -(test "string-for-each-index" '(4 3 2 1 0) - (let ((r '())) - (string-for-each-index (lambda (i) (set! r (cons i r))) "hello") - r)) -(test "string-for-each-index" '(4 3 2 1) - (let ((r '())) - (string-for-each-index (lambda (i) (set! r (cons i r))) "hello" 1) - r)) -(test "string-for-each-index" '(2 1) - (let ((r '())) - (string-for-each-index (lambda (i) (set! r (cons i r))) "hello" 1 3) - r)) - -(test "xsubstring" "cdefab" - (xsubstring "abcdef" 2)) -(test "xsubstring" "efabcd" - (xsubstring "abcdef" -2)) -(test "xsubstring" "abcabca" - (xsubstring "abc" 0 7)) -;; (test "xsubstring" "abcabca" -;; (xsubstring "abc" -;; 30000000000000000000000000000000 -;; 30000000000000000000000000000007)) -(test "xsubstring" "defdefd" - (xsubstring "abcdefg" 0 7 3 6)) -(test "xsubstring" "" - (xsubstring "abcdefg" 9 9 3 6)) - -(test "string-xcopy!" "ZZcdefabZZ" - (let ((s (make-string 10 #\Z))) - (string-xcopy! s 2 "abcdef" 2) - s)) -(test "string-xcopy!" "ZZdefdefZZ" - (let ((s (make-string 10 #\Z))) - (string-xcopy! s 2 "abcdef" 0 6 3) - s)) - -(test "string-replace" "abcdXYZghi" - (string-replace "abcdefghi" "XYZ" 4 6)) -(test "string-replace" "abcdZghi" - (string-replace "abcdefghi" "XYZ" 4 6 2)) -(test "string-replace" "abcdZefghi" - (string-replace "abcdefghi" "XYZ" 4 4 2)) -(test "string-replace" "abcdefghi" - (string-replace "abcdefghi" "XYZ" 4 4 1 1)) -(test "string-replace" "abcdhi" - (string-replace "abcdefghi" "" 4 7)) - -(test "string-tokenize" '("Help" "make" "programs" "run," "run," "RUN!") - (string-tokenize "Help make programs run, run, RUN!")) -(test "string-tokenize" '("Help" "make" "programs" "run" "run" "RUN") - (string-tokenize "Help make programs run, run, RUN!" - char-set:letter)) -(test "string-tokenize" '("programs" "run" "run" "RUN") - (string-tokenize "Help make programs run, run, RUN!" - char-set:letter 10)) -(test "string-tokenize" '("elp" "make" "programs" "run" "run") - (string-tokenize "Help make programs run, run, RUN!" - char-set:lower-case)) - -(test "string-filter" "rrrr" - (string-filter #\r "Help make programs run, run, RUN!")) -(test "string-filter" "HelpmakeprogramsrunrunRUN" - (string-filter char-set:letter "Help make programs run, run, RUN!")) - -(test "string-filter" "programsrunrun" - (string-filter (lambda (c) (char-lower-case? c)) - "Help make programs run, run, RUN!" - 10)) -(test "string-filter" "" - (string-filter (lambda (c) (char-lower-case? c)) "")) -(test "string-delete" "Help make pogams un, un, RUN!" - (string-delete #\r "Help make programs run, run, RUN!")) -(test "string-delete" " , , !" - (string-delete char-set:letter "Help make programs run, run, RUN!")) -(test "string-delete" " , , RUN!" - (string-delete (lambda (c) (char-lower-case? c)) - "Help make programs run, run, RUN!" - 10)) -(test "string-delete" "" - (string-delete (lambda (c) (char-lower-case? c)) "")) - - ;;; Additional tests so that the suite at least touches all -;;; the functions. - -(test "string-hash" #t (<= 0 (string-hash "abracadabra" 20) 19)) - -(test "string-hash" #t (= (string-hash "abracadabra" 20) (string-hash "abracadabra" 20))) - -(test "string-hash" #t (= (string-hash "abracadabra" 20 2 7) - (string-hash (substring "abracadabra" 2 7) 20))) - -(test "string-hash-ci" #t (= (string-hash-ci "aBrAcAdAbRa" 20) - (string-hash-ci "AbRaCaDaBrA" 20))) - -(test "string-hash-ci" #t (= (string-hash-ci "aBrAcAdAbRa" 20 2 7) - (string-hash-ci (substring "AbRaCaDaBrA" 2 7) 20))) - -(test "string=" #t (string= "foo" "foo")) -(test "string=" #t (string= "foobar" "foo" 0 3)) -(test "string=" #t (string= "foobar" "barfoo" 0 3 3)) -(test "string=" #t (not (string= "foobar" "barfoo" 0 3 2 5))) - -(test "string<>" #t (string<> "flo" "foo")) -(test "string<>" #t (string<> "flobar" "foo" 0 3)) -(test "string<>" #t (string<> "flobar" "barfoo" 0 3 3)) -(test "string<>" #t (not (string<> "foobar" "foobar" 0 3 0 3))) - -(test "string<=" #t (string<= "fol" "foo")) -(test "string<=" #t (string<= "folbar" "foo" 0 3)) -(test "string<=" #t (string<= "foobar" "barfoo" 0 3 3)) -(test "string<=" #f (string<= "foobar" "barfoo" 0 3 1 4)) - -(test "string<" #t (string< "fol" "foo")) -(test "string<" #t (string< "folbar" "foo" 0 3)) -(test "string<" #t (string< "folbar" "barfoo" 0 3 3)) -(test "string<" #t (not (string< "foobar" "barfoo" 0 3 1 4))) - -(test "string>=" #t (string>= "foo" "fol")) -(test "string>=" #t (string>= "foo" "folbar" 0 3 0 3)) -(test "string>=" #t (string>= "barfoo" "foo" 3 6 0)) -(test "string>=" #t (not (string>= "barfoo" "foobar" 1 4 0 3))) - -(test "string>" #t (string> "foo" "fol")) -(test "string>" #t (string> "foo" "folbar" 0 3 0 3)) -(test "string>" #t (string> "barfoo" "fol" 3 6 0)) -(test "string>" #t (not (string> "barfoo" "foobar" 1 4 0 3))) - -(test "string-ci=" #t (string-ci= "Foo" "foO")) -(test "string-ci=" #t (string-ci= "Foobar" "fOo" 0 3)) -(test "string-ci=" #t (string-ci= "Foobar" "bArfOo" 0 3 3)) -(test "string-ci=" #t (not (string-ci= "foobar" "BARFOO" 0 3 2 5))) - -(test "string-ci<>" #t (string-ci<> "flo" "FOO")) -(test "string-ci<>" #t (string-ci<> "FLOBAR" "foo" 0 3)) -(test "string-ci<>" #t (string-ci<> "flobar" "BARFOO" 0 3 3)) -(test "string-ci<>" #t (not (string-ci<> "foobar" "FOOBAR" 0 3 0 3))) - -(test "string-ci<=" #t (string-ci<= "FOL" "foo")) -(test "string-ci<=" #t (string-ci<= "folBAR" "fOO" 0 3)) -(test "string-ci<=" #t (string-ci<= "fOOBAR" "BARFOO" 0 3 3)) -(test "string-ci<=" #t (not (string-ci<= "foobar" "BARFOO" 0 3 1 4))) - -(test "string-ci<" #t (string-ci< "fol" "FOO")) -(test "string-ci<" #t (string-ci< "folbar" "FOO" 0 3)) -(test "string-ci<" #t (string-ci< "folbar" "BARFOO" 0 3 3)) -(test "string-ci<" #t (not (string-ci< "foobar" "BARFOO" 0 3 1 4))) - -(test "string-ci>=" #t (string-ci>= "FOO" "fol")) -(test "string-ci>=" #t (string-ci>= "foo" "FOLBAR" 0 3 0 3)) -(test "string-ci>=" #t (string-ci>= "BARFOO" "foo" 3 6 0)) -(test "string-ci>=" #t (not (string-ci>= "barfoo" "FOOBAR" 1 4 0 3))) - -(test "string-ci>" #t (string-ci> "FOO" "fol")) -(test "string-ci>" #t (string-ci> "foo" "FOLBAR" 0 3 0 3)) -(test "string-ci>" #t (string-ci> "barfoo" "FOL" 3 6 0)) -(test "string-ci>" #t (not (string-ci> "barfoo" "FOOBAR" 1 4 0 3))) - -(test "string=?" #t (string=? "abcd" (string-append/shared "a" "b" "c" "d"))) - -(test "string-parse-start+end" - #t - (let-values (((rest start end) (string-parse-start+end #t "foo" '(1 3 fnord)))) - (and (= start 1) - (= end 3) - (equal? rest '(fnord))))) - -(test "string-parse-start+end" - #t - (call-with-current-continuation - (lambda (k) - (handle-exceptions exn - (k #t) - (string-parse-start+end #t "foo" '(1 4)) - #f)))) - -(test "string-parse-start+end" - #t - (let-values (((start end) (string-parse-final-start+end #t "foo" '(1 3)))) - (and (= start 1) - (= end 3)))) - -(test "string-parse-start+end" - #t - (let-string-start+end (start end rest) #t "foo" '(1 3 fnord) - (and (= start 1) - (= end 3) - (equal? rest '(fnord))))) - -(test-assert "check-substring-spec" (check-substring-spec #t "foo" 1 3)) - -(test-assert "check-substring-spec" - (call-with-current-continuation - (lambda (k) - (handle-exceptions exn - (k #t) - (check-substring-spec #t "foo" 1 4) - #f)))) - -(test-assert "substring-spec-ok?" (substring-spec-ok? "foo" 1 3)) - -(test-assert "substring-spec-ok?" (not (substring-spec-ok? "foo" 1 4))) - -(test "make-kmp-restart-vector" '#() (make-kmp-restart-vector "")) - -(test "make-kmp-restart-vector" '#(-1) (make-kmp-restart-vector "a")) - -(test "make-kmp-restart-vector" '#(-1 0) (make-kmp-restart-vector "ab")) - -; The following is from an example in the code. It is the "optimised" -; version; it's also valid to return #(-1 0 0 0 1 2), but that will -; needlessly check the "a" twice before giving up. -(test "make-kmp-restart-vector" - '#(-1 0 0 -1 1 2) - (make-kmp-restart-vector "abdabx")) - -;; Each entry in kmp-cases is a pattern, a string to match against and -;; the expected run of the algorithm through the positions in the -;; pattern. So for example 0 1 2 means it looks at position 0 first, -;; then at 1 and then at 2. -;; -;; This is easy to verify in simple cases; If there's a shared -;; substring and matching fails, you try matching again starting at -;; the end of the shared substring, otherwise you rewind. For more -;; complex cases, it's increasingly difficult for humans to verify :) -(define kmp-cases - '(("abc" "xx" #f 0 0) - ("abc" "abc" #t 0 1 2) - ("abcd" "abc" #f 0 1 2) - ("abc" "abcd" #t 0 1 2) - ("abc" "aabc" #t 0 1 1 2) - ("ab" "aa" #f 0 1) - ("ab" "aab" #t 0 1 1) - ("abdabx" "abdbbabda" #f 0 1 2 3 0 0 1 2 3) - ("aabc" "axaabc" #t 0 1 0 1 2 3) - ("aabac" "aabaabac" #t 0 1 2 3 4 2 3 4))) - -(for-each - (lambda (test-case) - (let* ((pat (car test-case)) - (n (string-length pat)) - (str (cadr test-case)) - (match? (caddr test-case)) - (steps (cdddr test-case)) - (rv (make-kmp-restart-vector pat))) - (call-with-input-string - str - (lambda (p) - (let lp ((i 0) - (step 0) - (steps steps)) - (cond - ((or (= i n) (eof-object? (peek-char p))) - (test-assert (sprintf "KMP match? ~S, case: ~S" match? test-case) - (eq? (= i n) match?)) - (test-assert (sprintf "KMP empty remaining steps: ~S, case: ~S" - steps test-case) - (null? steps))) - (else - (let ((new-i (kmp-step pat rv (read-char p) i char=? 0)) - (expected-i (and (not (null? steps)) (car steps)))) - (test (sprintf "KMP step ~S (exp: ~S, act: ~S), case: ~S" - step expected-i i test-case) - expected-i i) - (lp new-i (add1 step) (cdr steps)))))))))) - kmp-cases) - -; FIXME! Implement tests for these: -; string-kmp-partial-search -; kmp-step - - - ;;; Regression tests: check that reported bugs have been fixed - -; From: Matthias Radestock -; Date: Wed, 10 Dec 2003 21:05:22 +0100 -; -; Chris Double has found the following bug in the reference implementation: -; -; (string-contains "xabc" "ab") => 1 ;good -; (string-contains "aabc" "ab") => #f ;bad -; -; Matthias. - -(test "string-contains" 1 (string-contains "aabc" "ab")) - -(test "string-contains" 5 (string-contains "ababdabdabxxas" "abdabx")) - -(test "string-contains-ci" 1 (string-contains-ci "aabc" "ab")) - -; (message continues) -; -; PS: There is also an off-by-one error in the bounds check of the -; unoptimized version of string-contains that is included as commented out -; code in the reference implementation. This breaks things like -; (string-contains "xab" "ab") and (string-contains "ab" "ab"). - -; This off-by-one bug has been fixed in the comments of the version -; of SRFI-13 shipped with Larceny. In a version of the code without -; the fix the following test will catch the bug: - -(test "string-contains" 0 (string-contains "ab" "ab")) - -; From: address@hidden -; Date: Wed, 26 Mar 2003 08:46:41 +0100 -; -; The SRFI document gives, -; -; string-filter s char/char-set/pred [start end] -> string -; string-delete s char/char-set/pred [start end] -> string -; -; Yet the reference implementation switches the order giving, -; -; ;;; string-delete char/char-set/pred string [start end] -; ;;; string-filter char/char-set/pred string [start end] -; ... -; (define (string-delete criterion s . maybe-start+end) -; ... -; (define (string-filter criterion s . maybe-start+end) -; -; I reviewed the SRFI-13 mailing list and c.l.scheme, but found no mention of -; this issue. Apologies if I've missed something. - -(test-assert "string=? + string-filter" - (call-with-current-continuation - (lambda (k) - (handle-exceptions exn - (k #f) - (string=? "ADR" (string-filter char-set:upper-case "abrAcaDabRa")))))) - -(test-assert "string=? + string-delete" - (call-with-current-continuation - (lambda (k) - (handle-exceptions exn - (k #f) - (string=? "abrcaaba" (string-delete char-set:upper-case "abrAcaDabRa")))))) - - -; http://srfi.schemers.org/srfi-13/post-mail-archive/msg00007.html -; From: David Van Horn -; Date: Wed, 01 Nov 2006 07:53:34 +0100 -; -; Both string-index-right and string-skip-right will continue to search -; left past a given start index. -; -; (string-index-right "abbb" #\a 1) ;; => 0, but should be #f -; (string-skip-right "abbb" #\b 1) ;; => 0, but should be #f -; -; This also causes incorrect results for string-trim-right, -; string-trim-both and string-tokenize when given a non-zero start -; argument. - -(test "string-index-right" #f (string-index-right "abbb" #\a 1)) -(test "string-skip-right" #f (string-skip-right "abbb" #\b 1)) - -;; Tests to check the string-trim-right issue found by Seth Alves -;; http://lists.gnu.org/archive/html/chicken-hackers/2014-01/msg00016.html -(test "string-trim-right" "" (string-trim-right "" char-whitespace? 0 0)) -(test "string-trim-right" "" (string-trim-right "a" char-whitespace? 0 0)) -(test "string-trim-right" "" (string-trim-right "a " char-whitespace? 0 0)) -(test "string-trim-right" "bc" (string-trim-right "abc " char-whitespace? 1)) -(test "string-trim-right" "" (string-trim-right "abc " char-whitespace? 4 4)) diff --git a/types.db b/types.db index 2621686..17b2c35 100644 --- a/types.db +++ b/types.db @@ -551,7 +551,7 @@ (string-append (#(procedure #:clean #:enforce) string-append (#!rest string) string) ((string string) (##sys#string-append #(1) #(2)))) -;(string-copy (#(procedure #:clean #:enforce) string-copy (string) string)) - we use the more general version from srfi-13 +(string-copy (#(procedure #:clean #:enforce) string-copy (string) string)) (string->list (#(procedure #:clean #:enforce) string->list (string) (list-of char))) (list->string (#(procedure #:clean #:enforce) list->string ((list-of char)) string)) @@ -2063,183 +2063,8 @@ (xcons (forall (a b) (#(procedure #:pure) xcons (a b) (pair b a)))) (zip (forall (a) (#(procedure #:clean #:enforce) zip ((list-of a) #!rest list) (list-of (pair a *))))) - -;; srfi-13 - -(check-substring-spec (#(procedure #:clean #:enforce) check-substring-spec (* string fixnum fixnum) undefined)) -(kmp-step (#(procedure #:enforce) kmp-step (string vector char fixnum (procedure (char char) *) fixnum) fixnum)) -(make-kmp-restart-vector (#(procedure #:clean #:enforce) make-kmp-restart-vector (string #!optional (procedure (* *) *) fixnum fixnum) vector)) - -(string-any - (forall (a) - (#(procedure #:enforce) - string-any - ((or char (struct char-set) (procedure (char) a)) string #!optional fixnum fixnum) - (or boolean a)))) - -(string-append/shared (#(procedure #:clean #:enforce) string-append/shared (#!rest string) string) - ((string string) (##sys#string-append #(1) #(2)))) - -(string-ci< (#(procedure #:clean #:enforce) string-ci< (string string #!optional fixnum fixnum) boolean) - ((string string) (string-ci (#(procedure #:clean #:enforce) string-ci<> (string string #!optional fixnum fixnum) boolean) - ((string string) (not (##core#inline "C_i_string_ci_equal_p" #(1) #(2))))) - -(string-ci= (#(procedure #:clean #:enforce) string-ci= (string string #!optional fixnum fixnum) boolean) - ((string string) (##core#inline "C_i_string_ci_equal_p" #(1) #(2)))) - -(string-ci> (#(procedure #:clean #:enforce) string-ci> (string string #!optional fixnum fixnum) boolean) - ((string string) (string-ci>? #(1) #(2)))) - -(string-ci>= (#(procedure #:clean #:enforce) string-ci>= (string string #!optional fixnum fixnum) boolean) - ((string string) (string-ci>=? #(1) #(2)))) - -(string-compare (#(procedure #:enforce) string-compare (string string (procedure (fixnum) *) (procedure (fixnum) *) (procedure (fixnum) *) #!optional fixnum fixnum fixnum fixnum) *)) -(string-compare-ci (#(procedure #:enforce) string-compare (string string (procedure (fixnum) *) (procedure (fixnum) *) (procedure (fixnum) *) #!optional fixnum fixnum fixnum fixnum) *)) -(string-concatenate (#(procedure #:clean #:enforce) string-concatenate ((list-of string)) string)) -(string-concatenate-reverse (#(procedure #:clean #:enforce) string-concatenate-reverse ((list-of string) #!optional string fixnum) string)) -(string-concatenate-reverse/shared (#(procedure #:clean #:enforce) string-concatenate-reverse/shared ((list-of string) #!optional string fixnum) string)) -(string-concatenate/shared (#(procedure #:clean #:enforce) string-concatenate/shared ((list-of string)) string)) -(string-contains (#(procedure #:clean #:enforce) string-contains (string string #!optional fixnum fixnum fixnum fixnum) (or fixnum false))) -(string-contains-ci (#(procedure #:clean #:enforce) string-contains-ci (string string #!optional fixnum fixnum fixnum fixnum) (or fixnum false))) -(string-copy (#(procedure #:clean #:enforce) string-copy (string #!optional fixnum fixnum) string)) -(string-copy! (#(procedure #:clean #:enforce) string-copy! (string fixnum string #!optional fixnum fixnum) undefined)) -(string-count (#(procedure #:clean #:enforce) string-count (string * #!optional fixnum fixnum) fixnum)) -(string-delete (#(procedure #:clean #:enforce) string-delete (* string #!optional fixnum fixnum) string)) -(string-downcase (#(procedure #:clean #:enforce) string-downcase (string #!optional fixnum fixnum) string)) -(string-downcase! (#(procedure #:clean #:enforce) string-downcase! (string #!optional fixnum fixnum) string)) -(string-drop (#(procedure #:clean #:enforce) string-drop (string fixnum) string)) -(string-drop-right (#(procedure #:clean #:enforce) string-drop-right (string fixnum) string)) - -(string-every - (forall (a) - (#(procedure #:enforce) - string-every - ((or char (struct char-set) (procedure (char) a)) string #!optional fixnum fixnum) - (or boolean a)))) - (string-fill! (#(procedure #:clean #:enforce) string-fill! (string char #!optional fixnum fixnum) string)) -(string-filter - (#(procedure #:enforce) - string-filter - ((or char (struct char-set) (procedure (char) *)) string #!optional fixnum fixnum) - string)) - -(string-fold (#(procedure #:enforce) string-fold ((procedure (char *) *) * string #!optional fixnum fixnum) *)) ;XXX - -(string-fold-right (#(procedure #:enforce) string-fold-right ((procedure (char *) *) * string #!optional fixnum fixnum) *)) ;XXX -(string-for-each (#(procedure #:enforce) string-for-each ((procedure (char) . *) string #!optional fixnum fixnum) undefined)) -(string-for-each-index (#(procedure #:enforce) string-for-each-index ((procedure (fixnum) . *) string #!optional fixnum fixnum) undefined)) - -(string-index - (#(procedure #:enforce) - string-index - (string (or char (struct char-set) (procedure (char) *)) #!optional fixnum fixnum) - (or fixnum false))) - -(string-index-right - (#(procedure #:enforce) - string-index-right - (string (or char (struct char-set) (procedure (char) *)) #!optional fixnum fixnum) - (or fixnum false))) - -(string-join (#(procedure #:clean #:enforce) string-join (list #!optional string symbol) string)) -(string-kmp-partial-search (#(procedure #:enforce) string-kmp-partial-search (string vector string fixnum #!optional (procedure (char char) *) fixnum fixnum fixnum) fixnum)) -(string-map (#(procedure #:enforce) string-map ((procedure (char) char) string #!optional fixnum fixnum) string)) -(string-map! (#(procedure #:enforce) string-map! ((procedure (char) char) string #!optional fixnum fixnum) string)) - -(string-null? (#(procedure #:clean #:enforce) string-null? (string) boolean) - ((string) (##core#inline "C_zero_length_p" #(1)))) - -(string-pad (#(procedure #:clean #:enforce) string-pad (string fixnum #!optional char fixnum fixnum) string)) -(string-pad-right (#(procedure #:clean #:enforce) string-pad-right (string fixnum #!optional char fixnum fixnum) string)) -(string-parse-final-start+end (#(procedure #:enforce) string-parse-final-start+end (procedure string #!rest) . *)) -(string-parse-start+end (#(procedure #:enforce) string-parse-start+end (procedure string #!rest) . *)) -(string-prefix-ci? (#(procedure #:clean #:enforce) string-prefix-ci? (string string #!optional fixnum fixnum fixnum fixnum) boolean)) -(string-prefix-length (#(procedure #:clean #:enforce) string-prefix-length (string string #!optional fixnum fixnum fixnum fixnum) fixnum)) -(string-prefix-length-ci (#(procedure #:clean #:enforce) string-prefix-length-ci (string string #!optional fixnum fixnum fixnum fixnum) fixnum)) -(string-prefix? (#(procedure #:clean #:enforce) string-prefix? (string string #!optional fixnum fixnum fixnum fixnum) boolean)) -(string-replace (#(procedure #:clean #:enforce) string-replace (string string fixnum fixnum #!optional fixnum fixnum) string)) -(string-reverse (#(procedure #:clean #:enforce) string-reverse (string #!optional fixnum fixnum) string)) -(string-reverse! (#(procedure #:clean #:enforce) string-reverse! (string #!optional fixnum fixnum) string)) - -(string-skip - (#(procedure #:enforce) - string-skip - (string (or char (struct char-set) (procedure (char) *)) #!optional fixnum fixnum) - (or fixnum false))) - -(string-skip-right - (#(procedure #:enforce) - string-skip-right - (string (or char (struct char-set) (procedure (char) *)) #!optional fixnum fixnum) - (or fixnum false))) - -(string-suffix-ci? (#(procedure #:clean #:enforce) string-suffix-ci? (string string #!optional fixnum fixnum fixnum fixnum) boolean)) -(string-suffix-length (#(procedure #:clean #:enforce) string-suffix-length (string string #!optional fixnum fixnum fixnum fixnum) fixnum)) -(string-suffix-length-ci (#(procedure #:clean #:enforce) string-suffix-length-ci (string string #!optional fixnum fixnum fixnum fixnum) fixnum)) -(string-suffix? (#(procedure #:clean #:enforce) string-suffix? (string string #!optional fixnum fixnum fixnum fixnum) boolean)) -(string-tabulate (#(procedure #:enforce) string-tabulate ((procedure (fixnum) char) fixnum) string)) -(string-take (#(procedure #:clean #:enforce) string-take (string fixnum) string)) -(string-take-right (#(procedure #:clean #:enforce) string-take-right (string fixnum) string)) -(string-titlecase (#(procedure #:clean #:enforce) string-titlecase (string #!optional fixnum fixnum) string)) -(string-titlecase! (#(procedure #:clean #:enforce) string-titlecase! (string #!optional fixnum fixnum) string)) - -(string-tokenize - (#(procedure #:clean #:enforce) string-tokenize (string #!optional (struct char-set) fixnum fixnum) list)) - -(string-trim - (#(procedure #:enforce) - string-trim - (string #!optional (or char (struct char-set) (procedure (char) *)) fixnum fixnum) - string)) - -(string-trim-both - (#(procedure #:enforce) - string-trim-both - (string #!optional (or char (struct char-set) (procedure (char) *)) fixnum fixnum) - string)) - -(string-trim-right - (#(procedure #:enforce) - string-trim-right - (string #!optional (or char (struct char-set) (procedure (char) *)) fixnum fixnum) - string)) - -(string-unfold (#(procedure #:enforce) string-unfold (procedure procedure procedure * #!optional * procedure) string)) ;XXX -(string-unfold-right (#(procedure #:enforce) string-unfold-right (procedure procedure procedure * #!optional * procedure) string)) ;XXX -(string-upcase (#(procedure #:clean #:enforce) string-upcase (string #!optional fixnum fixnum) string)) -(string-upcase! (#(procedure #:clean #:enforce) string-upcase! (string #!optional fixnum fixnum) string)) -(string-xcopy! (#(procedure #:clean #:enforce) string-xcopy! (string string string fixnum #!optional fixnum fixnum fixnum) string)) - -(string< (#(procedure #:clean #:enforce) string< (string string #!optional fixnum fixnum fixnum fixnum) boolean) - ((string string) (string (#(procedure #:clean #:enforce) string<> (string string #!optional fixnum fixnum fixnum fixnum) boolean) - ((string string) (not (##core#inline "C_i_string_equal_p" #(1) #(2))))) - -(string= (#(procedure #:clean #:enforce) string= (string string #!optional fixnum fixnum fixnum fixnum) boolean) - ((string string) (##core#inline "C_i_string_equal_p" #(1) #(2)))) - -(string> (#(procedure #:clean #:enforce) string> (string string #!optional fixnum fixnum fixnum fixnum) boolean) - ((string string) (string>? #(1) #(2)))) - -(string>= (#(procedure #:clean #:enforce) string>= (string string #!optional fixnum fixnum fixnum fixnum) boolean) - ((string string) (string>=? #(1) #(2)))) - -(substring-spec-ok? (#(procedure #:clean #:enforce) substring-spec-ok? (string fixnum fixnum) boolean)) -(substring/shared (#(procedure #:clean #:enforce) substring/shared (string fixnum #!optional fixnum) string)) -(xsubstring (#(procedure #:clean #:enforce) xsubstring (string fixnum #!optional fixnum fixnum fixnum) string)) - - ;; srfi-14 (->char-set (procedure ->char-set (*) (struct char-set)) diff --git a/utils.scm b/utils.scm index 52969b9..d102272 100644 --- a/utils.scm +++ b/utils.scm @@ -27,7 +27,7 @@ (declare (unit utils) - (uses eval extras srfi-13 posix files irregex) + (uses data-structures eval extras posix files irregex) (fixnum) (disable-interrupts) ) @@ -62,13 +62,14 @@ (escaped (if (eq? platform 'mingw32) "\"\"" "'\\''"))) (string-append (string delim) - (string-concatenate + (string-intersperse (map (lambda (c) (cond ((char=? c delim) escaped) ((char=? c #\nul) (error 'qs "NUL character can not be represented in shell string" str)) (else (string c)))) - (string->list str))) + (string->list str)) + "") (string delim)))) -- 1.7.10.4