guile-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Guile-commits] GNU Guile branch, boehm-demers-weiser-gc, updated. relea


From: Ludovic Courtès
Subject: [Guile-commits] GNU Guile branch, boehm-demers-weiser-gc, updated. release_1-9-2-331-gf28914d
Date: Wed, 09 Sep 2009 20:33:54 +0000

This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU Guile".

http://git.savannah.gnu.org/cgit/guile.git/commit/?id=f28914d0511f0b41553d8e6b92cfeed802423dc5

The branch, boehm-demers-weiser-gc has been updated
       via  f28914d0511f0b41553d8e6b92cfeed802423dc5 (commit)
       via  e354d7689aca1f6482bd90a2c367617222052265 (commit)
       via  f7f4d0477e2f67f0c1248da5d6e64044019196a3 (commit)
       via  0d05ae7c4b1eddf6257f99f44eaf5cb7b11191be (commit)
       via  7519234547acd3ced5cbe265f0bf1fcd6d6cda06 (commit)
       via  eebff6d7f1055bb59fbad24c23f8db8ce14391d5 (commit)
       via  56664c089cc6115dcfd38518e7e6c76cfd10b3b1 (commit)
       via  99e31c327e499a66c245e92836a8f1bbc50a26c8 (commit)
       via  060e305adcb5a80dba8bf137dc3d61151d1c0dff (commit)
       via  8748ffeaa770ed47192f970ef5302a7c7aa7a935 (commit)
       via  28cc8dac2f520fa9de29e93dca52e4892b945a3c (commit)
       via  18d8fcd43c8ea6b0122453b2d9f7ac10c1f36d6c (commit)
       via  25ebc0340d30d1ceb786dbc8c3fe80c6e9ae0e87 (commit)
       via  3d03f9395e2ca83183e846ee99d4f9e541771c20 (commit)
       via  5f5e7a2cd6db0a7068f00710d0cca340c043c0ea (commit)
       via  be3eb25c64eeda81eeaf1356362e0eee9b5b02fb (commit)
       via  bb15a36c25cc2dd7e9d3ea30b8bb6b99beed97d5 (commit)
       via  ba8477eccefae65191a5bc1de2b6f923fe195a91 (commit)
       via  719bb8cd5db10aeb0dad1c16227d6b6abc40e8b6 (commit)
       via  0dcd7e61534c9d1e33de904196cb505daf320a42 (commit)
       via  aa2cba9c882ba8bd69750b120d2b7ccd7250b562 (commit)
       via  4d07801b214c83761b6669b10dc0608c3081043b (commit)
       via  08ed805879a0c5fc5231522f99c7af782ac3cd8b (commit)
       via  aff31b0f9946999d9adbf582fabbbd3e523e29e8 (commit)
       via  f4cdfe6140923b5cf11d7af9a2098514d98a61a5 (commit)
       via  bde543e88b070b5fcd2b08ba8129ec1125ab0861 (commit)
       via  91772d8f8af742e864cccf5578776f09bfecf7e9 (commit)
       via  693e72891f28a1e009bd4661711d11d8f2b0e201 (commit)
       via  7165abeba8e97358410146a36c12b2a5fa5e2463 (commit)
       via  7f7b85cbf68a8b83e1ad7bc78379cf2764fc9a1b (commit)
       via  3f12aedb50122209cf773174d215dc64309b9fe4 (commit)
       via  bda0d85f0c550eeb4182de0fb6051fc51d0b25ce (commit)
       via  5f5920e012d953def86a1c04a8e8d1a14b85f1a5 (commit)
       via  fac32b518ef9f456e8f6465c00e6c6f40a1123a8 (commit)
       via  f84c500d2e29c619e6a989d0d11911fea414d795 (commit)
       via  0ffc78e38484536bd33c77439487af7a6f7dc079 (commit)
       via  6c2353e1d5b24691797572e3f6a1426c52c496ee (commit)
       via  1893df4145d045c51ec8748dac1e7f56c533f613 (commit)
       via  24d23822ee9d6a515aed8baaeff9d363fd7ec813 (commit)
       via  526ee76ac36921570708a746e73bba1cd7da2f62 (commit)
       via  6d736fdba2135de42e742924eac32e1c6bd9b79a (commit)
       via  5950cc3fcc7fb0d13be23fcccb8916fd8c2da4e2 (commit)
      from  d66b74dce74c2824726746e82a8a413463cb80fd (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
commit f28914d0511f0b41553d8e6b92cfeed802423dc5
Merge: d66b74dce74c2824726746e82a8a413463cb80fd 
e354d7689aca1f6482bd90a2c367617222052265
Author: Ludovic Courtès <address@hidden>
Date:   Wed Sep 9 22:32:02 2009 +0200

    Merge branch 'master' into boehm-demers-weiser-gc
    
    Conflicts:
        libguile/gc_os_dep.c

-----------------------------------------------------------------------

Summary of changes:
 NEWS                                     |  144 ++++++++++-
 doc/ref/api-control.texi                 |    6 +-
 doc/ref/api-data.texi                    |  281 +++++++++++++++------
 doc/ref/api-evaluation.texi              |   70 +++++
 doc/ref/api-io.texi                      |   84 ++++++-
 doc/ref/goops.texi                       |    2 +-
 doc/ref/scheme-scripts.texi              |    6 +
 guile-readline/ice-9/readline.scm        |   14 +-
 guile-readline/readline.c                |   11 +-
 libguile/chars.c                         |   68 +++---
 libguile/chars.h                         |    9 +-
 libguile/gc.h                            |    2 +-
 libguile/inet_aton.c                     |    8 +-
 libguile/load.c                          |    2 +-
 libguile/ports.c                         |   20 ++-
 libguile/print.c                         |   21 ++-
 libguile/read.c                          |   21 ++-
 libguile/srfi-14.c                       |  360 +++++++++++++++++++-------
 libguile/srfi-14.h                       |    4 +-
 libguile/srfi-14.i.c                     |   11 +-
 libguile/strings.c                       |    9 +-
 libguile/strings.h                       |    4 +
 libguile/strports.c                      |  107 ++------
 libguile/strports.h                      |    4 -
 libguile/unidata_to_charset.pl           |   14 +-
 libguile/vports.c                        |   23 ++-
 libguile/win32-socket.c                  |    2 +-
 module/language/tree-il/compile-glil.scm |    7 +-
 module/language/tree-il/primitives.scm   |    6 +-
 test-suite/tests/chars.test              |  265 +++++++++++++++++++-
 test-suite/tests/encoding-iso88591.test  |   82 +++++--
 test-suite/tests/encoding-iso88597.test  |   80 +++++--
 test-suite/tests/encoding-utf8.test      |   82 ++++++
 test-suite/tests/regexp.test             |   66 ++++--
 test-suite/tests/srfi-14.test            |  406 +++++++++++++++++++++++++++++-
 35 files changed, 1862 insertions(+), 439 deletions(-)

diff --git a/NEWS b/NEWS
index 0f2d693..9821ee2 100644
--- a/NEWS
+++ b/NEWS
@@ -10,6 +10,75 @@ prerelease, and a full NEWS corresponding to 1.8 -> 2.0.)
 
 Changes in 1.9.3 (since the 1.9.2 prerelease):
 
+
+** Files loaded with `load' will now be compiled automatically.
+
+As with files loaded via `primitive-load-path', `load' will also compile
+its target if autocompilation is enabled, and a fresh compiled file is
+not found.
+
+There are two points of difference to note, however. First, `load' does
+not search `GUILE_LOAD_COMPILED_PATH' for the file; it only looks in the
+autocompilation directory, normally a subdirectory of ~/.cache/guile.
+
+Secondly, autocompilation also applies to files loaded via the -l
+command-line argument -- so the user may experience a slight slowdown
+the first time they run a Guile script, as the script is autocompiled.
+
+** Support for non-ASCII source code files
+
+The default reader now handles source code files for some of the
+non-ASCII character encodings, such as UTF-8. A non-ASCII source file
+should have an encoding declaration near the top of the file. Also,
+there is a new function, `file-encoding', that scans a port for a coding
+declaration. See the section of the manual entitled, "Character Encoding
+of Source Files".
+
+The pre-1.9.3 reader handled 8-bit clean but otherwise unspecified source
+code.  This use is now discouraged.
+
+** Support for locale transcoding when reading from and writing to ports
+
+Ports now have an associated character encoding, and port read and write
+operations do conversion to and from locales automatically. Ports also
+have an associated strategy for how to deal with locale conversion
+failures.
+
+See the documentation in the manual for the four new support functions,
+`set-port-encoding!', `port-encoding', `set-port-conversion-strategy!',
+and `port-conversion-strategy'.
+
+** String and SRFI-13 functions can operate on Unicode strings
+
+** Unicode support for SRFI-14 character sets
+
+The default character sets are no longer locale dependent and contain
+characters from the whole Unicode range. There is a new predefined
+character set, `char-set:designated', which contains all assigned
+Unicode characters. There is a new debugging function, `%char-set-dump'.
+
+** Character functions operate on Unicode characters
+
+`char-upcase' and `char-downcase' use default Unicode casing rules.
+Character comparisons such as `char<?' and `char-ci<?' now sort based on
+Unicode code points.
+
+** Unicode symbol support
+
+One may now use U+03BB (GREEK SMALL LETTER LAMBDA) as an identifier.
+
+** New readline history functions
+
+The (ice-9 readline) module now provides add-history, read-history,
+write-history and clear-history, which wrap the corresponding GNU
+History library functions.
+
+** Removed deprecated uniform array procedures:
+   dimensions->uniform-array, list->uniform-array, array-prototype
+
+Instead, use make-typed-array, list->typed-array, or array-type,
+respectively.
+
 ** Removed deprecated uniform array procedures: scm_make_uve,
    scm_array_prototype, scm_list_to_uniform_array,
    scm_dimensions_to_uniform_array, scm_make_ra, scm_shap2ra, scm_cvref,
@@ -22,11 +91,10 @@ These functions have been deprecated since early 2005.
 Use of the second argument produced a deprecation warning, so it is
 unlikely that any code out there actually used this functionality.
 
-** Removed deprecated uniform array procedures:
-   dimensions->uniform-array, list->uniform-array, array-prototype
+** GOOPS documentation folded into Guile reference manual
 
-Instead, use make-typed-array, list->typed-array, or array-type,
-respectively.
+GOOPS, Guile's object system, used to be documented in separate manuals.
+This content is now included in Guile's manual directly.
 
 ** And of course, the usual collection of bugfixes
  
@@ -108,6 +176,20 @@ For example, the old (lang elisp) modules are meant to be 
interpreted,
 not compiled. This bug will be fixed before 2.0. FIXME 2.0: Should say
 something here about module-transformer called for compile.
 
+** Files loaded with `load' will now be compiled automatically.
+
+As with files loaded via `primitive-load-path', `load' will also compile
+its target if autocompilation is enabled, and a fresh compiled file is
+not found.
+
+There are two points of difference to note, however. First, `load' does
+not search `GUILE_LOAD_COMPILED_PATH' for the file; it only looks in the
+autocompilation directory, normally a subdirectory of ~/.cache/guile.
+
+Secondly, autocompilation also applies to files loaded via the -l
+command-line argument -- so the user may experience a slight slowdown
+the first time they run a Guile script, as the script is autocompiled.
+
 ** New POSIX procedures: `getrlimit' and `setrlimit'
 
 Note however that the interface of these functions is likely to change
@@ -492,9 +574,52 @@ Internally, strings are now represented either in the 
`latin-1'
 encoding, one byte per character, or in UTF-32, with four bytes per
 character. Strings manage their own allocation, switching if needed.
 
-Currently no locale conversion is performed. Extended characters may be
-written in a string using the hexadecimal escapes `\xXX', `\uXXXX', or
-`\UXXXXXX', for 8-bit, 16-bit, or 24-bit codepoints, respectively.
+Extended characters may be written in a literal string using the
+hexadecimal escapes `\xXX', `\uXXXX', or `\UXXXXXX', for 8-bit, 16-bit,
+or 24-bit codepoints, respectively, or entered directly in the native
+encoding of the port on which the string is read.
+
+** Unicode symbols
+
+One may now use U+03BB (GREEK SMALL LETTER LAMBDA) as an identifier.
+
+** Support for non-ASCII source code files
+
+The default reader now handles source code files for some of the
+non-ASCII character encodings, such as UTF-8. A non-ASCII source file
+should have an encoding declaration near the top of the file. Also,
+there is a new function, `file-encoding', that scans a port for a coding
+declaration. See the section of the manual entitled, "Character Encoding
+of Source Files".
+
+The pre-1.9.3 reader handled 8-bit clean but otherwise unspecified source
+code.  This use is now discouraged.
+
+** Support for locale transcoding when reading from and writing to ports
+
+Ports now have an associated character encoding, and port read and write
+operations do conversion to and from locales automatically. Ports also
+have an associated strategy for how to deal with locale conversion
+failures.
+
+See the documentation in the manual for the four new support functions,
+`set-port-encoding!', `port-encoding', `set-port-conversion-strategy!',
+and `port-conversion-strategy'.
+
+** String and SRFI-13 functions can operate on Unicode strings
+
+** Unicode support for SRFI-14 character sets
+
+The default character sets are no longer locale dependent and contain
+characters from the whole Unicode range. There is a new predefined
+character set, `char-set:designated', which contains all assigned
+Unicode characters. There is a new debugging function, `%char-set-dump'.
+
+** Character functions operate on Unicode characters
+
+`char-upcase' and `char-downcase' use default Unicode casing rules.
+Character comparisons such as `char<?' and `char-ci<?' now sort based on
+Unicode code points.
 
 ** Global variables `scm_charnames' and `scm_charnums' are removed
 
@@ -613,6 +738,11 @@ In other words the GNU Lesser General Public License, 
version 3 or
 later (at the discretion of each person that chooses to redistribute
 part of Guile).
 
+** GOOPS documentation folded into Guile reference manual
+
+GOOPS, Guile's object system, used to be documented in separate manuals.
+This content is now included in Guile's manual directly.
+
 ** `guile-config' will be deprecated in favor of `pkg-config'
 
 `guile-config' has been rewritten to get its information from
diff --git a/doc/ref/api-control.texi b/doc/ref/api-control.texi
index e7614d1..66fb99e 100644
--- a/doc/ref/api-control.texi
+++ b/doc/ref/api-control.texi
@@ -1,6 +1,6 @@
 @c -*-texinfo-*-
 @c This is part of the GNU Guile Reference Manual.
address@hidden Copyright (C)  1996, 1997, 2000, 2001, 2002, 2003, 2004
address@hidden Copyright (C)  1996, 1997, 2000, 2001, 2002, 2003, 2004, 2009
 @c   Free Software Foundation, Inc.
 @c See the file guile.texi for copying conditions.
 
@@ -1506,8 +1506,8 @@ which is the name of the procedure incorrectly invoked.
 @subsection Continuation Barriers
 
 The non-local flow of control caused by continuations might sometimes
-not be wanted.  You can use @code{with-continuation-barrier} etc to
-errect fences that continuations can not pass.
+not be wanted. You can use @code{with-continuation-barrier} to erect
+fences that continuations can not pass.
 
 @deffn {Scheme Procedure} with-continuation-barrier proc
 @deffnx {C Function} scm_with_continuation_barrier (proc)
diff --git a/doc/ref/api-data.texi b/doc/ref/api-data.texi
index 0fd4ee1..cf0d321 100755
--- a/doc/ref/api-data.texi
+++ b/doc/ref/api-data.texi
@@ -539,7 +539,7 @@ error.  Instead, the result of the division is either plus 
or minus
 infinity, depending on the sign of the divided number.
 
 The infinities are written @samp{+inf.0} and @samp{-inf.0},
-respectivly.  This syntax is also recognized by @code{read} as an
+respectively.  This syntax is also recognized by @code{read} as an
 extension to the usual Scheme syntax.
 
 Dividing zero by zero yields something that is not a number at all:
@@ -637,7 +637,7 @@ magnitude.  The argument @var{val} must be a real number.
 @end deftypefn
 
 @deftypefn {C Function} SCM scm_from_double (double val)
-Return the @code{SCM} value that representats @var{val}.  The returned
+Return the @code{SCM} value that represents @var{val}.  The returned
 value is inexact according to the predicate @code{inexact?}, but it
 will be exactly equal to @var{val}.
 @end deftypefn
@@ -1779,16 +1779,66 @@ another manual.
 @subsection Characters
 @tpindex Characters
 
+In Scheme, there is a data type to describe a single character.  
+
+Defining what exactly a character @emph{is} can be more complicated
+than it seems.  Guile follows the advice of R6RS and uses The Unicode
+Standard to help define what a character is.  So, for Guile, a
+character is anything in the Unicode Character Database.
+
address@hidden code point
address@hidden Unicode code point
+
+The Unicode Character Database is basically a table of characters
+indexed using integers called 'code points'.  Valid code points are in
+the ranges 0 to @code{#xD7FF} inclusive or @code{#xE000} to
address@hidden inclusive, which is about 1.1 million code points.
+
address@hidden designated code point
address@hidden code point, designated
+
+Any code point that has been assigned to a character or that has
+otherwise been given a meaning by Unicode is called a 'designated code
+point'.  Most of the designated code points, about 200,000 of them,
+indicate characters, accents or other combining marks that modify
+other characters, symbols, whitespace, and control characters.  Some
+are not characters but indicators that suggest how to format or
+display neighboring characters.
+
address@hidden reserved code point
address@hidden code point, reserved
+
+If a code point is not a designated code point -- if it has not been
+assigned to a character by The Unicode Standard -- it is a 'reserved
+code point', meaning that they are reserved for future use.  Most of
+the code points, about 800,000, are 'reserved code points'.
+
+By convention, a Unicode code point is written as
+``U+XXXX'' where ``XXXX'' is a hexadecimal number.  Please note that
+this convenient notation is not valid code.  Guile does not interpret
+``U+XXXX'' as a character.
+
 In Scheme, a character literal is written as @address@hidden where
 @var{name} is the name of the character that you want.  Printable
 characters have their usual single character name; for example,
address@hidden is a lower case @code{a}.
address@hidden is a lower case @code{a}.  
+
+Some of the code points are 'combining characters' that are not meant
+to be printed by themselves but are instead meant to modify the
+appearance of the previous character.  For combining characters, an
+alternate form of the character literal is @code{#\} followed by
+U+25CC (a small, dotted circle), followed by the combining character.
+This allows the combining character to be drawn on the circle, not on
+the backslash of @code{#\}.
+
+Many of the non-printing characters, such as whitespace characters and
+control characters, also have names.
 
-Most of the ``control characters'' (those below codepoint 32) in the
address@hidden character set, as well as the space, may be referred
-to by longer names: for example, @code{#\tab}, @code{#\esc},
address@hidden, and so on.  The following table describes the
address@hidden names for each character.
+The most commonly used non-printing characters are space and
+newline.  Their character names are @code{#\space} and
address@hidden  There are also names for all of the ``C0 control
+characters'' (those with code points below 32).  The following table
+describes the names for each character.
 
 @multitable @columnfractions .25 .25 .25 .25
 @item 0 = @code{#\nul}
@@ -1801,9 +1851,9 @@ to by longer names: for example, @code{#\tab}, 
@code{#\esc},
  @tab 7 = @code{#\bel}
 @item 8 = @code{#\bs}
  @tab 9 = @code{#\ht}
- @tab 10 = @code{#\nl}
+ @tab 10 = @code{#\lf} 
  @tab 11 = @code{#\vt}
address@hidden 12 = @code{#\np}
address@hidden 12 = @code{#\ff}
  @tab 13 = @code{#\cr}
  @tab 14 = @code{#\so}
  @tab 15 = @code{#\si}
@@ -1826,85 +1876,112 @@ to by longer names: for example, @code{#\tab}, 
@code{#\esc},
 @item 32 = @code{#\sp}
 @end multitable
 
-The ``delete'' character (octal 177) may be referred to with the name
address@hidden
+The ``delete'' character (code point U+007F) may be referred to with the
+name @code{#\del}.
 
-Several characters have more than one name:
+One might note that the space character has two names --
address@hidden and @code{#\sp} -- as does the newline character.
+Several other non-printing characters have more than one name, for the
+sake of compatibility with previous versions.
 
address@hidden address@hidden {Original}
address@hidden Alias @tab Original
address@hidden @code{#\space} @tab @code{#\sp}
address@hidden @code{#\newline} @tab @code{#\nl}
address@hidden address@hidden {Preferred}
address@hidden Alternate @tab Standard
address@hidden @code{#\sp} @tab @code{#\space}
address@hidden @code{#\nl} @tab @code{#\newline}
address@hidden @code{#\lf} @tab @code{#\newline}
 @item @code{#\tab} @tab @code{#\ht}
 @item @code{#\backspace} @tab @code{#\bs}
 @item @code{#\return} @tab @code{#\cr}
address@hidden @code{#\page} @tab @code{#\np}
address@hidden @code{#\page} @tab @code{#\ff}
address@hidden @code{#\np} @tab @code{#\ff}
 @item @code{#\null} @tab @code{#\nul}
 @end multitable
 
+Characters may also be written using their code point values.  They can
+be written with as an octal number, such as @code{#\10} for
address@hidden or @code{#\177} for @code{#\del}.
+
 @rnindex char?
 @deffn {Scheme Procedure} char? x
 @deffnx {C Function} scm_char_p (x)
 Return @code{#t} iff @var{x} is a character, else @code{#f}.
 @end deffn
 
+Fundamentally, the character comparison operations below are
+numeric comparisons of the character's code points.
+
 @rnindex char=?
 @deffn {Scheme Procedure} char=? x y
-Return @code{#t} iff @var{x} is the same character as @var{y}, else @code{#f}.
+Return @code{#t} iff code point of @var{x} is equal to the code point
+of @var{y}, else @code{#f}.
 @end deffn
 
 @rnindex char<?
 @deffn {Scheme Procedure} char<? x y
-Return @code{#t} iff @var{x} is less than @var{y} in the @acronym{ASCII} 
sequence,
-else @code{#f}.
+Return @code{#t} iff the code point of @var{x} is less than the code
+point of @var{y}, else @code{#f}.
 @end deffn
 
 @rnindex char<=?
 @deffn {Scheme Procedure} char<=? x y
-Return @code{#t} iff @var{x} is less than or equal to @var{y} in the
address@hidden sequence, else @code{#f}.
+Return @code{#t} iff the code point of @var{x} is less than or equal
+to the code point of @var{y}, else @code{#f}.
 @end deffn
 
 @rnindex char>?
 @deffn {Scheme Procedure} char>? x y
-Return @code{#t} iff @var{x} is greater than @var{y} in the @acronym{ASCII}
-sequence, else @code{#f}.
+Return @code{#t} iff the code point of @var{x} is greater than the
+code point of @var{y}, else @code{#f}.
 @end deffn
 
 @rnindex char>=?
 @deffn {Scheme Procedure} char>=? x y
-Return @code{#t} iff @var{x} is greater than or equal to @var{y} in the
address@hidden sequence, else @code{#f}.
+Return @code{#t} iff the code point of @var{x} is greater than or
+equal to the code point of @var{y}, else @code{#f}.
 @end deffn
 
address@hidden case folding
+
+Case-insensitive character comparisons use @emph{Unicode case
+folding}.  In case folding comparisons, if a character is lowercase
+and has an uppercase form that can be expressed as a single character,
+it is converted to uppercase before comparison.  All other characters
+undergo no conversion before the comparison occurs.  This includes the
+German sharp S (Eszett) which is not uppercased before conversion
+because its uppercase form has two characters.  Unicode case folding
+is language independent: it uses rules that are generally true, but,
+it cannot cover all cases for all languages.
+
 @rnindex char-ci=?
 @deffn {Scheme Procedure} char-ci=? x y
-Return @code{#t} iff @var{x} is the same character as @var{y} ignoring
-case, else @code{#f}.
+Return @code{#t} iff the case-folded code point of @var{x} is the same
+as the case-folded code point of @var{y}, else @code{#f}.
 @end deffn
 
 @rnindex char-ci<?
 @deffn {Scheme Procedure} char-ci<? x y
-Return @code{#t} iff @var{x} is less than @var{y} in the @acronym{ASCII} 
sequence
-ignoring case, else @code{#f}.
+Return @code{#t} iff the case-folded code point of @var{x} is less
+than the case-folded code point of @var{y}, else @code{#f}.
 @end deffn
 
 @rnindex char-ci<=?
 @deffn {Scheme Procedure} char-ci<=? x y
-Return @code{#t} iff @var{x} is less than or equal to @var{y} in the
address@hidden sequence ignoring case, else @code{#f}.
+Return @code{#t} iff the case-folded code point of @var{x} is less
+than or equal to the case-folded code point of @var{y}, else
address@hidden
 @end deffn
 
 @rnindex char-ci>?
 @deffn {Scheme Procedure} char-ci>? x y
-Return @code{#t} iff @var{x} is greater than @var{y} in the @acronym{ASCII}
-sequence ignoring case, else @code{#f}.
+Return @code{#t} iff the case-folded code point of @var{x} is greater
+than the case-folded code point of @var{y}, else @code{#f}.
 @end deffn
 
 @rnindex char-ci>=?
 @deffn {Scheme Procedure} char-ci>=? x y
-Return @code{#t} iff @var{x} is greater than or equal to @var{y} in the
address@hidden sequence ignoring case, else @code{#f}.
+Return @code{#t} iff the case-folded code point of @var{x} is greater
+than or equal to the case-folded code point of @var{y}, else
address@hidden
 @end deffn
 
 @rnindex char-alphabetic?
@@ -1946,14 +2023,15 @@ Return @code{#t} iff @var{chr} is either uppercase or 
lowercase, else
 @rnindex char->integer
 @deffn {Scheme Procedure} char->integer chr
 @deffnx {C Function} scm_char_to_integer (chr)
-Return the number corresponding to ordinal position of @var{chr} in the
address@hidden sequence.
+Return the code point of @var{chr}.
 @end deffn
 
 @rnindex integer->char
 @deffn {Scheme Procedure} integer->char n
 @deffnx {C Function} scm_integer_to_char (n)
-Return the character at position @var{n} in the @acronym{ASCII} sequence.
+Return the character that has code point @var{n}.  The integer @var{n}
+must be a valid code point.  Valid code points are in the ranges 0 to
address@hidden inclusive or @code{#xE000} to @code{#x10FFFF} inclusive.
 @end deffn
 
 @rnindex char-upcase
@@ -1981,12 +2059,6 @@ handling them are provided.
 Character sets can be created, extended, tested for the membership of a
 characters and be compared to other character sets.
 
-The Guile implementation of character sets currently deals only with
-8-bit characters.  In the future, when Guile gets support for
-international character sets, this will change, but the functions
-provided here will always then be able to efficiently cope with very
-large character sets.
-
 @menu
 * Character Set Predicates/Comparison::
 * Iterating Over Character Sets::  Enumerate charset elements.
@@ -2185,7 +2257,7 @@ character codes lie in the half-open range
 If @var{error} is a true value, an error is signalled if the
 specified range contains characters which are not contained in
 the implemented character range.  If @var{error} is @code{#f},
-these characters are silently left out of the resultung
+these characters are silently left out of the resulting
 character set.
 
 The characters in @var{base_cs} are added to the result, if
@@ -2201,7 +2273,7 @@ character codes lie in the half-open range
 If @var{error} is a true value, an error is signalled if the
 specified range contains characters which are not contained in
 the implemented character range.  If @var{error} is @code{#f},
-these characters are silently left out of the resultung
+these characters are silently left out of the resulting
 character set.
 
 The characters are added to @var{base_cs} and @var{base_cs} is
@@ -2210,7 +2282,10 @@ returned.
 
 @deffn {Scheme Procedure} ->char-set x
 @deffnx {C Function} scm_to_char_set (x)
-Coerces x into a char-set. @var{x} may be a string, character or char-set. A 
string is converted to the set of its constituent characters; a character is 
converted to a singleton set; a char-set is returned as-is.
+Coerces x into a char-set. @var{x} may be a string, character or
+char-set. A string is converted to the set of its constituent
+characters; a character is converted to a singleton set; a char-set is
+returned as-is.
 @end deffn
 
 @c ===================================================================
@@ -2221,6 +2296,23 @@ Coerces x into a char-set. @var{x} may be a string, 
character or char-set. A str
 Access the elements and other information of a character set with these
 procedures.
 
address@hidden {Scheme Procedure} %char-set-dump cs
+Returns an association list containing debugging information
+for @var{cs}. The association list has the following entries.
address@hidden @code
address@hidden char-set
+The char-set itself
address@hidden len
+The number of groups of contiguous code points the char-set
+contains
address@hidden ranges
+A list of lists where each sublist is a range of code points
+and their associated characters
address@hidden table
+The return value of this function cannot be relied upon to be
+consistent between versions of Guile and should not be used in code.
address@hidden deffn
+
 @deffn {Scheme Procedure} char-set-size cs
 @deffnx {C Function} scm_char_set_size (cs)
 Return the number of elements in character set @var{cs}.
@@ -2302,6 +2394,12 @@ must be a character set.
 Return the complement of the character set @var{cs}.
 @end deffn
 
+Note that the complement of a character set is likely to contain many
+reserved code points (code points that are not associated with
+characters).  It may be helpful to modify the output of
address@hidden by computing its intersection with the set
+of designated code points, @code{char-set:designated}.
+
 @deffn {Scheme Procedure} char-set-union . rest
 @deffnx {C Function} scm_char_set_union (rest)
 Return the union of all argument character sets.
@@ -2371,12 +2469,10 @@ useful, several predefined character set variables 
exist.
 @cindex charset
 @cindex locale
 
-Currently, the contents of these character sets are recomputed upon a
-successful @code{setlocale} call (@pxref{Locales}) in order to reflect
-the characters available in the current locale's codeset.  For
-instance, @code{char-set:letter} contains 52 characters under an ASCII
-locale (e.g., the default @code{C} locale) and 117 characters under an
-ISO-8859-1 (``Latin-1'') locale.
+These character sets are locale independent and are not recomputed
+upon a @code{setlocale} call.  They contain characters from the whole
+range of Unicode code points. For instance, @code{char-set:letter}
+contains about 94,000 characters.
 
 @defvr {Scheme Variable} char-set:lower-case
 @defvrx {C Variable} scm_char_set_lower_case
@@ -2390,13 +2486,16 @@ All upper-case characters.
 
 @defvr {Scheme Variable} char-set:title-case
 @defvrx {C Variable} scm_char_set_title_case
-This is empty, because ASCII has no titlecase characters.
+All single characters that function as if they were an upper-case
+letter followed by a lower-case letter.
 @end defvr
 
 @defvr {Scheme Variable} char-set:letter
 @defvrx {C Variable} scm_char_set_letter
-All letters, e.g. the union of @code{char-set:lower-case} and
address@hidden:upper-case}.
+All letters.  This includes @code{char-set:lower-case},
address@hidden:upper-case}, @code{char-set:title-case}, and many
+letters that have no case at all.  For example, Chinese and Japanese
+characters typically have no concept of case.
 @end defvr
 
 @defvr {Scheme Variable} char-set:digit
@@ -2426,23 +2525,26 @@ All whitespace characters.
 
 @defvr {Scheme Variable} char-set:blank
 @defvrx {C Variable} scm_char_set_blank
-All horizontal whitespace characters, that is @code{#\space} and
address@hidden
+All horizontal whitespace characters, which notably includes
address@hidden and @code{#\tab}.
 @end defvr
 
 @defvr {Scheme Variable} char-set:iso-control
 @defvrx {C Variable} scm_char_set_iso_control
-The ISO control characters with the codes 0--31 and 127.
+The ISO control characters are the C0 control characters (U+0000 to
+U+001F), delete (U+007F), and the C1 control characters (U+0080 to
+U+009F).
 @end defvr
 
 @defvr {Scheme Variable} char-set:punctuation
 @defvrx {C Variable} scm_char_set_punctuation
-The characters @code{!"#%&'()*,-./:;?@@address@hidden@}}
+All punctuation characters, such as the characters
address@hidden"#%&'()*,-./:;?@@address@hidden@}}
 @end defvr
 
 @defvr {Scheme Variable} char-set:symbol
 @defvrx {C Variable} scm_char_set_symbol
-The characters @code{$+<=>^`|~}.
+All symbol characters, such as the characters @code{$+<=>^`|~}.
 @end defvr
 
 @defvr {Scheme Variable} char-set:hex-digit
@@ -2460,9 +2562,17 @@ All ASCII characters.
 The empty character set.
 @end defvr
 
address@hidden {Scheme Variable} char-set:designated
address@hidden {C Variable} scm_char_set_designated
+This character set contains all designated code points.  This includes
+all the code points to which Unicode has assigned a character or other
+meaning.
address@hidden defvr
+
 @defvr {Scheme Variable} char-set:full
 @defvrx {C Variable} scm_char_set_full
-This character set contains all possible characters.
+This character set contains all possible code points.  This includes
+both designated and reserved code points.
 @end defvr
 
 @node Strings
@@ -2490,7 +2600,7 @@ memory.
 
 When one of these two strings is modified, as with @code{string-set!},
 their common memory does get copied so that each string has its own
-memory and modifying one does not accidently modify the other as well.
+memory and modifying one does not accidentally modify the other as well.
 Thus, Guile's strings are `copy on write'; the actual copying of their
 memory is delayed until one string is written to.
 
@@ -2580,6 +2690,14 @@ Vertical tab character (ASCII 11).
 @item @nicode{\xHH}
 Character code given by two hexadecimal digits.  For example
 @nicode{\x7f} for an ASCII DEL (127).
+
address@hidden @nicode{\uHHHH}
+Character code given by four hexadecimal digits.  For example
address@hidden for a capital A with macron (U+0100).
+
address@hidden @nicode{\UHHHHHH}
+Character code given by six hexadecimal digits.  For example
address@hidden
 @end table
 
 @noindent
@@ -2910,7 +3028,7 @@ characters.
 @deffnx {C Function} scm_string_trim (s, char_pred, start, end)
 @deffnx {C Function} scm_string_trim_right (s, char_pred, start, end)
 @deffnx {C Function} scm_string_trim_both (s, char_pred, start, end)
-Trim occurrances of @var{char_pred} from the ends of @var{s}.
+Trim occurrences of @var{char_pred} from the ends of @var{s}.
 
 @code{string-trim} trims @var{char_pred} characters from the left
 (start) of the string, @code{string-trim-right} trims them from the
@@ -3000,9 +3118,14 @@ The procedures in this section are similar to the 
character ordering
 predicates (@pxref{Characters}), but are defined on character sequences.
 
 The first set is specified in R5RS and has names that end in @code{?}.
-The second set is specified in SRFI-13 and the names have no ending
address@hidden  The predicates ending in @code{-ci} ignore the character case
-when comparing strings.  @xref{Text Collation, the @code{(ice-9
+The second set is specified in SRFI-13 and the names have not ending
address@hidden  
+
+The predicates ending in @code{-ci} ignore the character case
+when comparing strings.  For now, case-insensitive comparison is done
+using the R5RS rules, where every lower-case character that has a
+single character upper-case form is converted to uppercase before
+comparison.  See @xref{Text Collation, the @code{(ice-9
 i18n)} module}, for locale-dependent string comparison.
 
 @rnindex string=?
@@ -3192,14 +3315,14 @@ Compute a hash value for @var{S}.  the optional 
argument @var{bound} is a non-ne
 @deffn {Scheme Procedure} string-index s char_pred [start [end]]
 @deffnx {C Function} scm_string_index (s, char_pred, start, end)
 Search through the string @var{s} from left to right, returning
-the index of the first occurence of a character which
+the index of the first occurrence of a character which
 
 @itemize @bullet
 @item
 equals @var{char_pred}, if it is character,
 
 @item
-satisifies the predicate @var{char_pred}, if it is a procedure,
+satisfies the predicate @var{char_pred}, if it is a procedure,
 
 @item
 is in the set @var{char_pred}, if it is a character set.
@@ -3209,14 +3332,14 @@ is in the set @var{char_pred}, if it is a character set.
 @deffn {Scheme Procedure} string-rindex s char_pred [start [end]]
 @deffnx {C Function} scm_string_rindex (s, char_pred, start, end)
 Search through the string @var{s} from right to left, returning
-the index of the last occurence of a character which
+the index of the last occurrence of a character which
 
 @itemize @bullet
 @item
 equals @var{char_pred}, if it is character,
 
 @item
-satisifies the predicate @var{char_pred}, if it is a procedure,
+satisfies the predicate @var{char_pred}, if it is a procedure,
 
 @item
 is in the set if @var{char_pred} is a character set.
@@ -3270,14 +3393,14 @@ Is @var{s1} a suffix of @var{s2}, ignoring character 
case?
 @deffn {Scheme Procedure} string-index-right s char_pred [start [end]]
 @deffnx {C Function} scm_string_index_right (s, char_pred, start, end)
 Search through the string @var{s} from right to left, returning
-the index of the last occurence of a character which
+the index of the last occurrence of a character which
 
 @itemize @bullet
 @item
 equals @var{char_pred}, if it is character,
 
 @item
-satisifies the predicate @var{char_pred}, if it is a procedure,
+satisfies the predicate @var{char_pred}, if it is a procedure,
 
 @item
 is in the set if @var{char_pred} is a character set.
@@ -3287,14 +3410,14 @@ is in the set if @var{char_pred} is a character set.
 @deffn {Scheme Procedure} string-skip s char_pred [start [end]]
 @deffnx {C Function} scm_string_skip (s, char_pred, start, end)
 Search through the string @var{s} from left to right, returning
-the index of the first occurence of a character which
+the index of the first occurrence of a character which
 
 @itemize @bullet
 @item
 does not equal @var{char_pred}, if it is character,
 
 @item
-does not satisify the predicate @var{char_pred}, if it is a
+does not satisfy the predicate @var{char_pred}, if it is a
 procedure,
 
 @item
@@ -3305,7 +3428,7 @@ is not in the set if @var{char_pred} is a character set.
 @deffn {Scheme Procedure} string-skip-right s char_pred [start [end]]
 @deffnx {C Function} scm_string_skip_right (s, char_pred, start, end)
 Search through the string @var{s} from right to left, returning
-the index of the last occurence of a character which
+the index of the last occurrence of a character which
 
 @itemize @bullet
 @item
@@ -3330,7 +3453,7 @@ Return the count of the number of characters in the string
 equals @var{char_pred}, if it is character,
 
 @item
-satisifies the predicate @var{char_pred}, if it is a procedure.
+satisfies the predicate @var{char_pred}, if it is a procedure.
 
 @item
 is in the set @var{char_pred}, if it is a character set.
diff --git a/doc/ref/api-evaluation.texi b/doc/ref/api-evaluation.texi
index d841215..9fc5ef5 100644
--- a/doc/ref/api-evaluation.texi
+++ b/doc/ref/api-evaluation.texi
@@ -17,6 +17,7 @@ loading, evaluating, and compiling Scheme code at run time.
 * Fly Evaluation::              Procedures for on the fly evaluation.
 * Compilation::                 How to compile Scheme files and procedures.
 * Loading::                     Loading Scheme code from file.
+* Character Encoding of Source Files:: Loading non-ASCII Scheme code from file.
 * Delayed Evaluation::          Postponing evaluation until it is needed.
 * Local Evaluation::            Evaluation in a local environment.
 * Evaluator Behaviour::         Modifying Guile's evaluator.
@@ -229,6 +230,12 @@ Thus a Guile script often starts like this.
 More details on Guile scripting can be found in the scripting section
 (@pxref{Guile Scripting}).
 
+There is one special case where the contents of a comment can actually
+affect the interpretation of code.  When a character encoding
+declaration, such as @code{coding: utf-8} appears in one of the first
+few lines of a source file, it indicates to Guile's default reader
+that this source code file is not ASCII.  For details see @ref{Character
+Encoding of Source Files}.
 
 @node Case Sensitivity
 @subsubsection Case Sensitivity
@@ -590,6 +597,69 @@ a file to load.  By default, @code{%load-extensions} is 
bound to the
 list @code{("" ".scm")}.
 @end defvar
 
address@hidden Character Encoding of Source Files
address@hidden Character Encoding of Source Files
+
address@hidden primitive-load
address@hidden load
+Scheme source code files are usually encoded in ASCII, but, the
+built-in reader can interpret other character encodings.  The
+procedure @code{primitive-load}, and by extension the functions that
+call it, such as @code{load}, first scan the top 500 characters of the
+file for a coding declaration.
+
+A coding declaration has the form @code{coding: XXXXXX}, where
address@hidden is the name of a character encoding in which the source
+code file has been encoded.  The coding declaration must appear in a
+scheme comment.  It can either be a semicolon-initiated comment or a block
address@hidden comment.
+
+The name of the character encoding in the coding declaration is
+typically lower case and containing only letters, numbers, and
+hyphens.  The most common examples of character encodings are
address@hidden and @code{iso-8859-1}.  This allows the coding
+declaration to be compatible with EMACS.
+
+For source code, only a subset of all possible character encodings can
+be interpreted by the built-in source code reader.  Only those
+character encodings in which ASCII text appears unmodified can be
+used.  This includes @code{UTF-8} and @code{ISO-8859-1} through
address@hidden  The multi-byte character encodings @code{UTF-16}
+and @code{UTF-32} may not be used because they are not compatible with
+ASCII.
+
address@hidden read
address@hidden set-port-encoding!
+There might be a scenario in which one would want to read non-ASCII
+code from a port, such as with the function @code{read}, instead of
+with @code{load}.  If the port's character encoding is the same as the
+encoding of the code to be read by the port, not other special
+handling is necessary.  The port will automatically do the character
+encoding conversion.  The functions @code{setlocale} or by
address@hidden are used to set port encodings.
+
+If a port is used to read code of unknown character encoding, it can
+accomplish this in three steps.  First, the character encoding of the
+port should be set to ISO-8859-1 using @code{set-port-encoding!}.
+Then, the procedure @code{file-encoding}, described below, is used to
+scan for a coding declaration when reading from the port.  As a side
+effect, it rewinds the port after its scan is complete. After that,
+the port's character encoding should be set to the encoding returned
+by @code{file-encoding}, if any, again by using
address@hidden  Then the code can be read as normal.
+
address@hidden {Scheme Procedure} file-encoding port
address@hidden {C Function} scm_file_encoding port
+Scans the port for an EMACS-like character coding declaration near the
+top of the contents of a port with random-acessible contents.  The
+coding declaration is of the form @code{coding: XXXXX} and must appear
+in a scheme comment.
+
+Returns a string containing the character encoding of the file
+if a declaration was found, or @code{#f} otherwise.  The port is
+rewound.
address@hidden deffn
+
 
 @node Delayed Evaluation
 @subsection Delayed Evaluation
diff --git a/doc/ref/api-io.texi b/doc/ref/api-io.texi
index 96cd147..83a2fd7 100644
--- a/doc/ref/api-io.texi
+++ b/doc/ref/api-io.texi
@@ -47,7 +47,7 @@ are two interesting and powerful examples of this technique.
 
 Ports are garbage collected in the usual way (@pxref{Memory
 Management}), and will be closed at that time if not already closed.
-In this case any errors occuring in the close will not be reported.
+In this case any errors occurring in the close will not be reported.
 Usually a program will want to explicitly close so as to be sure all
 its operations have been successful.  Of course if a program has
 abandoned something due to an error or other condition then closing
@@ -70,6 +70,18 @@ All file access uses the ``LFS'' large file support 
functions when
 available, so files bigger than 2 Gbytes (@math{2^31} bytes) can be
 read and written on a 32-bit system.
 
+Each port has an associated character encoding that controls how bytes
+read from the port are converted to characters and string and controls
+how characters and strings written to the port are converted to bytes.
+When ports are created, they inherit their character encoding from the
+current locale, but, that can be modified after the port is created.
+
+Each port also has an associated conversion strategy: what to do when
+a Guile character can't be converted to the port's encoded character
+representation for output. There are three possible strategies: to
+raise an error, to replace the character with a hex escape, or to
+replace the character with a substitute character.
+
 @rnindex input-port?
 @deffn {Scheme Procedure} input-port? x
 @deffnx {C Function} scm_input_port_p (x)
@@ -93,6 +105,55 @@ Equivalent to @code{(or (input-port? @var{x}) (output-port?
 @var{x}))}.
 @end deffn
 
address@hidden {Scheme Procedure} set-port-encoding! port enc
address@hidden {C Function} scm_set_port_encoding_x (port, enc)
+Sets the character encoding that will be used to interpret all port
+I/O.  @var{enc} is a string containing the name of an encoding.  
address@hidden deffn
+
+New ports are created with the encoding appropriate for the current
+locale if @code{setlocale} has been called or ISO-8859-1 otherwise,
+and this procedure can be used to modify that encoding.
+
address@hidden {Scheme Procedure} port-encoding port
address@hidden {C Function} scm_port_encoding
+Returns, as a string, the character encoding that @var{port} uses to
+interpret its input and output.
address@hidden deffn
+
address@hidden {Scheme Procedure} set-port-conversion-strategy! port sym
address@hidden {C Function} scm_set_port_conversion_strategy_x (port, sym)
+Sets the behavior of the interpreter when outputting a character that
+is not representable in the port's current encoding.  @var{sym} can be
+either @code{'error}, @code{'substitute}, or @code{'escape}.  If it is
address@hidden'error}, an error will be thrown when an nonconvertible character
+is encountered.  If it is @code{'substitute}, then nonconvertible
+characters will be replaced with approximate characters, or with
+question marks if no approximately correct character is available.  If
+it is @code{'escape}, it will appear as a hex escape when output.
+
+If @var{port} is an open port, the conversion error behavior
+is set for that port.  If it is @code{#f}, it is set as the
+default behavior for any future ports that get created in
+this thread.
address@hidden deffn
+
address@hidden {Scheme Procedure} port-conversion-strategy port
address@hidden {C Function} scm_port_conversion_strategy (port)
+Returns the behavior of the port when outputting a character that is
+not representable in the port's current encoding.  It returns the
+symbol @code{error} if unrepresentable characters should cause
+exceptions, @code{substitute} if the port should try to replace
+unrepresentable characters with question marks or approximate
+characters, or @code{escape} if unrepresentable characters should be
+converted to string escapes.
+
+If @var{port} is @code{#f}, then the current default behavior will be
+returned.  New ports will have this default behavior when they are
+created.
address@hidden deffn
+
+
 
 @node Reading
 @subsection Reading
@@ -238,7 +299,7 @@ output port if not given.
 
 The output is designed to be machine readable, and can be read back
 with @code{read} (@pxref{Reading}).  Strings are printed in
-doublequotes, with escapes if necessary, and characters are printed in
+double quotes, with escapes if necessary, and characters are printed in
 @samp{#\} notation.
 @end deffn
 
@@ -248,7 +309,7 @@ Send a representation of @var{obj} to @var{port} or to the 
current
 output port if not given.
 
 The output is designed for human readability, it differs from
address@hidden in that strings are printed without doublequotes and
address@hidden in that strings are printed without double quotes and
 escapes, and characters are printed as per @code{write-char}, not in
 @samp{#\} form.
 @end deffn
@@ -496,7 +557,7 @@ used.  This function is equivalent to:
 @end lisp
 @end deffn
 
-Some of the abovementioned I/O functions rely on the following C
+Some of the aforementioned I/O functions rely on the following C
 primitives.  These will mainly be of interest to people hacking Guile
 internals.
 
@@ -815,11 +876,11 @@ Open @var{filename} for output.  Equivalent to
 Open @var{filename} for input or output, and call @code{(@var{proc}
 port)} with the resulting port.  Return the value returned by
 @var{proc}.  @var{filename} is opened as per @code{open-input-file} or
address@hidden respectively, and an error is signalled if it
address@hidden respectively, and an error is signaled if it
 cannot be opened.
 
 When @var{proc} returns, the port is closed.  If @var{proc} does not
-return (eg.@: if it throws an error), then the port might not be
+return (e.g.@: if it throws an error), then the port might not be
 closed automatically, though it will be garbage collected in the usual
 way if not otherwise referenced.
 @end deffn
@@ -834,7 +895,7 @@ setup as respectively the @code{current-input-port},
 @code{current-output-port}, or @code{current-error-port}.  Return the
 value returned by @var{thunk}.  @var{filename} is opened as per
 @code{open-input-file} or @code{open-output-file} respectively, and an
-error is signalled if it cannot be opened.
+error is signaled if it cannot be opened.
 
 When @var{thunk} returns, the port is closed and the previous setting
 of the respective current port is restored.
@@ -891,6 +952,13 @@ Determine whether @var{obj} is a port that is related to a 
file.
 The following allow string ports to be opened by analogy to R4R*
 file port facilities:
 
+With string ports, the port-encoding is treated differently than other
+types of ports.  When string ports are created, they do not inherit a
+character encoding from the current locale.  They are given a
+default locale that allows them to handle all valid string characters.
+Typically one should not modify a string port's character encoding
+away from its default.
+
 @deffn {Scheme Procedure} call-with-output-string proc
 @deffnx {C Function} scm_call_with_output_string (proc)
 Calls the one-argument procedure @var{proc} with a newly created output
@@ -1409,7 +1477,7 @@ is set.
 
 @node Port Implementation
 @subsubsection Port Implementation
address@hidden Port implemenation
address@hidden Port implementation
 
 This section describes how to implement a new port type in C.
 
diff --git a/doc/ref/goops.texi b/doc/ref/goops.texi
index c0a828f..9b0d057 100644
--- a/doc/ref/goops.texi
+++ b/doc/ref/goops.texi
@@ -17,7 +17,7 @@ Guile
 
 @goops{} is the object oriented extension to @guile{}. Its
 implementation is derived from @w{STk-3.99.3} by Erick Gallesio and
-version 1.3 of Gregor Kiczales @cite{Tiny-Clos}.  It is very close in
+version 1.3 of Gregor Kiczales' @cite{Tiny-Clos}.  It is very close in
 spirit to CLOS, the Common Lisp Object System (@cite{CLtL2}) but is
 adapted for the Scheme language.  While GOOPS is not compatible with any
 of these systems, GOOPS contains a compatibility module which allows for
diff --git a/doc/ref/scheme-scripts.texi b/doc/ref/scheme-scripts.texi
index e12eee6..249bc34 100644
--- a/doc/ref/scheme-scripts.texi
+++ b/doc/ref/scheme-scripts.texi
@@ -64,6 +64,12 @@ operating system never reads this far, but Guile treats this 
as the end
 of the comment begun on the first line by the @samp{#!} characters.
 
 @item
+If this source code file is not ASCII or ISO-8859-1 encoded, a coding
+declaration such as @code{coding: utf-8} should appear in a comment
+somewhere in the first five lines of the file: see @ref{Character
+Encoding of Source Files}.
+
address@hidden
 The rest of the file should be a Scheme program.
 
 @end itemize
diff --git a/guile-readline/ice-9/readline.scm 
b/guile-readline/ice-9/readline.scm
index 96af69e..4c852ee 100644
--- a/guile-readline/ice-9/readline.scm
+++ b/guile-readline/ice-9/readline.scm
@@ -24,11 +24,15 @@
 
 
 (define-module (ice-9 readline)
-  :use-module (ice-9 session)
-  :use-module (ice-9 regex)
-  :use-module (ice-9 buffered-input)
-  :no-backtrace
-  :export (filename-completion-function))
+  #:use-module (ice-9 session)
+  #:use-module (ice-9 regex)
+  #:use-module (ice-9 buffered-input)
+  #:no-backtrace
+  #:export (filename-completion-function
+           add-history
+           read-history
+           write-history
+           clear-history))
 
 
 
diff --git a/guile-readline/readline.c b/guile-readline/readline.c
index cbf4051..5f6719d 100644
--- a/guile-readline/readline.c
+++ b/guile-readline/readline.c
@@ -128,6 +128,7 @@ rl_free_line_state ()
 
 static int promptp;
 static SCM input_port;
+static SCM output_port;
 static SCM before_read;
 
 static int
@@ -138,7 +139,7 @@ current_input_getc (FILE *in SCM_UNUSED)
       scm_apply (before_read, SCM_EOL, SCM_EOL);
       promptp = 0;
     }
-  return scm_getc (input_port);
+  return scm_get_byte_or_eof (input_port);
 }
 
 static int in_readline = 0;
@@ -255,7 +256,12 @@ internal_readline (SCM text)
   promptp = 1;
   s = readline (prompt);
   if (s)
-    ret = scm_from_locale_string (s);
+    {
+      scm_t_port *pt = SCM_PTAB_ENTRY (output_port);
+      
+      ret = scm_from_stringn (s, strlen (s), pt->encoding, 
+                              SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE);
+    }
   else 
     ret = SCM_EOF_VAL;
 
@@ -311,6 +317,7 @@ scm_readline_init_ports (SCM inp, SCM outp)
   }
 
   input_port = inp;
+  output_port = outp;
 #ifndef __MINGW32__
   rl_instream = stream_from_fport (inp, "r", s_scm_readline);
   rl_outstream = stream_from_fport (outp, "w", s_scm_readline);
diff --git a/libguile/chars.c b/libguile/chars.c
index c7cb09c..59ac6f4 100644
--- a/libguile/chars.c
+++ b/libguile/chars.c
@@ -45,7 +45,8 @@ SCM_DEFINE (scm_char_p, "char?", 1, 0, 0,
 
 SCM_DEFINE1 (scm_char_eq_p, "char=?", scm_tc7_rpsubr,
              (SCM x, SCM y),
-            "Return @code{#t} iff @var{x} is the same character as @var{y}, 
else @code{#f}.")
+             "Return @code{#t} if the Unicode code point of @var{x} is equal 
to the\n"
+             "code point of @var{y}, else @code{#f}.\n")
 #define FUNC_NAME s_scm_char_eq_p
 {
   SCM_VALIDATE_CHAR (1, x);
@@ -57,8 +58,8 @@ SCM_DEFINE1 (scm_char_eq_p, "char=?", scm_tc7_rpsubr,
 
 SCM_DEFINE1 (scm_char_less_p, "char<?", scm_tc7_rpsubr, 
              (SCM x, SCM y),
-            "Return @code{#t} iff @var{x} is less than @var{y} in the Unicode 
sequence,\n"
-            "else @code{#f}.")
+             "Return @code{#t} iff the code point of @var{x} is less than the 
code\n"
+             "point of @var{y}, else @code{#f}.")
 #define FUNC_NAME s_scm_char_less_p
 {
   SCM_VALIDATE_CHAR (1, x);
@@ -69,8 +70,8 @@ SCM_DEFINE1 (scm_char_less_p, "char<?", scm_tc7_rpsubr,
 
 SCM_DEFINE1 (scm_char_leq_p, "char<=?", scm_tc7_rpsubr,
              (SCM x, SCM y),
-            "Return @code{#t} iff @var{x} is less than or equal to @var{y} in 
the\n"
-            "Unicode sequence, else @code{#f}.")
+             "Return @code{#t} if the Unicode code point of @var{x} is less 
than or\n"
+             "equal to the code point of @var{y}, else @code{#f}.")
 #define FUNC_NAME s_scm_char_leq_p
 {
   SCM_VALIDATE_CHAR (1, x);
@@ -81,8 +82,8 @@ SCM_DEFINE1 (scm_char_leq_p, "char<=?", scm_tc7_rpsubr,
 
 SCM_DEFINE1 (scm_char_gr_p, "char>?", scm_tc7_rpsubr,
              (SCM x, SCM y),
-            "Return @code{#t} iff @var{x} is greater than @var{y} in the 
Unicode\n"
-            "sequence, else @code{#f}.")
+             "Return @code{#t} if the Unicode code point of @var{x} is greater 
than\n"
+             "the code point of @var{y}, else @code{#f}.")
 #define FUNC_NAME s_scm_char_gr_p
 {
   SCM_VALIDATE_CHAR (1, x);
@@ -93,8 +94,8 @@ SCM_DEFINE1 (scm_char_gr_p, "char>?", scm_tc7_rpsubr,
 
 SCM_DEFINE1 (scm_char_geq_p, "char>=?", scm_tc7_rpsubr,
              (SCM x, SCM y),
-            "Return @code{#t} iff @var{x} is greater than or equal to @var{y} 
in the\n"
-            "Unicode sequence, else @code{#f}.")
+             "Return @code{#t} if the Unicode code point of @var{x} is greater 
than\n"
+             "or equal to the code point of @var{y}, else @code{#f}.")
 #define FUNC_NAME s_scm_char_geq_p
 {
   SCM_VALIDATE_CHAR (1, x);
@@ -103,10 +104,17 @@ SCM_DEFINE1 (scm_char_geq_p, "char>=?", scm_tc7_rpsubr,
 }
 #undef FUNC_NAME
 
+/* FIXME?: R6RS specifies that these comparisons are case-folded.
+   This is the same thing as comparing the uppercase characters in
+   practice, but, not in theory.  Unicode has table containing their
+   definition of case-folded character mappings.  A more correct
+   implementation would be to use that table and make a char-foldcase
+   function.  */
+
 SCM_DEFINE1 (scm_char_ci_eq_p, "char-ci=?", scm_tc7_rpsubr,
              (SCM x, SCM y),
-            "Return @code{#t} iff @var{x} is the same character as @var{y} 
ignoring\n"
-            "case, else @code{#f}.  Case is locale free and not context 
sensitive.")
+             "Return @code{#t} if the case-folded Unicode code point of 
@var{x} is\n"
+             "the same as the case-folded code point of @var{y}, else 
@code{#f}.")
 #define FUNC_NAME s_scm_char_ci_eq_p
 {
   SCM_VALIDATE_CHAR (1, x);
@@ -117,9 +125,8 @@ SCM_DEFINE1 (scm_char_ci_eq_p, "char-ci=?", scm_tc7_rpsubr,
 
 SCM_DEFINE1 (scm_char_ci_less_p, "char-ci<?", scm_tc7_rpsubr,
              (SCM x, SCM y),
-            "Return @code{#t} iff the Unicode uppercase form of @var{x} is 
less\n"
-            "than the Unicode uppercase form @var{y} in the Unicode 
sequence,\n"
-            "else @code{#f}.")
+             "Return @code{#t} if the case-folded Unicode code point of 
@var{x} is\n"
+             "less than the case-folded code point of @var{y}, else 
@code{#f}.")
 #define FUNC_NAME s_scm_char_ci_less_p
 {
   SCM_VALIDATE_CHAR (1, x);
@@ -130,9 +137,9 @@ SCM_DEFINE1 (scm_char_ci_less_p, "char-ci<?", 
scm_tc7_rpsubr,
 
 SCM_DEFINE1 (scm_char_ci_leq_p, "char-ci<=?", scm_tc7_rpsubr,
              (SCM x, SCM y),
-            "Return @code{#t} iff the Unicode uppercase form of @var{x} is 
less\n"
-            "than or equal to the Unicode uppercase form of @var{y} in the\n"
-            "Unicode  sequence, else @code{#f}.")
+             "Return @code{#t} iff the case-folded Unicodd code point of 
@var{x} is\n"
+             "less than or equal to the case-folded code point of @var{y}, 
else\n"
+             "@code{#f}")
 #define FUNC_NAME s_scm_char_ci_leq_p
 {
   SCM_VALIDATE_CHAR (1, x);
@@ -143,9 +150,8 @@ SCM_DEFINE1 (scm_char_ci_leq_p, "char-ci<=?", 
scm_tc7_rpsubr,
 
 SCM_DEFINE1 (scm_char_ci_gr_p, "char-ci>?", scm_tc7_rpsubr,
              (SCM x, SCM y),
-            "Return @code{#t} iff the Unicode uppercase form of @var{x} is 
greater\n"
-            "than the Unicode uppercase form of @var{y} in the Unicode\n"
-            "sequence, else @code{#f}.")
+             "Return @code{#t} iff the case-folded code point of @var{x} is 
greater\n"
+             "than the case-folded code point of @var{y}, else @code{#f}.")
 #define FUNC_NAME s_scm_char_ci_gr_p
 {
   SCM_VALIDATE_CHAR (1, x);
@@ -156,9 +162,9 @@ SCM_DEFINE1 (scm_char_ci_gr_p, "char-ci>?", scm_tc7_rpsubr,
 
 SCM_DEFINE1 (scm_char_ci_geq_p, "char-ci>=?", scm_tc7_rpsubr,
              (SCM x, SCM y),
-            "Return @code{#t} iff the Unicode uppercase form of @var{x} is 
greater\n"
-            "than or equal to the Unicode uppercase form of @var{y} in the\n"
-            "Unicode sequence, else @code{#f}.")
+             "Return @code{#t} iff the case-folded Unicode code point of 
@var{x} is\n"
+             "greater than or equal to the case-folded code point of @var{y}, 
else\n"
+             "@code{#f}.")
 #define FUNC_NAME s_scm_char_ci_geq_p
 {
   SCM_VALIDATE_CHAR (1, x);
@@ -196,7 +202,6 @@ SCM_DEFINE (scm_char_whitespace_p, "char-whitespace?", 1, 
0, 0,
 #undef FUNC_NAME
 
 
-
 SCM_DEFINE (scm_char_upper_case_p, "char-upper-case?", 1, 0, 0, 
            (SCM chr),
            "Return @code{#t} iff @var{chr} is uppercase, else @code{#f}.\n")
@@ -217,10 +222,10 @@ SCM_DEFINE (scm_char_lower_case_p, "char-lower-case?", 1, 
0, 0,
 #undef FUNC_NAME
 
 
-
 SCM_DEFINE (scm_char_is_both_p, "char-is-both?", 1, 0, 0, 
             (SCM chr),
-           "Return @code{#t} iff @var{chr} is either uppercase or lowercase, 
else @code{#f}.\n")
+           "Return @code{#t} iff @var{chr} is either uppercase or lowercase, 
else\n"
+            "@code{#f}.\n")
 #define FUNC_NAME s_scm_char_is_both_p
 {
   if (scm_is_true (scm_char_set_contains_p (scm_char_set_lower_case, chr)))
@@ -230,12 +235,9 @@ SCM_DEFINE (scm_char_is_both_p, "char-is-both?", 1, 0, 0,
 #undef FUNC_NAME
 
 
-
-
 SCM_DEFINE (scm_char_to_integer, "char->integer", 1, 0, 0, 
             (SCM chr),
-           "Return the number corresponding to ordinal position of @var{chr} 
in the\n"
-           "ASCII sequence.")
+            "Return the Unicode code point of @var{chr}.")
 #define FUNC_NAME s_scm_char_to_integer
 {
   SCM_VALIDATE_CHAR (1, chr);
@@ -244,10 +246,12 @@ SCM_DEFINE (scm_char_to_integer, "char->integer", 1, 0, 0,
 #undef FUNC_NAME
 
 
-
 SCM_DEFINE (scm_integer_to_char, "integer->char", 1, 0, 0, 
            (SCM n),
-           "Return the character at position @var{n} in the ASCII sequence.")
+            "Return the character that has Unicode code point @var{n}.  The 
integer\n"
+            "@var{n} must be a valid code point.  Valid code points are in 
the\n"
+            "ranges 0 to @code{#xD7FF} inclusive or @code{#xE000} to\n"
+            "@code{#x10FFFF} inclusive.")
 #define FUNC_NAME s_scm_integer_to_char
 {
   scm_t_wchar cn;
diff --git a/libguile/chars.h b/libguile/chars.h
index 85b1673..04eb9f0 100644
--- a/libguile/chars.h
+++ b/libguile/chars.h
@@ -47,10 +47,15 @@ typedef scm_t_int32 scm_t_wchar;
    ? SCM_MAKE_ITAG8 ((scm_t_bits) (unsigned char) (x), scm_tc8_char)    \
    : SCM_MAKE_ITAG8 ((scm_t_bits) (x), scm_tc8_char))
 
+#define SCM_CODEPOINT_DOTTED_CIRCLE (0x25cc)
+#define SCM_CODEPOINT_SURROGATE_START (0xd800)
+#define SCM_CODEPOINT_SURROGATE_END (0xdfff)
 #define SCM_CODEPOINT_MAX (0x10ffff)
 #define SCM_IS_UNICODE_CHAR(c)                                          \
-  ((scm_t_wchar) (c) <= 0xd7ff                                          \
-   || ((scm_t_wchar) (c) >= 0xe000 && (scm_t_wchar) (c) <= SCM_CODEPOINT_MAX))
+  (((scm_t_wchar) (c) >= 0                                              \
+    && (scm_t_wchar) (c) < SCM_CODEPOINT_SURROGATE_START)               \
+   || ((scm_t_wchar) (c) > SCM_CODEPOINT_SURROGATE_END                  \
+       && (scm_t_wchar) (c) <= SCM_CODEPOINT_MAX))
 
 
 
diff --git a/libguile/gc.h b/libguile/gc.h
index 40dab2f..1f03a78 100644
--- a/libguile/gc.h
+++ b/libguile/gc.h
@@ -123,7 +123,7 @@ typedef struct scm_t_cell
 SCM_API int scm_debug_cell_accesses_p;
 SCM_API int scm_expensive_debug_cell_accesses_p;
 SCM_API int scm_debug_cells_gc_interval ;
-void scm_i_expensive_validation_check (SCM cell);
+SCM_API void scm_i_expensive_validation_check (SCM cell);
 #endif
 
 SCM_INTERNAL scm_i_pthread_mutex_t scm_i_gc_admin_mutex;
diff --git a/libguile/inet_aton.c b/libguile/inet_aton.c
index ebef71f..c89378b 100644
--- a/libguile/inet_aton.c
+++ b/libguile/inet_aton.c
@@ -103,14 +103,14 @@ inet_aton(const char *cp_arg, struct in_addr *addr)
                                base = 8;
                }
                while ((c = *cp) != '\0') {
-                       if (isascii(c) && isdigit(c)) {
+                        if (isascii ((int) c) && isdigit ((int) c)) {
                                val = (val * base) + (c - '0');
                                cp++;
                                continue;
                        }
-                       if (base == 16 && isascii(c) && isxdigit(c)) {
+                        if (base == 16 && isascii ((int) c) && isxdigit ((int) 
c)) {
                                val = (val << 4) + 
-                                       (c + 10 - (islower(c) ? 'a' : 'A'));
+                                  (c + 10 - (islower((int) c) ? 'a' : 'A'));
                                cp++;
                                continue;
                        }
@@ -132,7 +132,7 @@ inet_aton(const char *cp_arg, struct in_addr *addr)
        /*
         * Check for trailing characters.
         */
-       if (*cp && (!isascii(*cp) || !isspace(*cp)))
+       if (*cp && (!isascii ((int) (*cp)) || !isspace ((int) (*cp))))
                return (0);
        /*
         * Concoct the address according to
diff --git a/libguile/load.c b/libguile/load.c
index fa25b0f..f2c06f0 100644
--- a/libguile/load.c
+++ b/libguile/load.c
@@ -752,7 +752,7 @@ SCM_DEFINE (scm_primitive_load_path, "primitive-load-path", 
1, 1, 0,
           scm_puts (";;; found fresh local cache at ", scm_current_error_port 
());
           scm_display (fallback, scm_current_error_port ());
           scm_newline (scm_current_error_port ());
-          return scm_load_compiled_with_vm (compiled_filename);
+          return scm_load_compiled_with_vm (fallback);
         }
     }
 
diff --git a/libguile/ports.c b/libguile/ports.c
index e3d2b0d..58c7cd0 100644
--- a/libguile/ports.c
+++ b/libguile/ports.c
@@ -1358,7 +1358,7 @@ scm_c_read (SCM port, void *buffer, size_t size)
      requested number of bytes.  (Note that a single scm_fill_input
      call does not guarantee to fill the whole of the port's read
      buffer.) */
-  if (pt->read_buf_size <= 1)
+  if (pt->read_buf_size <= 1 && pt->encoding == NULL)
     {
       /* The port that we are reading from is unbuffered - i.e. does
         not have its own persistent buffer - but we have a buffer,
@@ -1370,7 +1370,14 @@ scm_c_read (SCM port, void *buffer, size_t size)
         We need to make sure that the port's normal (1 byte) buffer
         is reinstated in case one of the scm_fill_input () calls
         throws an exception; we use the scm_dynwind_* API to achieve
-        that. */
+        that. 
+
+         A consequence of this optimization is that the fill_input
+         functions can't unget characters.  That'll push data to the
+         pushback buffer instead of this psb buffer.  */
+#if SCM_DEBUG == 1
+      unsigned char *pback = pt->putback_buf;
+#endif      
       psb.pt = pt;
       psb.buffer = buffer;
       psb.size = size;
@@ -1385,8 +1392,15 @@ scm_c_read (SCM port, void *buffer, size_t size)
          pt->read_buf_size -= (pt->read_end - pt->read_pos);
          pt->read_pos = pt->read_buf = pt->read_end;
        }
+#if SCM_DEBUG == 1
+      if (pback != pt->putback_buf 
+          || pt->read_buf - (unsigned char *) buffer < 0)
+        scm_misc_error (FUNC_NAME, 
+                        "scm_c_read must not call a fill function that pushes "
+                        "back characters onto an unbuffered port", SCM_EOL);
+#endif      
       n_read += pt->read_buf - (unsigned char *) buffer;
-
+      
       /* Reinstate the port's normal buffer. */
       scm_dynwind_end ();
     }
diff --git a/libguile/print.c b/libguile/print.c
index 3bb6cb1..1dc97c2 100644
--- a/libguile/print.c
+++ b/libguile/print.c
@@ -463,13 +463,26 @@ iprin1 (SCM exp, SCM port, scm_print_state *pstate)
                 /* Print the character if is graphic character.  */
                 {
                   scm_t_wchar *wbuf;
-                  SCM wstr = scm_i_make_wide_string (1, &wbuf);
+                  SCM wstr;
                   char *buf;
                   size_t len;
                   const char *enc;
 
                   enc = scm_i_get_port_encoding (port);
-                  wbuf[0] = i;
+                  if (uc_combining_class (i) == UC_CCC_NR)
+                    {
+                      wstr = scm_i_make_wide_string (1, &wbuf);
+                      wbuf[0] = i;
+                    }
+                  else
+                    {
+                      /* Character is a combining character: print it connected
+                         to a dotted circle instead of connecting it to the 
+                         backslash in '#\'  */
+                      wstr = scm_i_make_wide_string (2, &wbuf);
+                      wbuf[0] = SCM_CODEPOINT_DOTTED_CIRCLE;
+                      wbuf[1] = i;
+                    }
                   if (enc == NULL)
                     {
                       if (i <= 0xFF)
@@ -1220,8 +1233,8 @@ SCM_DEFINE (scm_write_char, "write-char", 1, 1, 0,
 
   SCM_VALIDATE_CHAR (1, chr);
   SCM_VALIDATE_OPORT_VALUE (2, port);
-
-  scm_putc ((int) SCM_CHAR (chr), SCM_COERCE_OUTPORT (port));
+  
+  scm_i_charprint (SCM_CHAR (chr), SCM_COERCE_OUTPORT (port));
 #if 0
 #ifdef HAVE_PIPE
 # ifdef EPIPE
diff --git a/libguile/read.c b/libguile/read.c
index d91c868..07c8d71 100644
--- a/libguile/read.c
+++ b/libguile/read.c
@@ -844,16 +844,25 @@ scm_read_character (scm_t_wchar chr, SCM port)
     return SCM_MAKE_CHAR (scm_i_string_ref (charname, 0));
 
   cp = scm_i_string_ref (charname, 0);
+  if (cp == SCM_CODEPOINT_DOTTED_CIRCLE && charname_len == 2)
+    return SCM_MAKE_CHAR (scm_i_string_ref (charname, 1));
+
   if (cp >= '0' && cp < '8')
     {
       /* Dirk:FIXME::  This type of character syntax is not R5RS
        * compliant.  Further, it should be verified that the constant
-       * does only consist of octal digits.  Finally, it should be
-       * checked whether the resulting fixnum is in the range of
-       * characters.  */
+       * does only consist of octal digits.  */
       SCM p = scm_string_to_number (charname, scm_from_uint (8));
       if (SCM_I_INUMP (p))
-       return SCM_MAKE_CHAR (SCM_I_INUM (p));
+        {
+          scm_t_wchar c = SCM_I_INUM (p);
+          if (SCM_IS_UNICODE_CHAR (c))
+            return SCM_MAKE_CHAR (c);
+          else
+            scm_i_input_error (FUNC_NAME, port, 
+                               "out-of-range octal character escape: ~a",
+                               scm_list_1 (charname));
+        }
     }
 
   /* The names of characters should never have non-Latin1
@@ -1437,7 +1446,9 @@ scm_scan_for_encoding (SCM port)
   /* grab the next token */
   i = 0;
   while (pos + i - header <= SCM_ENCODING_SEARCH_SIZE 
-        && (isalnum(pos[i]) || pos[i] == '_' || pos[i] == '-' || pos[i] == 
'.'))
+         && pos + i - header < bytes_read
+        && (isalnum((int) pos[i]) || pos[i] == '_' || pos[i] == '-' 
+             || pos[i] == '.'))
     i++;
 
   if (i == 0)
diff --git a/libguile/srfi-14.c b/libguile/srfi-14.c
index 7c00131..76e776c 100644
--- a/libguile/srfi-14.c
+++ b/libguile/srfi-14.c
@@ -29,10 +29,23 @@
 #include "libguile.h"
 #include "libguile/srfi-14.h"
 #include "libguile/strings.h"
+#include "libguile/chars.h"
 
 /* Include the pre-computed standard charset data.  */
 #include "libguile/srfi-14.i.c"
 
+scm_t_char_range cs_full_ranges[] = {
+  {0x0000, SCM_CODEPOINT_SURROGATE_START - 1}
+  ,
+  {SCM_CODEPOINT_SURROGATE_END + 1, SCM_CODEPOINT_MAX}
+};
+
+scm_t_char_set cs_full = {
+  2,
+  cs_full_ranges
+};
+
+
 #define SCM_CHARSET_DATA(charset) ((scm_t_char_set *) SCM_SMOB_DATA (charset))
 
 #define SCM_CHARSET_SET(cs, idx)                        \
@@ -85,18 +98,11 @@ scm_i_charset_set (scm_t_char_set *cs, scm_t_wchar n)
           /* This char is one below the current range. */
           if (i > 0 && cs->ranges[i - 1].hi + 1 == n)
             {
-              /* It is also one above the previous range, so combine them.  */
-              cs->ranges[i - 1].hi = cs->ranges[i].hi;
-              if (i < len - 1)
-                memmove (cs->ranges + i, cs->ranges + (i + 1),
-                         sizeof (scm_t_char_range) * (len - i - 1));
-              cs->ranges = scm_gc_realloc (cs->ranges,
-                                           sizeof (scm_t_char_range) * len,
-                                           sizeof (scm_t_char_range) * (len -
-                                                                        1),
-                                           "character-set");
-              cs->len = len - 1;
-              return;
+              /* It is also one above the previous range.  */
+              /* This is an impossible condition: in the previous
+                 iteration, the test for 'one above the current range'
+                 should already have inserted the character here.  */
+              abort ();
             }
           else
             {
@@ -167,6 +173,103 @@ scm_i_charset_set (scm_t_char_set *cs, scm_t_wchar n)
   return;
 }
 
+/* Put LO to HI inclusive into charset CS.  */
+static void
+scm_i_charset_set_range (scm_t_char_set *cs, scm_t_wchar lo, scm_t_wchar hi)
+{
+  size_t i;
+
+  i = 0;
+  while (i < cs->len)
+    {
+      /* Already in this range  */
+      if (cs->ranges[i].lo <= lo && cs->ranges[i].hi >= hi)
+        return;
+
+      /* cur:       +---+
+         new: +---+
+      */
+      if (cs->ranges[i].lo - 1 > hi)
+        {
+          /* Add a new range below the current one.  */
+          cs->ranges = scm_gc_realloc (cs->ranges,
+                                       sizeof (scm_t_char_range) * cs->len,
+                                       sizeof (scm_t_char_range) * (cs->len + 
1),
+                                       "character-set");
+          memmove (cs->ranges + (i + 1), cs->ranges + i,
+                   sizeof (scm_t_char_range) * (cs->len - i));
+          cs->ranges[i].lo = lo;
+          cs->ranges[i].hi = hi;
+          cs->len += 1;
+          return;
+        }
+
+      /* cur:      +---+  or     +---+  or    +---+
+         new: +---+          +---+         +---+
+      */
+      if (cs->ranges[i].lo > lo
+          && (cs->ranges[i].lo - 1 <= hi && cs->ranges[i].hi >= hi))
+        {
+          cs->ranges[i].lo = lo;
+          return;
+        }
+
+      /* cur: +---+    or +---+     or +---+
+         new:   +---+         +---+         +---+
+      */
+      else if (cs->ranges[i].hi + 1 >= lo && cs->ranges[i].hi < hi)
+        {
+          if (cs->ranges[i].lo > lo)
+            cs->ranges[i].lo = lo;
+          if (cs->ranges[i].hi < hi)
+            cs->ranges[i].hi = hi;
+          while (i < cs->len - 1)
+            {
+              /* cur: --+    +---+
+                 new: -----+
+              */
+              if (cs->ranges[i + 1].lo - 1 > hi)
+                break;
+              
+              /* cur: --+   +---+  or  --+  +---+  or --+ +--+
+                 new: -----+           ------+        ---------+
+              */
+              /* Combine this range with the previous one.  */
+              if (cs->ranges[i + 1].hi > hi)
+                cs->ranges[i].hi = cs->ranges[i + 1].hi;
+              if (i + 1 < cs->len)
+                memmove (cs->ranges + i + 1, cs->ranges + i + 2,
+                         sizeof (scm_t_char_range) * (cs->len - i - 2));
+              cs->ranges = scm_gc_realloc (cs->ranges,
+                                           sizeof (scm_t_char_range) * cs->len,
+                                           sizeof (scm_t_char_range) * 
(cs->len - 1),
+                                           "character-set");
+              cs->len -= 1;
+            }
+          return;
+        }
+      i ++;
+    }
+
+  /* This is a new range above all previous ranges.  */
+  if (cs->len == 0)
+    {
+      cs->ranges = scm_gc_malloc (sizeof (scm_t_char_range), "character-set");
+    }
+  else
+    {
+      cs->ranges = scm_gc_realloc (cs->ranges,
+                                   sizeof (scm_t_char_range) * cs->len,
+                                   sizeof (scm_t_char_range) * (cs->len + 1),
+                                   "character-set");
+    }
+  cs->len += 1;
+  cs->ranges[cs->len - 1].lo = lo;
+  cs->ranges[cs->len - 1].hi = hi;
+
+  return;
+}
+
 /* If N is in charset CS, remove it.  */
 void
 scm_i_charset_unset (scm_t_char_set *cs, scm_t_wchar n)
@@ -301,7 +404,7 @@ static void
 charsets_union (scm_t_char_set *a, scm_t_char_set *b)
 {
   size_t i = 0;
-  scm_t_wchar blo, bhi, n;
+  scm_t_wchar blo, bhi;
 
   if (b->len == 0)
     return;
@@ -315,13 +418,11 @@ charsets_union (scm_t_char_set *a, scm_t_char_set *b)
       return;
     }
 
-  /* This needs optimization.  */
   while (i < b->len)
     {
       blo = b->ranges[i].lo;
       bhi = b->ranges[i].hi;
-      for (n = blo; n <= bhi; n++)
-        scm_i_charset_set (a, n);
+      scm_i_charset_set_range (a, blo, bhi);
 
       i++;
     }
@@ -373,22 +474,35 @@ charsets_intersection (scm_t_char_set *a, scm_t_char_set 
*b)
   return;
 }
 
+#define SCM_ADD_RANGE(low, high)                        \
+  do {                                                  \
+    p->ranges[idx].lo = (low);                          \
+    p->ranges[idx++].hi = (high);                       \
+  } while (0)
+#define SCM_ADD_RANGE_SKIP_SURROGATES(low, high)                  \
+  do {                                                            \
+    p->ranges[idx].lo = (low);                                    \
+    p->ranges[idx++].hi = SCM_CODEPOINT_SURROGATE_START - 1;      \
+    p->ranges[idx].lo = SCM_CODEPOINT_SURROGATE_END + 1;          \
+    p->ranges[idx++].hi = (high);                                 \
+  } while (0)
+
+
+
 /* Make P the compelement of Q.  */
 static void
 charsets_complement (scm_t_char_set *p, scm_t_char_set *q)
 {
   int k, idx;
 
+  idx = 0;
   if (q->len == 0)
     {
       /* Fill with all valid codepoints.  */
       p->len = 2;
       p->ranges = scm_gc_malloc (sizeof (scm_t_char_range) * 2,
                                  "character-set");
-      p->ranges[0].lo = 0;
-      p->ranges[0].hi = 0xd7ff;
-      p->ranges[1].lo = 0xe000;
-      p->ranges[1].hi = SCM_CODEPOINT_MAX;
+      SCM_ADD_RANGE_SKIP_SURROGATES (0, SCM_CODEPOINT_MAX);
       return;
     }
 
@@ -396,33 +510,42 @@ charsets_complement (scm_t_char_set *p, scm_t_char_set *q)
     scm_gc_free (p->ranges, sizeof (scm_t_char_set) * p->len,
                  "character-set");
 
+  /* Count the number of ranges needed for the output.  */
   p->len = 0;
   if (q->ranges[0].lo > 0)
     p->len++;
   if (q->ranges[q->len - 1].hi < SCM_CODEPOINT_MAX)
     p->len++;
-  p->len += q->len - 1;
+  p->len += q->len;
   p->ranges =
     (scm_t_char_range *) scm_gc_malloc (sizeof (scm_t_char_range) * p->len,
                                         "character-set");
-  idx = 0;
   if (q->ranges[0].lo > 0)
     {
-      p->ranges[idx].lo = 0;
-      p->ranges[idx++].hi = q->ranges[0].lo - 1;
+      if (q->ranges[0].lo > SCM_CODEPOINT_SURROGATE_END)
+        SCM_ADD_RANGE_SKIP_SURROGATES (0, q->ranges[0].lo - 1);
+      else
+        SCM_ADD_RANGE (0, q->ranges[0].lo - 1);
     }
   for (k = 1; k < q->len; k++)
     {
-      p->ranges[idx].lo = q->ranges[k - 1].hi + 1;
-      p->ranges[idx++].hi = q->ranges[k].lo - 1;
+      if (q->ranges[k - 1].hi < SCM_CODEPOINT_SURROGATE_START
+          && q->ranges[k].lo - 1 > SCM_CODEPOINT_SURROGATE_END)
+        SCM_ADD_RANGE_SKIP_SURROGATES (q->ranges[k - 1].hi + 1, 
q->ranges[k].lo - 1);
+      else
+        SCM_ADD_RANGE (q->ranges[k - 1].hi + 1, q->ranges[k].lo - 1);
     }
   if (q->ranges[q->len - 1].hi < SCM_CODEPOINT_MAX)
     {
-      p->ranges[idx].lo = q->ranges[q->len - 1].hi + 1;
-      p->ranges[idx].hi = SCM_CODEPOINT_MAX;
+      if (q->ranges[q->len - 1].hi < SCM_CODEPOINT_SURROGATE_START)
+        SCM_ADD_RANGE_SKIP_SURROGATES (q->ranges[q->len - 1].hi + 1, 
SCM_CODEPOINT_MAX);
+      else
+        SCM_ADD_RANGE (q->ranges[q->len - 1].hi + 1, SCM_CODEPOINT_MAX);
     }
   return;
 }
+#undef SCM_ADD_RANGE
+#undef SCM_ADD_RANGE_SKIP_SURROGATES
 
 /* Replace A with elements only found in one of A or B.  */
 static void
@@ -1161,7 +1284,7 @@ SCM_DEFINE (scm_char_set_filter_x, "char-set-filter!", 3, 
0, 0,
   for (k = 0; k < p->len; k++)
     for (n = p->ranges[k].lo; n <= p->ranges[k].hi; n++)
       {
-        SCM res = scm_call_1 (pred, SCM_MAKE_CHAR (k));
+        SCM res = scm_call_1 (pred, SCM_MAKE_CHAR (n));
 
         if (scm_is_true (res))
           SCM_CHARSET_SET (base_cs, n);
@@ -1171,27 +1294,18 @@ SCM_DEFINE (scm_char_set_filter_x, "char-set-filter!", 
3, 0, 0,
 #undef FUNC_NAME
 
 
-SCM_DEFINE (scm_ucs_range_to_char_set, "ucs-range->char-set", 2, 2, 0,
-           (SCM lower, SCM upper, SCM error, SCM base_cs),
-           "Return a character set containing all characters whose\n"
-           "character codes lie in the half-open range\n"
-           "address@hidden,@var{upper}).\n"
-           "\n"
-           "If @var{error} is a true value, an error is signalled if the\n"
-           "specified range contains characters which are not contained in\n"
-           "the implemented character range.  If @var{error} is @code{#f},\n"
-           "these characters are silently left out of the resultung\n"
-           "character set.\n"
-           "\n"
-           "The characters in @var{base_cs} are added to the result, if\n"
-           "given.")
-#define FUNC_NAME s_scm_ucs_range_to_char_set
+/* Return a character set containing all the characters from [LOWER,UPPER),
+   giving range errors if ERROR, adding chars from BASE_CS, and recycling
+   BASE_CS if REUSE is true.  */
+static SCM
+scm_i_ucs_range_to_char_set (const char *FUNC_NAME, SCM lower, SCM upper, 
+                             SCM error, SCM base_cs, int reuse)
 {
   SCM cs;
   size_t clower, cupper;
 
   clower = scm_to_size_t (lower);
-  cupper = scm_to_size_t (upper);
+  cupper = scm_to_size_t (upper) - 1;
   SCM_ASSERT_RANGE (2, upper, cupper >= clower);
   if (!SCM_UNBNDP (error))
     {
@@ -1199,28 +1313,66 @@ SCM_DEFINE (scm_ucs_range_to_char_set, 
"ucs-range->char-set", 2, 2, 0,
         {
           SCM_ASSERT_RANGE (1, lower, SCM_IS_UNICODE_CHAR (clower));
           SCM_ASSERT_RANGE (2, upper, SCM_IS_UNICODE_CHAR (cupper));
+          if (clower < SCM_CODEPOINT_SURROGATE_START 
+              && cupper > SCM_CODEPOINT_SURROGATE_END)
+            scm_error(scm_out_of_range_key,
+                      FUNC_NAME, "invalid range - contains surrogate 
characters: ~S to ~S",
+                      scm_list_2 (lower, upper), scm_list_1 (upper));
         }
     }
-  if (clower > 0x10FFFF)
-    clower = 0x10FFFF;
-  if (cupper > 0x10FFFF)
-    cupper = 0x10FFFF;
+
   if (SCM_UNBNDP (base_cs))
     cs = make_char_set (FUNC_NAME);
   else
     {
       SCM_VALIDATE_SMOB (4, base_cs, charset);
-      cs = scm_char_set_copy (base_cs);
+      if (reuse)
+        cs = base_cs;
+      else
+        cs = scm_char_set_copy (base_cs);
     }
-  /* It not be difficult to write a more optimized version of the
-     following.  */
-  while (clower < cupper)
+
+  if ((clower >= SCM_CODEPOINT_SURROGATE_START && clower <= 
SCM_CODEPOINT_SURROGATE_END)
+      && (cupper >= SCM_CODEPOINT_SURROGATE_START && cupper <= 
SCM_CODEPOINT_SURROGATE_END))
+    return cs;
+
+  if (clower > SCM_CODEPOINT_MAX)
+    clower = SCM_CODEPOINT_MAX;
+  if (clower >= SCM_CODEPOINT_SURROGATE_START  && clower <= 
SCM_CODEPOINT_SURROGATE_END)
+    clower = SCM_CODEPOINT_SURROGATE_END + 1;
+  if (cupper > SCM_CODEPOINT_MAX)
+    cupper = SCM_CODEPOINT_MAX;
+  if (cupper >= SCM_CODEPOINT_SURROGATE_START && cupper <= 
SCM_CODEPOINT_SURROGATE_END)
+    cupper = SCM_CODEPOINT_SURROGATE_START - 1;
+  if (clower < SCM_CODEPOINT_SURROGATE_START && cupper > 
SCM_CODEPOINT_SURROGATE_END)
     {
-      SCM_CHARSET_SET (cs, clower);
-      clower++;
+      scm_i_charset_set_range (SCM_CHARSET_DATA (cs), clower, 
SCM_CODEPOINT_SURROGATE_START - 1);
+      scm_i_charset_set_range (SCM_CHARSET_DATA (cs), 
SCM_CODEPOINT_SURROGATE_END + 1, cupper);
     }
+  else
+    scm_i_charset_set_range (SCM_CHARSET_DATA (cs), clower, cupper);
   return cs;
 }
+
+SCM_DEFINE (scm_ucs_range_to_char_set, "ucs-range->char-set", 2, 2, 0,
+           (SCM lower, SCM upper, SCM error, SCM base_cs),
+           "Return a character set containing all characters whose\n"
+           "character codes lie in the half-open range\n"
+           "address@hidden,@var{upper}).\n"
+           "\n"
+           "If @var{error} is a true value, an error is signalled if the\n"
+           "specified range contains characters which are not valid\n"
+           "Unicode code points.  If @var{error} is @code{#f},\n"
+           "these characters are silently left out of the resultung\n"
+           "character set.\n"
+           "\n"
+           "The characters in @var{base_cs} are added to the result, if\n"
+           "given.")
+#define FUNC_NAME s_scm_ucs_range_to_char_set
+{
+  return scm_i_ucs_range_to_char_set (FUNC_NAME, lower, upper, 
+                                      error, base_cs, 0);
+}
 #undef FUNC_NAME
 
 
@@ -1240,28 +1392,9 @@ SCM_DEFINE (scm_ucs_range_to_char_set_x, 
"ucs-range->char-set!", 4, 0, 0,
            "returned.")
 #define FUNC_NAME s_scm_ucs_range_to_char_set_x
 {
-  size_t clower, cupper;
-
-  clower = scm_to_size_t (lower);
-  cupper = scm_to_size_t (upper);
-  SCM_ASSERT_RANGE (2, upper, cupper >= clower);
-  if (scm_is_true (error))
-    {
-      SCM_ASSERT_RANGE (1, lower, SCM_IS_UNICODE_CHAR (clower));
-      SCM_ASSERT_RANGE (2, upper, SCM_IS_UNICODE_CHAR (cupper));
-    }
-  if (clower > SCM_CODEPOINT_MAX)
-    clower = SCM_CODEPOINT_MAX;
-  if (cupper > SCM_CODEPOINT_MAX)
-    cupper = SCM_CODEPOINT_MAX;
-
-  while (clower < cupper)
-    {
-      if (SCM_IS_UNICODE_CHAR (clower))
-        SCM_CHARSET_SET (base_cs, clower);
-      clower++;
-    }
-  return base_cs;
+  SCM_VALIDATE_SMOB (4, base_cs, charset);  
+  return scm_i_ucs_range_to_char_set (FUNC_NAME, lower, upper, 
+                                      error, base_cs, 1);
 }
 #undef FUNC_NAME
 
@@ -1455,7 +1588,9 @@ SCM_DEFINE (scm_char_set_any, "char-set-any", 2, 0, 0,
   SCM_VALIDATE_PROC (1, pred);
   SCM_VALIDATE_SMOB (2, cs, charset);
 
-  cs_data = (scm_t_char_set *) cs;
+  cs_data = SCM_CHARSET_DATA (cs);
+  if (cs_data->len == 0)
+    return SCM_BOOL_T;
 
   for (k = 0; k < cs_data->len; k++)
     for (n = cs_data->ranges[k].lo; n <= cs_data->ranges[k].hi; n++)
@@ -1819,7 +1954,8 @@ SCM_DEFINE (scm_char_set_xor_x, "char-set-xor!", 1, 0, 1,
      (char-set-xor a a a) -> char set #\a
      (char-set-xor! a a a) -> char set #\a
    */
-  return scm_char_set_xor (scm_cons (cs1, rest));
+  cs1 = scm_char_set_xor (scm_cons (cs1, rest));
+  return cs1;
 }
 #undef FUNC_NAME
 
@@ -1862,6 +1998,7 @@ SCM scm_char_set_hex_digit;
 SCM scm_char_set_blank;
 SCM scm_char_set_ascii;
 SCM scm_char_set_empty;
+SCM scm_char_set_designated;
 SCM scm_char_set_full;
 
 
@@ -1876,31 +2013,59 @@ define_charset (const char *name, const scm_t_char_set 
*p)
   return scm_permanent_object (cs);
 }
 
-#ifdef SCM_CHARSET_DEBUG
-SCM_DEFINE (scm_debug_char_set, "debug-char-set", 1, 0, 0,
-            (SCM charset),
-            "Print out the internal C structure of @var{charset}.\n")
-#define FUNC_NAME s_scm_debug_char_set
+SCM_DEFINE (scm_sys_char_set_dump, "%char-set-dump", 1, 0, 0, (SCM charset), 
+            "Returns an association list containing debugging information\n"
+            "for @var{charset}. The association list has the following 
entries."
+            "@table @code\n"
+            "@item char-set\n"
+            "The char-set itself.\n"
+            "@item len\n"
+            "The number of character ranges the char-set contains\n"
+            "@item ranges\n"
+            "A list of lists where each sublist a range of code points\n"
+            "and their associated characters"
+            "@end table")
+#define FUNC_NAME s_scm_sys_char_set_dump
 {
-  int i;
-  scm_t_char_set *cs = SCM_CHARSET_DATA (charset);
-  fprintf (stderr, "cs %p\n", cs);
-  fprintf (stderr, "len %d\n", cs->len);
-  fprintf (stderr, "arr %p\n", cs->ranges);
+  SCM e1, e2, e3;
+  SCM ranges = SCM_EOL, elt;
+  size_t i;
+  scm_t_char_set *cs;
+  char codepoint_string_lo[9], codepoint_string_hi[9];
+
+  SCM_VALIDATE_SMOB (1, charset, charset);
+  cs = SCM_CHARSET_DATA (charset);
+
+  e1 = scm_cons (scm_from_locale_symbol ("char-set"),
+                 charset);
+  e2 = scm_cons (scm_from_locale_symbol ("n"),
+                 scm_from_size_t (cs->len));
+
   for (i = 0; i < cs->len; i++)
     {
-      if (cs->ranges[i].lo == cs->ranges[i].hi)
-        fprintf (stderr, "%04x\n", cs->ranges[i].lo);
+      if (cs->ranges[i].lo > 0xFFFF)
+        sprintf (codepoint_string_lo, "U+%06x", cs->ranges[i].lo);
       else
-        fprintf (stderr, "%04x..%04x\t[%d]\n",
-                 cs->ranges[i].lo,
-                 cs->ranges[i].hi, cs->ranges[i].hi - cs->ranges[i].lo + 1);
+        sprintf (codepoint_string_lo, "U+%04x", cs->ranges[i].lo);
+      if (cs->ranges[i].hi > 0xFFFF)
+        sprintf (codepoint_string_hi, "U+%06x", cs->ranges[i].hi);
+      else
+        sprintf (codepoint_string_hi, "U+%04x", cs->ranges[i].hi);
+
+      elt = scm_list_4 (SCM_MAKE_CHAR (cs->ranges[i].lo),
+                            SCM_MAKE_CHAR (cs->ranges[i].hi),
+                            scm_from_locale_string (codepoint_string_lo),
+                            scm_from_locale_string (codepoint_string_hi));
+      ranges = scm_append (scm_list_2 (ranges,
+                                       scm_list_1 (elt)));
     }
-  printf ("\n");
-  return SCM_UNSPECIFIED;
+  e3 = scm_cons (scm_from_locale_symbol ("ranges"),
+                 ranges);
+
+  return scm_list_3 (e1, e2, e3);
 }
 #undef FUNC_NAME
-#endif /* SCM_CHARSET_DEBUG */
+
 
 
 
@@ -1937,6 +2102,7 @@ scm_init_srfi_14 (void)
   scm_char_set_blank = define_charset ("char-set:blank", &cs_blank);
   scm_char_set_ascii = define_charset ("char-set:ascii", &cs_ascii);
   scm_char_set_empty = define_charset ("char-set:empty", &cs_empty);
+  scm_char_set_designated = define_charset ("char-set:designated", 
&cs_designated);
   scm_char_set_full = define_charset ("char-set:full", &cs_full);
 
 #include "libguile/srfi-14.x"
diff --git a/libguile/srfi-14.h b/libguile/srfi-14.h
index 1b9c295..4b1a4b2 100644
--- a/libguile/srfi-14.h
+++ b/libguile/srfi-14.h
@@ -100,9 +100,7 @@ SCM_API SCM scm_char_set_intersection_x (SCM cs1, SCM rest);
 SCM_API SCM scm_char_set_difference_x (SCM cs1, SCM rest);
 SCM_API SCM scm_char_set_xor_x (SCM cs1, SCM rest);
 SCM_API SCM scm_char_set_diff_plus_intersection_x (SCM cs1, SCM cs2, SCM rest);
-#if SCM_CHARSET_DEBUG
-SCM_API SCM scm_debug_char_set (SCM cs);
-#endif /* SCM_CHARSET_DEBUG */
+SCM_API SCM scm_sys_char_set_dump (SCM charset);
 
 SCM_API SCM scm_char_set_lower_case;
 SCM_API SCM scm_char_set_upper_case;
diff --git a/libguile/srfi-14.i.c b/libguile/srfi-14.i.c
index 5ef21f3..fd537da 100644
--- a/libguile/srfi-14.i.c
+++ b/libguile/srfi-14.i.c
@@ -2,7 +2,8 @@
 
 /* This file is #include'd by srfi-14.c.  */
 
-/* This file was generated from 
http://unicode.org/Public/UNIDATA/UnicodeData.txt
+/* This file was generated from
+   http://unicode.org/Public/UNIDATA/UnicodeData.txt
    with the unidata_to_charset.pl script.  */
 
 scm_t_char_range cs_lower_case_ranges[] = {
@@ -6252,7 +6253,7 @@ scm_t_char_set cs_empty = {
   cs_empty_ranges
 };
 
-scm_t_char_range cs_full_ranges[] = {
+scm_t_char_range cs_designated_ranges[] = {
   {0x0000, 0x0377}
   ,
   {0x037a, 0x037e}
@@ -6925,7 +6926,7 @@ scm_t_char_range cs_full_ranges[] = {
   ,
   {0xac00, 0xd7a3}
   ,
-  {0xd800, 0xfa2d}
+  {0xe000, 0xfa2d}
   ,
   {0xfa30, 0xfa6a}
   ,
@@ -7144,7 +7145,7 @@ scm_t_char_range cs_full_ranges[] = {
   {0x100000, 0x10fffd}
 };
 
-scm_t_char_set cs_full = {
+scm_t_char_set cs_designated = {
   445,
-  cs_full_ranges
+  cs_designated_ranges
 };
diff --git a/libguile/strings.c b/libguile/strings.c
index 5199981..c7f09db 100644
--- a/libguile/strings.c
+++ b/libguile/strings.c
@@ -1374,7 +1374,7 @@ scm_is_string (SCM obj)
   return IS_STRING (obj);
 }
 
-static SCM
+SCM
 scm_from_stringn (const char *str, size_t len, const char *encoding,
                   scm_t_string_failed_conversion_handler handler)
 {
@@ -1383,6 +1383,9 @@ scm_from_stringn (const char *str, size_t len, const char 
*encoding,
   int wide = 0;
   SCM res;
 
+  if (len == 0)
+    return scm_nullstr;
+
   if (encoding == NULL)
     {
       /* If encoding is null, use Latin-1.  */
@@ -1570,7 +1573,7 @@ unistring_escapes_to_guile_escapes (char **bufp, size_t 
*lenp)
 }
 
 char *
-scm_to_locale_stringn (SCM str, size_t * lenp)
+scm_to_locale_stringn (SCM str, size_t *lenp)
 {
   SCM outport;
   scm_t_port *pt;
@@ -1677,6 +1680,8 @@ scm_to_stringn (SCM str, size_t *lenp, const char 
*encoding,
                           scm_list_2 (scm_from_locale_string (enc),
                                       str));
         }
+      if (handler == SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE)
+        unistring_escapes_to_guile_escapes (&buf, &len);
     }
   if (lenp)
     *lenp = len;
diff --git a/libguile/strings.h b/libguile/strings.h
index fff7c97..8a32918 100644
--- a/libguile/strings.h
+++ b/libguile/strings.h
@@ -111,6 +111,10 @@ SCM_API SCM scm_substring_shared (SCM str, SCM start, SCM 
end);
 SCM_API SCM scm_substring_copy (SCM str, SCM start, SCM end);
 SCM_API SCM scm_string_append (SCM args);
 
+SCM_API SCM scm_from_stringn (const char *str, size_t len, 
+                                     const char *encoding,
+                                     scm_t_string_failed_conversion_handler 
+                                     handler);
 SCM_API SCM scm_c_make_string (size_t len, SCM chr);
 SCM_API size_t scm_c_string_length (SCM str);
 SCM_API size_t scm_c_symbol_length (SCM sym);
diff --git a/libguile/strports.c b/libguile/strports.c
index 490a15f..b984b83 100644
--- a/libguile/strports.c
+++ b/libguile/strports.c
@@ -290,7 +290,7 @@ st_truncate (SCM port, scm_t_off length)
 }
 
 SCM 
-scm_i_mkstrport (SCM pos, const char *locale_str, size_t str_len, long modes, 
const char *caller)
+scm_i_mkstrport (SCM pos, const char *utf8_str, size_t str_len, long modes, 
const char *caller)
 {
   SCM z, str;
   scm_t_port *pt;
@@ -301,11 +301,11 @@ scm_i_mkstrport (SCM pos, const char *locale_str, size_t 
str_len, long modes, co
      to a locale representation for storage.  But, since string ports
      rely on string functionality for their memory management, we need
      to create a new string that has the 8-bit locale representation
-     of the underlying string.  This violates the guideline that the
-     internal encoding of characters in strings is in unicode
-     codepoints. */
+     of the underlying string.  
+
+     locale_str is already in the locale of the port.  */
   str = scm_i_make_string (str_len, &buf);
-  memcpy (buf, locale_str, str_len);
+  memcpy (buf, utf8_str, str_len);
 
   c_pos = scm_to_unsigned_integer (pos, 0, str_len);
 
@@ -323,12 +323,14 @@ scm_i_mkstrport (SCM pos, const char *locale_str, size_t 
str_len, long modes, co
   pt->write_end = pt->read_end = pt->read_buf + pt->read_buf_size;
 
   pt->rw_random = 1;
-
   scm_i_pthread_mutex_unlock (&scm_i_port_table_mutex);
 
   /* ensure write_pos is writable. */
   if ((modes & SCM_WRTNG) && pt->write_pos == pt->write_end)
     st_flush (z);
+
+  scm_i_set_port_encoding_x (z, "UTF-8");
+  scm_i_set_conversion_strategy_x (z, SCM_FAILED_CONVERSION_ERROR);
   return z;
 }
 
@@ -348,13 +350,18 @@ scm_mkstrport (SCM pos, SCM str, long modes, const char 
*caller)
      of the underlying string.  This violates the guideline that the
      internal encoding of characters in strings is in unicode
      codepoints. */
-  buf = scm_to_locale_stringn (str, &str_len);
+
+  /* String ports are are always initialized with "UTF-8" as their
+     encoding.  */
+  buf = scm_to_stringn (str, &str_len, "UTF-8", SCM_FAILED_CONVERSION_ERROR);
   z = scm_i_mkstrport (pos, buf, str_len, modes, caller);
   free (buf);
   return z;
 }
 
-/* create a new string from a string port's buffer.  */
+/* Create a new string from a string port's buffer, converting from
+   the port's 8-bit locale-specific representation to the standard
+   string representation.  */
 SCM scm_strport_to_string (SCM port)
 {
   scm_t_port *pt = SCM_PTAB_ENTRY (port);
@@ -363,27 +370,20 @@ SCM scm_strport_to_string (SCM port)
   if (pt->rw_active == SCM_PORT_WRITE)
     st_flush (port);
 
-  str = scm_from_locale_stringn ((char *)pt->read_buf, pt->read_buf_size);
-  scm_remember_upto_here_1 (port);
-  return str;
-}
-
-/* Create a vector containing the locale representation of the string in the
-   port's buffer.  */
-SCM scm_strport_to_locale_u8vector (SCM port)
-{
-  scm_t_port *pt = SCM_PTAB_ENTRY (port);
-  SCM vec;
-  char *buf;
-  
-  if (pt->rw_active == SCM_PORT_WRITE)
-    st_flush (port);
+  if (pt->read_buf_size == 0)
+    return scm_nullstr;
 
-  buf = scm_malloc (pt->read_buf_size);
-  memcpy (buf, pt->read_buf, pt->read_buf_size);
-  vec = scm_take_u8vector ((unsigned char *) buf, pt->read_buf_size);
+  if (pt->encoding == NULL)
+    {
+      char *buf;
+      str = scm_i_make_string (pt->read_buf_size, &buf);
+      memcpy (buf, pt->read_buf, pt->read_buf_size);
+    }
+  else
+    str = scm_from_stringn ((char *)pt->read_buf, pt->read_buf_size,
+                            pt->encoding, pt->ilseq_handler);
   scm_remember_upto_here_1 (port);
-  return vec;
+  return str;
 }
 
 SCM_DEFINE (scm_object_to_string, "object->string", 1, 1, 0,
@@ -410,25 +410,6 @@ SCM_DEFINE (scm_object_to_string, "object->string", 1, 1, 
0,
 }
 #undef FUNC_NAME
 
-SCM_DEFINE (scm_call_with_output_locale_u8vector, 
"call-with-output-locale-u8vector", 1, 0, 0, 
-           (SCM proc),
-           "Calls the one-argument procedure @var{proc} with a newly created 
output\n"
-           "port.  When the function returns, a vector containing the bytes of 
a\n"
-           "locale representation of the characters written into the port is 
returned\n")
-#define FUNC_NAME s_scm_call_with_output_locale_u8vector
-{
-  SCM p;
-
-  p = scm_mkstrport (SCM_INUM0, 
-                    scm_make_string (SCM_INUM0, SCM_UNDEFINED),
-                    SCM_OPN | SCM_WRTNG,
-                     FUNC_NAME);
-  scm_call_1 (proc, p);
-
-  return scm_get_output_locale_u8vector (p);
-}
-#undef FUNC_NAME
-
 SCM_DEFINE (scm_call_with_output_string, "call-with-output-string", 1, 0, 0, 
            (SCM proc),
            "Calls the one-argument procedure @var{proc} with a newly created 
output\n"
@@ -473,27 +454,6 @@ SCM_DEFINE (scm_open_input_string, "open-input-string", 1, 
0, 0,
 }
 #undef FUNC_NAME
 
-SCM_DEFINE (scm_open_input_locale_u8vector, "open-input-locale-u8vector", 1, 
0, 0,
-           (SCM vec),
-           "Take a u8vector containing the bytes of a string encoded in the\n"
-           "current locale and return an input port that delivers characters\n"
-           "from the string. The port can be closed by\n"
-           "@code{close-input-port}, though its storage will be reclaimed\n"
-           "by the garbage collector if it becomes inaccessible.")
-#define FUNC_NAME s_scm_open_input_locale_u8vector
-{
-  scm_t_array_handle hnd;
-  ssize_t inc;
-  size_t len;
-  const scm_t_uint8 *buf;
-
-  buf = scm_u8vector_elements (vec, &hnd, &len, &inc);
-  SCM p = scm_i_mkstrport(SCM_INUM0, (const char *) buf, len, SCM_OPN | 
SCM_RDNG, FUNC_NAME);
-  scm_array_handle_release (&hnd);
-  return p;
-}
-#undef FUNC_NAME
-
 SCM_DEFINE (scm_open_output_string, "open-output-string", 0, 0, 0, 
            (void),
            "Return an output port that will accumulate characters for\n"
@@ -526,19 +486,6 @@ SCM_DEFINE (scm_get_output_string, "get-output-string", 1, 
0, 0,
 #undef FUNC_NAME
 
 
-SCM_DEFINE (scm_get_output_locale_u8vector, "get-output-locale-u8vector", 1, 
0, 0, 
-           (SCM port),
-           "Given an output port created by @code{open-output-string},\n"
-           "return a u8 vector containing the characters of the string\n"
-           "encoded in the current locale.")
-#define FUNC_NAME s_scm_get_output_locale_u8vector
-{
-  SCM_VALIDATE_OPOUTSTRPORT (1, port);
-  return scm_strport_to_locale_u8vector (port);
-}
-#undef FUNC_NAME
-
-
 /* Given a null-terminated string EXPR containing a Scheme expression
    read it, and return it as an SCM value. */
 SCM
diff --git a/libguile/strports.h b/libguile/strports.h
index b2ded01..d93266a 100644
--- a/libguile/strports.h
+++ b/libguile/strports.h
@@ -47,16 +47,12 @@ SCM_API SCM scm_mkstrport (SCM pos, SCM str, long modes, 
const char * caller);
 SCM_INTERNAL SCM scm_i_mkstrport (SCM pos, const char *locale_str, size_t 
str_len, 
                                  long modes, const char *caller);
 SCM_API SCM scm_strport_to_string (SCM port);
-SCM_API SCM scm_strport_to_locale_u8vector (SCM port);
 SCM_API SCM scm_object_to_string (SCM obj, SCM printer);
 SCM_API SCM scm_call_with_output_string (SCM proc);
-SCM_API SCM scm_call_with_output_locale_u8vector (SCM proc);
 SCM_API SCM scm_call_with_input_string (SCM str, SCM proc);
 SCM_API SCM scm_open_input_string (SCM str);
-SCM_API SCM scm_open_input_locale_u8vector (SCM str);
 SCM_API SCM scm_open_output_string (void);
 SCM_API SCM scm_get_output_string (SCM port);
-SCM_API SCM scm_get_output_locale_u8vector (SCM port);
 SCM_API SCM scm_c_read_string (const char *expr);
 SCM_API SCM scm_c_eval_string (const char *expr);
 SCM_API SCM scm_c_eval_string_in_module (const char *expr, SCM module);
diff --git a/libguile/unidata_to_charset.pl b/libguile/unidata_to_charset.pl
index 6871e67..d086c8e 100755
--- a/libguile/unidata_to_charset.pl
+++ b/libguile/unidata_to_charset.pl
@@ -254,10 +254,14 @@ sub empty {
     return 0;
 }
 
-# Full -- All characters.  
-sub full {
+# Designated -- All characters except for the surrogates
+sub designated {
     my($codepoint, $name, $category, $uppercase, $lowercase)= @_;
-    return 1;
+    if ($category =~ (/Cs/)) {
+        return 0;
+    } else {
+        return 1;
+    }
 }
 
 
@@ -362,7 +366,7 @@ sub compute {
 # Write a bit of a header
 print $out "/* srfi-14.i.c -- standard SRFI-14 character set data */\n\n";
 print $out "/* This file is #include'd by srfi-14.c.  */\n\n";
-print $out "/* This file was generated from\n"
+print $out "/* This file was generated from\n";
 print $out "   http://unicode.org/Public/UNIDATA/UnicodeData.txt\n";;
 print $out "   with the unidata_to_charset.pl script.  */\n\n";
 
@@ -383,7 +387,7 @@ compute "symbol";
 compute "blank";
 compute "ascii";
 compute "empty";
-compute "full";
+compute "designated";
 
 close $in;
 close $out;
diff --git a/libguile/vports.c b/libguile/vports.c
index cea11c6..e3db60d 100644
--- a/libguile/vports.c
+++ b/libguile/vports.c
@@ -92,19 +92,26 @@ sf_fill_input (SCM port)
 {
   SCM p = SCM_PACK (SCM_STREAM (port));
   SCM ans;
+  scm_t_port *pt;
 
   ans = scm_call_0 (SCM_SIMPLE_VECTOR_REF (p, 3)); /* get char.  */
   if (scm_is_false (ans) || SCM_EOF_OBJECT_P (ans))
     return EOF;
   SCM_ASSERT (SCM_CHARP (ans), ans, SCM_ARG1, "sf_fill_input");
-  {
-    scm_t_port *pt = SCM_PTAB_ENTRY (port);    
-
-    *pt->read_buf = SCM_CHAR (ans);
-    pt->read_pos = pt->read_buf;
-    pt->read_end = pt->read_buf + 1;
-    return *pt->read_buf;
-  }
+  pt = SCM_PTAB_ENTRY (port);    
+
+  if (pt->encoding == NULL)
+    {
+      scm_t_port *pt = SCM_PTAB_ENTRY (port);    
+      
+      *pt->read_buf = SCM_CHAR (ans);
+      pt->read_pos = pt->read_buf;
+      pt->read_end = pt->read_buf + 1;
+      return *pt->read_buf;
+    }
+  else
+    scm_ungetc (SCM_CHAR (ans), port);
+  return SCM_CHAR (ans);
 }
 
 
diff --git a/libguile/win32-socket.c b/libguile/win32-socket.c
index e845d88..825b4c4 100644
--- a/libguile/win32-socket.c
+++ b/libguile/win32-socket.c
@@ -435,7 +435,7 @@ scm_i_socket_uncomment (char *line)
       while (end > line && (*end == '\r' || *end == '\n'))
        *end-- = '\0';
     }
-  while (end > line && isspace (*end))
+  while (end > line && isspace ((int) (*end)))
     *end-- = '\0';
 
   return end;
diff --git a/module/language/tree-il/compile-glil.scm 
b/module/language/tree-il/compile-glil.scm
index 86b610f..9de5c88 100644
--- a/module/language/tree-il/compile-glil.scm
+++ b/module/language/tree-il/compile-glil.scm
@@ -629,7 +629,7 @@
        ;; we know the vals are lambdas, we can set them to their local
        ;; var slots first, then capture their bindings, mutating them in
        ;; place.
-       (let ((RA (if (eq? context 'tail) #f (make-label))))
+       (let ((new-RA (if (or (eq? context 'tail) RA) #f (make-label))))
          (for-each
           (lambda (x v)
             (cond
@@ -657,7 +657,7 @@
                                allocation self emit-code)
                 (if (lambda-src x)
                     (emit-code #f (make-glil-source (lambda-src x))))
-                (comp-fix (lambda-body x) RA)
+                (comp-fix (lambda-body x) (or RA new-RA))
                 (emit-code #f (make-glil-unbind))
                 (emit-label POST)))))
           vals
@@ -696,7 +696,8 @@
           vals
           vars)
          (comp-tail body)
-         (emit-label RA)
+         (if new-RA
+             (emit-label new-RA))
          (emit-code #f (make-glil-unbind))))
 
       ((<let-values> src names vars exp body)
diff --git a/module/language/tree-il/primitives.scm 
b/module/language/tree-il/primitives.scm
index 955c7bf..98633f0 100644
--- a/module/language/tree-il/primitives.scm
+++ b/module/language/tree-il/primitives.scm
@@ -208,11 +208,11 @@
   (x) x
   (x y) (if (and (const? y)
                  (let ((y (const-exp y)))
-                   (and (exact? y) (= y 1))))
+                   (and (number? y) (exact? y) (= y 1))))
             (1+ x)
             (if (and (const? x)
                      (let ((x (const-exp x)))
-                       (and (exact? x) (= x 1))))
+                       (and (number? y) (exact? x) (= x 1))))
                 (1+ y)
                 (+ x y)))
   (x y z . rest) (+ x (+ y z . rest)))
@@ -226,7 +226,7 @@
   (x) (- 0 x)
   (x y) (if (and (const? y)
                  (let ((y (const-exp y)))
-                   (and (exact? y) (= y 1))))
+                   (and (number? y) (exact? y) (= y 1))))
             (1- x)
             (- x y))
   (x y z . rest) (- x (+ y z . rest)))
diff --git a/test-suite/tests/chars.test b/test-suite/tests/chars.test
index b52b384..7dc719c 100644
--- a/test-suite/tests/chars.test
+++ b/test-suite/tests/chars.test
@@ -1,7 +1,7 @@
 ;;;; chars.test --- test suite for Guile's char functions    -*- scheme -*-
 ;;;; Greg J. Badros <address@hidden>
 ;;;;
-;;;;   Copyright (C) 2000, 2006 Free Software Foundation, Inc.
+;;;;   Copyright (C) 2000, 2006, 2009 Free Software Foundation, Inc.
 ;;;; 
 ;;;; This library is free software; you can redistribute it and/or
 ;;;; modify it under the terms of the GNU Lesser General Public
@@ -22,6 +22,12 @@
 (define exception:wrong-type-to-apply
   (cons 'misc-error "^Wrong type to apply:"))
 
+(define exception:unknown-character-name
+  (cons #t "unknown character"))
+
+(define exception:out-of-range-octal
+  (cons #t "out-of-range"))
+
 
 (with-test-prefix "basic char handling"
 
@@ -31,13 +37,252 @@
     ;; evaluator-internal instruction codes and characters.
     (pass-if-exception "evaluating chars"
       exception:wrong-type-to-apply
-      (eval '(#\0) (interaction-environment)))))
-
-(pass-if "char-is-both? works"
-        (and
-         (not (char-is-both? #\?))
-         (not (char-is-both? #\newline))
-         (char-is-both? #\a)
-         (char-is-both? #\Z)
-         (not (char-is-both? #\1))))
+      (eval '(#\0) (interaction-environment))))
+
+  (with-test-prefix "comparisons"
+
+    ;; char=?
+    (pass-if "char=? #\\A #\\A"
+      (char=? #\A #\A))
+
+    (expect-fail "char=? #\\A #\\a"
+      (char=? #\A #\a))
+
+    (expect-fail "char=? #\\A #\\B"
+      (char=? #\A #\B))
+
+    (expect-fail "char=? #\\B #\\A"
+      (char=? #\A #\B))
+
+    ;; char<?
+    (expect-fail "char<? #\\A #\\A"
+      (char<? #\A #\A))
+
+    (pass-if "char<? #\\A #\\a"
+      (char<? #\A #\a))
+
+    (pass-if "char<? #\\A #\\B"
+      (char<? #\A #\B))
+
+    (expect-fail "char<? #\\B #\\A"
+      (char<? #\B #\A))
+
+    ;; char<=?
+    (pass-if "char<=? #\\A #\\A"
+      (char<=? #\A #\A))
+
+    (pass-if "char<=? #\\A #\\a"
+      (char<=? #\A #\a))
+
+    (pass-if "char<=? #\\A #\\B"
+      (char<=? #\A #\B))
+
+    (expect-fail "char<=? #\\B #\\A"
+      (char<=? #\B #\A))
+
+    ;; char>?
+    (expect-fail "char>? #\\A #\\A"
+      (char>? #\A #\A))
+
+    (expect-fail "char>? #\\A #\\a"
+      (char>? #\A #\a))
+
+    (expect-fail "char>? #\\A #\\B"
+      (char>? #\A #\B))
+
+    (pass-if "char>? #\\B #\\A"
+      (char>? #\B #\A))
+
+    ;; char>=?
+    (pass-if "char>=? #\\A #\\A"
+      (char>=? #\A #\A))
+
+    (expect-fail "char>=? #\\A #\\a"
+      (char>=? #\A #\a))
+
+    (expect-fail "char>=? #\\A #\\B"
+      (char>=? #\A #\B))
+
+    (pass-if "char>=? #\\B #\\A"
+      (char>=? #\B #\A))
+
+    ;; char-ci=?
+    (pass-if "char-ci=? #\\A #\\A"
+      (char-ci=? #\A #\A))
+
+    (pass-if "char-ci=? #\\A #\\a"
+      (char-ci=? #\A #\a))
+
+    (expect-fail "char-ci=? #\\A #\\B"
+      (char-ci=? #\A #\B))
+
+    (expect-fail "char-ci=? #\\B #\\A"
+      (char-ci=? #\A #\B))
+
+    ;; char-ci<?
+    (expect-fail "char-ci<? #\\A #\\A"
+      (char-ci<? #\A #\A))
+
+    (expect-fail "char-ci<? #\\A #\\a"
+      (char-ci<? #\A #\a))
+
+    (pass-if "char-ci<? #\\A #\\B"
+      (char-ci<? #\A #\B))
+
+    (expect-fail "char-ci<? #\\B #\\A"
+      (char-ci<? #\B #\A))
+
+    ;; char-ci<=?
+    (pass-if "char-ci<=? #\\A #\\A"
+      (char-ci<=? #\A #\A))
+
+    (pass-if "char-ci<=? #\\A #\\a"
+      (char-ci<=? #\A #\a))
+
+    (pass-if "char-ci<=? #\\A #\\B"
+      (char-ci<=? #\A #\B))
+
+    (expect-fail "char-ci<=? #\\B #\\A"
+      (char-ci<=? #\B #\A))
+
+    ;; char-ci>?
+    (expect-fail "char-ci>? #\\A #\\A"
+      (char-ci>? #\A #\A))
+
+    (expect-fail "char-ci>? #\\A #\\a"
+      (char-ci>? #\A #\a))
+
+    (expect-fail "char-ci>? #\\A #\\B"
+      (char-ci>? #\A #\B))
+
+    (pass-if "char-ci>? #\\B #\\A"
+      (char-ci>? #\B #\A))
+
+    ;; char-ci>=?
+    (pass-if "char-ci>=? #\\A #\\A"
+      (char-ci>=? #\A #\A))
+
+    (pass-if "char-ci>=? #\\A #\\a"
+      (char-ci>=? #\A #\a))
+
+    (expect-fail "char-ci>=? #\\A #\\B"
+      (char-ci>=? #\A #\B))
+
+    (pass-if "char-ci>=? #\\B #\\A"
+      (char-ci>=? #\B #\A)))
+
+  (with-test-prefix "categories"
+
+    (pass-if "char-alphabetic?"
+      (and (char-alphabetic? #\a)
+           (char-alphabetic? #\A)
+           (not (char-alphabetic? #\1))
+           (not (char-alphabetic? #\+))))
+
+    (pass-if "char-numeric?"
+      (and (not (char-numeric? #\a))
+           (not (char-numeric? #\A))
+           (char-numeric? #\1)
+           (not (char-numeric? #\+))))
+
+    (pass-if "char-whitespace?"
+      (and (not (char-whitespace? #\a))
+           (not (char-whitespace? #\A))
+           (not (char-whitespace? #\1))
+           (char-whitespace? #\space)
+           (not (char-whitespace? #\+))))
+
+    (pass-if "char-upper-case?"
+      (and (not (char-upper-case? #\a))
+           (char-upper-case? #\A)
+           (not (char-upper-case? #\1))
+           (not (char-upper-case? #\+))))
+
+    (pass-if "char-lower-case?"
+      (and (char-lower-case? #\a)
+           (not (char-lower-case? #\A))
+           (not (char-lower-case? #\1))
+           (not (char-lower-case? #\+))))
+
+    (pass-if "char-is-both? works"
+      (and
+       (not (char-is-both? #\?))
+       (not (char-is-both? #\newline))
+       (char-is-both? #\a)
+       (char-is-both? #\Z)
+       (not (char-is-both? #\1)))))
+
+  (with-test-prefix "integer"
+
+    (pass-if "char->integer"
+      (eqv? (char->integer #\A) 65))
+
+    (pass-if "integer->char"
+      (eqv? (integer->char 65) #\A))
+
+    (pass-if-exception "integer->char out of range, -1" exception:out-of-range
+      (integer->char -1))
+
+    (pass-if-exception "integer->char out of range, surrrogate" 
+                       exception:out-of-range
+      (integer->char #xd800))
+
+    (pass-if-exception "integer->char out of range, too big" 
+                       exception:out-of-range
+      (integer->char #x110000))
+
+    (pass-if-exception "octal out of range, surrrogate" 
+                       exception:out-of-range-octal
+      (with-input-from-string "#\\154000" read))
+
+    (pass-if-exception "octal out of range, too big" 
+                       exception:out-of-range-octal
+      (with-input-from-string "#\\4200000" read)))
+
+  (with-test-prefix "case"
+
+    (pass-if "char-upcase"
+      (eqv? (char-upcase #\a) #\A))
+
+    (pass-if "char-downcase"
+      (eqv? (char-downcase #\A) #\a)))
+
+  (with-test-prefix "charnames"
+
+    (pass-if "R5RS character names are case insensitive"
+      (and (eqv? #\space #\ )
+           (eqv? #\SPACE #\ )
+           (eqv? #\Space #\ )
+           (eqv? #\newline (integer->char 10))
+           (eqv? #\NEWLINE (integer->char 10))
+           (eqv? #\Newline (integer->char 10))))
+
+    (pass-if "C0 control names are case insensitive"
+      (and (eqv? #\nul #\000)
+           (eqv? #\soh #\001)
+           (eqv? #\stx #\002)
+           (eqv? #\NUL #\000)
+           (eqv? #\SOH #\001)
+           (eqv? #\STX #\002)
+           (eqv? #\Nul #\000)
+           (eqv? #\Soh #\001)
+           (eqv? #\Stx #\002)))
+
+    (pass-if "alt charnames are case insensitive"
+      (eqv? #\null #\nul)
+      (eqv? #\NULL #\nul)
+      (eqv? #\Null #\nul))
+
+    (pass-if-exception "bad charname" exception:unknown-character-name
+      (with-input-from-string "#\\blammo" read))
+
+    (pass-if "R5RS character names are preferred write format"
+      (string=?
+       (with-output-to-string (lambda () (write #\space)))
+       "#\\space"))
+
+    (pass-if "C0 control character names are preferred write format"
+      (string=?
+       (with-output-to-string (lambda () (write #\soh)))
+       "#\\soh"))))
 
diff --git a/test-suite/tests/encoding-iso88591.test 
b/test-suite/tests/encoding-iso88591.test
index d4de5e5..b4d48a6 100644
--- a/test-suite/tests/encoding-iso88591.test
+++ b/test-suite/tests/encoding-iso88591.test
@@ -33,6 +33,67 @@
 (if (defined? 'setlocale)
     (set! oldlocale (setlocale LC_ALL "")))
 
+(define ascii-a (integer->char 65))     ; LATIN CAPITAL LETTER A
+(define a-acute (integer->char #x00c1)) ; LATIN CAPITAL LETTER A WITH ACUTE
+(define alpha (integer->char #x03b1))   ; GREEK SMALL LETTER ALPHA
+(define cherokee-a (integer->char #x13a0)) ; CHEROKEE LETTER A
+
+(with-test-prefix "characters"
+  (pass-if "input A"
+    (char=? ascii-a #\A))
+
+  (pass-if "input A acute"
+    (char=? a-acute #\Á))
+
+  (pass-if "display A"
+           (let ((pt (open-output-string)))
+             (set-port-encoding! pt "ISO-8859-1")
+             (set-port-conversion-strategy! pt 'escape)
+             (display ascii-a pt)
+             (string=? "A"
+                       (get-output-string pt))))
+
+  (pass-if "display A acute"
+           (let ((pt (open-output-string)))
+             (set-port-encoding! pt "ISO-8859-1")
+             (set-port-conversion-strategy! pt 'escape)
+             (display a-acute pt)
+             (string=? "Á"
+                       (get-output-string pt))))
+
+  (pass-if "display alpha"
+           (let ((pt (open-output-string)))
+             (set-port-encoding! pt "ISO-8859-1")
+             (set-port-conversion-strategy! pt 'escape)
+             (display alpha pt)
+             (string-ci=? "\\u03b1"
+                       (get-output-string pt))))
+
+  (pass-if "display Cherokee a"
+           (let ((pt (open-output-string)))
+             (set-port-encoding! pt "ISO-8859-1")
+             (set-port-conversion-strategy! pt 'escape)
+             (display cherokee-a pt)
+             (string-ci=? "\\u13a0"
+                       (get-output-string pt))))
+
+  (pass-if "write A"
+           (let ((pt (open-output-string)))
+             (set-port-encoding! pt "ISO-8859-1")
+             (set-port-conversion-strategy! pt 'escape)
+             (write ascii-a pt)
+             (string=? "#\\A"
+                       (get-output-string pt))))
+
+  (pass-if "write A acute"
+           (let ((pt (open-output-string)))
+             (set-port-encoding! pt "ISO-8859-1")
+             (set-port-conversion-strategy! pt 'escape)
+             (write a-acute pt)
+             (string=? "#\\Á"
+                       (get-output-string pt)))))
+
+
 (define s1 "última")
 (define s2 "cédula")
 (define s3 "años")
@@ -84,27 +145,6 @@
           (list= eqv? (string->list s4)
                  (list #\¿ #\C #\ó #\m #\o #\?))))
 
-;; Check that the output is in ISO-8859-1 encoding
-(with-test-prefix "display"
- 
-  (pass-if "s1"
-          (let ((pt (open-output-string)))
-            (set-port-encoding! pt "ISO-8859-1")
-            (display s1 pt)
-            (list= eqv? 
-                   (list #xfa #x6c #x74 #x69 #x6d #x61)
-                   (u8vector->list
-                    (get-output-locale-u8vector pt)))))
-
-  (pass-if "s2"
-          (let ((pt (open-output-string)))
-            (set-port-encoding! pt "ISO-8859-1")
-            (display s2 pt)
-            (list= eqv? 
-                   (list #x63 #xe9 #x64 #x75 #x6c #x61)
-                   (u8vector->list
-                    (get-output-locale-u8vector pt))))))
-
 (with-test-prefix "symbols == strings"
 
   (pass-if "última"
diff --git a/test-suite/tests/encoding-iso88597.test 
b/test-suite/tests/encoding-iso88597.test
index 2221269..8c155d2 100644
--- a/test-suite/tests/encoding-iso88597.test
+++ b/test-suite/tests/encoding-iso88597.test
@@ -31,6 +31,65 @@
 (define oldlocale #f)
 (if (defined? 'setlocale)
     (set! oldlocale (setlocale LC_ALL "")))
+(define ascii-a (integer->char 65))     ; LATIN CAPITAL LETTER A
+(define a-acute (integer->char #x00c1)) ; LATIN CAPITAL LETTER A WITH ACUTE
+(define alpha (integer->char #x03b1))   ; GREEK SMALL LETTER ALPHA
+(define cherokee-a (integer->char #x13a0)) ; CHEROKEE LETTER A
+
+(with-test-prefix "characters"
+  (pass-if "input A"
+    (char=? ascii-a #\A))
+
+  (pass-if "input alpha"
+    (char=? alpha #\á))
+
+  (pass-if "display A"
+           (let ((pt (open-output-string)))
+             (set-port-encoding! pt "ISO-8859-7")
+             (set-port-conversion-strategy! pt 'escape)
+             (display ascii-a pt)
+             (string=? "A"
+                       (get-output-string pt))))
+
+  (pass-if "display A acute"
+           (let ((pt (open-output-string)))
+             (set-port-encoding! pt "ISO-8859-7")
+             (set-port-conversion-strategy! pt 'escape)
+             (display a-acute pt)
+             (string-ci=? "\\xc1"
+                       (get-output-string pt))))
+
+  (pass-if "display alpha"
+           (let ((pt (open-output-string)))
+             (set-port-encoding! pt "ISO-8859-7")
+             (set-port-conversion-strategy! pt 'escape)
+             (display alpha pt)
+             (string-ci=? "á"
+                       (get-output-string pt))))
+
+  (pass-if "display Cherokee A"
+           (let ((pt (open-output-string)))
+             (set-port-encoding! pt "ISO-8859-7")
+             (set-port-conversion-strategy! pt 'escape)
+             (display cherokee-a pt)
+             (string-ci=? "\\u13a0"
+                       (get-output-string pt))))
+
+  (pass-if "write A"
+           (let ((pt (open-output-string)))
+             (set-port-encoding! pt "ISO-8859-7")
+             (set-port-conversion-strategy! pt 'escape)
+             (write ascii-a pt)
+             (string=? "#\\A"
+                       (get-output-string pt))))
+
+  (pass-if "write alpha"
+           (let ((pt (open-output-string)))
+             (set-port-encoding! pt "ISO-8859-7")
+             (set-port-conversion-strategy! pt 'escape)
+             (write alpha pt)
+             (string=? "#\\á"
+                       (get-output-string pt)))))
 
 (define s1 "Ðåñß")
 (define s2 "ôçò")
@@ -83,27 +142,6 @@
           (list= eqv? (string->list s4)
                  (list #\ê #\á #\é))))
 
-;; Testing that the display of the string is output in the ISO-8859-7
-;; encoding
-(with-test-prefix "display"
- 
-  (pass-if "s1"
-          (let ((pt (open-output-string)))
-            (set-port-encoding! pt "ISO-8859-7")
-            (display s1 pt)
-            (list= eqv? 
-                   (list #xd0 #xe5 #xf1 #xdf)
-                   (u8vector->list 
-                    (get-output-locale-u8vector pt)))))
-  (pass-if "s2"
-          (let ((pt (open-output-string)))
-            (set-port-encoding! pt "ISO-8859-7")
-            (display s2 pt)
-            (list= eqv? 
-                   (list #xf4 #xe7 #xf2)
-                   (u8vector->list 
-                    (get-output-locale-u8vector pt))))))
-
 (with-test-prefix "symbols == strings"
 
   (pass-if "Ðåñß"
diff --git a/test-suite/tests/encoding-utf8.test 
b/test-suite/tests/encoding-utf8.test
index a2613f1..d2bdb69 100644
--- a/test-suite/tests/encoding-utf8.test
+++ b/test-suite/tests/encoding-utf8.test
@@ -32,6 +32,88 @@
 (if (defined? 'setlocale)
     (set! oldlocale (setlocale LC_ALL "")))
 
+(define ascii-a (integer->char 65))     ; LATIN CAPITAL LETTER A
+(define a-acute (integer->char #x00c1)) ; LATIN CAPITAL LETTER A WITH ACUTE
+(define alpha (integer->char #x03b1))   ; GREEK SMALL LETTER ALPHA
+(define cherokee-a (integer->char #x13a0)) ; CHEROKEE LETTER A
+
+(with-test-prefix "characters"
+  (pass-if "input A"
+    (char=? ascii-a #\A))
+
+  (pass-if "input A acute"
+    (char=? a-acute #\Á))
+ 
+  (pass-if "input alpha"
+    (char=? alpha #\α))
+ 
+  (pass-if "input Cherokee A"
+    (char=? cherokee-a #\Ꭰ))
+ 
+ (pass-if "display A"
+           (let ((pt (open-output-string)))
+             (set-port-encoding! pt "UTF-8")
+             (set-port-conversion-strategy! pt 'substitute)
+             (display ascii-a pt)
+             (string=? "A"
+                       (get-output-string pt))))
+
+  (pass-if "display A acute"
+           (let ((pt (open-output-string)))
+             (set-port-encoding! pt "UTF-8")
+             (set-port-conversion-strategy! pt 'substitute)
+             (display a-acute pt)
+             (string=? "Á"
+                       (get-output-string pt))))
+
+  (pass-if "display alpha"
+           (let ((pt (open-output-string)))
+             (set-port-encoding! pt "UTF-8")
+             (set-port-conversion-strategy! pt 'substitute)
+             (display alpha pt)
+             (string-ci=? "α"
+                       (get-output-string pt))))
+
+  (pass-if "display Cherokee A"
+           (let ((pt (open-output-string)))
+             (set-port-encoding! pt "UTF-8")
+             (set-port-conversion-strategy! pt 'substitute)
+             (display cherokee-a pt)
+             (string-ci=? "Ꭰ"
+                       (get-output-string pt))))
+
+  (pass-if "write A"
+           (let ((pt (open-output-string)))
+             (set-port-encoding! pt "UTF-8")
+             (set-port-conversion-strategy! pt 'escape)
+             (write ascii-a pt)
+             (string=? "#\\A"
+                       (get-output-string pt))))
+
+  (pass-if "write A acute"
+           (let ((pt (open-output-string)))
+             (set-port-encoding! pt "UTF-8")
+             (set-port-conversion-strategy! pt 'escape)
+             (write a-acute pt)
+             (string=? "#\\Á"
+                       (get-output-string pt))))
+
+  (pass-if "write alpha"
+           (let ((pt (open-output-string)))
+             (set-port-encoding! pt "UTF-8")
+             (set-port-conversion-strategy! pt 'escape)
+             (write alpha pt)
+             (string=? "#\\α"
+                       (get-output-string pt))))
+
+  (pass-if "write Cherokee A"
+           (let ((pt (open-output-string)))
+             (set-port-encoding! pt "UTF-8")
+             (set-port-conversion-strategy! pt 'escape)
+             (write cherokee-a pt)
+             (string=? "#\\Ꭰ"
+                       (get-output-string pt)))))
+
 (define s1 "última")
 (define s2 "cédula")
 (define s3 "años")
diff --git a/test-suite/tests/regexp.test b/test-suite/tests/regexp.test
index 7308399..9c48ea5 100644
--- a/test-suite/tests/regexp.test
+++ b/test-suite/tests/regexp.test
@@ -21,6 +21,8 @@
   #:use-module (test-suite lib)
   #:use-module (ice-9 regex))
 
+(setlocale LC_ALL "C")
+
 
 ;;; Run a regexp-substitute or regexp-substitute/global test, once
 ;;; providing a real port and once providing #f, requesting direct
@@ -130,6 +132,30 @@
 ;;; regexp-quote
 ;;;
 
+(define (with-latin1-locale thunk)
+  ;; Try out several ISO-8859-1 locales and run THUNK under the one that
+  ;; works (if any).
+  (define %locales
+    (append
+     (map (lambda (name)
+            (string-append name ".ISO-8859-1"))
+          '("fr_FR" "es_ES" "en_GB" "en_US" "de_DE" "pt_PT"))
+     (map (lambda (name)
+            (string-append name ".iso88591"))
+          '("fr_FR" "es_ES" "en_GB" "en_US" "de_DE" "pt_PT"))))
+
+
+  (let loop ((locales %locales))
+    (if (null? locales)
+        (throw 'unresolved)
+        (catch 'unresolved
+          (lambda ()
+            (with-locale (car locales) thunk))
+          (lambda (key . args)
+            (loop (cdr locales)))))))
+
+
+
 (with-test-prefix "regexp-quote"
 
   (pass-if-exception "no args" exception:wrong-num-args
@@ -154,32 +180,36 @@
           ;; try on each individual character, except #\nul
           (do ((i 1 (1+ i)))
               ((>= i char-code-limit))
-            (let* ((c (integer->char i))
-                   (s (string c))
-                   (q (regexp-quote s)))
-              (pass-if (list "char" i c s q)
-                (let ((m (regexp-exec (make-regexp q flag) s)))
-                  (and (= 0 (match:start m))
-                       (= 1 (match:end m)))))))
+             (let* ((c (integer->char i))
+                    (s (string c)))
+               (pass-if (list "char" i (format #f "~s ~s" c s))
+                 (with-latin1-locale
+                  (let* ((q (regexp-quote s))
+                         (m (regexp-exec (make-regexp q flag) s)))
+                    (and (= 0 (match:start m))
+                         (= 1 (match:end m))))))))
 
           ;; try on pattern "aX" where X is each character, except #\nul
           ;; this exposes things like "?" which are special only when they
           ;; follow a pattern to repeat or whatever ("a" in this case)
           (do ((i 1 (1+ i)))
               ((>= i char-code-limit))
-            (let* ((c (integer->char i))
-                   (s (string #\a c))
-                   (q (regexp-quote s)))
-              (pass-if (list "string \"aX\"" i c s q)
-                (let ((m (regexp-exec (make-regexp q flag) s)))
-                  (and (= 0 (match:start m))
-                       (= 2 (match:end m)))))))
+             (let* ((c (integer->char i))
+                    (s (string #\a c))
+                    (q (regexp-quote s)))
+               (pass-if (list "string \"aX\"" i (format #f "~s ~s" c s))
+                 (with-latin1-locale
+                 (let* ((q (regexp-quote s))
+                         (m (regexp-exec (make-regexp q flag) s)))
+                    (and (= 0 (match:start m))
+                         (= 2 (match:end m))))))))
 
           (pass-if "string of all chars"
-            (let ((m (regexp-exec (make-regexp (regexp-quote allchars)
-                                               flag) allchars)))
-              (and (= 0 (match:start m))
-                   (= (string-length allchars) (match:end m))))))))
+             (with-latin1-locale
+              (let ((m (regexp-exec (make-regexp (regexp-quote allchars)
+                                                 flag) allchars)))
+                (and (= 0 (match:start m))
+                     (= (string-length allchars) (match:end m)))))))))
      lst)))
 
 ;;;
diff --git a/test-suite/tests/srfi-14.test b/test-suite/tests/srfi-14.test
index 56c944a..f12a255 100644
--- a/test-suite/tests/srfi-14.test
+++ b/test-suite/tests/srfi-14.test
@@ -53,6 +53,214 @@
            (char-set->list (char-set #\a #\c #\e))
            (list #\a #\c #\e))))
           
+(with-test-prefix "char set additition"
+
+  (pass-if "empty + x"
+    (let ((cs (char-set)))
+      (char-set-adjoin! cs #\x)
+      (list= eqv? 
+             (char-set->list cs)
+             (list #\x))))
+
+  (pass-if "x + y"
+    (let ((cs (char-set #\x)))
+      (char-set-adjoin! cs #\y)
+      (list= eqv? 
+             (char-set->list cs)
+             (list #\x #\y))))
+
+  (pass-if "x + w"
+    (let ((cs (char-set #\x)))
+      (char-set-adjoin! cs #\w)
+      (list= eqv? 
+             (char-set->list cs)
+             (list #\w #\x))))
+
+  (pass-if "x + z"
+    (let ((cs (char-set #\x)))
+      (char-set-adjoin! cs #\z)
+      (list= eqv? 
+             (char-set->list cs)
+             (list #\x #\z))))
+
+  (pass-if "x + v"
+    (let ((cs (char-set #\x)))
+      (char-set-adjoin! cs #\v)
+      (list= eqv? 
+             (char-set->list cs)
+             (list #\v #\x))))
+
+  (pass-if "uv + w"
+    (let ((cs (char-set #\u #\v)))
+      (char-set-adjoin! cs #\w)
+      (list= eqv? 
+             (char-set->list cs)
+             (list #\u #\v #\w))))
+
+  (pass-if "uv + t"
+    (let ((cs (char-set #\u #\v)))
+      (char-set-adjoin! cs #\t)
+      (list= eqv? 
+             (char-set->list cs)
+             (list #\t #\u #\v))))
+
+  (pass-if "uv + x"
+    (let ((cs (char-set #\u #\v)))
+      (char-set-adjoin! cs #\x)
+      (list= eqv? 
+             (char-set->list cs)
+             (list #\u #\v #\x))))
+
+  (pass-if "uv + s"
+    (let ((cs (char-set #\u #\v)))
+      (char-set-adjoin! cs #\s)
+      (list= eqv? 
+             (char-set->list cs)
+             (list #\s #\u #\v))))
+
+  (pass-if "uvx + w"
+    (let ((cs (char-set #\u #\v #\x)))
+      (char-set-adjoin! cs #\w)
+      (list= eqv? 
+             (char-set->list cs)
+             (list #\u #\v #\w #\x))))
+
+  (pass-if "uvx + y"
+    (let ((cs (char-set #\u #\v #\x)))
+      (char-set-adjoin! cs #\y)
+      (list= eqv? 
+             (char-set->list cs)
+             (list #\u #\v #\x #\y))))
+
+  (pass-if "uvxy + w"
+    (let ((cs (char-set #\u #\v #\x #\y)))
+      (char-set-adjoin! cs #\w)
+      (list= eqv? 
+             (char-set->list cs)
+             (list #\u #\v #\w #\x #\y)))))
+
+(with-test-prefix "char set union"
+  (pass-if "null U abc"
+    (char-set= (char-set-union (char-set) (->char-set "abc"))
+               (->char-set "abc")))
+
+  (pass-if "ab U ab"
+    (char-set= (char-set-union (->char-set "ab") (->char-set "ab"))
+               (->char-set "ab")))
+
+  (pass-if "ab U bc"
+    (char-set= (char-set-union (->char-set "ab") (->char-set "bc"))
+               (->char-set "abc")))
+
+  (pass-if "ab U cd"
+    (char-set= (char-set-union (->char-set "ab") (->char-set "cd"))
+               (->char-set "abcd")))
+
+  (pass-if "ab U de"
+    (char-set= (char-set-union (->char-set "ab") (->char-set "de"))
+               (->char-set "abde")))
+
+  (pass-if "abc U bcd"
+    (char-set= (char-set-union (->char-set "abc") (->char-set "bcd"))
+               (->char-set "abcd")))
+
+  (pass-if "abdf U abcdefg"
+    (char-set= (char-set-union (->char-set "abdf") (->char-set "abcdefg"))
+               (->char-set "abcdefg")))
+
+  (pass-if "abef U cd"
+    (char-set= (char-set-union (->char-set "abef") (->char-set "cd"))
+               (->char-set "abcdef")))
+
+  (pass-if "abgh U cd"
+    (char-set= (char-set-union (->char-set "abgh") (->char-set "cd"))
+               (->char-set "abcdgh")))
+
+  (pass-if "bc U ab"
+    (char-set= (char-set-union (->char-set "bc") (->char-set "ab"))
+               (->char-set "abc")))
+
+  (pass-if "cd U ab"
+    (char-set= (char-set-union (->char-set "cd") (->char-set "ab"))
+               (->char-set "abcd")))
+
+  (pass-if "de U ab"
+    (char-set= (char-set-union (->char-set "de") (->char-set "ab"))
+               (->char-set "abde")))
+
+  (pass-if "cd U abc"
+    (char-set= (char-set-union (->char-set "cd") (->char-set "abc"))
+               (->char-set "abcd")))
+
+  (pass-if "cd U abcd"
+    (char-set= (char-set-union (->char-set "cd") (->char-set "abcd"))
+               (->char-set "abcd")))
+
+  (pass-if "cde U abcdef"
+    (char-set= (char-set-union (->char-set "cde") (->char-set "abcdef"))
+               (->char-set "abcdef"))))
+
+(with-test-prefix "char set xor"
+  (pass-if "null - xy"
+    (char-set= (char-set-xor (char-set) (char-set #\x #\y))
+               (char-set #\x #\y)))
+
+  (pass-if "x - x"
+    (char-set= (char-set-xor (char-set #\x) (char-set #\x))
+               (char-set)))
+
+  (pass-if "xy - x"
+    (char-set= (char-set-xor (char-set #\x #\y) (char-set #\x))
+               (char-set #\y)))
+
+  (pass-if "xy - y"
+    (char-set= (char-set-xor (char-set #\x #\y) (char-set #\y))
+               (char-set #\x)))
+
+  (pass-if "wxy - w"
+    (char-set= (char-set-xor (char-set #\w #\x #\y) (char-set #\w))
+               (char-set #\x #\y)))
+
+  (pass-if "wxy - x"
+    (char-set= (char-set-xor (char-set #\w #\x #\y) (char-set #\x))
+               (char-set #\w #\y)))
+
+  (pass-if "wxy - y"
+    (char-set= (char-set-xor (char-set #\w #\x #\y) (char-set #\y))
+               (char-set #\w #\x)))
+
+  (pass-if "uvxy - u"
+    (char-set= (char-set-xor (char-set #\u #\v #\x #\y) (char-set #\u))
+               (char-set #\v #\x #\y)))
+
+  (pass-if "uvxy - v"
+    (char-set= (char-set-xor (char-set #\u #\v #\x #\y) (char-set #\v))
+               (char-set #\u #\x #\y)))
+
+  (pass-if "uvxy - x"
+    (char-set= (char-set-xor (char-set #\u #\v #\x #\y) (char-set #\x))
+               (char-set #\u #\v #\y)))
+
+  (pass-if "uvxy - y"
+    (char-set= (char-set-xor (char-set #\u #\v #\x #\y) (char-set #\y))
+               (char-set #\u #\v #\x)))
+
+  (pass-if "uwy - u"
+    (char-set= (char-set-xor (char-set #\u #\w #\y) (char-set #\u))
+               (char-set #\w #\y)))
+
+  (pass-if "uwy - w"
+    (char-set= (char-set-xor (char-set #\u #\w #\y) (char-set #\w))
+               (char-set #\u #\y)))
+
+  (pass-if "uwy - y"
+    (char-set= (char-set-xor (char-set #\u #\w #\y) (char-set #\y))
+               (char-set #\u #\w)))
+
+  (pass-if "uvwy - v"
+    (char-set= (char-set-xor (char-set #\u #\v #\w #\y) (char-set #\v))
+               (char-set #\u #\w #\y))))
+
 
 (with-test-prefix "char-set?"
 
@@ -83,7 +291,10 @@
     (not (char-set= (char-set #\a) (char-set))))
 
   (pass-if "success, more args"
-    (char-set= char-set:blank char-set:blank char-set:blank)))
+    (char-set= char-set:blank char-set:blank char-set:blank))
+
+  (pass-if "failure, same length, different elements"
+    (not (char-set= (char-set #\a #\b #\d) (char-set #\a #\c #\d)))))
 
 (with-test-prefix "char-set<="
   (pass-if "success, no arg"
@@ -238,6 +449,199 @@
        (string=? (char-set->string cs)
                  "egilu"))))
 
+(with-test-prefix "list->char-set"
+
+  (pass-if "list->char-set"
+    (char-set= (list->char-set '(#\a #\b #\c))
+               (->char-set "abc")))
+
+  (pass-if "list->char-set!"
+    (let* ((cs (char-set #\a #\z)))
+      (list->char-set! '(#\m #\n) cs)
+      (char-set= cs
+                 (char-set #\a #\m #\n #\z)))))
+
+(with-test-prefix "string->char-set"
+
+  (pass-if "string->char-set"
+    (char-set= (string->char-set "foobar")
+               (string->char-set "barfoo")))
+
+  (pass-if "string->char-set cs"
+    (char-set= (string->char-set "foo" (string->char-set "bar"))
+               (string->char-set "barfoo")))
+
+  (pass-if "string->char-set!"
+    (let ((cs (string->char-set "bar")))
+      (string->char-set! "foo" cs)
+      (char-set= cs
+                 (string->char-set "barfoo")))))
+
+(with-test-prefix "char-set-filter"
+
+  (pass-if "filter w/o base"
+    (char-set=
+     (char-set-filter (lambda (c) (char=? c #\x))
+                      (->char-set "qrstuvwxyz"))
+     (->char-set #\x)))
+
+  (pass-if "filter w/ base"
+    (char-set=
+     (char-set-filter (lambda (c) (char=? c #\x))
+                      (->char-set "qrstuvwxyz")
+                      (->char-set "op"))
+                      
+     (->char-set "opx")))
+
+  (pass-if "filter!"
+    (let ((cs (->char-set "abc")))
+      (set! cs (char-set-filter! (lambda (c) (char=? c #\x))
+                                 (->char-set "qrstuvwxyz")
+                                 cs))
+      (char-set= (string->char-set "abcx")
+                 cs))))
+
+
+(with-test-prefix "char-set-intersection"
+
+  (pass-if "empty"
+    (char-set= (char-set-intersection (char-set) (char-set))
+               (char-set)))
+
+  (pass-if "identical, one element"
+    (char-set= (char-set-intersection (char-set #\a) (char-set #\a))
+               (char-set #\a)))
+
+  (pass-if "identical, two elements"
+    (char-set= (char-set-intersection (char-set #\a #\b) (char-set #\a #\b))
+               (char-set #\a #\b)))
+
+  (pass-if "identical, two elements"
+    (char-set= (char-set-intersection (char-set #\a #\c) (char-set #\a #\c))
+               (char-set #\a #\c)))
+
+  (pass-if "one vs null"
+    (char-set= (char-set-intersection (char-set #\a) (char-set))
+               (char-set)))
+
+  (pass-if "null vs one"
+    (char-set= (char-set-intersection (char-set) (char-set #\a))
+               (char-set)))
+
+  (pass-if "no elements shared"
+    (char-set= (char-set-intersection (char-set #\a #\c) (char-set #\b #\d))
+               (char-set)))
+
+  (pass-if "one elements shared"
+    (char-set= (char-set-intersection (char-set #\a #\c #\d) (char-set #\b 
#\d))
+               (char-set #\d))))
+
+(with-test-prefix "char-set-complement"
+
+  (pass-if "complement of null"
+           (char-set= (char-set-complement (char-set))
+                      (char-set-union (ucs-range->char-set 0 #xd800)
+                                      (ucs-range->char-set #xe000 #x110000))))
+
+  (pass-if "complement of null (2)"
+           (char-set= (char-set-complement (char-set))
+                      (ucs-range->char-set 0 #x110000)))
+
+  (pass-if "complement of #\\0"
+           (char-set= (char-set-complement (char-set #\nul))
+                      (ucs-range->char-set 1 #x110000)))
+
+  (pass-if "complement of U+10FFFF"
+           (char-set= (char-set-complement (char-set (integer->char #x10ffff)))
+                      (ucs-range->char-set 0 #x10ffff)))
+
+  (pass-if "complement of 'FOO'"
+           (char-set= (char-set-complement (->char-set "FOO"))
+                      (char-set-union (ucs-range->char-set 0 (char->integer 
#\F))
+                                      (ucs-range->char-set (char->integer #\G) 
+                                                           (char->integer #\O))
+                                      (ucs-range->char-set (char->integer #\P) 
+                                                            #x110000))))
+  (pass-if "complement of #\\a #\\b U+010300"
+           (char-set= (char-set-complement (char-set #\a #\b (integer->char 
#x010300)))
+                      (char-set-union (ucs-range->char-set 0 (char->integer 
#\a))
+                                      (ucs-range->char-set (char->integer #\c) 
#x010300)
+                                      (ucs-range->char-set #x010301 
#x110000)))))
+
+(with-test-prefix "ucs-range->char-set"
+  (pass-if "char-set"
+    (char-set= (ucs-range->char-set 65 68)
+               (->char-set "ABC")))
+
+  (pass-if "char-set w/ base"
+    (char-set= (ucs-range->char-set 65 68 #f (->char-set "DEF"))
+               (->char-set "ABCDEF")))
+
+  (pass-if "char-set!"
+    (let ((cs (->char-set "DEF")))
+      (ucs-range->char-set! 65 68 #f cs)
+      (char-set= cs
+                 (->char-set "ABCDEF")))))
+
+(with-test-prefix "char-set-count"
+  (pass-if "null"
+    (= 0 (char-set-count (lambda (c) #t) (char-set))))
+
+  (pass-if "count"
+    (= 5 (char-set-count (lambda (c) #t) 
+                         (->char-set "guile")))))
+
+(with-test-prefix "char-set-contains?"
+  (pass-if "#\\a not in null"
+    (not (char-set-contains? (char-set) #\a)))
+
+  (pass-if "#\\a is in 'abc'"
+    (char-set-contains? (->char-set "abc") #\a)))
+
+(with-test-prefix "any / every"
+  (pass-if "char-set-every #t"
+    (char-set-every (lambda (c) #t) 
+                    (->char-set "abc")))
+
+  (pass-if "char-set-every #f"
+    (not (char-set-every (lambda (c) (char=? c #\c)) 
+                         (->char-set "abc"))))
+
+  (pass-if "char-set-any #t"
+    (char-set-any (lambda (c) (char=? c #\c)) 
+                  (->char-set "abc")))
+
+  (pass-if "char-set-any #f"
+    (not (char-set-any (lambda (c) #f)
+                       (->char-set "abc")))))
+
+(with-test-prefix "char-set-delete"
+  (pass-if "abc - a"
+    (char-set= (char-set-delete (->char-set "abc") #\a)
+               (char-set #\b #\c)))
+
+  (pass-if "abc - d"
+    (char-set= (char-set-delete (->char-set "abc") #\d)
+               (char-set #\a #\b #\c)))
+
+  (pass-if "delete! abc - a"
+    (let ((cs (char-set #\a #\b #\c)))
+      (char-set-delete! cs #\a)
+      (char-set= cs (char-set #\b #\c)))))
+
+(with-test-prefix "char-set-difference"
+  (pass-if "not different"
+    (char-set= (char-set-difference (->char-set "foobar") (->char-set 
"foobar"))
+               (char-set)))
+
+  (pass-if "completely different"
+    (char-set= (char-set-difference (->char-set "foo") (->char-set "bar"))
+               (->char-set "foo")))
+
+  (pass-if "partially different"
+    (char-set= (char-set-difference (->char-set "breakfast") (->char-set 
"breakroom"))
+               (->char-set "fst"))))
+
 (with-test-prefix "standard char sets (ASCII)"
 
   (pass-if "char-set:lower-case"


hooks/post-receive
-- 
GNU Guile




reply via email to

[Prev in Thread] Current Thread [Next in Thread]