emacs-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Emacs-diffs] /srv/bzr/emacs/emacs-24 r107781: Warning comments about subtleties of fetching characters from buffers/strings.


From: Eli Zaretskii
Subject: [Emacs-diffs] /srv/bzr/emacs/emacs-24 r107781: Warning comments about subtleties of fetching characters from buffers/strings.
Date: Fri, 06 Apr 2012 16:10:30 +0300
User-agent: Bazaar (2.3.1)

------------------------------------------------------------
revno: 107781
committer: Eli Zaretskii <address@hidden>
branch nick: trunk
timestamp: Fri 2012-04-06 16:10:30 +0300
message:
  Warning comments about subtleties of fetching characters from buffers/strings.
  
   src/buffer.h (FETCH_CHAR, FETCH_MULTIBYTE_CHAR):
   src/character.h (STRING_CHAR, STRING_CHAR_AND_LENGTH): Add comments
   about subtle differences between FETCH_CHAR* and STRING_CHAR*
   macros related to unification of CJK characters.  For the details,
   see the discussion following the message here:
   http://debbugs.gnu.org/cgi/bugreport.cgi?bug=11073#14.
modified:
  src/ChangeLog
  src/buffer.h
  src/character.h
=== modified file 'src/ChangeLog'
--- a/src/ChangeLog     2012-04-04 07:54:02 +0000
+++ b/src/ChangeLog     2012-04-06 13:10:30 +0000
@@ -1,3 +1,12 @@
+2012-04-06  Eli Zaretskii  <address@hidden>
+
+       * buffer.h (FETCH_CHAR, FETCH_MULTIBYTE_CHAR):
+       * character.h (STRING_CHAR, STRING_CHAR_AND_LENGTH): Add comments
+       about subtle differences between FETCH_CHAR* and STRING_CHAR*
+       macros related to unification of CJK characters.  For the details,
+       see the discussion following the message here:
+       http://debbugs.gnu.org/cgi/bugreport.cgi?bug=11073#14.
+
 2012-04-04  Chong Yidong  <address@hidden>
 
        * keyboard.c (Vdelayed_warnings_list): Doc fix.

=== modified file 'src/buffer.h'
--- a/src/buffer.h      2012-01-19 07:21:25 +0000
+++ b/src/buffer.h      2012-04-06 13:10:30 +0000
@@ -343,7 +343,8 @@
  - (ptr - (current_buffer)->text->beg <= GPT_BYTE - BEG_BYTE ? 0 : GAP_SIZE) \
  + BEG_BYTE)
 
-/* Return character at byte position POS.  */
+/* Return character at byte position POS.  See the caveat WARNING for
+   FETCH_MULTIBYTE_CHAR below.  */
 
 #define FETCH_CHAR(pos)                                        \
   (!NILP (BVAR (current_buffer, enable_multibyte_characters))  \
@@ -359,7 +360,17 @@
 
 /* Return character code of multi-byte form at byte position POS.  If POS
    doesn't point the head of valid multi-byte form, only the byte at
-   POS is returned.  No range checking.  */
+   POS is returned.  No range checking.
+
+   WARNING: The character returned by this macro could be "unified"
+   inside STRING_CHAR, if the original character in the buffer belongs
+   to one of the Private Use Areas (PUAs) of codepoints that Emacs
+   uses to support non-unified CJK characters.  If that happens,
+   CHAR_BYTES will return a value that is different from the length of
+   the original multibyte sequence stored in the buffer.  Therefore,
+   do _not_ use FETCH_MULTIBYTE_CHAR if you need to advance through
+   the buffer to the next character after fetching this one.  Instead,
+   use either FETCH_CHAR_ADVANCE or STRING_CHAR_AND_LENGTH.  */
 
 #define FETCH_MULTIBYTE_CHAR(pos)                                      \
   (_fetch_multibyte_char_p = (((pos) >= GPT_BYTE ? GAP_SIZE : 0)       \

=== modified file 'src/character.h'
--- a/src/character.h   2011-11-20 02:29:42 +0000
+++ b/src/character.h   2012-04-06 13:10:30 +0000
@@ -292,7 +292,9 @@
   } while (0)
 
 /* Return the character code of character whose multibyte form is at
-   P.  */
+   P.  Note that this macro unifies CJK characters whose codepoints
+   are in the Private Use Areas (PUAs), so it might return a different
+   codepoint from the one actually stored at P.  */
 
 #define STRING_CHAR(p)                                         \
   (!((p)[0] & 0x80)                                            \
@@ -309,7 +311,15 @@
 
 
 /* Like STRING_CHAR, but set ACTUAL_LEN to the length of multibyte
-   form.  */
+   form.
+
+   Note: This macro returns the actual length of the character's
+   multibyte sequence as it is stored in a buffer or string.  The
+   character it returns might have a different codepoint that has a
+   different multibyte sequence of a different length, due to possible
+   unification of CJK characters inside string_char.  Therefore do NOT
+   assume that the length returned by this macro is identical to the
+   length of the multibyte sequence of the character it returns.  */
 
 #define STRING_CHAR_AND_LENGTH(p, actual_len)                  \
   (!((p)[0] & 0x80)                                            \


reply via email to

[Prev in Thread] Current Thread [Next in Thread]