emacs-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Emacs-diffs] Changes to emacs/lisp/international/utf-16.el


From: Kenichi Handa
Subject: [Emacs-diffs] Changes to emacs/lisp/international/utf-16.el
Date: Fri, 20 Jun 2003 22:26:13 -0400

Index: emacs/lisp/international/utf-16.el
diff -c emacs/lisp/international/utf-16.el:1.9 
emacs/lisp/international/utf-16.el:1.10
*** emacs/lisp/international/utf-16.el:1.9      Tue Apr  8 03:23:44 2003
--- emacs/lisp/international/utf-16.el  Fri Jun 20 22:26:13 2003
***************
*** 26,37 ****
  
  ;; Support for UTF-16, which is a two-byte encoding (modulo
  ;; surrogates) of Unicode, written either in little or big endian
! ;; order: coding-systems `mule-utf-16-le' and `mule-utf-16-be'.
! ;; (utf-16-le is used by the DozeN'T clipboard, for instance.)  The
! ;; data are preceeded by a two-byte signature which identifies their
! ;; byte sex.  These are used by the coding-category-utf-16-{b,l}e code
! ;; to identify the coding, but ignored on decoding.
! 
  ;; Note that un-decodable sequences aren't (yet?) preserved as raw
  ;; bytes, as they are with utf-8, so reading and writing as utf-16 can
  ;; corrupt data.
--- 26,43 ----
  
  ;; Support for UTF-16, which is a two-byte encoding (modulo
  ;; surrogates) of Unicode, written either in little or big endian
! ;; order and either with or without the leading BOM (a two-byte
! ;; signature which identifies their byte sex).
! ;;
! ;; We provide these base coding systems:
! ;;    name                                    endian  BOM
! ;;    ----                                    ------  ---
! ;;    mule-utf-16le                           little  no
! ;;    mule-utf-16be                           big     no
! ;;    mule-utf-16le-with-signature            little  yes
! ;;    mule-utf-16be-with-signature            big     yes
! ;;    mule-utf-16                             both    yes
! ;;
  ;; Note that un-decodable sequences aren't (yet?) preserved as raw
  ;; bytes, as they are with utf-8, so reading and writing as utf-16 can
  ;; corrupt data.
***************
*** 112,118 ****
                     (r1 %= 96)
                     (r1 += (r2 + 32)))))))))))))
  
! (defconst utf-16-le-decode-loop
    `(loop
      (read r3 r4)
      (r1 = (r4 <8 r3))
--- 118,124 ----
                     (r1 %= 96)
                     (r1 += (r2 + 32)))))))))))))
  
! (defconst utf-16le-decode-loop
    `(loop
      (read r3 r4)
      (r1 = (r4 <8 r3))
***************
*** 121,127 ****
      (write-multibyte-character r0 r1)
      (repeat)))
  
! (defconst utf-16-be-decode-loop
    `(loop
      (read r3 r4)
      (r1 = (r3 <8 r4))
--- 127,133 ----
      (write-multibyte-character r0 r1)
      (repeat)))
  
! (defconst utf-16be-decode-loop
    `(loop
      (read r3 r4)
      (r1 = (r3 <8 r4))
***************
*** 132,166 ****
  
  )
  
! (define-ccl-program ccl-decode-mule-utf-16-le
    `(2                                 ; 2 bytes -> 1 to 4 bytes
!     ,utf-16-le-decode-loop)
    "Decode UTF-16LE (little endian without signature bytes).
  Basic decoding is done into the charsets ascii, latin-iso8859-1 and
  mule-unicode-*.  Un-representable Unicode characters are decoded as
  U+fffd.  The result is run through the translation-table named
  `utf-translation-table-for-decode'.")
  
! (define-ccl-program ccl-decode-mule-utf-16-be
    `(2                                 ; 2 bytes -> 1 to 4 bytes
!     ,utf-16-be-decode-loop)
    "Decode UTF-16BE (big endian without signature bytes).
  Basic decoding is done into the charsets ascii, latin-iso8859-1 and
  mule-unicode-*.  Un-representable Unicode characters are
  decoded as U+fffd.  The result is run through the translation-table of
  name `utf-translation-table-for-decode'.")
  
! (define-ccl-program ccl-decode-mule-utf-16-le-with-signature
    `(2
      ((read r3 r4)
!      ,utf-16-le-decode-loop))
!   "Like ccl-decode-utf-16-le but skip the first 2-byte BOM.")
  
! (define-ccl-program ccl-decode-mule-utf-16-be-with-signature
    `(2
      ((read r3 r4)
!      ,utf-16-be-decode-loop))
!   "Like ccl-decode-utf-16-be but skip the first 2-byte BOM.")
  
  (define-ccl-program ccl-decode-mule-utf-16
    `(2
--- 138,172 ----
  
  )
  
! (define-ccl-program ccl-decode-mule-utf-16le
    `(2                                 ; 2 bytes -> 1 to 4 bytes
!     ,utf-16le-decode-loop)
    "Decode UTF-16LE (little endian without signature bytes).
  Basic decoding is done into the charsets ascii, latin-iso8859-1 and
  mule-unicode-*.  Un-representable Unicode characters are decoded as
  U+fffd.  The result is run through the translation-table named
  `utf-translation-table-for-decode'.")
  
! (define-ccl-program ccl-decode-mule-utf-16be
    `(2                                 ; 2 bytes -> 1 to 4 bytes
!     ,utf-16be-decode-loop)
    "Decode UTF-16BE (big endian without signature bytes).
  Basic decoding is done into the charsets ascii, latin-iso8859-1 and
  mule-unicode-*.  Un-representable Unicode characters are
  decoded as U+fffd.  The result is run through the translation-table of
  name `utf-translation-table-for-decode'.")
  
! (define-ccl-program ccl-decode-mule-utf-16le-with-signature
    `(2
      ((read r3 r4)
!      ,utf-16le-decode-loop))
!   "Like ccl-decode-utf-16le but skip the first 2-byte BOM.")
  
! (define-ccl-program ccl-decode-mule-utf-16be-with-signature
    `(2
      ((read r3 r4)
!      ,utf-16be-decode-loop))
!   "Like ccl-decode-utf-16be but skip the first 2-byte BOM.")
  
  (define-ccl-program ccl-decode-mule-utf-16
    `(2
***************
*** 172,178 ****
         ;; function.
         (,@utf-16-decode-ucs
          (write-multibyte-character r0 r1)
!         ,utf-16-le-decode-loop)
         ((if (r1 == #xFEFF)
            ;; R1 is a BOM for big endian, but we can't keep that
            ;; character in the output because it can't be
--- 178,184 ----
         ;; function.
         (,@utf-16-decode-ucs
          (write-multibyte-character r0 r1)
!         ,utf-16le-decode-loop)
         ((if (r1 == #xFEFF)
            ;; R1 is a BOM for big endian, but we can't keep that
            ;; character in the output because it can't be
***************
*** 184,195 ****
          (,@utf-16-decode-ucs
           (translate-character utf-translation-table-for-decode r0 r1)))
        (write-multibyte-character r0 r1)
!       ,utf-16-be-decode-loop))))
!   "Like ccl-decode-utf-16-be/le but check the first BOM.")
  
  (makunbound 'utf-16-decode-ucs)               ; done with it
! (makunbound 'utf-16-le-decode-loop)
! (makunbound 'utf-16-be-decode-loop)
  
  (eval-and-compile
  (defconst utf-16-decode-to-ucs
--- 190,201 ----
          (,@utf-16-decode-ucs
           (translate-character utf-translation-table-for-decode r0 r1)))
        (write-multibyte-character r0 r1)
!       ,utf-16be-decode-loop))))
!   "Like ccl-decode-utf-16be/le but check the first BOM.")
  
  (makunbound 'utf-16-decode-ucs)               ; done with it
! (makunbound 'utf-16le-decode-loop)
! (makunbound 'utf-16be-decode-loop)
  
  (eval-and-compile
  (defconst utf-16-decode-to-ucs
***************
*** 216,222 ****
                    (r0 = (r3 + #xe000))
                  (r0 = #xfffd))))))))))
  
! (defconst utf-16-le-encode-loop
    `(loop
      (read-multibyte-character r0 r1)
      (lookup-character utf-subst-table-for-encode r0 r1)
--- 222,228 ----
                    (r0 = (r3 + #xe000))
                  (r0 = #xfffd))))))))))
  
! (defconst utf-16le-encode-loop
    `(loop
      (read-multibyte-character r0 r1)
      (lookup-character utf-subst-table-for-encode r0 r1)
***************
*** 227,233 ****
      (write (r0 >> 8))
      (repeat)))
  
! (defconst utf-16-be-encode-loop
    `(loop
      (read-multibyte-character r0 r1)
      (lookup-character utf-subst-table-for-encode r0 r1)
--- 233,239 ----
      (write (r0 >> 8))
      (repeat)))
  
! (defconst utf-16be-encode-loop
    `(loop
      (read-multibyte-character r0 r1)
      (lookup-character utf-subst-table-for-encode r0 r1)
***************
*** 239,247 ****
      (repeat)))
  )
  
! (define-ccl-program ccl-encode-mule-utf-16-le
    `(1
!     ,utf-16-le-encode-loop)
    "Encode to UTF-16LE (little endian without signature).
  Characters from the charsets ascii, eight-bit-control,
  eight-bit-graphic, latin-iso8859-1 and mule-unicode-* are encoded
--- 245,254 ----
      (repeat)))
  )
  
! 
! (define-ccl-program ccl-encode-mule-utf-16le
    `(1
!     ,utf-16le-encode-loop)
    "Encode to UTF-16LE (little endian without signature).
  Characters from the charsets ascii, eight-bit-control,
  eight-bit-graphic, latin-iso8859-1 and mule-unicode-* are encoded
***************
*** 249,257 ****
  `utf-translation-table-for-encode'.
  Others are encoded as U+FFFD.")
  
! (define-ccl-program ccl-encode-mule-utf-16-be
    `(1
!     ,utf-16-be-encode-loop)
    "Encode to UTF-16BE (big endian without signature).
  Characters from the charsets ascii, eight-bit-control,
  eight-bit-graphic, latin-iso8859-1 and mule-unicode-* are encoded
--- 256,264 ----
  `utf-translation-table-for-encode'.
  Others are encoded as U+FFFD.")
  
! (define-ccl-program ccl-encode-mule-utf-16be
    `(1
!     ,utf-16be-encode-loop)
    "Encode to UTF-16BE (big endian without signature).
  Characters from the charsets ascii, eight-bit-control,
  eight-bit-graphic, latin-iso8859-1 and mule-unicode-* are encoded
***************
*** 259,269 ****
  `utf-translation-table-for-encode'.
  Others are encoded as U+FFFD.")
  
! (define-ccl-program ccl-encode-mule-utf-16-le-with-signature
    `(1
      ((write #xFF)
       (write #xFE)
!      ,utf-16-le-encode-loop))
    "Encode to UTF-16 (little endian with signature).
  Characters from the charsets ascii, eight-bit-control,
  eight-bit-graphic, latin-iso8859-1 and mule-unicode-* are encoded
--- 266,276 ----
  `utf-translation-table-for-encode'.
  Others are encoded as U+FFFD.")
  
! (define-ccl-program ccl-encode-mule-utf-16le-with-signature
    `(1
      ((write #xFF)
       (write #xFE)
!      ,utf-16le-encode-loop))
    "Encode to UTF-16 (little endian with signature).
  Characters from the charsets ascii, eight-bit-control,
  eight-bit-graphic, latin-iso8859-1 and mule-unicode-* are encoded
***************
*** 271,281 ****
  `utf-translation-table-for-encode'.
  Others are encoded as U+FFFD.")
  
! (define-ccl-program ccl-encode-mule-utf-16-be-with-signature
    `(1
      ((write #xFE)
       (write #xFF)
!      ,utf-16-be-encode-loop))
    "Encode to UTF-16 (big endian with signature).
  Characters from the charsets ascii, eight-bit-control,
  eight-bit-graphic, latin-iso8859-1 and mule-unicode-* are encoded
--- 278,288 ----
  `utf-translation-table-for-encode'.
  Others are encoded as U+FFFD.")
  
! (define-ccl-program ccl-encode-mule-utf-16be-with-signature
    `(1
      ((write #xFE)
       (write #xFF)
!      ,utf-16be-encode-loop))
    "Encode to UTF-16 (big endian with signature).
  Characters from the charsets ascii, eight-bit-control,
  eight-bit-graphic, latin-iso8859-1 and mule-unicode-* are encoded
***************
*** 284,291 ****
  Others are encoded as U+FFFD.")
  
  (makunbound 'utf-16-decode-to-ucs)
! (makunbound 'utf-16-le-encode-loop)
! (makunbound 'utf-16-be-encode-loop)
  
  (defun mule-utf-16-post-read-conversion (length)
    (when (> length 0)
--- 291,298 ----
  Others are encoded as U+FFFD.")
  
  (makunbound 'utf-16-decode-to-ucs)
! (makunbound 'utf-16le-encode-loop)
! (makunbound 'utf-16be-encode-loop)
  
  (defun mule-utf-16-post-read-conversion (length)
    (when (> length 0)
***************
*** 295,311 ****
             (setq last-coding-system-used
                   (coding-system-change-text-conversion
                    last-coding-system-used
!                   'mule-utf-16-le-with-signature))
             (setq length (1- length)))
            ((= char (decode-char 'ucs #xFFFF))
             (delete-char 1)
             (setq last-coding-system-used
                   (coding-system-change-text-conversion
                    last-coding-system-used
!                   'mule-utf-16-be-with-signature))
             (setq length (1- length)))
            (t
!            (setq last-coding-system-used 'mule-utf-16-be)))))
    length)
  
  (let ((doc "
--- 302,318 ----
             (setq last-coding-system-used
                   (coding-system-change-text-conversion
                    last-coding-system-used
!                   'mule-utf-16le-with-signature))
             (setq length (1- length)))
            ((= char (decode-char 'ucs #xFFFF))
             (delete-char 1)
             (setq last-coding-system-used
                   (coding-system-change-text-conversion
                    last-coding-system-used
!                   'mule-utf-16be-with-signature))
             (setq length (1- length)))
            (t
!            (setq last-coding-system-used 'mule-utf-16be)))))
    length)
  
  (let ((doc "
***************
*** 324,336 ****
  any of the character sets listed above are encoded into the byte
  sequence representing U+FFFD (REPLACEMENT CHARACTER)."))
    (make-coding-system
!    'mule-utf-16-le 4
     ?u       ; Mule-UCS uses ?U, but code-pages uses that for koi8-u.
     (concat
!     "Little endian UTF-16 encoding for Emacs-supported Unicode characters."
      doc)
  
!    '(ccl-decode-mule-utf-16-le . ccl-encode-mule-utf-16-le)
     '((safe-charsets
        ascii
        eight-bit-control
--- 331,343 ----
  any of the character sets listed above are encoded into the byte
  sequence representing U+FFFD (REPLACEMENT CHARACTER)."))
    (make-coding-system
!    'mule-utf-16le 4
     ?u       ; Mule-UCS uses ?U, but code-pages uses that for koi8-u.
     (concat
!     "UTF-16LE encoding for Emacs-supported Unicode characters."
      doc)
  
!    '(ccl-decode-mule-utf-16le . ccl-encode-mule-utf-16le)
     '((safe-charsets
        ascii
        eight-bit-control
***************
*** 346,357 ****
                 utf-translate-cjk)))
  
    (make-coding-system
!    'mule-utf-16-be 4 ?u
     (concat
!     "Big endian UTF-16 encoding for Emacs-supported Unicode characters."
      doc)
  
!    '(ccl-decode-mule-utf-16-be . ccl-encode-mule-utf-16-be)
     '((safe-charsets
        ascii
        eight-bit-control
--- 353,364 ----
                 utf-translate-cjk)))
  
    (make-coding-system
!    'mule-utf-16be 4 ?u
     (concat
!     "UTF-16BE encoding for Emacs-supported Unicode characters."
      doc)
  
!    '(ccl-decode-mule-utf-16be . ccl-encode-mule-utf-16be)
     '((safe-charsets
        ascii
        eight-bit-control
***************
*** 367,379 ****
                 utf-translate-cjk)))
  
    (make-coding-system
!    'mule-utf-16-le-with-signature 4 ?u
     (concat
      "Little endian UTF-16 (with BOM) for Emacs-supported Unicode characters."
      doc)
  
!    '(ccl-decode-mule-utf-16-le-with-signature
!      . ccl-encode-mule-utf-16-le-with-signature)
     '((safe-charsets
        ascii
        eight-bit-control
--- 374,386 ----
                 utf-translate-cjk)))
  
    (make-coding-system
!    'mule-utf-16le-with-signature 4 ?u
     (concat
      "Little endian UTF-16 (with BOM) for Emacs-supported Unicode characters."
      doc)
  
!    '(ccl-decode-mule-utf-16le-with-signature
!      . ccl-encode-mule-utf-16le-with-signature)
     '((safe-charsets
        ascii
        eight-bit-control
***************
*** 390,402 ****
                 utf-translate-cjk)))
  
    (make-coding-system
!    'mule-utf-16-be-with-signature 4 ?u
     (concat
      "Big endian UTF-16 (with BOM) for Emacs-supported Unicode characters."
      doc)
  
!    '(ccl-decode-mule-utf-16-be-with-signature
!      . ccl-encode-mule-utf-16-be-with-signature)
     '((safe-charsets
        ascii
        eight-bit-control
--- 397,409 ----
                 utf-translate-cjk)))
  
    (make-coding-system
!    'mule-utf-16be-with-signature 4 ?u
     (concat
      "Big endian UTF-16 (with BOM) for Emacs-supported Unicode characters."
      doc)
  
!    '(ccl-decode-mule-utf-16be-with-signature
!      . ccl-encode-mule-utf-16be-with-signature)
     '((safe-charsets
        ascii
        eight-bit-control
***************
*** 418,424 ****
      "UTF-16 (with or without BOM) for Emacs-supported Unicode characters."
      doc)
  
!    '(ccl-decode-mule-utf-16 . ccl-encode-mule-utf-16-be-with-signature)
     '((safe-charsets
        ascii
        eight-bit-control
--- 425,431 ----
      "UTF-16 (with or without BOM) for Emacs-supported Unicode characters."
      doc)
  
!    '(ccl-decode-mule-utf-16 . ccl-encode-mule-utf-16be-with-signature)
     '((safe-charsets
        ascii
        eight-bit-control
***************
*** 436,447 ****
       (post-read-conversion . mule-utf-16-post-read-conversion)))
  )
  
! (define-coding-system-alias 'utf-16-le 'mule-utf-16-le)
! (define-coding-system-alias 'utf-16-be 'mule-utf-16-be)
! (define-coding-system-alias 'utf-16-le-with-signature
!   'mule-utf-16-le-with-signature)
! (define-coding-system-alias 'utf-16-be-with-signature
!   'mule-utf-16-be-with-signature)
  (define-coding-system-alias 'utf-16 'mule-utf-16)
  
  ;;; utf-16.el ends here
--- 443,460 ----
       (post-read-conversion . mule-utf-16-post-read-conversion)))
  )
  
! (define-coding-system-alias 'utf-16le 'mule-utf-16le)
! (define-coding-system-alias 'utf-16be 'mule-utf-16be)
! (define-coding-system-alias 'utf-16le-with-signature
!   'mule-utf-16le-with-signature)
! (define-coding-system-alias 'utf-16be-with-signature
!   'mule-utf-16be-with-signature)
  (define-coding-system-alias 'utf-16 'mule-utf-16)
+ 
+ ;; For backward compatibility.
+ (define-coding-system-alias 'mule-utf-16-le 'mule-utf-16le-with-signature)
+ (define-coding-system-alias 'utf-16-le 'mule-utf-16le-with-signature)
+ (define-coding-system-alias 'mule-utf-16-be 'mule-utf-16be-with-signature)
+ (define-coding-system-alias 'utf-16-be 'mule-utf-16be-with-signature)
  
  ;;; utf-16.el ends here




reply via email to

[Prev in Thread] Current Thread [Next in Thread]