Line data Source code
1 : ;;; mule.el --- basic commands for multilingual environment
2 :
3 : ;; Copyright (C) 1997-2017 Free Software Foundation, Inc.
4 : ;; Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
5 : ;; 2005, 2006, 2007, 2008, 2009, 2010, 2011
6 : ;; National Institute of Advanced Industrial Science and Technology (AIST)
7 : ;; Registration Number H14PRO021
8 : ;; Copyright (C) 2003
9 : ;; National Institute of Advanced Industrial Science and Technology (AIST)
10 : ;; Registration Number H13PRO009
11 :
12 : ;; Keywords: mule, multilingual, character set, coding system
13 :
14 : ;; This file is part of GNU Emacs.
15 :
16 : ;; GNU Emacs is free software: you can redistribute it and/or modify
17 : ;; it under the terms of the GNU General Public License as published by
18 : ;; the Free Software Foundation, either version 3 of the License, or
19 : ;; (at your option) any later version.
20 :
21 : ;; GNU Emacs is distributed in the hope that it will be useful,
22 : ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
23 : ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24 : ;; GNU General Public License for more details.
25 :
26 : ;; You should have received a copy of the GNU General Public License
27 : ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
28 :
29 : ;;; Commentary:
30 :
31 : ;;; Code:
32 :
33 : ;; FIXME? Are these still relevant? Nothing uses them AFAICS.
34 : (defconst mule-version "6.0 (HANACHIRUSATO)" "\
35 : Version number and name of this version of MULE (multilingual environment).")
36 :
37 : (defconst mule-version-date "2003.9.1" "\
38 : Distribution date of this version of MULE (multilingual environment).")
39 :
40 :
41 : ;;; CHARSET
42 :
43 : ;; Backward compatibility code for handling emacs-mule charsets.
44 : (defvar private-char-area-1-min #xF0000)
45 : (defvar private-char-area-1-max #xFFFFE)
46 : (defvar private-char-area-2-min #x100000)
47 : (defvar private-char-area-2-max #x10FFFE)
48 :
49 : ;; Table of emacs-mule charsets indexed by their emacs-mule ID.
50 : (defvar emacs-mule-charset-table (make-vector 256 nil))
51 : (aset emacs-mule-charset-table 0 'ascii)
52 :
53 : ;; Convert the argument of old-style call of define-charset to a
54 : ;; property list used by the new-style.
55 : ;; INFO-VECTOR is a vector of the format:
56 : ;; [DIMENSION CHARS WIDTH DIRECTION ISO-FINAL-CHAR ISO-GRAPHIC-PLANE
57 : ;; SHORT-NAME LONG-NAME DESCRIPTION]
58 :
59 : (defun convert-define-charset-argument (emacs-mule-id info-vector)
60 0 : (let* ((dim (aref info-vector 0))
61 0 : (chars (aref info-vector 1))
62 0 : (total (if (= dim 1) chars (* chars chars)))
63 0 : (code-space (if (= dim 1) (if (= chars 96) [32 127] [33 126])
64 0 : (if (= chars 96) [32 127 32 127] [33 126 33 126])))
65 : code-offset)
66 0 : (if (integerp emacs-mule-id)
67 0 : (or (= emacs-mule-id 0)
68 0 : (and (>= emacs-mule-id 129) (< emacs-mule-id 256))
69 0 : (error "Invalid CHARSET-ID: %d" emacs-mule-id))
70 0 : (let (from-id to-id)
71 0 : (if (= dim 1) (setq from-id 160 to-id 224)
72 0 : (setq from-id 224 to-id 255))
73 0 : (while (and (< from-id to-id)
74 0 : (not (aref emacs-mule-charset-table from-id)))
75 0 : (setq from-id (1+ from-id)))
76 0 : (if (= from-id to-id)
77 0 : (error "No more room for the new Emacs-mule charset"))
78 0 : (setq emacs-mule-id from-id)))
79 0 : (if (> (- private-char-area-1-max private-char-area-1-min) total)
80 0 : (setq code-offset private-char-area-1-min
81 0 : private-char-area-1-min (+ private-char-area-1-min total))
82 0 : (if (> (- private-char-area-2-max private-char-area-2-min) total)
83 0 : (setq code-offset private-char-area-2-min
84 0 : private-char-area-2-min (+ private-char-area-2-min total))
85 0 : (error "No more space for a new charset")))
86 0 : (list :dimension dim
87 0 : :code-space code-space
88 0 : :iso-final-char (aref info-vector 4)
89 0 : :code-offset code-offset
90 0 : :emacs-mule-id emacs-mule-id)))
91 :
92 : (defun define-charset (name docstring &rest props)
93 : "Define NAME (symbol) as a charset with DOCSTRING.
94 : The remaining arguments must come in pairs ATTRIBUTE VALUE. ATTRIBUTE
95 : may be any symbol. The following have special meanings, and one of
96 : `:code-offset', `:map', `:subset', `:superset' must be specified.
97 :
98 : `:short-name'
99 :
100 : VALUE must be a short string to identify the charset. If omitted,
101 : NAME is used.
102 :
103 : `:long-name'
104 :
105 : VALUE must be a string longer than `:short-name' to identify the
106 : charset. If omitted, the value of the `:short-name' attribute is used.
107 :
108 : `:dimension'
109 :
110 : VALUE must be an integer 0, 1, 2, or 3, specifying the dimension of
111 : code-points of the charsets. If omitted, it is calculated from the
112 : value of the `:code-space' attribute.
113 :
114 : `:code-space'
115 :
116 : VALUE must be a vector of length at most 8 specifying the byte code
117 : range of each dimension in this format:
118 : [ MIN-1 MAX-1 MIN-2 MAX-2 ... ]
119 : where MIN-N is the minimum byte value of Nth dimension of code-point,
120 : MAX-N is the maximum byte value of that.
121 :
122 : `:min-code'
123 :
124 : VALUE must be an integer specifying the minimum code point of the
125 : charset. If omitted, it is calculated from `:code-space'. VALUE may
126 : be a cons (HIGH . LOW), where HIGH is the most significant 16 bits of
127 : the code point and LOW is the least significant 16 bits.
128 :
129 : `:max-code'
130 :
131 : VALUE must be an integer specifying the maximum code point of the
132 : charset. If omitted, it is calculated from `:code-space'. VALUE may
133 : be a cons (HIGH . LOW), where HIGH is the most significant 16 bits of
134 : the code point and LOW is the least significant 16 bits.
135 :
136 : `:iso-final-char'
137 :
138 : VALUE must be a character in the range 32 to 127 (inclusive)
139 : specifying the final char of the charset for ISO-2022 encoding. If
140 : omitted, the charset can't be encoded by ISO-2022 based
141 : coding-systems.
142 :
143 : `:iso-revision-number'
144 :
145 : VALUE must be an integer in the range 0..63, specifying the revision
146 : number of the charset for ISO-2022 encoding.
147 :
148 : `:emacs-mule-id'
149 :
150 : VALUE must be an integer of 0, 129..255. If omitted, the charset
151 : can't be encoded by coding-systems of type `emacs-mule'.
152 :
153 : `:ascii-compatible-p'
154 :
155 : VALUE must be nil or t (default nil). If VALUE is t, the charset is
156 : compatible with ASCII, i.e. the first 128 code points map to ASCII.
157 :
158 : `:supplementary-p'
159 :
160 : VALUE must be nil or t. If the VALUE is t, the charset is
161 : supplementary, which means it is used only as a parent or a
162 : subset of some other charset, or it is provided just for backward
163 : compatibility.
164 :
165 : `:invalid-code'
166 :
167 : VALUE must be a nonnegative integer that can be used as an invalid
168 : code point of the charset. If the minimum code is 0 and the maximum
169 : code is greater than Emacs's maximum integer value, `:invalid-code'
170 : should not be omitted.
171 :
172 : `:code-offset'
173 :
174 : VALUE must be an integer added to the index number of a character to
175 : get the corresponding character code.
176 :
177 : `:map'
178 :
179 : VALUE must be vector or string.
180 :
181 : If it is a vector, the format is [ CODE-1 CHAR-1 CODE-2 CHAR-2 ... ],
182 : where CODE-n is a code-point of the charset, and CHAR-n is the
183 : corresponding character code.
184 :
185 : If it is a string, it is a name of file that contains the above
186 : information. Each line of the file must be this format:
187 : 0xXXX 0xYYY
188 : where XXX is a hexadecimal representation of CODE-n and YYY is a
189 : hexadecimal representation of CHAR-n. A line starting with `#' is a
190 : comment line.
191 :
192 : `:subset'
193 :
194 : VALUE must be a list:
195 : ( PARENT MIN-CODE MAX-CODE OFFSET )
196 : PARENT is a parent charset. MIN-CODE and MAX-CODE specify the range
197 : of characters inherited from the parent. OFFSET is an integer value
198 : to add to a code point of the parent charset to get the corresponding
199 : code point of this charset.
200 :
201 : `:superset'
202 :
203 : VALUE must be a list of parent charsets. The charset inherits
204 : characters from them. Each element of the list may be a cons (PARENT
205 : . OFFSET), where PARENT is a parent charset, and OFFSET is an offset
206 : value to add to a code point of PARENT to get the corresponding code
207 : point of this charset.
208 :
209 : `:unify-map'
210 :
211 : VALUE must be vector or string.
212 :
213 : If it is a vector, the format is [ CODE-1 CHAR-1 CODE-2 CHAR-2 ... ],
214 : where CODE-n is a code-point of the charset, and CHAR-n is the
215 : corresponding Unicode character code.
216 :
217 : If it is a string, it is a name of file that contains the above
218 : information. The file format is the same as what described for `:map'
219 : attribute."
220 161 : (when (vectorp (car props))
221 : ;; Old style code:
222 : ;; (define-charset CHARSET-ID CHARSET-SYMBOL INFO-VECTOR)
223 : ;; Convert the argument to make it fit with the current style.
224 0 : (let ((vec (car props)))
225 0 : (setq props (convert-define-charset-argument name vec)
226 0 : name docstring
227 161 : docstring (aref vec 8))))
228 161 : (let ((attrs (mapcar 'list '(:dimension
229 : :code-space
230 : :min-code
231 : :max-code
232 : :iso-final-char
233 : :iso-revision-number
234 : :emacs-mule-id
235 : :ascii-compatible-p
236 : :supplementary-p
237 : :invalid-code
238 : :code-offset
239 : :map
240 : :subset
241 : :superset
242 : :unify-map
243 161 : :plist))))
244 :
245 : ;; If :dimension is omitted, get the dimension from :code-space.
246 161 : (let ((dimension (plist-get props :dimension)))
247 161 : (or dimension
248 156 : (let ((code-space (plist-get props :code-space)))
249 156 : (setq dimension (if code-space (/ (length code-space) 2) 4))
250 161 : (setq props (plist-put props :dimension dimension)))))
251 :
252 161 : (let ((code-space (plist-get props :code-space)))
253 161 : (or code-space
254 0 : (let ((dimension (plist-get props :dimension)))
255 0 : (setq code-space (make-vector 8 0))
256 0 : (dotimes (i dimension)
257 0 : (aset code-space (1+ (* i 2)) #xFF))
258 161 : (setq props (plist-put props :code-space code-space)))))
259 :
260 : ;; If :emacs-mule-id is specified, update emacs-mule-charset-table.
261 161 : (let ((emacs-mule-id (plist-get props :emacs-mule-id)))
262 161 : (if (integerp emacs-mule-id)
263 161 : (aset emacs-mule-charset-table emacs-mule-id name)))
264 :
265 161 : (dolist (slot attrs)
266 2576 : (setcdr slot (purecopy (plist-get props (car slot)))))
267 :
268 : ;; Make sure that the value of :code-space is a vector of 8
269 : ;; elements.
270 161 : (let* ((slot (assq :code-space attrs))
271 161 : (val (cdr slot))
272 161 : (len (length val)))
273 161 : (if (< len 8)
274 156 : (setcdr slot
275 161 : (vconcat val (make-vector (- 8 len) 0)))))
276 :
277 : ;; Add :name and :docstring properties to PROPS.
278 161 : (setq props
279 161 : (cons :name (cons name (cons :docstring (cons (purecopy docstring) props)))))
280 161 : (or (plist-get props :short-name)
281 161 : (plist-put props :short-name (symbol-name name)))
282 161 : (or (plist-get props :long-name)
283 161 : (plist-put props :long-name (plist-get props :short-name)))
284 161 : (plist-put props :base name)
285 : ;; We can probably get a worthwhile amount in purespace.
286 161 : (setq props
287 161 : (mapcar (lambda (elt)
288 3086 : (if (stringp elt)
289 578 : (purecopy elt)
290 3086 : elt))
291 161 : props))
292 161 : (setcdr (assq :plist attrs) props)
293 :
294 161 : (apply 'define-charset-internal name (mapcar 'cdr attrs))))
295 :
296 :
297 : (defun load-with-code-conversion (fullname file &optional noerror nomessage)
298 : "Execute a file of Lisp code named FILE whose absolute name is FULLNAME.
299 : The file contents are decoded before evaluation if necessary.
300 : If optional third arg NOERROR is non-nil,
301 : report no error if FILE doesn't exist.
302 : Print messages at start and end of loading unless
303 : optional fourth arg NOMESSAGE is non-nil.
304 : Return t if file exists."
305 11 : (if (null (file-readable-p fullname))
306 0 : (and (null noerror)
307 0 : (signal 'file-error (list "Cannot open load file" file)))
308 : ;; Read file with code conversion, and then eval.
309 11 : (let* ((buffer
310 : ;; We can't use `generate-new-buffer' because files.el
311 : ;; is not yet loaded.
312 11 : (get-buffer-create (generate-new-buffer-name " *load*")))
313 : (load-in-progress t)
314 11 : (source (save-match-data (string-match "\\.el\\'" fullname))))
315 11 : (unless nomessage
316 0 : (if source
317 0 : (message "Loading %s (source)..." file)
318 11 : (message "Loading %s..." file)))
319 11 : (when purify-flag
320 11 : (push (purecopy file) preloaded-file-list))
321 11 : (unwind-protect
322 11 : (let ((load-file-name fullname)
323 : (set-auto-coding-for-load t)
324 : (inhibit-file-name-operation nil))
325 11 : (with-current-buffer buffer
326 : ;; So that we don't get completely screwed if the
327 : ;; file is encoded in some complicated character set,
328 : ;; read it with real decoding, as a multibyte buffer.
329 11 : (set-buffer-multibyte t)
330 : ;; Don't let deactivate-mark remain set.
331 11 : (let (deactivate-mark)
332 11 : (insert-file-contents fullname))
333 : ;; If the loaded file was inserted with no-conversion or
334 : ;; raw-text coding system, make the buffer unibyte.
335 : ;; Otherwise, eval-buffer might try to interpret random
336 : ;; binary junk as multibyte characters.
337 11 : (if (and enable-multibyte-characters
338 11 : (or (eq (coding-system-type last-coding-system-used)
339 11 : 'raw-text)))
340 11 : (set-buffer-multibyte nil))
341 : ;; Make `kill-buffer' quiet.
342 11 : (set-buffer-modified-p nil))
343 : ;; Have the original buffer current while we eval.
344 11 : (eval-buffer buffer nil
345 : ;; This is compatible with what `load' does.
346 11 : (if purify-flag file fullname)
347 11 : nil t))
348 11 : (let (kill-buffer-hook kill-buffer-query-functions)
349 11 : (kill-buffer buffer)))
350 11 : (do-after-load-evaluation fullname)
351 :
352 11 : (unless (or nomessage noninteractive)
353 0 : (if source
354 0 : (message "Loading %s (source)...done" file)
355 11 : (message "Loading %s...done" file)))
356 11 : t)))
357 :
358 : (defun charset-info (charset)
359 : "Return a vector of information of CHARSET.
360 : This function is provided for backward compatibility.
361 :
362 : The elements of the vector are:
363 : CHARSET-ID, BYTES, DIMENSION, CHARS, WIDTH, DIRECTION,
364 : LEADING-CODE-BASE, LEADING-CODE-EXT,
365 : ISO-FINAL-CHAR, ISO-GRAPHIC-PLANE,
366 : REVERSE-CHARSET, SHORT-NAME, LONG-NAME, DESCRIPTION,
367 : PLIST.
368 : where
369 : CHARSET-ID is always 0.
370 : BYTES is always 0.
371 : DIMENSION is the number of bytes of a code-point of the charset:
372 : 1, 2, 3, or 4.
373 : CHARS is the number of characters in a dimension:
374 : 94, 96, 128, or 256.
375 : WIDTH is always 0.
376 : DIRECTION is always 0.
377 : LEADING-CODE-BASE is always 0.
378 : LEADING-CODE-EXT is always 0.
379 : ISO-FINAL-CHAR (character) is the final character of the
380 : corresponding ISO 2022 charset. If the charset is not assigned
381 : any final character, the value is -1.
382 : ISO-GRAPHIC-PLANE is always 0.
383 : REVERSE-CHARSET is always -1.
384 : SHORT-NAME (string) is the short name to refer to the charset.
385 : LONG-NAME (string) is the long name to refer to the charset
386 : DESCRIPTION (string) is the description string of the charset.
387 : PLIST (property list) may contain any type of information a user
388 : want to put and get by functions `put-charset-property' and
389 : `get-charset-property' respectively."
390 0 : (vector 0
391 : 0
392 0 : (charset-dimension charset)
393 0 : (charset-chars charset)
394 : 0
395 : 0
396 : 0
397 : 0
398 0 : (charset-iso-final-char charset)
399 : 0
400 : -1
401 0 : (get-charset-property charset :short-name)
402 0 : (get-charset-property charset :short-name)
403 0 : (charset-description charset)
404 0 : (charset-plist charset)))
405 :
406 : ;; It is better not to use backquote in this file,
407 : ;; because that makes a bootstrapping problem
408 : ;; if you need to recompile all the Lisp files using interpreted code.
409 :
410 : (defun charset-id (_charset)
411 : "Always return 0. This is provided for backward compatibility."
412 : (declare (obsolete nil "23.1"))
413 : 0)
414 :
415 : (defmacro charset-bytes (_charset)
416 : "Always return 0. This is provided for backward compatibility."
417 : (declare (obsolete nil "23.1"))
418 : 0)
419 :
420 : (defun get-charset-property (charset propname)
421 : "Return the value of CHARSET's PROPNAME property.
422 : This is the last value stored with
423 : (put-charset-property CHARSET PROPNAME VALUE)."
424 0 : (plist-get (charset-plist charset) propname))
425 :
426 : (defun put-charset-property (charset propname value)
427 : "Set CHARSETS's PROPNAME property to value VALUE.
428 : It can be retrieved with `(get-charset-property CHARSET PROPNAME)'."
429 14 : (set-charset-plist charset
430 14 : (plist-put (charset-plist charset) propname
431 14 : (if (stringp value)
432 14 : (purecopy value)
433 14 : value))))
434 :
435 : (defun charset-description (charset)
436 : "Return description string of CHARSET."
437 0 : (plist-get (charset-plist charset) :docstring))
438 :
439 : (defun charset-dimension (charset)
440 : "Return dimension of CHARSET."
441 0 : (plist-get (charset-plist charset) :dimension))
442 :
443 : (defun charset-chars (charset &optional dimension)
444 : "Return number of characters contained in DIMENSION of CHARSET.
445 : DIMENSION defaults to the first dimension."
446 0 : (unless dimension (setq dimension 1))
447 0 : (let ((code-space (plist-get (charset-plist charset) :code-space)))
448 0 : (1+ (- (aref code-space (1- (* 2 dimension)))
449 0 : (aref code-space (- (* 2 dimension) 2))))))
450 :
451 : (defun charset-iso-final-char (charset)
452 : "Return ISO-2022 final character of CHARSET.
453 : Return -1 if charset isn't an ISO 2022 one."
454 0 : (or (plist-get (charset-plist charset) :iso-final-char)
455 0 : -1))
456 :
457 : (defmacro charset-short-name (charset)
458 : "Return short name of CHARSET."
459 0 : (plist-get (charset-plist charset) :short-name))
460 :
461 : (defmacro charset-long-name (charset)
462 : "Return long name of CHARSET."
463 0 : (plist-get (charset-plist charset) :long-name))
464 :
465 : (defun charset-list ()
466 : "Return list of all charsets ever defined."
467 : (declare (obsolete charset-list "23.1"))
468 0 : charset-list)
469 :
470 :
471 : ;;; CHARACTER
472 : (define-obsolete-function-alias 'char-valid-p 'characterp "23.1")
473 :
474 : (defun generic-char-p (_char)
475 : "Always return nil. This is provided for backward compatibility."
476 : (declare (obsolete nil "23.1"))
477 : nil)
478 :
479 : (defun make-char-internal (charset-id &optional code1 code2)
480 0 : (let ((charset (aref emacs-mule-charset-table charset-id)))
481 0 : (or charset
482 0 : (error "Invalid Emacs-mule charset ID: %d" charset-id))
483 0 : (make-char charset code1 code2)))
484 :
485 : ;; Save the ASCII case table in case we need it later. Some locales
486 : ;; (such as Turkish) modify the case behavior of ASCII characters,
487 : ;; which can interfere with networking code that uses ASCII strings.
488 :
489 : (defvar ascii-case-table
490 : ;; Code copied from copy-case-table to avoid requiring case-table.el
491 : (let ((tbl (copy-sequence (standard-case-table)))
492 : (up (char-table-extra-slot (standard-case-table) 0)))
493 : (if up (set-char-table-extra-slot tbl 0 (copy-sequence up)))
494 : (set-char-table-extra-slot tbl 1 nil)
495 : (set-char-table-extra-slot tbl 2 nil)
496 : tbl)
497 : "Case table for the ASCII character set.")
498 :
499 : ;; Coding system stuff
500 :
501 : ;; Coding system is a symbol that has been defined by the function
502 : ;; `define-coding-system'.
503 :
504 : (defconst coding-system-iso-2022-flags
505 : '(long-form
506 : ascii-at-eol
507 : ascii-at-cntl
508 : 7-bit
509 : locking-shift
510 : single-shift
511 : designation
512 : revision
513 : direction
514 : init-at-bol
515 : designate-at-bol
516 : safe
517 : latin-extra
518 : composition
519 : euc-tw-shift
520 : use-roman
521 : use-oldjis
522 : 8-bit-level-4)
523 : "List of symbols that control ISO-2022 encoder/decoder.
524 :
525 : The value of the `:flags' attribute in the argument of the function
526 : `define-coding-system' must be one of them.
527 :
528 : If `long-form' is specified, use a long designation sequence on
529 : encoding for the charsets `japanese-jisx0208-1978', `chinese-gb2312',
530 : and `japanese-jisx0208'. The long designation sequence doesn't
531 : conform to ISO 2022, but is used by such coding systems as
532 : `compound-text'.
533 :
534 : If `ascii-at-eol' is specified, designate ASCII to g0 at end of line
535 : on encoding.
536 :
537 : If `ascii-at-cntl' is specified, designate ASCII to g0 before control
538 : codes and SPC on encoding.
539 :
540 : If `7-bit' is specified, use 7-bit code only on encoding.
541 :
542 : If `locking-shift' is specified, decode locking-shift code correctly
543 : on decoding, and use locking-shift to invoke a graphic element on
544 : encoding.
545 :
546 : If `single-shift' is specified, decode single-shift code
547 : correctly on decoding, and use single-shift to invoke a graphic
548 : element on encoding. See also `8-bit-level-4' specification.
549 :
550 : If `designation' is specified, decode designation code correctly on
551 : decoding, and use designation to designate a charset to a graphic
552 : element on encoding.
553 :
554 : If `revision' is specified, produce an escape sequence to specify
555 : revision number of a charset on encoding. Such an escape sequence is
556 : always correctly decoded on decoding.
557 :
558 : If `direction' is specified, decode ISO6429's code for specifying
559 : direction correctly, and produce the code on encoding.
560 :
561 : If `init-at-bol' is specified, on encoding, it is assumed that
562 : invocation and designation statuses are reset at each beginning of
563 : line even if `ascii-at-eol' is not specified; thus no codes for
564 : resetting them are produced.
565 :
566 : If `safe' is specified, on encoding, characters not supported by a
567 : coding are replaced with `?'.
568 :
569 : If `latin-extra' is specified, the code-detection routine assumes that a
570 : code specified in `latin-extra-code-table' (which see) is valid.
571 :
572 : If `composition' is specified, an escape sequence to specify
573 : composition sequence is correctly decoded on decoding, and is produced
574 : on encoding.
575 :
576 : If `euc-tw-shift' is specified, the EUC-TW specific shifting code is
577 : correctly decoded on decoding, and is produced on encoding.
578 :
579 : If `use-roman' is specified, JIS0201-1976-Roman is designated instead
580 : of ASCII.
581 :
582 : If `use-oldjis' is specified, JIS0208-1976 is designated instead of
583 : JIS0208-1983.
584 :
585 : If `8-bit-level-4' is specified, the decoder assumes the
586 : implementation level \"4\" for 8-bit codes which means that GL is
587 : identified as the single-shift area. The default implementation
588 : level for 8-bit code is \"4A\" which means that GR is identified
589 : as the single-shift area.")
590 :
591 : (defun define-coding-system (name docstring &rest props)
592 : "Define NAME (a symbol) as a coding system with DOCSTRING and attributes.
593 : The remaining arguments must come in pairs ATTRIBUTE VALUE. ATTRIBUTE
594 : may be any symbol.
595 :
596 : A coding system specifies a rule to decode (i.e. to convert a
597 : byte sequence to a character sequence) and a rule to encode (the
598 : opposite of decoding).
599 :
600 : The decoding is done by at most 3 steps; the first is to convert
601 : a byte sequence to a character sequence by one of Emacs'
602 : internal routines specified by `:coding-type' attribute. The
603 : optional second step is to convert the character sequence (the
604 : result of the first step) by a translation table specified
605 : by `:decode-translation-table' attribute. The optional third step
606 : is to convert the above result by a Lisp function specified
607 : by `:post-read-conversion' attribute.
608 :
609 : The encoding is done by at most 3 steps, which are the reverse
610 : of the decoding steps. The optional first step converts a
611 : character sequence to another character sequence by a Lisp
612 : function specified by `:pre-write-conversion' attribute. The
613 : optional second step converts the above result by a translation
614 : table specified by `:encode-translation-table' attribute. The
615 : third step converts the above result to a byte sequence by one
616 : of the Emacs's internal routines specified by the `:coding-type'
617 : attribute.
618 :
619 : The following attributes have special meanings. Those labeled as
620 : \"(required)\" should not be omitted.
621 :
622 : `:mnemonic' (required)
623 :
624 : VALUE is a character to display on mode line for the coding system.
625 :
626 : `:coding-type' (required)
627 :
628 : VALUE specifies the format of byte sequence the coding system
629 : decodes and encodes to. It must be one of `charset', `utf-8',
630 : `utf-16', `iso-2022', `emacs-mule', `shift-jis', `ccl',
631 : `raw-text', `undecided'.
632 :
633 : If VALUE is `charset', the coding system is for handling a
634 : byte sequence in which each byte or every two- to four-byte
635 : sequence represents a character code of a charset specified
636 : by the `:charset-list' attribute.
637 :
638 : If VALUE is `utf-8', the coding system is for handling Unicode
639 : UTF-8 byte sequences. See also the documentation of the
640 : attribute `:bom'.
641 :
642 : If VALUE is `utf-16', the coding system is for handling Unicode
643 : UTF-16 byte sequences. See also the documentation of the
644 : attributes :bom and `:endian'.
645 :
646 : If VALUE is `iso-2022', the coding system is for handling byte
647 : sequences conforming to ISO/IEC 2022. See also the documentation
648 : of the attributes `:charset-list', `:flags', and `:designation'.
649 :
650 : If VALUE is `emacs-mule', the coding system is for handling
651 : byte sequences which Emacs 20 and 21 used for their internal
652 : representation of characters.
653 :
654 : If VALUE is `shift-jis', the coding system is for handling byte
655 : sequences of Shift_JIS format. See also the attribute `:charset-list'.
656 :
657 : If VALUE is `ccl', the coding system uses CCL programs to decode
658 : and encode byte sequences. The CCL programs must be
659 : specified by the attributes `:ccl-decoder' and `:ccl-encoder'.
660 :
661 : If VALUE is `raw-text', the coding system decodes byte sequences
662 : without any conversions.
663 :
664 : `:eol-type'
665 :
666 : VALUE is the EOL (end-of-line) format of the coding system. It must be
667 : one of `unix', `dos', `mac'. The symbol `unix' means Unix-like EOL
668 : \(i.e., a single LF character), `dos' means DOS-like EOL \(i.e., a sequence
669 : of CR followed by LF), and `mac' means Mac-like EOL \(i.e., a single CR).
670 : If omitted, Emacs detects the EOL format automatically when decoding.
671 :
672 : `:charset-list' (required if `:coding-type' is `charset' or `shift-jis')
673 :
674 : VALUE must be a list of charsets supported by the coding system.
675 :
676 : If `coding-type:' is `charset', then on decoding and encoding by the
677 : coding system, if a character belongs to multiple charsets in the
678 : list, a charset that comes first in the list is selected.
679 :
680 : If `:coding-type' is `iso-2022', VALUE may be `iso-2022', which
681 : indicates that the coding system supports all ISO-2022 based
682 : charsets.
683 :
684 : If `:coding-type' is `shift-jis', VALUE must be a list of three
685 : to four charsets supported by Shift_JIS encoding scheme. The
686 : first charset (one dimension) is for code space 0x00..0x7F, the
687 : second (one dimension) for 0xA1..0xDF, the third (two dimension)
688 : for 0x8140..0xEFFC, the optional fourth (three dimension) for
689 : 0xF040..0xFCFC.
690 :
691 : If `:coding-type' is `emacs-mule', VALUE may be `emacs-mule',
692 : which indicates that the coding system supports all charsets that
693 : have the `:emacs-mule-id' property.
694 :
695 : `:ascii-compatible-p'
696 :
697 : If VALUE is non-nil, the coding system decodes all 7-bit bytes into
698 : the corresponding ASCII characters, and encodes all ASCII characters
699 : back to the corresponding 7-bit bytes. VALUE defaults to nil.
700 :
701 : `:decode-translation-table'
702 :
703 : VALUE must be a translation table to use on decoding.
704 :
705 : `:encode-translation-table'
706 :
707 : VALUE must be a translation table to use on encoding.
708 :
709 : `:post-read-conversion'
710 :
711 : VALUE must be a function to call after some text is inserted and
712 : decoded by the coding system itself and before any functions in
713 : `after-insert-functions' are called. This function is passed one
714 : argument: the number of characters in the text to convert, with
715 : point at the start of the text. The function should leave point
716 : unchanged, and should return the new character count. Note that
717 : this function should avoid reading from files or receiving text
718 : from subprocesses -- anything that could invoke decoding; if it
719 : must do so, it should bind `coding-system-for-read' to a value
720 : other than the current coding-system, to avoid infinite recursion.
721 :
722 : `:pre-write-conversion'
723 :
724 : VALUE must be a function to call after all functions in
725 : `write-region-annotate-functions' and `buffer-file-format' are
726 : called, and before the text is encoded by the coding system
727 : itself. This function should convert the whole text in the
728 : current buffer. For backward compatibility, this function is
729 : passed two arguments which can be ignored. Note that this
730 : function should avoid writing to files or sending text to
731 : subprocesses -- anything that could invoke encoding; if it
732 : must do so, it should bind `coding-system-for-write' to a
733 : value other than the current coding-system, to avoid infinite
734 : recursion.
735 :
736 : `:default-char'
737 :
738 : VALUE must be a character. On encoding, a character not supported by
739 : the coding system is replaced with VALUE.
740 :
741 : `:for-unibyte'
742 :
743 : VALUE non-nil means that visiting a file with the coding system
744 : results in a unibyte buffer.
745 :
746 : `:mime-charset'
747 :
748 : VALUE must be a symbol whose name is that of a MIME charset converted
749 : to lower case.
750 :
751 : `:mime-text-unsuitable'
752 :
753 : VALUE non-nil means the `:mime-charset' property names a charset which
754 : is unsuitable for the top-level media of type \"text\".
755 :
756 : `:flags'
757 :
758 : VALUE must be a list of symbols that control the ISO-2022 converter.
759 : Each must be a member of the list `coding-system-iso-2022-flags'
760 : \(which see). This attribute is meaningful only when `:coding-type'
761 : is `iso-2022'.
762 :
763 : `:designation'
764 :
765 : VALUE must be a vector [G0-USAGE G1-USAGE G2-USAGE G3-USAGE].
766 : GN-USAGE specifies the usage of graphic register GN as follows.
767 :
768 : If it is nil, no charset can be designated to GN.
769 :
770 : If it is a charset, the charset is initially designated to GN, and
771 : never used by the other charsets.
772 :
773 : If it is a list, the elements must be charsets, nil, 94, or 96. GN
774 : can be used by all the listed charsets. If the list contains 94, any
775 : iso-2022 charset whose code-space ranges are 94 long can be designated
776 : to GN. If the list contains 96, any charsets whose whose ranges are
777 : 96 long can be designated to GN. If the first element is a charset,
778 : that charset is initially designated to GN.
779 :
780 : This attribute is meaningful only when `:coding-type' is `iso-2022'.
781 :
782 : `:bom'
783 :
784 : This attributes specifies whether the coding system uses a \"byte order
785 : mark\". VALUE must be nil, t, or a cons cell of coding systems whose
786 : `:coding-type' is `utf-16' or `utf-8'.
787 :
788 : If the value is nil, on decoding, don't treat the first two-byte as
789 : BOM, and on encoding, don't produce BOM bytes.
790 :
791 : If the value is t, on decoding, skip the first two-byte as BOM, and on
792 : encoding, produce BOM bytes according to the value of `:endian'.
793 :
794 : If the value is a cons cell, on decoding, check the first two bytes.
795 : If they are 0xFE 0xFF, use the car part coding system of the value.
796 : If they are 0xFF 0xFE, use the cdr part coding system of the value.
797 : Otherwise, treat them as bytes for a normal character. On encoding,
798 : produce BOM bytes according to the value of `:endian'.
799 :
800 : This attribute is meaningful only when `:coding-type' is `utf-16' or
801 : `utf-8'.
802 :
803 : `:endian'
804 :
805 : VALUE must be `big' or `little' specifying big-endian and
806 : little-endian respectively. The default value is `big'.
807 :
808 : This attribute is meaningful only when `:coding-type' is `utf-16'.
809 :
810 : `:ccl-decoder' (required if :coding-type is `ccl')
811 :
812 : VALUE is a CCL program name defined by `define-ccl-program'. The
813 : CCL program reads a byte sequence and writes a character sequence
814 : as a decoding result.
815 :
816 : `:ccl-encoder' (required if :coding-type is `ccl')
817 :
818 : VALUE is a CCL program name defined by `define-ccl-program'. The
819 : CCL program reads a character sequence and writes a byte sequence
820 : as an encoding result.
821 :
822 : `:inhibit-null-byte-detection'
823 :
824 : VALUE non-nil means Emacs ignore null bytes on code detection.
825 : See the variable `inhibit-null-byte-detection'. This attribute
826 : is meaningful only when `:coding-type' is `undecided'.
827 :
828 : `:inhibit-iso-escape-detection'
829 :
830 : VALUE non-nil means Emacs ignores ISO-2022 escape sequences on
831 : code detection. See the variable `inhibit-iso-escape-detection'.
832 : This attribute is meaningful only when `:coding-type' is
833 : `undecided'.
834 :
835 : `:prefer-utf-8'
836 :
837 : VALUE non-nil means Emacs prefers UTF-8 on code detection for
838 : non-ASCII files. This attribute is meaningful only when
839 : `:coding-type' is `undecided'."
840 114 : (let* ((common-attrs (mapcar 'list
841 : '(:mnemonic
842 : :coding-type
843 : :charset-list
844 : :ascii-compatible-p
845 : :decode-translation-table
846 : :encode-translation-table
847 : :post-read-conversion
848 : :pre-write-conversion
849 : :default-char
850 : :for-unibyte
851 : :plist
852 114 : :eol-type)))
853 114 : (coding-type (plist-get props :coding-type))
854 114 : (spec-attrs (mapcar 'list
855 114 : (cond ((eq coding-type 'iso-2022)
856 : '(:initial
857 : :reg-usage
858 : :request
859 : :flags))
860 91 : ((eq coding-type 'utf-8)
861 : '(:bom))
862 81 : ((eq coding-type 'utf-16)
863 : '(:bom
864 : :endian))
865 76 : ((eq coding-type 'ccl)
866 : '(:ccl-decoder
867 : :ccl-encoder
868 : :valids))
869 76 : ((eq coding-type 'undecided)
870 : '(:inhibit-null-byte-detection
871 : :inhibit-iso-escape-detection
872 114 : :prefer-utf-8))))))
873 :
874 114 : (dolist (slot common-attrs)
875 1368 : (setcdr slot (plist-get props (car slot))))
876 :
877 114 : (dolist (slot spec-attrs)
878 115 : (setcdr slot (plist-get props (car slot))))
879 :
880 114 : (if (eq coding-type 'iso-2022)
881 23 : (let ((designation (plist-get props :designation))
882 23 : (flags (plist-get props :flags))
883 23 : (initial (make-vector 4 nil))
884 23 : (reg-usage (cons 4 4))
885 : request elt)
886 23 : (dotimes (i 4)
887 92 : (setq elt (aref designation i))
888 92 : (cond ((charsetp elt)
889 25 : (aset initial i elt)
890 25 : (setq request (cons (cons elt i) request)))
891 67 : ((consp elt)
892 29 : (aset initial i (car elt))
893 29 : (if (charsetp (car elt))
894 29 : (setq request (cons (cons (car elt) i) request)))
895 29 : (dolist (e (cdr elt))
896 64 : (cond ((charsetp e)
897 49 : (setq request (cons (cons e i) request)))
898 15 : ((eq e 94)
899 7 : (setcar reg-usage i))
900 8 : ((eq e 96)
901 7 : (setcdr reg-usage i))
902 1 : ((eq e t)
903 1 : (setcar reg-usage i)
904 92 : (setcdr reg-usage i)))))))
905 23 : (setcdr (assq :initial spec-attrs) initial)
906 23 : (setcdr (assq :reg-usage spec-attrs) reg-usage)
907 23 : (setcdr (assq :request spec-attrs) request)
908 :
909 : ;; Change :flags value from a list to a bit-mask.
910 23 : (let ((bits 0)
911 : (i 0))
912 23 : (dolist (elt coding-system-iso-2022-flags)
913 414 : (if (memq elt flags)
914 414 : (setq bits (logior bits (lsh 1 i))))
915 414 : (setq i (1+ i)))
916 114 : (setcdr (assq :flags spec-attrs) bits))))
917 :
918 : ;; Add :name and :docstring properties to PROPS.
919 114 : (setq props
920 114 : (cons :name (cons name (cons :docstring (cons (purecopy docstring)
921 114 : props)))))
922 114 : (setcdr (assq :plist common-attrs) props)
923 114 : (apply 'define-coding-system-internal
924 114 : name (mapcar 'cdr (append common-attrs spec-attrs)))))
925 :
926 : (defun coding-system-doc-string (coding-system)
927 : "Return the documentation string for CODING-SYSTEM."
928 0 : (plist-get (coding-system-plist coding-system) :docstring))
929 :
930 : (defun coding-system-mnemonic (coding-system)
931 : "Return the mnemonic character of CODING-SYSTEM.
932 : The mnemonic character of a coding system is used in mode line to
933 : indicate the coding system. If CODING-SYSTEM is nil, return ?=."
934 0 : (plist-get (coding-system-plist coding-system) :mnemonic))
935 :
936 : (defun coding-system-type (coding-system)
937 : "Return the coding type of CODING-SYSTEM.
938 : A coding type is a symbol indicating the encoding method of CODING-SYSTEM.
939 : See the function `define-coding-system' for more detail."
940 480 : (plist-get (coding-system-plist coding-system) :coding-type))
941 :
942 : (defun coding-system-charset-list (coding-system)
943 : "Return list of charsets supported by CODING-SYSTEM.
944 : If CODING-SYSTEM supports all ISO-2022 charsets, return `iso-2022'.
945 : If CODING-SYSTEM supports all emacs-mule charsets, return `emacs-mule'."
946 0 : (plist-get (coding-system-plist coding-system) :charset-list))
947 :
948 : (defun coding-system-category (coding-system)
949 : "Return a category symbol of CODING-SYSTEM."
950 0 : (plist-get (coding-system-plist coding-system) :category))
951 :
952 : (defun coding-system-get (coding-system prop)
953 : "Extract a value from CODING-SYSTEM's property list for property PROP.
954 : For compatibility with Emacs 20/21, this accepts old-style symbols
955 : like `mime-charset' as well as the current style like `:mime-charset'."
956 339 : (or (plist-get (coding-system-plist coding-system) prop)
957 0 : (if (not (keywordp prop))
958 : ;; For backward compatibility.
959 0 : (if (eq prop 'ascii-incompatible)
960 0 : (not (plist-get (coding-system-plist coding-system)
961 0 : :ascii-compatible-p))
962 0 : (plist-get (coding-system-plist coding-system)
963 339 : (intern (concat ":" (symbol-name prop))))))))
964 :
965 : (defun coding-system-eol-type-mnemonic (coding-system)
966 : "Return the string indicating end-of-line format of CODING-SYSTEM."
967 0 : (let* ((eol-type (coding-system-eol-type coding-system))
968 0 : (val (cond ((eq eol-type 0) eol-mnemonic-unix)
969 0 : ((eq eol-type 1) eol-mnemonic-dos)
970 0 : ((eq eol-type 2) eol-mnemonic-mac)
971 0 : (t eol-mnemonic-undecided))))
972 0 : (if (stringp val)
973 0 : val
974 0 : (char-to-string val))))
975 :
976 : (defun coding-system-lessp (x y)
977 0 : (cond ((eq x 'no-conversion) t)
978 0 : ((eq y 'no-conversion) nil)
979 0 : ((eq x 'emacs-mule) t)
980 0 : ((eq y 'emacs-mule) nil)
981 0 : ((eq x 'undecided) t)
982 0 : ((eq y 'undecided) nil)
983 0 : (t (let ((c1 (coding-system-mnemonic x))
984 0 : (c2 (coding-system-mnemonic y)))
985 0 : (or (< (downcase c1) (downcase c2))
986 0 : (and (not (> (downcase c1) (downcase c2)))
987 0 : (< c1 c2)))))))
988 :
989 : (defun coding-system-equal (coding-system-1 coding-system-2)
990 : "Return t if and only if CODING-SYSTEM-1 and CODING-SYSTEM-2 are identical.
991 : Two coding systems are identical if both symbols are equal
992 : or one is an alias of the other."
993 0 : (or (eq coding-system-1 coding-system-2)
994 0 : (and (equal (coding-system-plist coding-system-1)
995 0 : (coding-system-plist coding-system-2))
996 0 : (let ((eol-type-1 (coding-system-eol-type coding-system-1))
997 0 : (eol-type-2 (coding-system-eol-type coding-system-2)))
998 0 : (or (eq eol-type-1 eol-type-2)
999 0 : (and (vectorp eol-type-1) (vectorp eol-type-2)))))))
1000 :
1001 : (defun add-to-coding-system-list (coding-system)
1002 : "Add CODING-SYSTEM to variable `coding-system-list' while keeping it sorted."
1003 0 : (if (or (null coding-system-list)
1004 0 : (coding-system-lessp coding-system (car coding-system-list)))
1005 0 : (setq coding-system-list (cons coding-system coding-system-list))
1006 0 : (let ((len (length coding-system-list))
1007 0 : mid (tem coding-system-list))
1008 0 : (while (> len 1)
1009 0 : (setq mid (nthcdr (/ len 2) tem))
1010 0 : (if (coding-system-lessp (car mid) coding-system)
1011 0 : (setq tem mid
1012 0 : len (- len (/ len 2)))
1013 0 : (setq len (/ len 2))))
1014 0 : (setcdr tem (cons coding-system (cdr tem))))))
1015 :
1016 : (defun coding-system-list (&optional base-only)
1017 : "Return a list of all existing non-subsidiary coding systems.
1018 : If optional arg BASE-ONLY is non-nil, only base coding systems are
1019 : listed. The value doesn't include subsidiary coding systems which are
1020 : made from bases and aliases automatically for various end-of-line
1021 : formats (e.g. iso-latin-1-unix, koi8-r-dos)."
1022 71 : (let ((codings nil))
1023 71 : (dolist (coding coding-system-list)
1024 128439 : (if (eq (coding-system-base coding) coding)
1025 16188 : (if base-only
1026 0 : (setq codings (cons coding codings))
1027 16188 : (dolist (alias (coding-system-aliases coding))
1028 128439 : (setq codings (cons alias codings))))))
1029 71 : codings))
1030 :
1031 : (defconst char-coding-system-table nil
1032 : "It exists just for backward compatibility, and the value is always nil.")
1033 : (make-obsolete-variable 'char-coding-system-table nil "23.1")
1034 :
1035 : (defun transform-make-coding-system-args (name type &optional doc-string props)
1036 : "For internal use only.
1037 : Transform XEmacs style args for `make-coding-system' to Emacs style.
1038 : Value is a list of transformed arguments."
1039 0 : (let ((mnemonic (string-to-char (or (plist-get props 'mnemonic) "?")))
1040 0 : (eol-type (plist-get props 'eol-type))
1041 : properties tmp)
1042 0 : (cond
1043 0 : ((eq eol-type 'lf) (setq eol-type 'unix))
1044 0 : ((eq eol-type 'crlf) (setq eol-type 'dos))
1045 0 : ((eq eol-type 'cr) (setq eol-type 'mac)))
1046 0 : (if (setq tmp (plist-get props 'post-read-conversion))
1047 0 : (setq properties (plist-put properties 'post-read-conversion tmp)))
1048 0 : (if (setq tmp (plist-get props 'pre-write-conversion))
1049 0 : (setq properties (plist-put properties 'pre-write-conversion tmp)))
1050 0 : (cond
1051 0 : ((eq type 'shift-jis)
1052 0 : `(,name 1 ,mnemonic ,doc-string () ,properties ,eol-type))
1053 0 : ((eq type 'iso2022) ; This is not perfect.
1054 0 : (if (plist-get props 'escape-quoted)
1055 0 : (error "escape-quoted is not supported: %S"
1056 0 : `(,name ,type ,doc-string ,props)))
1057 0 : (let ((g0 (plist-get props 'charset-g0))
1058 0 : (g1 (plist-get props 'charset-g1))
1059 0 : (g2 (plist-get props 'charset-g2))
1060 0 : (g3 (plist-get props 'charset-g3))
1061 : (use-roman
1062 0 : (and
1063 0 : (eq (cadr (assoc 'latin-jisx0201
1064 0 : (plist-get props 'input-charset-conversion)))
1065 0 : 'ascii)
1066 0 : (eq (cadr (assoc 'ascii
1067 0 : (plist-get props 'output-charset-conversion)))
1068 0 : 'latin-jisx0201)))
1069 : (use-oldjis
1070 0 : (and
1071 0 : (eq (cadr (assoc 'japanese-jisx0208-1978
1072 0 : (plist-get props 'input-charset-conversion)))
1073 0 : 'japanese-jisx0208)
1074 0 : (eq (cadr (assoc 'japanese-jisx0208
1075 0 : (plist-get props 'output-charset-conversion)))
1076 0 : 'japanese-jisx0208-1978))))
1077 0 : (if (charsetp g0)
1078 0 : (if (plist-get props 'force-g0-on-output)
1079 0 : (setq g0 `(nil ,g0))
1080 0 : (setq g0 `(,g0 t))))
1081 0 : (if (charsetp g1)
1082 0 : (if (plist-get props 'force-g1-on-output)
1083 0 : (setq g1 `(nil ,g1))
1084 0 : (setq g1 `(,g1 t))))
1085 0 : (if (charsetp g2)
1086 0 : (if (plist-get props 'force-g2-on-output)
1087 0 : (setq g2 `(nil ,g2))
1088 0 : (setq g2 `(,g2 t))))
1089 0 : (if (charsetp g3)
1090 0 : (if (plist-get props 'force-g3-on-output)
1091 0 : (setq g3 `(nil ,g3))
1092 0 : (setq g3 `(,g3 t))))
1093 0 : `(,name 2 ,mnemonic ,doc-string
1094 0 : (,g0 ,g1 ,g2 ,g3
1095 0 : ,(plist-get props 'short)
1096 0 : ,(not (plist-get props 'no-ascii-eol))
1097 0 : ,(not (plist-get props 'no-ascii-cntl))
1098 0 : ,(plist-get props 'seven)
1099 : t
1100 0 : ,(not (plist-get props 'lock-shift))
1101 0 : ,use-roman
1102 0 : ,use-oldjis
1103 0 : ,(plist-get props 'no-iso6429)
1104 : nil nil nil nil)
1105 0 : ,properties ,eol-type)))
1106 0 : ((eq type 'big5)
1107 0 : `(,name 3 ,mnemonic ,doc-string () ,properties ,eol-type))
1108 0 : ((eq type 'ccl)
1109 0 : `(,name 4 ,mnemonic ,doc-string
1110 0 : (,(plist-get props 'decode) . ,(plist-get props 'encode))
1111 0 : ,properties ,eol-type))
1112 : (t
1113 0 : (error "unsupported XEmacs style make-coding-style arguments: %S"
1114 0 : `(,name ,type ,doc-string ,props))))))
1115 :
1116 : (defun make-coding-system (coding-system type mnemonic doc-string
1117 : &optional
1118 : flags
1119 : properties
1120 : eol-type)
1121 : "Define a new coding system CODING-SYSTEM (symbol).
1122 : This function is provided for backward compatibility."
1123 : (declare (obsolete define-coding-system "23.1"))
1124 : ;; For compatibility with XEmacs, we check the type of TYPE. If it
1125 : ;; is a symbol, perhaps, this function is called with XEmacs-style
1126 : ;; arguments. Here, try to transform that kind of arguments to
1127 : ;; Emacs style.
1128 0 : (if (symbolp type)
1129 0 : (let ((args (transform-make-coding-system-args coding-system type
1130 0 : mnemonic doc-string)))
1131 0 : (setq coding-system (car args)
1132 0 : type (nth 1 args)
1133 0 : mnemonic (nth 2 args)
1134 0 : doc-string (nth 3 args)
1135 0 : flags (nth 4 args)
1136 0 : properties (nth 5 args)
1137 0 : eol-type (nth 6 args))))
1138 :
1139 0 : (setq type
1140 0 : (cond ((eq type 0) 'emacs-mule)
1141 0 : ((eq type 1) 'shift-jis)
1142 0 : ((eq type 2) 'iso2022)
1143 0 : ((eq type 3) 'big5)
1144 0 : ((eq type 4) 'ccl)
1145 0 : ((eq type 5) 'raw-text)
1146 : (t
1147 0 : (error "Invalid coding system type: %s" type))))
1148 :
1149 0 : (setq properties
1150 0 : (let ((plist nil) key)
1151 0 : (dolist (elt properties)
1152 0 : (setq key (car elt))
1153 0 : (cond ((eq key 'post-read-conversion)
1154 0 : (setq key :post-read-conversion))
1155 0 : ((eq key 'pre-write-conversion)
1156 0 : (setq key :pre-write-conversion))
1157 0 : ((eq key 'translation-table-for-decode)
1158 0 : (setq key :decode-translation-table))
1159 0 : ((eq key 'translation-table-for-encode)
1160 0 : (setq key :encode-translation-table))
1161 0 : ((eq key 'safe-charsets)
1162 0 : (setq key :charset-list))
1163 0 : ((eq key 'mime-charset)
1164 0 : (setq key :mime-charset))
1165 0 : ((eq key 'valid-codes)
1166 0 : (setq key :valids)))
1167 0 : (setq plist (plist-put plist key (cdr elt))))
1168 0 : plist))
1169 0 : (setq properties (plist-put properties :mnemonic mnemonic))
1170 0 : (plist-put properties :coding-type type)
1171 0 : (cond ((eq eol-type 0) (setq eol-type 'unix))
1172 0 : ((eq eol-type 1) (setq eol-type 'dos))
1173 0 : ((eq eol-type 2) (setq eol-type 'mac))
1174 0 : ((vectorp eol-type) (setq eol-type nil)))
1175 0 : (plist-put properties :eol-type eol-type)
1176 :
1177 0 : (cond
1178 0 : ((eq type 'iso2022)
1179 0 : (plist-put properties :flags
1180 0 : (list (and (or (consp (nth 0 flags))
1181 0 : (consp (nth 1 flags))
1182 0 : (consp (nth 2 flags))
1183 0 : (consp (nth 3 flags))) 'designation)
1184 0 : (or (nth 4 flags) 'long-form)
1185 0 : (and (nth 5 flags) 'ascii-at-eol)
1186 0 : (and (nth 6 flags) 'ascii-at-cntl)
1187 0 : (and (nth 7 flags) '7-bit)
1188 0 : (and (nth 8 flags) 'locking-shift)
1189 0 : (and (nth 9 flags) 'single-shift)
1190 0 : (and (nth 10 flags) 'use-roman)
1191 0 : (and (nth 11 flags) 'use-oldjis)
1192 0 : (or (nth 12 flags) 'direction)
1193 0 : (and (nth 13 flags) 'init-at-bol)
1194 0 : (and (nth 14 flags) 'designate-at-bol)
1195 0 : (and (nth 15 flags) 'safe)
1196 0 : (and (nth 16 flags) 'latin-extra)))
1197 0 : (plist-put properties :designation
1198 0 : (let ((vec (make-vector 4 nil)))
1199 0 : (dotimes (i 4)
1200 0 : (let ((spec (nth i flags)))
1201 0 : (if (eq spec t)
1202 0 : (aset vec i '(94 96))
1203 0 : (if (consp spec)
1204 0 : (progn
1205 0 : (if (memq t spec)
1206 0 : (setq spec (append (delq t spec) '(94 96))))
1207 0 : (aset vec i spec))))))
1208 0 : vec)))
1209 :
1210 0 : ((eq type 'ccl)
1211 0 : (plist-put properties :ccl-decoder (car flags))
1212 0 : (plist-put properties :ccl-encoder (cdr flags))))
1213 :
1214 0 : (apply 'define-coding-system coding-system doc-string properties))
1215 :
1216 : (defun merge-coding-systems (first second)
1217 : "Fill in any unspecified aspects of coding system FIRST from SECOND.
1218 : Return the resulting coding system."
1219 0 : (let ((base (coding-system-base second))
1220 0 : (eol (coding-system-eol-type second)))
1221 : ;; If FIRST doesn't specify text conversion, merge with that of SECOND.
1222 0 : (if (eq (coding-system-base first) 'undecided)
1223 0 : (setq first (coding-system-change-text-conversion first base)))
1224 : ;; If FIRST doesn't specify eol conversion, merge with that of SECOND.
1225 0 : (if (and (vectorp (coding-system-eol-type first))
1226 0 : (numberp eol) (>= eol 0) (<= eol 2))
1227 0 : (setq first (coding-system-change-eol-conversion
1228 0 : first eol)))
1229 0 : first))
1230 :
1231 : (defun autoload-coding-system (symbol form)
1232 : "Define SYMBOL as a coding-system that is defined on demand.
1233 :
1234 : FORM is a form to evaluate to define the coding-system."
1235 0 : (put symbol 'coding-system-define-form form)
1236 0 : (setq coding-system-alist (cons (list (symbol-name symbol))
1237 0 : coding-system-alist))
1238 0 : (dolist (elt '("-unix" "-dos" "-mac"))
1239 0 : (let ((name (concat (symbol-name symbol) elt)))
1240 0 : (put (intern name) 'coding-system-define-form form)
1241 0 : (setq coding-system-alist (cons (list name) coding-system-alist)))))
1242 :
1243 : ;; This variable is set in these two cases:
1244 : ;; (1) A file is read by a coding system specified explicitly.
1245 : ;; `after-insert-file-set-coding' sets the car of this value to
1246 : ;; `coding-system-for-read', and sets the cdr to nil.
1247 : ;; (2) `set-buffer-file-coding-system' is called.
1248 : ;; The cdr of this value is set to the specified coding system.
1249 : ;; This variable is used for decoding in `revert-buffer' and encoding
1250 : ;; in `select-safe-coding-system'.
1251 : ;;
1252 : ;; When saving a buffer, if `buffer-file-coding-system-explicit' is
1253 : ;; already non-nil, `basic-save-buffer-1' sets its CAR to the value of
1254 : ;; `last-coding-system-used'. (It used to set it unconditionally, but
1255 : ;; that seems unnecessary; see Bug#4533.)
1256 :
1257 : (defvar buffer-file-coding-system-explicit nil
1258 : "The file coding system explicitly specified for the current buffer.
1259 : The value is a cons of coding systems for reading (decoding) and
1260 : writing (encoding).
1261 : Internal use only.")
1262 : (make-variable-buffer-local 'buffer-file-coding-system-explicit)
1263 : (put 'buffer-file-coding-system-explicit 'permanent-local t)
1264 :
1265 : (defun read-buffer-file-coding-system ()
1266 0 : (let* ((bcss (find-coding-systems-region (point-min) (point-max)))
1267 : (css-table
1268 0 : (unless (equal bcss '(undecided))
1269 0 : (append '("dos" "unix" "mac")
1270 0 : (delq nil (mapcar (lambda (cs)
1271 0 : (if (memq (coding-system-base cs) bcss)
1272 0 : (symbol-name cs)))
1273 0 : coding-system-list)))))
1274 : (combined-table
1275 0 : (if css-table
1276 0 : (completion-table-in-turn css-table coding-system-alist)
1277 0 : coding-system-alist))
1278 : (auto-cs
1279 0 : (unless find-file-literally
1280 0 : (save-excursion
1281 0 : (save-restriction
1282 0 : (widen)
1283 0 : (goto-char (point-min))
1284 0 : (funcall set-auto-coding-function
1285 0 : (or buffer-file-name "") (buffer-size))))))
1286 : (preferred
1287 0 : (let ((bfcs (default-value 'buffer-file-coding-system)))
1288 0 : (cons (and (or (equal bcss '(undecided))
1289 0 : (memq (coding-system-base bfcs) bcss))
1290 0 : bfcs)
1291 0 : (mapcar (lambda (cs)
1292 0 : (and (coding-system-p cs)
1293 0 : (coding-system-get cs :mime-charset)
1294 0 : (or (equal bcss '(undecided))
1295 0 : (memq (coding-system-base cs) bcss))
1296 0 : cs))
1297 0 : (coding-system-priority-list)))))
1298 : (default
1299 0 : (let ((current (coding-system-base buffer-file-coding-system)))
1300 : ;; Generally use as a default the first preferred coding-system
1301 : ;; different from the current coding-system, except for
1302 : ;; the case of auto-cs since choosing anything else is asking
1303 : ;; for trouble (would lead to using a different coding
1304 : ;; system than specified in the coding tag).
1305 0 : (or auto-cs
1306 0 : (car (delq nil
1307 0 : (mapcar (lambda (cs)
1308 0 : (if (eq current (coding-system-base cs))
1309 : nil
1310 0 : cs))
1311 0 : preferred))))))
1312 : (completion-ignore-case t)
1313 : (completion-pcm--delim-wild-regex ; Let "u8" complete to "utf-8".
1314 0 : (concat "\\(?:" completion-pcm--delim-wild-regex
1315 0 : "\\|\\([[:alpha:]]\\)[[:digit:]]\\)"))
1316 0 : (cs (completing-read
1317 0 : (format "Coding system for saving file (default %s): " default)
1318 0 : combined-table
1319 : nil t nil 'coding-system-history
1320 0 : (if default (symbol-name default)))))
1321 0 : (unless (zerop (length cs)) (intern cs))))
1322 :
1323 : (defun set-buffer-file-coding-system (coding-system &optional force nomodify)
1324 : "Set the file coding-system of the current buffer to CODING-SYSTEM.
1325 : This means that when you save the buffer, it will be converted
1326 : according to CODING-SYSTEM. For a list of possible values of
1327 : CODING-SYSTEM, use \\[list-coding-systems].
1328 :
1329 : If CODING-SYSTEM leaves the text conversion unspecified, or if it leaves
1330 : the end-of-line conversion unspecified, FORCE controls what to do.
1331 : If FORCE is nil, get the unspecified aspect (or aspects) from the buffer's
1332 : previous `buffer-file-coding-system' value (if it is specified there).
1333 : Otherwise, leave it unspecified.
1334 :
1335 : This marks the buffer modified so that the succeeding \\[save-buffer]
1336 : surely saves the buffer with CODING-SYSTEM. From a program, if you
1337 : don't want to mark the buffer modified, specify t for NOMODIFY.
1338 : If you know exactly what coding system you want to use,
1339 : just set the variable `buffer-file-coding-system' directly."
1340 : (interactive
1341 0 : (list (read-buffer-file-coding-system)
1342 0 : current-prefix-arg))
1343 0 : (check-coding-system coding-system)
1344 0 : (if (and coding-system buffer-file-coding-system (null force))
1345 0 : (setq coding-system
1346 0 : (merge-coding-systems coding-system buffer-file-coding-system)))
1347 0 : (when (and (called-interactively-p 'interactive)
1348 0 : (not (memq 'emacs (coding-system-get coding-system
1349 0 : :charset-list))))
1350 : ;; Check whether save would succeed, and jump to the offending char(s)
1351 : ;; if not.
1352 0 : (let ((css (find-coding-systems-region (point-min) (point-max))))
1353 0 : (unless (or (eq (car css) 'undecided)
1354 0 : (memq (coding-system-base coding-system) css))
1355 0 : (setq coding-system (select-safe-coding-system-interactively
1356 0 : (point-min) (point-max) css
1357 0 : (list coding-system))))))
1358 0 : (setq buffer-file-coding-system coding-system)
1359 0 : (if buffer-file-coding-system-explicit
1360 0 : (setcdr buffer-file-coding-system-explicit coding-system)
1361 0 : (setq buffer-file-coding-system-explicit (cons nil coding-system)))
1362 0 : (unless nomodify
1363 0 : (set-buffer-modified-p t))
1364 0 : (force-mode-line-update))
1365 :
1366 : (defun revert-buffer-with-coding-system (coding-system &optional force)
1367 : "Visit the current buffer's file again using coding system CODING-SYSTEM.
1368 : For a list of possible values of CODING-SYSTEM, use \\[list-coding-systems].
1369 :
1370 : If CODING-SYSTEM leaves the text conversion unspecified, or if it leaves
1371 : the end-of-line conversion unspecified, FORCE controls what to do.
1372 : If FORCE is nil, get the unspecified aspect (or aspects) from the buffer's
1373 : previous `buffer-file-coding-system' value (if it is specified there).
1374 : Otherwise, determine it from the file contents as usual for visiting a file."
1375 : (interactive "zCoding system for visited file (default nil): \nP")
1376 0 : (check-coding-system coding-system)
1377 0 : (if (and coding-system buffer-file-coding-system (null force))
1378 0 : (setq coding-system
1379 0 : (merge-coding-systems coding-system buffer-file-coding-system)))
1380 0 : (let ((coding-system-for-read coding-system))
1381 0 : (revert-buffer)))
1382 :
1383 : (defun set-file-name-coding-system (coding-system)
1384 : "Set coding system for decoding and encoding file names to CODING-SYSTEM.
1385 : It actually just set the variable `file-name-coding-system' (which see)
1386 : to CODING-SYSTEM."
1387 : (interactive "zCoding system for file names (default nil): ")
1388 0 : (check-coding-system coding-system)
1389 0 : (if (and coding-system
1390 0 : (not (coding-system-get coding-system :ascii-compatible-p))
1391 0 : (not (coding-system-get coding-system :suitable-for-file-name)))
1392 0 : (error "%s is not suitable for file names" coding-system))
1393 0 : (setq file-name-coding-system coding-system))
1394 :
1395 : (defvar default-terminal-coding-system nil
1396 : "Default value for the terminal coding system.
1397 : This is normally set according to the selected language environment.
1398 : See also the command `set-terminal-coding-system'.")
1399 :
1400 : (defun set-terminal-coding-system (coding-system &optional terminal)
1401 : "Set coding system of terminal output to CODING-SYSTEM.
1402 : All text output to TERMINAL will be encoded
1403 : with the specified coding system.
1404 :
1405 : For a list of possible values of CODING-SYSTEM, use \\[list-coding-systems].
1406 : The default is determined by the selected language environment
1407 : or by the previous use of this command.
1408 :
1409 : TERMINAL may be a terminal object, a frame, or nil for the
1410 : selected frame's terminal. The setting has no effect on
1411 : graphical terminals."
1412 : (interactive
1413 0 : (list (let ((default (if (and (not (terminal-coding-system))
1414 0 : default-terminal-coding-system)
1415 0 : default-terminal-coding-system)))
1416 0 : (read-coding-system
1417 0 : (format "Coding system for terminal display (default %s): "
1418 0 : default)
1419 0 : default))))
1420 0 : (if (and (not coding-system)
1421 0 : (not (terminal-coding-system)))
1422 0 : (setq coding-system default-terminal-coding-system))
1423 0 : (if coding-system
1424 0 : (setq default-terminal-coding-system coding-system))
1425 0 : (set-terminal-coding-system-internal coding-system terminal)
1426 0 : (redraw-frame))
1427 :
1428 : (defvar default-keyboard-coding-system nil
1429 : "Default value of the keyboard coding system.
1430 : This is normally set according to the selected language environment.
1431 : See also the command `set-keyboard-coding-system'.")
1432 :
1433 : (defun set-keyboard-coding-system (coding-system &optional terminal)
1434 : "Set coding system for keyboard input on TERMINAL to CODING-SYSTEM.
1435 :
1436 : For a list of possible values of CODING-SYSTEM, use \\[list-coding-systems].
1437 : The default is determined by the selected language environment
1438 : or by the previous use of this command.
1439 :
1440 : If CODING-SYSTEM is nil or the coding-type of CODING-SYSTEM is
1441 : `raw-text', the decoding of keyboard input is disabled.
1442 :
1443 : TERMINAL may be a terminal object, a frame, or nil for the
1444 : selected frame's terminal. The setting has no effect on
1445 : graphical terminals."
1446 : (interactive
1447 0 : (list (let* ((coding (keyboard-coding-system nil))
1448 0 : (default (if (eq (coding-system-type coding) 'raw-text)
1449 0 : default-keyboard-coding-system)))
1450 0 : (read-coding-system
1451 0 : (format "Coding system for keyboard input (default %s): "
1452 0 : default)
1453 0 : default))))
1454 0 : (let ((coding-type (coding-system-type coding-system))
1455 : (saved-meta-mode
1456 0 : (terminal-parameter terminal 'keyboard-coding-saved-meta-mode)))
1457 0 : (let (accept-8-bit)
1458 0 : (if (not (or (coding-system-get coding-system :suitable-for-keyboard)
1459 0 : (coding-system-get coding-system :ascii-compatible-p)))
1460 0 : (error "Unsuitable coding system for keyboard: %s" coding-system))
1461 0 : (cond ((memq coding-type '(raw-text charset utf-8 shift-jis big5 ccl))
1462 0 : (setq accept-8-bit t))
1463 0 : ((eq coding-type 'iso-2022)
1464 0 : (let ((flags (coding-system-get coding-system :flags)))
1465 0 : (or (memq '7-bit flags)
1466 0 : (setq accept-8-bit t))))
1467 : (t
1468 0 : (error "Unsupported coding system for keyboard: %s"
1469 0 : coding-system)))
1470 0 : (if accept-8-bit
1471 0 : (progn
1472 0 : (or saved-meta-mode
1473 0 : (set-terminal-parameter terminal
1474 : 'keyboard-coding-saved-meta-mode
1475 0 : (cons (nth 2 (current-input-mode))
1476 0 : nil)))
1477 0 : (set-input-meta-mode 8 terminal))
1478 0 : (when saved-meta-mode
1479 0 : (set-input-meta-mode (car saved-meta-mode) terminal)
1480 0 : (set-terminal-parameter terminal
1481 : 'keyboard-coding-saved-meta-mode
1482 0 : nil)))
1483 : ;; Avoid end-of-line conversion.
1484 0 : (setq coding-system
1485 0 : (coding-system-change-eol-conversion coding-system 'unix))))
1486 0 : (set-keyboard-coding-system-internal coding-system terminal)
1487 0 : (setq keyboard-coding-system coding-system))
1488 :
1489 : (defcustom keyboard-coding-system nil
1490 : "Specify coding system for keyboard input.
1491 : If you set this on a terminal which can't distinguish Meta keys from
1492 : 8-bit characters, you will have to use ESC to type Meta characters.
1493 : See Info node `Terminal Coding' and Info node `Unibyte Mode'.
1494 :
1495 : On non-windowing terminals, this is set from the locale by default.
1496 :
1497 : Setting this variable directly does not take effect;
1498 : use either \\[customize] or \\[set-keyboard-coding-system]."
1499 : :type '(coding-system :tag "Coding system")
1500 : :link '(info-link "(emacs)Terminal Coding")
1501 : :link '(info-link "(emacs)Unibyte Mode")
1502 : :set (lambda (_symbol value)
1503 : ;; Don't load encoded-kb unnecessarily.
1504 : (if (or value (boundp 'encoded-kbd-setup-display))
1505 : (set-keyboard-coding-system value)
1506 : (set-default 'keyboard-coding-system nil))) ; must initialize
1507 : :version "22.1"
1508 : :group 'keyboard
1509 : :group 'mule)
1510 :
1511 : (defun set-buffer-process-coding-system (decoding encoding)
1512 : "Set coding systems for the process associated with the current buffer.
1513 : DECODING is the coding system to be used to decode input from the process,
1514 : ENCODING is the coding system to be used to encode output to the process.
1515 :
1516 : For a list of possible coding systems, use \\[list-coding-systems]."
1517 : (interactive
1518 : "zCoding-system for output from the process: \nzCoding-system for input to the process: ")
1519 71 : (let ((proc (get-buffer-process (current-buffer))))
1520 71 : (if (null proc)
1521 0 : (error "No process")
1522 71 : (check-coding-system decoding)
1523 71 : (check-coding-system encoding)
1524 71 : (set-process-coding-system proc decoding encoding)))
1525 71 : (force-mode-line-update))
1526 :
1527 : (defalias 'set-clipboard-coding-system 'set-selection-coding-system)
1528 :
1529 : (defun set-selection-coding-system (coding-system)
1530 : "Make CODING-SYSTEM used for communicating with other X clients.
1531 : When sending or receiving text via cut_buffer, selection, and clipboard,
1532 : the text is encoded or decoded by CODING-SYSTEM."
1533 : (interactive "zCoding system for X selection: ")
1534 1 : (check-coding-system coding-system)
1535 1 : (setq selection-coding-system coding-system))
1536 :
1537 : ;; Coding system lastly specified by the command
1538 : ;; set-next-selection-coding-system.
1539 : (defvar last-next-selection-coding-system nil)
1540 :
1541 : (defun set-next-selection-coding-system (coding-system)
1542 : "Use CODING-SYSTEM for next communication with other window system clients.
1543 : This setting is effective for the next communication only."
1544 : (interactive
1545 0 : (list (read-coding-system
1546 0 : (if last-next-selection-coding-system
1547 0 : (format "Coding system for the next selection (default %S): "
1548 0 : last-next-selection-coding-system)
1549 0 : "Coding system for the next selection: ")
1550 0 : last-next-selection-coding-system)))
1551 0 : (if coding-system
1552 0 : (setq last-next-selection-coding-system coding-system)
1553 0 : (setq coding-system last-next-selection-coding-system))
1554 0 : (check-coding-system coding-system)
1555 :
1556 0 : (setq next-selection-coding-system coding-system))
1557 :
1558 : (defun set-coding-priority (arg)
1559 : "Set priority of coding categories according to ARG.
1560 : ARG is a list of coding categories ordered by priority.
1561 :
1562 : This function is provided for backward compatibility."
1563 : (declare (obsolete set-coding-system-priority "23.1"))
1564 0 : (apply 'set-coding-system-priority
1565 0 : (mapcar #'(lambda (x) (symbol-value x)) arg)))
1566 :
1567 : ;;; X selections
1568 :
1569 : (defvar ctext-non-standard-encodings-alist
1570 : (mapcar 'purecopy
1571 : '(("big5-0" big5 2 big5)
1572 : ("ISO8859-14" iso-8859-14 1 latin-iso8859-14)
1573 : ("ISO8859-15" iso-8859-15 1 latin-iso8859-15)
1574 : ("gbk-0" gbk 2 chinese-gbk)
1575 : ("koi8-r" koi8-r 1 koi8-r)
1576 : ("microsoft-cp1251" windows-1251 1 windows-1251)))
1577 : "Alist of non-standard encoding names vs the corresponding usages in CTEXT.
1578 :
1579 : It controls how extended segments of a compound text are handled
1580 : by the coding system `compound-text-with-extensions'.
1581 :
1582 : Each element has the form (ENCODING-NAME CODING-SYSTEM N-OCTET CHARSET).
1583 :
1584 : ENCODING-NAME is an encoding name of an \"extended segment\".
1585 :
1586 : CODING-SYSTEM is the coding-system to encode (or decode) the
1587 : characters into (or from) the extended segment.
1588 :
1589 : N-OCTET is the number of octets (bytes) that encodes a character
1590 : in the segment. It can be 0 (meaning the number of octets per
1591 : character is variable), 1, 2, 3, or 4.
1592 :
1593 : CHARSET is a character set containing characters that are encoded
1594 : in the segment. It can be a list of character sets.
1595 :
1596 : On decoding CTEXT, all encoding names listed here are recognized.
1597 :
1598 : On encoding CTEXT, encoding names in the variable
1599 : `ctext-non-standard-encodings' (which see) and in the information
1600 : listed for the current language environment under the key
1601 : `ctext-non-standard-encodings' are used.")
1602 :
1603 : (defvar ctext-non-standard-encodings nil
1604 : "List of non-standard encoding names used in extended segments of CTEXT.
1605 : Each element must be one of the names listed in the variable
1606 : `ctext-non-standard-encodings-alist' (which see).")
1607 :
1608 : (defvar ctext-non-standard-encodings-regexp
1609 : (purecopy
1610 : (string-to-multibyte
1611 : (concat
1612 : ;; For non-standard encodings.
1613 : "\\(\e%/[0-4][\200-\377][\200-\377]\\([^\002]+\\)\002\\)"
1614 : "\\|"
1615 : ;; For UTF-8 encoding.
1616 : "\\(\e%G[^\e]*\e%@\\)"))))
1617 :
1618 : ;; Functions to support "Non-Standard Character Set Encodings" defined
1619 : ;; by the COMPOUND-TEXT spec. They also support "The UTF-8 encoding"
1620 : ;; described in the section 7 of the documentation of COMPOUND-TEXT
1621 : ;; distributed with XFree86.
1622 :
1623 : (defun ctext-post-read-conversion (len)
1624 : "Decode LEN characters encoded as Compound Text with Extended Segments."
1625 : ;; We don't need the following because it is expected that this
1626 : ;; function is mainly used for decoding X selection which is not
1627 : ;; that big data.
1628 : ;;(buffer-disable-undo) ; minimize consing due to insertions and deletions
1629 0 : (save-match-data
1630 0 : (save-restriction
1631 0 : (narrow-to-region (point) (+ (point) len))
1632 0 : (let ((case-fold-search nil)
1633 : last-coding-system-used
1634 : pos bytes)
1635 0 : (decode-coding-region (point-min) (point-max) 'ctext)
1636 0 : (while (re-search-forward ctext-non-standard-encodings-regexp
1637 0 : nil 'move)
1638 0 : (setq pos (match-beginning 0))
1639 0 : (if (match-beginning 1)
1640 : ;; ESC % / [0-4] M L --ENCODING-NAME-- \002 --BYTES--
1641 0 : (let* ((M (multibyte-char-to-unibyte (char-after (+ pos 4))))
1642 0 : (L (multibyte-char-to-unibyte (char-after (+ pos 5))))
1643 0 : (encoding (match-string 2))
1644 0 : (encoding-info (assoc-string
1645 0 : encoding
1646 0 : ctext-non-standard-encodings-alist t))
1647 0 : (coding (if encoding-info
1648 0 : (nth 1 encoding-info)
1649 0 : (setq encoding (intern (downcase encoding)))
1650 0 : (and (coding-system-p encoding)
1651 0 : encoding))))
1652 0 : (setq bytes (- (+ (* (- M 128) 128) (- L 128))
1653 0 : (- (point) (+ pos 6))))
1654 0 : (when coding
1655 0 : (delete-region pos (point))
1656 0 : (forward-char bytes)
1657 0 : (decode-coding-region (- (point) bytes) (point) coding)))
1658 : ;; ESC % G --UTF-8-BYTES-- ESC % @
1659 0 : (delete-char -3)
1660 0 : (delete-region pos (+ pos 3))
1661 0 : (decode-coding-region pos (point) 'utf-8))))
1662 0 : (goto-char (point-min))
1663 0 : (- (point-max) (point)))))
1664 :
1665 : (defvar ctext-standard-encodings
1666 : '(ascii latin-jisx0201 katakana-jisx0201
1667 : latin-iso8859-1 latin-iso8859-2 latin-iso8859-3 latin-iso8859-4
1668 : greek-iso8859-7 arabic-iso8859-6 hebrew-iso8859-8 cyrillic-iso8859-5
1669 : latin-iso8859-9
1670 : chinese-gb2312 japanese-jisx0208 korean-ksc5601)
1671 : "List of approved standard encodings (i.e. charsets) of X's Compound Text.
1672 : Coding-system `compound-text-with-extensions' encodes a character
1673 : belonging to any of those charsets using the normal ISO2022
1674 : designation sequence unless the current language environment or
1675 : the variable `ctext-non-standard-encodings' decide to use an extended
1676 : segment of CTEXT for that character. See also the documentation
1677 : of `ctext-non-standard-encodings-alist'.")
1678 :
1679 : ;; Return an alist of CHARSET vs CTEXT-USAGE-INFO generated from
1680 : ;; `ctext-non-standard-encodings' and a list specified by the key
1681 : ;; `ctext-non-standard-encodings' for the current language
1682 : ;; environment. CTEXT-USAGE-INFO is one of the element of
1683 : ;; `ctext-non-standard-encodings-alist' or nil. In the former case, a
1684 : ;; character in CHARSET is encoded using extended segment. In the
1685 : ;; latter case, a character in CHARSET is encoded using normal ISO2022
1686 : ;; designation sequence. If a character is not in any of CHARSETs, it
1687 : ;; is encoded using UTF-8 encoding extension.
1688 :
1689 : (defun ctext-non-standard-encodings-table ()
1690 0 : (let* ((table (append ctext-non-standard-encodings
1691 0 : (copy-sequence
1692 0 : (get-language-info current-language-environment
1693 0 : 'ctext-non-standard-encodings))))
1694 0 : (tail table)
1695 : elt)
1696 0 : (while tail
1697 0 : (setq elt (car tail))
1698 0 : (let* ((slot (assoc elt ctext-non-standard-encodings-alist))
1699 0 : (charset (nth 3 slot)))
1700 0 : (if (charsetp charset)
1701 0 : (setcar tail
1702 0 : (cons (plist-get (charset-plist charset) :base) slot))
1703 0 : (setcar tail (cons (car charset) slot))
1704 0 : (dolist (cs (cdr charset))
1705 0 : (setcdr tail
1706 0 : (cons (cons (plist-get (charset-plist (car cs)) :base) slot)
1707 0 : (cdr tail)))
1708 0 : (setq tail (cdr tail))))
1709 0 : (setq tail (cdr tail))))
1710 0 : table))
1711 :
1712 : (defun ctext-pre-write-conversion (from to)
1713 : "Encode characters between FROM and TO as Compound Text w/Extended Segments.
1714 :
1715 : If FROM is a string, generate a new temp buffer, insert the text,
1716 : and convert it in the temporary buffer. Otherwise, convert
1717 : in-place."
1718 0 : (save-match-data
1719 : ;; Setup a working buffer if necessary.
1720 0 : (when (stringp from)
1721 0 : (set-buffer (generate-new-buffer " *temp"))
1722 0 : (set-buffer-multibyte (multibyte-string-p from))
1723 0 : (insert from)
1724 0 : (setq from (point-min) to (point-max)))
1725 0 : (save-restriction
1726 0 : (narrow-to-region from to)
1727 0 : (goto-char from)
1728 0 : (let ((encoding-table (ctext-non-standard-encodings-table))
1729 0 : (charset-list (sort-charsets
1730 0 : (copy-sequence ctext-standard-encodings)))
1731 0 : (end-pos (make-marker))
1732 : last-coding-system-used
1733 : last-pos charset encoding-info)
1734 0 : (dolist (elt encoding-table)
1735 0 : (push (car elt) charset-list))
1736 0 : (setq end-pos (point-marker))
1737 0 : (while (re-search-forward "[^\0-\177]+" nil t)
1738 : ;; Found a sequence of non-ASCII characters.
1739 0 : (set-marker end-pos (match-end 0))
1740 0 : (goto-char (match-beginning 0))
1741 0 : (setq last-pos (point)
1742 0 : charset (char-charset (following-char) charset-list))
1743 0 : (forward-char 1)
1744 0 : (while (and (< (point) end-pos)
1745 0 : (eq charset (char-charset (following-char) charset-list)))
1746 0 : (forward-char 1))
1747 0 : (if charset
1748 0 : (if (setq encoding-info (cdr (assq charset encoding-table)))
1749 : ;; Encode this range using an extended segment.
1750 0 : (let ((encoding-name (car encoding-info))
1751 0 : (coding-system (nth 1 encoding-info))
1752 0 : (noctets (nth 2 encoding-info))
1753 : len)
1754 0 : (encode-coding-region last-pos (point) coding-system)
1755 0 : (setq len (+ (length encoding-name) 1
1756 0 : (- (point) last-pos)))
1757 : ;; According to the spec of CTEXT, it is not
1758 : ;; necessary to produce this extra designation
1759 : ;; sequence, but some buggy application
1760 : ;; (e.g. crxvt-gb) requires it.
1761 0 : (insert "\e(B")
1762 0 : (save-excursion
1763 0 : (goto-char last-pos)
1764 0 : (insert (format "\e%%/%d" noctets))
1765 0 : (insert-byte (+ (/ len 128) 128) 1)
1766 0 : (insert-byte (+ (% len 128) 128) 1)
1767 0 : (insert encoding-name)
1768 0 : (insert 2)))
1769 : ;; Encode this range as characters in CHARSET.
1770 0 : (put-text-property last-pos (point) 'charset charset))
1771 : ;; Encode this range using UTF-8 encoding extension.
1772 0 : (encode-coding-region last-pos (point) 'mule-utf-8)
1773 0 : (save-excursion
1774 0 : (goto-char last-pos)
1775 0 : (insert "\e%G"))
1776 0 : (insert "\e%@")))
1777 0 : (goto-char (point-min)))))
1778 : ;; Must return nil, as build_annotations_2 expects that.
1779 : nil)
1780 :
1781 : ;;; FILE I/O
1782 :
1783 : ;; TODO many elements of this list are also in inhibit-local-variables-regexps.
1784 : (defcustom auto-coding-alist
1785 : ;; .exe and .EXE are added to support archive-mode looking at DOS
1786 : ;; self-extracting exe archives.
1787 : (mapcar (lambda (arg) (cons (purecopy (car arg)) (cdr arg)))
1788 : '(("\\.\\(\
1789 : arc\\|zip\\|lzh\\|lha\\|zoo\\|[jew]ar\\|xpi\\|rar\\|7z\\|\
1790 : ARC\\|ZIP\\|LZH\\|LHA\\|ZOO\\|[JEW]AR\\|XPI\\|RAR\\|7Z\\)\\'"
1791 : . no-conversion-multibyte)
1792 : ("\\.\\(exe\\|EXE\\)\\'" . no-conversion)
1793 : ("\\.\\(sx[dmicw]\\|odt\\|tar\\|t[bg]z\\)\\'" . no-conversion)
1794 : ("\\.\\(gz\\|Z\\|bz\\|bz2\\|xz\\|gpg\\)\\'" . no-conversion)
1795 : ("\\.\\(jpe?g\\|png\\|gif\\|tiff?\\|p[bpgn]m\\)\\'" . no-conversion)
1796 : ("\\.pdf\\'" . no-conversion)
1797 : ("/#[^/]+#\\'" . utf-8-emacs-unix)))
1798 : "Alist of filename patterns vs corresponding coding systems.
1799 : Each element looks like (REGEXP . CODING-SYSTEM).
1800 : A file whose name matches REGEXP is decoded by CODING-SYSTEM on reading.
1801 :
1802 : The settings in this alist take priority over `coding:' tags
1803 : in the file (see the function `set-auto-coding')
1804 : and the contents of `file-coding-system-alist'."
1805 : :version "24.1" ; added xz
1806 : :group 'files
1807 : :group 'mule
1808 : :type '(repeat (cons (regexp :tag "File name regexp")
1809 : (symbol :tag "Coding system"))))
1810 :
1811 : (defcustom auto-coding-regexp-alist
1812 : (mapcar (lambda (arg) (cons (purecopy (car arg)) (cdr arg)))
1813 : '(("\\`BABYL OPTIONS:[ \t]*-\\*-[ \t]*rmail[ \t]*-\\*-" . no-conversion)
1814 : ("\\`\xFE\xFF" . utf-16be-with-signature)
1815 : ("\\`\xFF\xFE" . utf-16le-with-signature)
1816 : ("\\`\xEF\xBB\xBF" . utf-8-with-signature)
1817 : ("\\`;ELC\024\0\0\0" . emacs-mule))) ; Emacs 20-compiled
1818 : "Alist of patterns vs corresponding coding systems.
1819 : Each element looks like (REGEXP . CODING-SYSTEM).
1820 : A file whose first bytes match REGEXP is decoded by CODING-SYSTEM on reading.
1821 :
1822 : The settings in this alist take priority over `coding:' tags
1823 : in the file (see the function `set-auto-coding')
1824 : and the contents of `file-coding-system-alist'."
1825 : :group 'files
1826 : :group 'mule
1827 : :type '(repeat (cons (regexp :tag "Regexp")
1828 : (symbol :tag "Coding system"))))
1829 :
1830 : (defun auto-coding-regexp-alist-lookup (from to)
1831 : "Lookup `auto-coding-regexp-alist' for the contents of the current buffer.
1832 : The value is a coding system is specified for the region FROM and TO,
1833 : or nil."
1834 326 : (save-excursion
1835 326 : (goto-char from)
1836 326 : (let ((alist auto-coding-regexp-alist)
1837 : coding-system)
1838 1956 : (while (and alist (not coding-system))
1839 1630 : (let ((regexp (car (car alist))))
1840 1630 : (if enable-multibyte-characters
1841 1630 : (setq regexp (string-to-multibyte regexp)))
1842 1630 : (if (re-search-forward regexp to t)
1843 0 : (setq coding-system (cdr (car alist)))
1844 1630 : (setq alist (cdr alist)))))
1845 326 : coding-system)))
1846 :
1847 : ;; See the bottom of this file for built-in auto coding functions.
1848 : (defcustom auto-coding-functions '(sgml-xml-auto-coding-function
1849 : sgml-html-meta-auto-coding-function)
1850 : "A list of functions which attempt to determine a coding system.
1851 :
1852 : Each function in this list should be written to operate on the
1853 : current buffer, but should not modify it in any way. The buffer
1854 : will contain undecoded text of parts of the file. Each function
1855 : should take one argument, SIZE, which says how many characters
1856 : \(starting from point) it should look at.
1857 :
1858 : If one of these functions succeeds in determining a coding
1859 : system, it should return that coding system. Otherwise, it
1860 : should return nil.
1861 :
1862 : If a file has a `coding:' tag, that takes precedence over these
1863 : functions, so they won't be called at all."
1864 : :group 'files
1865 : :group 'mule
1866 : :type '(repeat function))
1867 :
1868 : (defvar set-auto-coding-for-load nil
1869 : "Non-nil means respect a \"unibyte: t\" entry in file local variables.
1870 : Emacs binds this variable to t when loading or byte-compiling Emacs Lisp
1871 : files.")
1872 :
1873 : (defun auto-coding-alist-lookup (filename)
1874 : "Return the coding system specified by `auto-coding-alist' for FILENAME."
1875 326 : (let ((alist auto-coding-alist)
1876 326 : (case-fold-search (file-name-case-insensitive-p filename))
1877 : coding-system)
1878 2608 : (while (and alist (not coding-system))
1879 2282 : (if (string-match (car (car alist)) filename)
1880 0 : (setq coding-system (cdr (car alist)))
1881 2282 : (setq alist (cdr alist))))
1882 326 : coding-system))
1883 :
1884 : (put 'enable-character-translation 'permanent-local t)
1885 : (put 'enable-character-translation 'safe-local-variable 'booleanp)
1886 :
1887 : (defun find-auto-coding (filename size)
1888 : "Find a coding system for a file FILENAME of which SIZE bytes follow point.
1889 : These bytes should include at least the first 1k of the file
1890 : and the last 3k of the file, but the middle may be omitted.
1891 :
1892 : The function checks FILENAME against the variable `auto-coding-alist'.
1893 : If FILENAME doesn't match any entries in the variable, it checks the
1894 : contents of the current buffer following point against
1895 : `auto-coding-regexp-alist'. If no match is found, it checks for a
1896 : `coding:' tag in the first one or two lines following point. If no
1897 : `coding:' tag is found, it checks any local variables list in the last
1898 : 3K bytes out of the SIZE bytes. Finally, if none of these methods
1899 : succeed, it checks to see if any function in `auto-coding-functions'
1900 : gives a match.
1901 :
1902 : If a coding system is specified, the return value is a cons
1903 : \(CODING . SOURCE), where CODING is the specified coding system and
1904 : SOURCE is a symbol `auto-coding-alist', `auto-coding-regexp-alist',
1905 : `:coding', or `auto-coding-functions' indicating by what CODING is
1906 : specified. Note that the validity of CODING is not checked;
1907 : it's the caller's responsibility to check it.
1908 :
1909 : If nothing is specified, the return value is nil."
1910 326 : (or (let ((coding-system (auto-coding-alist-lookup filename)))
1911 326 : (if coding-system
1912 326 : (cons coding-system 'auto-coding-alist)))
1913 : ;; Try using `auto-coding-regexp-alist'.
1914 326 : (let ((coding-system (auto-coding-regexp-alist-lookup (point)
1915 326 : (+ (point) size))))
1916 326 : (if coding-system
1917 326 : (cons coding-system 'auto-coding-regexp-alist)))
1918 326 : (let* ((case-fold-search t)
1919 326 : (head-start (point))
1920 326 : (head-end (+ head-start (min size 1024)))
1921 326 : (tail-start (+ head-start (max (- size 3072) 0)))
1922 326 : (tail-end (+ head-start size))
1923 : coding-system head-found tail-found char-trans)
1924 : ;; Try a short cut by searching for the string "coding:"
1925 : ;; and for "unibyte:" at the head and tail of SIZE bytes.
1926 326 : (setq head-found (or (search-forward "coding:" head-end t)
1927 305 : (search-forward "unibyte:" head-end t)
1928 305 : (search-forward "enable-character-translation:"
1929 326 : head-end t)))
1930 326 : (if (and head-found (> head-found tail-start))
1931 : ;; Head and tail are overlapped.
1932 10 : (setq tail-found head-found)
1933 316 : (goto-char tail-start)
1934 316 : (setq tail-found (or (search-forward "coding:" tail-end t)
1935 287 : (search-forward "unibyte:" tail-end t)
1936 287 : (search-forward "enable-character-translation:"
1937 326 : tail-end t))))
1938 :
1939 : ;; At first check the head.
1940 326 : (when head-found
1941 21 : (goto-char head-start)
1942 21 : (setq head-end (set-auto-mode-1))
1943 21 : (setq head-start (point))
1944 21 : (when (and head-end (< head-found head-end))
1945 21 : (goto-char head-start)
1946 21 : (when (and set-auto-coding-for-load
1947 0 : (re-search-forward
1948 : "\\(.*;\\)?[ \t]*unibyte:[ \t]*\\([^ ;]+\\)"
1949 21 : head-end t))
1950 0 : (display-warning 'mule
1951 0 : (format "\"unibyte: t\" (in %s) is obsolete; \
1952 : use \"coding: 'raw-text\" instead."
1953 0 : (file-relative-name filename))
1954 0 : :warning)
1955 21 : (setq coding-system 'raw-text))
1956 21 : (when (and (not coding-system)
1957 21 : (re-search-forward
1958 : "\\(.*;\\)?[ \t]*coding:[ \t]*\\([^ ;]+\\)"
1959 21 : head-end t))
1960 21 : (setq coding-system (intern (match-string 2))))
1961 21 : (when (re-search-forward
1962 : "\\(.*;\\)?[ \t]*enable-character-translation:[ \t]*\\([^ ;]+\\)"
1963 21 : head-end t)
1964 326 : (setq char-trans (match-string 2)))))
1965 :
1966 : ;; If no coding: tag in the head, check the tail.
1967 : ;; Here we must pay attention to the case that the end-of-line
1968 : ;; is just "\r" and we can't use "^" nor "$" in regexp.
1969 326 : (when (and tail-found (or (not coding-system) (not char-trans)))
1970 39 : (goto-char tail-start)
1971 39 : (re-search-forward "[\r\n]\^L" tail-end t)
1972 39 : (if (re-search-forward
1973 : "[\r\n]\\([^\r\n]*\\)[ \t]*Local Variables:[ \t]*\\([^\r\n]*\\)[\r\n]"
1974 39 : tail-end t)
1975 : ;; The prefix is what comes before "local variables:" in its
1976 : ;; line. The suffix is what comes after "local variables:"
1977 : ;; in its line.
1978 29 : (let* ((prefix (regexp-quote (match-string 1)))
1979 29 : (suffix (regexp-quote (match-string 2)))
1980 : (re-coding
1981 29 : (concat
1982 29 : "[\r\n]" prefix
1983 : ;; N.B. without the \n below, the regexp can
1984 : ;; eat newlines.
1985 : "[ \t]*coding[ \t]*:[ \t]*\\([^ \t\r\n]+\\)[ \t]*"
1986 29 : suffix "[\r\n]"))
1987 : (re-unibyte
1988 29 : (concat
1989 29 : "[\r\n]" prefix
1990 : "[ \t]*unibyte[ \t]*:[ \t]*\\([^ \t\r\n]+\\)[ \t]*"
1991 29 : suffix "[\r\n]"))
1992 : (re-char-trans
1993 29 : (concat
1994 29 : "[\r\n]" prefix
1995 : "[ \t]*enable-character-translation[ \t]*:[ \t]*\\([^ \t\r\n]+\\)[ \t]*"
1996 29 : suffix "[\r\n]"))
1997 : (re-end
1998 29 : (concat "[\r\n]" prefix "[ \t]*End *:[ \t]*" suffix
1999 29 : "[\r\n]?"))
2000 29 : (pos (1- (point))))
2001 29 : (forward-char -1) ; skip back \r or \n.
2002 29 : (re-search-forward re-end tail-end 'move)
2003 29 : (setq tail-end (point))
2004 29 : (goto-char pos)
2005 29 : (when (and set-auto-coding-for-load
2006 29 : (re-search-forward re-unibyte tail-end t))
2007 0 : (display-warning 'mule "\"unibyte: t\" is obsolete; \
2008 0 : use \"coding: 'raw-text\" instead." :warning)
2009 29 : (setq coding-system 'raw-text))
2010 29 : (when (and (not coding-system)
2011 29 : (re-search-forward re-coding tail-end t))
2012 29 : (setq coding-system (intern (match-string 1))))
2013 29 : (when (and (not char-trans)
2014 29 : (re-search-forward re-char-trans tail-end t))
2015 326 : (setq char-trans (match-string 1))))))
2016 326 : (if coding-system
2017 : ;; If the coding-system name ends with "!", remove it and
2018 : ;; set char-trans to "nil".
2019 50 : (let ((name (symbol-name coding-system)))
2020 50 : (if (= (aref name (1- (length name))) ?!)
2021 0 : (setq coding-system (intern (substring name 0 -1))
2022 326 : char-trans "nil"))))
2023 326 : (when (and char-trans
2024 326 : (not (setq char-trans (intern char-trans))))
2025 0 : (make-local-variable 'enable-character-translation)
2026 326 : (setq enable-character-translation nil))
2027 326 : (if coding-system
2028 326 : (cons coding-system :coding)))
2029 : ;; Finally, try all the `auto-coding-functions'.
2030 276 : (let ((funcs auto-coding-functions)
2031 : (coding-system nil))
2032 828 : (while (and funcs (not coding-system))
2033 552 : (setq coding-system (ignore-errors
2034 552 : (save-excursion
2035 552 : (goto-char (point-min))
2036 1104 : (funcall (pop funcs) size)))))
2037 276 : (if coding-system
2038 326 : (cons coding-system 'auto-coding-functions)))))
2039 :
2040 : (defun set-auto-coding (filename size)
2041 : "Return coding system for a file FILENAME of which SIZE bytes follow point.
2042 : See `find-auto-coding' for how the coding system is found.
2043 : Return nil if an invalid coding system is found.
2044 :
2045 : The variable `set-auto-coding-function' (which see) is set to this
2046 : function by default."
2047 316 : (let ((found (find-auto-coding filename size)))
2048 316 : (if (and found (coding-system-p (car found)))
2049 316 : (car found))))
2050 :
2051 : (setq set-auto-coding-function 'set-auto-coding)
2052 :
2053 : (defun after-insert-file-set-coding (inserted &optional visit)
2054 : "Set `buffer-file-coding-system' of current buffer after text is inserted.
2055 : INSERTED is the number of characters that were inserted, as figured
2056 : in the situation before this function. Return the number of characters
2057 : inserted, as figured in the situation after. The two numbers can be
2058 : different if the buffer has become unibyte.
2059 : The optional second arg VISIT non-nil means that we are visiting a file."
2060 1185 : (if (and visit
2061 155 : coding-system-for-read
2062 1185 : (not (eq coding-system-for-read 'auto-save-coding)))
2063 0 : (setq buffer-file-coding-system-explicit
2064 1185 : (cons coding-system-for-read nil)))
2065 1185 : (if last-coding-system-used
2066 1185 : (let ((coding-system
2067 1185 : (find-new-buffer-file-coding-system last-coding-system-used)))
2068 1185 : (if coding-system
2069 1185 : (setq buffer-file-coding-system coding-system))))
2070 1185 : inserted)
2071 :
2072 : ;; The coding-spec and eol-type of coding-system returned is decided
2073 : ;; independently in the following order.
2074 : ;; 1. That of buffer-file-coding-system locally bound.
2075 : ;; 2. That of CODING.
2076 :
2077 : (defun find-new-buffer-file-coding-system (coding)
2078 : "Return a coding system for a buffer when a file of CODING is inserted.
2079 : The local variable `buffer-file-coding-system' of the current buffer
2080 : is set to the returned value.
2081 : Return nil if there's no need to set `buffer-file-coding-system'."
2082 1185 : (let (local-coding local-eol
2083 : found-coding found-eol
2084 : new-coding new-eol)
2085 1185 : (if (null coding)
2086 : ;; Nothing found about coding.
2087 : nil
2088 :
2089 : ;; Get information of `buffer-file-coding-system' in LOCAL-EOL
2090 : ;; and LOCAL-CODING.
2091 1185 : (setq local-eol (coding-system-eol-type buffer-file-coding-system))
2092 1185 : (if (null (numberp local-eol))
2093 : ;; But eol-type is not yet set.
2094 1185 : (setq local-eol nil))
2095 1185 : (if (and buffer-file-coding-system
2096 130 : (not (eq (coding-system-type buffer-file-coding-system)
2097 1185 : 'undecided)))
2098 1185 : (setq local-coding (coding-system-base buffer-file-coding-system)))
2099 :
2100 1185 : (if (and (local-variable-p 'buffer-file-coding-system)
2101 1185 : local-eol local-coding)
2102 : ;; The current buffer has already set full coding-system, we
2103 : ;; had better not change it.
2104 : nil
2105 :
2106 1055 : (setq found-eol (coding-system-eol-type coding))
2107 1055 : (if (null (numberp found-eol))
2108 : ;; But eol-type is not found.
2109 : ;; If EOL conversions are inhibited, force unix eol-type.
2110 1055 : (setq found-eol (if inhibit-eol-conversion 0)))
2111 1055 : (setq found-coding (coding-system-base coding))
2112 :
2113 1055 : (if (and (not found-eol) (eq found-coding 'undecided))
2114 : ;; No valid coding information found.
2115 : nil
2116 :
2117 : ;; Some coding information (eol or text) found.
2118 :
2119 : ;; The local setting takes precedence over the found one.
2120 909 : (setq new-coding (if (local-variable-p 'buffer-file-coding-system)
2121 0 : (or local-coding found-coding)
2122 909 : (or found-coding local-coding)))
2123 909 : (setq new-eol (if (local-variable-p 'buffer-file-coding-system)
2124 0 : (or local-eol found-eol)
2125 909 : (or found-eol local-eol)))
2126 :
2127 909 : (let ((eol-type (coding-system-eol-type new-coding)))
2128 909 : (if (and (numberp new-eol) (vectorp eol-type))
2129 196 : (aref eol-type new-eol)
2130 1185 : new-coding)))))))
2131 :
2132 : (defun modify-coding-system-alist (target-type regexp coding-system)
2133 : "Modify one of look up tables for finding a coding system on I/O operation.
2134 : There are three of such tables, `file-coding-system-alist',
2135 : `process-coding-system-alist', and `network-coding-system-alist'.
2136 :
2137 : TARGET-TYPE specifies which of them to modify.
2138 : If it is `file', it affects `file-coding-system-alist' (which see).
2139 : If it is `process', it affects `process-coding-system-alist' (which see).
2140 : If it is `network', it affects `network-coding-system-alist' (which see).
2141 :
2142 : REGEXP is a regular expression matching a target of I/O operation.
2143 : The target is a file name if TARGET-TYPE is `file', a program name if
2144 : TARGET-TYPE is `process', or a network service name or a port number
2145 : to connect to if TARGET-TYPE is `network'.
2146 :
2147 : CODING-SYSTEM is a coding system to perform code conversion on the I/O
2148 : operation, or a cons cell (DECODING . ENCODING) specifying the coding
2149 : systems for decoding and encoding respectively, or a function symbol
2150 : which, when called, returns such a cons cell."
2151 0 : (or (memq target-type '(file process network))
2152 0 : (error "Invalid target type: %s" target-type))
2153 0 : (or (stringp regexp)
2154 0 : (and (eq target-type 'network) (integerp regexp))
2155 0 : (error "Invalid regular expression: %s" regexp))
2156 0 : (if (symbolp coding-system)
2157 0 : (if (not (fboundp coding-system))
2158 0 : (progn
2159 0 : (check-coding-system coding-system)
2160 0 : (setq coding-system (cons coding-system coding-system))))
2161 0 : (check-coding-system (car coding-system))
2162 0 : (check-coding-system (cdr coding-system)))
2163 0 : (cond ((eq target-type 'file)
2164 0 : (let ((slot (assoc regexp file-coding-system-alist)))
2165 0 : (if slot
2166 0 : (setcdr slot coding-system)
2167 0 : (setq file-coding-system-alist
2168 0 : (cons (cons regexp coding-system)
2169 0 : file-coding-system-alist)))))
2170 0 : ((eq target-type 'process)
2171 0 : (let ((slot (assoc regexp process-coding-system-alist)))
2172 0 : (if slot
2173 0 : (setcdr slot coding-system)
2174 0 : (setq process-coding-system-alist
2175 0 : (cons (cons regexp coding-system)
2176 0 : process-coding-system-alist)))))
2177 : (t
2178 0 : (let ((slot (assoc regexp network-coding-system-alist)))
2179 0 : (if slot
2180 0 : (setcdr slot coding-system)
2181 0 : (setq network-coding-system-alist
2182 0 : (cons (cons regexp coding-system)
2183 0 : network-coding-system-alist)))))))
2184 :
2185 : (defun decode-coding-inserted-region (from to filename
2186 : &optional visit beg end replace)
2187 : "Decode the region between FROM and TO as if it is read from file FILENAME.
2188 : The idea is that the text between FROM and TO was just inserted somehow.
2189 : Optional arguments VISIT, BEG, END, and REPLACE are the same as those
2190 : of the function `insert-file-contents'.
2191 : Part of the job of this function is setting `buffer-undo-list' appropriately."
2192 0 : (save-excursion
2193 0 : (save-restriction
2194 0 : (let ((coding coding-system-for-read)
2195 : undo-list-saved)
2196 0 : (if visit
2197 : ;; Temporarily turn off undo recording, if we're decoding the
2198 : ;; text of a visited file.
2199 0 : (setq buffer-undo-list t)
2200 : ;; Otherwise, if we can recognize the undo elt for the insertion,
2201 : ;; remove it and get ready to replace it later.
2202 : ;; In the mean time, turn off undo recording.
2203 0 : (let ((last (car-safe buffer-undo-list)))
2204 0 : (if (and (consp last) (eql (car last) from) (eql (cdr last) to))
2205 0 : (setq undo-list-saved (cdr buffer-undo-list)
2206 0 : buffer-undo-list t))))
2207 0 : (narrow-to-region from to)
2208 0 : (goto-char (point-min))
2209 0 : (or coding
2210 0 : (setq coding (funcall set-auto-coding-function
2211 0 : filename (- (point-max) (point-min)))))
2212 0 : (or coding
2213 0 : (setq coding (car (find-operation-coding-system
2214 : 'insert-file-contents
2215 0 : (cons filename (current-buffer))
2216 0 : visit beg end replace))))
2217 0 : (if (coding-system-p coding)
2218 0 : (or enable-multibyte-characters
2219 0 : (setq coding
2220 0 : (coding-system-change-text-conversion coding 'raw-text)))
2221 0 : (setq coding nil))
2222 0 : (if coding
2223 0 : (decode-coding-region (point-min) (point-max) coding)
2224 0 : (setq last-coding-system-used coding))
2225 : ;; If we're decoding the text of a visited file,
2226 : ;; the undo list should start out empty.
2227 0 : (if visit
2228 0 : (setq buffer-undo-list nil)
2229 : ;; If we decided to replace the undo entry for the insertion,
2230 : ;; do so now.
2231 0 : (if undo-list-saved
2232 0 : (setq buffer-undo-list
2233 0 : (cons (cons from (point-max)) undo-list-saved))))))))
2234 :
2235 : (defun recode-region (start end new-coding coding)
2236 : "Re-decode the region (previously decoded by CODING) by NEW-CODING."
2237 : (interactive
2238 0 : (list (region-beginning) (region-end)
2239 0 : (read-coding-system "Text was really in: ")
2240 0 : (let ((coding (or buffer-file-coding-system last-coding-system-used)))
2241 0 : (read-coding-system
2242 0 : (concat "But was interpreted as"
2243 0 : (if coding (format " (default %S): " coding) ": "))
2244 0 : coding))))
2245 0 : (or (and new-coding coding)
2246 0 : (error "Coding system not specified"))
2247 : ;; Check it before we encode the region.
2248 0 : (check-coding-system new-coding)
2249 0 : (save-restriction
2250 0 : (narrow-to-region start end)
2251 0 : (encode-coding-region (point-min) (point-max) coding)
2252 0 : (decode-coding-region (point-min) (point-max) new-coding))
2253 0 : (if (region-active-p)
2254 0 : (deactivate-mark)))
2255 :
2256 : (defun make-translation-table (&rest args)
2257 : "Make a translation table from arguments.
2258 : A translation table is a char table intended for character
2259 : translation in CCL programs.
2260 :
2261 : Each argument is a list of elements of the form (FROM . TO), where FROM
2262 : is a character to be translated to TO.
2263 :
2264 : The arguments and forms in each argument are processed in the given
2265 : order, and if a previous form already translates TO to some other
2266 : character, say TO-ALT, FROM is also translated to TO-ALT."
2267 8 : (let ((table (make-char-table 'translation-table))
2268 : revlist)
2269 8 : (dolist (elts args)
2270 8 : (dolist (elt elts)
2271 5070 : (let ((from (car elt))
2272 5070 : (to (cdr elt))
2273 : to-alt rev-from rev-to)
2274 : ;; If we have already translated TO to TO-ALT, FROM should
2275 : ;; also be translated to TO-ALT.
2276 5070 : (if (setq to-alt (aref table to))
2277 5070 : (setq to to-alt))
2278 5070 : (aset table from to)
2279 : ;; If we have already translated some chars to FROM, they
2280 : ;; should also be translated to TO.
2281 5070 : (when (setq rev-from (assq from revlist))
2282 0 : (dolist (elt (cdr rev-from))
2283 0 : (aset table elt to))
2284 0 : (setq revlist (delq rev-from revlist)
2285 5070 : rev-from (cdr rev-from)))
2286 : ;; Now update REVLIST.
2287 5070 : (setq rev-to (assq to revlist))
2288 5070 : (if rev-to
2289 13 : (setcdr rev-to (cons from (cdr rev-to)))
2290 5057 : (setq rev-to (list to from)
2291 5070 : revlist (cons rev-to revlist)))
2292 5070 : (if rev-from
2293 5070 : (setcdr rev-to (append rev-from (cdr rev-to)))))))
2294 : ;; Return TABLE just created.
2295 8 : (set-char-table-extra-slot table 1 1)
2296 8 : table))
2297 :
2298 : (defun make-translation-table-from-vector (vec)
2299 : "Make translation table from decoding vector VEC.
2300 : VEC is an array of 256 elements to map unibyte codes to multibyte
2301 : characters. Elements may be nil for undefined code points."
2302 0 : (let ((table (make-char-table 'translation-table))
2303 0 : (rev-table (make-char-table 'translation-table))
2304 : ch)
2305 0 : (dotimes (i 256)
2306 0 : (setq ch (aref vec i))
2307 0 : (when ch
2308 0 : (aset table i ch)
2309 0 : (if (>= ch 256)
2310 0 : (aset rev-table ch i))))
2311 0 : (set-char-table-extra-slot table 0 rev-table)
2312 0 : (set-char-table-extra-slot table 1 1)
2313 0 : (set-char-table-extra-slot rev-table 1 1)
2314 0 : table))
2315 :
2316 : (defun make-translation-table-from-alist (alist)
2317 : "Make translation table from N<->M mapping in ALIST.
2318 : ALIST is an alist, each element has the form (FROM . TO).
2319 : FROM and TO are a character or a vector of characters.
2320 : If FROM is a character, that character is translated to TO.
2321 : If FROM is a vector of characters, that sequence is translated to TO.
2322 : The first extra-slot of the value is a translation table for reverse mapping.
2323 :
2324 : FROM and TO may be nil. If TO is nil, the translation from FROM
2325 : to nothing is defined in the translation table and that element
2326 : is ignored in the reverse map. If FROM is nil, the translation
2327 : from TO to nothing is defined in the reverse map only. A vector
2328 : of length zero has the same meaning as specifying nil."
2329 7 : (let ((tables (vector (make-char-table 'translation-table)
2330 7 : (make-char-table 'translation-table)))
2331 : table max-lookup from to idx val)
2332 7 : (dotimes (i 2)
2333 14 : (setq table (aref tables i))
2334 14 : (setq max-lookup 1)
2335 14 : (dolist (elt alist)
2336 168594 : (if (= i 0)
2337 84297 : (setq from (car elt) to (cdr elt))
2338 168594 : (setq from (cdr elt) to (car elt)))
2339 168594 : (if (characterp from)
2340 84297 : (setq idx from)
2341 84297 : (if (= (length from) 0)
2342 0 : (setq idx nil)
2343 84297 : (setq idx (aref from 0)
2344 168594 : max-lookup (max max-lookup (length from)))))
2345 168594 : (when idx
2346 168594 : (setq val (aref table idx))
2347 168594 : (if val
2348 77528 : (progn
2349 77528 : (or (consp val)
2350 77528 : (setq val (list (cons (vector idx) val))))
2351 77528 : (if (characterp from)
2352 77528 : (setq from (vector from)))
2353 77528 : (setq val (nconc val (list (cons from to)))))
2354 91066 : (if (characterp from)
2355 84297 : (setq val to)
2356 168594 : (setq val (list (cons from to)))))
2357 168594 : (aset table idx val)))
2358 14 : (set-char-table-extra-slot table 1 max-lookup))
2359 7 : (set-char-table-extra-slot (aref tables 0) 0 (aref tables 1))
2360 7 : (aref tables 0)))
2361 :
2362 : (defun define-translation-table (symbol &rest args)
2363 : "Define SYMBOL as the name of translation table made by ARGS.
2364 : This sets up information so that the table can be used for
2365 : translations in a CCL program.
2366 :
2367 : If the first element of ARGS is a char-table whose purpose is
2368 : `translation-table', just define SYMBOL to name it. (Note that this
2369 : function does not bind SYMBOL.)
2370 :
2371 : Any other ARGS should be suitable as arguments of the function
2372 : `make-translation-table' (which see).
2373 :
2374 : This function sets properties `translation-table' and
2375 : `translation-table-id' of SYMBOL to the created table itself and the
2376 : identification number of the table respectively. It also registers
2377 : the table in `translation-table-vector'."
2378 16 : (let ((table (if (and (char-table-p (car args))
2379 8 : (eq (char-table-subtype (car args))
2380 16 : 'translation-table))
2381 8 : (car args)
2382 16 : (apply 'make-translation-table args)))
2383 16 : (len (length translation-table-vector))
2384 : (id 0)
2385 : (done nil))
2386 16 : (put symbol 'translation-table table)
2387 143 : (while (not done)
2388 127 : (if (>= id len)
2389 0 : (setq translation-table-vector
2390 127 : (vconcat translation-table-vector (make-vector len nil))))
2391 127 : (let ((slot (aref translation-table-vector id)))
2392 127 : (if (or (not slot)
2393 127 : (eq (car slot) symbol))
2394 16 : (progn
2395 16 : (aset translation-table-vector id (cons symbol table))
2396 16 : (setq done t))
2397 127 : (setq id (1+ id)))))
2398 16 : (put symbol 'translation-table-id id)
2399 16 : id))
2400 :
2401 : (defun translate-region (start end table)
2402 : "From START to END, translate characters according to TABLE.
2403 : TABLE is a string or a char-table.
2404 : If TABLE is a string, the Nth character in it is the mapping
2405 : for the character with code N.
2406 : If TABLE is a char-table, the element for character N is the mapping
2407 : for the character with code N.
2408 : It returns the number of characters changed."
2409 : (interactive
2410 0 : (list (region-beginning)
2411 0 : (region-end)
2412 0 : (let (table l)
2413 0 : (dotimes (i (length translation-table-vector))
2414 0 : (if (consp (aref translation-table-vector i))
2415 0 : (push (list (symbol-name
2416 0 : (car (aref translation-table-vector i)))) l)))
2417 0 : (if (not l)
2418 0 : (error "No translation table defined"))
2419 0 : (while (not table)
2420 0 : (setq table (completing-read "Translation table: " l nil t)))
2421 0 : (intern table))))
2422 157260 : (if (symbolp table)
2423 157260 : (let ((val (get table 'translation-table)))
2424 157260 : (or (char-table-p val)
2425 157260 : (error "Invalid translation table name: %s" table))
2426 157260 : (setq table val)))
2427 157260 : (translate-region-internal start end table))
2428 :
2429 : (defmacro with-category-table (table &rest body)
2430 : "Execute BODY like `progn' with TABLE the current category table.
2431 : The category table of the current buffer is saved, BODY is evaluated,
2432 : then the saved table is restored, even in case of an abnormal exit.
2433 : Value is what BODY returns."
2434 : (declare (indent 1) (debug t))
2435 0 : (let ((old-table (make-symbol "old-table"))
2436 0 : (old-buffer (make-symbol "old-buffer")))
2437 0 : `(let ((,old-table (category-table))
2438 0 : (,old-buffer (current-buffer)))
2439 : (unwind-protect
2440 : (progn
2441 0 : (set-category-table ,table)
2442 0 : ,@body)
2443 0 : (with-current-buffer ,old-buffer
2444 0 : (set-category-table ,old-table))))))
2445 :
2446 : (defun define-translation-hash-table (symbol table)
2447 : "Define SYMBOL as the name of the hash translation TABLE for use in CCL.
2448 :
2449 : Analogous to `define-translation-table', but updates
2450 : `translation-hash-table-vector' and the table is for use in the CCL
2451 : `lookup-integer' and `lookup-character' functions."
2452 0 : (unless (and (symbolp symbol)
2453 0 : (hash-table-p table))
2454 0 : (error "Bad args to define-translation-hash-table"))
2455 0 : (let ((len (length translation-hash-table-vector))
2456 : (id 0)
2457 : done)
2458 0 : (put symbol 'translation-hash-table table)
2459 0 : (while (not done)
2460 0 : (if (>= id len)
2461 0 : (setq translation-hash-table-vector
2462 0 : (vconcat translation-hash-table-vector [nil])))
2463 0 : (let ((slot (aref translation-hash-table-vector id)))
2464 0 : (if (or (not slot)
2465 0 : (eq (car slot) symbol))
2466 0 : (progn
2467 0 : (aset translation-hash-table-vector id (cons symbol table))
2468 0 : (setq done t))
2469 0 : (setq id (1+ id)))))
2470 0 : (put symbol 'translation-hash-table-id id)
2471 0 : id))
2472 :
2473 : ;;; Initialize some variables.
2474 :
2475 : (put 'use-default-ascent 'char-table-extra-slots 0)
2476 : (setq use-default-ascent (make-char-table 'use-default-ascent))
2477 : (put 'ignore-relative-composition 'char-table-extra-slots 0)
2478 : (setq ignore-relative-composition
2479 : (make-char-table 'ignore-relative-composition))
2480 :
2481 : ;;; Built-in auto-coding-functions:
2482 :
2483 : (defun sgml-xml-auto-coding-function (size)
2484 : "Determine whether the buffer is XML, and if so, its encoding.
2485 : This function is intended to be added to `auto-coding-functions'."
2486 276 : (setq size (+ (point) size))
2487 276 : (when (re-search-forward "\\`[[:space:]\n]*<\\?xml" size t)
2488 0 : (let ((end (save-excursion
2489 : ;; This is a hack.
2490 0 : (re-search-forward "[\"']\\s-*\\?>" size t))))
2491 0 : (when end
2492 0 : (if (re-search-forward "encoding=[\"']\\(.+?\\)[\"']" end t)
2493 0 : (let* ((match (match-string 1))
2494 0 : (sym (intern (downcase match))))
2495 0 : (if (coding-system-p sym)
2496 0 : sym
2497 0 : (message "Warning: unknown coding system \"%s\"" match)
2498 0 : nil))
2499 : ;; Files without an encoding tag should be UTF-8. But users
2500 : ;; may be naive about encodings, and have saved the file from
2501 : ;; another editor that does not help them get the encoding right.
2502 : ;; Detect the encoding and warn the user if it is detected as
2503 : ;; something other than UTF-8.
2504 0 : (let ((detected
2505 0 : (with-coding-priority '(utf-8)
2506 0 : (coding-system-base
2507 0 : (detect-coding-region (point-min) size t)))))
2508 : ;; Pure ASCII always comes back as undecided.
2509 0 : (if (memq detected '(utf-8 undecided))
2510 : 'utf-8
2511 0 : (warn "File contents detected as %s.
2512 : Consider adding an encoding attribute to the xml declaration,
2513 0 : or saving as utf-8, as mandated by the xml specification." detected)
2514 276 : detected)))))))
2515 :
2516 : (defun sgml-html-meta-auto-coding-function (size)
2517 : "If the buffer has an HTML meta tag, use it to determine encoding.
2518 : This function is intended to be added to `auto-coding-functions'."
2519 276 : (let ((case-fold-search t))
2520 276 : (setq size (min (+ (point) size)
2521 276 : (save-excursion
2522 : ;; Limit the search by the end of the HTML header.
2523 276 : (or (search-forward "</head>" (+ (point) size) t)
2524 : ;; In case of no header, search only 10 lines.
2525 276 : (forward-line 10))
2526 276 : (point))))
2527 : ;; Make sure that the buffer really contains an HTML document, by
2528 : ;; checking that it starts with a doctype or a <HTML> start tag
2529 : ;; (allowing for whitespace at bob). Note: 'DOCTYPE NETSCAPE' is
2530 : ;; useful for Mozilla bookmark files.
2531 276 : (when (and (re-search-forward "\\`[[:space:]\n]*\\(<!doctype[[:space:]\n]+\\(html\\|netscape\\)\\|<html\\)" size t)
2532 276 : (re-search-forward "<meta\\s-+\\(http-equiv=[\"']?content-type[\"']?\\s-+content=[\"']text/\\sw+;\\s-*\\)?charset=[\"']?\\(.+?\\)[\"'\\s-/>]" size t))
2533 0 : (let* ((match (match-string 2))
2534 0 : (sym (intern (downcase match))))
2535 0 : (if (coding-system-p sym)
2536 0 : sym
2537 0 : (message "Warning: unknown coding system \"%s\"" match)
2538 276 : nil)))))
2539 :
2540 : (defun xml-find-file-coding-system (args)
2541 : "Determine the coding system of an XML file without a declaration.
2542 : Strictly speaking, the file should be utf-8, but mistakes are
2543 : made, and there are genuine cases where XML fragments are saved,
2544 : with the encoding properly specified in a master document, or
2545 : added by processing software."
2546 0 : (if (eq (car args) 'insert-file-contents)
2547 0 : (let ((detected
2548 0 : (with-coding-priority '(utf-8)
2549 0 : (coding-system-base
2550 0 : (detect-coding-region (point-min) (point-max) t)))))
2551 : ;; Pure ASCII always comes back as undecided.
2552 0 : (cond
2553 0 : ((memq detected '(utf-8 undecided))
2554 : 'utf-8)
2555 0 : ((eq detected 'utf-16le-with-signature) 'utf-16le-with-signature)
2556 0 : ((eq detected 'utf-16be-with-signature) 'utf-16be-with-signature)
2557 : (t
2558 0 : (warn "File contents detected as %s.
2559 : Consider adding an xml declaration with the encoding specified,
2560 0 : or saving as utf-8, as mandated by the xml specification." detected)
2561 0 : detected)))
2562 : ;; Don't interfere with the user's wishes for saving the buffer.
2563 : ;; We did what we could when the buffer was created to ensure the
2564 : ;; correct encoding was used, or the user was warned, so any
2565 : ;; non-conformity here is deliberate on the part of the user.
2566 0 : 'undecided))
2567 :
2568 : ;;;
2569 : (provide 'mule)
2570 :
2571 : ;;; mule.el ends here
|