Line data Source code
1 : ;;; hebrew.el --- support for Hebrew -*- coding: utf-8 -*-
2 :
3 : ;; Copyright (C) 2001-2017 Free Software Foundation, Inc.
4 : ;; Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
5 : ;; 2005, 2006, 2007, 2008, 2009, 2010, 2011
6 : ;; National Institute of Advanced Industrial Science and Technology (AIST)
7 : ;; Registration Number H14PRO021
8 :
9 : ;; Copyright (C) 2003
10 : ;; National Institute of Advanced Industrial Science and Technology (AIST)
11 : ;; Registration Number H13PRO009
12 :
13 : ;; Keywords: multilingual, Hebrew
14 :
15 : ;; This file is part of GNU Emacs.
16 :
17 : ;; GNU Emacs is free software: you can redistribute it and/or modify
18 : ;; it under the terms of the GNU General Public License as published by
19 : ;; the Free Software Foundation, either version 3 of the License, or
20 : ;; (at your option) any later version.
21 :
22 : ;; GNU Emacs is distributed in the hope that it will be useful,
23 : ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
24 : ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25 : ;; GNU General Public License for more details.
26 :
27 : ;; You should have received a copy of the GNU General Public License
28 : ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
29 :
30 : ;;; Commentary:
31 :
32 : ;; For Hebrew, the character set ISO8859-8 is supported.
33 : ;; See http://www.ecma.ch/ecma1/STAND/ECMA-121.HTM.
34 : ;; Windows-1255 is also supported.
35 :
36 : ;;; Code:
37 :
38 : (define-coding-system 'hebrew-iso-8bit
39 : "ISO 2022 based 8-bit encoding for Hebrew (MIME:ISO-8859-8)."
40 : :coding-type 'charset
41 : :mnemonic ?8
42 : :charset-list '(iso-8859-8)
43 : :mime-charset 'iso-8859-8)
44 :
45 : (define-coding-system-alias 'iso-8859-8 'hebrew-iso-8bit)
46 :
47 : ;; These are for Explicit and Implicit directionality information, as
48 : ;; defined in RFC 1556.
49 : (define-coding-system-alias 'iso-8859-8-e 'hebrew-iso-8bit)
50 : (define-coding-system-alias 'iso-8859-8-i 'hebrew-iso-8bit)
51 :
52 : (set-language-info-alist
53 : "Hebrew" '((tutorial . "TUTORIAL.he")
54 : (charset iso-8859-8)
55 : (coding-priority hebrew-iso-8bit)
56 : (coding-system hebrew-iso-8bit windows-1255 cp862)
57 : (nonascii-translation . iso-8859-8)
58 : (input-method . "hebrew")
59 : (unibyte-display . hebrew-iso-8bit)
60 : (sample-text . "Hebrew שלום")
61 : (documentation . "Bidirectional editing is supported.")))
62 :
63 : (set-language-info-alist
64 : "Windows-1255" '((coding-priority windows-1255)
65 : (coding-system windows-1255)
66 : (documentation . "\
67 : Support for Windows-1255 encoding, e.g. for Yiddish.
68 : Bidirectional editing is supported.")))
69 :
70 : (define-coding-system 'windows-1255
71 : "windows-1255 (Hebrew) encoding (MIME: WINDOWS-1255)"
72 : :coding-type 'charset
73 : :mnemonic ?h
74 : :charset-list '(windows-1255)
75 : :mime-charset 'windows-1255)
76 : (define-coding-system-alias 'cp1255 'windows-1255)
77 :
78 : (define-coding-system 'cp862
79 : "DOS codepage 862 (Hebrew)"
80 : :coding-type 'charset
81 : :mnemonic ?D
82 : :charset-list '(cp862)
83 : :mime-charset 'cp862)
84 : (define-coding-system-alias 'ibm862 'cp862)
85 :
86 : ;; Return a nested alist of Hebrew character sequences vs the
87 : ;; corresponding glyph of FONT-OBJECT.
88 : (defun hebrew-font-get-precomposed (font-object)
89 0 : (let ((precomposed (font-get font-object 'hebrew-precomposed))
90 : ;; Vector of Hebrew precomposed characters.
91 : (chars [#xFB2A #xFB2B #xFB2C #xFB2D #xFB2E #xFB2F #xFB30 #xFB31
92 : #xFB32 #xFB33 #xFB34 #xFB35 #xFB36 #xFB38 #xFB39 #xFB3A
93 : #xFB3B #xFB3C #xFB3E #xFB40 #xFB41 #xFB43 #xFB44 #xFB46
94 : #xFB47 #xFB48 #xFB49 #xFB4A #xFB4B #xFB4C #xFB4D #xFB4E])
95 : ;; Vector of decomposition character sequences corresponding
96 : ;; to the above vector.
97 : (decomposed
98 : [[#x05E9 #x05C1]
99 : [#x05E9 #x05C2]
100 : [#x05E9 #x05BC #x05C1]
101 : [#x05E9 #x05BC #x05C2]
102 : [#x05D0 #x05B7]
103 : [#x05D0 #x05B8]
104 : [#x05D0 #x05BC]
105 : [#x05D1 #x05BC]
106 : [#x05D2 #x05BC]
107 : [#x05D3 #x05BC]
108 : [#x05D4 #x05BC]
109 : [#x05D5 #x05BC]
110 : [#x05D6 #x05BC]
111 : [#x05D8 #x05BC]
112 : [#x05D9 #x05BC]
113 : [#x05DA #x05BC]
114 : [#x05DB #x05BC]
115 : [#x05DC #x05BC]
116 : [#x05DE #x05BC]
117 : [#x05E0 #x05BC]
118 : [#x05E1 #x05BC]
119 : [#x05E3 #x05BC]
120 : [#x05E4 #x05BC]
121 : [#x05E6 #x05BC]
122 : [#x05E7 #x05BC]
123 : [#x05E8 #x05BC]
124 : [#x05E9 #x05BC]
125 : [#x05EA #x05BC]
126 : [#x05D5 #x05B9]
127 : [#x05D1 #x05BF]
128 : [#x05DB #x05BF]
129 : [#x05E4 #x05BF]]))
130 0 : (unless precomposed
131 0 : (setq precomposed (list t))
132 0 : (let ((gvec (font-get-glyphs font-object 0 (length chars) chars)))
133 0 : (dotimes (i (length chars))
134 0 : (if (aref gvec i)
135 0 : (set-nested-alist (aref decomposed i) (aref gvec i)
136 0 : precomposed))))
137 : ;; Cache the result in FONT-OBJECT's property.
138 0 : (font-put font-object 'hebrew-precomposed precomposed))
139 0 : precomposed))
140 :
141 : ;; Composition function for hebrew. GSTRING is made of a Hebrew base
142 : ;; character followed by Hebrew diacritical marks, or is made of
143 : ;; single Hebrew diacritical mark. Adjust GSTRING to display that
144 : ;; sequence properly. The basic strategy is:
145 : ;;
146 : ;; (1) If there's single diacritical, add padding space to the left
147 : ;; and right of the glyph.
148 : ;;
149 : ;; (2) If the font has OpenType features for Hebrew, ask the OTF
150 : ;; driver the whole work.
151 : ;;
152 : ;; (3) If the font has precomposed glyphs, use them as far as
153 : ;; possible. Adjust the remaining glyphs artificially.
154 :
155 : (defun hebrew-shape-gstring (gstring)
156 0 : (let* ((font (lgstring-font gstring))
157 0 : (otf (font-get font :otf))
158 0 : (nchars (lgstring-char-len gstring))
159 : header nglyphs base-width glyph precomposed val idx)
160 0 : (cond
161 0 : ((= nchars 1)
162 : ;; Independent diacritical mark. Add padding space to left or
163 : ;; right so that the glyph doesn't overlap with the surrounding
164 : ;; chars.
165 0 : (setq glyph (lgstring-glyph gstring 0))
166 0 : (let ((width (lglyph-width glyph))
167 : bearing)
168 0 : (if (< (setq bearing (lglyph-lbearing glyph)) 0)
169 0 : (lglyph-set-adjustment glyph bearing 0 (- width bearing)))
170 0 : (if (> (setq bearing (lglyph-rbearing glyph)) width)
171 0 : (lglyph-set-adjustment glyph 0 0 bearing))))
172 :
173 0 : ((or (assq 'hebr (car otf)) (assq 'hebr (cdr otf)))
174 : ;; FONT has OpenType features for Hebrew.
175 0 : (font-shape-gstring gstring))
176 :
177 : (t
178 : ;; FONT doesn't have OpenType features for Hebrew.
179 : ;; Try a precomposed glyph.
180 : ;; Now GSTRING is in this form:
181 : ;; [[FONT CHAR1 CHAR2 ... CHARn] nil GLYPH1 GLYPH2 ... GLYPHn nil ...]
182 0 : (setq precomposed (hebrew-font-get-precomposed font)
183 0 : header (lgstring-header gstring)
184 0 : val (lookup-nested-alist header precomposed nil 1))
185 0 : (if (and (consp val) (vectorp (car val)))
186 : ;; All characters can be displayed by a single precomposed glyph.
187 : ;; Reform GSTRING to [HEADER nil PRECOMPOSED-GLYPH nil ...]
188 0 : (let ((glyph (copy-sequence (car val))))
189 0 : (lglyph-set-from-to glyph 0 (1- nchars))
190 0 : (lgstring-set-glyph gstring 0 glyph)
191 0 : (lgstring-set-glyph gstring 1 nil))
192 0 : (if (and (integerp val) (> val 2)
193 0 : (setq glyph (lookup-nested-alist header precomposed val 1))
194 0 : (consp glyph) (vectorp (car glyph)))
195 : ;; The first (1- VAL) characters can be displayed by a
196 : ;; precomposed glyph. Provided that VAL is 3, the first
197 : ;; two glyphs should be replaced by the precomposed glyph.
198 : ;; In that case, reform GSTRING to:
199 : ;; [HEADER nil PRECOMPOSED-GLYPH GLYPH3 ... GLYPHn nil ...]
200 0 : (let* ((ncmp (1- val)) ; number of composed glyphs
201 0 : (diff (1- ncmp))) ; number of reduced glyphs
202 0 : (setq glyph (copy-sequence (car glyph)))
203 0 : (lglyph-set-from-to glyph 0 (1- nchars))
204 0 : (lgstring-set-glyph gstring 0 glyph)
205 0 : (setq idx ncmp)
206 0 : (while (< idx nchars)
207 0 : (setq glyph (lgstring-glyph gstring idx))
208 0 : (lglyph-set-from-to glyph 0 (1- nchars))
209 0 : (lgstring-set-glyph gstring (- idx diff) glyph)
210 0 : (setq idx (1+ idx)))
211 0 : (lgstring-set-glyph gstring (- idx diff) nil)
212 0 : (setq idx (- ncmp diff)
213 0 : nglyphs (- nchars diff)))
214 0 : (setq glyph (lgstring-glyph gstring 0))
215 0 : (lglyph-set-from-to glyph 0 (1- nchars))
216 0 : (setq idx 1 nglyphs nchars))
217 : ;; Now IDX is an index to the first non-precomposed glyph.
218 : ;; Adjust positions of the remaining glyphs artificially.
219 0 : (if (font-get font :combining-capability)
220 0 : (font-shape-gstring gstring)
221 0 : (setq base-width (lglyph-width (lgstring-glyph gstring 0)))
222 0 : (while (< idx nglyphs)
223 0 : (setq glyph (lgstring-glyph gstring idx))
224 0 : (lglyph-set-from-to glyph 0 (1- nchars))
225 0 : (if (>= (lglyph-lbearing glyph) (lglyph-width glyph))
226 : ;; It seems that this glyph is designed to be rendered
227 : ;; before the base glyph.
228 0 : (lglyph-set-adjustment glyph (- base-width) 0 0)
229 0 : (if (>= (lglyph-lbearing glyph) 0)
230 : ;; Align the horizontal center of this glyph to the
231 : ;; horizontal center of the base glyph.
232 0 : (let ((width (- (lglyph-rbearing glyph)
233 0 : (lglyph-lbearing glyph))))
234 0 : (lglyph-set-adjustment glyph
235 0 : (- (/ (- base-width width) 2)
236 0 : (lglyph-lbearing glyph)
237 0 : base-width) 0 0))))
238 0 : (setq idx (1+ idx)))))))
239 0 : gstring))
240 :
241 : (let* ((base "[\u05D0-\u05F2]")
242 : (combining "[\u0591-\u05BD\u05BF\u05C1-\u05C2\u05C4-\u05C5\u05C7]+")
243 : (pattern1 (concat base combining))
244 : (pattern2 (concat base "\u200D" combining)))
245 : (set-char-table-range
246 : composition-function-table '(#x591 . #x5C7)
247 : (list (vector pattern2 3 'hebrew-shape-gstring)
248 : (vector pattern2 2 'hebrew-shape-gstring)
249 : (vector pattern1 1 'hebrew-shape-gstring)
250 : [nil 0 hebrew-shape-gstring]))
251 : ;; Exclude non-combining characters.
252 : (set-char-table-range
253 : composition-function-table #x5BE nil)
254 : (set-char-table-range
255 : composition-function-table #x5C0 nil)
256 : (set-char-table-range
257 : composition-function-table #x5C3 nil)
258 : (set-char-table-range
259 : composition-function-table #x5C6 nil))
260 :
261 : (provide 'hebrew)
262 :
263 : ;;; hebrew.el ends here
|