LCOV - code coverage report
Current view: top level - lisp - xml.el (source / functions) Hit Total Coverage
Test: tramp-tests.info Lines: 19 467 4.1 %
Date: 2017-08-27 09:44:50 Functions: 4 23 17.4 %

          Line data    Source code
       1             : ;;; xml.el --- XML parser -*- lexical-binding: t -*-
       2             : 
       3             : ;; Copyright (C) 2000-2017 Free Software Foundation, Inc.
       4             : 
       5             : ;; Author: Emmanuel Briot  <briot@gnat.com>
       6             : ;; Maintainer: Mark A. Hershberger <mah@everybody.org>
       7             : ;; Keywords: xml, data
       8             : 
       9             : ;; This file is part of GNU Emacs.
      10             : 
      11             : ;; GNU Emacs is free software: you can redistribute it and/or modify
      12             : ;; it under the terms of the GNU General Public License as published by
      13             : ;; the Free Software Foundation, either version 3 of the License, or
      14             : ;; (at your option) any later version.
      15             : 
      16             : ;; GNU Emacs is distributed in the hope that it will be useful,
      17             : ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
      18             : ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      19             : ;; GNU General Public License for more details.
      20             : 
      21             : ;; You should have received a copy of the GNU General Public License
      22             : ;; along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.
      23             : 
      24             : ;;; Commentary:
      25             : 
      26             : ;; This file contains a somewhat incomplete non-validating XML parser.  It
      27             : ;; parses a file, and returns a list that can be used internally by
      28             : ;; any other Lisp libraries.
      29             : 
      30             : ;;; FILE FORMAT
      31             : 
      32             : ;; The document type declaration may either be ignored or (optionally)
      33             : ;; parsed, but currently the parsing will only accept element
      34             : ;; declarations.  The XML file is assumed to be well-formed.  In case
      35             : ;; of error, the parsing stops and the XML file is shown where the
      36             : ;; parsing stopped.
      37             : ;;
      38             : ;; It also knows how to ignore comments and processing instructions.
      39             : ;;
      40             : ;; The XML file should have the following format:
      41             : ;;    <node1 attr1="name1" attr2="name2" ...>value
      42             : ;;       <node2 attr3="name3" attr4="name4">value2</node2>
      43             : ;;       <node3 attr5="name5" attr6="name6">value3</node3>
      44             : ;;    </node1>
      45             : ;; Of course, the name of the nodes and attributes can be anything.  There can
      46             : ;; be any number of attributes (or none), as well as any number of children
      47             : ;; below the nodes.
      48             : ;;
      49             : ;; There can be only top level node, but with any number of children below.
      50             : 
      51             : ;;; LIST FORMAT
      52             : 
      53             : ;; The functions `xml-parse-file', `xml-parse-region' and
      54             : ;; `xml-parse-tag' return a list with the following format:
      55             : ;;
      56             : ;;    xml-list   ::= (node node ...)
      57             : ;;    node       ::= (qname attribute-list . child_node_list)
      58             : ;;    child_node_list ::= child_node child_node ...
      59             : ;;    child_node ::= node | string
      60             : ;;    qname      ::= (:namespace-uri . "name") | "name"
      61             : ;;    attribute_list ::= ((qname . "value") (qname . "value") ...)
      62             : ;;                       | nil
      63             : ;;    string     ::= "..."
      64             : ;;
      65             : ;; Some macros are provided to ease the parsing of this list.
      66             : ;; Whitespace is preserved.  Fixme: There should be a tree-walker that
      67             : ;; can remove it.
      68             : 
      69             : ;; TODO:
      70             : ;;  * xml:base, xml:space support
      71             : ;;  * more complete DOCTYPE parsing
      72             : ;;  * pi support
      73             : 
      74             : ;;; Code:
      75             : 
      76             : ;; Note that buffer-substring and match-string were formerly used in
      77             : ;; several places, because the -no-properties variants remove
      78             : ;; composition info.  However, after some discussion on emacs-devel,
      79             : ;; the consensus was that the speed of the -no-properties variants was
      80             : ;; a worthwhile tradeoff especially since we're usually parsing files
      81             : ;; instead of hand-crafted XML.
      82             : 
      83             : ;;;  Macros to parse the list
      84             : 
      85             : (defconst xml-undefined-entity "?"
      86             :   "What to substitute for undefined entities")
      87             : 
      88             : (defconst xml-default-ns '(("" . "")
      89             :                            ("xml" . "http://www.w3.org/XML/1998/namespace")
      90             :                            ("xmlns" . "http://www.w3.org/2000/xmlns/"))
      91             :   "Alist mapping default XML namespaces to their URIs.")
      92             : 
      93             : (defvar xml-entity-alist
      94             :   '(("lt"   . "&#60;")
      95             :     ("gt"   . ">")
      96             :     ("apos" . "'")
      97             :     ("quot" . "\"")
      98             :     ("amp"  . "&#38;"))
      99             :   "Alist mapping XML entities to their replacement text.")
     100             : 
     101             : (defvar xml-entity-expansion-limit 20000
     102             :   "The maximum size of entity reference expansions.
     103             : If the size of the buffer increases by this many characters while
     104             : expanding entity references in a segment of character data, the
     105             : XML parser signals an error.  Setting this to nil removes the
     106             : limit (making the parser vulnerable to XML bombs).")
     107             : 
     108             : (defvar xml-parameter-entity-alist nil
     109             :   "Alist of defined XML parametric entities.")
     110             : 
     111             : (defvar xml-sub-parser nil
     112             :   "Non-nil when the XML parser is parsing an XML fragment.")
     113             : 
     114             : (defvar xml-validating-parser nil
     115             :   "Set to non-nil to get validity checking.")
     116             : 
     117             : (defsubst xml-node-name (node)
     118             :   "Return the tag associated with NODE.
     119             : Without namespace-aware parsing, the tag is a symbol.
     120             : 
     121             : With namespace-aware parsing, the tag is a cons of a string
     122             : representing the uri of the namespace with the local name of the
     123             : tag.  For example,
     124             : 
     125             :     <foo>
     126             : 
     127             : would be represented by
     128             : 
     129             :     (\"\" . \"foo\").
     130             : 
     131             : If you'd just like a plain symbol instead, use `symbol-qnames' in
     132             : the PARSE-NS argument."
     133             : 
     134           0 :   (car node))
     135             : 
     136             : (defsubst xml-node-attributes (node)
     137             :   "Return the list of attributes of NODE.
     138             : The list can be nil."
     139           0 :   (nth 1 node))
     140             : 
     141             : (defsubst xml-node-children (node)
     142             :   "Return the list of children of NODE.
     143             : This is a list of nodes, and it can be nil."
     144           0 :   (cddr node))
     145             : 
     146             : (defun xml-get-children (node child-name)
     147             :   "Return the children of NODE whose tag is CHILD-NAME.
     148             : CHILD-NAME should match the value returned by `xml-node-name'."
     149           0 :   (let ((match ()))
     150           0 :     (dolist (child (xml-node-children node))
     151           0 :       (if (and (listp child)
     152           0 :                (equal (xml-node-name child) child-name))
     153           0 :           (push child match)))
     154           0 :     (nreverse match)))
     155             : 
     156             : (defun xml-get-attribute-or-nil (node attribute)
     157             :   "Get from NODE the value of ATTRIBUTE.
     158             : Return nil if the attribute was not found.
     159             : 
     160             : See also `xml-get-attribute'."
     161           0 :   (cdr (assoc attribute (xml-node-attributes node))))
     162             : 
     163             : (defsubst xml-get-attribute (node attribute)
     164             :   "Get from NODE the value of ATTRIBUTE.
     165             : An empty string is returned if the attribute was not found.
     166             : 
     167             : See also `xml-get-attribute-or-nil'."
     168           0 :   (or (xml-get-attribute-or-nil node attribute) ""))
     169             : 
     170             : ;;; Regular expressions for XML components
     171             : 
     172             : ;; The following regexps are used as subexpressions in regexps that
     173             : ;; are `eval-when-compile'd for efficiency, so they must be defined at
     174             : ;; compile time.
     175             : (eval-and-compile
     176             : 
     177             : ;; [4] NameStartChar
     178             : ;; See the definition of word syntax in `xml-syntax-table'.
     179             : (defconst xml-name-start-char-re (concat "[[:word:]:_]"))
     180             : 
     181             : ;; [4a] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7
     182             : ;;                 | [#x0300-#x036F] | [#x203F-#x2040]
     183             : (defconst xml-name-char-re (concat "[-0-9.[:word:]:_·̀-ͯ‿-⁀]"))
     184             : 
     185             : ;; [5] Name     ::= NameStartChar (NameChar)*
     186             : (defconst xml-name-re (concat xml-name-start-char-re xml-name-char-re "*"))
     187             : 
     188             : ;; [6] Names    ::= Name (#x20 Name)*
     189             : (defconst xml-names-re (concat xml-name-re "\\(?: " xml-name-re "\\)*"))
     190             : 
     191             : ;; [7] Nmtoken  ::= (NameChar)+
     192             : (defconst xml-nmtoken-re (concat xml-name-char-re "+"))
     193             : 
     194             : ;; [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
     195             : (defconst xml-nmtokens-re (concat xml-nmtoken-re "\\(?: " xml-name-re "\\)*"))
     196             : 
     197             : ;; [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
     198             : (defconst xml-char-ref-re  "\\(?:&#[0-9]+;\\|&#x[0-9a-fA-F]+;\\)")
     199             : 
     200             : ;; [68] EntityRef   ::= '&' Name ';'
     201             : (defconst xml-entity-ref (concat "&" xml-name-re ";"))
     202             : 
     203             : (defconst xml-entity-or-char-ref-re (concat "&\\(?:#\\(x\\)?\\([0-9a-fA-F]+\\)\\|\\("
     204             :                                             xml-name-re "\\)\\);"))
     205             : 
     206             : ;; [69] PEReference ::= '%' Name ';'
     207             : (defconst xml-pe-reference-re (concat "%\\(" xml-name-re "\\);"))
     208             : 
     209             : ;; [67] Reference   ::= EntityRef | CharRef
     210             : (defconst xml-reference-re (concat "\\(?:" xml-entity-ref "\\|" xml-char-ref-re "\\)"))
     211             : 
     212             : ;; [10] AttValue    ::= '"' ([^<&"] | Reference)* '"'
     213             : ;;                    | "'" ([^<&'] | Reference)* "'"
     214             : (defconst xml-att-value-re (concat "\\(?:\"\\(?:[^&\"]\\|"
     215             :                                    xml-reference-re "\\)*\"\\|"
     216             :                                    "'\\(?:[^&']\\|" xml-reference-re
     217             :                                    "\\)*'\\)"))
     218             : 
     219             : ;; [56] TokenizedType ::= 'ID'
     220             : ;;     [VC: ID] [VC: One ID / Element Type] [VC: ID Attribute Default]
     221             : ;;                      | 'IDREF'    [VC: IDREF]
     222             : ;;                      | 'IDREFS'   [VC: IDREF]
     223             : ;;                      | 'ENTITY'   [VC: Entity Name]
     224             : ;;                      | 'ENTITIES' [VC: Entity Name]
     225             : ;;                      | 'NMTOKEN'  [VC: Name Token]
     226             : ;;                      | 'NMTOKENS' [VC: Name Token]
     227             : (defconst xml-tokenized-type-re (concat "\\(?:ID\\|IDREF\\|IDREFS\\|ENTITY\\|"
     228             :                                         "ENTITIES\\|NMTOKEN\\|NMTOKENS\\)"))
     229             : 
     230             : ;; [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
     231             : (defconst xml-notation-type-re
     232             :   (concat "\\(?:NOTATION\\s-+(\\s-*" xml-name-re
     233             :           "\\(?:\\s-*|\\s-*" xml-name-re "\\)*\\s-*)\\)"))
     234             : 
     235             : ;; [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
     236             : ;;       [VC: Enumeration] [VC: No Duplicate Tokens]
     237             : (defconst xml-enumeration-re (concat "\\(?:(\\s-*" xml-nmtoken-re
     238             :                                      "\\(?:\\s-*|\\s-*" xml-nmtoken-re
     239             :                                      "\\)*\\s-+)\\)"))
     240             : 
     241             : ;; [57] EnumeratedType ::= NotationType | Enumeration
     242             : (defconst xml-enumerated-type-re (concat "\\(?:" xml-notation-type-re
     243             :                                          "\\|" xml-enumeration-re "\\)"))
     244             : 
     245             : ;; [54] AttType    ::= StringType | TokenizedType | EnumeratedType
     246             : ;; [55] StringType ::= 'CDATA'
     247             : (defconst xml-att-type-re (concat "\\(?:CDATA\\|" xml-tokenized-type-re
     248             :                                   "\\|" xml-notation-type-re
     249             :                                   "\\|" xml-enumerated-type-re "\\)"))
     250             : 
     251             : ;; [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
     252             : (defconst xml-default-decl-re (concat "\\(?:#REQUIRED\\|#IMPLIED\\|"
     253             :                                       "\\(?:#FIXED\\s-+\\)*"
     254             :                                       xml-att-value-re "\\)"))
     255             : 
     256             : ;; [53] AttDef      ::= S Name S AttType S DefaultDecl
     257             : (defconst xml-att-def-re (concat "\\(?:\\s-*" xml-name-re
     258             :                                  "\\s-*" xml-att-type-re
     259             :                                  "\\s-*" xml-default-decl-re "\\)"))
     260             : 
     261             : ;; [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"'
     262             : ;;                   | "'" ([^%&'] | PEReference | Reference)* "'"
     263             : (defconst xml-entity-value-re (concat "\\(?:\"\\(?:[^%&\"]\\|"
     264             :                                       xml-pe-reference-re
     265             :                                       "\\|" xml-reference-re
     266             :                                       "\\)*\"\\|'\\(?:[^%&']\\|"
     267             :                                       xml-pe-reference-re "\\|"
     268             :                                       xml-reference-re "\\)*'\\)"))
     269             : ) ; End of `eval-when-compile'
     270             : 
     271             : 
     272             : ;; [75] ExternalID ::= 'SYSTEM' S SystemLiteral
     273             : ;;                   | 'PUBLIC' S PubidLiteral S SystemLiteral
     274             : ;; [76] NDataDecl ::=           S 'NDATA' S
     275             : ;; [73] EntityDef  ::= EntityValue| (ExternalID NDataDecl?)
     276             : ;; [71] GEDecl     ::= '<!ENTITY' S Name S EntityDef S? '>'
     277             : ;; [74] PEDef      ::= EntityValue | ExternalID
     278             : ;; [72] PEDecl     ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
     279             : ;; [70] EntityDecl ::= GEDecl | PEDecl
     280             : 
     281             : ;; Note that this is setup so that we can do whitespace-skipping with
     282             : ;; `(skip-syntax-forward " ")', inter alia.  Previously this was slow
     283             : ;; compared with `re-search-forward', but that has been fixed.
     284             : 
     285             : (defvar xml-syntax-table
     286             :   ;; By default, characters have symbol syntax.
     287             :   (let ((table (make-char-table 'syntax-table '(3))))
     288             :     ;; The XML space chars [3], and nothing else, have space syntax.
     289             :     (dolist (c '(?\s ?\t ?\r ?\n))
     290             :       (modify-syntax-entry c " " table))
     291             :     ;; The characters in NameStartChar [4], aside from ':' and '_',
     292             :     ;; have word syntax.  This is used by `xml-name-start-char-re'.
     293             :     (modify-syntax-entry '(?A . ?Z)         "w" table)
     294             :     (modify-syntax-entry '(?a . ?z)         "w" table)
     295             :     (modify-syntax-entry '(#xC0  . #xD6)    "w" table)
     296             :     (modify-syntax-entry '(#xD8  . #XF6)    "w" table)
     297             :     (modify-syntax-entry '(#xF8  . #X2FF)   "w" table)
     298             :     (modify-syntax-entry '(#x370 . #X37D)   "w" table)
     299             :     (modify-syntax-entry '(#x37F . #x1FFF)  "w" table)
     300             :     (modify-syntax-entry '(#x200C . #x200D) "w" table)
     301             :     (modify-syntax-entry '(#x2070 . #x218F) "w" table)
     302             :     (modify-syntax-entry '(#x2C00 . #x2FEF) "w" table)
     303             :     (modify-syntax-entry '(#x3001 . #xD7FF) "w" table)
     304             :     (modify-syntax-entry '(#xF900 . #xFDCF) "w" table)
     305             :     (modify-syntax-entry '(#xFDF0 . #xFFFD) "w" table)
     306             :     (modify-syntax-entry '(#x10000 . #xEFFFF) "w" table)
     307             :     table)
     308             :   "Syntax table used by the XML parser.
     309             : In this syntax table, the XML space characters [ \\t\\r\\n], and
     310             : only those characters, have whitespace syntax.")
     311             : 
     312             : ;;; Entry points:
     313             : 
     314             : ;;;###autoload
     315             : (defun xml-parse-file (file &optional parse-dtd parse-ns)
     316             :   "Parse the well-formed XML file FILE.
     317             : Return the top node with all its children.
     318             : If PARSE-DTD is non-nil, the DTD is parsed rather than skipped.
     319             : 
     320             : If PARSE-NS is non-nil, then QNAMES are expanded.  By default,
     321             : the variable `xml-default-ns' is the mapping from namespaces to
     322             : URIs, and expanded names will be returned as a cons
     323             : 
     324             :   (\"namespace:\" . \"foo\").
     325             : 
     326             : If PARSE-NS is an alist, it will be used as the mapping from
     327             : namespace to URIs instead.
     328             : 
     329             : If it is the symbol `symbol-qnames', expanded names will be
     330             : returned as a plain symbol `namespace:foo' instead of a cons.
     331             : 
     332             : Both features can be combined by providing a cons cell
     333             : 
     334             :   (symbol-qnames . ALIST)."
     335           0 :   (with-temp-buffer
     336           0 :     (insert-file-contents file)
     337           0 :     (xml--parse-buffer parse-dtd parse-ns)))
     338             : 
     339             : ;;;###autoload
     340             : (defun xml-parse-region (&optional beg end buffer parse-dtd parse-ns)
     341             :   "Parse the region from BEG to END in BUFFER.
     342             : Return the XML parse tree, or raise an error if the region does
     343             : not contain well-formed XML.
     344             : 
     345             : If BEG is nil, it defaults to `point-min'.
     346             : If END is nil, it defaults to `point-max'.
     347             : If BUFFER is nil, it defaults to the current buffer.
     348             : If PARSE-DTD is non-nil, parse the DTD and return it as the first
     349             : element of the list.
     350             : If PARSE-NS is non-nil, then QNAMES are expanded.  By default,
     351             : the variable `xml-default-ns' is the mapping from namespaces to
     352             : URIs, and expanded names will be returned as a cons
     353             : 
     354             :   (\"namespace:\" . \"foo\").
     355             : 
     356             : If PARSE-NS is an alist, it will be used as the mapping from
     357             : namespace to URIs instead.
     358             : 
     359             : If it is the symbol `symbol-qnames', expanded names will be
     360             : returned as a plain symbol `namespace:foo' instead of a cons.
     361             : 
     362             : Both features can be combined by providing a cons cell
     363             : 
     364             :   (symbol-qnames . ALIST)."
     365             :   ;; Use fixed syntax table to ensure regexp char classes and syntax
     366             :   ;; specs DTRT.
     367           0 :   (unless buffer
     368           0 :     (setq buffer (current-buffer)))
     369           0 :   (with-temp-buffer
     370           0 :     (insert-buffer-substring-no-properties buffer beg end)
     371           0 :     (xml--parse-buffer parse-dtd parse-ns)))
     372             : 
     373             : ;; XML [5]
     374             : 
     375             : ;; Fixme:  This needs re-writing to deal with the XML grammar properly, i.e.
     376             : ;;   document  ::=  prolog element Misc*
     377             : ;;   prolog    ::=  XMLDecl? Misc* (doctypedecl Misc*)?
     378             : 
     379             : (defun xml--parse-buffer (parse-dtd parse-ns)
     380           0 :   (with-syntax-table xml-syntax-table
     381           0 :     (let ((case-fold-search nil)        ; XML is case-sensitive.
     382             :           ;; Prevent entity definitions from changing the defaults
     383           0 :           (xml-entity-alist xml-entity-alist)
     384           0 :           (xml-parameter-entity-alist xml-parameter-entity-alist)
     385             :           xml result dtd)
     386           0 :       (goto-char (point-min))
     387           0 :       (while (not (eobp))
     388           0 :         (if (search-forward "<" nil t)
     389           0 :             (progn
     390           0 :               (forward-char -1)
     391           0 :               (setq result (xml-parse-tag-1 parse-dtd parse-ns))
     392           0 :               (cond
     393           0 :                ((null result)
     394             :                 ;; Not looking at an xml start tag.
     395           0 :                 (unless (eobp)
     396           0 :                   (forward-char 1)))
     397           0 :                ((and xml (not xml-sub-parser))
     398             :                 ;; Translation of rule [1] of XML specifications
     399           0 :                 (error "XML: (Not Well-Formed) Only one root tag allowed"))
     400           0 :                ((and (listp (car result))
     401           0 :                      parse-dtd)
     402           0 :                 (setq dtd (car result))
     403           0 :                 (if (cdr result)        ; possible leading comment
     404           0 :                     (push (cdr result) xml)))
     405             :                (t
     406           0 :                 (push result xml))))
     407           0 :           (goto-char (point-max))))
     408           0 :       (if parse-dtd
     409           0 :           (cons dtd (nreverse xml))
     410           0 :         (nreverse xml)))))
     411             : 
     412             : (defun xml-maybe-do-ns (name default xml-ns)
     413             :   "Perform any namespace expansion.
     414             : NAME is the name to perform the expansion on.
     415             : DEFAULT is the default namespace.  XML-NS is a cons of namespace
     416             : names to uris.  When namespace-aware parsing is off, then XML-NS
     417             : is nil.
     418             : 
     419             : During namespace-aware parsing, any name without a namespace is
     420             : put into the namespace identified by DEFAULT.  nil is used to
     421             : specify that the name shouldn't be given a namespace.
     422             : Expanded names will by default be returned as a cons.  If you
     423             : would like to get plain symbols instead, provide a cons cell
     424             : 
     425             :   (symbol-qnames . ALIST)
     426             : 
     427             : in the XML-NS argument."
     428           0 :   (if (consp xml-ns)
     429           0 :       (let* ((symbol-qnames (eq (car-safe xml-ns) 'symbol-qnames))
     430           0 :              (nsp (string-match ":" name))
     431           0 :              (lname (if nsp (substring name (match-end 0)) name))
     432           0 :              (prefix (if nsp (substring name 0 (match-beginning 0)) default))
     433           0 :              (special (and (string-equal lname "xmlns") (not prefix)))
     434             :              ;; Setting default to nil will insure that there is not
     435             :              ;; matching cons in xml-ns.  In which case we
     436           0 :              (ns (or (cdr (assoc (if special "xmlns" prefix)
     437           0 :                                  (if symbol-qnames (cdr xml-ns) xml-ns)))
     438           0 :                      "")))
     439           0 :         (if (and symbol-qnames
     440           0 :                  (not special)
     441           0 :                  (not (string= prefix "xmlns")))
     442           0 :             (intern (concat ns lname))
     443           0 :           (cons ns (if special "" lname))))
     444           0 :     (intern name)))
     445             : 
     446             : (defun xml-parse-tag (&optional parse-dtd parse-ns)
     447             :   "Parse the tag at point.
     448             : If PARSE-DTD is non-nil, the DTD of the document, if any, is parsed and
     449             : returned as the first element in the list.
     450             : If PARSE-NS is non-nil, expand QNAMES; for further details, see
     451             : `xml-parse-region'.
     452             : 
     453             : Return one of:
     454             :  - a list : the matching node
     455             :  - nil    : the point is not looking at a tag.
     456             :  - a pair : the first element is the DTD, the second is the node."
     457           0 :   (let* ((case-fold-search nil)
     458             :          ;; Prevent entity definitions from changing the defaults
     459           0 :          (xml-entity-alist xml-entity-alist)
     460           0 :          (xml-parameter-entity-alist xml-parameter-entity-alist)
     461           0 :          (buf (current-buffer))
     462           0 :          (pos (point)))
     463           0 :     (with-temp-buffer
     464           0 :       (with-syntax-table xml-syntax-table
     465           0 :         (insert-buffer-substring-no-properties buf pos)
     466           0 :         (goto-char (point-min))
     467           0 :         (xml-parse-tag-1 parse-dtd parse-ns)))))
     468             : 
     469             : (defun xml-parse-tag-1 (&optional parse-dtd parse-ns)
     470             :   "Like `xml-parse-tag', but possibly modify the buffer while working."
     471           0 :   (let* ((xml-validating-parser (or parse-dtd xml-validating-parser))
     472             :          (xml-ns
     473           0 :           (cond ((eq parse-ns 'symbol-qnames)
     474           0 :                  (cons 'symbol-qnames xml-default-ns))
     475           0 :                 ((or (consp (car-safe parse-ns))
     476           0 :                      (and (eq (car-safe parse-ns) 'symbol-qnames)
     477           0 :                           (listp (cdr parse-ns))))
     478           0 :                  parse-ns)
     479           0 :                 (parse-ns
     480           0 :                  xml-default-ns))))
     481           0 :     (cond
     482             :      ;; Processing instructions, like <?xml version="1.0"?>.
     483           0 :      ((looking-at-p "<\\?")
     484           0 :       (search-forward "?>")
     485           0 :       (skip-syntax-forward " ")
     486           0 :       (xml-parse-tag-1 parse-dtd xml-ns))
     487             :      ;; Character data (CDATA) sections, in which no tag should be interpreted
     488           0 :      ((looking-at "<!\\[CDATA\\[")
     489           0 :       (let ((pos (match-end 0)))
     490           0 :         (unless (search-forward "]]>" nil t)
     491           0 :           (error "XML: (Not Well Formed) CDATA section does not end anywhere in the document"))
     492           0 :         (concat
     493           0 :          (buffer-substring-no-properties pos (match-beginning 0))
     494           0 :          (xml-parse-string))))
     495             :      ;; DTD for the document
     496           0 :      ((looking-at-p "<!DOCTYPE[ \t\n\r]")
     497           0 :       (let ((dtd (xml-parse-dtd parse-ns)))
     498           0 :         (skip-syntax-forward " ")
     499           0 :         (if xml-validating-parser
     500           0 :             (cons dtd (xml-parse-tag-1 nil xml-ns))
     501           0 :           (xml-parse-tag-1 nil xml-ns))))
     502             :      ;; skip comments
     503           0 :      ((looking-at-p "<!--")
     504           0 :       (search-forward "-->")
     505             :       ;; FIXME: This loses the skipped-over spaces.
     506           0 :       (skip-syntax-forward " ")
     507           0 :       (unless (eobp)
     508           0 :         (let ((xml-sub-parser t))
     509           0 :           (xml-parse-tag-1 parse-dtd xml-ns))))
     510             :      ;; end tag
     511           0 :      ((looking-at-p "</")
     512             :       '())
     513             :      ;; opening tag
     514           1 :      ((looking-at (eval-when-compile (concat "<\\(" xml-name-re "\\)")))
     515           0 :       (goto-char (match-end 1))
     516             :       ;; Parse this node
     517           0 :       (let* ((node-name (match-string-no-properties 1))
     518             :              ;; Parse the attribute list.
     519           0 :              (attrs (xml-parse-attlist xml-ns))
     520             :              children)
     521             :         ;; add the xmlns:* attrs to our cache
     522           0 :         (when (consp xml-ns)
     523           0 :           (dolist (attr attrs)
     524           0 :             (when (and (consp (car attr))
     525           0 :                        (equal "http://www.w3.org/2000/xmlns/"
     526           0 :                               (caar attr)))
     527           0 :               (push (cons (cdar attr) (cdr attr))
     528           0 :                     (if (symbolp (car xml-ns))
     529           0 :                         (cdr xml-ns)
     530           0 :                       xml-ns)))))
     531           0 :         (setq children (list attrs (xml-maybe-do-ns node-name "" xml-ns)))
     532           0 :         (cond
     533             :          ;; is this an empty element ?
     534           0 :          ((looking-at-p "/>")
     535           0 :           (forward-char 2)
     536           0 :           (nreverse children))
     537             :          ;; is this a valid start tag ?
     538           0 :          ((eq (char-after) ?>)
     539           0 :           (forward-char 1)
     540             :           ;; Now check that we have the right end-tag.
     541           0 :           (let ((end (concat "</" node-name "\\s-*>")))
     542           0 :             (while (not (looking-at end))
     543           0 :               (cond
     544           0 :                ((eobp)
     545           0 :                 (error "XML: (Not Well-Formed) End of document while reading element `%s'"
     546           0 :                        node-name))
     547           0 :                ((looking-at-p "</")
     548           0 :                 (forward-char 2)
     549           0 :                 (error "XML: (Not Well-Formed) Invalid end tag `%s' (expecting `%s')"
     550           0 :                        (let ((pos (point)))
     551           0 :                          (buffer-substring pos (if (re-search-forward "\\s-*>" nil t)
     552           0 :                                                    (match-beginning 0)
     553           0 :                                                  (point-max))))
     554           0 :                        node-name))
     555             :                ;; Read a sub-element and push it onto CHILDREN.
     556           0 :                ((= (char-after) ?<)
     557           0 :                 (let ((tag (xml-parse-tag-1 nil xml-ns)))
     558           0 :                   (when tag
     559           0 :                     (push tag children))))
     560             :                ;; Read some character data.
     561             :                (t
     562           0 :                 (let ((expansion (xml-parse-string)))
     563           0 :                   (push (if (stringp (car children))
     564             :                             ;; If two strings were separated by a
     565             :                             ;; comment, concat them.
     566           0 :                             (concat (pop children) expansion)
     567           0 :                           expansion)
     568           0 :                         children)))))
     569             :             ;; Move point past the end-tag.
     570           0 :             (goto-char (match-end 0))
     571           0 :             (nreverse children)))
     572             :          ;; Otherwise this was an invalid start tag (expected ">" not found.)
     573             :          (t
     574           0 :           (error "XML: (Well-Formed) Couldn't parse tag: %s"
     575           0 :                  (buffer-substring-no-properties (- (point) 10) (+ (point) 1)))))))
     576             : 
     577             :      ;; (Not one of PI, CDATA, Comment, End tag, or Start tag)
     578             :      (t
     579           0 :       (unless xml-sub-parser   ; Usually, we error out.
     580           0 :         (error "XML: (Well-Formed) Invalid character"))
     581             :       ;; However, if we're parsing incrementally, then we need to deal
     582             :       ;; with stray CDATA.
     583           0 :       (let ((s (xml-parse-string)))
     584           0 :         (when (zerop (length s))
     585             :           ;; We haven't consumed any input! We must throw an error in
     586             :           ;; order to prevent looping forever.
     587           0 :           (error "XML: (Not Well-Formed) Could not parse: %s"
     588           0 :                  (buffer-substring-no-properties
     589           0 :                   (point) (min (+ (point) 10) (point-max)))))
     590           0 :         s)))))
     591             : 
     592             : (defun xml-parse-string ()
     593             :   "Parse character data at point, and return it as a string.
     594             : Leave point at the start of the next thing to parse.  This
     595             : function can modify the buffer by expanding entity and character
     596             : references."
     597           0 :   (let ((start (point))
     598             :         ;; Keep track of the size of the rest of the buffer:
     599           0 :         (old-remaining-size (- (buffer-size) (point)))
     600             :         ref val)
     601           0 :     (while (and (not (eobp))
     602           0 :                 (not (looking-at-p "<")))
     603             :       ;; Find the next < or & character.
     604           0 :       (skip-chars-forward "^<&")
     605           0 :       (when (eq (char-after) ?&)
     606             :         ;; If we find an entity or character reference, expand it.
     607           0 :         (unless (looking-at xml-entity-or-char-ref-re)
     608           0 :           (error "XML: (Not Well-Formed) Invalid entity reference"))
     609             :         ;; For a character reference, the next entity or character
     610             :         ;; reference must be after the replacement.  [4.6] "Numerical
     611             :         ;; character references are expanded immediately when
     612             :         ;; recognized and MUST be treated as character data."
     613           0 :         (if (setq ref (match-string 2))
     614           0 :             (progn  ; Numeric char reference
     615           0 :               (setq val (save-match-data
     616           0 :                           (decode-char 'ucs (string-to-number
     617           0 :                                              ref (if (match-string 1) 16)))))
     618           0 :               (and (null val)
     619           0 :                    xml-validating-parser
     620           0 :                    (error "XML: (Validity) Invalid character reference `%s'"
     621           0 :                           (match-string 0)))
     622           0 :               (replace-match (if val (string val) xml-undefined-entity) t t))
     623             :           ;; For an entity reference, search again from the start of
     624             :           ;; the replaced text, since the replacement can contain
     625             :           ;; entity or character references, or markup.
     626           0 :           (setq ref (match-string 3)
     627           0 :                 val (assoc ref xml-entity-alist))
     628           0 :           (and (null val)
     629           0 :                xml-validating-parser
     630           0 :                (error "XML: (Validity) Undefined entity `%s'" ref))
     631           0 :           (replace-match (or (cdr val) xml-undefined-entity) t t)
     632           0 :           (goto-char (match-beginning 0)))
     633             :         ;; Check for XML bombs.
     634           0 :         (and xml-entity-expansion-limit
     635           0 :              (> (- (buffer-size) (point))
     636           0 :                 (+ old-remaining-size xml-entity-expansion-limit))
     637           0 :              (error "XML: Entity reference expansion \
     638           0 : surpassed `xml-entity-expansion-limit'"))))
     639             :     ;; [2.11] Clean up line breaks.
     640           0 :     (let ((end-marker (point-marker)))
     641           0 :       (goto-char start)
     642           0 :       (while (re-search-forward "\r\n?" end-marker t)
     643           0 :         (replace-match "\n" t t))
     644           0 :       (goto-char end-marker)
     645           0 :       (buffer-substring start (point)))))
     646             : 
     647             : (defun xml-parse-attlist (&optional xml-ns)
     648             :   "Return the attribute-list after point.
     649             : Leave point at the first non-blank character after the tag."
     650           0 :   (let ((attlist ())
     651             :         end-pos name)
     652           0 :     (skip-syntax-forward " ")
     653           0 :     (while (looking-at (eval-when-compile
     654           1 :                          (concat "\\(" xml-name-re "\\)\\s-*=\\s-*")))
     655           0 :       (setq end-pos (match-end 0))
     656           0 :       (setq name (xml-maybe-do-ns (match-string-no-properties 1) nil xml-ns))
     657           0 :       (goto-char end-pos)
     658             : 
     659             :       ;; See also: http://www.w3.org/TR/2000/REC-xml-20001006#AVNormalize
     660             : 
     661             :       ;; Do we have a string between quotes (or double-quotes),
     662             :       ;;  or a simple word ?
     663           0 :       (if (looking-at "\"\\([^\"]*\\)\"")
     664           0 :           (setq end-pos (match-end 0))
     665           0 :         (if (looking-at "'\\([^']*\\)'")
     666           0 :             (setq end-pos (match-end 0))
     667           0 :           (error "XML: (Not Well-Formed) Attribute values must be given between quotes")))
     668             : 
     669             :       ;; Each attribute must be unique within a given element
     670           0 :       (if (assoc name attlist)
     671           0 :           (error "XML: (Not Well-Formed) Each attribute must be unique within an element"))
     672             : 
     673             :       ;; Multiple whitespace characters should be replaced with a single one
     674             :       ;; in the attributes
     675           0 :       (let ((string (match-string-no-properties 1)))
     676           0 :         (replace-regexp-in-string "\\s-\\{2,\\}" " " string)
     677           0 :         (let ((expansion (xml-substitute-special string)))
     678           0 :           (unless (stringp expansion)
     679             :             ;; We say this is the constraint.  It is actually that
     680             :             ;; neither external entities nor "<" can be in an
     681             :             ;; attribute value.
     682           0 :             (error "XML: (Not Well-Formed) Entities in attributes cannot expand into elements"))
     683           0 :           (push (cons name expansion) attlist)))
     684             : 
     685           0 :       (goto-char end-pos)
     686           0 :       (skip-syntax-forward " "))
     687           0 :     (nreverse attlist)))
     688             : 
     689             : ;;; DTD (document type declaration)
     690             : 
     691             : ;; The following functions know how to skip or parse the DTD of a
     692             : ;; document.  FIXME: it fails at least if the DTD contains conditional
     693             : ;; sections.
     694             : 
     695             : (defun xml-skip-dtd ()
     696             :   "Skip the DTD at point.
     697             : This follows the rule [28] in the XML specifications."
     698           0 :   (let ((xml-validating-parser nil))
     699           0 :     (xml-parse-dtd)))
     700             : 
     701             : (defun xml-parse-dtd (&optional _parse-ns)
     702             :   "Parse the DTD at point."
     703           1 :   (forward-char (eval-when-compile (length "<!DOCTYPE")))
     704           0 :   (skip-syntax-forward " ")
     705           0 :   (if (and (looking-at-p ">")
     706           0 :            xml-validating-parser)
     707           0 :       (error "XML: (Validity) Invalid DTD (expecting name of the document)"))
     708             : 
     709             :   ;;  Get the name of the document
     710           0 :   (looking-at xml-name-re)
     711           0 :   (let ((dtd (list (match-string-no-properties 0) 'dtd))
     712           0 :         (xml-parameter-entity-alist xml-parameter-entity-alist)
     713             :         next-parameter-entity)
     714           0 :     (goto-char (match-end 0))
     715           0 :     (skip-syntax-forward " ")
     716             : 
     717             :     ;; External subset (XML [75])
     718           0 :     (cond ((looking-at "PUBLIC\\s-+")
     719           0 :            (goto-char (match-end 0))
     720           0 :            (unless (or (re-search-forward
     721             :                         "\\=\"\\([[:space:][:alnum:]-'()+,./:=?;!*#@$_%]*\\)\""
     722           0 :                         nil t)
     723           0 :                        (re-search-forward
     724             :                         "\\='\\([[:space:][:alnum:]-()+,./:=?;!*#@$_%]*\\)'"
     725           0 :                         nil t))
     726           0 :              (error "XML: Missing Public ID"))
     727           0 :            (let ((pubid (match-string-no-properties 1)))
     728           0 :              (skip-syntax-forward " ")
     729           0 :              (unless (or (re-search-forward "\\='\\([^']*\\)'" nil t)
     730           0 :                          (re-search-forward "\\=\"\\([^\"]*\\)\"" nil t))
     731           0 :                (error "XML: Missing System ID"))
     732           0 :              (push (list pubid (match-string-no-properties 1) 'public) dtd)))
     733           0 :           ((looking-at "SYSTEM\\s-+")
     734           0 :            (goto-char (match-end 0))
     735           0 :            (unless (or (re-search-forward "\\='\\([^']*\\)'" nil t)
     736           0 :                        (re-search-forward "\\=\"\\([^\"]*\\)\"" nil t))
     737           0 :              (error "XML: Missing System ID"))
     738           0 :            (push (list (match-string-no-properties 1) 'system) dtd)))
     739           0 :     (skip-syntax-forward " ")
     740             : 
     741           0 :     (if (eq (char-after) ?>)
     742             : 
     743             :         ;; No internal subset
     744           0 :         (forward-char)
     745             : 
     746             :       ;; Internal subset (XML [28b])
     747           0 :       (unless (eq (char-after) ?\[)
     748           0 :         (error "XML: Bad DTD"))
     749           0 :       (forward-char)
     750             : 
     751             :       ;; [2.8]: "markup declarations may be made up in whole or in
     752             :       ;; part of the replacement text of parameter entities."
     753             : 
     754             :       ;; Since parameter entities are valid only within the DTD, we
     755             :       ;; first search for the position of the next possible parameter
     756             :       ;; entity.  Then, search for the next DTD element; if it ends
     757             :       ;; before the next parameter entity, expand the parameter entity
     758             :       ;; and try again.
     759           0 :       (setq next-parameter-entity
     760           0 :             (save-excursion
     761           0 :               (if (re-search-forward xml-pe-reference-re nil t)
     762           0 :                   (match-beginning 0))))
     763             : 
     764             :       ;; Parse the rest of the DTD
     765             :       ;; Fixme: Deal with NOTATION, PIs.
     766           0 :       (while (not (looking-at-p "\\s-*\\]"))
     767           0 :         (skip-syntax-forward " ")
     768           0 :         (cond
     769           0 :          ((eobp)
     770           0 :           (error "XML: (Well-Formed) End of document while reading DTD"))
     771             :          ;; Element declaration [45]:
     772           0 :          ((and (looking-at (eval-when-compile
     773           1 :                              (concat "<!ELEMENT\\s-+\\(" xml-name-re
     774           1 :                                      "\\)\\s-+\\([^>]+\\)>")))
     775           0 :                (or (null next-parameter-entity)
     776           0 :                    (<= (match-end 0) next-parameter-entity)))
     777           0 :           (let ((element (match-string-no-properties 1))
     778           0 :                 (type    (match-string-no-properties 2))
     779           0 :                 (end-pos (match-end 0)))
     780             :             ;; Translation of rule [46] of XML specifications
     781           0 :             (cond
     782           0 :              ((string-match-p "\\`EMPTY\\s-*\\'" type)  ; empty declaration
     783           0 :               (setq type 'empty))
     784           0 :              ((string-match-p "\\`ANY\\s-*$" type)      ; any type of contents
     785           0 :               (setq type 'any))
     786           0 :              ((string-match "\\`(\\(.*\\))\\s-*\\'" type) ; children ([47])
     787           0 :               (setq type (xml-parse-elem-type
     788           0 :                           (match-string-no-properties 1 type))))
     789           0 :              ((string-match-p "^%[^;]+;[ \t\n\r]*\\'" type) ; substitution
     790             :               nil)
     791           0 :              (xml-validating-parser
     792           0 :               (error "XML: (Validity) Invalid element type in the DTD")))
     793             : 
     794             :             ;; rule [45]: the element declaration must be unique
     795           0 :             (and (assoc element dtd)
     796           0 :                  xml-validating-parser
     797           0 :                  (error "XML: (Validity) DTD element declarations must be unique (<%s>)"
     798           0 :                         element))
     799             : 
     800             :             ;;  Store the element in the DTD
     801           0 :             (push (list element type) dtd)
     802           0 :             (goto-char end-pos)))
     803             : 
     804             :          ;; Attribute-list declaration [52] (currently unsupported):
     805           0 :          ((and (looking-at (eval-when-compile
     806           1 :                              (concat "<!ATTLIST[ \t\n\r]*\\(" xml-name-re
     807           1 :                                      "\\)[ \t\n\r]*\\(" xml-att-def-re
     808           1 :                                      "\\)*[ \t\n\r]*>")))
     809           0 :                (or (null next-parameter-entity)
     810           0 :                    (<= (match-end 0) next-parameter-entity)))
     811           0 :           (goto-char (match-end 0)))
     812             : 
     813             :          ;; Comments (skip to end, ignoring parameter entity):
     814           0 :          ((looking-at-p "<!--")
     815           0 :           (search-forward "-->")
     816           0 :           (and next-parameter-entity
     817           0 :                (> (point) next-parameter-entity)
     818           0 :                (setq next-parameter-entity
     819           0 :                      (save-excursion
     820           0 :                        (if (re-search-forward xml-pe-reference-re nil t)
     821           0 :                            (match-beginning 0))))))
     822             : 
     823             :          ;; Internal entity declarations:
     824           0 :          ((and (looking-at (eval-when-compile
     825           1 :                              (concat "<!ENTITY[ \t\n\r]+\\(%[ \t\n\r]+\\)?\\("
     826           1 :                                      xml-name-re "\\)[ \t\n\r]*\\("
     827           1 :                                      xml-entity-value-re "\\)[ \t\n\r]*>")))
     828           0 :                (or (null next-parameter-entity)
     829           0 :                    (<= (match-end 0) next-parameter-entity)))
     830           0 :           (let* ((name (prog1 (match-string-no-properties 2)
     831           0 :                          (goto-char (match-end 0))))
     832           0 :                  (alist (if (match-string 1)
     833             :                             'xml-parameter-entity-alist
     834           0 :                           'xml-entity-alist))
     835             :                  ;; Retrieve the deplacement text:
     836           0 :                  (value (xml--entity-replacement-text
     837             :                          ;; Entity value, sans quotation marks:
     838           0 :                          (substring (match-string-no-properties 3) 1 -1))))
     839             :             ;; If the same entity is declared more than once, the
     840             :             ;; first declaration is binding.
     841           0 :             (unless (assoc name (symbol-value alist))
     842           0 :               (set alist (cons (cons name value) (symbol-value alist))))))
     843             : 
     844             :          ;; External entity declarations (currently unsupported):
     845           0 :          ((and (or (looking-at (eval-when-compile
     846           1 :                                  (concat "<!ENTITY[ \t\n\r]+\\(%[ \t\n\r]+\\)?\\("
     847           1 :                                          xml-name-re "\\)[ \t\n\r]+SYSTEM[ \t\n\r]+"
     848           1 :                                          "\\(\"[^\"]*\"\\|'[^']*'\\)[ \t\n\r]*>")))
     849           0 :                    (looking-at (eval-when-compile
     850           1 :                                  (concat "<!ENTITY[ \t\n\r]+\\(%[ \t\n\r]+\\)?\\("
     851           1 :                                          xml-name-re "\\)[ \t\n\r]+PUBLIC[ \t\n\r]+"
     852             :                                          "\"[- \r\na-zA-Z0-9'()+,./:=?;!*#@$_%]*\""
     853             :                                          "\\|'[- \r\na-zA-Z0-9()+,./:=?;!*#@$_%]*'"
     854             :                                          "[ \t\n\r]+\\(\"[^\"]*\"\\|'[^']*'\\)"
     855           1 :                                          "[ \t\n\r]*>"))))
     856           0 :                (or (null next-parameter-entity)
     857           0 :                    (<= (match-end 0) next-parameter-entity)))
     858           0 :           (goto-char (match-end 0)))
     859             : 
     860             :          ;; If a parameter entity is in the way, expand it.
     861           0 :          (next-parameter-entity
     862           0 :           (save-excursion
     863           0 :             (goto-char next-parameter-entity)
     864           0 :             (unless (looking-at xml-pe-reference-re)
     865           0 :               (error "XML: Internal error"))
     866           0 :             (let* ((entity (match-string 1))
     867           0 :                    (elt    (assoc entity xml-parameter-entity-alist)))
     868           0 :               (if elt
     869           0 :                   (progn
     870           0 :                     (replace-match (cdr elt) t t)
     871             :                     ;; The replacement can itself be a parameter entity.
     872           0 :                     (goto-char next-parameter-entity))
     873           0 :                 (goto-char (match-end 0))))
     874           0 :             (setq next-parameter-entity
     875           0 :                   (if (re-search-forward xml-pe-reference-re nil t)
     876           0 :                       (match-beginning 0)))))
     877             : 
     878             :          ;; Anything else is garbage (ignored if not validating).
     879           0 :          (xml-validating-parser
     880           0 :           (error "XML: (Validity) Invalid DTD item"))
     881             :          (t
     882           0 :           (skip-chars-forward "^]"))))
     883             : 
     884           0 :       (if (looking-at "\\s-*]>")
     885           0 :           (goto-char (match-end 0))))
     886           0 :     (nreverse dtd)))
     887             : 
     888             : (defun xml--entity-replacement-text (string)
     889             :   "Return the replacement text for the entity value STRING.
     890             : The replacement text is obtained by replacing character
     891             : references and parameter-entity references."
     892           0 :   (let ((ref-re (eval-when-compile
     893           1 :                   (concat "\\(?:&#\\([0-9]+\\)\\|&#x\\([0-9a-fA-F]+\\)\\|%\\("
     894           1 :                           xml-name-re "\\)\\);")))
     895             :         children)
     896           0 :     (while (string-match ref-re string)
     897           0 :       (push (substring string 0 (match-beginning 0)) children)
     898           0 :       (let ((remainder (substring string (match-end 0)))
     899             :             ref val)
     900           0 :         (cond ((setq ref (match-string 1 string))
     901             :                ;; Decimal character reference
     902           0 :                (setq val (decode-char 'ucs (string-to-number ref)))
     903           0 :                (if val (push (string val) children)))
     904             :               ;; Hexadecimal character reference
     905           0 :               ((setq ref (match-string 2 string))
     906           0 :                (setq val (decode-char 'ucs (string-to-number ref 16)))
     907           0 :                (if val (push (string val) children)))
     908             :               ;; Parameter entity reference
     909           0 :               ((setq ref (match-string 3 string))
     910           0 :                (setq val (assoc ref xml-parameter-entity-alist))
     911           0 :                (and (null val)
     912           0 :                     xml-validating-parser
     913           0 :                     (error "XML: (Validity) Undefined parameter entity `%s'" ref))
     914           0 :                (push (or (cdr val) xml-undefined-entity) children)))
     915           0 :         (setq string remainder)))
     916           0 :     (mapconcat 'identity (nreverse (cons string children)) "")))
     917             : 
     918             : (defun xml-parse-elem-type (string)
     919             :   "Convert element type STRING into a Lisp structure."
     920             : 
     921           0 :   (let (elem modifier)
     922           0 :     (if (string-match "(\\([^)]+\\))\\([+*?]?\\)" string)
     923           0 :         (progn
     924           0 :           (setq elem     (match-string-no-properties 1 string)
     925           0 :                 modifier (match-string-no-properties 2 string))
     926           0 :           (if (string-match-p "|" elem)
     927           0 :               (setq elem (cons 'choice
     928           0 :                                (mapcar 'xml-parse-elem-type
     929           0 :                                        (split-string elem "|"))))
     930           0 :             (if (string-match-p "," elem)
     931           0 :                 (setq elem (cons 'seq
     932           0 :                                  (mapcar 'xml-parse-elem-type
     933           0 :                                          (split-string elem ",")))))))
     934           0 :       (if (string-match "[ \t\n\r]*\\([^+*?]+\\)\\([+*?]?\\)" string)
     935           0 :           (setq elem     (match-string-no-properties 1 string)
     936           0 :                 modifier (match-string-no-properties 2 string))))
     937             : 
     938           0 :     (if (and (stringp elem) (string= elem "#PCDATA"))
     939           0 :         (setq elem 'pcdata))
     940             : 
     941           0 :     (cond
     942           0 :      ((string= modifier "+")
     943           0 :       (list '+ elem))
     944           0 :      ((string= modifier "*")
     945           0 :       (list '* elem))
     946           0 :      ((string= modifier "?")
     947           0 :       (list '\? elem))
     948             :      (t
     949           0 :       elem))))
     950             : 
     951             : ;;; Substituting special XML sequences
     952             : 
     953             : (defun xml-substitute-special (string)
     954             :   "Return STRING, after substituting entity and character references.
     955             : STRING is assumed to occur in an XML attribute value."
     956           0 :   (let ((strlen (length string))
     957             :         children)
     958           0 :     (while (string-match xml-entity-or-char-ref-re string)
     959           0 :       (push (substring string 0 (match-beginning 0)) children)
     960           0 :       (let* ((remainder (substring string (match-end 0)))
     961           0 :              (is-hex (match-string 1 string)) ; Is it a hex numeric reference?
     962           0 :              (ref (match-string 2 string)))   ; Numeric part of reference
     963           0 :         (if ref
     964             :             ;; [4.6] Character references are included as
     965             :             ;; character data.
     966           0 :             (let ((val (decode-char 'ucs (string-to-number ref (if is-hex 16)))))
     967           0 :               (push (cond (val (string val))
     968           0 :                           (xml-validating-parser
     969           0 :                            (error "XML: (Validity) Undefined character `x%s'" ref))
     970           0 :                           (t xml-undefined-entity))
     971           0 :                     children)
     972           0 :               (setq string remainder
     973           0 :                     strlen (length string)))
     974             :           ;; [4.4.5] Entity references are "included in literal".
     975             :           ;; Note that we don't need do anything special to treat
     976             :           ;; quotes as normal data characters.
     977           0 :           (setq ref (match-string 3 string)) ; entity name
     978           0 :           (let ((val (or (cdr (assoc ref xml-entity-alist))
     979           0 :                          (if xml-validating-parser
     980           0 :                              (error "XML: (Validity) Undefined entity `%s'" ref)
     981           0 :                            xml-undefined-entity))))
     982           0 :             (setq string (concat val remainder)))
     983           0 :           (and xml-entity-expansion-limit
     984           0 :                (> (length string) (+ strlen xml-entity-expansion-limit))
     985           0 :                (error "XML: Passed `xml-entity-expansion-limit' while expanding `&%s;'"
     986           0 :                       ref)))))
     987           0 :     (mapconcat 'identity (nreverse (cons string children)) "")))
     988             : 
     989             : (defun xml-substitute-numeric-entities (string)
     990             :   "Substitute SGML numeric entities by their respective utf characters.
     991             : This function replaces numeric entities in the input STRING and
     992             : returns the modified string.  For example \"&#42;\" gets replaced
     993             : by \"*\"."
     994           0 :   (if (and string (stringp string))
     995           0 :       (let ((start 0))
     996           0 :         (while (string-match "&#\\([0-9]+\\);" string start)
     997           0 :           (ignore-errors
     998           0 :             (setq string (replace-match
     999           0 :                           (string (read (substring string
    1000           0 :                                                    (match-beginning 1)
    1001           0 :                                                    (match-end 1))))
    1002           0 :                           nil nil string)))
    1003           0 :           (setq start (1+ (match-beginning 0))))
    1004           0 :         string)
    1005           0 :     nil))
    1006             : 
    1007             : ;;; Printing a parse tree (mainly for debugging).
    1008             : 
    1009             : (defun xml-debug-print (xml &optional indent-string)
    1010             :   "Outputs the XML in the current buffer.
    1011             : XML can be a tree or a list of nodes.
    1012             : The first line is indented with the optional INDENT-STRING."
    1013           0 :   (setq indent-string (or indent-string ""))
    1014           0 :   (dolist (node xml)
    1015           0 :     (xml-debug-print-internal node indent-string)))
    1016             : 
    1017             : (defalias 'xml-print 'xml-debug-print)
    1018             : 
    1019             : (defun xml-escape-string (string)
    1020             :   "Convert STRING into a string containing valid XML character data.
    1021             : Replace occurrences of &<>\\='\" in STRING with their default XML
    1022             : entity references (e.g., replace each & with &amp;).
    1023             : 
    1024             : XML character data must not contain & or < characters, nor the >
    1025             : character under some circumstances.  The XML spec does not impose
    1026             : restriction on \" or \\=', but we just substitute for these too
    1027             : \(as is permitted by the spec)."
    1028           0 :   (with-temp-buffer
    1029           0 :     (insert string)
    1030           0 :     (dolist (substitution '(("&" . "&amp;")
    1031             :                             ("<" . "&lt;")
    1032             :                             (">" . "&gt;")
    1033             :                             ("'" . "&apos;")
    1034             :                             ("\"" . "&quot;")))
    1035           0 :       (goto-char (point-min))
    1036           0 :       (while (search-forward (car substitution) nil t)
    1037           0 :         (replace-match (cdr substitution) t t nil)))
    1038           0 :     (buffer-string)))
    1039             : 
    1040             : (defun xml-debug-print-internal (xml indent-string)
    1041             :   "Outputs the XML tree in the current buffer.
    1042             : The first line is indented with INDENT-STRING."
    1043           0 :   (let ((tree xml)
    1044             :         attlist)
    1045           0 :     (insert indent-string ?< (symbol-name (xml-node-name tree)))
    1046             : 
    1047             :     ;;  output the attribute list
    1048           0 :     (setq attlist (xml-node-attributes tree))
    1049           0 :     (while attlist
    1050           0 :       (insert ?\  (symbol-name (caar attlist)) "=\""
    1051           0 :               (xml-escape-string (cdar attlist)) ?\")
    1052           0 :       (setq attlist (cdr attlist)))
    1053             : 
    1054           0 :     (setq tree (xml-node-children tree))
    1055             : 
    1056           0 :     (if (null tree)
    1057           0 :         (insert ?/ ?>)
    1058           0 :       (insert ?>)
    1059             : 
    1060             :       ;;  output the children
    1061           0 :       (dolist (node tree)
    1062           0 :         (cond
    1063           0 :          ((listp node)
    1064           0 :           (insert ?\n)
    1065           0 :           (xml-debug-print-internal node (concat indent-string "  ")))
    1066           0 :          ((stringp node)
    1067           0 :           (insert (xml-escape-string node)))
    1068             :          (t
    1069           0 :           (error "Invalid XML tree"))))
    1070             : 
    1071           0 :       (when (not (and (null (cdr tree))
    1072           0 :                       (stringp (car tree))))
    1073           0 :         (insert ?\n indent-string))
    1074           0 :       (insert ?< ?/ (symbol-name (xml-node-name xml)) ?>))))
    1075             : 
    1076             : (provide 'xml)
    1077             : 
    1078             : ;;; xml.el ends here

Generated by: LCOV version 1.12