(in-package :user)
-(let ((filenames
+(let ((filenames
(list
- "pxml0"
- "pxml1"
- "pxml3"
- "pxml2")))
+ "pxml0"
+ "pxml1"
+ "pxml3"
+ "pxml2")))
(dolist (f filenames)
(compile-file-if-needed (concatenate 'string f ".cl"))
(load (concatenate 'string f ".fasl")))
-
+
(with-open-file (out "pxml.fasl"
- :element-type '(unsigned-byte 8)
- :direction :output
- :if-exists :supersede
- :if-does-not-exist :create)
+ :element-type '(unsigned-byte 8)
+ :direction :output
+ :if-exists :supersede
+ :if-does-not-exist :create)
(dolist (file filenames)
(with-open-file (in (concatenate 'string file ".fasl")
- :element-type '(unsigned-byte 8))
+ :element-type '(unsigned-byte 8))
(format t "~%; ~s" file)
- (let ((buf (make-array 2048 :element-type '(unsigned-byte 8))))
- (loop as x = (read-sequence buf in)
- until (= x 0)
- do (write-sequence buf out :end x)))))))
-
+ (let ((buf (make-array 2048 :element-type '(unsigned-byte 8))))
+ (loop as x = (read-sequence buf in)
+ until (= x 0)
+ do (write-sequence buf out :end x)))))))
+
-;; copyright (c) 1986-2000 Franz Inc, Berkeley, CA
+;; copyright (c) 1986-2000 Franz Inc, Berkeley, CA
;;
;; This code is free software; you can redistribute it and/or
;; modify it under the terms of the version 2.1 of
-;; the GNU Lesser General Public License as published by
+;; the GNU Lesser General Public License as published by
;; the Free Software Foundation, as clarified by the AllegroServe
;; prequel found in license-allegroserve.txt.
;;
;; merchantability or fitness for a particular purpose. See the GNU
;; Lesser General Public License for more details.
;;
-;; Version 2.1 of the GNU Lesser General Public License is in the file
+;; Version 2.1 of the GNU Lesser General Public License is in the file
;; license-lgpl.txt that was distributed with this file.
;; If it is not present, you can access it from
;; http://www.gnu.org/copyleft/lesser.txt (until superseded by a newer
-;; version) or write to the Free Software Foundation, Inc., 59 Temple Place,
+;; version) or write to the Free Software Foundation, Inc., 59 Temple Place,
;; Suite 330, Boston, MA 02111-1307 USA
;;
<!-- this should be <h1>one</h1> string -->
<head>
<style> this should be <h1>one</h1> string </STYLE>
- <title> this is some title text </title>
+ <title> this is some title text </title>
<body> this is some body text
<a name=\"this is an anchor\">with some text </a>
<!-- testing allowing looser attribute parsing -->
'((:html
(:comment "this should be <h1>one</h1> string")
(:head
- (:style "this should be <h1>one</h1> string")
- (:title "this is some title text"))
- (:body
- "this is some body text"
+ (:style "this should be <h1>one</h1> string")
+ (:title "this is some title text"))
+ (:body
+ "this is some body text"
((:a :name "this is an anchor") "with some text")
- (:comment "testing allowing looser attribute parsing")
- ((:a :href "mailto:lmcelroy@performigence.com")
- "lmcelroy@performigence.com")
- :br
- "this is some more text"
- (:bogus "tests parser 'looseness'")
- (:select
- (:option "1")
- (:option "2"))
- (:ul
- (:li "item 1")
- (:li "item 2"))
- (:dl
- (:dt "a term")
- (:dd "its definition")
- (:dt "another term")
- (:dd "another definition"))
- (:table
- (:colgroup
- ((:col :align "right"))
- ((:col :align "center")))
- (:thead
- (:tr
- (:th "this cell is aligned right")
- (:th "this cell is centered")))
- (:tfoot
- (:tr
- (:th "this cell is aligned right")
- (:th "this cell is centered")))
- (:tbody
- (:tr
- (:td "this cell is aligned right")
- (:td "this cell is centered")))
- (:tbody
- (:tr
- (:td "this cell is aligned right")
- (:td "this cell is centered"))))
- (:pp
- (:object
- (:pp "Navigate the site:"
- ((:map :name "mainmap")
- ((:area :shape "rect" :coords "0,100,100,200"))
- ((:area :shape "rect" :coords "100,100,100,200"))))))
- (:abbr "WWW")
- "is an abbreviation"
- (:b "force")
- (:pp "whitespace only")
- ))))
+ (:comment "testing allowing looser attribute parsing")
+ ((:a :href "mailto:lmcelroy@performigence.com")
+ "lmcelroy@performigence.com")
+ :br
+ "this is some more text"
+ (:bogus "tests parser 'looseness'")
+ (:select
+ (:option "1")
+ (:option "2"))
+ (:ul
+ (:li "item 1")
+ (:li "item 2"))
+ (:dl
+ (:dt "a term")
+ (:dd "its definition")
+ (:dt "another term")
+ (:dd "another definition"))
+ (:table
+ (:colgroup
+ ((:col :align "right"))
+ ((:col :align "center")))
+ (:thead
+ (:tr
+ (:th "this cell is aligned right")
+ (:th "this cell is centered")))
+ (:tfoot
+ (:tr
+ (:th "this cell is aligned right")
+ (:th "this cell is centered")))
+ (:tbody
+ (:tr
+ (:td "this cell is aligned right")
+ (:td "this cell is centered")))
+ (:tbody
+ (:tr
+ (:td "this cell is aligned right")
+ (:td "this cell is centered"))))
+ (:pp
+ (:object
+ (:pp "Navigate the site:"
+ ((:map :name "mainmap")
+ ((:area :shape "rect" :coords "0,100,100,200"))
+ ((:area :shape "rect" :coords "100,100,100,200"))))))
+ (:abbr "WWW")
+ "is an abbreviation"
+ (:b "force")
+ (:pp "whitespace only")
+ ))))
(setf *test-string2*
"<i><b id=1>text</i> more text</b>
(setf *test-string3*
"<ICMETA URL='nytimes.html'>
<NYT_HEADER version='1.0' type='homepage'>
-<body bgcolor='#ffffff' background='back5.gif'
+<body bgcolor='#ffffff' background='back5.gif'
vlink='4' link='6'>
<NYT_BANNER version='1.0' type='homepage'>
<table border=0 cellspacing=0 cellpadding=0>
((:table :border "0" :cellspacing "0" :cellpadding "0")
(:tr
((:td :bgcolor "0" :rowspan "4" :width "126" :align "left" :valign "center")
- ((:nyt_ad :version "1.0" :location "")
- ((:a :href "ads.gif" :target "top")
- ((:img :src "http://ads2.gif" :border "0" :width "120" :height "90" :alt
- "E-Mail Updates from NYTimes.com"))))))))))
+ ((:nyt_ad :version "1.0" :location "")
+ ((:a :href "ads.gif" :target "top")
+ ((:img :src "http://ads2.gif" :border "0" :width "120" :height "90" :alt
+ "E-Mail Updates from NYTimes.com"))))))))))
(defmethod lhtml-equal ((a t) (b t))
(loop
(if* (and (= i (length a)) (= j (length b))) then (return t)
elseif (and (< i (length a)) (white-space-p (nth i a))) then
- (incf i)
+ (incf i)
elseif (white-space-p (nth j b)) then
- (incf j)
+ (incf j)
elseif (and (= i (length a)) (/= j (length b))) then
- (return
- (loop
- (when (= j (length b)) (return t))
- (when (not (white-space-p (nth j b))) (return nil))
- (incf j)))
+ (return
+ (loop
+ (when (= j (length b)) (return t))
+ (when (not (white-space-p (nth j b))) (return nil))
+ (incf j)))
elseif (and (/= i (length a)) (= j (length b))) then
- (return
- (loop
- (when (= i (length a)) (return t))
- (when (not (white-space-p (nth i a))) (return nil))
- (incf i)))
+ (return
+ (loop
+ (when (= i (length a)) (return t))
+ (when (not (white-space-p (nth i a))) (return nil))
+ (incf i)))
elseif (not (lhtml-equal (nth i a) (nth j b))) then
- (return nil)
- else
- (incf i)
- (incf j)))))
+ (return nil)
+ else
+ (incf i)
+ (incf j)))))
(defmethod lhtml-equal ((a string) (b string))
(let ((i 0) (j 0))
;; skip white space in beginning
(loop
(let ((char (elt a i)))
- (when (and (not (eq char #\space))
- (not (eq char #\tab))
- (not (eq char #\return))
- (not (eq char #\linefeed)))
- (return)))
+ (when (and (not (eq char #\space))
+ (not (eq char #\tab))
+ (not (eq char #\return))
+ (not (eq char #\linefeed)))
+ (return)))
(incf i))
(loop
(let ((char (elt b j)))
- (when (and (not (eq char #\space))
- (not (eq char #\tab))
- (not (eq char #\return))
- (not (eq char #\linefeed)))
- (return)))
+ (when (and (not (eq char #\space))
+ (not (eq char #\tab))
+ (not (eq char #\return))
+ (not (eq char #\linefeed)))
+ (return)))
(incf j))
(loop
(when (and (= i (length a)) (= j (length b))) (return t))
(when (and (= i (length a)) (/= j (length b)))
- (return
- (loop
- (when (= j (length b)) (return t))
- (let ((char (elt b j)))
- (when (and (not (eq char #\space))
- (not (eq char #\tab))
- (not (eq char #\return))
- (not (eq char #\linefeed)))
- (return t)))
- (incf j))))
+ (return
+ (loop
+ (when (= j (length b)) (return t))
+ (let ((char (elt b j)))
+ (when (and (not (eq char #\space))
+ (not (eq char #\tab))
+ (not (eq char #\return))
+ (not (eq char #\linefeed)))
+ (return t)))
+ (incf j))))
(when (and (/= i (length a)) (= j (length b)))
- (return
- (loop
- (when (= i (length a)) (return t))
- (let ((char (elt a i)))
- (when (and (not (eq char #\space))
- (not (eq char #\tab))
- (not (eq char #\return))
- (not (eq char #\linefeed)))
- (return t)))
- (incf i))))
+ (return
+ (loop
+ (when (= i (length a)) (return t))
+ (let ((char (elt a i)))
+ (when (and (not (eq char #\space))
+ (not (eq char #\tab))
+ (not (eq char #\return))
+ (not (eq char #\linefeed)))
+ (return t)))
+ (incf i))))
(when (not (eq (elt a i) (elt b j))) (return nil))
(incf i)
(incf j))))
(defmethod white-space-p ((a string))
(let ((i 0)
- (length (length a)))
+ (length (length a)))
(loop
(when (= i length) (return t))
(let ((char (elt a i)))
- (when (and (not (eq char #\space))
- (not (eq char #\tab))
- (not (eq char #\return))
- (not (eq char #\linefeed)))
- (return nil)))
+ (when (and (not (eq char #\space))
+ (not (eq char #\tab))
+ (not (eq char #\return))
+ (not (eq char #\linefeed)))
+ (return nil)))
(incf i))))
;;------------------------------------------------
(incf *callback-called*)
(if* (= *pass* 0)
then
- (incf *pass*)
- (test t (lhtml-equal arg
- '((:a :name "this is an anchor")
- "with some text")))
+ (incf *pass*)
+ (test t (lhtml-equal arg
+ '((:a :name "this is an anchor")
+ "with some text")))
else
- (setf *pass* 0)
- (test t (lhtml-equal arg
- '((:a :href
- "mailto:lmcelroy@performigence.com")
- "lmcelroy@performigence.com"))))))
+ (setf *pass* 0)
+ (test t (lhtml-equal arg
+ '((:a :href
+ "mailto:lmcelroy@performigence.com")
+ "lmcelroy@performigence.com"))))))
(let ((*pass* 0))
(defun nested-callback (arg)
(incf *callback-called*)
(if* (= *pass* 0)
then
- (incf *pass*)
- (test t (lhtml-equal arg
- '(:pp "Navigate the site:"
- ((:map :name "mainmap")
- ((:area :shape "rect" :coords "0,100,100,200"))
- ((:area :shape "rect" :coords "100,100,100,200"))))))
+ (incf *pass*)
+ (test t (lhtml-equal arg
+ '(:pp "Navigate the site:"
+ ((:map :name "mainmap")
+ ((:area :shape "rect" :coords "0,100,100,200"))
+ ((:area :shape "rect" :coords "100,100,100,200"))))))
elseif (= *pass* 1)
then
- (incf *pass*)
- (test t (lhtml-equal arg
- '(:pp
- (:object
- (:pp "Navigate the site:"
- ((:map :name "mainmap")
- ((:area :shape "rect" :coords "0,100,100,200"))
- ((:area :shape "rect"
- :coords "100,100,100,200"))))))))
+ (incf *pass*)
+ (test t (lhtml-equal arg
+ '(:pp
+ (:object
+ (:pp "Navigate the site:"
+ ((:map :name "mainmap")
+ ((:area :shape "rect" :coords "0,100,100,200"))
+ ((:area :shape "rect"
+ :coords "100,100,100,200"))))))))
else
- (setf *pass* 0)
- (test t (lhtml-equal arg
- '(:pp "whitespace only"))))))
+ (setf *pass* 0)
+ (test t (lhtml-equal arg
+ '(:pp "whitespace only"))))))
(defun testit ()
(let ((util.test:*test-errors* 0)
- (util.test:*test-successes* 0))
+ (util.test:*test-successes* 0))
(test t (lhtml-equal (parse-html *test-string2*) *expected-result2*))
(setf *callback-called* 0)
(test t (lhtml-equal (parse-html *test-string*) *expected-result*))
(test 0 *callback-called*)
;;(setf (element-callback :a) 'callback-test-func)
(setf *callback-called* 0)
- (test t (lhtml-equal (parse-html *test-string*
- :callbacks (acons :a 'callback-test-func nil))
- *expected-result*))
+ (test t (lhtml-equal (parse-html *test-string*
+ :callbacks (acons :a 'callback-test-func nil))
+ *expected-result*))
(test 2 *callback-called*)
(setf *callback-called* 0)
(test t (lhtml-equal (parse-html *test-string*) *expected-result*))
(setf *callback-called* 0)
;; make sure function is OK arg
;;(setf (element-callback :a) (symbol-function 'callback-test-func))
- (test t (lhtml-equal
- (parse-html *test-string*
- :callbacks (acons :a (symbol-function 'callback-test-func) nil))
- *expected-result*))
+ (test t (lhtml-equal
+ (parse-html *test-string*
+ :callbacks (acons :a (symbol-function 'callback-test-func) nil))
+ *expected-result*))
(test 2 *callback-called*)
;; try with :callback-only t
(setf *callback-called* 0)
;;(setf (element-callback :a) 'callback-test-func)
(parse-html *test-string* :callback-only t
- :callbacks (acons :a 'callback-test-func nil)) ;; won't return parse output
+ :callbacks (acons :a 'callback-test-func nil)) ;; won't return parse output
(test 2 *callback-called*)
;; try nested callback
(setf *callback-called* 0)
;;(setf (element-callback :p) 'nested-callback)
(test t (lhtml-equal (parse-html *test-string*
- :callbacks (acons :pp 'nested-callback nil))
- *expected-result*))
+ :callbacks (acons :pp 'nested-callback nil))
+ *expected-result*))
(test 3 *callback-called*)
(setf *callback-called* 0)
(parse-html *test-string* :callback-only t
- :callbacks (acons :pp 'nested-callback nil))
+ :callbacks (acons :pp 'nested-callback nil))
(test 3 *callback-called*)
(test-error (parse-html "b<a"))
(test t (lhtml-equal
- (multiple-value-bind (res rogues)
- (parse-html *test-string3* :collect-rogue-tags t)
- (declare (ignorable res))
- (parse-html *test-string3* :no-body-tags rogues))
- *expected-result3*))
+ (multiple-value-bind (res rogues)
+ (parse-html *test-string3* :collect-rogue-tags t)
+ (declare (ignorable res))
+ (parse-html *test-string3* :no-body-tags rogues))
+ *expected-result3*))
(format t "End test: ~s, ~d errors, ~d successes~%"
- "parse-html" util.test:*test-errors* util.test:*test-successes*)
+ "parse-html" util.test:*test-errors* util.test:*test-successes*)
))
:type :system
:post-loadable t)
-;; copyright (c) 1986-2000 Franz Inc, Berkeley, CA
+;; copyright (c) 1986-2000 Franz Inc, Berkeley, CA
;;
;; This code is free software; you can redistribute it and/or
;; modify it under the terms of the version 2.1 of
-;; the GNU Lesser General Public License as published by
+;; the GNU Lesser General Public License as published by
;; the Free Software Foundation, as clarified by the AllegroServe
;; prequel found in license-allegroserve.txt.
;;
;; merchantability or fitness for a particular purpose. See the GNU
;; Lesser General Public License for more details.
;;
-;; Version 2.1 of the GNU Lesser General Public License is in the file
+;; Version 2.1 of the GNU Lesser General Public License is in the file
;; license-lgpl.txt that was distributed with this file.
;; If it is not present, you can access it from
;; http://www.gnu.org/copyleft/lesser.txt (until superseded by a newer
-;; version) or write to the Free Software Foundation, Inc., 59 Temple Place,
+;; version) or write to the Free Software Foundation, Inc., 59 Temple Place,
;; Suite 330, Boston, MA 02111-1307 USA
;;
;; Change Log
;; 05/14/02 - add :parse-entities arg to parse-html. If true then
-;; entities are converted to the character they represent.
+;; entities are converted to the character they represent.
;;
;; 02/05/01 symbols mapped to preferred case at runtime (as opposed to
;; a compile time macro determining the case mapping)
(defmacro tag-name (expr)
`(let ((.xx. ,expr))
(if* (consp .xx.)
- then (car .xx.)
- else .xx.)))
+ then (car .xx.)
+ else .xx.)))
)
-(defstruct collector
+(defstruct collector
next ; next index to set
max ; 1+max index to set
data ; string vector
(let (col)
(without-scheduling
(do* ((cols *collectors* (cdr cols))
- (this (car cols) (car cols)))
- ((null cols))
- (if* this
- then (setf (car cols) nil)
- (setq col this)
- (return))))
+ (this (car cols) (car cols)))
+ ((null cols))
+ (if* this
+ then (setf (car cols) nil)
+ (setq col this)
+ (return))))
(if* col
then (setf (collector-next col) 0)
- col
+ col
else (make-collector
- :next 0
- :max 100
- :data (make-string 100)))))
+ :next 0
+ :max 100
+ :data (make-string 100)))))
(defun put-back-collector (col)
(declare (optimize (speed 3) (safety 1)))
- (without-scheduling
+ (without-scheduling
(do ((cols *collectors* (cdr cols)))
- ((null cols)
- ; toss it away
- nil)
+ ((null cols)
+ ; toss it away
+ nil)
(if* (null (car cols))
- then (setf (car cols) col)
- (return)))))
-
+ then (setf (car cols) col)
+ (return)))))
+
(defun grow-and-add (coll ch)
;; increase the size of the data portion of the collector and then
;; add the given char at the end
(let* ((odata (collector-data coll))
- (ndata (make-string (* 2 (length odata)))))
+ (ndata (make-string (* 2 (length odata)))))
(dotimes (i (length odata))
(setf (schar ndata i) (schar odata i)))
(setf (collector-data coll) ndata)
(setf (schar ndata next) ch)
(setf (collector-next coll) (1+ next)))))
-
-
-
-
+
+
+
+
;; character characteristics
(defconstant char-tagcharacter 1) ; valid char for a tag
(defconstant char-attribnamechar 2) ; valid char for an attribute name
(defconstant char-attribundelimattribvalue 4) ; valid for undelimited value
(defconstant char-spacechar 8)
-(defparameter *characteristics*
+(defparameter *characteristics*
;; array of bits describing character characteristics
(let ((arr (make-array 128 :initial-element 0)))
(declare (optimize (speed 3) (safety 1)))
(macrolet ((with-range ((var from to) &rest body)
- `(do ((,var (char-code ,from) (1+ ,var))
- (mmax (char-code ,to)))
- ((> ,var mmax))
- ,@body))
-
- (addit (index charistic)
- `(setf (svref arr ,index)
- (logior (svref arr ,index)
- ,charistic)))
- )
-
- (with-range (i #\A #\Z)
- (addit i (+ char-tagcharacter
- char-attribnamechar
- char-attribundelimattribvalue)))
-
- (with-range (i #\a #\z)
- (addit i (+ char-tagcharacter
- char-attribnamechar
- char-attribundelimattribvalue)))
-
- (with-range (i #\0 #\9)
- (addit i (+ char-tagcharacter
- char-attribnamechar
- char-attribundelimattribvalue)))
-
- ;; let colon be legal tag character
- (addit (char-code #\:) (+ char-attribnamechar
- char-tagcharacter))
-
- ;; NY times special tags have _
- (addit (char-code #\_) (+ char-attribnamechar
- char-tagcharacter))
-
- ; now the unusual cases
- (addit (char-code #\-) (+ char-attribnamechar
- char-attribundelimattribvalue))
- (addit (char-code #\.) (+ char-attribnamechar
- char-attribundelimattribvalue))
-
- ;; adding all typeable chars except for whitespace and >
- (addit (char-code #\:) char-attribundelimattribvalue)
- (addit (char-code #\@) char-attribundelimattribvalue)
- (addit (char-code #\/) char-attribundelimattribvalue)
- (addit (char-code #\!) char-attribundelimattribvalue)
- (addit (char-code #\#) char-attribundelimattribvalue)
- (addit (char-code #\$) char-attribundelimattribvalue)
- (addit (char-code #\%) char-attribundelimattribvalue)
- (addit (char-code #\^) char-attribundelimattribvalue)
- (addit (char-code #\&) char-attribundelimattribvalue)
- (addit (char-code #\() char-attribundelimattribvalue)
- (addit (char-code #\)) char-attribundelimattribvalue)
- (addit (char-code #\_) char-attribundelimattribvalue)
- (addit (char-code #\=) char-attribundelimattribvalue)
- (addit (char-code #\+) char-attribundelimattribvalue)
- (addit (char-code #\\) char-attribundelimattribvalue)
- (addit (char-code #\|) char-attribundelimattribvalue)
- (addit (char-code #\{) char-attribundelimattribvalue)
- (addit (char-code #\}) char-attribundelimattribvalue)
- (addit (char-code #\[) char-attribundelimattribvalue)
- (addit (char-code #\]) char-attribundelimattribvalue)
- (addit (char-code #\;) char-attribundelimattribvalue)
- (addit (char-code #\') char-attribundelimattribvalue)
- (addit (char-code #\") char-attribundelimattribvalue)
- (addit (char-code #\,) char-attribundelimattribvalue)
- (addit (char-code #\<) char-attribundelimattribvalue)
- (addit (char-code #\?) char-attribundelimattribvalue)
-
- ; i'm not sure what can be in a tag name but we know that
- ; ! and - must be there since it's used in comments
-
- (addit (char-code #\-) char-tagcharacter)
- (addit (char-code #\!) char-tagcharacter)
-
- ; spaces
- (addit (char-code #\space) char-spacechar)
- (addit (char-code #\tab) char-spacechar)
- (addit (char-code #\return) char-spacechar)
- (addit (char-code #\linefeed) char-spacechar)
-
- )
-
-
-
+ `(do ((,var (char-code ,from) (1+ ,var))
+ (mmax (char-code ,to)))
+ ((> ,var mmax))
+ ,@body))
+
+ (addit (index charistic)
+ `(setf (svref arr ,index)
+ (logior (svref arr ,index)
+ ,charistic)))
+ )
+
+ (with-range (i #\A #\Z)
+ (addit i (+ char-tagcharacter
+ char-attribnamechar
+ char-attribundelimattribvalue)))
+
+ (with-range (i #\a #\z)
+ (addit i (+ char-tagcharacter
+ char-attribnamechar
+ char-attribundelimattribvalue)))
+
+ (with-range (i #\0 #\9)
+ (addit i (+ char-tagcharacter
+ char-attribnamechar
+ char-attribundelimattribvalue)))
+
+ ;; let colon be legal tag character
+ (addit (char-code #\:) (+ char-attribnamechar
+ char-tagcharacter))
+
+ ;; NY times special tags have _
+ (addit (char-code #\_) (+ char-attribnamechar
+ char-tagcharacter))
+
+ ; now the unusual cases
+ (addit (char-code #\-) (+ char-attribnamechar
+ char-attribundelimattribvalue))
+ (addit (char-code #\.) (+ char-attribnamechar
+ char-attribundelimattribvalue))
+
+ ;; adding all typeable chars except for whitespace and >
+ (addit (char-code #\:) char-attribundelimattribvalue)
+ (addit (char-code #\@) char-attribundelimattribvalue)
+ (addit (char-code #\/) char-attribundelimattribvalue)
+ (addit (char-code #\!) char-attribundelimattribvalue)
+ (addit (char-code #\#) char-attribundelimattribvalue)
+ (addit (char-code #\$) char-attribundelimattribvalue)
+ (addit (char-code #\%) char-attribundelimattribvalue)
+ (addit (char-code #\^) char-attribundelimattribvalue)
+ (addit (char-code #\&) char-attribundelimattribvalue)
+ (addit (char-code #\() char-attribundelimattribvalue)
+ (addit (char-code #\)) char-attribundelimattribvalue)
+ (addit (char-code #\_) char-attribundelimattribvalue)
+ (addit (char-code #\=) char-attribundelimattribvalue)
+ (addit (char-code #\+) char-attribundelimattribvalue)
+ (addit (char-code #\\) char-attribundelimattribvalue)
+ (addit (char-code #\|) char-attribundelimattribvalue)
+ (addit (char-code #\{) char-attribundelimattribvalue)
+ (addit (char-code #\}) char-attribundelimattribvalue)
+ (addit (char-code #\[) char-attribundelimattribvalue)
+ (addit (char-code #\]) char-attribundelimattribvalue)
+ (addit (char-code #\;) char-attribundelimattribvalue)
+ (addit (char-code #\') char-attribundelimattribvalue)
+ (addit (char-code #\") char-attribundelimattribvalue)
+ (addit (char-code #\,) char-attribundelimattribvalue)
+ (addit (char-code #\<) char-attribundelimattribvalue)
+ (addit (char-code #\?) char-attribundelimattribvalue)
+
+ ; i'm not sure what can be in a tag name but we know that
+ ; ! and - must be there since it's used in comments
+
+ (addit (char-code #\-) char-tagcharacter)
+ (addit (char-code #\!) char-tagcharacter)
+
+ ; spaces
+ (addit (char-code #\space) char-spacechar)
+ (addit (char-code #\tab) char-spacechar)
+ (addit (char-code #\return) char-spacechar)
+ (addit (char-code #\linefeed) char-spacechar)
+
+ )
+
+
+
arr))
-
+
(defun char-characteristic (char bit)
(declare (optimize (speed 3) (safety 1)))
- ;; return true if the given char has the given bit set in
+ ;; return true if the given char has the given bit set in
;; the characteristic array
(let ((code (char-code char)))
(if* (<= 0 code 127)
then ; in range
- (not (zerop (logand (svref *characteristics* code) bit))))))
+ (not (zerop (logand (svref *characteristics* code) bit))))))
-(defvar *html-entity-to-code*
+(defvar *html-entity-to-code*
(let ((table (make-hash-table :test #'equal)))
(dolist (ent '(("nbsp" . 160)
- ("iexcl" . 161)
- ("cent" . 162)
- ("pound" . 163)
- ("curren" . 164)
- ("yen" . 165)
- ("brvbar" . 166)
- ("sect" . 167)
- ("uml" . 168)
- ("copy" . 169)
- ("ordf" . 170)
- ("laquo" . 171)
- ("not" . 172)
- ("shy" . 173)
- ("reg" . 174)
- ("macr" . 175)
- ("deg" . 176)
- ("plusmn" . 177)
- ("sup2" . 178)
- ("sup3" . 179)
- ("acute" . 180)
- ("micro" . 181)
- ("para" . 182)
- ("middot" . 183)
- ("cedil" . 184)
- ("sup1" . 185)
- ("ordm" . 186)
- ("raquo" . 187)
- ("frac14" . 188)
- ("frac12" . 189)
- ("frac34" . 190)
- ("iquest" . 191)
- ("Agrave" . 192)
- ("Aacute" . 193)
- ("Acirc" . 194)
- ("Atilde" . 195)
- ("Auml" . 196)
- ("Aring" . 197)
- ("AElig" . 198)
- ("Ccedil" . 199)
- ("Egrave" . 200)
- ("Eacute" . 201)
- ("Ecirc" . 202)
- ("Euml" . 203)
- ("Igrave" . 204)
- ("Iacute" . 205)
- ("Icirc" . 206)
- ("Iuml" . 207)
- ("ETH" . 208)
- ("Ntilde" . 209)
- ("Ograve" . 210)
- ("Oacute" . 211)
- ("Ocirc" . 212)
- ("Otilde" . 213)
- ("Ouml" . 214)
- ("times" . 215)
- ("Oslash" . 216)
- ("Ugrave" . 217)
- ("Uacute" . 218)
- ("Ucirc" . 219)
- ("Uuml" . 220)
- ("Yacute" . 221)
- ("THORN" . 222)
- ("szlig" . 223)
- ("agrave" . 224)
- ("aacute" . 225)
- ("acirc" . 226)
- ("atilde" . 227)
- ("auml" . 228)
- ("aring" . 229)
- ("aelig" . 230)
- ("ccedil" . 231)
- ("egrave" . 232)
- ("eacute" . 233)
- ("ecirc" . 234)
- ("euml" . 235)
- ("igrave" . 236)
- ("iacute" . 237)
- ("icirc" . 238)
- ("iuml" . 239)
- ("eth" . 240)
- ("ntilde" . 241)
- ("ograve" . 242)
- ("oacute" . 243)
- ("ocirc" . 244)
- ("otilde" . 245)
- ("ouml" . 246)
- ("divide" . 247)
- ("oslash" . 248)
- ("ugrave" . 249)
- ("uacute" . 250)
- ("ucirc" . 251)
- ("uuml" . 252)
- ("yacute" . 253)
- ("thorn" . 254)
- ("yuml" . 255)
- ("fnof" . 402)
- ("Alpha" . 913)
- ("Beta" . 914)
- ("Gamma" . 915)
- ("Delta" . 916)
- ("Epsilon" . 917)
- ("Zeta" . 918)
- ("Eta" . 919)
- ("Theta" . 920)
- ("Iota" . 921)
- ("Kappa" . 922)
- ("Lambda" . 923)
- ("Mu" . 924)
- ("Nu" . 925)
- ("Xi" . 926)
- ("Omicron" . 927)
- ("Pi" . 928)
- ("Rho" . 929)
- ("Sigma" . 931)
- ("Tau" . 932)
- ("Upsilon" . 933)
- ("Phi" . 934)
- ("Chi" . 935)
- ("Psi" . 936)
- ("Omega" . 937)
- ("alpha" . 945)
- ("beta" . 946)
- ("gamma" . 947)
- ("delta" . 948)
- ("epsilon" . 949)
- ("zeta" . 950)
- ("eta" . 951)
- ("theta" . 952)
- ("iota" . 953)
- ("kappa" . 954)
- ("lambda" . 955)
- ("mu" . 956)
- ("nu" . 957)
- ("xi" . 958)
- ("omicron" . 959)
- ("pi" . 960)
- ("rho" . 961)
- ("sigmaf" . 962)
- ("sigma" . 963)
- ("tau" . 964)
- ("upsilon" . 965)
- ("phi" . 966)
- ("chi" . 967)
- ("psi" . 968)
- ("omega" . 969)
- ("thetasym" . 977)
- ("upsih" . 978)
- ("piv" . 982)
- ("bull" . 8226)
- ("hellip" . 8230)
- ("prime" . 8242)
- ("Prime" . 8243)
- ("oline" . 8254)
- ("frasl" . 8260)
- ("weierp" . 8472)
- ("image" . 8465)
- ("real" . 8476)
- ("trade" . 8482)
- ("alefsym" . 8501)
- ("larr" . 8592)
- ("uarr" . 8593)
- ("rarr" . 8594)
- ("darr" . 8595)
- ("harr" . 8596)
- ("crarr" . 8629)
- ("lArr" . 8656)
- ("uArr" . 8657)
- ("rArr" . 8658)
- ("dArr" . 8659)
- ("hArr" . 8660)
- ("forall" . 8704)
- ("part" . 8706)
- ("exist" . 8707)
- ("empty" . 8709)
- ("nabla" . 8711)
- ("isin" . 8712)
- ("notin" . 8713)
- ("ni" . 8715)
- ("prod" . 8719)
- ("sum" . 8721)
- ("minus" . 8722)
- ("lowast" . 8727)
- ("radic" . 8730)
- ("prop" . 8733)
- ("infin" . 8734)
- ("ang" . 8736)
- ("and" . 8743)
- ("or" . 8744)
- ("cap" . 8745)
- ("cup" . 8746)
- ("int" . 8747)
- ("there4" . 8756)
- ("sim" . 8764)
- ("cong" . 8773)
- ("asymp" . 8776)
- ("ne" . 8800)
- ("equiv" . 8801)
- ("le" . 8804)
- ("ge" . 8805)
- ("sub" . 8834)
- ("sup" . 8835)
- ("nsub" . 8836)
- ("sube" . 8838)
- ("supe" . 8839)
- ("oplus" . 8853)
- ("otimes" . 8855)
- ("perp" . 8869)
- ("sdot" . 8901)
- ("lceil" . 8968)
- ("rceil" . 8969)
- ("lfloor" . 8970)
- ("rfloor" . 8971)
- ("lang" . 9001)
- ("rang" . 9002)
- ("loz" . 9674)
- ("spades" . 9824)
- ("clubs" . 9827)
- ("hearts" . 9829)
- ("diams" . 9830)
- ("quot" . 34)
- ("amp" . 38)
- ("lt" . 60)
- ("gt" . 62)
- ("OElig" . 338)
- ("oelig" . 339)
- ("Scaron" . 352)
- ("scaron" . 353)
- ("Yuml" . 376)
- ("circ" . 710)
- ("tilde" . 732)
- ("ensp" . 8194)
- ("emsp" . 8195)
- ("thinsp" . 8201)
- ("zwnj" . 8204)
- ("zwj" . 8205)
- ("lrm" . 8206)
- ("rlm" . 8207)
- ("ndash" . 8211)
- ("mdash" . 8212)
- ("lsquo" . 8216)
- ("rsquo" . 8217)
- ("sbquo" . 8218)
- ("ldquo" . 8220)
- ("rdquo" . 8221)
- ("bdquo" . 8222)
- ("dagger" . 8224)
- ("Dagger" . 8225)
- ("permil" . 8240)
- ("lsaquo" . 8249)
- ("rsaquo" . 8250)
- ("euro" . 8364)
- ))
- (setf (gethash (car ent) table) (cdr ent)))
+ ("iexcl" . 161)
+ ("cent" . 162)
+ ("pound" . 163)
+ ("curren" . 164)
+ ("yen" . 165)
+ ("brvbar" . 166)
+ ("sect" . 167)
+ ("uml" . 168)
+ ("copy" . 169)
+ ("ordf" . 170)
+ ("laquo" . 171)
+ ("not" . 172)
+ ("shy" . 173)
+ ("reg" . 174)
+ ("macr" . 175)
+ ("deg" . 176)
+ ("plusmn" . 177)
+ ("sup2" . 178)
+ ("sup3" . 179)
+ ("acute" . 180)
+ ("micro" . 181)
+ ("para" . 182)
+ ("middot" . 183)
+ ("cedil" . 184)
+ ("sup1" . 185)
+ ("ordm" . 186)
+ ("raquo" . 187)
+ ("frac14" . 188)
+ ("frac12" . 189)
+ ("frac34" . 190)
+ ("iquest" . 191)
+ ("Agrave" . 192)
+ ("Aacute" . 193)
+ ("Acirc" . 194)
+ ("Atilde" . 195)
+ ("Auml" . 196)
+ ("Aring" . 197)
+ ("AElig" . 198)
+ ("Ccedil" . 199)
+ ("Egrave" . 200)
+ ("Eacute" . 201)
+ ("Ecirc" . 202)
+ ("Euml" . 203)
+ ("Igrave" . 204)
+ ("Iacute" . 205)
+ ("Icirc" . 206)
+ ("Iuml" . 207)
+ ("ETH" . 208)
+ ("Ntilde" . 209)
+ ("Ograve" . 210)
+ ("Oacute" . 211)
+ ("Ocirc" . 212)
+ ("Otilde" . 213)
+ ("Ouml" . 214)
+ ("times" . 215)
+ ("Oslash" . 216)
+ ("Ugrave" . 217)
+ ("Uacute" . 218)
+ ("Ucirc" . 219)
+ ("Uuml" . 220)
+ ("Yacute" . 221)
+ ("THORN" . 222)
+ ("szlig" . 223)
+ ("agrave" . 224)
+ ("aacute" . 225)
+ ("acirc" . 226)
+ ("atilde" . 227)
+ ("auml" . 228)
+ ("aring" . 229)
+ ("aelig" . 230)
+ ("ccedil" . 231)
+ ("egrave" . 232)
+ ("eacute" . 233)
+ ("ecirc" . 234)
+ ("euml" . 235)
+ ("igrave" . 236)
+ ("iacute" . 237)
+ ("icirc" . 238)
+ ("iuml" . 239)
+ ("eth" . 240)
+ ("ntilde" . 241)
+ ("ograve" . 242)
+ ("oacute" . 243)
+ ("ocirc" . 244)
+ ("otilde" . 245)
+ ("ouml" . 246)
+ ("divide" . 247)
+ ("oslash" . 248)
+ ("ugrave" . 249)
+ ("uacute" . 250)
+ ("ucirc" . 251)
+ ("uuml" . 252)
+ ("yacute" . 253)
+ ("thorn" . 254)
+ ("yuml" . 255)
+ ("fnof" . 402)
+ ("Alpha" . 913)
+ ("Beta" . 914)
+ ("Gamma" . 915)
+ ("Delta" . 916)
+ ("Epsilon" . 917)
+ ("Zeta" . 918)
+ ("Eta" . 919)
+ ("Theta" . 920)
+ ("Iota" . 921)
+ ("Kappa" . 922)
+ ("Lambda" . 923)
+ ("Mu" . 924)
+ ("Nu" . 925)
+ ("Xi" . 926)
+ ("Omicron" . 927)
+ ("Pi" . 928)
+ ("Rho" . 929)
+ ("Sigma" . 931)
+ ("Tau" . 932)
+ ("Upsilon" . 933)
+ ("Phi" . 934)
+ ("Chi" . 935)
+ ("Psi" . 936)
+ ("Omega" . 937)
+ ("alpha" . 945)
+ ("beta" . 946)
+ ("gamma" . 947)
+ ("delta" . 948)
+ ("epsilon" . 949)
+ ("zeta" . 950)
+ ("eta" . 951)
+ ("theta" . 952)
+ ("iota" . 953)
+ ("kappa" . 954)
+ ("lambda" . 955)
+ ("mu" . 956)
+ ("nu" . 957)
+ ("xi" . 958)
+ ("omicron" . 959)
+ ("pi" . 960)
+ ("rho" . 961)
+ ("sigmaf" . 962)
+ ("sigma" . 963)
+ ("tau" . 964)
+ ("upsilon" . 965)
+ ("phi" . 966)
+ ("chi" . 967)
+ ("psi" . 968)
+ ("omega" . 969)
+ ("thetasym" . 977)
+ ("upsih" . 978)
+ ("piv" . 982)
+ ("bull" . 8226)
+ ("hellip" . 8230)
+ ("prime" . 8242)
+ ("Prime" . 8243)
+ ("oline" . 8254)
+ ("frasl" . 8260)
+ ("weierp" . 8472)
+ ("image" . 8465)
+ ("real" . 8476)
+ ("trade" . 8482)
+ ("alefsym" . 8501)
+ ("larr" . 8592)
+ ("uarr" . 8593)
+ ("rarr" . 8594)
+ ("darr" . 8595)
+ ("harr" . 8596)
+ ("crarr" . 8629)
+ ("lArr" . 8656)
+ ("uArr" . 8657)
+ ("rArr" . 8658)
+ ("dArr" . 8659)
+ ("hArr" . 8660)
+ ("forall" . 8704)
+ ("part" . 8706)
+ ("exist" . 8707)
+ ("empty" . 8709)
+ ("nabla" . 8711)
+ ("isin" . 8712)
+ ("notin" . 8713)
+ ("ni" . 8715)
+ ("prod" . 8719)
+ ("sum" . 8721)
+ ("minus" . 8722)
+ ("lowast" . 8727)
+ ("radic" . 8730)
+ ("prop" . 8733)
+ ("infin" . 8734)
+ ("ang" . 8736)
+ ("and" . 8743)
+ ("or" . 8744)
+ ("cap" . 8745)
+ ("cup" . 8746)
+ ("int" . 8747)
+ ("there4" . 8756)
+ ("sim" . 8764)
+ ("cong" . 8773)
+ ("asymp" . 8776)
+ ("ne" . 8800)
+ ("equiv" . 8801)
+ ("le" . 8804)
+ ("ge" . 8805)
+ ("sub" . 8834)
+ ("sup" . 8835)
+ ("nsub" . 8836)
+ ("sube" . 8838)
+ ("supe" . 8839)
+ ("oplus" . 8853)
+ ("otimes" . 8855)
+ ("perp" . 8869)
+ ("sdot" . 8901)
+ ("lceil" . 8968)
+ ("rceil" . 8969)
+ ("lfloor" . 8970)
+ ("rfloor" . 8971)
+ ("lang" . 9001)
+ ("rang" . 9002)
+ ("loz" . 9674)
+ ("spades" . 9824)
+ ("clubs" . 9827)
+ ("hearts" . 9829)
+ ("diams" . 9830)
+ ("quot" . 34)
+ ("amp" . 38)
+ ("lt" . 60)
+ ("gt" . 62)
+ ("OElig" . 338)
+ ("oelig" . 339)
+ ("Scaron" . 352)
+ ("scaron" . 353)
+ ("Yuml" . 376)
+ ("circ" . 710)
+ ("tilde" . 732)
+ ("ensp" . 8194)
+ ("emsp" . 8195)
+ ("thinsp" . 8201)
+ ("zwnj" . 8204)
+ ("zwj" . 8205)
+ ("lrm" . 8206)
+ ("rlm" . 8207)
+ ("ndash" . 8211)
+ ("mdash" . 8212)
+ ("lsquo" . 8216)
+ ("rsquo" . 8217)
+ ("sbquo" . 8218)
+ ("ldquo" . 8220)
+ ("rdquo" . 8221)
+ ("bdquo" . 8222)
+ ("dagger" . 8224)
+ ("Dagger" . 8225)
+ ("permil" . 8240)
+ ("lsaquo" . 8249)
+ ("rsaquo" . 8250)
+ ("euro" . 8364)
+ ))
+ (setf (gethash (car ent) table) (cdr ent)))
table))
(let (buf)
(without-scheduling
(do* ((bufs *tokenbufs* (cdr bufs))
- (this (car bufs) (car bufs)))
- ((null bufs))
- (if* this
- then (setf (car bufs) nil)
- (setq buf this)
- (return))))
+ (this (car bufs) (car bufs)))
+ ((null bufs))
+ (if* this
+ then (setf (car bufs) nil)
+ (setq buf this)
+ (return))))
(if* buf
then (setf (tokenbuf-cur buf) 0)
- (setf (tokenbuf-max buf) 0)
- buf
+ (setf (tokenbuf-max buf) 0)
+ buf
else (make-tokenbuf
- :cur 0
- :max 0
- :data (make-array 1024 :element-type 'character)))))
+ :cur 0
+ :max 0
+ :data (make-array 1024 :element-type 'character)))))
(defun put-back-tokenbuf (buf)
(declare (optimize (speed 3) (safety 1)))
- (without-scheduling
+ (without-scheduling
(do ((bufs *tokenbufs* (cdr bufs)))
- ((null bufs)
- ; toss it away
- nil)
+ ((null bufs)
+ ; toss it away
+ nil)
(if* (null (car bufs))
- then (setf (car bufs) buf)
- (return)))))
+ then (setf (car bufs) buf)
+ (return)))))
(defun to-preferred-case (ch)
(if* (eq excl:*current-case-mode* :CASE-INSENSITIVE-UPPER)
then (char-upcase ch)
else (char-downcase ch)))
-
-
+
+
(defun next-token (stream ignore-strings raw-mode-delimiter
- read-sequence-func tokenbuf parse-entities)
+ read-sequence-func tokenbuf parse-entities)
(declare (optimize (speed 3) (safety 1)))
- ;; return two values:
+ ;; return two values:
;; the next token from the stream.
- ;; the kind of token (:pcdata, :start-tag, :end-tag, :eof)
+ ;; the kind of token (:pcdata, :start-tag, :end-tag, :eof)
;;
;; if read-sequence-func is non-nil,
;; read-sequence-func is called to fetch the next character
(macrolet ((next-char (stream)
- `(let ((cur (tokenbuf-cur tokenbuf))
- (tb (tokenbuf-data tokenbuf)))
- (if* (>= cur (tokenbuf-max tokenbuf))
- then ; fill buffer
- (if* (zerop (setf (tokenbuf-max tokenbuf)
- (if* read-sequence-func
- then (funcall read-sequence-func tb stream)
- else (read-sequence tb stream))))
- then (setq cur nil) ; eof
- else (setq cur 0)))
- (if* cur
- then (prog1 (schar tb cur)
- (setf (tokenbuf-cur tokenbuf) (1+ cur))))))
-
-
- (un-next-char (stream ch)
- `(decf (tokenbuf-cur tokenbuf)))
-
- (clear-coll (coll)
- `(setf (collector-next coll) 0))
-
- (add-to-coll (coll ch)
- `(let ((.next. (collector-next ,coll)))
- (if* (>= .next. (collector-max ,coll))
- then (grow-and-add ,coll ,ch)
- else (setf (schar (collector-data ,coll) .next.)
- ,ch)
- (setf (collector-next ,coll) (1+ .next.)))))
-
- )
-
+ `(let ((cur (tokenbuf-cur tokenbuf))
+ (tb (tokenbuf-data tokenbuf)))
+ (if* (>= cur (tokenbuf-max tokenbuf))
+ then ; fill buffer
+ (if* (zerop (setf (tokenbuf-max tokenbuf)
+ (if* read-sequence-func
+ then (funcall read-sequence-func tb stream)
+ else (read-sequence tb stream))))
+ then (setq cur nil) ; eof
+ else (setq cur 0)))
+ (if* cur
+ then (prog1 (schar tb cur)
+ (setf (tokenbuf-cur tokenbuf) (1+ cur))))))
+
+
+ (un-next-char (stream ch)
+ `(decf (tokenbuf-cur tokenbuf)))
+
+ (clear-coll (coll)
+ `(setf (collector-next coll) 0))
+
+ (add-to-coll (coll ch)
+ `(let ((.next. (collector-next ,coll)))
+ (if* (>= .next. (collector-max ,coll))
+ then (grow-and-add ,coll ,ch)
+ else (setf (schar (collector-data ,coll) .next.)
+ ,ch)
+ (setf (collector-next ,coll) (1+ .next.)))))
+
+ )
+
(let ((state (if* raw-mode-delimiter then state-rawdata else state-pcdata))
- (coll (get-collector))
- (ch)
-
- (value-delim)
-
- (tag-to-return)
- (attribs-to-return)
-
- (end-tag)
-
- (attrib-name)
- (attrib-value)
-
- (name-length 0) ;; count only when it could be a comment
-
- (raw-length 0)
+ (coll (get-collector))
+ (ch)
+
+ (value-delim)
+
+ (tag-to-return)
+ (attribs-to-return)
+
+ (end-tag)
+
+ (attrib-name)
+ (attrib-value)
+
+ (name-length 0) ;; count only when it could be a comment
+
+ (raw-length 0)
(xml-bailout)
- )
-
+ )
+
(loop
-
- (setq ch (next-char stream))
- ;;(format t "ch: ~s state: ~s~%" ch state)
-
- (if* (null ch)
- then (return) ; eof -- exit loop
- )
-
-
- (case state
- (#.state-pcdata
- ; collect everything until we see a <
- (if* (eq ch #\<)
- then ; if we've collected nothing then get a tag
- (if* (> (collector-next coll) 0)
- then ; have collected something, return this string
- (un-next-char stream ch) ; push back the <
- (return)
- else ; collect a tag
- (setq state state-readtagfirst))
- elseif (and parse-entities (eq ch #\&))
- then ; reading an entity. entity ends at semicolon
- (let (res (max 10))
- (loop (let ((ch (next-char stream)))
- (if* (null ch)
- then (error "End of file after & entity marker")
- elseif (eq ch #\;)
- then (return)
- elseif (zerop (decf max))
- then (error "No semicolon found after entity starting: &~{~a~}" (nreverse res))
- else (push ch res))))
- (setq res (nreverse res))
- (if* (eq (car res) #\#)
- then ; decimal entity
- (let ((count 0))
- (dolist (ch (cdr res))
- (let ((code (char-code ch)))
- (if* (<= #.(char-code #\0)
- code
- #.(char-code #\9))
- then (setq count
- (+ (* 10 count)
- (- code
- #.(char-code #\0))))
- else (error "non decimal digit after &# - ~s" ch)
- )))
- (add-to-coll coll (code-char count)))
- else (let ((name (make-array (length res)
- :element-type 'character
- :initial-contents res)))
- (let ((ch (gethash name *html-entity-to-code*)))
- (if* ch
- then (add-to-coll coll (code-char ch))
- else (error "No such entity as ~s" name))))))
-
- else ; we will check for & here eventually
- (if* (not (eq ch #\return))
- then (add-to-coll coll ch))))
-
- (#.state-readtagfirst
- ; starting to read a tag name
- (if* (eq #\/ ch)
- then ; end tag
- (setq end-tag t)
- else (if* (eq #\! ch) ; possible comment
- then (setf xml-bailout t)
- (setq name-length 0))
- (un-next-char stream ch))
- (setq state state-readtag))
-
- (#.state-readtag
- ;; reading the whole tag name
- (if* (char-characteristic ch char-tagcharacter)
- then (add-to-coll coll (to-preferred-case ch))
- (incf name-length)
- (if* (and (eq name-length 3)
- (coll-has-comment coll))
- then (clear-coll coll)
- (setq state state-readcomment))
-
- else (setq tag-to-return (compute-tag coll))
- (clear-coll coll)
- (if* (eq ch #\>)
- then (return) ; we're done
- elseif xml-bailout then
- (un-next-char stream ch)
- (return)
- else (if* (eq tag-to-return :!--)
- then ; a comment
- (setq state state-readcomment)
- else (un-next-char stream ch)
- (setq state state-findattribname)))))
-
- (#.state-findattribname
- ;; search until we find the start of an attribute name
- ;; or the end of the tag
- (if* (eq ch #\>)
- then ; end of the line
- (return)
- elseif (eq ch #\=)
- then ; value for previous attribute name
- ; (syntax "foo = bar" is bogus I think but it's
- ; used some places, here is where we handle this
- (pop attribs-to-return)
- (setq attrib-name (pop attribs-to-return))
- (setq state state-findvalue)
- elseif (char-characteristic ch char-attribnamechar)
- then (un-next-char stream ch)
- (setq state state-attribname)
- else nil ; ignore other things
- ))
-
- (#.state-findvalue
- ;; find the start of the value
- (if* (char-characteristic ch char-spacechar)
- thenret ; keep looking
- elseif (eq ch #\>)
- then ; no value, set the value to be the
- ; name as a string
- (setq attrib-value
- (string-downcase (string attrib-name)))
-
- (push attrib-name attribs-to-return)
- (push attrib-value attribs-to-return)
- (un-next-char stream ch)
- (setq state state-findattribname)
- else (un-next-char stream ch)
- (setq state state-attribstartvalue)))
-
-
- (#.state-attribname
- ;; collect attribute name
-
- (if* (char-characteristic ch char-attribnamechar)
- then (add-to-coll coll (to-preferred-case ch))
- elseif (eq #\= ch)
- then ; end of attribute name, value is next
- (setq attrib-name (compute-tag coll))
- (clear-coll coll)
- (setq state state-attribstartvalue)
- else ; end of attribute name with no value,
- (setq attrib-name (compute-tag coll))
- (clear-coll coll)
- (setq attrib-value
- (string-downcase (string attrib-name)))
- (push attrib-name attribs-to-return)
- (push attrib-value attribs-to-return)
- (un-next-char stream ch)
- (setq state state-findattribname)))
-
- (#.state-attribstartvalue
- ;; begin to collect value
- (if* (or (eq ch #\")
- (eq ch #\'))
- then (setq value-delim ch)
- (setq state state-attribvaluedelim)
- ;; gobble spaces; assume since we've seen a '=' there really is a value
- elseif (eq #\space ch) then nil
- else (un-next-char stream ch)
- (setq state state-attribvaluenodelim)))
-
- (#.state-attribvaluedelim
- (if* (eq ch value-delim)
- then (setq attrib-value (compute-coll-string coll))
- (clear-coll coll)
- (push attrib-name attribs-to-return)
- (push attrib-value attribs-to-return)
- (setq state state-findattribname)
- else (add-to-coll coll ch)))
-
- (#.state-attribvaluenodelim
- ;; an attribute value not delimited by ' or " and thus restricted
- ;; in the possible characters
- (if* (char-characteristic ch char-attribundelimattribvalue)
- then (add-to-coll coll ch)
- else (un-next-char stream ch)
- (setq attrib-value (compute-coll-string coll))
- (clear-coll coll)
- (push attrib-name attribs-to-return)
- (push attrib-value attribs-to-return)
- (setq state state-findattribname)))
-
- (#.state-readcomment
- ;; a comment ends on the first --, but we'll look for -->
- ;; since that's what most people expect
- (if* (eq ch #\-)
- then (setq state state-readcomment-one)
- else (add-to-coll coll ch)))
-
- (#.state-readcomment-one
- ;; seen one -, looking for ->
-
- (if* (eq ch #\-)
- then (setq state state-readcomment-two)
- else ; not a comment end, put back the -'s
- (add-to-coll coll #\-)
- (add-to-coll coll ch)
- (setq state state-readcomment)))
-
- (#.state-readcomment-two
- ;; seen two -'s, looking for >
-
- (if* (eq ch #\>)
- then ; end of the line
- (return)
- elseif (eq ch #\-)
- then ; still at two -'s, have to put out first
- (add-to-coll coll #\-)
- else ; put out two hypens and back to looking for a hypen
- (add-to-coll coll #\-)
- (add-to-coll coll #\-)
- (setq state state-readcomment)))
-
- (#.state-rawdata
- ;; collect everything until we see the delimiter
- (if* (eq (to-preferred-case ch) (elt raw-mode-delimiter raw-length))
- then
- (incf raw-length)
- (when (= raw-length (length raw-mode-delimiter))
- ;; push the end tag back so it can then be lexed
- ;; but don't do it for xml stuff
- (when (/= (length raw-mode-delimiter) 1)
- (push :end-tag (tokenbuf-first-pass tokenbuf))
- (if* (equal raw-mode-delimiter "</STYLE>")
- then (push :STYLE (tokenbuf-first-pass tokenbuf))
- elseif (equal raw-mode-delimiter "</style>")
- then (push :style (tokenbuf-first-pass tokenbuf))
- elseif (equal raw-mode-delimiter "</SCRIPT>")
- then (push :SCRIPT (tokenbuf-first-pass tokenbuf))
- elseif (equal raw-mode-delimiter "</script>")
- then (push :script (tokenbuf-first-pass tokenbuf))
- else (error "unexpected raw-mode-delimiter"))
- )
- ;; set state to state-pcdata for next section
- (return))
- else
- ;; push partial matches into data string
- (dotimes (i raw-length)
- (add-to-coll coll (elt raw-mode-delimiter i)))
- (setf raw-length 0)
- (add-to-coll coll ch)))
-
- ))
-
-
- ;; out of the loop.
+
+ (setq ch (next-char stream))
+ ;;(format t "ch: ~s state: ~s~%" ch state)
+
+ (if* (null ch)
+ then (return) ; eof -- exit loop
+ )
+
+
+ (case state
+ (#.state-pcdata
+ ; collect everything until we see a <
+ (if* (eq ch #\<)
+ then ; if we've collected nothing then get a tag
+ (if* (> (collector-next coll) 0)
+ then ; have collected something, return this string
+ (un-next-char stream ch) ; push back the <
+ (return)
+ else ; collect a tag
+ (setq state state-readtagfirst))
+ elseif (and parse-entities (eq ch #\&))
+ then ; reading an entity. entity ends at semicolon
+ (let (res (max 10))
+ (loop (let ((ch (next-char stream)))
+ (if* (null ch)
+ then (error "End of file after & entity marker")
+ elseif (eq ch #\;)
+ then (return)
+ elseif (zerop (decf max))
+ then (error "No semicolon found after entity starting: &~{~a~}" (nreverse res))
+ else (push ch res))))
+ (setq res (nreverse res))
+ (if* (eq (car res) #\#)
+ then ; decimal entity
+ (let ((count 0))
+ (dolist (ch (cdr res))
+ (let ((code (char-code ch)))
+ (if* (<= #.(char-code #\0)
+ code
+ #.(char-code #\9))
+ then (setq count
+ (+ (* 10 count)
+ (- code
+ #.(char-code #\0))))
+ else (error "non decimal digit after &# - ~s" ch)
+ )))
+ (add-to-coll coll (code-char count)))
+ else (let ((name (make-array (length res)
+ :element-type 'character
+ :initial-contents res)))
+ (let ((ch (gethash name *html-entity-to-code*)))
+ (if* ch
+ then (add-to-coll coll (code-char ch))
+ else (error "No such entity as ~s" name))))))
+
+ else ; we will check for & here eventually
+ (if* (not (eq ch #\return))
+ then (add-to-coll coll ch))))
+
+ (#.state-readtagfirst
+ ; starting to read a tag name
+ (if* (eq #\/ ch)
+ then ; end tag
+ (setq end-tag t)
+ else (if* (eq #\! ch) ; possible comment
+ then (setf xml-bailout t)
+ (setq name-length 0))
+ (un-next-char stream ch))
+ (setq state state-readtag))
+
+ (#.state-readtag
+ ;; reading the whole tag name
+ (if* (char-characteristic ch char-tagcharacter)
+ then (add-to-coll coll (to-preferred-case ch))
+ (incf name-length)
+ (if* (and (eq name-length 3)
+ (coll-has-comment coll))
+ then (clear-coll coll)
+ (setq state state-readcomment))
+
+ else (setq tag-to-return (compute-tag coll))
+ (clear-coll coll)
+ (if* (eq ch #\>)
+ then (return) ; we're done
+ elseif xml-bailout then
+ (un-next-char stream ch)
+ (return)
+ else (if* (eq tag-to-return :!--)
+ then ; a comment
+ (setq state state-readcomment)
+ else (un-next-char stream ch)
+ (setq state state-findattribname)))))
+
+ (#.state-findattribname
+ ;; search until we find the start of an attribute name
+ ;; or the end of the tag
+ (if* (eq ch #\>)
+ then ; end of the line
+ (return)
+ elseif (eq ch #\=)
+ then ; value for previous attribute name
+ ; (syntax "foo = bar" is bogus I think but it's
+ ; used some places, here is where we handle this
+ (pop attribs-to-return)
+ (setq attrib-name (pop attribs-to-return))
+ (setq state state-findvalue)
+ elseif (char-characteristic ch char-attribnamechar)
+ then (un-next-char stream ch)
+ (setq state state-attribname)
+ else nil ; ignore other things
+ ))
+
+ (#.state-findvalue
+ ;; find the start of the value
+ (if* (char-characteristic ch char-spacechar)
+ thenret ; keep looking
+ elseif (eq ch #\>)
+ then ; no value, set the value to be the
+ ; name as a string
+ (setq attrib-value
+ (string-downcase (string attrib-name)))
+
+ (push attrib-name attribs-to-return)
+ (push attrib-value attribs-to-return)
+ (un-next-char stream ch)
+ (setq state state-findattribname)
+ else (un-next-char stream ch)
+ (setq state state-attribstartvalue)))
+
+
+ (#.state-attribname
+ ;; collect attribute name
+
+ (if* (char-characteristic ch char-attribnamechar)
+ then (add-to-coll coll (to-preferred-case ch))
+ elseif (eq #\= ch)
+ then ; end of attribute name, value is next
+ (setq attrib-name (compute-tag coll))
+ (clear-coll coll)
+ (setq state state-attribstartvalue)
+ else ; end of attribute name with no value,
+ (setq attrib-name (compute-tag coll))
+ (clear-coll coll)
+ (setq attrib-value
+ (string-downcase (string attrib-name)))
+ (push attrib-name attribs-to-return)
+ (push attrib-value attribs-to-return)
+ (un-next-char stream ch)
+ (setq state state-findattribname)))
+
+ (#.state-attribstartvalue
+ ;; begin to collect value
+ (if* (or (eq ch #\")
+ (eq ch #\'))
+ then (setq value-delim ch)
+ (setq state state-attribvaluedelim)
+ ;; gobble spaces; assume since we've seen a '=' there really is a value
+ elseif (eq #\space ch) then nil
+ else (un-next-char stream ch)
+ (setq state state-attribvaluenodelim)))
+
+ (#.state-attribvaluedelim
+ (if* (eq ch value-delim)
+ then (setq attrib-value (compute-coll-string coll))
+ (clear-coll coll)
+ (push attrib-name attribs-to-return)
+ (push attrib-value attribs-to-return)
+ (setq state state-findattribname)
+ else (add-to-coll coll ch)))
+
+ (#.state-attribvaluenodelim
+ ;; an attribute value not delimited by ' or " and thus restricted
+ ;; in the possible characters
+ (if* (char-characteristic ch char-attribundelimattribvalue)
+ then (add-to-coll coll ch)
+ else (un-next-char stream ch)
+ (setq attrib-value (compute-coll-string coll))
+ (clear-coll coll)
+ (push attrib-name attribs-to-return)
+ (push attrib-value attribs-to-return)
+ (setq state state-findattribname)))
+
+ (#.state-readcomment
+ ;; a comment ends on the first --, but we'll look for -->
+ ;; since that's what most people expect
+ (if* (eq ch #\-)
+ then (setq state state-readcomment-one)
+ else (add-to-coll coll ch)))
+
+ (#.state-readcomment-one
+ ;; seen one -, looking for ->
+
+ (if* (eq ch #\-)
+ then (setq state state-readcomment-two)
+ else ; not a comment end, put back the -'s
+ (add-to-coll coll #\-)
+ (add-to-coll coll ch)
+ (setq state state-readcomment)))
+
+ (#.state-readcomment-two
+ ;; seen two -'s, looking for >
+
+ (if* (eq ch #\>)
+ then ; end of the line
+ (return)
+ elseif (eq ch #\-)
+ then ; still at two -'s, have to put out first
+ (add-to-coll coll #\-)
+ else ; put out two hypens and back to looking for a hypen
+ (add-to-coll coll #\-)
+ (add-to-coll coll #\-)
+ (setq state state-readcomment)))
+
+ (#.state-rawdata
+ ;; collect everything until we see the delimiter
+ (if* (eq (to-preferred-case ch) (elt raw-mode-delimiter raw-length))
+ then
+ (incf raw-length)
+ (when (= raw-length (length raw-mode-delimiter))
+ ;; push the end tag back so it can then be lexed
+ ;; but don't do it for xml stuff
+ (when (/= (length raw-mode-delimiter) 1)
+ (push :end-tag (tokenbuf-first-pass tokenbuf))
+ (if* (equal raw-mode-delimiter "</STYLE>")
+ then (push :STYLE (tokenbuf-first-pass tokenbuf))
+ elseif (equal raw-mode-delimiter "</style>")
+ then (push :style (tokenbuf-first-pass tokenbuf))
+ elseif (equal raw-mode-delimiter "</SCRIPT>")
+ then (push :SCRIPT (tokenbuf-first-pass tokenbuf))
+ elseif (equal raw-mode-delimiter "</script>")
+ then (push :script (tokenbuf-first-pass tokenbuf))
+ else (error "unexpected raw-mode-delimiter"))
+ )
+ ;; set state to state-pcdata for next section
+ (return))
+ else
+ ;; push partial matches into data string
+ (dotimes (i raw-length)
+ (add-to-coll coll (elt raw-mode-delimiter i)))
+ (setf raw-length 0)
+ (add-to-coll coll ch)))
+
+ ))
+
+
+ ;; out of the loop.
;; if we're in certain states then it means we should return a value
;;
(case state
- ((#.state-pcdata #.state-rawdata)
- ;; return the buffer as a string
- (if* (zerop (collector-next coll))
- then (values nil (if (eq state state-pcdata) :eof :pcdata))
- else (values (prog1
- (if* (null ignore-strings)
- then (compute-coll-string coll))
- (put-back-collector coll))
- :pcdata)))
-
- (#.state-readtag
- (when (null tag-to-return)
- (error "unexpected end of input encountered"))
- ;; we've read a tag with no attributes
- (put-back-collector coll)
- (values tag-to-return
- (if* end-tag
- then :end-tag
- else (if* xml-bailout then :xml else :start-tag))
- ))
-
- (#.state-findattribname
- ;; returning a tag with possible attributes
- (put-back-collector coll)
- (if* end-tag
- then ; ignore any attributes
- (values tag-to-return :end-tag)
- elseif attribs-to-return
- then (values (cons tag-to-return
- (nreverse attribs-to-return))
- :start-tag)
- else (values tag-to-return :start-tag)))
-
- (#.state-readcomment-two
- ;; returning a comment
- (values (prog1 (if* (null ignore-strings)
- then (compute-coll-string coll))
- (put-back-collector coll))
- :comment))
-
- (t
- (if* (null ch) then (error "unexpected end of input encountered")
- else (error "internal error, can't be here in state ~d" state)))))))
+ ((#.state-pcdata #.state-rawdata)
+ ;; return the buffer as a string
+ (if* (zerop (collector-next coll))
+ then (values nil (if (eq state state-pcdata) :eof :pcdata))
+ else (values (prog1
+ (if* (null ignore-strings)
+ then (compute-coll-string coll))
+ (put-back-collector coll))
+ :pcdata)))
+
+ (#.state-readtag
+ (when (null tag-to-return)
+ (error "unexpected end of input encountered"))
+ ;; we've read a tag with no attributes
+ (put-back-collector coll)
+ (values tag-to-return
+ (if* end-tag
+ then :end-tag
+ else (if* xml-bailout then :xml else :start-tag))
+ ))
+
+ (#.state-findattribname
+ ;; returning a tag with possible attributes
+ (put-back-collector coll)
+ (if* end-tag
+ then ; ignore any attributes
+ (values tag-to-return :end-tag)
+ elseif attribs-to-return
+ then (values (cons tag-to-return
+ (nreverse attribs-to-return))
+ :start-tag)
+ else (values tag-to-return :start-tag)))
+
+ (#.state-readcomment-two
+ ;; returning a comment
+ (values (prog1 (if* (null ignore-strings)
+ then (compute-coll-string coll))
+ (put-back-collector coll))
+ :comment))
+
+ (t
+ (if* (null ch) then (error "unexpected end of input encountered")
+ else (error "internal error, can't be here in state ~d" state)))))))
(defvar *kwd-package* (find-package :keyword))
(declare (optimize (speed 3) (safety 1)))
;; return the string that's in the collection
(let ((str (make-string (collector-next coll)))
- (from (collector-data coll)))
+ (from (collector-data coll)))
(dotimes (i (collector-next coll))
(setf (schar str i) (schar from i)))
-
+
str))
(defun coll-has-comment (coll)
;; true if the collector has exactly "!--" in it
(and (eq 3 (collector-next coll))
(let ((data (collector-data coll)))
- (and (eq #\! (schar data 0))
- (eq #\- (schar data 1))
- (eq #\- (schar data 2))))))
-
+ (and (eq #\! (schar data 0))
+ (eq #\- (schar data 1))
+ (eq #\- (schar data 2))))))
+
;;;;;;;;;;; quick and dirty parse
; the elements with no body and thus no end tag
-(dolist (opt '(:area :base :basefont :bgsound :br :button :col
- ;;:colgroup - no, this is an element with contents
- :embed :hr :img :frame
- :input :isindex :keygen :link :meta
- :plaintext :spacer :wbr))
+(dolist (opt '(:area :base :basefont :bgsound :br :button :col
+ ;;:colgroup - no, this is an element with contents
+ :embed :hr :img :frame
+ :input :isindex :keygen :link :meta
+ :plaintext :spacer :wbr))
(setf (tag-no-end opt) t))
(defvar *in-line* '(:tt :i :b :big :small :em :strong :dfn :code :samp :kbd
- :var :cite :abbr :acronym :a :img :object :br :map
- :q :sub :sup :span :bdo :input :select :textarea :label :button :font))
+ :var :cite :abbr :acronym :a :img :object :br :map
+ :q :sub :sup :span :bdo :input :select :textarea :label :button :font))
(defvar *ch-format* '(:i :b :tt :big :small :strike :s :u
- :em :strong :font))
+ :em :strong :font))
(defvar *known-tags* '(:!doctype :a :acronym :address :applet :area :b :base :basefont
- :bdo :bgsound :big :blink :blockquote :body :br :button :caption
- :center :cite :code :col :colgroup :comment :dd :del :dfn :dir
- :div :dl :dt :em :embed :fieldset :font :form :frame :frameset
- :h1 :h2 :h3 :h4 :h5 :h6 :head :hr :html :i :iframe :img :input
- :ins :isindex :kbd :label :layer :legend :li :link :listing :map
- :marquee :menu :meta :multicol :nobr :noframes :noscript :object
- :ol :option :p :param :plaintext :pre :q :samp :script :select
- :small :spacer :span :s :strike :strong :style :sub :sup :table
- :tbody :td :textarea :tfoot :th :thead :title :tr :tt :u :ul :var
- :wbr :xmp))
+ :bdo :bgsound :big :blink :blockquote :body :br :button :caption
+ :center :cite :code :col :colgroup :comment :dd :del :dfn :dir
+ :div :dl :dt :em :embed :fieldset :font :form :frame :frameset
+ :h1 :h2 :h3 :h4 :h5 :h6 :head :hr :html :i :iframe :img :input
+ :ins :isindex :kbd :label :layer :legend :li :link :listing :map
+ :marquee :menu :meta :multicol :nobr :noframes :noscript :object
+ :ol :option :p :param :plaintext :pre :q :samp :script :select
+ :small :spacer :span :s :strike :strong :style :sub :sup :table
+ :tbody :td :textarea :tfoot :th :thead :title :tr :tt :u :ul :var
+ :wbr :xmp))
; the elements whose start tag can end a previous tag
(defmethod parse-html ((p stream) &key callback-only callbacks collect-rogue-tags
- no-body-tags
- parse-entities)
+ no-body-tags
+ parse-entities)
(declare (optimize (speed 3) (safety 1)))
(phtml-internal p nil callback-only callbacks collect-rogue-tags
- no-body-tags parse-entities))
+ no-body-tags parse-entities))
(defmacro tag-callback (tag)
`(rest (assoc ,tag callbacks)))
-(defun phtml-internal (p read-sequence-func callback-only
- callbacks collect-rogue-tags
- no-body-tags
- parse-entities)
+(defun phtml-internal (p read-sequence-func callback-only
+ callbacks collect-rogue-tags
+ no-body-tags
+ parse-entities)
(declare (optimize (speed 3) (safety 1)))
(let ((raw-mode-delimiter nil)
- (pending nil)
- (current-tag :start-parse)
- (last-tag :start-parse)
- (current-callback-tags nil)
- (pending-ch-format nil)
- (closed-pending-ch-format nil)
- (new-opens nil)
- (tokenbuf (get-tokenbuf))
- (guts)
- (rogue-tags)
- )
+ (pending nil)
+ (current-tag :start-parse)
+ (last-tag :start-parse)
+ (current-callback-tags nil)
+ (pending-ch-format nil)
+ (closed-pending-ch-format nil)
+ (new-opens nil)
+ (tokenbuf (get-tokenbuf))
+ (guts)
+ (rogue-tags)
+ )
(labels ((close-off-tags (name stop-at collect-rogues once-only)
- ;; close off an open 'name' tag, but search no further
- ;; than a 'stop-at' tag.
- #+ignore (format t "close off name ~s, stop at ~s, ct ~s~%"
- name stop-at current-tag)
- (if* (member (tag-name current-tag) name :test #'eq)
- then ;; close current tag(s)
- (loop
- (when (and collect-rogues
- (not (member (tag-name current-tag)
- *known-tags*)))
- (push (tag-name current-tag) rogue-tags))
- (close-current-tag)
- (if* (or once-only
- (member (tag-name current-tag)
- *ch-format*)
- (not (member
- (tag-name current-tag) name :test #'eq)))
- then (return)))
- elseif (member (tag-name current-tag) stop-at :test #'eq)
- then nil
- else ; search if there is a tag to close
- (dolist (ent pending)
- (if* (member (tag-name (car ent)) name :test #'eq)
- then ; found one to close
- (loop
- (when (and collect-rogues
- (not (member (tag-name current-tag)
- *known-tags*)))
- (push (tag-name current-tag) rogue-tags))
- (close-current-tag)
- (if* (member (tag-name current-tag) name
- :test #'eq)
- then (close-current-tag)
- (return)))
- (return)
- elseif (member (tag-name (car ent)) stop-at
- :test #'eq)
- then (return) ;; do nothing
- ))))
-
- (close-current-tag ()
- ;; close off the current tag and open the pending tag
- (when (member (tag-name current-tag) *ch-format* :test #'eq)
- (push current-tag closed-pending-ch-format)
- )
- (let (element)
- (if* (tag-no-pcdata (tag-name current-tag))
- then (setq element `(,current-tag
- ,@(strip-rev-pcdata guts)))
- else (setq element `(,current-tag ,@(nreverse guts))))
- (let ((callback (tag-callback (tag-name current-tag))))
- (when callback
- (setf current-callback-tags (rest current-callback-tags))
- (funcall callback element)))
- (let* ((prev (pop pending)))
- (setq current-tag (car prev)
- guts (cdr prev))
- (push element guts))))
-
- (save-state ()
- ;; push the current tag state since we're starting:
- ;; a new open tag
- (push (cons current-tag guts) pending)
- #+ignore (format t "state saved, pending ~s~%" pending)
- )
-
-
- (strip-rev-pcdata (stuff)
- ;; reverse the list stuff, omitting all the strings
- (let (res)
- (dolist (st stuff)
- (if* (not (stringp st)) then (push st res)))
- res))
- (check-in-line (check-tag)
- (setf new-opens nil)
- (let (val kind (i 0)
- (length (length (tokenbuf-first-pass tokenbuf))))
- (loop
- (if* (< i length) then
- (setf val (nth i (tokenbuf-first-pass tokenbuf)))
- (setf kind (nth (+ i 1) (tokenbuf-first-pass tokenbuf)))
- (setf i (+ i 2))
- (if* (= i length) then (setf (tokenbuf-first-pass tokenbuf)
- (nreverse (tokenbuf-first-pass tokenbuf))))
- else
- (multiple-value-setq (val kind)
- (get-next-token t))
- (push val (tokenbuf-first-pass tokenbuf))
- (push kind (tokenbuf-first-pass tokenbuf))
- )
- (when (eq kind :eof)
- (if* (= i length) then
- (setf (tokenbuf-first-pass tokenbuf)
- (nreverse (tokenbuf-first-pass tokenbuf))))
- (return))
- (when (and (eq val check-tag) (eq kind :end-tag))
- (if* (= i length) then
- (setf (tokenbuf-first-pass tokenbuf)
- (nreverse (tokenbuf-first-pass tokenbuf))))
- (return))
- (when (member val *ch-format* :test #'eq)
- (if* (eq kind :start-tag) then (push val new-opens)
- elseif (member val new-opens :test #'eq) then
- (setf new-opens (remove val new-opens :count 1))
- else (close-off-tags (list val) nil nil nil)
- )))))
-
- (get-next-token (force)
- (if* (or force (null (tokenbuf-first-pass tokenbuf))) then
- (multiple-value-bind (val kind)
- (next-token p nil raw-mode-delimiter read-sequence-func
- tokenbuf parse-entities)
- (values val kind))
- else
- (let ((val (first (tokenbuf-first-pass tokenbuf)))
- (kind (second (tokenbuf-first-pass tokenbuf))))
- (setf (tokenbuf-first-pass tokenbuf)
- (rest (rest (tokenbuf-first-pass tokenbuf))))
- (values val kind))))
- )
+ ;; close off an open 'name' tag, but search no further
+ ;; than a 'stop-at' tag.
+ #+ignore (format t "close off name ~s, stop at ~s, ct ~s~%"
+ name stop-at current-tag)
+ (if* (member (tag-name current-tag) name :test #'eq)
+ then ;; close current tag(s)
+ (loop
+ (when (and collect-rogues
+ (not (member (tag-name current-tag)
+ *known-tags*)))
+ (push (tag-name current-tag) rogue-tags))
+ (close-current-tag)
+ (if* (or once-only
+ (member (tag-name current-tag)
+ *ch-format*)
+ (not (member
+ (tag-name current-tag) name :test #'eq)))
+ then (return)))
+ elseif (member (tag-name current-tag) stop-at :test #'eq)
+ then nil
+ else ; search if there is a tag to close
+ (dolist (ent pending)
+ (if* (member (tag-name (car ent)) name :test #'eq)
+ then ; found one to close
+ (loop
+ (when (and collect-rogues
+ (not (member (tag-name current-tag)
+ *known-tags*)))
+ (push (tag-name current-tag) rogue-tags))
+ (close-current-tag)
+ (if* (member (tag-name current-tag) name
+ :test #'eq)
+ then (close-current-tag)
+ (return)))
+ (return)
+ elseif (member (tag-name (car ent)) stop-at
+ :test #'eq)
+ then (return) ;; do nothing
+ ))))
+
+ (close-current-tag ()
+ ;; close off the current tag and open the pending tag
+ (when (member (tag-name current-tag) *ch-format* :test #'eq)
+ (push current-tag closed-pending-ch-format)
+ )
+ (let (element)
+ (if* (tag-no-pcdata (tag-name current-tag))
+ then (setq element `(,current-tag
+ ,@(strip-rev-pcdata guts)))
+ else (setq element `(,current-tag ,@(nreverse guts))))
+ (let ((callback (tag-callback (tag-name current-tag))))
+ (when callback
+ (setf current-callback-tags (rest current-callback-tags))
+ (funcall callback element)))
+ (let* ((prev (pop pending)))
+ (setq current-tag (car prev)
+ guts (cdr prev))
+ (push element guts))))
+
+ (save-state ()
+ ;; push the current tag state since we're starting:
+ ;; a new open tag
+ (push (cons current-tag guts) pending)
+ #+ignore (format t "state saved, pending ~s~%" pending)
+ )
+
+
+ (strip-rev-pcdata (stuff)
+ ;; reverse the list stuff, omitting all the strings
+ (let (res)
+ (dolist (st stuff)
+ (if* (not (stringp st)) then (push st res)))
+ res))
+ (check-in-line (check-tag)
+ (setf new-opens nil)
+ (let (val kind (i 0)
+ (length (length (tokenbuf-first-pass tokenbuf))))
+ (loop
+ (if* (< i length) then
+ (setf val (nth i (tokenbuf-first-pass tokenbuf)))
+ (setf kind (nth (+ i 1) (tokenbuf-first-pass tokenbuf)))
+ (setf i (+ i 2))
+ (if* (= i length) then (setf (tokenbuf-first-pass tokenbuf)
+ (nreverse (tokenbuf-first-pass tokenbuf))))
+ else
+ (multiple-value-setq (val kind)
+ (get-next-token t))
+ (push val (tokenbuf-first-pass tokenbuf))
+ (push kind (tokenbuf-first-pass tokenbuf))
+ )
+ (when (eq kind :eof)
+ (if* (= i length) then
+ (setf (tokenbuf-first-pass tokenbuf)
+ (nreverse (tokenbuf-first-pass tokenbuf))))
+ (return))
+ (when (and (eq val check-tag) (eq kind :end-tag))
+ (if* (= i length) then
+ (setf (tokenbuf-first-pass tokenbuf)
+ (nreverse (tokenbuf-first-pass tokenbuf))))
+ (return))
+ (when (member val *ch-format* :test #'eq)
+ (if* (eq kind :start-tag) then (push val new-opens)
+ elseif (member val new-opens :test #'eq) then
+ (setf new-opens (remove val new-opens :count 1))
+ else (close-off-tags (list val) nil nil nil)
+ )))))
+
+ (get-next-token (force)
+ (if* (or force (null (tokenbuf-first-pass tokenbuf))) then
+ (multiple-value-bind (val kind)
+ (next-token p nil raw-mode-delimiter read-sequence-func
+ tokenbuf parse-entities)
+ (values val kind))
+ else
+ (let ((val (first (tokenbuf-first-pass tokenbuf)))
+ (kind (second (tokenbuf-first-pass tokenbuf))))
+ (setf (tokenbuf-first-pass tokenbuf)
+ (rest (rest (tokenbuf-first-pass tokenbuf))))
+ (values val kind))))
+ )
(loop
- (multiple-value-bind (val kind)
- (get-next-token nil)
- #+ignore (format t "val: ~s kind: ~s last-tag ~s pending ~s~%" val kind
- last-tag pending)
- (case kind
- (:pcdata
- (when (or (and callback-only current-callback-tags)
- (not callback-only))
- (if* (member last-tag *in-line*)
- then
- (push val guts)
- else
- (when (dotimes (i (length val) nil)
- (when (not (char-characteristic (elt val i)
- char-spacechar))
- (return t)))
- (push val guts))))
- (when (and (= (length raw-mode-delimiter) 1) ;; xml tag...
- (or (and callback-only current-callback-tags)
- (not callback-only)))
- (close-off-tags (list last-tag) nil nil t))
- (setf raw-mode-delimiter nil)
- )
-
- (:xml
- (setf last-tag val)
- (setf raw-mode-delimiter ">")
- (let* ((name (tag-name val)))
- (when (and callback-only (tag-callback name))
- (push name current-callback-tags))
- (save-state)
- (setq current-tag val)
- (setq guts nil)
- ))
-
- (:start-tag
- (setf last-tag val)
- (if* (or (eq last-tag :style)
- (and (listp last-tag) (eq (first last-tag) :style)))
- then
- (setf raw-mode-delimiter
- (if* (eq excl:*current-case-mode* :CASE-INSENSITIVE-UPPER)
- then "</STYLE>"
- else "</style>"))
- elseif (or (eq last-tag :script)
- (and (listp last-tag) (eq (first last-tag) :script)))
- then
- (setf raw-mode-delimiter
- (if* (eq excl:*current-case-mode* :CASE-INSENSITIVE-UPPER)
- then "</SCRIPT>"
- else "</script>")))
- ; maybe this is an end tag too
- (let* ((name (tag-name val))
- (auto-close (tag-auto-close name))
- (auto-close-stop nil)
- (no-end (or (tag-no-end name) (member name no-body-tags))))
- (when (and callback-only (tag-callback name))
- (push name current-callback-tags))
- (when (or (and callback-only current-callback-tags)
- (not callback-only))
- (if* auto-close
- then (setq auto-close-stop (tag-auto-close-stop name))
- (close-off-tags auto-close auto-close-stop nil nil))
- (when (and pending-ch-format (not no-end))
- (if* (member name *ch-format* :test #'eq) then nil
- elseif (member name *in-line* :test #'eq) then
- ;; close off only tags that are within *in-line* block
- (check-in-line name)
- else ;; close ALL pending char tags and then reopen
- (dolist (this-tag (reverse pending-ch-format))
- (close-off-tags (list (if (listp this-tag) (first this-tag) this-tag)) nil nil nil))
- ))
- (if* no-end
- then ; this is a singleton tag
- (let ((callback (tag-callback (tag-name (if* (atom val)
- then val
- else (first val))))))
- (when callback
- (funcall callback (if* (atom val)
- then val
- else (list val)))))
- (push (if* (atom val)
- then val
- else (list val))
- guts)
- else (save-state)
- (setq current-tag val)
- (setq guts nil))
- (if* (member name *ch-format* :test #'eq)
- then (push val pending-ch-format)
- else (when (not
- (or (eq last-tag :style)
- (and (listp last-tag) (eq (first last-tag) :style))
- (eq last-tag :script)
- (and (listp last-tag) (eq (first last-tag) :script))))
- (dolist (tmp (reverse closed-pending-ch-format))
- (save-state)
- (setf current-tag tmp)
- (setf guts nil)))
- )
- (when (not
- (or (eq last-tag :style)
- (and (listp last-tag) (eq (first last-tag) :style))
- (eq last-tag :script)
- (and (listp last-tag) (eq (first last-tag) :script))))
- (setf closed-pending-ch-format nil))
- )))
-
- (:end-tag
- (setf raw-mode-delimiter nil)
- (when (or (and callback-only current-callback-tags)
- (not callback-only))
- (close-off-tags (list val) nil nil t)
- (when (member val *ch-format* :test #'eq)
- (setf pending-ch-format
- (remove val pending-ch-format :count 1
- :test #'(lambda (x y) (eq x (if (listp y) (first y) y)))))
- (setf closed-pending-ch-format
- (remove val closed-pending-ch-format :count 1
- :test #'(lambda (x y) (eq x (if (listp y) (first y) y)))))
- )
- (dolist (tmp (reverse closed-pending-ch-format))
- (save-state)
- (setf current-tag tmp)
- (setf guts nil))
- (setf closed-pending-ch-format nil)
- ))
-
- (:comment
- (setf raw-mode-delimiter nil)
- (when (or (and callback-only current-callback-tags)
- (not callback-only))
- (push `(:comment ,val) guts)))
-
- (:eof
- (setf raw-mode-delimiter nil)
- ;; close off all tags
- (when (or (and callback-only current-callback-tags)
- (not callback-only))
- (close-off-tags '(:start-parse) nil collect-rogue-tags nil))
- (put-back-tokenbuf tokenbuf)
- (if collect-rogue-tags
- (return (values (cdar guts) rogue-tags))
- (return (cdar guts))))))))))
-
-
+ (multiple-value-bind (val kind)
+ (get-next-token nil)
+ #+ignore (format t "val: ~s kind: ~s last-tag ~s pending ~s~%" val kind
+ last-tag pending)
+ (case kind
+ (:pcdata
+ (when (or (and callback-only current-callback-tags)
+ (not callback-only))
+ (if* (member last-tag *in-line*)
+ then
+ (push val guts)
+ else
+ (when (dotimes (i (length val) nil)
+ (when (not (char-characteristic (elt val i)
+ char-spacechar))
+ (return t)))
+ (push val guts))))
+ (when (and (= (length raw-mode-delimiter) 1) ;; xml tag...
+ (or (and callback-only current-callback-tags)
+ (not callback-only)))
+ (close-off-tags (list last-tag) nil nil t))
+ (setf raw-mode-delimiter nil)
+ )
+
+ (:xml
+ (setf last-tag val)
+ (setf raw-mode-delimiter ">")
+ (let* ((name (tag-name val)))
+ (when (and callback-only (tag-callback name))
+ (push name current-callback-tags))
+ (save-state)
+ (setq current-tag val)
+ (setq guts nil)
+ ))
+
+ (:start-tag
+ (setf last-tag val)
+ (if* (or (eq last-tag :style)
+ (and (listp last-tag) (eq (first last-tag) :style)))
+ then
+ (setf raw-mode-delimiter
+ (if* (eq excl:*current-case-mode* :CASE-INSENSITIVE-UPPER)
+ then "</STYLE>"
+ else "</style>"))
+ elseif (or (eq last-tag :script)
+ (and (listp last-tag) (eq (first last-tag) :script)))
+ then
+ (setf raw-mode-delimiter
+ (if* (eq excl:*current-case-mode* :CASE-INSENSITIVE-UPPER)
+ then "</SCRIPT>"
+ else "</script>")))
+ ; maybe this is an end tag too
+ (let* ((name (tag-name val))
+ (auto-close (tag-auto-close name))
+ (auto-close-stop nil)
+ (no-end (or (tag-no-end name) (member name no-body-tags))))
+ (when (and callback-only (tag-callback name))
+ (push name current-callback-tags))
+ (when (or (and callback-only current-callback-tags)
+ (not callback-only))
+ (if* auto-close
+ then (setq auto-close-stop (tag-auto-close-stop name))
+ (close-off-tags auto-close auto-close-stop nil nil))
+ (when (and pending-ch-format (not no-end))
+ (if* (member name *ch-format* :test #'eq) then nil
+ elseif (member name *in-line* :test #'eq) then
+ ;; close off only tags that are within *in-line* block
+ (check-in-line name)
+ else ;; close ALL pending char tags and then reopen
+ (dolist (this-tag (reverse pending-ch-format))
+ (close-off-tags (list (if (listp this-tag) (first this-tag) this-tag)) nil nil nil))
+ ))
+ (if* no-end
+ then ; this is a singleton tag
+ (let ((callback (tag-callback (tag-name (if* (atom val)
+ then val
+ else (first val))))))
+ (when callback
+ (funcall callback (if* (atom val)
+ then val
+ else (list val)))))
+ (push (if* (atom val)
+ then val
+ else (list val))
+ guts)
+ else (save-state)
+ (setq current-tag val)
+ (setq guts nil))
+ (if* (member name *ch-format* :test #'eq)
+ then (push val pending-ch-format)
+ else (when (not
+ (or (eq last-tag :style)
+ (and (listp last-tag) (eq (first last-tag) :style))
+ (eq last-tag :script)
+ (and (listp last-tag) (eq (first last-tag) :script))))
+ (dolist (tmp (reverse closed-pending-ch-format))
+ (save-state)
+ (setf current-tag tmp)
+ (setf guts nil)))
+ )
+ (when (not
+ (or (eq last-tag :style)
+ (and (listp last-tag) (eq (first last-tag) :style))
+ (eq last-tag :script)
+ (and (listp last-tag) (eq (first last-tag) :script))))
+ (setf closed-pending-ch-format nil))
+ )))
+
+ (:end-tag
+ (setf raw-mode-delimiter nil)
+ (when (or (and callback-only current-callback-tags)
+ (not callback-only))
+ (close-off-tags (list val) nil nil t)
+ (when (member val *ch-format* :test #'eq)
+ (setf pending-ch-format
+ (remove val pending-ch-format :count 1
+ :test #'(lambda (x y) (eq x (if (listp y) (first y) y)))))
+ (setf closed-pending-ch-format
+ (remove val closed-pending-ch-format :count 1
+ :test #'(lambda (x y) (eq x (if (listp y) (first y) y)))))
+ )
+ (dolist (tmp (reverse closed-pending-ch-format))
+ (save-state)
+ (setf current-tag tmp)
+ (setf guts nil))
+ (setf closed-pending-ch-format nil)
+ ))
+
+ (:comment
+ (setf raw-mode-delimiter nil)
+ (when (or (and callback-only current-callback-tags)
+ (not callback-only))
+ (push `(:comment ,val) guts)))
+
+ (:eof
+ (setf raw-mode-delimiter nil)
+ ;; close off all tags
+ (when (or (and callback-only current-callback-tags)
+ (not callback-only))
+ (close-off-tags '(:start-parse) nil collect-rogue-tags nil))
+ (put-back-tokenbuf tokenbuf)
+ (if collect-rogue-tags
+ (return (values (cdar guts) rogue-tags))
+ (return (cdar guts))))))))))
+
+
(defmethod parse-html (file &key callback-only callbacks collect-rogue-tags
- no-body-tags parse-entities)
+ no-body-tags parse-entities)
(declare (optimize (speed 3) (safety 1)))
(with-open-file (p file :direction :input)
(parse-html p :callback-only callback-only :callbacks callbacks
- :collect-rogue-tags collect-rogue-tags
- :no-body-tags no-body-tags
- :parse-entities parse-entities
- )))
-
+ :collect-rogue-tags collect-rogue-tags
+ :no-body-tags no-body-tags
+ :parse-entities parse-entities
+ )))
+
(defmethod parse-html ((str string) &key callback-only callbacks collect-rogue-tags
- no-body-tags parse-entities)
+ no-body-tags parse-entities)
(declare (optimize (speed 3) (safety 1)))
- (parse-html (make-string-input-stream str)
- :callback-only callback-only :callbacks callbacks
- :collect-rogue-tags collect-rogue-tags
- :no-body-tags no-body-tags
- :parse-entities parse-entities
- ))
-
-
-
-
-
-
-
-
-
+ (parse-html (make-string-input-stream str)
+ :callback-only callback-only :callbacks callbacks
+ :collect-rogue-tags collect-rogue-tags
+ :no-body-tags no-body-tags
+ :parse-entities parse-entities
+ ))
+
+
+
+
+
+
+
+
+
;;;;;;;;;;;; test
;;;(defun doit (ignore-data)
;;; (with-open-file (p "readme.htm")
;;; (loop
;;; (multiple-value-bind (val kind) (next-token p ignore-data)
-;;; ;(format t "~s -> ~s~%" kind val)
-;;;
-;;; (if* (eq kind :eof) then (return))))))
+;;; ;(format t "~s -> ~s~%" kind val)
+;;;
+;;; (if* (eq kind :eof) then (return))))))
;;;
;;;(defun pdoit (&optional (file "testa.html"))
;;; (with-open-file (p file)
;;;
;;;;; requires http client module to work
;;;(defun getparse (host path)
-;;; (parse-html (httpr-body
-;;; (parse-response
-;;; (simple-get host path)))))
+;;; (parse-html (httpr-body
+;;; (parse-response
+;;; (simple-get host path)))))
(provide :phtml)
;;
-;; copyright (c) 1986-2000 Franz Inc, Berkeley, CA
+;; copyright (c) 1986-2000 Franz Inc, Berkeley, CA
;;
;; This code is free software; you can redistribute it and/or
;; modify it under the terms of the version 2.1 of
-;; the GNU Lesser General Public License as published by
+;; the GNU Lesser General Public License as published by
;; the Free Software Foundation, as clarified by the AllegroServe
;; prequel found in license-allegroserve.txt.
;;
;; merchantability or fitness for a particular purpose. See the GNU
;; Lesser General Public License for more details.
;;
-;; Version 2.1 of the GNU Lesser General Public License is in the file
+;; Version 2.1 of the GNU Lesser General Public License is in the file
;; license-lgpl.txt that was distributed with this file.
;; If it is not present, you can access it from
;; http://www.gnu.org/copyleft/lesser.txt (until superseded by a newer
-;; version) or write to the Free Software Foundation, Inc., 59 Temple Place,
+;; version) or write to the Free Software Foundation, Inc., 59 Temple Place,
;; Suite 330, Boston, MA 02111-1307 USA
;;
-;; Change Log
+;; Change Log
;;
;; 10/14/00 add namespace example; xml-error related change
(defun test-one-file (int external-callback)
(let ((filename (concatenate 'string (format nil "~3,'0d" int) ".xml")))
- (equalp (with-open-file (p filename)
- (parse-xml p :external-callback external-callback
- :content-only t))
- (with-open-file (p (concatenate 'string "out/" filename))
- (parse-xml p)))))
+ (equalp (with-open-file (p filename)
+ (parse-xml p :external-callback external-callback
+ :content-only t))
+ (with-open-file (p (concatenate 'string "out/" filename))
+ (parse-xml p)))))
(defun test-some-files (max &key skip-list external-callback)
(dotimes (i max)
(if* (member (+ 1 i) skip-list) then
- (format t "i: ~s skipping...~%" (+ 1 i))
+ (format t "i: ~s skipping...~%" (+ 1 i))
else
- (format t "i: ~s equalp: ~s~%" (+ 1 i) (test-one-file (+ 1 i) external-callback)))))
+ (format t "i: ~s equalp: ~s~%" (+ 1 i) (test-one-file (+ 1 i) external-callback)))))
;; have to be in valid/sa directory when this is run
(defun test-sa-files ()
(defun test-one-bad-file (filename external-callback)
(ignore-errors
- (with-open-file (p filename)
+ (with-open-file (p filename)
(parse-xml p :external-callback external-callback
- :content-only t))))
+ :content-only t))))
(defun test-some-bad-files (max external-callback)
(dotimes (i max)
(let* ((index (+ 1 i))
- (filename (concatenate 'string (format nil "~3,'0d" index) ".xml")))
+ (filename (concatenate 'string (format nil "~3,'0d" index) ".xml")))
(multiple-value-bind (val error)
- (test-one-bad-file filename external-callback)
- (format t "i: ~s error: ~s~%"
- index (if error
- (simple-condition-format-arguments error) val))))))
+ (test-one-bad-file filename external-callback)
+ (format t "i: ~s error: ~s~%"
+ index (if error
+ (simple-condition-format-arguments error) val))))))
;; have to be in not-wf/sa directory when this is run
(defun test-not-wf-sa-files ()
(setf var-name (uri-path var-name))
(if* (equal var-name "null") then nil
else
- (let ((string (eval (intern var-name (find-package :user)))))
- (make-string-input-stream string))))
+ (let ((string (eval (intern var-name (find-package :user)))))
+ (make-string-input-stream string))))
(defvar *xml-example-string*
"<?xml version='1.0' encoding='utf-8'?>
(let ((pxml-version-strings nil))
(defun pxml-dribble-bug-hook (stream-or-string)
(if (stringp stream-or-string)
- (push stream-or-string pxml-version-strings)
- (loop for string in (reverse pxml-version-strings)
- do (write-string string stream-or-string)
- (terpri stream-or-string))))
+ (push stream-or-string pxml-version-strings)
+ (loop for string in (reverse pxml-version-strings)
+ do (write-string string stream-or-string)
+ (terpri stream-or-string))))
#+excl
(push 'pxml-dribble-bug-hook excl:*dribble-bug-hooks*)))
(declare (optimize (speed 3) (safety 1)))
(let ((code (char-code char)))
(or (eq code #x20)
- (eq code #x9)
- (eq code #xD)
- (eq code #xA))))
+ (eq code #x9)
+ (eq code #xD)
+ (eq code #xA))))
#+unused
(defmacro xml-eql-char-p (char)
(declare (optimize (speed 3) (safety 1)))
(let ((code (char-code char)))
(or (<= #x0041 code #x005A) (<= #x0061 code #x007A)
- (<= #x00C0 code #x00D6) (<= #x00D8 code #x00F6)
- (<= #x00F8 code #x00FF) (<= #x0100 code #x0131)
- (<= #x0134 code #x013E) (<= #x0141 code #x0148)
- (<= #x014A code #x017E) (<= #x0180 code #x01C3)
- (<= #x01CD code #x01F0) (<= #x01F4 code #x01F5)
- (<= #x01FA code #x0217) (<= #x0250 code #x02A8)
- (<= #x02BB code #x02C1) (= code #x0386) (<= #x0388 code #x038A)
- (= code #x038C) (<= #x038E code #x03A1) (<= #x03A3 code #x03CE)
- (<= #x03D0 code #x03D6) (= code #x03DA) (= code #x03DC) (= code #x03DE)
- (= code #x03E0) (<= #x03E2 code #x03F3) (<= #x0401 code #x040C)
- (<= #x040E code #x044F) (<= #x0451 code #x045C)
- (<= #x045E code #x0481) (<= #x0490 code #x04C4)
- (<= #x04C7 code #x04C8) (<= #x04CB code #x04CC)
- (<= #x04D0 code #x04EB) (<= #x04EE code #x04F5)
- (<= #x04F8 code #x04F9) (<= #x0531 code #x0556) (= code #x0559)
- (<= #x0561 code #x0586) (<= #x05D0 code #x05EA)
- (<= #x05F0 code #x05F2) (<= #x0621 code #x063A)
- (<= #x0641 code #x064A) (<= #x0671 code #x06B7)
- (<= #x06BA code #x06BE) (<= #x06C0 code #x06CE)
- (<= #x06D0 code #x06D3) (= code #x06D5) (<= #x06E5 code #x06E6)
- (<= #x0905 code #x0939) (= code #x093D) (<= #x0958 code #x0961)
- (<= #x0985 code #x098C) (<= #x098F code #x0990)
- (<= #x0993 code #x09A8) (<= #x09AA code #x09B0) (= code #x09B2)
- (<= #x09B6 code #x09B9) (<= #x09DC code #x09DD)
- (<= #x09DF code #x09E1) (<= #x09F0 code #x09F1)
- (<= #x0A05 code #x0A0A) (<= #x0A0F code #x0A10)
- (<= #x0A13 code #x0A28) (<= #x0A2A code #x0A30)
- (<= #x0A32 code #x0A33) (<= #x0A35 code #x0A36)
- (<= #x0A38 code #x0A39) (<= #x0A59 code #x0A5C) (= code #x0A5E)
- (<= #x0A72 code #x0A74) (<= #x0A85 code #x0A8B) (= code #x0A8D)
- (<= #x0A8F code #x0A91) (<= #x0A93 code #x0AA8)
- (<= #x0AAA code #x0AB0) (<= #x0AB2 code #x0AB3)
- (<= #x0AB5 code #x0AB9) (<= #x0ABD code #x0AE0)
- (<= #x0B05 code #x0B0C) (<= #x0B0F code #x0B10)
- (<= #x0B13 code #x0B28) (<= #x0B2A code #x0B30)
- (<= #x0B32 code #x0B33) (<= #x0B36 code #x0B39) (= code #x0B3D)
- (<= #x0B5C code #x0B5D) (<= #x0B5F code #x0B61)
- (<= #x0B85 code #x0B8A) (<= #x0B8E code #x0B90)
- (<= #x0B92 code #x0B95) (<= #x0B99 code #x0B9A) (= code #x0B9C)
- (<= #x0B9E code #x0B9F) (<= #x0BA3 code #x0BA4)
- (<= #x0BA8 code #x0BAA) (<= #x0BAE code #x0BB5)
- (<= #x0BB7 code #x0BB9) (<= #x0C05 code #x0C0C)
- (<= #x0C0E code #x0C10) (<= #x0C12 code #x0C28)
- (<= #x0C2A code #x0C33) (<= #x0C35 code #x0C39)
- (<= #x0C60 code #x0C61) (<= #x0C85 code #x0C8C)
- (<= #x0C8E code #x0C90) (<= #x0C92 code #x0CA8)
- (<= #x0CAA code #x0CB3) (<= #x0CB5 code #x0CB9) (= code #x0CDE)
- (<= #x0CE0 code #x0CE1) (<= #x0D05 code #x0D0C)
- (<= #x0D0E code #x0D10) (<= #x0D12 code #x0D28)
- (<= #x0D2A code #x0D39) (<= #x0D60 code #x0D61)
- (<= #x0E01 code #x0E2E) (= code #x0E30) (<= #x0E32 code #x0E33)
- (<= #x0E40 code #x0E45) (<= #x0E81 code #x0E82) (= code #x0E84)
- (<= #x0E87 code #x0E88) (= code #x0E8A) (= code #x0E8D)
- (<= #x0E94 code #x0E97) (<= #x0E99 code #x0E9F)
- (<= #x0EA1 code #x0EA3) (= code #x0EA5) (= code #x0EA7)
- (<= #x0EAA code #x0EAB) (<= #x0EAD code #x0EAE) (= code #x0EB0)
- (<= #x0EB2 code #x0EB3) (= code #x0EBD) (<= #x0EC0 code #x0EC4)
- (<= #x0F40 code #x0F47) (<= #x0F49 code #x0F69)
- (<= #x10A0 code #x10C5) (<= #x10D0 code #x10F6) (= code #x1100)
- (<= #x1102 code #x1103) (<= #x1105 code #x1107) (= code #x1109)
- (<= #x110B code #x110C) (<= #x110E code #x1112) (= code #x113C)
- (= code #x113E) (= code #x1140) (= code #x114C) (= code #x114E) (= code #x1150)
- (<= #x1154 code #x1155) (= code #x1159) (<= #x115F code #x1161)
- (= code #x1163) (= code #x1165) (= code #x1167) (= code #x1169)
- (<= #x116D code #x116E) (<= #x1172 code #x1173) (= code #x1175)
- (= code #x119E) (= code #x11A8) (= code #x11AB) (<= #x11AE code #x11AF)
- (<= #x11B7 code #x11B8) (= code #x11BA) (<= #x11BC code #x11C2)
- (= code #x11EB) (= code #x11F0) (= code #x11F9) (<= #x1E00 code #x1E9B)
- (<= #x1EA0 code #x1EF9) (<= #x1F00 code #x1F15)
- (<= #x1F18 code #x1F1D) (<= #x1F20 code #x1F45)
- (<= #x1F48 code #x1F4D) (<= #x1F50 code #x1F57) (= code #x1F59)
- (= code #x1F5B) (= code #x1F5D) (<= #x1F5F code #x1F7D)
- (<= #x1F80 code #x1FB4) (<= #x1FB6 code #x1FBC) (= code #x1FBE)
- (<= #x1FC2 code #x1FC4) (<= #x1FC6 code #x1FCC)
- (<= #x1FD0 code #x1FD3) (<= #x1FD6 code #x1FDB)
- (<= #x1FE0 code #x1FEC) (<= #x1FF2 code #x1FF4)
- (<= #x1FF6 code #x1FFC) (= code #x2126) (<= #x212A code #x212B)
- (= code #x212E) (<= #x2180 code #x2182) (<= #x3041 code #x3094)
- (<= #x30A1 code #x30FA) (<= #x3105 code #x312C)
- (<= #xAC00 code #xD7A3)
- )))
+ (<= #x00C0 code #x00D6) (<= #x00D8 code #x00F6)
+ (<= #x00F8 code #x00FF) (<= #x0100 code #x0131)
+ (<= #x0134 code #x013E) (<= #x0141 code #x0148)
+ (<= #x014A code #x017E) (<= #x0180 code #x01C3)
+ (<= #x01CD code #x01F0) (<= #x01F4 code #x01F5)
+ (<= #x01FA code #x0217) (<= #x0250 code #x02A8)
+ (<= #x02BB code #x02C1) (= code #x0386) (<= #x0388 code #x038A)
+ (= code #x038C) (<= #x038E code #x03A1) (<= #x03A3 code #x03CE)
+ (<= #x03D0 code #x03D6) (= code #x03DA) (= code #x03DC) (= code #x03DE)
+ (= code #x03E0) (<= #x03E2 code #x03F3) (<= #x0401 code #x040C)
+ (<= #x040E code #x044F) (<= #x0451 code #x045C)
+ (<= #x045E code #x0481) (<= #x0490 code #x04C4)
+ (<= #x04C7 code #x04C8) (<= #x04CB code #x04CC)
+ (<= #x04D0 code #x04EB) (<= #x04EE code #x04F5)
+ (<= #x04F8 code #x04F9) (<= #x0531 code #x0556) (= code #x0559)
+ (<= #x0561 code #x0586) (<= #x05D0 code #x05EA)
+ (<= #x05F0 code #x05F2) (<= #x0621 code #x063A)
+ (<= #x0641 code #x064A) (<= #x0671 code #x06B7)
+ (<= #x06BA code #x06BE) (<= #x06C0 code #x06CE)
+ (<= #x06D0 code #x06D3) (= code #x06D5) (<= #x06E5 code #x06E6)
+ (<= #x0905 code #x0939) (= code #x093D) (<= #x0958 code #x0961)
+ (<= #x0985 code #x098C) (<= #x098F code #x0990)
+ (<= #x0993 code #x09A8) (<= #x09AA code #x09B0) (= code #x09B2)
+ (<= #x09B6 code #x09B9) (<= #x09DC code #x09DD)
+ (<= #x09DF code #x09E1) (<= #x09F0 code #x09F1)
+ (<= #x0A05 code #x0A0A) (<= #x0A0F code #x0A10)
+ (<= #x0A13 code #x0A28) (<= #x0A2A code #x0A30)
+ (<= #x0A32 code #x0A33) (<= #x0A35 code #x0A36)
+ (<= #x0A38 code #x0A39) (<= #x0A59 code #x0A5C) (= code #x0A5E)
+ (<= #x0A72 code #x0A74) (<= #x0A85 code #x0A8B) (= code #x0A8D)
+ (<= #x0A8F code #x0A91) (<= #x0A93 code #x0AA8)
+ (<= #x0AAA code #x0AB0) (<= #x0AB2 code #x0AB3)
+ (<= #x0AB5 code #x0AB9) (<= #x0ABD code #x0AE0)
+ (<= #x0B05 code #x0B0C) (<= #x0B0F code #x0B10)
+ (<= #x0B13 code #x0B28) (<= #x0B2A code #x0B30)
+ (<= #x0B32 code #x0B33) (<= #x0B36 code #x0B39) (= code #x0B3D)
+ (<= #x0B5C code #x0B5D) (<= #x0B5F code #x0B61)
+ (<= #x0B85 code #x0B8A) (<= #x0B8E code #x0B90)
+ (<= #x0B92 code #x0B95) (<= #x0B99 code #x0B9A) (= code #x0B9C)
+ (<= #x0B9E code #x0B9F) (<= #x0BA3 code #x0BA4)
+ (<= #x0BA8 code #x0BAA) (<= #x0BAE code #x0BB5)
+ (<= #x0BB7 code #x0BB9) (<= #x0C05 code #x0C0C)
+ (<= #x0C0E code #x0C10) (<= #x0C12 code #x0C28)
+ (<= #x0C2A code #x0C33) (<= #x0C35 code #x0C39)
+ (<= #x0C60 code #x0C61) (<= #x0C85 code #x0C8C)
+ (<= #x0C8E code #x0C90) (<= #x0C92 code #x0CA8)
+ (<= #x0CAA code #x0CB3) (<= #x0CB5 code #x0CB9) (= code #x0CDE)
+ (<= #x0CE0 code #x0CE1) (<= #x0D05 code #x0D0C)
+ (<= #x0D0E code #x0D10) (<= #x0D12 code #x0D28)
+ (<= #x0D2A code #x0D39) (<= #x0D60 code #x0D61)
+ (<= #x0E01 code #x0E2E) (= code #x0E30) (<= #x0E32 code #x0E33)
+ (<= #x0E40 code #x0E45) (<= #x0E81 code #x0E82) (= code #x0E84)
+ (<= #x0E87 code #x0E88) (= code #x0E8A) (= code #x0E8D)
+ (<= #x0E94 code #x0E97) (<= #x0E99 code #x0E9F)
+ (<= #x0EA1 code #x0EA3) (= code #x0EA5) (= code #x0EA7)
+ (<= #x0EAA code #x0EAB) (<= #x0EAD code #x0EAE) (= code #x0EB0)
+ (<= #x0EB2 code #x0EB3) (= code #x0EBD) (<= #x0EC0 code #x0EC4)
+ (<= #x0F40 code #x0F47) (<= #x0F49 code #x0F69)
+ (<= #x10A0 code #x10C5) (<= #x10D0 code #x10F6) (= code #x1100)
+ (<= #x1102 code #x1103) (<= #x1105 code #x1107) (= code #x1109)
+ (<= #x110B code #x110C) (<= #x110E code #x1112) (= code #x113C)
+ (= code #x113E) (= code #x1140) (= code #x114C) (= code #x114E) (= code #x1150)
+ (<= #x1154 code #x1155) (= code #x1159) (<= #x115F code #x1161)
+ (= code #x1163) (= code #x1165) (= code #x1167) (= code #x1169)
+ (<= #x116D code #x116E) (<= #x1172 code #x1173) (= code #x1175)
+ (= code #x119E) (= code #x11A8) (= code #x11AB) (<= #x11AE code #x11AF)
+ (<= #x11B7 code #x11B8) (= code #x11BA) (<= #x11BC code #x11C2)
+ (= code #x11EB) (= code #x11F0) (= code #x11F9) (<= #x1E00 code #x1E9B)
+ (<= #x1EA0 code #x1EF9) (<= #x1F00 code #x1F15)
+ (<= #x1F18 code #x1F1D) (<= #x1F20 code #x1F45)
+ (<= #x1F48 code #x1F4D) (<= #x1F50 code #x1F57) (= code #x1F59)
+ (= code #x1F5B) (= code #x1F5D) (<= #x1F5F code #x1F7D)
+ (<= #x1F80 code #x1FB4) (<= #x1FB6 code #x1FBC) (= code #x1FBE)
+ (<= #x1FC2 code #x1FC4) (<= #x1FC6 code #x1FCC)
+ (<= #x1FD0 code #x1FD3) (<= #x1FD6 code #x1FDB)
+ (<= #x1FE0 code #x1FEC) (<= #x1FF2 code #x1FF4)
+ (<= #x1FF6 code #x1FFC) (= code #x2126) (<= #x212A code #x212B)
+ (= code #x212E) (<= #x2180 code #x2182) (<= #x3041 code #x3094)
+ (<= #x30A1 code #x30FA) (<= #x3105 code #x312C)
+ (<= #xAC00 code #xD7A3)
+ )))
(defun xml-ideographic-p (char)
(declare (optimize (speed 3) (safety 1)))
(declare (optimize (speed 3) (safety 1)))
(let ((code (char-code char)))
(or (<= #x0300 code #x0345) (<= #x0360 code #x0361)
- (<= #x0483 code #x0486) (<= #x0591 code #x05A1)
- (<= #x05A3 code #x05B9) (<= #x05BB code #x05BD) (= code #x05BF)
- (<= #x05C1 code #x05C2) (= code #x05C4) (<= #x064B code #x0652)
- (= code #x0670) (<= #x06D6 code #x06DC) (<= #x06DD code #x06DF)
- (<= #x06E0 code #x06E4) (<= #x06E7 code #x06E8)
- (<= #x06EA code #x06ED) (<= #x0901 code #x0903) (= code #x093C)
- (<= #x093E code #x094C) (= code #x094D) (<= #x0951 code #x0954)
- (<= #x0962 code #x0963) (<= #x0981 code #x0983) (= code #x09BC)
- (<= #x09BE code #x09BF) (<= #x09C0 code #x09C4)
- (<= #x09C7 code #x09C8) (<= #x09CB code #x09CD) (= code #x09D7)
- (<= #x09E2 code #x09E3) (= code #x0A02) (= code #x0A3C) (= code #x0A3E)
- (= code #x0A3F) (<= #x0A40 code #x0A42) (<= #x0A47 code #x0A48)
- (<= #x0A4B code #x0A4D) (<= #x0A70 code #x0A71)
- (<= #x0A81 code #x0A83) (= code #x0ABC) (<= #x0ABE code #x0AC5)
- (<= #x0AC7 code #x0AC9) (<= #x0ACB code #x0ACD)
- (<= #x0B01 code #x0B03) (= code #x0B3C) (<= #x0B3E code #x0B43)
- (<= #x0B47 code #x0B48) (<= #x0B4B code #x0B4D)
- (<= #x0B56 code #x0B57) (<= #x0B82 code #x0B83)
- (<= #x0BBE code #x0BC2) (<= #x0BC6 code #x0BC8)
- (<= #x0BCA code #x0BCD) (= code #x0BD7) (<= #x0C01 code #x0C03)
- (<= #x0C3E code #x0C44) (<= #x0C46 code #x0C48)
- (<= #x0C4A code #x0C4D) (<= #x0C55 code #x0C56)
- (<= #x0C82 code #x0C83) (<= #x0CBE code #x0CC4)
- (<= #x0CC6 code #x0CC8) (<= #x0CCA code #x0CCD)
- (<= #x0CD5 code #x0CD6) (<= #x0D02 code #x0D03)
- (<= #x0D3E code #x0D43) (<= #x0D46 code #x0D48)
- (<= #x0D4A code #x0D4D) (= code #x0D57) (= code #x0E31)
- (<= #x0E34 code #x0E3A) (<= #x0E47 code #x0E4E) (= code #x0EB1)
- (<= #x0EB4 code #x0EB9) (<= #x0EBB code #x0EBC)
- (<= #x0EC8 code #x0ECD) (<= #x0F18 code #x0F19) (= code #x0F35)
- (= code #x0F37) (= code #x0F39) (= code #x0F3E) (= code #x0F3F)
- (<= #x0F71 code #x0F84) (<= #x0F86 code #x0F8B)
- (<= #x0F90 code #x0F95) (= code #x0F97) (<= #x0F99 code #x0FAD)
- (<= #x0FB1 code #x0FB7) (= code #x0FB9) (<= #x20D0 code #x20DC)
- (= code #x20E1) (<= #x302A code #x302F) (= code #x3099) (= code #x309A)
- )))
+ (<= #x0483 code #x0486) (<= #x0591 code #x05A1)
+ (<= #x05A3 code #x05B9) (<= #x05BB code #x05BD) (= code #x05BF)
+ (<= #x05C1 code #x05C2) (= code #x05C4) (<= #x064B code #x0652)
+ (= code #x0670) (<= #x06D6 code #x06DC) (<= #x06DD code #x06DF)
+ (<= #x06E0 code #x06E4) (<= #x06E7 code #x06E8)
+ (<= #x06EA code #x06ED) (<= #x0901 code #x0903) (= code #x093C)
+ (<= #x093E code #x094C) (= code #x094D) (<= #x0951 code #x0954)
+ (<= #x0962 code #x0963) (<= #x0981 code #x0983) (= code #x09BC)
+ (<= #x09BE code #x09BF) (<= #x09C0 code #x09C4)
+ (<= #x09C7 code #x09C8) (<= #x09CB code #x09CD) (= code #x09D7)
+ (<= #x09E2 code #x09E3) (= code #x0A02) (= code #x0A3C) (= code #x0A3E)
+ (= code #x0A3F) (<= #x0A40 code #x0A42) (<= #x0A47 code #x0A48)
+ (<= #x0A4B code #x0A4D) (<= #x0A70 code #x0A71)
+ (<= #x0A81 code #x0A83) (= code #x0ABC) (<= #x0ABE code #x0AC5)
+ (<= #x0AC7 code #x0AC9) (<= #x0ACB code #x0ACD)
+ (<= #x0B01 code #x0B03) (= code #x0B3C) (<= #x0B3E code #x0B43)
+ (<= #x0B47 code #x0B48) (<= #x0B4B code #x0B4D)
+ (<= #x0B56 code #x0B57) (<= #x0B82 code #x0B83)
+ (<= #x0BBE code #x0BC2) (<= #x0BC6 code #x0BC8)
+ (<= #x0BCA code #x0BCD) (= code #x0BD7) (<= #x0C01 code #x0C03)
+ (<= #x0C3E code #x0C44) (<= #x0C46 code #x0C48)
+ (<= #x0C4A code #x0C4D) (<= #x0C55 code #x0C56)
+ (<= #x0C82 code #x0C83) (<= #x0CBE code #x0CC4)
+ (<= #x0CC6 code #x0CC8) (<= #x0CCA code #x0CCD)
+ (<= #x0CD5 code #x0CD6) (<= #x0D02 code #x0D03)
+ (<= #x0D3E code #x0D43) (<= #x0D46 code #x0D48)
+ (<= #x0D4A code #x0D4D) (= code #x0D57) (= code #x0E31)
+ (<= #x0E34 code #x0E3A) (<= #x0E47 code #x0E4E) (= code #x0EB1)
+ (<= #x0EB4 code #x0EB9) (<= #x0EBB code #x0EBC)
+ (<= #x0EC8 code #x0ECD) (<= #x0F18 code #x0F19) (= code #x0F35)
+ (= code #x0F37) (= code #x0F39) (= code #x0F3E) (= code #x0F3F)
+ (<= #x0F71 code #x0F84) (<= #x0F86 code #x0F8B)
+ (<= #x0F90 code #x0F95) (= code #x0F97) (<= #x0F99 code #x0FAD)
+ (<= #x0FB1 code #x0FB7) (= code #x0FB9) (<= #x20D0 code #x20DC)
+ (= code #x20E1) (<= #x302A code #x302F) (= code #x3099) (= code #x309A)
+ )))
(defun xml-digit-p (char)
(declare (optimize (speed 3) (safety 1)))
(let ((code (char-code char)))
(or (<= #x0030 code #x0039) (<= #x0660 code #x0669)
- (<= #x06F0 code #x06F9) (<= #x0966 code #x096F)
- (<= #x09E6 code #x09EF) (<= #x0A66 code #x0A6F)
- (<= #x0AE6 code #x0AEF) (<= #x0B66 code #x0B6F)
- (<= #x0BE7 code #x0BEF) (<= #x0C66 code #x0C6F)
- (<= #x0CE6 code #x0CEF) (<= #x0D66 code #x0D6F)
- (<= #x0E50 code #x0E59) (<= #x0ED0 code #x0ED9)
- (<= #x0F20 code #x0F29)
- )))
+ (<= #x06F0 code #x06F9) (<= #x0966 code #x096F)
+ (<= #x09E6 code #x09EF) (<= #x0A66 code #x0A6F)
+ (<= #x0AE6 code #x0AEF) (<= #x0B66 code #x0B6F)
+ (<= #x0BE7 code #x0BEF) (<= #x0C66 code #x0C6F)
+ (<= #x0CE6 code #x0CEF) (<= #x0D66 code #x0D6F)
+ (<= #x0E50 code #x0E59) (<= #x0ED0 code #x0ED9)
+ (<= #x0F20 code #x0F29)
+ )))
(defun xml-extender-p (char)
(declare (optimize (speed 3) (safety 1)))
(let ((code (char-code char)))
(or (= code #x00B7) (= code #x02D0) (= code #x02D1) (= code #x0387) (= code #x0640)
- (= code #x0E46) (= code #x0EC6) (= code #x3005) (<= #x3031 code #x3035)
- (<= #x309D code #x309E) (<= #x30FC code #x30FE)
- )))
+ (= code #x0E46) (= code #x0EC6) (= code #x3005) (<= #x3031 code #x3035)
+ (<= #x309D code #x309E) (<= #x30FC code #x30FE)
+ )))
(defmacro xml-letter-p (char)
`(or (xml-base-char-p ,char) (xml-ideographic-p ,char)))
(declare (optimize (speed 3) (safety 1)))
(without-scheduling
(do ((cols *collectors* (cdr cols)))
- ((null cols)
- ; toss it away
- nil)
+ ((null cols)
+ ; toss it away
+ nil)
(if* (null (car cols))
- then (setf (car cols) col)
- (return)))))
+ then (setf (car cols) col)
+ (return)))))
(defun pub-id-char-p (char)
(declare (optimize (speed 3) (safety 1)))
(let ((code (char-code char)))
(or (= #x20 code) (= #xD code) (= #xA code)
- (<= (char-code #\a) code (char-code #\z))
- (<= (char-code #\A) code (char-code #\Z))
- (<= (char-code #\0) code (char-code #\9))
- (member char '( #\- #\' #\( #\) #\+ #\, #\. #\/ #\: #\= #\?
- #\; #\! #\* #\# #\@ #\$ #\_ #\%)))))
+ (<= (char-code #\a) code (char-code #\z))
+ (<= (char-code #\A) code (char-code #\Z))
+ (<= (char-code #\0) code (char-code #\9))
+ (member char '( #\- #\' #\( #\) #\+ #\, #\. #\/ #\: #\= #\?
+ #\; #\! #\* #\# #\@ #\$ #\_ #\%)))))
(defparameter *keyword-package* (find-package :keyword))
(let (buf)
(without-scheduling
(do* ((bufs *tokenbufs* (cdr bufs))
- (this (car bufs) (car bufs)))
- ((null bufs))
- (if* this
- then (setf (car bufs) nil)
- (setq buf this)
- (return))))
+ (this (car bufs) (car bufs)))
+ ((null bufs))
+ (if* this
+ then (setf (car bufs) nil)
+ (setq buf this)
+ (return))))
(if* buf
then (setf (tokenbuf-cur buf) 0)
- (setf (tokenbuf-max buf) 0)
- (setf (tokenbuf-stream buf) nil)
- buf
+ (setf (tokenbuf-max buf) 0)
+ (setf (tokenbuf-stream buf) nil)
+ buf
else (make-tokenbuf
- :cur 0
- :max 0
- :data (make-array 1024 :element-type 'character)))))
+ :cur 0
+ :max 0
+ :data (make-array 1024 :element-type 'character)))))
(defstruct collector
next ; next index to set
(if* (not ns-to-package)
then (excl::intern* (collector-data coll) (collector-next coll) package)
else
- (let (new-package (data (collector-data coll)))
- (if* (and (eq (schar data 0) #\x)
- (eq (schar data 1) #\m)
- (eq (schar data 2) #\l)
- (eq (schar data 3) #\n)
- (eq (schar data 4) #\s)
- (or (eq (schar data 5) #\:)
- (= (collector-next coll) 5)))
- then ;; putting xmlns: in :none namespace
- (setf new-package (assoc :none ns-to-package))
- (when new-package (setf package (rest new-package)))
- (excl::intern* (collector-data coll) (collector-next coll) package)
- else
- (let ((colon-index -1)
- (data (collector-data coll)))
- (dotimes (i (collector-next coll))
- (when (eq (schar data i) #\:)
- (setf colon-index i)
- (return)))
- (if* (> colon-index -1) then
- (let ((string1 (make-string colon-index))
- new-package string2)
- (dotimes (i colon-index)
- (setf (schar string1 i) (schar data i)))
- (setf new-package (assoc string1 ns-to-package :test 'string=))
- (if* new-package
- then
- (setf string2 (make-string (- (collector-next coll)
- (+ 1 colon-index))))
- (dotimes (i (- (collector-next coll)
- (+ 1 colon-index)))
- (setf (schar string2 i)
- (schar data (+ colon-index 1 i))))
- (excl::intern string2 (rest new-package))
- else
- (excl::intern* (collector-data coll)
- (collector-next coll) package)))
- else
- (let ((new-package (assoc :none ns-to-package)))
- (when new-package
- (setf package (rest new-package))))
- (excl::intern* (collector-data coll)
- (collector-next coll) package)))
- ))
- ))
+ (let (new-package (data (collector-data coll)))
+ (if* (and (eq (schar data 0) #\x)
+ (eq (schar data 1) #\m)
+ (eq (schar data 2) #\l)
+ (eq (schar data 3) #\n)
+ (eq (schar data 4) #\s)
+ (or (eq (schar data 5) #\:)
+ (= (collector-next coll) 5)))
+ then ;; putting xmlns: in :none namespace
+ (setf new-package (assoc :none ns-to-package))
+ (when new-package (setf package (rest new-package)))
+ (excl::intern* (collector-data coll) (collector-next coll) package)
+ else
+ (let ((colon-index -1)
+ (data (collector-data coll)))
+ (dotimes (i (collector-next coll))
+ (when (eq (schar data i) #\:)
+ (setf colon-index i)
+ (return)))
+ (if* (> colon-index -1) then
+ (let ((string1 (make-string colon-index))
+ new-package string2)
+ (dotimes (i colon-index)
+ (setf (schar string1 i) (schar data i)))
+ (setf new-package (assoc string1 ns-to-package :test 'string=))
+ (if* new-package
+ then
+ (setf string2 (make-string (- (collector-next coll)
+ (+ 1 colon-index))))
+ (dotimes (i (- (collector-next coll)
+ (+ 1 colon-index)))
+ (setf (schar string2 i)
+ (schar data (+ colon-index 1 i))))
+ (excl::intern string2 (rest new-package))
+ else
+ (excl::intern* (collector-data coll)
+ (collector-next coll) package)))
+ else
+ (let ((new-package (assoc :none ns-to-package)))
+ (when new-package
+ (setf package (rest new-package))))
+ (excl::intern* (collector-data coll)
+ (collector-next coll) package)))
+ ))
+ ))
(defun compute-coll-string (coll)
(declare (optimize (speed 3) (safety 1)))
;; return the string that's in the collection
(let ((str (make-string (collector-next coll)))
- (from (collector-data coll)))
+ (from (collector-data coll)))
(dotimes (i (collector-next coll))
(setf (schar str i) (schar from i)))
;; increase the size of the data portion of the collector and then
;; add the given char at the end
(let* ((odata (collector-data coll))
- (ndata (make-string (* 2 (length odata)))))
+ (ndata (make-string (* 2 (length odata)))))
(dotimes (i (length odata))
(setf (schar ndata i) (schar odata i)))
(setf (collector-data coll) ndata)
(declare (optimize (speed 3) (safety 1)))
(without-scheduling
(do ((bufs *tokenbufs* (cdr bufs)))
- ((null bufs)
- ; toss it away
- nil)
+ ((null bufs)
+ ; toss it away
+ nil)
(if* (null (car bufs))
- then (setf (car bufs) buf)
- (return)))))
+ then (setf (car bufs) buf)
+ (return)))))
(defun get-collector ()
(declare (optimize (speed 3) (safety 1)))
(let (col)
(without-scheduling
(do* ((cols *collectors* (cdr cols))
- (this (car cols) (car cols)))
- ((null cols))
- (if* this
- then (setf (car cols) nil)
- (setq col this)
- (return))))
+ (this (car cols) (car cols)))
+ ((null cols))
+ (if* this
+ then (setf (car cols) nil)
+ (setq col this)
+ (return))))
(if* col
then (setf (collector-next col) 0)
- col
+ col
else (make-collector
- :next 0
- :max 100
- :data (make-string 100)))))
+ :next 0
+ :max 100
+ :data (make-string 100)))))
(defmacro next-char (tokenbuf read-sequence-func)
`(let ((cur (tokenbuf-cur ,tokenbuf))
- (tb (tokenbuf-data ,tokenbuf)))
+ (tb (tokenbuf-data ,tokenbuf)))
(if* (>= cur (tokenbuf-max ,tokenbuf))
- then ;; fill buffer
- (if* (or (not (tokenbuf-stream ,tokenbuf))
- (zerop (setf (tokenbuf-max ,tokenbuf)
- (if* ,read-sequence-func
- then (funcall ,read-sequence-func tb
- (tokenbuf-stream ,tokenbuf))
- else (read-sequence tb (tokenbuf-stream ,tokenbuf))))))
- then (setq cur nil) ;; eof
- else (setq cur 0)))
+ then ;; fill buffer
+ (if* (or (not (tokenbuf-stream ,tokenbuf))
+ (zerop (setf (tokenbuf-max ,tokenbuf)
+ (if* ,read-sequence-func
+ then (funcall ,read-sequence-func tb
+ (tokenbuf-stream ,tokenbuf))
+ else (read-sequence tb (tokenbuf-stream ,tokenbuf))))))
+ then (setq cur nil) ;; eof
+ else (setq cur 0)))
(if* cur
- then (prog1
- (let ((cc (schar tb cur)))
- (if (and (tokenbuf-stream ,tokenbuf) (eq #\return cc)) #\newline cc))
- (setf (tokenbuf-cur ,tokenbuf) (1+ cur))))))
+ then (prog1
+ (let ((cc (schar tb cur)))
+ (if (and (tokenbuf-stream ,tokenbuf) (eq #\return cc)) #\newline cc))
+ (setf (tokenbuf-cur ,tokenbuf) (1+ cur))))))
(defun get-next-char (iostruct)
(declare (optimize (speed 3) (safety 1)))
(let* (from-stream (tmp-char
- (let (char)
- (if* (iostruct-unget-char iostruct) then
- ;; from-stream is used to do input CR/LF normalization
- (setf from-stream t)
- (setf char (first (iostruct-unget-char iostruct)))
- (setf (iostruct-unget-char iostruct) (rest (iostruct-unget-char iostruct)))
- char
- elseif (iostruct-entity-bufs iostruct) then
- (let (entity-buf)
- (loop
- (setf entity-buf (first (iostruct-entity-bufs iostruct)))
- (if* (streamp (tokenbuf-stream entity-buf))
- then (setf from-stream t)
- else (setf from-stream nil))
- (setf char (next-char entity-buf (iostruct-read-sequence-func iostruct)))
- (when char (return))
- (when (streamp (tokenbuf-stream entity-buf))
- (close (tokenbuf-stream entity-buf))
- (put-back-tokenbuf entity-buf))
- (setf (iostruct-entity-bufs iostruct) (rest (iostruct-entity-bufs iostruct)))
- (setf (iostruct-entity-names iostruct) (rest (iostruct-entity-names iostruct)))
- (when (not (iostruct-entity-bufs iostruct)) (return))))
- (if* char then char
- else (next-char (iostruct-tokenbuf iostruct)
- (iostruct-read-sequence-func iostruct)))
- else (setf from-stream t)
- (next-char (iostruct-tokenbuf iostruct)
- (iostruct-read-sequence-func iostruct))))))
+ (let (char)
+ (if* (iostruct-unget-char iostruct) then
+ ;; from-stream is used to do input CR/LF normalization
+ (setf from-stream t)
+ (setf char (first (iostruct-unget-char iostruct)))
+ (setf (iostruct-unget-char iostruct) (rest (iostruct-unget-char iostruct)))
+ char
+ elseif (iostruct-entity-bufs iostruct) then
+ (let (entity-buf)
+ (loop
+ (setf entity-buf (first (iostruct-entity-bufs iostruct)))
+ (if* (streamp (tokenbuf-stream entity-buf))
+ then (setf from-stream t)
+ else (setf from-stream nil))
+ (setf char (next-char entity-buf (iostruct-read-sequence-func iostruct)))
+ (when char (return))
+ (when (streamp (tokenbuf-stream entity-buf))
+ (close (tokenbuf-stream entity-buf))
+ (put-back-tokenbuf entity-buf))
+ (setf (iostruct-entity-bufs iostruct) (rest (iostruct-entity-bufs iostruct)))
+ (setf (iostruct-entity-names iostruct) (rest (iostruct-entity-names iostruct)))
+ (when (not (iostruct-entity-bufs iostruct)) (return))))
+ (if* char then char
+ else (next-char (iostruct-tokenbuf iostruct)
+ (iostruct-read-sequence-func iostruct)))
+ else (setf from-stream t)
+ (next-char (iostruct-tokenbuf iostruct)
+ (iostruct-read-sequence-func iostruct))))))
(if* (and from-stream (eq tmp-char #\return)) then #\newline else tmp-char)))
(defun unicode-check (p tokenbuf)
#+allegro
(let ((format (ignore-errors (excl:sniff-for-unicode p))))
(if* (eq format (find-external-format :unicode))
- then
- (setf (stream-external-format p) format)
- else
- (setf (stream-external-format p) (find-external-format :utf8))))
+ then
+ (setf (stream-external-format p) format)
+ else
+ (setf (stream-external-format p) (find-external-format :utf8))))
#-allegro
(let* ((c (read-char p nil)) c2
- (c-code (if c (char-code c) nil)))
+ (c-code (if c (char-code c) nil)))
(if* (eq #xFF c-code) then
- (setf c2 (read-char p nil))
- (setf c-code (if c (char-code c2) nil))
- (if* (eq #xFE c-code) then
- (format t "set unicode~%")
- (setf (stream-external-format p)
- (find-external-format
- #+allegro :unicode
- #-allegro :fat-little))
- else
- (xml-error "stream has incomplete Unicode marker"))
- else (setf (stream-external-format p)
- (find-external-format :utf8))
- (when c
- (push c (iostruct-unget-char tokenbuf))
- #+ignore (unread-char c p) ;; bug when there is single ^M in file
- )))))
+ (setf c2 (read-char p nil))
+ (setf c-code (if c (char-code c2) nil))
+ (if* (eq #xFE c-code) then
+ (format t "set unicode~%")
+ (setf (stream-external-format p)
+ (find-external-format
+ #+allegro :unicode
+ #-allegro :fat-little))
+ else
+ (xml-error "stream has incomplete Unicode marker"))
+ else (setf (stream-external-format p)
+ (find-external-format :utf8))
+ (when c
+ (push c (iostruct-unget-char tokenbuf))
+ #+ignore (unread-char c p) ;; bug when there is single ^M in file
+ )))))
(defun add-default-values (val attlist-data)
(declare (ignorable old-coll) (optimize (speed 3) (safety 1)))
(if* (symbolp val)
then
- (let* ((tag-defaults (assoc val attlist-data)) defaults)
- (dolist (def (rest tag-defaults))
- (if* (stringp (third def)) then
- (push (first def) defaults)
- (push (if (eq (second def) :CDATA) (third def)
- (normalize-attrib-value (third def))) defaults)
- elseif (and (eq (third def) :FIXED) (stringp (fourth def))) then
- (push (first def) defaults)
- (push (if (eq (second def) :CDATA) (fourth def)
- (normalize-attrib-value (fourth def))) defaults)
- ))
- (if* defaults then
- (setf val (append (list val) (nreverse defaults)))
- else val)
- )
+ (let* ((tag-defaults (assoc val attlist-data)) defaults)
+ (dolist (def (rest tag-defaults))
+ (if* (stringp (third def)) then
+ (push (first def) defaults)
+ (push (if (eq (second def) :CDATA) (third def)
+ (normalize-attrib-value (third def))) defaults)
+ elseif (and (eq (third def) :FIXED) (stringp (fourth def))) then
+ (push (first def) defaults)
+ (push (if (eq (second def) :CDATA) (fourth def)
+ (normalize-attrib-value (fourth def))) defaults)
+ ))
+ (if* defaults then
+ (setf val (append (list val) (nreverse defaults)))
+ else val)
+ )
else
- ;; first make sure there are no errors in given list
- (let ((pairs (rest val)))
- (loop
- (when (null pairs) (return))
- (let ((this-one (first pairs)))
- (setf pairs (rest (rest pairs)))
- (when (member this-one pairs)
- (xml-error (concatenate 'string "Entity: "
- (string (first val))
- " has multiple "
- (string this-one)
- " attribute values"))))))
- (let ((tag-defaults (assoc (first val) attlist-data)) defaults)
- (dolist (def (rest tag-defaults))
- (let ((old (member (first def) (rest val))))
- (if* (not old) then
- (if* (stringp (third def)) then
- (push (first def) defaults)
- (push (third def) defaults)
- elseif (and (eq (third def) :FIXED) (stringp (fourth def))) then
- (push (first def) defaults)
- (push (fourth def) defaults))
- else
- (push (first old) defaults)
- (push (second old) defaults))))
- (if* defaults then
- ;; now look for attributes in original list that weren't in dtd
- (let ((tmp-val (rest val)) att att-val)
- (loop
- (when (null tmp-val) (return))
- (setf att (first tmp-val))
- (setf att-val (second tmp-val))
- (setf tmp-val (rest (rest tmp-val)))
- (when (not (member att defaults))
- (push att defaults)
- (push att-val defaults))))
- (setf val (append (list (first val)) (nreverse defaults)))
- else val))
- ))
+ ;; first make sure there are no errors in given list
+ (let ((pairs (rest val)))
+ (loop
+ (when (null pairs) (return))
+ (let ((this-one (first pairs)))
+ (setf pairs (rest (rest pairs)))
+ (when (member this-one pairs)
+ (xml-error (concatenate 'string "Entity: "
+ (string (first val))
+ " has multiple "
+ (string this-one)
+ " attribute values"))))))
+ (let ((tag-defaults (assoc (first val) attlist-data)) defaults)
+ (dolist (def (rest tag-defaults))
+ (let ((old (member (first def) (rest val))))
+ (if* (not old) then
+ (if* (stringp (third def)) then
+ (push (first def) defaults)
+ (push (third def) defaults)
+ elseif (and (eq (third def) :FIXED) (stringp (fourth def))) then
+ (push (first def) defaults)
+ (push (fourth def) defaults))
+ else
+ (push (first old) defaults)
+ (push (second old) defaults))))
+ (if* defaults then
+ ;; now look for attributes in original list that weren't in dtd
+ (let ((tmp-val (rest val)) att att-val)
+ (loop
+ (when (null tmp-val) (return))
+ (setf att (first tmp-val))
+ (setf att-val (second tmp-val))
+ (setf tmp-val (rest (rest tmp-val)))
+ (when (not (member att defaults))
+ (push att defaults)
+ (push att-val defaults))))
+ (setf val (append (list (first val)) (nreverse defaults)))
+ else val))
+ ))
(defun normalize-public-value (public-value)
(setf public-value (string-trim '(#\space) public-value))
(when (= count stop) (return public-value))
(setf cch (schar public-value count))
(if* (and (eq cch #\space) (eq last-ch #\space)) then
- (setf public-value
- (remove #\space public-value :start count :count 1))
- (decf stop)
- else (incf count)
- (setf last-ch cch)))))
+ (setf public-value
+ (remove #\space public-value :start count :count 1))
+ (decf stop)
+ else (incf count)
+ (setf last-ch cch)))))
(defun normalize-attrib-value (attrib-value &optional first-pass)
(when first-pass
(let ((count 0) (stop (length attrib-value)) (last-ch nil) cch)
(loop
- (when (= count stop) (return))
- (setf cch (schar attrib-value count))
- (if* (or (eq cch #\return) (eq cch #\tab)) then (setf (schar attrib-value count) #\space)
- elseif (and (eq cch #\newline) (not (eq last-ch #\return))) then
- (setf (schar attrib-value count) #\space)
- elseif (and (eq cch #\newline) (eq last-ch #\return)) then
- (setf attrib-value
- (remove #\space attrib-value :start count :count 1))
- (decf stop))
- (incf count)
- (setf last-ch cch))))
+ (when (= count stop) (return))
+ (setf cch (schar attrib-value count))
+ (if* (or (eq cch #\return) (eq cch #\tab)) then (setf (schar attrib-value count) #\space)
+ elseif (and (eq cch #\newline) (not (eq last-ch #\return))) then
+ (setf (schar attrib-value count) #\space)
+ elseif (and (eq cch #\newline) (eq last-ch #\return)) then
+ (setf attrib-value
+ (remove #\space attrib-value :start count :count 1))
+ (decf stop))
+ (incf count)
+ (setf last-ch cch))))
(setf attrib-value (string-trim '(#\space) attrib-value))
(let ((count 0) (stop (length attrib-value)) (last-ch nil) cch)
(loop
(when (= count stop) (return attrib-value))
(setf cch (schar attrib-value count))
(if* (and (eq cch #\space) (eq last-ch #\space)) then
- (setf attrib-value
- (remove #\space attrib-value :start count :count 1))
- (decf stop)
- else (incf count)
- (setf last-ch cch)))))
+ (setf attrib-value
+ (remove #\space attrib-value :start count :count 1))
+ (decf stop)
+ else (incf count)
+ (setf last-ch cch)))))
(defun check-xmldecl (val tokenbuf)
(declare (ignorable old-coll) (optimize (speed 3) (safety 1)))
(when (not (and (symbolp (second val)) (string= "version" (symbol-name (second val)))))
(xml-error "XML declaration tag does not include correct 'version' attribute"))
(when (and (fourth val)
- (or (not (symbolp (fourth val)))
- (and (not (string= "standalone" (symbol-name (fourth val))))
- (not (string= "encoding" (symbol-name (fourth val)))))))
+ (or (not (symbolp (fourth val)))
+ (and (not (string= "standalone" (symbol-name (fourth val))))
+ (not (string= "encoding" (symbol-name (fourth val)))))))
(xml-error "XML declaration tag does not include correct 'encoding' or 'standalone' attribute"))
(when (and (fourth val) (string= "standalone" (symbol-name (fourth val))))
(if* (equal (fifth val) "yes") then
- (setf (iostruct-standalonep tokenbuf) t)
+ (setf (iostruct-standalonep tokenbuf) t)
elseif (not (equal (fifth val) "no")) then
- (xml-error "XML declaration tag does not include correct 'standalone' attribute value")))
+ (xml-error "XML declaration tag does not include correct 'standalone' attribute value")))
(dotimes (i (length (third val)))
(let ((c (schar (third val) i)))
(when (and (not (alpha-char-p c))
- (not (digit-char-p c))
- (not (member c '(#\. #\_ #\- #\:)))
- )
- (xml-error "XML declaration tag does not include correct 'version' attribute value"))))
+ (not (digit-char-p c))
+ (not (member c '(#\. #\_ #\- #\:)))
+ )
+ (xml-error "XML declaration tag does not include correct 'version' attribute value"))))
(if* (and (fourth val) (eql :encoding (fourth val)))
then (dotimes (i (length (fifth val)))
- (let ((c (schar (fifth val) i)))
- (when (and (not (alpha-char-p c))
- (if* (> i 0) then
- (and (not (digit-char-p c))
- (not (member c '(#\. #\_ #\-))))
- else t))
- (xml-error "XML declaration tag does not include correct 'encoding' attribute value"))))
- ;; jkf 3/26/02
- ;; if we have a stream we're reading from set its external-format
- ;; to the encoding
- ;; note - tokenbuf is really an iostruct, not a tokenbuf
+ (let ((c (schar (fifth val) i)))
+ (when (and (not (alpha-char-p c))
+ (if* (> i 0) then
+ (and (not (digit-char-p c))
+ (not (member c '(#\. #\_ #\-))))
+ else t))
+ (xml-error "XML declaration tag does not include correct 'encoding' attribute value"))))
+ ;; jkf 3/26/02
+ ;; if we have a stream we're reading from set its external-format
+ ;; to the encoding
+ ;; note - tokenbuf is really an iostruct, not a tokenbuf
#+allegro
- (if* (tokenbuf-stream (iostruct-tokenbuf tokenbuf))
- then (setf (stream-external-format
- (tokenbuf-stream (iostruct-tokenbuf tokenbuf)))
- (find-external-format (fifth val))))
-
-
- ))
+ (if* (tokenbuf-stream (iostruct-tokenbuf tokenbuf))
+ then (setf (stream-external-format
+ (tokenbuf-stream (iostruct-tokenbuf tokenbuf)))
+ (find-external-format (fifth val))))
+
+
+ ))
(defun xml-error (text)
(declare (optimize (speed 3) (safety 1)))
(defvar *debug-xml* nil)
(defmethod parse-xml ((str string) &key external-callback general-entities parameter-entities
- content-only uri-to-package)
+ content-only uri-to-package)
(declare (optimize (speed 3) (safety 1)))
(parse-xml (make-string-input-stream str) :external-callback external-callback
- :general-entities general-entities
- :parameter-entities parameter-entities :content-only content-only
- :uri-to-package uri-to-package))
+ :general-entities general-entities
+ :parameter-entities parameter-entities :content-only content-only
+ :uri-to-package uri-to-package))
(defmethod parse-xml ((p stream) &key external-callback general-entities
- parameter-entities content-only uri-to-package)
+ parameter-entities content-only uri-to-package)
(declare (optimize (speed 3) (safety 1)))
(pxml-internal0 p nil external-callback general-entities parameter-entities content-only
- uri-to-package))
+ uri-to-package))
(eval-when (compile load eval)
(defconstant state-docstart 0) ;; looking for XMLdecl, Misc, doctypedecl, 1st element
(when (not (xml-space-p (elt val i))) (return nil))))
(defun pxml-internal0 (p read-sequence-func external-callback
- general-entities parameter-entities content-only uri-to-package)
+ general-entities parameter-entities content-only uri-to-package)
(declare (optimize (speed 3) (safety 1)))
(let ((tokenbuf (make-iostruct :tokenbuf (get-tokenbuf)
- :do-entity t
- :read-sequence-func read-sequence-func)))
+ :do-entity t
+ :read-sequence-func read-sequence-func)))
;; set up stream right
(setf (tokenbuf-stream (iostruct-tokenbuf tokenbuf)) p)
;; set up user specified entities
;; look for Unicode file
(unicode-check p tokenbuf)
(unwind-protect
- (values (pxml-internal tokenbuf external-callback content-only)
- (iostruct-uri-to-package tokenbuf))
+ (values (pxml-internal tokenbuf external-callback content-only)
+ (iostruct-uri-to-package tokenbuf))
(dolist (entity-buf (iostruct-entity-bufs tokenbuf))
- (when (streamp (tokenbuf-stream entity-buf))
- (close (tokenbuf-stream entity-buf))
- (put-back-tokenbuf entity-buf))))
+ (when (streamp (tokenbuf-stream entity-buf))
+ (close (tokenbuf-stream entity-buf))
+ (put-back-tokenbuf entity-buf))))
))
(defun pxml-internal (tokenbuf external-callback content-only)
(declare (optimize (speed 3) (safety 1)))
(let ((state state-docstart)
- (guts)
- (pending)
- (attlist-data)
- (public-string)
- (system-string)
- (entity-open-tags)
- )
+ (guts)
+ (pending)
+ (attlist-data)
+ (public-string)
+ (system-string)
+ (entity-open-tags)
+ )
(loop
(multiple-value-bind (val kind kind2)
- (next-token tokenbuf external-callback attlist-data)
- (when *debug-xml*
- (format t "val: ~s kind: ~s kind2: ~s state: ~s~%" val kind kind2 state))
- (case state
- (#.state-docstart
- (if* (and (listp val) (eq :xml (first val)) (eq kind :xml) (eq kind2 :end-tag))
- then
- (check-xmldecl val tokenbuf)
- (when (not content-only) (push val guts))
- (setf state state-docstart-misc)
- elseif (eq kind :comment)
- then
- (when (not content-only) (push val guts))
- (setf state state-docstart-misc)
- elseif (and (listp val) (eq :DOCTYPE (first val)))
- then
- (if* (eq (third val) :SYSTEM) then
- (setf system-string (fourth val))
- (setf val (remove (third val) val))
- (setf val (remove (third val) val))
- elseif (eq (third val) :PUBLIC) then
- (setf public-string (normalize-public-value (fourth val)))
- (setf system-string (fifth val))
- (setf val (remove (third val) val))
- (setf val (remove (third val) val))
- (setf val (remove (third val) val)))
- (when system-string
- (if* external-callback then
- (let ((ext-stream (apply external-callback
- (list (parse-uri system-string)
- :DOCTYPE
- public-string
- ))))
- (when ext-stream
- (let (ext-io (entity-buf (get-tokenbuf)))
- (setf (tokenbuf-stream entity-buf) ext-stream)
- (setf ext-io (make-iostruct :tokenbuf entity-buf
- :do-entity
- (iostruct-do-entity tokenbuf)
- :read-sequence-func
- (iostruct-read-sequence-func tokenbuf)))
- (unicode-check ext-stream ext-io)
- (setf (iostruct-parameter-entities ext-io)
- (iostruct-parameter-entities tokenbuf))
- (setf (iostruct-general-entities ext-io)
- (iostruct-general-entities tokenbuf))
- (unwind-protect
- (setf val (append val
- (list (append
- (list :external)
- (parse-dtd
- ext-io
- t external-callback)))))
- (setf (iostruct-seen-any-dtd tokenbuf) t)
- (setf (iostruct-seen-external-dtd tokenbuf) t)
- (setf (iostruct-seen-parameter-reference tokenbuf)
- (iostruct-seen-parameter-reference ext-io))
- (setf (iostruct-general-entities tokenbuf)
- (iostruct-general-entities ext-io))
- (setf (iostruct-parameter-entities tokenbuf)
- (iostruct-parameter-entities ext-io))
- (setf (iostruct-do-entity tokenbuf)
- (iostruct-do-entity ext-io))
- (dolist (entity-buf2 (iostruct-entity-bufs ext-io))
- (when (streamp (tokenbuf-stream entity-buf2))
- (close (tokenbuf-stream entity-buf2))
- (put-back-tokenbuf entity-buf2)))
- (close (tokenbuf-stream entity-buf))
- (put-back-tokenbuf entity-buf))
- )))
- else
- (setf (iostruct-do-entity tokenbuf) nil)))
- (setf attlist-data
- (process-attlist (rest (rest val)) attlist-data))
- (when (not content-only) (push val guts))
- (setf state state-docstart-misc2)
- elseif (eq kind :pi)
- then
- (push val guts)
- (setf state state-docstart-misc)
- elseif (eq kind :pcdata)
- then
- (when (or (not kind2) (not (all-xml-whitespace-p val)))
- (if* (not kind2) then
- (xml-error "An entity reference occured where only whitespace or the first element may occur")
- else
- (xml-error (concatenate 'string
- "unrecognized content '"
- (subseq val 0 (min (length val) 40)) "'"))))
- (setf state state-docstart-misc)
- elseif (or (symbolp val)
- (and (listp val) (symbolp (first val))))
- then
- (when (eq kind :start-tag)
- (setf val (add-default-values val attlist-data)))
- (if* (and (eq kind :start-tag) (eq kind2 :end-tag))
- then (push (list val) guts)
- (setf state state-element-done)
- elseif (eq kind :start-tag)
- then (push (list val) pending)
- ;;(format t "pending: ~s guts: ~s <1>~%" pending guts)
- (when (iostruct-entity-bufs tokenbuf)
- (push (if (symbolp val) val (first val)) entity-open-tags))
- (setf state state-element-contents)
- else (xml-error (concatenate 'string
- "encountered token at illegal syntax position: '"
- (string kind) "'"
- (if* (null guts) then
- " at start of contents"
- else
- (concatenate 'string
- " following: '"
- (format nil "~s" (first guts))
- "'")))))
- else
- (print (list val kind kind2))
- (break "need to check for other allowable docstarts")))
- (#.state-docstart-misc2
- (if* (eq kind :pcdata)
- then
- (when (or (not kind2) (not (all-xml-whitespace-p val)))
- (if* (not kind2) then
- (xml-error "An entity reference occured where only whitespace or the first element may occur")
- else
- (xml-error (concatenate 'string
- "unrecognized content '"
- (subseq val 0 (min (length val) 40)) "'"))))
- elseif (and (listp val) (eq :comment (first val)))
- then
- (when (not content-only) (push val guts))
- elseif (eq kind :pi)
- then
- (push val guts)
- elseif (eq kind :eof)
- then
- (xml-error "unexpected end of file encountered")
- elseif (or (symbolp val)
- (and (listp val) (symbolp (first val))))
- then
- (when (eq kind :start-tag)
- (setf val (add-default-values val attlist-data)))
- (if* (and (eq kind :start-tag) (eq kind2 :end-tag))
- then (push (list val) guts)
- (setf state state-element-done)
- elseif (eq kind :start-tag)
- then (push (list val) pending)
- ;;(format t "pending: ~s guts: ~s <2>~%" pending guts)
- (when (iostruct-entity-bufs tokenbuf)
- (push (if (symbolp val) val (first val)) entity-open-tags))
- (setf state state-element-contents)
- else (xml-error (concatenate 'string
- "encountered token at illegal syntax position: '"
- (string kind) "'"
- (if* (null guts) then
- " at start of contents"
- else
- (concatenate 'string
- " following: '"
- (format nil "~s" (first guts))
- "'")))))
- else
- (error "this branch unexpected <1>")))
- (#.state-docstart-misc
- (if* (eq kind :pcdata)
- then
- (when (or (not kind2) (not (all-xml-whitespace-p val)))
- (if* (not kind2) then
- (xml-error "An entity reference occured where only whitespace or the first element may occur")
- else
- (xml-error (concatenate 'string
- "unrecognized content '"
- (subseq val 0 (min (length val) 40)) "'"))))
- elseif (and (listp val) (eq :DOCTYPE (first val)))
- then
- (if* (eq (third val) :SYSTEM) then
- (setf system-string (fourth val))
- (setf val (remove (third val) val))
- (setf val (remove (third val) val))
- elseif (eq (third val) :PUBLIC) then
- (setf public-string (normalize-public-value (fourth val)))
- (setf system-string (fifth val))
- (setf val (remove (third val) val))
- (setf val (remove (third val) val))
- (setf val (remove (third val) val)))
- (when system-string
- (if* external-callback then
- (let ((ext-stream (apply external-callback
- (list (parse-uri system-string)
- :DOCTYPE
- public-string
- ))))
- (when ext-stream
- (let (ext-io (entity-buf (get-tokenbuf)))
- (setf (tokenbuf-stream entity-buf) ext-stream)
- (setf ext-io (make-iostruct :tokenbuf entity-buf
- :do-entity
- (iostruct-do-entity tokenbuf)
- :read-sequence-func
- (iostruct-read-sequence-func tokenbuf)))
- (unicode-check ext-stream ext-io)
- (setf (iostruct-parameter-entities ext-io)
- (iostruct-parameter-entities tokenbuf))
- (setf (iostruct-general-entities ext-io)
- (iostruct-general-entities tokenbuf))
- (unwind-protect
- (setf val (append val
- (list (append
- (list :external)
- (parse-dtd
- ext-io
- t external-callback)))))
- (setf (iostruct-seen-any-dtd tokenbuf) t)
- (setf (iostruct-seen-external-dtd tokenbuf) t)
- (setf (iostruct-seen-parameter-reference tokenbuf)
- (iostruct-seen-parameter-reference ext-io))
- (setf (iostruct-general-entities tokenbuf)
- (iostruct-general-entities ext-io))
- (setf (iostruct-parameter-entities tokenbuf)
- (iostruct-parameter-entities ext-io))
- (setf (iostruct-do-entity tokenbuf)
- (iostruct-do-entity ext-io))
- (dolist (entity-buf2 (iostruct-entity-bufs ext-io))
- (when (streamp (tokenbuf-stream entity-buf2))
- (close (tokenbuf-stream entity-buf2))
- (put-back-tokenbuf entity-buf2)))
- (close (tokenbuf-stream entity-buf))
- (put-back-tokenbuf entity-buf))
- )))
- else
- (setf (iostruct-do-entity tokenbuf) nil)))
- (setf attlist-data
- (process-attlist (rest (rest val)) attlist-data))
- (when (not content-only) (push val guts))
- (setf state state-docstart-misc2)
- elseif (and (listp val) (eq :comment (first val)))
- then
- (when (not content-only) (push val guts))
- elseif (eq kind :pi)
- then
- (push val guts)
- elseif (or (symbolp val)
- (and (listp val) (symbolp (first val))))
- then
- (when (eq kind :start-tag)
- (setf val (add-default-values val attlist-data)))
- (if* (and (eq kind :start-tag) (eq kind2 :end-tag))
- then (push (list val) guts)
- (setf state state-element-done)
- elseif (eq kind :start-tag)
- then (push (list val) pending)
- ;;(format t "pending: ~s guts: ~s <3>~%" pending guts)
- (when (iostruct-entity-bufs tokenbuf)
- (push (if (symbolp val) val (first val)) entity-open-tags))
- (setf state state-element-contents)
- else (xml-error (concatenate 'string
- "encountered token at illegal syntax position: '"
- (string kind) "'"
- (concatenate 'string
- " following: '"
- (format nil "~s" (first guts))
- "'"))))
- else
- (print (list val kind kind2))
- (break "check for other docstart-misc states")))
- (#.state-element-contents
- (if* (or (symbolp val)
- (and (listp val) (symbolp (first val))))
- then
- (when (eq kind :start-tag)
- (setf val (add-default-values val attlist-data)))
- (if* (eq kind :end-tag)
- then (let ((candidate (first (first pending))))
- (when (listp candidate) (setf candidate (first candidate)))
- (if* (eq candidate val)
- then
- (if* (iostruct-entity-bufs tokenbuf) then
- (when (not (eq (first entity-open-tags) val))
- (xml-error
- (concatenate 'string
- (string val)
- " element closed in entity that did not open it")))
- (setf entity-open-tags (rest entity-open-tags))
- else
- (when (eq (first entity-open-tags) val)
- (xml-error
- (concatenate 'string
- (string val)
- " element closed outside of entity that did not open it")))
- )
- (if* (= (length pending) 1)
- then
- (push (first pending) guts)
- (setf state state-element-done)
- else
- (setf (second pending)
- (append (second pending) (list (first pending)))))
- (setf pending (rest pending))
- ;;(format t "pending: ~s guts: ~s <4>~%" pending guts)
- else (xml-error (format nil
- "encountered end tag: ~s expected: ~s"
- val candidate))))
- elseif (and (eq kind :start-tag) (eq kind2 :end-tag))
- then
- (setf (first pending)
- (append (first pending) (list (list val))))
- ;;(format t "pending: ~s guts: ~s <5>~%" pending guts)
- elseif (eq kind :start-tag)
- then
- (push (list val) pending)
- ;;(format t "pending: ~s guts: ~s <6>~%" pending guts)
- (when (iostruct-entity-bufs tokenbuf)
- (push (if (symbolp val) val (first val)) entity-open-tags))
- elseif (eq kind :cdata) then
- (setf (first pending)
- (append (first pending) (rest val)))
- (let ((old (first pending))
- (new))
- (dolist (item old)
- (if* (and (stringp (first new)) (stringp item)) then
- (setf (first new)
- (concatenate 'string (first new) item))
- else (push item new)))
- (setf (first pending) (reverse new)))
- elseif (eq kind :comment) then
- (when (not content-only) (push val guts))
- elseif (eq kind :pi)
- then
- (setf (first pending)
- (append (first pending) (list val)))
- elseif (eq kind :eof)
- then
- (xml-error "unexpected end of file encountered")
- else (xml-error (format nil "unexpected token: ~s" val)))
- elseif (eq kind :pcdata)
- then
- (setf (first pending)
- (append (first pending) (list val)))
- (let ((old (first pending))
- (new))
- (dolist (item old)
- (if* (and (stringp (first new)) (stringp item)) then
- (setf (first new)
- (concatenate 'string (first new) item))
- else (push item new)))
- (setf (first pending) (reverse new)))
- else (xml-error (format nil "unexpected token: ~s" val))))
- (#.state-element-done
- (if* (eq kind :pcdata)
- then
- (when (or (not kind2) (not (all-xml-whitespace-p val)))
- (if* (not kind2) then
- (xml-error "An entity reference occured where only whitespace or the first element may occur")
- else
- (xml-error (concatenate 'string
- "unrecognized content '"
- (subseq val 0 (min (length val) 40)) "'"))))
- elseif (eq kind :eof) then
- (put-back-tokenbuf (iostruct-tokenbuf tokenbuf))
- (return (nreverse guts))
- elseif (eq kind :comment) then
- (when (not content-only) (push val guts))
- elseif (eq kind :pi)
- then (push val guts)
- else
- (xml-error (concatenate 'string
- "encountered token at illegal syntax position: '"
- (string kind) "'"
- (concatenate 'string
- " following: '"
- (format nil "~s" (first guts))
- "'")))
- ))
- (t
- (error "need to support state:~s token:~s kind:~s kind2:~s <parse>" state val kind kind2)))
- ))))
+ (next-token tokenbuf external-callback attlist-data)
+ (when *debug-xml*
+ (format t "val: ~s kind: ~s kind2: ~s state: ~s~%" val kind kind2 state))
+ (case state
+ (#.state-docstart
+ (if* (and (listp val) (eq :xml (first val)) (eq kind :xml) (eq kind2 :end-tag))
+ then
+ (check-xmldecl val tokenbuf)
+ (when (not content-only) (push val guts))
+ (setf state state-docstart-misc)
+ elseif (eq kind :comment)
+ then
+ (when (not content-only) (push val guts))
+ (setf state state-docstart-misc)
+ elseif (and (listp val) (eq :DOCTYPE (first val)))
+ then
+ (if* (eq (third val) :SYSTEM) then
+ (setf system-string (fourth val))
+ (setf val (remove (third val) val))
+ (setf val (remove (third val) val))
+ elseif (eq (third val) :PUBLIC) then
+ (setf public-string (normalize-public-value (fourth val)))
+ (setf system-string (fifth val))
+ (setf val (remove (third val) val))
+ (setf val (remove (third val) val))
+ (setf val (remove (third val) val)))
+ (when system-string
+ (if* external-callback then
+ (let ((ext-stream (apply external-callback
+ (list (parse-uri system-string)
+ :DOCTYPE
+ public-string
+ ))))
+ (when ext-stream
+ (let (ext-io (entity-buf (get-tokenbuf)))
+ (setf (tokenbuf-stream entity-buf) ext-stream)
+ (setf ext-io (make-iostruct :tokenbuf entity-buf
+ :do-entity
+ (iostruct-do-entity tokenbuf)
+ :read-sequence-func
+ (iostruct-read-sequence-func tokenbuf)))
+ (unicode-check ext-stream ext-io)
+ (setf (iostruct-parameter-entities ext-io)
+ (iostruct-parameter-entities tokenbuf))
+ (setf (iostruct-general-entities ext-io)
+ (iostruct-general-entities tokenbuf))
+ (unwind-protect
+ (setf val (append val
+ (list (append
+ (list :external)
+ (parse-dtd
+ ext-io
+ t external-callback)))))
+ (setf (iostruct-seen-any-dtd tokenbuf) t)
+ (setf (iostruct-seen-external-dtd tokenbuf) t)
+ (setf (iostruct-seen-parameter-reference tokenbuf)
+ (iostruct-seen-parameter-reference ext-io))
+ (setf (iostruct-general-entities tokenbuf)
+ (iostruct-general-entities ext-io))
+ (setf (iostruct-parameter-entities tokenbuf)
+ (iostruct-parameter-entities ext-io))
+ (setf (iostruct-do-entity tokenbuf)
+ (iostruct-do-entity ext-io))
+ (dolist (entity-buf2 (iostruct-entity-bufs ext-io))
+ (when (streamp (tokenbuf-stream entity-buf2))
+ (close (tokenbuf-stream entity-buf2))
+ (put-back-tokenbuf entity-buf2)))
+ (close (tokenbuf-stream entity-buf))
+ (put-back-tokenbuf entity-buf))
+ )))
+ else
+ (setf (iostruct-do-entity tokenbuf) nil)))
+ (setf attlist-data
+ (process-attlist (rest (rest val)) attlist-data))
+ (when (not content-only) (push val guts))
+ (setf state state-docstart-misc2)
+ elseif (eq kind :pi)
+ then
+ (push val guts)
+ (setf state state-docstart-misc)
+ elseif (eq kind :pcdata)
+ then
+ (when (or (not kind2) (not (all-xml-whitespace-p val)))
+ (if* (not kind2) then
+ (xml-error "An entity reference occured where only whitespace or the first element may occur")
+ else
+ (xml-error (concatenate 'string
+ "unrecognized content '"
+ (subseq val 0 (min (length val) 40)) "'"))))
+ (setf state state-docstart-misc)
+ elseif (or (symbolp val)
+ (and (listp val) (symbolp (first val))))
+ then
+ (when (eq kind :start-tag)
+ (setf val (add-default-values val attlist-data)))
+ (if* (and (eq kind :start-tag) (eq kind2 :end-tag))
+ then (push (list val) guts)
+ (setf state state-element-done)
+ elseif (eq kind :start-tag)
+ then (push (list val) pending)
+ ;;(format t "pending: ~s guts: ~s <1>~%" pending guts)
+ (when (iostruct-entity-bufs tokenbuf)
+ (push (if (symbolp val) val (first val)) entity-open-tags))
+ (setf state state-element-contents)
+ else (xml-error (concatenate 'string
+ "encountered token at illegal syntax position: '"
+ (string kind) "'"
+ (if* (null guts) then
+ " at start of contents"
+ else
+ (concatenate 'string
+ " following: '"
+ (format nil "~s" (first guts))
+ "'")))))
+ else
+ (print (list val kind kind2))
+ (break "need to check for other allowable docstarts")))
+ (#.state-docstart-misc2
+ (if* (eq kind :pcdata)
+ then
+ (when (or (not kind2) (not (all-xml-whitespace-p val)))
+ (if* (not kind2) then
+ (xml-error "An entity reference occured where only whitespace or the first element may occur")
+ else
+ (xml-error (concatenate 'string
+ "unrecognized content '"
+ (subseq val 0 (min (length val) 40)) "'"))))
+ elseif (and (listp val) (eq :comment (first val)))
+ then
+ (when (not content-only) (push val guts))
+ elseif (eq kind :pi)
+ then
+ (push val guts)
+ elseif (eq kind :eof)
+ then
+ (xml-error "unexpected end of file encountered")
+ elseif (or (symbolp val)
+ (and (listp val) (symbolp (first val))))
+ then
+ (when (eq kind :start-tag)
+ (setf val (add-default-values val attlist-data)))
+ (if* (and (eq kind :start-tag) (eq kind2 :end-tag))
+ then (push (list val) guts)
+ (setf state state-element-done)
+ elseif (eq kind :start-tag)
+ then (push (list val) pending)
+ ;;(format t "pending: ~s guts: ~s <2>~%" pending guts)
+ (when (iostruct-entity-bufs tokenbuf)
+ (push (if (symbolp val) val (first val)) entity-open-tags))
+ (setf state state-element-contents)
+ else (xml-error (concatenate 'string
+ "encountered token at illegal syntax position: '"
+ (string kind) "'"
+ (if* (null guts) then
+ " at start of contents"
+ else
+ (concatenate 'string
+ " following: '"
+ (format nil "~s" (first guts))
+ "'")))))
+ else
+ (error "this branch unexpected <1>")))
+ (#.state-docstart-misc
+ (if* (eq kind :pcdata)
+ then
+ (when (or (not kind2) (not (all-xml-whitespace-p val)))
+ (if* (not kind2) then
+ (xml-error "An entity reference occured where only whitespace or the first element may occur")
+ else
+ (xml-error (concatenate 'string
+ "unrecognized content '"
+ (subseq val 0 (min (length val) 40)) "'"))))
+ elseif (and (listp val) (eq :DOCTYPE (first val)))
+ then
+ (if* (eq (third val) :SYSTEM) then
+ (setf system-string (fourth val))
+ (setf val (remove (third val) val))
+ (setf val (remove (third val) val))
+ elseif (eq (third val) :PUBLIC) then
+ (setf public-string (normalize-public-value (fourth val)))
+ (setf system-string (fifth val))
+ (setf val (remove (third val) val))
+ (setf val (remove (third val) val))
+ (setf val (remove (third val) val)))
+ (when system-string
+ (if* external-callback then
+ (let ((ext-stream (apply external-callback
+ (list (parse-uri system-string)
+ :DOCTYPE
+ public-string
+ ))))
+ (when ext-stream
+ (let (ext-io (entity-buf (get-tokenbuf)))
+ (setf (tokenbuf-stream entity-buf) ext-stream)
+ (setf ext-io (make-iostruct :tokenbuf entity-buf
+ :do-entity
+ (iostruct-do-entity tokenbuf)
+ :read-sequence-func
+ (iostruct-read-sequence-func tokenbuf)))
+ (unicode-check ext-stream ext-io)
+ (setf (iostruct-parameter-entities ext-io)
+ (iostruct-parameter-entities tokenbuf))
+ (setf (iostruct-general-entities ext-io)
+ (iostruct-general-entities tokenbuf))
+ (unwind-protect
+ (setf val (append val
+ (list (append
+ (list :external)
+ (parse-dtd
+ ext-io
+ t external-callback)))))
+ (setf (iostruct-seen-any-dtd tokenbuf) t)
+ (setf (iostruct-seen-external-dtd tokenbuf) t)
+ (setf (iostruct-seen-parameter-reference tokenbuf)
+ (iostruct-seen-parameter-reference ext-io))
+ (setf (iostruct-general-entities tokenbuf)
+ (iostruct-general-entities ext-io))
+ (setf (iostruct-parameter-entities tokenbuf)
+ (iostruct-parameter-entities ext-io))
+ (setf (iostruct-do-entity tokenbuf)
+ (iostruct-do-entity ext-io))
+ (dolist (entity-buf2 (iostruct-entity-bufs ext-io))
+ (when (streamp (tokenbuf-stream entity-buf2))
+ (close (tokenbuf-stream entity-buf2))
+ (put-back-tokenbuf entity-buf2)))
+ (close (tokenbuf-stream entity-buf))
+ (put-back-tokenbuf entity-buf))
+ )))
+ else
+ (setf (iostruct-do-entity tokenbuf) nil)))
+ (setf attlist-data
+ (process-attlist (rest (rest val)) attlist-data))
+ (when (not content-only) (push val guts))
+ (setf state state-docstart-misc2)
+ elseif (and (listp val) (eq :comment (first val)))
+ then
+ (when (not content-only) (push val guts))
+ elseif (eq kind :pi)
+ then
+ (push val guts)
+ elseif (or (symbolp val)
+ (and (listp val) (symbolp (first val))))
+ then
+ (when (eq kind :start-tag)
+ (setf val (add-default-values val attlist-data)))
+ (if* (and (eq kind :start-tag) (eq kind2 :end-tag))
+ then (push (list val) guts)
+ (setf state state-element-done)
+ elseif (eq kind :start-tag)
+ then (push (list val) pending)
+ ;;(format t "pending: ~s guts: ~s <3>~%" pending guts)
+ (when (iostruct-entity-bufs tokenbuf)
+ (push (if (symbolp val) val (first val)) entity-open-tags))
+ (setf state state-element-contents)
+ else (xml-error (concatenate 'string
+ "encountered token at illegal syntax position: '"
+ (string kind) "'"
+ (concatenate 'string
+ " following: '"
+ (format nil "~s" (first guts))
+ "'"))))
+ else
+ (print (list val kind kind2))
+ (break "check for other docstart-misc states")))
+ (#.state-element-contents
+ (if* (or (symbolp val)
+ (and (listp val) (symbolp (first val))))
+ then
+ (when (eq kind :start-tag)
+ (setf val (add-default-values val attlist-data)))
+ (if* (eq kind :end-tag)
+ then (let ((candidate (first (first pending))))
+ (when (listp candidate) (setf candidate (first candidate)))
+ (if* (eq candidate val)
+ then
+ (if* (iostruct-entity-bufs tokenbuf) then
+ (when (not (eq (first entity-open-tags) val))
+ (xml-error
+ (concatenate 'string
+ (string val)
+ " element closed in entity that did not open it")))
+ (setf entity-open-tags (rest entity-open-tags))
+ else
+ (when (eq (first entity-open-tags) val)
+ (xml-error
+ (concatenate 'string
+ (string val)
+ " element closed outside of entity that did not open it")))
+ )
+ (if* (= (length pending) 1)
+ then
+ (push (first pending) guts)
+ (setf state state-element-done)
+ else
+ (setf (second pending)
+ (append (second pending) (list (first pending)))))
+ (setf pending (rest pending))
+ ;;(format t "pending: ~s guts: ~s <4>~%" pending guts)
+ else (xml-error (format nil
+ "encountered end tag: ~s expected: ~s"
+ val candidate))))
+ elseif (and (eq kind :start-tag) (eq kind2 :end-tag))
+ then
+ (setf (first pending)
+ (append (first pending) (list (list val))))
+ ;;(format t "pending: ~s guts: ~s <5>~%" pending guts)
+ elseif (eq kind :start-tag)
+ then
+ (push (list val) pending)
+ ;;(format t "pending: ~s guts: ~s <6>~%" pending guts)
+ (when (iostruct-entity-bufs tokenbuf)
+ (push (if (symbolp val) val (first val)) entity-open-tags))
+ elseif (eq kind :cdata) then
+ (setf (first pending)
+ (append (first pending) (rest val)))
+ (let ((old (first pending))
+ (new))
+ (dolist (item old)
+ (if* (and (stringp (first new)) (stringp item)) then
+ (setf (first new)
+ (concatenate 'string (first new) item))
+ else (push item new)))
+ (setf (first pending) (reverse new)))
+ elseif (eq kind :comment) then
+ (when (not content-only) (push val guts))
+ elseif (eq kind :pi)
+ then
+ (setf (first pending)
+ (append (first pending) (list val)))
+ elseif (eq kind :eof)
+ then
+ (xml-error "unexpected end of file encountered")
+ else (xml-error (format nil "unexpected token: ~s" val)))
+ elseif (eq kind :pcdata)
+ then
+ (setf (first pending)
+ (append (first pending) (list val)))
+ (let ((old (first pending))
+ (new))
+ (dolist (item old)
+ (if* (and (stringp (first new)) (stringp item)) then
+ (setf (first new)
+ (concatenate 'string (first new) item))
+ else (push item new)))
+ (setf (first pending) (reverse new)))
+ else (xml-error (format nil "unexpected token: ~s" val))))
+ (#.state-element-done
+ (if* (eq kind :pcdata)
+ then
+ (when (or (not kind2) (not (all-xml-whitespace-p val)))
+ (if* (not kind2) then
+ (xml-error "An entity reference occured where only whitespace or the first element may occur")
+ else
+ (xml-error (concatenate 'string
+ "unrecognized content '"
+ (subseq val 0 (min (length val) 40)) "'"))))
+ elseif (eq kind :eof) then
+ (put-back-tokenbuf (iostruct-tokenbuf tokenbuf))
+ (return (nreverse guts))
+ elseif (eq kind :comment) then
+ (when (not content-only) (push val guts))
+ elseif (eq kind :pi)
+ then (push val guts)
+ else
+ (xml-error (concatenate 'string
+ "encountered token at illegal syntax position: '"
+ (string kind) "'"
+ (concatenate 'string
+ " following: '"
+ (format nil "~s" (first guts))
+ "'")))
+ ))
+ (t
+ (error "need to support state:~s token:~s kind:~s kind2:~s <parse>" state val kind kind2)))
+ ))))
(eval-when (compile load eval)
(defconstant state-pcdata 0) ;;looking for < (tag start), & (reference); all else is string data
(declare (optimize (speed 3) (safety 1)))
;; return two values:
;; the next token from the stream.
- ;; the kind of token
+ ;; the kind of token
;;
;; if read-sequence-func is non-nil,
;; read-sequence-func is called to fetch the next character
(macrolet ((add-to-entity-buf (entity-symbol p-value)
- `(progn
- (push (make-tokenbuf :cur 0 :max (length p-value) :data p-value)
- (iostruct-entity-bufs tokenbuf))))
+ `(progn
+ (push (make-tokenbuf :cur 0 :max (length p-value) :data p-value)
+ (iostruct-entity-bufs tokenbuf))))
- (un-next-char (ch)
- `(push ,ch (iostruct-unget-char tokenbuf)))
+ (un-next-char (ch)
+ `(push ,ch (iostruct-unget-char tokenbuf)))
- (clear-coll (coll)
- `(setf (collector-next ,coll) 0))
+ (clear-coll (coll)
+ `(setf (collector-next ,coll) 0))
- (add-to-coll (coll ch)
- `(let ((.next. (collector-next ,coll)))
- (if* (>= .next. (collector-max ,coll))
- then (grow-and-add ,coll ,ch)
- else (setf (schar (collector-data ,coll) .next.)
- ,ch)
- (setf (collector-next ,coll) (1+ .next.)))))
+ (add-to-coll (coll ch)
+ `(let ((.next. (collector-next ,coll)))
+ (if* (>= .next. (collector-max ,coll))
+ then (grow-and-add ,coll ,ch)
+ else (setf (schar (collector-data ,coll) .next.)
+ ,ch)
+ (setf (collector-next ,coll) (1+ .next.)))))
- (to-preferred-case (ch)
- ;; should check the case mode
- `(char-downcase ,ch))
+ (to-preferred-case (ch)
+ ;; should check the case mode
+ `(char-downcase ,ch))
- )
+ )
(let ((state state-pcdata)
- (coll (get-collector))
- (entity (get-collector))
- (tag-to-return)
- (tag-to-return-string)
- (attrib-name)
- (empty-delim)
- (value-delim)
- (attrib-value)
- (attribs-to-return)
- (contents-to-return)
- (char-code 0)
- (special-tag-count 0)
- (attrib-value-tokenbuf)
- (last-ch)
- (cdatap t)
- (pcdatap t)
- (entity-source)
- (ns-token)
- (ch))
+ (coll (get-collector))
+ (entity (get-collector))
+ (tag-to-return)
+ (tag-to-return-string)
+ (attrib-name)
+ (empty-delim)
+ (value-delim)
+ (attrib-value)
+ (attribs-to-return)
+ (contents-to-return)
+ (char-code 0)
+ (special-tag-count 0)
+ (attrib-value-tokenbuf)
+ (last-ch)
+ (cdatap t)
+ (pcdatap t)
+ (entity-source)
+ (ns-token)
+ (ch))
(loop
- (setq ch (get-next-char tokenbuf))
- (when *debug-xml* (format t "ch: ~s code: ~x state:~s entity-names:~s~%"
- ch (char-code ch) state (iostruct-entity-names tokenbuf)))
- (if* (null ch)
- then (return) ; eof -- exit loop
- )
-
-
- (case state
- (#.state-pcdata
- (if* (eq ch #\<)
- then
- (setf entity-source (first (iostruct-entity-bufs tokenbuf)))
- (if* (> (collector-next coll) 0)
- then ; have collected something, return this string
- (un-next-char ch) ; push back the <
- (return)
- else ; collect a tag
- (setq state state-readtagfirst))
- elseif (eq #\& ch)
- then (setf state state-pcdata2)
- (setf entity-source (first (iostruct-entity-bufs tokenbuf)))
- (setf pcdatap nil)
- elseif (eq #\] ch) then (setf state state-pcdata7)
- elseif (not (xml-char-p ch)) then
- (xml-error (concatenate 'string
- "Illegal character: "
- (string ch)
- " detected in input"))
- else
- (add-to-coll coll ch)
- #+ignore
- (if* (not (eq ch #\return))
- then (add-to-coll coll ch))))
-
- (#.state-pcdata7
- (if* (eq #\] ch) then (setf state state-pcdata8)
- else (setf state state-pcdata)
- (add-to-coll coll #\]) (un-next-char ch)))
-
- (#.state-pcdata8
- (if* (eq #\> ch) then
- (add-to-coll coll #\])
- (add-to-coll coll #\])
- (add-to-coll coll #\>)
- (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error (concatenate 'string
- "content cannot contain ']]>':'"
- (compute-coll-string coll)
- "'"))
- elseif (eq #\] ch) then
- (add-to-coll coll #\])
- else (setf state state-pcdata)
- (add-to-coll coll #\]) (add-to-coll coll #\]) (un-next-char ch)))
-
- (#.state-pcdata2
- (if* (eq #\# ch)
- then (setf state state-pcdata3)
- elseif (xml-name-start-char-p ch)
- then (setf state state-pcdata4)
- (un-next-char ch)
- else (clear-coll coll)
- (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error (concatenate 'string
- "illegal reference name, starting at: '&"
- (compute-coll-string coll)
- "'"))
- ))
-
- (#.state-pcdata3
- (if* (eq #\x ch)
- then (setf state state-pcdata5)
- elseif (<= (char-code #\0) (char-code ch) (char-code #\9))
- then (setf state state-pcdata6)
- (un-next-char ch)
- else (clear-coll coll)
- (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error (concatenate 'string
- "illegal character reference code, starting at: '&#"
- (compute-coll-string coll)
- "'"))
- ))
-
- (#.state-pcdata4
- (if* (xml-name-char-p ch)
- then (add-to-coll entity ch)
- elseif (eq #\; ch)
- then (let ((entity-symbol (compute-tag entity)))
- (clear-coll entity)
- (if* (not (eq entity-source (first (iostruct-entity-bufs tokenbuf)))) then
- (xml-error
- (concatenate 'string
- (string entity-symbol)
- " reference cannot be constructed from entity reference/character data sequence"))
- else
- (setf entity-source nil))
- (if* (string= (symbol-name entity-symbol) "amp") then (add-to-coll coll #\&)
- elseif (string= (symbol-name entity-symbol) "lt") then (add-to-coll coll #\<)
- elseif (string= (symbol-name entity-symbol) "gt") then (add-to-coll coll #\>)
- elseif (string= (symbol-name entity-symbol) "apos") then (add-to-coll coll #\')
- elseif (string= (symbol-name entity-symbol) "quot") then (add-to-coll coll #\")
- else
- (let (p-value)
- (if* (and (iostruct-do-entity tokenbuf)
- (setf p-value
- (assoc entity-symbol
- (iostruct-general-entities tokenbuf)))) then
- (setf p-value (rest p-value))
- (when (member entity-symbol (iostruct-entity-names tokenbuf))
- (xml-error (concatenate 'string
- "entity:"
- (string entity-symbol)
- " in recursive reference")))
- (push entity-symbol (iostruct-entity-names tokenbuf))
- (if* (stringp p-value) then
- (add-to-entity-buf entity-symbol p-value)
- elseif (null external-callback) then
- (setf (iostruct-do-entity tokenbuf) nil)
- elseif p-value then
- (let ((entity-stream (apply external-callback p-value)))
- (if* entity-stream then
- (let ((entity-buf (get-tokenbuf)))
- (setf (tokenbuf-stream entity-buf) entity-stream)
- (unicode-check entity-stream tokenbuf)
- (push entity-buf
- (iostruct-entity-bufs tokenbuf))
- ;; check for possible external textdecl
- (let ((count 0) cch
- (string "<?xml "))
- (if* (dotimes (i (length string) t)
- (setf cch (get-next-char tokenbuf))
- (when (and (= i 5)
- (xml-space-p cch))
- (setf cch #\space))
- (when (not (eq cch
- (schar string count)))
- (return nil))
- (incf count)) then
- (setf count 5)
- (loop
- (when (< count 0) (return))
- (un-next-char (schar string count))
- (decf count))
- ;; swallow <?xml token
- (swallow-xml-token
- tokenbuf
- external-callback)
- else
- (un-next-char cch)
- (decf count)
- (loop
- (when (< count 0) (return))
- (un-next-char (schar string count))
- (decf count))))
- )
- else
- (xml-error (concatenate 'string
- "Reference to unparsed entity "
- (string entity-symbol)))
- ))
- )
- elseif (or (not (iostruct-seen-any-dtd tokenbuf))
- (iostruct-standalonep tokenbuf)
- (and (iostruct-seen-any-dtd tokenbuf)
- (not (iostruct-seen-external-dtd tokenbuf))
- (not (iostruct-seen-parameter-reference tokenbuf))))
- then
- (xml-error (concatenate 'string
- (string entity-symbol)
- " must have entity declaration before being referenced"))
- ))
- ))
- (setq state state-pcdata)
- else (let ((tmp (compute-coll-string entity)))
- (clear-coll coll)
- (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error (concatenate 'string
- "reference not terminated by ';', starting at: '&"
- tmp
- (compute-coll-string coll)
- "'")))
- ))
-
- (#.state-pcdata5
- (let ((code (char-code ch)))
- (if* (eq #\; ch)
- then (if* (not (eq entity-source (first (iostruct-entity-bufs tokenbuf)))) then
- (xml-error
- (concatenate 'string
- (string (code-char char-code))
- " reference cannot be constructed from entity reference/character data sequence"))
- else
- (setf entity-source nil))
- (when (not (xml-char-p (code-char char-code)))
- (xml-error
- (concatenate 'string
- "Character reference: "
- (format nil "~s" char-code)
- " (decimal) is not valid XML input character")))
- (add-to-coll coll (code-char char-code))
- (setf char-code 0)
- (setq state state-pcdata)
- elseif (<= (char-code #\0) code (char-code #\9))
- then (setf char-code (+ (* char-code 16) (- code (char-code #\0))))
- elseif (<= (char-code #\A) code (char-code #\F))
- then (setf char-code (+ 10 (* char-code 16) (- code (char-code #\A))))
- elseif (<= (char-code #\a) code (char-code #\f))
- then (setf char-code (+ 10 (* char-code 16) (- code (char-code #\a))))
- else (clear-coll coll)
- (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error (concatenate 'string
- "illegal hexidecimal character reference code, starting at: '"
- (compute-coll-string coll)
- "', calculated char code: "
- (format nil "~s" char-code)))
- )))
-
- (#.state-pcdata6
- (let ((code (char-code ch)))
- (if* (eq #\; ch)
- then (if* (not (eq entity-source (first (iostruct-entity-bufs tokenbuf)))) then
- (xml-error
- (concatenate 'string
- (string (code-char char-code))
- " reference cannot be constructed from entity reference/character data sequence"))
- else
- (setf entity-source nil))
- (when (not (xml-char-p (code-char char-code)))
- (xml-error
- (concatenate 'string
- "Character reference: "
- (format nil "~s" char-code)
- " (decimal) is not valid XML input character")))
- (add-to-coll coll (code-char char-code))
- (setf char-code 0)
- (setq state state-pcdata)
- elseif (<= (char-code #\0) code (char-code #\9))
- then (setf char-code (+ (* char-code 10) (- code (char-code #\0))))
- else (clear-coll coll)
- (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error (concatenate 'string
- "illegal decimal character reference code, starting at: '"
- (compute-coll-string coll)
- "', calculated char code: "
- (format nil "~s" char-code)))
- )))
-
- (#.state-readtag-end
- (if* (xml-name-start-char-p ch)
- then (setf state state-readtag-end2)
- (un-next-char ch)
- else (clear-coll coll)
- (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error (concatenate 'string
- "illegal end tag name, starting at: '</"
- (compute-coll-string coll)
- "'"))
- ))
-
- (#.state-readtag-end2
- (if* (xml-name-char-p ch)
- then (add-to-coll coll ch)
- elseif (eq #\> ch) then
- (let ((tag-string (compute-coll-string coll)))
- (when (and (iostruct-ns-scope tokenbuf)
- (string= tag-string
- (first (first (iostruct-ns-scope tokenbuf)))))
- (dolist (item (second (first (iostruct-ns-scope tokenbuf))))
- (setf (iostruct-ns-to-package tokenbuf)
- (remove (assoc item (iostruct-ns-to-package tokenbuf))
- (iostruct-ns-to-package tokenbuf))))
- (setf (iostruct-ns-scope tokenbuf)
- (rest (iostruct-ns-scope tokenbuf)))))
- (setq tag-to-return (compute-tag coll *package*
- (iostruct-ns-to-package tokenbuf)))
- (return)
- elseif (xml-space-p ch) then (setf state state-readtag-end3)
- (let ((tag-string (compute-coll-string coll)))
- (when (and (iostruct-ns-scope tokenbuf)
- (string= tag-string
- (first (first (iostruct-ns-scope tokenbuf)))))
- (setf (iostruct-ns-scope tokenbuf)
- (rest (iostruct-ns-scope tokenbuf)))))
- (setq tag-to-return (compute-tag coll *package*
- (iostruct-ns-to-package tokenbuf)))
- else (let ((tmp (compute-coll-string coll)))
- (clear-coll coll)
- (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error (concatenate 'string
- "illegal end tag name, starting at: '</"
- tmp
- (compute-coll-string coll)
- "'")))
- ))
-
- (#.state-readtag-end3
- (if* (xml-space-p ch) then nil
- elseif (eq #\> ch) then (return)
- else (let ((tmp (compute-coll-string coll)))
- (clear-coll coll)
- (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error (concatenate 'string
- "illegal end tag name, starting at: '"
- (compute-coll-string coll)
- "' end tag name: " tmp )))
- ))
-
- (#.state-readtagfirst
- ; starting to read a tag name
- (if* (eq #\/ ch)
- then (setf state state-readtag-end)
- elseif (eq #\? ch)
- then (setf state state-readtag-?)
- (setf empty-delim #\?)
- elseif (eq #\! ch)
- then (setf state state-readtag-!)
- (setf empty-delim nil)
- elseif (xml-name-start-char-p ch)
- then (setf state state-readtag)
- (setf empty-delim #\/)
- (un-next-char ch)
- else (clear-coll coll)
- (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error (concatenate 'string
- "illegal character following '<', starting at '"
- (compute-coll-string coll)
- "'"))
- ))
-
- (#.state-readtag-!
- (if* (xml-name-start-char-p ch)
- then
- (setf state state-readtag-!-name)
- (un-next-char ch)
- elseif (eq #\[ ch)
- then
- (setf state state-readtag-!-conditional)
- elseif (eq #\- ch)
- then
- (setf state state-readtag-!-comment)
- else
- (clear-coll coll)
- (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error (concatenate 'string
- "illegal character following '<!', starting at '<!"
- (compute-coll-string coll)
- "'"))
- ))
-
- (#.state-readtag-!-conditional
- (if* (eq #\C ch) then
- (setf state state-readtag-!-conditional4)
- (setf special-tag-count 1)
- else (clear-coll coll)
- (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error (concatenate 'string
- "illegal character following '<![', starting at '<!["
- (compute-coll-string coll)
- "'"))
- ))
-
- (#.state-readtag-!-conditional4
- (if* (not (eq (elt "CDATA[" special-tag-count) ch))
- then (clear-coll coll)
- (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error (concatenate 'string
- "illegal token following '<![', starting at '<!["
- (subseq "CDATA[" 0 special-tag-count)
- (compute-coll-string coll)
- "'"))
- elseif (eq #\[ ch) then (setf state state-readtag-!-conditional5)
- else (incf special-tag-count)))
-
- (#.state-readtag-!-conditional5
- (if* (eq #\] ch)
- then (setf state state-readtag-!-conditional6)
- elseif (not (xml-char-p ch)) then
- (xml-error (concatenate 'string
- "Illegal character: "
- (string ch)
- " detected in CDATA input"))
- else (add-to-coll coll ch)))
-
- (#.state-readtag-!-conditional6
- (if* (eq #\] ch)
- then (setf state state-readtag-!-conditional7)
- else (setf state state-readtag-!-conditional5)
- (add-to-coll coll #\])
- (add-to-coll coll ch)))
-
- (#.state-readtag-!-conditional7
- (if* (eq #\> ch)
- then
- (if* (not (eq entity-source (first (iostruct-entity-bufs tokenbuf)))) then
- (xml-error
- "CDATA cannot be constructed from entity reference/character data sequence")
- else
- (setf entity-source nil))
- (return)
- elseif (eq #\] ch) then
- (add-to-coll coll #\]) ;; come back here to check again
- else (setf state state-readtag-!-conditional5)
- (add-to-coll coll #\])
- (add-to-coll coll #\])
- (add-to-coll coll ch)))
-
- (#.state-readtag-!-comment
- (if* (eq #\- ch)
- then (setf state state-readtag-!-readcomment)
- (setf tag-to-return :comment)
- else (clear-coll coll)
- (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error (concatenate 'string
- "illegal token following '<![-', starting at '<!-"
- (compute-coll-string coll)
- "'"))
- ))
-
- (#.state-readtag-!-readcomment
- (if* (eq #\- ch)
- then (setf state state-readtag-!-readcomment2)
- elseif (not (xml-char-p ch)) then
- (xml-error (concatenate 'string
- "Illegal character: "
- (string ch)
- " detected in input"))
- else (add-to-coll coll ch)))
-
- (#.state-readtag-!-readcomment2
- (if* (eq #\- ch)
- then (setf state state-readtag-end-bracket)
- else (setf state state-readtag-!-readcomment)
- (add-to-coll coll #\-) (add-to-coll coll ch)))
-
- (#.state-readtag-end-bracket
- (if* (eq #\> ch)
- then (if* (not (eq entity-source (first (iostruct-entity-bufs tokenbuf)))) then
- (xml-error
- (concatenate 'string
- (string tag-to-return)
- " tag cannot be constructed from entity reference/character data sequence"))
- else
- (setf entity-source nil))
- (return)
- else (clear-coll coll)
- (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error (concatenate 'string
- "illegal token following '--' comment terminator, starting at '--"
- (compute-coll-string coll)
- "'"))
- ))
-
- (#.state-readtag
- (if* (xml-name-char-p ch) ;; starting char already passed more restrictive test
- then
- (add-to-coll coll ch)
- else
- (if* (xml-space-p ch) then
- (setf tag-to-return-string (compute-coll-string coll))
- (setq tag-to-return
- (compute-tag coll *package*
- (iostruct-ns-to-package tokenbuf)))
- (clear-coll coll)
- (setf state state-readtag2)
- elseif (eq #\> ch) then
- (setq tag-to-return
- (compute-tag coll *package*
- (iostruct-ns-to-package tokenbuf)))
- (clear-coll coll)
- (return)
- elseif (eq #\/ ch) then
- (setq tag-to-return
- (compute-tag coll *package*
- (iostruct-ns-to-package tokenbuf)))
- (clear-coll coll)
- (setf state state-readtag3)
- else (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error
- (concatenate 'string
- "illegal token name, starting at '"
- (compute-coll-string coll)
- "'"))
- )))
-
- (#.state-readtag2
- (if* (xml-space-p ch) then nil
- elseif (eq #\> ch) then (return)
- elseif (eq #\/ ch) then (setf state state-readtag3)
- elseif (xml-name-start-char-p ch) then
- (un-next-char ch)
- (setf state state-readtag4)
- else (clear-coll coll)
- (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error
- (concatenate 'string
- "illegal token, starting at '"
- (compute-coll-string coll)
- "' following element token start: " (string tag-to-return)))
- ))
-
- (#.state-readtag4
- (if* (xml-name-char-p ch) ;; starting char already passed more restrictive test
- then
- (add-to-coll coll ch)
- elseif (eq #\= ch) then
- (setq attrib-name (compute-tag coll *package*
- (iostruct-ns-to-package tokenbuf)))
- (clear-coll coll)
- (let ((name (symbol-name attrib-name)))
- (when (and (>= (length name) 5)
- (string= name "xmlns" :end1 5))
- (if* (= (length name) 5)
- then
- (setf ns-token :none)
- elseif (eq (schar name 5) #\:)
- then
- (setf ns-token (subseq name 6)))))
- (setf state state-readtag5)
- elseif (xml-space-p ch) then
- (setq attrib-name (compute-tag coll *package*
- (iostruct-ns-to-package tokenbuf)))
- (clear-coll coll)
- (let ((name (symbol-name attrib-name)))
- (when (and (>= (length name) 5)
- (string= name "xmlns" :end1 5))
- (if* (= (length name) 5)
- then
- (setf ns-token :none)
- else
- (setf ns-token (subseq name 6)))))
- (setf state state-readtag12)
- else (let ((tmp (compute-coll-string coll)))
- (clear-coll coll)
- (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error
- (concatenate 'string
- "looking for attribute '=', found: '"
- (compute-coll-string coll)
- "' following attribute name: " tmp)))
- ))
-
- (#.state-readtag12
- (if* (xml-space-p ch) then nil
- elseif (eq #\= ch) then (setf state state-readtag5)
- else
- (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error
- (concatenate 'string
- "looking for attribute '=', found: '"
- (compute-coll-string coll)
- "' following attribute name: " (string attrib-name)))))
-
- (#.state-readtag5
- ;; begin to collect attribute value
- (if* (or (eq ch #\")
- (eq ch #\'))
- then (setq value-delim ch)
- (let* ((tag-defaults (assoc tag-to-return attlist-data))
- (this-attrib (assoc attrib-name tag-defaults)))
- (when (and (second this-attrib) (not (eq (second this-attrib) :CDATA)))
- (setf cdatap nil))
- )
- (setq state state-readtag6)
- elseif (xml-space-p ch) then nil
- else
- (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error
- (concatenate 'string
- "attribute value not delimited by ' or \" : '"
- (compute-coll-string coll)
- "' following attribute: " (string attrib-name)))
- ))
-
- (#.state-readtag6
- (let ((from-entity (and attrib-value-tokenbuf
- (eq attrib-value-tokenbuf
- (first (iostruct-entity-bufs tokenbuf))))))
- (when (not from-entity) (setf attrib-value-tokenbuf nil))
- (if* from-entity then
- (if* (eq #\newline ch) then (setf ch #\space)
- elseif (eq #\return ch) then (setf ch #\space)
- elseif (eq #\tab ch) then (setf ch #\space)
- ))
- (if* (and (not from-entity) (eq ch value-delim))
- then (setq attrib-value (compute-coll-string coll))
- (when (not cdatap)
- (setf attrib-value (normalize-attrib-value attrib-value)))
- (clear-coll coll)
- (push attrib-name attribs-to-return)
- (push attrib-value attribs-to-return)
- (when ns-token
- (let ((package (assoc (parse-uri attrib-value)
- (iostruct-uri-to-package tokenbuf)
- :test 'uri=)))
- (if* package then (setf package (rest package))
- else
- (setf package
- (let ((i 0) new-package)
- (loop
- (let* ((candidate (concatenate 'string
- "net.xml.namespace."
- (format nil "~s" i)))
- (exists (find-package candidate)))
- (if* exists
- then (incf i)
- else (setf new-package (make-package candidate))
- (setf (iostruct-uri-to-package tokenbuf)
- (acons (parse-uri attrib-value) new-package
- (iostruct-uri-to-package tokenbuf)))
- (return new-package)))))))
- (setf (iostruct-ns-to-package tokenbuf)
- (acons ns-token package (iostruct-ns-to-package tokenbuf)))
- )
- (if* (and (first (iostruct-ns-scope tokenbuf))
- (string= (first (first (iostruct-ns-scope tokenbuf)))
- tag-to-return-string))
- then
- (push ns-token (second (first (iostruct-ns-scope tokenbuf))))
- else
- (push (list tag-to-return-string (list ns-token))
- (iostruct-ns-scope tokenbuf)))
- (setf ns-token nil))
- (setq state state-readtag6a)
- elseif (eq #\newline ch) then
- (when (not (eq #\return last-ch)) (add-to-coll coll #\space))
- elseif (or (eq #\tab ch) (eq #\return ch)) then
- (add-to-coll coll #\space)
- elseif (eq #\& ch)
- then (setq state state-readtag7)
- (setf entity-source (first (iostruct-entity-bufs tokenbuf)))
- elseif (and (xml-char-p ch) (not (eq #\< ch)))
- then (add-to-coll coll ch)
- else
- (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error
- (concatenate 'string
- "attribute value cannot contain '<': '"
- (compute-coll-string coll)
- "' following attribute: " (string attrib-name)))
- )
- (setf last-ch ch)))
-
- (#.state-readtag6a
- (if* (xml-space-p ch) then (setf state state-readtag2)
- elseif (eq #\> ch) then (setf state state-readtag2)
- (return)
- elseif (eq #\/ ch) then (setf state state-readtag3)
- else (clear-coll coll)
- (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error
- (concatenate 'string
- "illegal token, starting at '"
- (compute-coll-string coll)
- "' following element token start: " (string tag-to-return)))
- ))
-
- (#.state-readtag7
- (if* (eq #\# ch)
- then (setf state state-readtag8)
- elseif (xml-name-start-char-p ch)
- then (setf state state-readtag9)
- (un-next-char ch)
- else (clear-coll coll)
- (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error
- (concatenate 'string
- "attribute value contains illegal reference name: '&"
- (compute-coll-string coll)
- "' in attribute value for: " (string attrib-name)))
- ))
-
- (#.state-readtag8
- (if* (eq #\x ch)
- then (setf state state-readtag10)
- elseif (<= (char-code #\0) (char-code ch) (char-code #\9))
- then (setf state state-readtag11)
- (un-next-char ch)
- else (clear-coll coll)
- (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error
- (concatenate 'string
- "attribute value contains illegal character reference code: '"
- (compute-coll-string coll)
- "' in attribute value for: " (string attrib-name)))
- ))
-
- (#.state-readtag10
- (let ((code (char-code ch)))
- (if* (eq #\; ch)
- then (if* (not (eq entity-source (first (iostruct-entity-bufs tokenbuf)))) then
- (xml-error
- (concatenate 'string
- (string (code-char char-code))
- " reference cannot be constructed from entity reference/character data sequence"))
- else
- (setf entity-source nil))
- (add-to-coll coll (code-char char-code))
- (setf char-code 0)
- (setq state state-readtag6)
- elseif (<= (char-code #\0) code (char-code #\9))
- then (setf char-code (+ (* char-code 16) (- code (char-code #\0))))
- elseif (<= (char-code #\A) code (char-code #\F))
- then (setf char-code (+ 10 (* char-code 16) (- code (char-code #\A))))
- elseif (<= (char-code #\a) code (char-code #\f))
- then (setf char-code (+ 10 (* char-code 16) (- code (char-code #\a))))
- else (clear-coll coll)
- (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error
- (concatenate 'string
- "attribute value contains illegal hexidecimal character reference code: '"
- (compute-coll-string coll)
- "' in attribute value for: " (string attrib-name)))
- )))
-
- (#.state-readtag11
- (let ((code (char-code ch)))
- (if* (eq #\; ch)
- then (if* (not (eq entity-source (first (iostruct-entity-bufs tokenbuf)))) then
- (xml-error
- (concatenate 'string
- (string (code-char char-code))
- " reference cannot be constructed from entity reference/character data sequence"))
- else
- (setf entity-source nil))
- (add-to-coll coll (code-char char-code))
- (setf char-code 0)
- (setq state state-readtag6)
- elseif (<= (char-code #\0) code (char-code #\9))
- then (setf char-code (+ (* char-code 10) (- code (char-code #\0))))
- else (clear-coll coll)
- (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error
- (concatenate 'string
- "attribute value contains illegal decimal character reference code: '"
- (compute-coll-string coll)
- "' in attribute value for: " (string attrib-name)))
- )))
-
- (#.state-readtag9
- (if* (xml-name-char-p ch)
- then (add-to-coll entity ch)
- elseif (eq #\; ch)
- then (let ((entity-symbol (compute-tag entity)))
- (clear-coll entity)
- (if* (not (eq entity-source (first (iostruct-entity-bufs tokenbuf)))) then
- (xml-error
- (concatenate 'string
- (string entity-symbol)
- " reference cannot be constructed from entity reference/character data sequence"))
- else
- (setf entity-source nil))
- (if* (string= (symbol-name entity-symbol) "amp") then (add-to-coll coll #\&)
- elseif (string= (symbol-name entity-symbol) "lt") then (add-to-coll coll #\<)
- elseif (string= (symbol-name entity-symbol) "gt") then (add-to-coll coll #\>)
- elseif (string= (symbol-name entity-symbol) "apos") then (add-to-coll coll #\')
- elseif (string= (symbol-name entity-symbol) "quot") then (add-to-coll coll #\")
- else (let (p-value)
- (if* (and (iostruct-do-entity tokenbuf)
- (setf p-value
- (assoc entity-symbol
- (iostruct-general-entities tokenbuf)))) then
- (setf p-value (rest p-value))
- (when (member entity-symbol (iostruct-entity-names tokenbuf))
- (xml-error (concatenate 'string
- "entity:"
- (string entity-symbol)
- " in recursive reference")))
- (push entity-symbol (iostruct-entity-names tokenbuf))
- (if* (stringp p-value) then
- (add-to-entity-buf entity-symbol p-value)
- (when (not attrib-value-tokenbuf)
- (setf attrib-value-tokenbuf
- (first (iostruct-entity-bufs tokenbuf))))
- elseif (null external-callback) then
- (setf (iostruct-do-entity tokenbuf) nil)
- elseif p-value then
- (let ((entity-stream (apply external-callback p-value)))
- (if* entity-stream then
- (let ((entity-buf (get-tokenbuf)))
- (setf (tokenbuf-stream entity-buf) entity-stream)
- (unicode-check entity-stream tokenbuf)
- (push entity-buf
- (iostruct-entity-bufs tokenbuf))
- ;; check for possible external textdecl
- (let ((count 0) cch
- (string "<?xml "))
- (if* (dotimes (i (length string) t)
- (setf cch (get-next-char tokenbuf))
- (when (and (= i 5)
- (xml-space-p cch))
- (setf cch #\space))
- (when (not (eq cch
- (schar string count)))
- (return nil))
- (incf count)) then
- (setf count 5)
- (loop
- (when (< count 0) (return))
- (un-next-char (schar string count))
- (decf count))
- ;; swallow <?xml token
- (swallow-xml-token
- tokenbuf
- external-callback)
- else
- (un-next-char cch)
- (decf count)
- (loop
- (when (< count 0) (return))
- (un-next-char (schar string count))
- (decf count))))
- )
- else
- (xml-error (concatenate 'string
- "Reference to unparsed entity "
- (string entity-symbol)))
- ))
- )
- elseif (or (not (iostruct-seen-any-dtd tokenbuf))
- (and (iostruct-seen-any-dtd tokenbuf)
- (not (iostruct-seen-external-dtd tokenbuf))
- (not (iostruct-seen-parameter-reference tokenbuf))))
- then
- (xml-error (concatenate 'string
- (string entity-symbol)
- " must have entity declaration before being referenced"))
- ))
- ))
- (setq state state-readtag6)
- else (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error
- (concatenate 'string
- "attribute value contains illegal reference name: '&"
- (compute-coll-string coll)
- "' in attribute value for: " (string attrib-name)))
- ))
-
- (#.state-readtag3
- (if* (eq #\> ch) then (return)
- else (clear-coll coll)
- (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error
- (concatenate 'string
- "expected '>' found '"
- (compute-coll-string coll)
- "' in element: " (string tag-to-return)))
- ))
-
- (#.state-readtag-!-name
- (if* (xml-name-char-p ch) ;; starting char already passed more restrictive test
- then
- (add-to-coll coll ch)
- else
- (when (not (xml-space-p ch))
- (xml-error (concatenate 'string
- "expecting whitespace following: '<!"
- (compute-coll-string coll)
- "' ; got: '" (string ch) "'")))
- (setq tag-to-return (compute-tag coll))
- (clear-coll coll)
- (setf state state-pre-!-contents)))
-
- (#.state-readtag-?
- (if* (xml-name-char-p ch)
- then
- (add-to-coll coll ch)
- else
- (when (and (not (xml-space-p ch)) (not (eq #\? ch)))
- (xml-error (concatenate 'string
- "expecting name following: '<?"
- (compute-coll-string coll)
- "' ; got: '" (string ch) "'"))
- )
- (when (= (collector-next coll) 0)
- (xml-error "null <? token"))
- (if* (and (= (collector-next coll) 3)
- (eq (elt (collector-data coll) 0) #\x)
- (eq (elt (collector-data coll) 1) #\m)
- (eq (elt (collector-data coll) 2) #\l)
- )
- then
- (when (eq #\? ch) (xml-error "null <?xml token"))
- (setq tag-to-return :xml)
- (setf state state-findattributename)
- elseif (and (= (collector-next coll) 3)
- (or (eq (elt (collector-data coll) 0) #\x)
- (eq (elt (collector-data coll) 0) #\X))
- (or (eq (elt (collector-data coll) 1) #\m)
- (eq (elt (collector-data coll) 1) #\M))
- (or (eq (elt (collector-data coll) 2) #\l)
- (eq (elt (collector-data coll) 2) #\L))
- ) then
- (xml-error "<?xml tag must be all lower case")
- else
- (setq tag-to-return (compute-tag coll))
- (when (eq #\? ch) (un-next-char ch))
- (setf state state-prereadpi))
- (clear-coll coll)))
-
- (#.state-pre-!-contents
- (if* (xml-space-p ch)
- then nil
- elseif (not (xml-char-p ch))
- then (xml-error (concatenate 'string ;; no test for this...
- "illegal character '"
- (string ch)
- " following <!" (string tag-to-return)))
- elseif (eq #\> ch)
- then (return)
- else (un-next-char ch)
- (setf state state-!-contents)))
-
- (#.state-begin-dtd
- (un-next-char ch)
- (let ((val (parse-dtd tokenbuf nil external-callback)))
- (setf (iostruct-seen-any-dtd tokenbuf) t)
- (push (append (list :[) val)
- contents-to-return))
- (setf state state-!-doctype-ext3))
-
- (#.state-!-contents
- (if* (xml-name-char-p ch)
- then (add-to-coll coll ch)
- elseif (eq #\> ch)
- then (push (compute-coll-string coll) contents-to-return)
- (clear-coll coll)
- (return)
- elseif (eq #\[ ch)
- then (push (compute-tag coll) contents-to-return)
- (clear-coll coll)
- (setf state state-begin-dtd)
- elseif (and (xml-space-p ch) (eq tag-to-return :DOCTYPE))
- ;; look at tag-to-return and set state accordingly
- then (push (compute-tag coll) contents-to-return)
- (clear-coll coll)
- (setf state state-!-doctype)
- else (xml-error
- (concatenate 'string
- "illegal name: '"
- (string tag-to-return)
- "' in <! tag: "))
- ))
-
- (#.state-!-doctype-ext
- (if* (xml-name-char-p ch) ;; starting char already passed more restrictive test
- then
- (add-to-coll coll ch)
- else
- (when (not (xml-space-p ch))
- (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error
- (concatenate 'string
- "illegal character in '"
- (compute-coll-string coll)
- "' in <! tag: " (string tag-to-return) " "
- (string (first contents-to-return))
- ))
- )
- (let ((token (compute-tag coll)))
- (push token contents-to-return)
- (clear-coll coll)
- (if* (eq :SYSTEM token) then (setf state state-!-doctype-system)
- elseif (eq :PUBLIC token) then (setf state state-!-doctype-public)
- else (xml-error
- (concatenate 'string
- "expected 'SYSTEM' or 'PUBLIC' got '"
- (string (first contents-to-return))
- "' in <! tag: " (string tag-to-return) " "
- (string (second contents-to-return))))
- )
- )))
-
- (#.state-!-doctype-public
- (if* (xml-space-p ch) then nil
- elseif (eq #\" ch) then (setf state state-!-doctype-public2)
- elseif (eq #\' ch) then (setf state state-!-doctype-public3)
- else (xml-error
- (concatenate 'string
- "expected quote or double-quote got: '"
- (string ch)
- "' in <! tag: " (string tag-to-return) " "
- (string (second contents-to-return)) " "
- (string (first contents-to-return))
- ))
- ))
-
- (#.state-!-doctype-system
- (if* (xml-space-p ch) then nil
- elseif (eq #\" ch) then (setf state state-!-doctype-system2)
- elseif (eq #\' ch) then (setf state state-!-doctype-system3)
- else (xml-error
- (concatenate 'string
- "expected quote or double-quote got: '"
- (string ch)
- "' in <! tag: " (string tag-to-return) " "
- (string (second contents-to-return)) " "
- (string (first contents-to-return))
- ))
- ))
-
- (#.state-!-doctype-public2
- (if* (eq #\" ch) then (push (compute-coll-string coll)
- contents-to-return)
- (clear-coll coll)
- (setf state state-!-doctype-system)
- elseif (pub-id-char-p ch) then (add-to-coll coll ch)
- else (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error
- (concatenate 'string
- "illegal character in DOCTYPE PUBLIC string: '"
- (compute-coll-string coll) "'"))
- ))
-
- (#.state-!-doctype-public3
- (if* (eq #\' ch) then (push (compute-coll-string coll)
- contents-to-return)
- (clear-coll coll)
- (setf state state-!-doctype-system)
- elseif (pub-id-char-p ch) then (add-to-coll coll ch)
- else (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error
- (concatenate 'string
- "illegal character in DOCTYPE PUBLIC string: '"
- (compute-coll-string coll) "'"))
- ))
-
- (#.state-!-doctype-system2
- (when (not (xml-char-p ch))
- (xml-error "XML is not well formed")) ;; not tested
- (if* (eq #\" ch) then (push (compute-coll-string coll)
- contents-to-return)
- (clear-coll coll)
- (setf state state-!-doctype-ext2)
- else (add-to-coll coll ch)))
-
- (#.state-!-doctype-system3
- (when (not (xml-char-p ch))
- (xml-error "XML is not well formed")) ;; not tested
- (if* (eq #\' ch) then (push (compute-coll-string coll)
- contents-to-return)
- (clear-coll coll)
- (setf state state-!-doctype-ext2)
- else (add-to-coll coll ch)))
-
- (#.state-!-doctype-ext2
- (if* (xml-space-p ch) then nil
- elseif (eq #\> ch) then (return)
- elseif (eq #\[ ch)
- then (setf state state-begin-dtd)
- else
- (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error
- (concatenate 'string
- "illegal char in DOCTYPE token: '"
- (compute-coll-string coll) "'"))
- ))
-
- (#.state-!-doctype-ext3
- (if* (xml-space-p ch) then nil
- elseif (eq #\> ch) then (return)
- else
- (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error
- (concatenate 'string
- "illegal char in DOCTYPE token following dtd: '"
- (compute-coll-string coll) "'"))
- ))
-
- (#.state-!-doctype
- ;; skip whitespace; possible exits: >, SYSTEM, PUBLIC, [
- (if* (xml-space-p ch) then nil
- elseif (xml-name-start-char-p ch)
- then
- (setf state state-!-doctype-ext)
- (un-next-char ch)
- elseif (eq #\> ch) then (return)
- elseif (eq #\[ ch)
- then (setf state state-begin-dtd)
- else (xml-error
- (concatenate 'string
- "illegal character: '"
- (string ch)
- "' in <! tag: " (string tag-to-return) " "
- (string (first contents-to-return))))
- ))
-
- (#.state-prereadpi
- (if* (xml-space-p ch)
- then nil
- elseif (not (xml-char-p ch))
- then (xml-error "XML is not well formed") ;; no test
- else (un-next-char ch)
- (setf state state-readpi)))
-
- (#.state-readpi
- (if* (eq #\? ch)
- then (setf state state-readpi2)
- elseif (not (xml-char-p ch))
- then (xml-error "XML is not well formed") ;; no test
- else (add-to-coll coll ch)))
-
- (#.state-readpi2
- (if* (eq #\> ch)
- then (return)
- elseif (eq #\? ch) then
- (add-to-coll coll #\?) ;; come back here to try again
- else (setf state state-readpi)
- (add-to-coll coll #\?)
- (add-to-coll coll ch)))
-
- (#.state-findattributename0
- (if* (xml-space-p ch) then (setf state state-findattributename)
- elseif (eq ch empty-delim)
- then (setf state state-noattributename)
- else
- (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error
- (concatenate 'string
- "expected space or tag end before: '"
- (compute-coll-string coll) "'"))))
- (#.state-findattributename
- ;; search until we find the start of an attribute name
- ;; or the end of the tag
- (if* (eq ch empty-delim)
- then (setf state state-noattributename)
- elseif (xml-space-p ch)
- then nil ;; skip whitespace
- elseif (xml-name-start-char-p ch)
- then
- (un-next-char ch)
- (setf state state-attribname)
- else
- (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error
- (concatenate 'string
- "illegal char in <?xml token: '"
- (compute-coll-string coll) "'"))
- ))
-
- (#.state-attribname
- ;; collect attribute name
- (if* (xml-name-char-p ch) ;; starting char already passed more restrictive test
- then
- (add-to-coll coll ch)
- elseif (xml-space-p ch) then
- (setq attrib-name (compute-tag coll))
- (clear-coll coll)
- (setq state state-attribname2)
- else
- (when (not (eq #\= ch))
- (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error
- (concatenate 'string
- "illegal char in <?xml attribute token: '"
- (compute-coll-string coll) "'"))
- )
- (setq attrib-name (compute-tag coll))
- (clear-coll coll)
- (setq state state-attribstartvalue)))
-
- (#.state-attribname2
- (if* (eq #\= ch) then (setq state state-attribstartvalue)
- elseif (xml-space-p ch) then nil
- else
- (un-next-char ch)
- (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error
- (concatenate 'string
- "illegal char in <?xml attribute token: '"
- (compute-coll-string coll) "'"))))
- (#.state-attribstartvalue
- ;; begin to collect value
- (if* (or (eq ch #\")
- (eq ch #\'))
- then (setq value-delim ch)
- (setq state state-attribvaluedelim)
- elseif (xml-space-p ch) then nil
- else
- (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error
- (concatenate 'string
- "expected ' or \" before <?xml attribute token value: '"
- (compute-coll-string coll) "'"))
- ))
-
- (#.state-attribvaluedelim
- (if* (eq ch value-delim)
- then (setq attrib-value (compute-coll-string coll))
- (clear-coll coll)
- (push attrib-name attribs-to-return)
- (push attrib-value attribs-to-return)
- (setq state state-findattributename0)
- elseif (and (xml-char-p ch) (not (eq #\< ch)))
- then (add-to-coll coll ch)
- else
- (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error
- (concatenate 'string
- "illegal character in attribute token value: '"
- (compute-coll-string coll) "'"))
- ))
-
- (#.state-noattributename
- (if* (eq #\> ch)
- then
- (return) ;; ready to build return token
- else
- (xml-error
- (concatenate 'string
- "expected '>' found: '" (string ch) "' in <?xml token"))
- ))
-
- (t
- (error "need to support state:~s" state))
- ))
+ (setq ch (get-next-char tokenbuf))
+ (when *debug-xml* (format t "ch: ~s code: ~x state:~s entity-names:~s~%"
+ ch (char-code ch) state (iostruct-entity-names tokenbuf)))
+ (if* (null ch)
+ then (return) ; eof -- exit loop
+ )
+
+
+ (case state
+ (#.state-pcdata
+ (if* (eq ch #\<)
+ then
+ (setf entity-source (first (iostruct-entity-bufs tokenbuf)))
+ (if* (> (collector-next coll) 0)
+ then ; have collected something, return this string
+ (un-next-char ch) ; push back the <
+ (return)
+ else ; collect a tag
+ (setq state state-readtagfirst))
+ elseif (eq #\& ch)
+ then (setf state state-pcdata2)
+ (setf entity-source (first (iostruct-entity-bufs tokenbuf)))
+ (setf pcdatap nil)
+ elseif (eq #\] ch) then (setf state state-pcdata7)
+ elseif (not (xml-char-p ch)) then
+ (xml-error (concatenate 'string
+ "Illegal character: "
+ (string ch)
+ " detected in input"))
+ else
+ (add-to-coll coll ch)
+ #+ignore
+ (if* (not (eq ch #\return))
+ then (add-to-coll coll ch))))
+
+ (#.state-pcdata7
+ (if* (eq #\] ch) then (setf state state-pcdata8)
+ else (setf state state-pcdata)
+ (add-to-coll coll #\]) (un-next-char ch)))
+
+ (#.state-pcdata8
+ (if* (eq #\> ch) then
+ (add-to-coll coll #\])
+ (add-to-coll coll #\])
+ (add-to-coll coll #\>)
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "content cannot contain ']]>':'"
+ (compute-coll-string coll)
+ "'"))
+ elseif (eq #\] ch) then
+ (add-to-coll coll #\])
+ else (setf state state-pcdata)
+ (add-to-coll coll #\]) (add-to-coll coll #\]) (un-next-char ch)))
+
+ (#.state-pcdata2
+ (if* (eq #\# ch)
+ then (setf state state-pcdata3)
+ elseif (xml-name-start-char-p ch)
+ then (setf state state-pcdata4)
+ (un-next-char ch)
+ else (clear-coll coll)
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal reference name, starting at: '&"
+ (compute-coll-string coll)
+ "'"))
+ ))
+
+ (#.state-pcdata3
+ (if* (eq #\x ch)
+ then (setf state state-pcdata5)
+ elseif (<= (char-code #\0) (char-code ch) (char-code #\9))
+ then (setf state state-pcdata6)
+ (un-next-char ch)
+ else (clear-coll coll)
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal character reference code, starting at: '&#"
+ (compute-coll-string coll)
+ "'"))
+ ))
+
+ (#.state-pcdata4
+ (if* (xml-name-char-p ch)
+ then (add-to-coll entity ch)
+ elseif (eq #\; ch)
+ then (let ((entity-symbol (compute-tag entity)))
+ (clear-coll entity)
+ (if* (not (eq entity-source (first (iostruct-entity-bufs tokenbuf)))) then
+ (xml-error
+ (concatenate 'string
+ (string entity-symbol)
+ " reference cannot be constructed from entity reference/character data sequence"))
+ else
+ (setf entity-source nil))
+ (if* (string= (symbol-name entity-symbol) "amp") then (add-to-coll coll #\&)
+ elseif (string= (symbol-name entity-symbol) "lt") then (add-to-coll coll #\<)
+ elseif (string= (symbol-name entity-symbol) "gt") then (add-to-coll coll #\>)
+ elseif (string= (symbol-name entity-symbol) "apos") then (add-to-coll coll #\')
+ elseif (string= (symbol-name entity-symbol) "quot") then (add-to-coll coll #\")
+ else
+ (let (p-value)
+ (if* (and (iostruct-do-entity tokenbuf)
+ (setf p-value
+ (assoc entity-symbol
+ (iostruct-general-entities tokenbuf)))) then
+ (setf p-value (rest p-value))
+ (when (member entity-symbol (iostruct-entity-names tokenbuf))
+ (xml-error (concatenate 'string
+ "entity:"
+ (string entity-symbol)
+ " in recursive reference")))
+ (push entity-symbol (iostruct-entity-names tokenbuf))
+ (if* (stringp p-value) then
+ (add-to-entity-buf entity-symbol p-value)
+ elseif (null external-callback) then
+ (setf (iostruct-do-entity tokenbuf) nil)
+ elseif p-value then
+ (let ((entity-stream (apply external-callback p-value)))
+ (if* entity-stream then
+ (let ((entity-buf (get-tokenbuf)))
+ (setf (tokenbuf-stream entity-buf) entity-stream)
+ (unicode-check entity-stream tokenbuf)
+ (push entity-buf
+ (iostruct-entity-bufs tokenbuf))
+ ;; check for possible external textdecl
+ (let ((count 0) cch
+ (string "<?xml "))
+ (if* (dotimes (i (length string) t)
+ (setf cch (get-next-char tokenbuf))
+ (when (and (= i 5)
+ (xml-space-p cch))
+ (setf cch #\space))
+ (when (not (eq cch
+ (schar string count)))
+ (return nil))
+ (incf count)) then
+ (setf count 5)
+ (loop
+ (when (< count 0) (return))
+ (un-next-char (schar string count))
+ (decf count))
+ ;; swallow <?xml token
+ (swallow-xml-token
+ tokenbuf
+ external-callback)
+ else
+ (un-next-char cch)
+ (decf count)
+ (loop
+ (when (< count 0) (return))
+ (un-next-char (schar string count))
+ (decf count))))
+ )
+ else
+ (xml-error (concatenate 'string
+ "Reference to unparsed entity "
+ (string entity-symbol)))
+ ))
+ )
+ elseif (or (not (iostruct-seen-any-dtd tokenbuf))
+ (iostruct-standalonep tokenbuf)
+ (and (iostruct-seen-any-dtd tokenbuf)
+ (not (iostruct-seen-external-dtd tokenbuf))
+ (not (iostruct-seen-parameter-reference tokenbuf))))
+ then
+ (xml-error (concatenate 'string
+ (string entity-symbol)
+ " must have entity declaration before being referenced"))
+ ))
+ ))
+ (setq state state-pcdata)
+ else (let ((tmp (compute-coll-string entity)))
+ (clear-coll coll)
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "reference not terminated by ';', starting at: '&"
+ tmp
+ (compute-coll-string coll)
+ "'")))
+ ))
+
+ (#.state-pcdata5
+ (let ((code (char-code ch)))
+ (if* (eq #\; ch)
+ then (if* (not (eq entity-source (first (iostruct-entity-bufs tokenbuf)))) then
+ (xml-error
+ (concatenate 'string
+ (string (code-char char-code))
+ " reference cannot be constructed from entity reference/character data sequence"))
+ else
+ (setf entity-source nil))
+ (when (not (xml-char-p (code-char char-code)))
+ (xml-error
+ (concatenate 'string
+ "Character reference: "
+ (format nil "~s" char-code)
+ " (decimal) is not valid XML input character")))
+ (add-to-coll coll (code-char char-code))
+ (setf char-code 0)
+ (setq state state-pcdata)
+ elseif (<= (char-code #\0) code (char-code #\9))
+ then (setf char-code (+ (* char-code 16) (- code (char-code #\0))))
+ elseif (<= (char-code #\A) code (char-code #\F))
+ then (setf char-code (+ 10 (* char-code 16) (- code (char-code #\A))))
+ elseif (<= (char-code #\a) code (char-code #\f))
+ then (setf char-code (+ 10 (* char-code 16) (- code (char-code #\a))))
+ else (clear-coll coll)
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal hexidecimal character reference code, starting at: '"
+ (compute-coll-string coll)
+ "', calculated char code: "
+ (format nil "~s" char-code)))
+ )))
+
+ (#.state-pcdata6
+ (let ((code (char-code ch)))
+ (if* (eq #\; ch)
+ then (if* (not (eq entity-source (first (iostruct-entity-bufs tokenbuf)))) then
+ (xml-error
+ (concatenate 'string
+ (string (code-char char-code))
+ " reference cannot be constructed from entity reference/character data sequence"))
+ else
+ (setf entity-source nil))
+ (when (not (xml-char-p (code-char char-code)))
+ (xml-error
+ (concatenate 'string
+ "Character reference: "
+ (format nil "~s" char-code)
+ " (decimal) is not valid XML input character")))
+ (add-to-coll coll (code-char char-code))
+ (setf char-code 0)
+ (setq state state-pcdata)
+ elseif (<= (char-code #\0) code (char-code #\9))
+ then (setf char-code (+ (* char-code 10) (- code (char-code #\0))))
+ else (clear-coll coll)
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal decimal character reference code, starting at: '"
+ (compute-coll-string coll)
+ "', calculated char code: "
+ (format nil "~s" char-code)))
+ )))
+
+ (#.state-readtag-end
+ (if* (xml-name-start-char-p ch)
+ then (setf state state-readtag-end2)
+ (un-next-char ch)
+ else (clear-coll coll)
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal end tag name, starting at: '</"
+ (compute-coll-string coll)
+ "'"))
+ ))
+
+ (#.state-readtag-end2
+ (if* (xml-name-char-p ch)
+ then (add-to-coll coll ch)
+ elseif (eq #\> ch) then
+ (let ((tag-string (compute-coll-string coll)))
+ (when (and (iostruct-ns-scope tokenbuf)
+ (string= tag-string
+ (first (first (iostruct-ns-scope tokenbuf)))))
+ (dolist (item (second (first (iostruct-ns-scope tokenbuf))))
+ (setf (iostruct-ns-to-package tokenbuf)
+ (remove (assoc item (iostruct-ns-to-package tokenbuf))
+ (iostruct-ns-to-package tokenbuf))))
+ (setf (iostruct-ns-scope tokenbuf)
+ (rest (iostruct-ns-scope tokenbuf)))))
+ (setq tag-to-return (compute-tag coll *package*
+ (iostruct-ns-to-package tokenbuf)))
+ (return)
+ elseif (xml-space-p ch) then (setf state state-readtag-end3)
+ (let ((tag-string (compute-coll-string coll)))
+ (when (and (iostruct-ns-scope tokenbuf)
+ (string= tag-string
+ (first (first (iostruct-ns-scope tokenbuf)))))
+ (setf (iostruct-ns-scope tokenbuf)
+ (rest (iostruct-ns-scope tokenbuf)))))
+ (setq tag-to-return (compute-tag coll *package*
+ (iostruct-ns-to-package tokenbuf)))
+ else (let ((tmp (compute-coll-string coll)))
+ (clear-coll coll)
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal end tag name, starting at: '</"
+ tmp
+ (compute-coll-string coll)
+ "'")))
+ ))
+
+ (#.state-readtag-end3
+ (if* (xml-space-p ch) then nil
+ elseif (eq #\> ch) then (return)
+ else (let ((tmp (compute-coll-string coll)))
+ (clear-coll coll)
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal end tag name, starting at: '"
+ (compute-coll-string coll)
+ "' end tag name: " tmp )))
+ ))
+
+ (#.state-readtagfirst
+ ; starting to read a tag name
+ (if* (eq #\/ ch)
+ then (setf state state-readtag-end)
+ elseif (eq #\? ch)
+ then (setf state state-readtag-?)
+ (setf empty-delim #\?)
+ elseif (eq #\! ch)
+ then (setf state state-readtag-!)
+ (setf empty-delim nil)
+ elseif (xml-name-start-char-p ch)
+ then (setf state state-readtag)
+ (setf empty-delim #\/)
+ (un-next-char ch)
+ else (clear-coll coll)
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal character following '<', starting at '"
+ (compute-coll-string coll)
+ "'"))
+ ))
+
+ (#.state-readtag-!
+ (if* (xml-name-start-char-p ch)
+ then
+ (setf state state-readtag-!-name)
+ (un-next-char ch)
+ elseif (eq #\[ ch)
+ then
+ (setf state state-readtag-!-conditional)
+ elseif (eq #\- ch)
+ then
+ (setf state state-readtag-!-comment)
+ else
+ (clear-coll coll)
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal character following '<!', starting at '<!"
+ (compute-coll-string coll)
+ "'"))
+ ))
+
+ (#.state-readtag-!-conditional
+ (if* (eq #\C ch) then
+ (setf state state-readtag-!-conditional4)
+ (setf special-tag-count 1)
+ else (clear-coll coll)
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal character following '<![', starting at '<!["
+ (compute-coll-string coll)
+ "'"))
+ ))
+
+ (#.state-readtag-!-conditional4
+ (if* (not (eq (elt "CDATA[" special-tag-count) ch))
+ then (clear-coll coll)
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal token following '<![', starting at '<!["
+ (subseq "CDATA[" 0 special-tag-count)
+ (compute-coll-string coll)
+ "'"))
+ elseif (eq #\[ ch) then (setf state state-readtag-!-conditional5)
+ else (incf special-tag-count)))
+
+ (#.state-readtag-!-conditional5
+ (if* (eq #\] ch)
+ then (setf state state-readtag-!-conditional6)
+ elseif (not (xml-char-p ch)) then
+ (xml-error (concatenate 'string
+ "Illegal character: "
+ (string ch)
+ " detected in CDATA input"))
+ else (add-to-coll coll ch)))
+
+ (#.state-readtag-!-conditional6
+ (if* (eq #\] ch)
+ then (setf state state-readtag-!-conditional7)
+ else (setf state state-readtag-!-conditional5)
+ (add-to-coll coll #\])
+ (add-to-coll coll ch)))
+
+ (#.state-readtag-!-conditional7
+ (if* (eq #\> ch)
+ then
+ (if* (not (eq entity-source (first (iostruct-entity-bufs tokenbuf)))) then
+ (xml-error
+ "CDATA cannot be constructed from entity reference/character data sequence")
+ else
+ (setf entity-source nil))
+ (return)
+ elseif (eq #\] ch) then
+ (add-to-coll coll #\]) ;; come back here to check again
+ else (setf state state-readtag-!-conditional5)
+ (add-to-coll coll #\])
+ (add-to-coll coll #\])
+ (add-to-coll coll ch)))
+
+ (#.state-readtag-!-comment
+ (if* (eq #\- ch)
+ then (setf state state-readtag-!-readcomment)
+ (setf tag-to-return :comment)
+ else (clear-coll coll)
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal token following '<![-', starting at '<!-"
+ (compute-coll-string coll)
+ "'"))
+ ))
+
+ (#.state-readtag-!-readcomment
+ (if* (eq #\- ch)
+ then (setf state state-readtag-!-readcomment2)
+ elseif (not (xml-char-p ch)) then
+ (xml-error (concatenate 'string
+ "Illegal character: "
+ (string ch)
+ " detected in input"))
+ else (add-to-coll coll ch)))
+
+ (#.state-readtag-!-readcomment2
+ (if* (eq #\- ch)
+ then (setf state state-readtag-end-bracket)
+ else (setf state state-readtag-!-readcomment)
+ (add-to-coll coll #\-) (add-to-coll coll ch)))
+
+ (#.state-readtag-end-bracket
+ (if* (eq #\> ch)
+ then (if* (not (eq entity-source (first (iostruct-entity-bufs tokenbuf)))) then
+ (xml-error
+ (concatenate 'string
+ (string tag-to-return)
+ " tag cannot be constructed from entity reference/character data sequence"))
+ else
+ (setf entity-source nil))
+ (return)
+ else (clear-coll coll)
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal token following '--' comment terminator, starting at '--"
+ (compute-coll-string coll)
+ "'"))
+ ))
+
+ (#.state-readtag
+ (if* (xml-name-char-p ch) ;; starting char already passed more restrictive test
+ then
+ (add-to-coll coll ch)
+ else
+ (if* (xml-space-p ch) then
+ (setf tag-to-return-string (compute-coll-string coll))
+ (setq tag-to-return
+ (compute-tag coll *package*
+ (iostruct-ns-to-package tokenbuf)))
+ (clear-coll coll)
+ (setf state state-readtag2)
+ elseif (eq #\> ch) then
+ (setq tag-to-return
+ (compute-tag coll *package*
+ (iostruct-ns-to-package tokenbuf)))
+ (clear-coll coll)
+ (return)
+ elseif (eq #\/ ch) then
+ (setq tag-to-return
+ (compute-tag coll *package*
+ (iostruct-ns-to-package tokenbuf)))
+ (clear-coll coll)
+ (setf state state-readtag3)
+ else (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error
+ (concatenate 'string
+ "illegal token name, starting at '"
+ (compute-coll-string coll)
+ "'"))
+ )))
+
+ (#.state-readtag2
+ (if* (xml-space-p ch) then nil
+ elseif (eq #\> ch) then (return)
+ elseif (eq #\/ ch) then (setf state state-readtag3)
+ elseif (xml-name-start-char-p ch) then
+ (un-next-char ch)
+ (setf state state-readtag4)
+ else (clear-coll coll)
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error
+ (concatenate 'string
+ "illegal token, starting at '"
+ (compute-coll-string coll)
+ "' following element token start: " (string tag-to-return)))
+ ))
+
+ (#.state-readtag4
+ (if* (xml-name-char-p ch) ;; starting char already passed more restrictive test
+ then
+ (add-to-coll coll ch)
+ elseif (eq #\= ch) then
+ (setq attrib-name (compute-tag coll *package*
+ (iostruct-ns-to-package tokenbuf)))
+ (clear-coll coll)
+ (let ((name (symbol-name attrib-name)))
+ (when (and (>= (length name) 5)
+ (string= name "xmlns" :end1 5))
+ (if* (= (length name) 5)
+ then
+ (setf ns-token :none)
+ elseif (eq (schar name 5) #\:)
+ then
+ (setf ns-token (subseq name 6)))))
+ (setf state state-readtag5)
+ elseif (xml-space-p ch) then
+ (setq attrib-name (compute-tag coll *package*
+ (iostruct-ns-to-package tokenbuf)))
+ (clear-coll coll)
+ (let ((name (symbol-name attrib-name)))
+ (when (and (>= (length name) 5)
+ (string= name "xmlns" :end1 5))
+ (if* (= (length name) 5)
+ then
+ (setf ns-token :none)
+ else
+ (setf ns-token (subseq name 6)))))
+ (setf state state-readtag12)
+ else (let ((tmp (compute-coll-string coll)))
+ (clear-coll coll)
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error
+ (concatenate 'string
+ "looking for attribute '=', found: '"
+ (compute-coll-string coll)
+ "' following attribute name: " tmp)))
+ ))
+
+ (#.state-readtag12
+ (if* (xml-space-p ch) then nil
+ elseif (eq #\= ch) then (setf state state-readtag5)
+ else
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error
+ (concatenate 'string
+ "looking for attribute '=', found: '"
+ (compute-coll-string coll)
+ "' following attribute name: " (string attrib-name)))))
+
+ (#.state-readtag5
+ ;; begin to collect attribute value
+ (if* (or (eq ch #\")
+ (eq ch #\'))
+ then (setq value-delim ch)
+ (let* ((tag-defaults (assoc tag-to-return attlist-data))
+ (this-attrib (assoc attrib-name tag-defaults)))
+ (when (and (second this-attrib) (not (eq (second this-attrib) :CDATA)))
+ (setf cdatap nil))
+ )
+ (setq state state-readtag6)
+ elseif (xml-space-p ch) then nil
+ else
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error
+ (concatenate 'string
+ "attribute value not delimited by ' or \" : '"
+ (compute-coll-string coll)
+ "' following attribute: " (string attrib-name)))
+ ))
+
+ (#.state-readtag6
+ (let ((from-entity (and attrib-value-tokenbuf
+ (eq attrib-value-tokenbuf
+ (first (iostruct-entity-bufs tokenbuf))))))
+ (when (not from-entity) (setf attrib-value-tokenbuf nil))
+ (if* from-entity then
+ (if* (eq #\newline ch) then (setf ch #\space)
+ elseif (eq #\return ch) then (setf ch #\space)
+ elseif (eq #\tab ch) then (setf ch #\space)
+ ))
+ (if* (and (not from-entity) (eq ch value-delim))
+ then (setq attrib-value (compute-coll-string coll))
+ (when (not cdatap)
+ (setf attrib-value (normalize-attrib-value attrib-value)))
+ (clear-coll coll)
+ (push attrib-name attribs-to-return)
+ (push attrib-value attribs-to-return)
+ (when ns-token
+ (let ((package (assoc (parse-uri attrib-value)
+ (iostruct-uri-to-package tokenbuf)
+ :test 'uri=)))
+ (if* package then (setf package (rest package))
+ else
+ (setf package
+ (let ((i 0) new-package)
+ (loop
+ (let* ((candidate (concatenate 'string
+ "net.xml.namespace."
+ (format nil "~s" i)))
+ (exists (find-package candidate)))
+ (if* exists
+ then (incf i)
+ else (setf new-package (make-package candidate))
+ (setf (iostruct-uri-to-package tokenbuf)
+ (acons (parse-uri attrib-value) new-package
+ (iostruct-uri-to-package tokenbuf)))
+ (return new-package)))))))
+ (setf (iostruct-ns-to-package tokenbuf)
+ (acons ns-token package (iostruct-ns-to-package tokenbuf)))
+ )
+ (if* (and (first (iostruct-ns-scope tokenbuf))
+ (string= (first (first (iostruct-ns-scope tokenbuf)))
+ tag-to-return-string))
+ then
+ (push ns-token (second (first (iostruct-ns-scope tokenbuf))))
+ else
+ (push (list tag-to-return-string (list ns-token))
+ (iostruct-ns-scope tokenbuf)))
+ (setf ns-token nil))
+ (setq state state-readtag6a)
+ elseif (eq #\newline ch) then
+ (when (not (eq #\return last-ch)) (add-to-coll coll #\space))
+ elseif (or (eq #\tab ch) (eq #\return ch)) then
+ (add-to-coll coll #\space)
+ elseif (eq #\& ch)
+ then (setq state state-readtag7)
+ (setf entity-source (first (iostruct-entity-bufs tokenbuf)))
+ elseif (and (xml-char-p ch) (not (eq #\< ch)))
+ then (add-to-coll coll ch)
+ else
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error
+ (concatenate 'string
+ "attribute value cannot contain '<': '"
+ (compute-coll-string coll)
+ "' following attribute: " (string attrib-name)))
+ )
+ (setf last-ch ch)))
+
+ (#.state-readtag6a
+ (if* (xml-space-p ch) then (setf state state-readtag2)
+ elseif (eq #\> ch) then (setf state state-readtag2)
+ (return)
+ elseif (eq #\/ ch) then (setf state state-readtag3)
+ else (clear-coll coll)
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error
+ (concatenate 'string
+ "illegal token, starting at '"
+ (compute-coll-string coll)
+ "' following element token start: " (string tag-to-return)))
+ ))
+
+ (#.state-readtag7
+ (if* (eq #\# ch)
+ then (setf state state-readtag8)
+ elseif (xml-name-start-char-p ch)
+ then (setf state state-readtag9)
+ (un-next-char ch)
+ else (clear-coll coll)
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error
+ (concatenate 'string
+ "attribute value contains illegal reference name: '&"
+ (compute-coll-string coll)
+ "' in attribute value for: " (string attrib-name)))
+ ))
+
+ (#.state-readtag8
+ (if* (eq #\x ch)
+ then (setf state state-readtag10)
+ elseif (<= (char-code #\0) (char-code ch) (char-code #\9))
+ then (setf state state-readtag11)
+ (un-next-char ch)
+ else (clear-coll coll)
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error
+ (concatenate 'string
+ "attribute value contains illegal character reference code: '"
+ (compute-coll-string coll)
+ "' in attribute value for: " (string attrib-name)))
+ ))
+
+ (#.state-readtag10
+ (let ((code (char-code ch)))
+ (if* (eq #\; ch)
+ then (if* (not (eq entity-source (first (iostruct-entity-bufs tokenbuf)))) then
+ (xml-error
+ (concatenate 'string
+ (string (code-char char-code))
+ " reference cannot be constructed from entity reference/character data sequence"))
+ else
+ (setf entity-source nil))
+ (add-to-coll coll (code-char char-code))
+ (setf char-code 0)
+ (setq state state-readtag6)
+ elseif (<= (char-code #\0) code (char-code #\9))
+ then (setf char-code (+ (* char-code 16) (- code (char-code #\0))))
+ elseif (<= (char-code #\A) code (char-code #\F))
+ then (setf char-code (+ 10 (* char-code 16) (- code (char-code #\A))))
+ elseif (<= (char-code #\a) code (char-code #\f))
+ then (setf char-code (+ 10 (* char-code 16) (- code (char-code #\a))))
+ else (clear-coll coll)
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error
+ (concatenate 'string
+ "attribute value contains illegal hexidecimal character reference code: '"
+ (compute-coll-string coll)
+ "' in attribute value for: " (string attrib-name)))
+ )))
+
+ (#.state-readtag11
+ (let ((code (char-code ch)))
+ (if* (eq #\; ch)
+ then (if* (not (eq entity-source (first (iostruct-entity-bufs tokenbuf)))) then
+ (xml-error
+ (concatenate 'string
+ (string (code-char char-code))
+ " reference cannot be constructed from entity reference/character data sequence"))
+ else
+ (setf entity-source nil))
+ (add-to-coll coll (code-char char-code))
+ (setf char-code 0)
+ (setq state state-readtag6)
+ elseif (<= (char-code #\0) code (char-code #\9))
+ then (setf char-code (+ (* char-code 10) (- code (char-code #\0))))
+ else (clear-coll coll)
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error
+ (concatenate 'string
+ "attribute value contains illegal decimal character reference code: '"
+ (compute-coll-string coll)
+ "' in attribute value for: " (string attrib-name)))
+ )))
+
+ (#.state-readtag9
+ (if* (xml-name-char-p ch)
+ then (add-to-coll entity ch)
+ elseif (eq #\; ch)
+ then (let ((entity-symbol (compute-tag entity)))
+ (clear-coll entity)
+ (if* (not (eq entity-source (first (iostruct-entity-bufs tokenbuf)))) then
+ (xml-error
+ (concatenate 'string
+ (string entity-symbol)
+ " reference cannot be constructed from entity reference/character data sequence"))
+ else
+ (setf entity-source nil))
+ (if* (string= (symbol-name entity-symbol) "amp") then (add-to-coll coll #\&)
+ elseif (string= (symbol-name entity-symbol) "lt") then (add-to-coll coll #\<)
+ elseif (string= (symbol-name entity-symbol) "gt") then (add-to-coll coll #\>)
+ elseif (string= (symbol-name entity-symbol) "apos") then (add-to-coll coll #\')
+ elseif (string= (symbol-name entity-symbol) "quot") then (add-to-coll coll #\")
+ else (let (p-value)
+ (if* (and (iostruct-do-entity tokenbuf)
+ (setf p-value
+ (assoc entity-symbol
+ (iostruct-general-entities tokenbuf)))) then
+ (setf p-value (rest p-value))
+ (when (member entity-symbol (iostruct-entity-names tokenbuf))
+ (xml-error (concatenate 'string
+ "entity:"
+ (string entity-symbol)
+ " in recursive reference")))
+ (push entity-symbol (iostruct-entity-names tokenbuf))
+ (if* (stringp p-value) then
+ (add-to-entity-buf entity-symbol p-value)
+ (when (not attrib-value-tokenbuf)
+ (setf attrib-value-tokenbuf
+ (first (iostruct-entity-bufs tokenbuf))))
+ elseif (null external-callback) then
+ (setf (iostruct-do-entity tokenbuf) nil)
+ elseif p-value then
+ (let ((entity-stream (apply external-callback p-value)))
+ (if* entity-stream then
+ (let ((entity-buf (get-tokenbuf)))
+ (setf (tokenbuf-stream entity-buf) entity-stream)
+ (unicode-check entity-stream tokenbuf)
+ (push entity-buf
+ (iostruct-entity-bufs tokenbuf))
+ ;; check for possible external textdecl
+ (let ((count 0) cch
+ (string "<?xml "))
+ (if* (dotimes (i (length string) t)
+ (setf cch (get-next-char tokenbuf))
+ (when (and (= i 5)
+ (xml-space-p cch))
+ (setf cch #\space))
+ (when (not (eq cch
+ (schar string count)))
+ (return nil))
+ (incf count)) then
+ (setf count 5)
+ (loop
+ (when (< count 0) (return))
+ (un-next-char (schar string count))
+ (decf count))
+ ;; swallow <?xml token
+ (swallow-xml-token
+ tokenbuf
+ external-callback)
+ else
+ (un-next-char cch)
+ (decf count)
+ (loop
+ (when (< count 0) (return))
+ (un-next-char (schar string count))
+ (decf count))))
+ )
+ else
+ (xml-error (concatenate 'string
+ "Reference to unparsed entity "
+ (string entity-symbol)))
+ ))
+ )
+ elseif (or (not (iostruct-seen-any-dtd tokenbuf))
+ (and (iostruct-seen-any-dtd tokenbuf)
+ (not (iostruct-seen-external-dtd tokenbuf))
+ (not (iostruct-seen-parameter-reference tokenbuf))))
+ then
+ (xml-error (concatenate 'string
+ (string entity-symbol)
+ " must have entity declaration before being referenced"))
+ ))
+ ))
+ (setq state state-readtag6)
+ else (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error
+ (concatenate 'string
+ "attribute value contains illegal reference name: '&"
+ (compute-coll-string coll)
+ "' in attribute value for: " (string attrib-name)))
+ ))
+
+ (#.state-readtag3
+ (if* (eq #\> ch) then (return)
+ else (clear-coll coll)
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error
+ (concatenate 'string
+ "expected '>' found '"
+ (compute-coll-string coll)
+ "' in element: " (string tag-to-return)))
+ ))
+
+ (#.state-readtag-!-name
+ (if* (xml-name-char-p ch) ;; starting char already passed more restrictive test
+ then
+ (add-to-coll coll ch)
+ else
+ (when (not (xml-space-p ch))
+ (xml-error (concatenate 'string
+ "expecting whitespace following: '<!"
+ (compute-coll-string coll)
+ "' ; got: '" (string ch) "'")))
+ (setq tag-to-return (compute-tag coll))
+ (clear-coll coll)
+ (setf state state-pre-!-contents)))
+
+ (#.state-readtag-?
+ (if* (xml-name-char-p ch)
+ then
+ (add-to-coll coll ch)
+ else
+ (when (and (not (xml-space-p ch)) (not (eq #\? ch)))
+ (xml-error (concatenate 'string
+ "expecting name following: '<?"
+ (compute-coll-string coll)
+ "' ; got: '" (string ch) "'"))
+ )
+ (when (= (collector-next coll) 0)
+ (xml-error "null <? token"))
+ (if* (and (= (collector-next coll) 3)
+ (eq (elt (collector-data coll) 0) #\x)
+ (eq (elt (collector-data coll) 1) #\m)
+ (eq (elt (collector-data coll) 2) #\l)
+ )
+ then
+ (when (eq #\? ch) (xml-error "null <?xml token"))
+ (setq tag-to-return :xml)
+ (setf state state-findattributename)
+ elseif (and (= (collector-next coll) 3)
+ (or (eq (elt (collector-data coll) 0) #\x)
+ (eq (elt (collector-data coll) 0) #\X))
+ (or (eq (elt (collector-data coll) 1) #\m)
+ (eq (elt (collector-data coll) 1) #\M))
+ (or (eq (elt (collector-data coll) 2) #\l)
+ (eq (elt (collector-data coll) 2) #\L))
+ ) then
+ (xml-error "<?xml tag must be all lower case")
+ else
+ (setq tag-to-return (compute-tag coll))
+ (when (eq #\? ch) (un-next-char ch))
+ (setf state state-prereadpi))
+ (clear-coll coll)))
+
+ (#.state-pre-!-contents
+ (if* (xml-space-p ch)
+ then nil
+ elseif (not (xml-char-p ch))
+ then (xml-error (concatenate 'string ;; no test for this...
+ "illegal character '"
+ (string ch)
+ " following <!" (string tag-to-return)))
+ elseif (eq #\> ch)
+ then (return)
+ else (un-next-char ch)
+ (setf state state-!-contents)))
+
+ (#.state-begin-dtd
+ (un-next-char ch)
+ (let ((val (parse-dtd tokenbuf nil external-callback)))
+ (setf (iostruct-seen-any-dtd tokenbuf) t)
+ (push (append (list :[) val)
+ contents-to-return))
+ (setf state state-!-doctype-ext3))
+
+ (#.state-!-contents
+ (if* (xml-name-char-p ch)
+ then (add-to-coll coll ch)
+ elseif (eq #\> ch)
+ then (push (compute-coll-string coll) contents-to-return)
+ (clear-coll coll)
+ (return)
+ elseif (eq #\[ ch)
+ then (push (compute-tag coll) contents-to-return)
+ (clear-coll coll)
+ (setf state state-begin-dtd)
+ elseif (and (xml-space-p ch) (eq tag-to-return :DOCTYPE))
+ ;; look at tag-to-return and set state accordingly
+ then (push (compute-tag coll) contents-to-return)
+ (clear-coll coll)
+ (setf state state-!-doctype)
+ else (xml-error
+ (concatenate 'string
+ "illegal name: '"
+ (string tag-to-return)
+ "' in <! tag: "))
+ ))
+
+ (#.state-!-doctype-ext
+ (if* (xml-name-char-p ch) ;; starting char already passed more restrictive test
+ then
+ (add-to-coll coll ch)
+ else
+ (when (not (xml-space-p ch))
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error
+ (concatenate 'string
+ "illegal character in '"
+ (compute-coll-string coll)
+ "' in <! tag: " (string tag-to-return) " "
+ (string (first contents-to-return))
+ ))
+ )
+ (let ((token (compute-tag coll)))
+ (push token contents-to-return)
+ (clear-coll coll)
+ (if* (eq :SYSTEM token) then (setf state state-!-doctype-system)
+ elseif (eq :PUBLIC token) then (setf state state-!-doctype-public)
+ else (xml-error
+ (concatenate 'string
+ "expected 'SYSTEM' or 'PUBLIC' got '"
+ (string (first contents-to-return))
+ "' in <! tag: " (string tag-to-return) " "
+ (string (second contents-to-return))))
+ )
+ )))
+
+ (#.state-!-doctype-public
+ (if* (xml-space-p ch) then nil
+ elseif (eq #\" ch) then (setf state state-!-doctype-public2)
+ elseif (eq #\' ch) then (setf state state-!-doctype-public3)
+ else (xml-error
+ (concatenate 'string
+ "expected quote or double-quote got: '"
+ (string ch)
+ "' in <! tag: " (string tag-to-return) " "
+ (string (second contents-to-return)) " "
+ (string (first contents-to-return))
+ ))
+ ))
+
+ (#.state-!-doctype-system
+ (if* (xml-space-p ch) then nil
+ elseif (eq #\" ch) then (setf state state-!-doctype-system2)
+ elseif (eq #\' ch) then (setf state state-!-doctype-system3)
+ else (xml-error
+ (concatenate 'string
+ "expected quote or double-quote got: '"
+ (string ch)
+ "' in <! tag: " (string tag-to-return) " "
+ (string (second contents-to-return)) " "
+ (string (first contents-to-return))
+ ))
+ ))
+
+ (#.state-!-doctype-public2
+ (if* (eq #\" ch) then (push (compute-coll-string coll)
+ contents-to-return)
+ (clear-coll coll)
+ (setf state state-!-doctype-system)
+ elseif (pub-id-char-p ch) then (add-to-coll coll ch)
+ else (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error
+ (concatenate 'string
+ "illegal character in DOCTYPE PUBLIC string: '"
+ (compute-coll-string coll) "'"))
+ ))
+
+ (#.state-!-doctype-public3
+ (if* (eq #\' ch) then (push (compute-coll-string coll)
+ contents-to-return)
+ (clear-coll coll)
+ (setf state state-!-doctype-system)
+ elseif (pub-id-char-p ch) then (add-to-coll coll ch)
+ else (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error
+ (concatenate 'string
+ "illegal character in DOCTYPE PUBLIC string: '"
+ (compute-coll-string coll) "'"))
+ ))
+
+ (#.state-!-doctype-system2
+ (when (not (xml-char-p ch))
+ (xml-error "XML is not well formed")) ;; not tested
+ (if* (eq #\" ch) then (push (compute-coll-string coll)
+ contents-to-return)
+ (clear-coll coll)
+ (setf state state-!-doctype-ext2)
+ else (add-to-coll coll ch)))
+
+ (#.state-!-doctype-system3
+ (when (not (xml-char-p ch))
+ (xml-error "XML is not well formed")) ;; not tested
+ (if* (eq #\' ch) then (push (compute-coll-string coll)
+ contents-to-return)
+ (clear-coll coll)
+ (setf state state-!-doctype-ext2)
+ else (add-to-coll coll ch)))
+
+ (#.state-!-doctype-ext2
+ (if* (xml-space-p ch) then nil
+ elseif (eq #\> ch) then (return)
+ elseif (eq #\[ ch)
+ then (setf state state-begin-dtd)
+ else
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error
+ (concatenate 'string
+ "illegal char in DOCTYPE token: '"
+ (compute-coll-string coll) "'"))
+ ))
+
+ (#.state-!-doctype-ext3
+ (if* (xml-space-p ch) then nil
+ elseif (eq #\> ch) then (return)
+ else
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error
+ (concatenate 'string
+ "illegal char in DOCTYPE token following dtd: '"
+ (compute-coll-string coll) "'"))
+ ))
+
+ (#.state-!-doctype
+ ;; skip whitespace; possible exits: >, SYSTEM, PUBLIC, [
+ (if* (xml-space-p ch) then nil
+ elseif (xml-name-start-char-p ch)
+ then
+ (setf state state-!-doctype-ext)
+ (un-next-char ch)
+ elseif (eq #\> ch) then (return)
+ elseif (eq #\[ ch)
+ then (setf state state-begin-dtd)
+ else (xml-error
+ (concatenate 'string
+ "illegal character: '"
+ (string ch)
+ "' in <! tag: " (string tag-to-return) " "
+ (string (first contents-to-return))))
+ ))
+
+ (#.state-prereadpi
+ (if* (xml-space-p ch)
+ then nil
+ elseif (not (xml-char-p ch))
+ then (xml-error "XML is not well formed") ;; no test
+ else (un-next-char ch)
+ (setf state state-readpi)))
+
+ (#.state-readpi
+ (if* (eq #\? ch)
+ then (setf state state-readpi2)
+ elseif (not (xml-char-p ch))
+ then (xml-error "XML is not well formed") ;; no test
+ else (add-to-coll coll ch)))
+
+ (#.state-readpi2
+ (if* (eq #\> ch)
+ then (return)
+ elseif (eq #\? ch) then
+ (add-to-coll coll #\?) ;; come back here to try again
+ else (setf state state-readpi)
+ (add-to-coll coll #\?)
+ (add-to-coll coll ch)))
+
+ (#.state-findattributename0
+ (if* (xml-space-p ch) then (setf state state-findattributename)
+ elseif (eq ch empty-delim)
+ then (setf state state-noattributename)
+ else
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error
+ (concatenate 'string
+ "expected space or tag end before: '"
+ (compute-coll-string coll) "'"))))
+ (#.state-findattributename
+ ;; search until we find the start of an attribute name
+ ;; or the end of the tag
+ (if* (eq ch empty-delim)
+ then (setf state state-noattributename)
+ elseif (xml-space-p ch)
+ then nil ;; skip whitespace
+ elseif (xml-name-start-char-p ch)
+ then
+ (un-next-char ch)
+ (setf state state-attribname)
+ else
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error
+ (concatenate 'string
+ "illegal char in <?xml token: '"
+ (compute-coll-string coll) "'"))
+ ))
+
+ (#.state-attribname
+ ;; collect attribute name
+ (if* (xml-name-char-p ch) ;; starting char already passed more restrictive test
+ then
+ (add-to-coll coll ch)
+ elseif (xml-space-p ch) then
+ (setq attrib-name (compute-tag coll))
+ (clear-coll coll)
+ (setq state state-attribname2)
+ else
+ (when (not (eq #\= ch))
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error
+ (concatenate 'string
+ "illegal char in <?xml attribute token: '"
+ (compute-coll-string coll) "'"))
+ )
+ (setq attrib-name (compute-tag coll))
+ (clear-coll coll)
+ (setq state state-attribstartvalue)))
+
+ (#.state-attribname2
+ (if* (eq #\= ch) then (setq state state-attribstartvalue)
+ elseif (xml-space-p ch) then nil
+ else
+ (un-next-char ch)
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error
+ (concatenate 'string
+ "illegal char in <?xml attribute token: '"
+ (compute-coll-string coll) "'"))))
+ (#.state-attribstartvalue
+ ;; begin to collect value
+ (if* (or (eq ch #\")
+ (eq ch #\'))
+ then (setq value-delim ch)
+ (setq state state-attribvaluedelim)
+ elseif (xml-space-p ch) then nil
+ else
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error
+ (concatenate 'string
+ "expected ' or \" before <?xml attribute token value: '"
+ (compute-coll-string coll) "'"))
+ ))
+
+ (#.state-attribvaluedelim
+ (if* (eq ch value-delim)
+ then (setq attrib-value (compute-coll-string coll))
+ (clear-coll coll)
+ (push attrib-name attribs-to-return)
+ (push attrib-value attribs-to-return)
+ (setq state state-findattributename0)
+ elseif (and (xml-char-p ch) (not (eq #\< ch)))
+ then (add-to-coll coll ch)
+ else
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error
+ (concatenate 'string
+ "illegal character in attribute token value: '"
+ (compute-coll-string coll) "'"))
+ ))
+
+ (#.state-noattributename
+ (if* (eq #\> ch)
+ then
+ (return) ;; ready to build return token
+ else
+ (xml-error
+ (concatenate 'string
+ "expected '>' found: '" (string ch) "' in <?xml token"))
+ ))
+
+ (t
+ (error "need to support state:~s" state))
+ ))
(put-back-collector entity)
(case state
- (#.state-noattributename ;; it's a bug if this state occurs with a non-empty element
- (put-back-collector coll)
- (if* attribs-to-return
- then (values (cons tag-to-return
- (nreverse attribs-to-return))
- (if (eq tag-to-return :xml) :xml :start-tag) :end-tag)
- else
- (values tag-to-return :start-tag :end-tag)
- ))
- (#.state-readtag-end-bracket
- ;; this is a :commant tag
- (let ((ret (compute-coll-string coll)))
- (put-back-collector coll)
- (values (cons tag-to-return (list ret)) :comment :nil)))
- (#.state-pcdata
- (let ((next-char (collector-next coll)))
- (put-back-collector coll)
- (if* (zerop next-char)
- then (values nil :eof nil)
- else (values (compute-coll-string coll) :pcdata pcdatap))))
- (#.state-readpi2
- (let ((ret (compute-coll-string coll)))
- (put-back-collector coll)
- (values (append (list :pi tag-to-return) (list ret)) :pi nil)))
- ((#.state-readtag-!-conditional)
- (put-back-collector coll)
- (values (append (list tag-to-return) contents-to-return) :start-tag
- :end-tag))
- ((#.state-!-contents
- #.state-!-doctype
- #.state-!-doctype-ext2
- #.state-!-doctype-ext3)
- (put-back-collector coll)
- (values (append (list tag-to-return) (nreverse contents-to-return)) :start-tag
- :end-tag))
- (#.state-readtag3
- (put-back-collector coll)
- (values (if* attribs-to-return
- then (cons tag-to-return
- (nreverse attribs-to-return))
- else tag-to-return) :start-tag :end-tag))
- ((#.state-readtag2
- #.state-readtag)
- (put-back-collector coll)
- (values (if* attribs-to-return
- then (cons tag-to-return
- (nreverse attribs-to-return))
- else tag-to-return) :start-tag nil))
- ((#.state-readtag-end2
- #.state-readtag-end3)
- (put-back-collector coll)
- (values tag-to-return :end-tag nil))
- (#.state-readtag-!-conditional7
- (let ((ret (compute-coll-string coll)))
- (put-back-collector coll)
- (values (append (list :cdata) (list ret)) :cdata nil)))
- (t
- ;; if ch is null that means we encountered unexpected EOF
- (when (null ch)
- (put-back-collector coll)
- (xml-error "unexpected end of input"))
- (print (list tag-to-return attribs-to-return))
- (let ((ret (compute-coll-string coll)))
- (put-back-collector coll)
- (error "need to support state <post>:~s ~s ~s ~s" state
- tag-to-return
- contents-to-return
- ret))))
+ (#.state-noattributename ;; it's a bug if this state occurs with a non-empty element
+ (put-back-collector coll)
+ (if* attribs-to-return
+ then (values (cons tag-to-return
+ (nreverse attribs-to-return))
+ (if (eq tag-to-return :xml) :xml :start-tag) :end-tag)
+ else
+ (values tag-to-return :start-tag :end-tag)
+ ))
+ (#.state-readtag-end-bracket
+ ;; this is a :commant tag
+ (let ((ret (compute-coll-string coll)))
+ (put-back-collector coll)
+ (values (cons tag-to-return (list ret)) :comment :nil)))
+ (#.state-pcdata
+ (let ((next-char (collector-next coll)))
+ (put-back-collector coll)
+ (if* (zerop next-char)
+ then (values nil :eof nil)
+ else (values (compute-coll-string coll) :pcdata pcdatap))))
+ (#.state-readpi2
+ (let ((ret (compute-coll-string coll)))
+ (put-back-collector coll)
+ (values (append (list :pi tag-to-return) (list ret)) :pi nil)))
+ ((#.state-readtag-!-conditional)
+ (put-back-collector coll)
+ (values (append (list tag-to-return) contents-to-return) :start-tag
+ :end-tag))
+ ((#.state-!-contents
+ #.state-!-doctype
+ #.state-!-doctype-ext2
+ #.state-!-doctype-ext3)
+ (put-back-collector coll)
+ (values (append (list tag-to-return) (nreverse contents-to-return)) :start-tag
+ :end-tag))
+ (#.state-readtag3
+ (put-back-collector coll)
+ (values (if* attribs-to-return
+ then (cons tag-to-return
+ (nreverse attribs-to-return))
+ else tag-to-return) :start-tag :end-tag))
+ ((#.state-readtag2
+ #.state-readtag)
+ (put-back-collector coll)
+ (values (if* attribs-to-return
+ then (cons tag-to-return
+ (nreverse attribs-to-return))
+ else tag-to-return) :start-tag nil))
+ ((#.state-readtag-end2
+ #.state-readtag-end3)
+ (put-back-collector coll)
+ (values tag-to-return :end-tag nil))
+ (#.state-readtag-!-conditional7
+ (let ((ret (compute-coll-string coll)))
+ (put-back-collector coll)
+ (values (append (list :cdata) (list ret)) :cdata nil)))
+ (t
+ ;; if ch is null that means we encountered unexpected EOF
+ (when (null ch)
+ (put-back-collector coll)
+ (xml-error "unexpected end of input"))
+ (print (list tag-to-return attribs-to-return))
+ (let ((ret (compute-coll-string coll)))
+ (put-back-collector coll)
+ (error "need to support state <post>:~s ~s ~s ~s" state
+ tag-to-return
+ contents-to-return
+ ret))))
)))
(defun swallow-xml-token (tokenbuf external-callback)
(declare (ignorable old-coll) (optimize (speed 3) (safety 1)))
(let ((xml (next-token tokenbuf external-callback nil)))
(if* (and (eq (fourth xml) :standalone) (stringp (fifth xml))
- (equal (fifth xml) "yes")) then
- (xml-error "external XML entity cannot be standalone document")
+ (equal (fifth xml) "yes")) then
+ (xml-error "external XML entity cannot be standalone document")
elseif (and (eq (sixth xml) :standalone) (stringp (seventh xml))
- (equal (seventh xml) "yes")) then
- (xml-error "external XML entity cannot be standalone document"))))
+ (equal (seventh xml) "yes")) then
+ (xml-error "external XML entity cannot be standalone document"))))
;; return the string with entity references replaced by text
;; normalizing will happen later
(if* (stringp (first value-list)) then (setf value-string (first value-list))
elseif (eq (first value-list) :FIXED) then (setf value-string (second value-list)))
(let ((tmp-result (parse-xml
- (concatenate 'string
- "<item x='"
- value-string
- "'/>")
- :external-callback external-callback
- :general-entities
- (iostruct-general-entities tokenbuf))))
+ (concatenate 'string
+ "<item x='"
+ value-string
+ "'/>")
+ :external-callback external-callback
+ :general-entities
+ (iostruct-general-entities tokenbuf))))
(if* (stringp (first value-list)) then
- (setf (first value-list)
- (third (first (first tmp-result))))
- elseif (eq (first value-list) :FIXED) then
- (setf (second value-list)
- (third (first (first tmp-result)))))))
+ (setf (first value-list)
+ (third (first (first tmp-result))))
+ elseif (eq (first value-list) :FIXED) then
+ (setf (second value-list)
+ (third (first (first tmp-result)))))))
value-list)
(defun process-attlist (args attlist-data)
(dolist (item (rest arg1))
;;(format t "item: ~s~%" item)
(when (eq :ATTLIST (first item))
- (let* ((name (second item))
- (name-data (assoc name attlist-data))
- (new-name-data (rest name-data)))
- ;;(format t "name: ~s name-data: ~s new-name-data: ~s~%" name name-data new-name-data)
- (dolist (attrib-data (rest (rest item)))
- ;;(format t "attrib-data: ~s~%" attrib-data)
- #+ignore
- (setf (rest (rest attrib-data))
- (parse-default-value (rest (rest attrib-data)) tokenbuf external-callback))
- (when (not (assoc (first attrib-data) new-name-data))
- (setf new-name-data (acons (first attrib-data) (rest attrib-data) new-name-data))))
- (if* name-data then
- (rplacd (assoc name attlist-data) (nreverse new-name-data))
- else (setf attlist-data (acons name (nreverse new-name-data) attlist-data))))))))
+ (let* ((name (second item))
+ (name-data (assoc name attlist-data))
+ (new-name-data (rest name-data)))
+ ;;(format t "name: ~s name-data: ~s new-name-data: ~s~%" name name-data new-name-data)
+ (dolist (attrib-data (rest (rest item)))
+ ;;(format t "attrib-data: ~s~%" attrib-data)
+ #+ignore
+ (setf (rest (rest attrib-data))
+ (parse-default-value (rest (rest attrib-data)) tokenbuf external-callback))
+ (when (not (assoc (first attrib-data) new-name-data))
+ (setf new-name-data (acons (first attrib-data) (rest attrib-data) new-name-data))))
+ (if* name-data then
+ (rplacd (assoc name attlist-data) (nreverse new-name-data))
+ else (setf attlist-data (acons name (nreverse new-name-data) attlist-data))))))))
(provide :pxml)
(defvar *debug-dtd* nil)
(defun parse-dtd (tokenbuf
- external external-callback)
+ external external-callback)
(declare (optimize (speed 3) (safety 1)))
(let ((guts)
- (include-count 0))
+ (include-count 0))
(loop
(multiple-value-bind (val kind)
- (next-dtd-token tokenbuf
- external include-count external-callback)
- (if* (eq kind :end-dtd) then
- (return (nreverse guts))
- elseif (eq kind :include) then
- (incf include-count)
- elseif (eq kind :ignore) then nil
- elseif (eq kind :include-end) then
- (if* (> include-count 0) then (decf include-count)
- else (xml-error "unexpected ']]>' token"))
- else (when (iostruct-do-entity tokenbuf) (push val guts)))))))
+ (next-dtd-token tokenbuf
+ external include-count external-callback)
+ (if* (eq kind :end-dtd) then
+ (return (nreverse guts))
+ elseif (eq kind :include) then
+ (incf include-count)
+ elseif (eq kind :ignore) then nil
+ elseif (eq kind :include-end) then
+ (if* (> include-count 0) then (decf include-count)
+ else (xml-error "unexpected ']]>' token"))
+ else (when (iostruct-do-entity tokenbuf) (push val guts)))))))
(defparameter dtd-parser-states ())
(macrolet ((def-dtd-parser-state (var val)
- `(progn (eval-when (compile load eval) (defconstant ,var ,val))
- (pushnew '(,val . ,var) dtd-parser-states :key #'car))))
+ `(progn (eval-when (compile load eval) (defconstant ,var ,val))
+ (pushnew '(,val . ,var) dtd-parser-states :key #'car))))
(def-dtd-parser-state state-dtdstart 0)
(def-dtd-parser-state state-tokenstart 1)
(def-dtd-parser-state state-dtd-? 2)
)
(defun next-dtd-token (tokenbuf
- external include-count external-callback)
+ external include-count external-callback)
(declare #+allegro (:fbound parse-default-value)
- #+lispworks (optimize (safety 0) (debug 3))
- #-lispworks (optimize (speed 3) (safety 1)))
+ #+lispworks (optimize (safety 0) (debug 3))
+ #-lispworks (optimize (speed 3) (safety 1)))
(macrolet ((add-to-entity-buf (entity-symbol p-value)
- `(progn
- (push (make-tokenbuf :cur 0 :max (length ,p-value) :data ,p-value)
- (iostruct-entity-bufs tokenbuf))))
+ `(progn
+ (push (make-tokenbuf :cur 0 :max (length ,p-value) :data ,p-value)
+ (iostruct-entity-bufs tokenbuf))))
- (un-next-char (ch)
- `(push ,ch (iostruct-unget-char tokenbuf)))
+ (un-next-char (ch)
+ `(push ,ch (iostruct-unget-char tokenbuf)))
- (clear-coll (coll)
- `(setf (collector-next ,coll) 0))
+ (clear-coll (coll)
+ `(setf (collector-next ,coll) 0))
- (add-to-coll (coll ch)
- `(let ((.next. (collector-next ,coll)))
- (if* (>= .next. (collector-max ,coll))
- then (grow-and-add ,coll ,ch)
- else (setf (schar (collector-data ,coll) .next.)
- ,ch)
- (setf (collector-next ,coll) (1+ .next.)))))
+ (add-to-coll (coll ch)
+ `(let ((.next. (collector-next ,coll)))
+ (if* (>= .next. (collector-max ,coll))
+ then (grow-and-add ,coll ,ch)
+ else (setf (schar (collector-data ,coll) .next.)
+ ,ch)
+ (setf (collector-next ,coll) (1+ .next.)))))
- (to-preferred-case (ch)
- ;; should check the case mode
- `(char-downcase ,ch))
+ (to-preferred-case (ch)
+ ;; should check the case mode
+ `(char-downcase ,ch))
- )
+ )
(let ((state state-dtdstart)
- (coll (get-collector))
- (entity (get-collector))
- (tag-to-return)
- (contents-to-return)
- (pending (list nil))
- (pending-type)
- (value-delim)
- (public-string)
- (char-code 0)
- (check-count 0)
- (ignore-count 0)
- (reference-save-state)
- (prefp)
- (entityp)
- (pentityp)
- (prev-state)
- (ch))
+ (coll (get-collector))
+ (entity (get-collector))
+ (tag-to-return)
+ (contents-to-return)
+ (pending (list nil))
+ (pending-type)
+ (value-delim)
+ (public-string)
+ (char-code 0)
+ (check-count 0)
+ (ignore-count 0)
+ (reference-save-state)
+ (prefp)
+ (entityp)
+ (pentityp)
+ (prev-state)
+ (ch))
(loop
- (setq ch (get-next-char tokenbuf))
- (when *debug-dtd*
- (format t "~@<dtd ~:Ichar: ~s ~:_state: ~s ~:_contents: ~s ~:_pending: ~s ~:_pending-type: ~s ~:_entity-names ~s~:>~%"
- ch (or (cdr (assoc state dtd-parser-states)) state)
- contents-to-return pending pending-type
- (iostruct-entity-names tokenbuf)))
- (if* (null ch)
- then (setf prev-state state)
- (setf state :eof)
- (return) ;; eof -- exit loop
- )
+ (setq ch (get-next-char tokenbuf))
+ (when *debug-dtd*
+ (format t "~@<dtd ~:Ichar: ~s ~:_state: ~s ~:_contents: ~s ~:_pending: ~s ~:_pending-type: ~s ~:_entity-names ~s~:>~%"
+ ch (or (cdr (assoc state dtd-parser-states)) state)
+ contents-to-return pending pending-type
+ (iostruct-entity-names tokenbuf)))
+ (if* (null ch)
+ then (setf prev-state state)
+ (setf state :eof)
+ (return) ;; eof -- exit loop
+ )
- (case state
- (#.state-dtdstart
- (if* (and (eq #\] ch)
- external (> include-count 0)) then
- (setf state state-dtd-!-include3)
- elseif (and (eq #\] ch) (not external)) then (return)
- elseif (eq #\< ch) then (setf state state-tokenstart)
- elseif (xml-space-p ch) then nil
- elseif (eq #\% ch) then (external-param-reference tokenbuf coll external-callback)
- else (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error (concatenate 'string
- "illegal DTD characters, starting at: '"
- (compute-coll-string coll)
- "'"))
- ))
- (#.state-dtd-!-include3
- (if* (eq #\] ch) then (setf state state-dtd-!-include4)
- else
- (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error (concatenate 'string
- "illegal DTD token, starting at: ']"
- (compute-coll-string coll)
- "'"))))
- (#.state-dtd-!-include4
- (if* (eq #\> ch) then (return)
- else
- (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error (concatenate 'string
- "illegal DTD token, starting at: ']]"
- (compute-coll-string coll)
- "'"))))
- #+ignore
- (#.state-dtd-pref
- (if* (xml-name-start-char-p ch) then
- (add-to-coll coll ch)
- (setf state state-dtd-pref2)
- else (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error (concatenate 'string
- "illegal DTD parameter reference name, starting at: '"
- (compute-coll-string coll)
- "'"))
- ))
- (#.state-tokenstart
- (if* (eq #\? ch) then (setf state state-dtd-?)
- elseif (eq #\! ch) then (setf state state-dtd-!)
- else (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error (concatenate 'string
- "illegal DTD characters, starting at: '<"
- (compute-coll-string coll)
- "'"))
- ))
- (#.state-dtd-?
- (if* (xml-name-char-p ch)
- then
- (add-to-coll coll ch)
- elseif (and external (eq #\% ch)) then
- (external-param-reference tokenbuf coll external-callback)
- else
- (when (not (xml-space-p ch))
- (xml-error (concatenate 'string
- "expecting name following: '<?"
- (compute-coll-string coll)
- "' ; got: '" (string ch) "'"))
- )
- (when (= (collector-next coll) 0)
- (xml-error "null <? token"))
- (if* (and (= (collector-next coll) 3)
- (or (eq (elt (collector-data coll) 0) #\X)
- (eq (elt (collector-data coll) 0) #\x))
- (or (eq (elt (collector-data coll) 1) #\M)
- (eq (elt (collector-data coll) 1) #\m))
- (or (eq (elt (collector-data coll) 2) #\L)
- (eq (elt (collector-data coll) 2) #\l)))
- then
- (xml-error "<?xml not allowed in dtd")
- else
- (setq tag-to-return (compute-tag coll))
- (setf state state-dtd-?-2))
- (clear-coll coll)))
- (#.state-dtd-?-2
- (if* (xml-space-p ch)
- then nil
- elseif (and external (eq #\% ch)) then
- (external-param-reference tokenbuf coll external-callback)
- elseif (not (xml-char-p ch))
- then (xml-error "XML is not well formed") ;; no test
- else (add-to-coll coll ch)
- (setf state state-dtd-?-3)))
- (#.state-dtd-?-3
- (if* (eq #\? ch)
- then (setf state state-dtd-?-4)
- elseif (not (xml-char-p ch))
- then (xml-error "XML is not well formed") ;; no test
- else (add-to-coll coll ch)))
- (#.state-dtd-?-4
- (if* (eq #\> ch)
- then
- (push (compute-coll-string coll) contents-to-return)
- (clear-coll coll)
- (return)
- else (setf state state-dtd-?-3)
- (add-to-coll coll #\?)
- (add-to-coll coll ch)))
- (#.state-dtd-!
- (if* (eq #\- ch) then (setf state state-dtd-comment)
- elseif (xml-name-start-char-p ch) then (setf state state-dtd-!-token)
- (un-next-char ch)
- elseif (and (eq #\[ ch) external) then
- (setf state state-dtd-!-cond)
- else (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error (concatenate 'string
- "illegal DTD characters, starting at: '<!"
- (compute-coll-string coll)
- "'"))
- ))
- (#.state-dtd-!-cond
- (if* (xml-space-p ch) then nil
- elseif (and external (eq #\% ch)) then
- (external-param-reference tokenbuf coll external-callback)
- elseif (eq #\I ch) then (setf state state-dtd-!-cond2)
- else (error "this should not happen")
- ))
- (#.state-dtd-!-cond2
- (if* (eq #\N ch) then (setf state state-dtd-!-include)
- (setf check-count 2)
- elseif (eq #\G ch) then (setf state state-dtd-!-ignore)
- (setf check-count 2)
- else (xml-error "<![ external DTD token not INCLUDE or IGNORE")
- ))
- (#.state-dtd-!-ignore
- (if* (and (eq check-count 5) (eq ch #\E)) then
- (setf state state-dtd-!-ignore2)
- elseif (eq ch (elt "IGNORE" check-count)) then
- (incf check-count)
- else (xml-error "<![ external DTD token not INCLUDE or IGNORE")
- ))
- (#.state-dtd-!-ignore2
- (if* (xml-space-p ch) then nil
- elseif (and external (eq #\% ch)) then
- (external-param-reference tokenbuf coll external-callback)
- elseif (eq #\[ ch) then (setf state state-dtd-!-ignore3)
- (incf ignore-count)
- else (xml-error "'[' missing after '<![Ignore'")))
- (#.state-dtd-!-ignore3
- (if* (eq #\< ch) then (setf state state-dtd-!-ignore4)
- elseif (eq #\] ch) then (setf state state-dtd-!-ignore5)))
- (#.state-dtd-!-ignore4
- (if* (eq #\! ch) then (setf state state-dtd-!-ignore6)
- else (un-next-char ch)
- (setf state state-dtd-!-ignore3)))
- (#.state-dtd-!-ignore5
- (if* (eq #\] ch) then (setf state state-dtd-!-ignore7)
- else (un-next-char ch)
- (setf state state-dtd-!-ignore3)))
- (#.state-dtd-!-ignore6
- (if* (eq #\[ ch) then (incf ignore-count)
- (setf state state-dtd-!-ignore3)
- else (un-next-char ch)
- (setf state state-dtd-!-ignore3)))
- (#.state-dtd-!-ignore7
- (if* (eq #\> ch) then (decf ignore-count)
- (when (= ignore-count 0) (return))
- else (un-next-char ch)
- (setf state state-dtd-!-ignore3)))
- (#.state-dtd-!-include
- (if* (and (eq check-count 6) (eq ch #\E)) then
- (setf state state-dtd-!-include2)
- elseif (eq ch (elt "INCLUD" check-count)) then
- (incf check-count)
- else (xml-error "<![ external DTD token not INCLUDE or IGNORE")
- ))
- (#.state-dtd-!-include2
- (if* (xml-space-p ch) then nil
- elseif (and external (eq #\% ch)) then
- (external-param-reference tokenbuf coll external-callback)
- elseif (eq #\[ ch) then (return)
- else (xml-error "'[' missing after '<![INCLUDE'")))
- (#.state-dtd-comment
- (if* (eq #\- ch)
- then (setf state state-dtd-comment2)
- (setf tag-to-return :comment)
- else (clear-coll coll)
- (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error (concatenate 'string
- "illegal token following '<![-', starting at '<!-"
- (compute-coll-string coll)
- "'"))
- ))
- (#.state-dtd-comment2
- (if* (eq #\- ch)
- then (setf state state-dtd-comment3)
- else (add-to-coll coll ch)))
- (#.state-dtd-comment3
- (if* (eq #\- ch)
- then (setf state state-dtd-comment4)
- else (setf state state-dtd-comment2)
- (add-to-coll coll #\-) (add-to-coll coll ch)))
- (#.state-dtd-comment4
- (if* (eq #\> ch)
- then (push (compute-coll-string coll) contents-to-return)
- (clear-coll coll)
- (return)
- else (clear-coll coll)
- (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error (concatenate 'string
- "illegal token following '--' comment terminator, starting at '--"
- (compute-coll-string coll)
- "'"))
- ))
- (#.state-dtd-!-token
- (if* (xml-name-char-p ch) then (add-to-coll coll ch)
- elseif (and external (eq #\% ch)) then
- (external-param-reference tokenbuf coll external-callback)
- elseif (xml-space-p ch) then
- (setf tag-to-return (compute-tag coll))
- (clear-coll coll)
- (if* (eq tag-to-return :ELEMENT) then (setf state state-dtd-!-element)
- elseif (eq tag-to-return :ATTLIST) then
- (setf state state-dtd-!-attlist)
- elseif (eq tag-to-return :ENTITY) then
- (setf entityp t)
- (setf state state-dtd-!-entity)
- elseif (eq tag-to-return :NOTATION) then
- (setf state state-dtd-!-notation)
- else
- (xml-error (concatenate 'string
- "illegal DTD characters, starting at: '<!"
- (string tag-to-return)
- "'")))
- else (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error (concatenate 'string
- "illegal DTD characters, starting at: '<!"
- (compute-coll-string coll)
- "'"))
- ))
- (#.state-dtd-!-notation
- (if* (xml-space-p ch) then nil
- elseif (and external (eq #\% ch)) then
- (external-param-reference tokenbuf coll external-callback)
- elseif (xml-name-start-char-p ch) then
- (add-to-coll coll ch)
- (setf state state-dtd-!-notation2)
- else (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error (concatenate 'string
- "illegal DTD characters, starting at: '<!NOTATION "
- (compute-coll-string coll)
- "'"))
- ))
- (#.state-dtd-!-notation2
- (if* (xml-name-char-p ch) then (add-to-coll coll ch)
- elseif (and external (eq #\% ch)) then
- (external-param-reference tokenbuf coll external-callback)
- elseif (xml-space-p ch) then
- (push (compute-tag coll) contents-to-return)
- (clear-coll coll)
- (setf state state-dtd-!-notation3)
- else (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error (concatenate 'string
- "illegal DTD <!NOTATION name: "
- (compute-coll-string coll)
- "'"))
- ))
- (#.state-dtd-!-notation3
- (if* (xml-space-p ch) then nil
- elseif (and external (eq #\% ch)) then
- (external-param-reference tokenbuf coll external-callback)
- elseif (xml-name-char-p ch) then
- (add-to-coll coll ch)
- (setf state state-dtd-!-entity6)
- else (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error (concatenate 'string
- "illegal DTD <!NOTATION spec for "
- (string (first contents-to-return))
- ": '"
- (compute-coll-string coll)
- "'"))
- ))
- (#.state-dtd-!-entity
- (if* (eq #\% ch) then (push :param contents-to-return)
- (setf pentityp t)
- (setf state state-dtd-!-entity2)
- elseif (xml-name-start-char-p ch) then
- (add-to-coll coll ch)
- (setf pending nil)
- (setf state state-dtd-!-entity3)
- elseif (xml-space-p ch) then nil
- elseif (and external (eq #\% ch)) then
- (external-param-reference tokenbuf coll external-callback)
- else (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error (concatenate 'string
- "illegal DTD characters, starting at: '<!ENTITY "
- (compute-coll-string coll)
- "'"))
- ))
- (#.state-dtd-!-entity2
- (if* (xml-space-p ch) then (setf state state-dtd-!-entity7)
- elseif (and external (eq #\% ch)) then
- (external-param-reference tokenbuf coll external-callback)
- else (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error (concatenate 'string
- "illegal DTD <!ENTITY spec for "
- (string (first contents-to-return))
- ": '"
- (compute-coll-string coll)
- "'"))
- ))
- (#.state-dtd-!-entity3
- (if* (xml-name-char-p ch) then (add-to-coll coll ch)
- elseif (and external (eq #\% ch)) then
- (external-param-reference tokenbuf coll external-callback)
- elseif (xml-space-p ch) then
- (push (compute-tag coll) contents-to-return)
- (setf contents-to-return
- (nreverse contents-to-return))
- (clear-coll coll)
- (setf state state-dtd-!-entity4)
- else (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error (concatenate 'string
- "illegal DTD <!ENTITY name: "
- (compute-coll-string coll)
- "'"))
- ))
- (#.state-dtd-!-entity4
- (if* (xml-space-p ch) then nil
- elseif (and external (eq #\% ch)) then
- (external-param-reference tokenbuf coll external-callback)
- elseif (or (eq #\' ch) (eq #\" ch)) then
- (setf value-delim ch)
- (setf state state-dtd-!-entity-value)
- elseif (xml-name-start-char-p ch) then
- (add-to-coll coll ch)
- (setf state state-dtd-!-entity6)
- else (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error (concatenate 'string
- "illegal DTD <!ENTITY spec: '"
- (compute-coll-string coll)
- "'"))
- ))
- (#.state-dtd-!-entity6
- (if* (xml-name-char-p ch) ;; starting char already passed more restrictive test
- then
- (add-to-coll coll ch)
- elseif (and external (eq #\% ch)) then
- (external-param-reference tokenbuf coll external-callback)
- else
- (when (not (xml-space-p ch))
- (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error
- (concatenate 'string
- "illegal character in '"
- (compute-coll-string coll)
- "' in <! tag: " (string tag-to-return) " "
- (string (first contents-to-return))
- ))
- )
- (let ((token (compute-tag coll)))
- (push token contents-to-return)
- (clear-coll coll)
- (if* (eq :SYSTEM token) then (setf state state-!-dtd-system)
- elseif (eq :PUBLIC token) then (setf state state-!-dtd-public)
- else (xml-error
- (concatenate 'string
- "expected 'SYSTEM' or 'PUBLIC' got '"
- (string (first contents-to-return))
- "' in <! tag: " (string tag-to-return) " "
- (string (second contents-to-return))))
- )
- )))
- (#.state-dtd-!-entity7
- (if* (xml-space-p ch) then nil
- elseif (and external (eq #\% ch)) then
- (external-param-reference tokenbuf coll external-callback)
- elseif (xml-name-start-char-p ch) then
- (add-to-coll coll ch)
- (setf state state-dtd-!-entity3)
- else (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error (concatenate 'string
- "illegal DTD <!ENTITY % name: "
- (compute-coll-string coll)
- "'"))
- ))
- (#.state-!-dtd-public
- (if* (xml-space-p ch) then nil
- elseif (and external (eq #\% ch)) then
- (external-param-reference tokenbuf coll external-callback)
- elseif (or (eq #\" ch) (eq #\' ch)) then
- (setf state state-!-dtd-public2)
- (setf value-delim ch)
- else (xml-error
- (concatenate 'string
- "expected quote or double-quote got: '"
- (string ch)
- "' in <! tag: " (string tag-to-return) " "
- (string (second contents-to-return)) " "
- (string (first contents-to-return))
- ))))
- (#.state-!-dtd-public2
- (if* (eq value-delim ch) then
- (push (setf public-string
- (normalize-public-value
- (compute-coll-string coll))) contents-to-return)
- (clear-coll coll)
- (setf state state-!-dtd-public3)
- elseif (pub-id-char-p ch) then (add-to-coll coll ch)
- else (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error
- (concatenate 'string
- "illegal character in string: '"
- (compute-coll-string coll) "'"))
- ))
- (#.state-!-dtd-public3
- (if* (xml-space-p ch) then (setf state state-!-dtd-system)
- elseif (and external (eq #\% ch)) then
- (external-param-reference tokenbuf coll external-callback)
- elseif (and (not entityp)
- (eq #\> ch)) then
- (setf state state-!-dtd-system)
- (return)
- else
- (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error
- (concatenate 'string
- "Expected space before: '"
- (compute-coll-string coll) "'"))
- ))
- (#.state-!-dtd-system
- (if* (xml-space-p ch) then nil
- elseif (and external (eq #\% ch)) then
- (external-param-reference tokenbuf coll external-callback)
- elseif (or (eq #\" ch) (eq #\' ch)) then
- (setf state state-!-dtd-system2)
- (setf value-delim ch)
- elseif (and (not entityp)
- (eq #\> ch)) then (return)
- else (xml-error
- (concatenate 'string
- "expected quote or double-quote got: '"
- (string ch)
- "' in <! tag: " (string tag-to-return) " "
- (string (second contents-to-return)) " "
- (string (first contents-to-return))
- ))))
- (#.state-!-dtd-system2
- (when (not (xml-char-p ch))
- (xml-error "XML is not well formed")) ;; not tested
- (if* (eq value-delim ch) then
- (let ((entity-symbol (first (last contents-to-return)))
- (system-string (compute-coll-string coll)))
- (if* pentityp then
- (when (not (assoc entity-symbol (iostruct-parameter-entities tokenbuf)))
- (setf (iostruct-parameter-entities tokenbuf)
- (acons entity-symbol (list (parse-uri system-string)
- tag-to-return
- public-string)
- (iostruct-parameter-entities tokenbuf)))
- )
- else
- (when (not (assoc entity-symbol (iostruct-general-entities tokenbuf)))
- (setf (iostruct-general-entities tokenbuf)
- (acons entity-symbol (list (parse-uri system-string)
- tag-to-return
- public-string
- )
- (iostruct-general-entities tokenbuf)))
- (when (not (assoc entity-symbol (iostruct-general-entities tokenbuf)))
- (setf (iostruct-general-entities tokenbuf)
- (acons entity-symbol (list (parse-uri system-string)
- tag-to-return
- public-string
- )
- (iostruct-general-entities tokenbuf))))
- )
- )
- (push system-string contents-to-return))
- (clear-coll coll)
- (setf state state-!-dtd-system3)
- else (add-to-coll coll ch)))
- (#.state-!-dtd-system3
- (if* (xml-space-p ch) then (setf state state-!-dtd-system4)
- elseif (and external (eq #\% ch)) then
- (external-param-reference tokenbuf coll external-callback)
- elseif (eq #\> ch) then (return)
- else
- (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error (concatenate 'string
- "illegal DTD <!ENTITY value for "
- (string (first (nreverse contents-to-return)))
- ": '"
- (compute-coll-string coll)
- "'"))
- ))
- (#.state-!-dtd-system4
- (if* (xml-space-p ch) then nil
- elseif (and external (eq #\% ch)) then
- (external-param-reference tokenbuf coll external-callback)
- elseif (and (not pentityp) (xml-name-start-char-p ch)) then
- (add-to-coll coll ch)
- (setf state state-!-dtd-system5)
- elseif (eq #\> ch) then (return)
- else (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error (concatenate 'string
- "illegal DTD <!ENTITY value for "
- (string (first (nreverse contents-to-return)))
- ": '"
- (compute-coll-string coll)
- "'"))
- ))
- (#.state-!-dtd-system5
- (if* (xml-name-char-p ch) then
- (add-to-coll coll ch)
- elseif (and external (eq #\% ch)) then
- (external-param-reference tokenbuf coll external-callback)
- elseif (xml-space-p ch) then
- (let ((token (compute-tag coll)))
- (when (not (eq :NDATA token))
- (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error (concatenate 'string
- "illegal DTD <!ENTITY value for "
- (string (first (nreverse contents-to-return)))
- ": '"
- (compute-coll-string coll)
- "'"))
- )
- (clear-coll coll)
- (push token contents-to-return)
- (setf state state-!-dtd-system6))
- else (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error (concatenate 'string
- "illegal DTD <!ENTITY value for "
- (string (first (nreverse contents-to-return)))
- ": '"
- (compute-coll-string coll)
- "'"))
- ))
- (#.state-!-dtd-system6
- (if* (xml-space-p ch) then nil
- elseif (and external (eq #\% ch)) then
- (external-param-reference tokenbuf coll external-callback)
- elseif (xml-name-start-char-p ch) then
- (add-to-coll coll ch)
- (setf state state-!-dtd-system7)
- else (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error (concatenate 'string
- "illegal DTD <!ENTITY value for "
- (string (first (nreverse contents-to-return)))
- ": '"
- (compute-coll-string coll)
- "'"))
- ))
- (#.state-!-dtd-system7
- (if* (xml-name-char-p ch) then
- (add-to-coll coll ch)
- elseif (and external (eq #\% ch)) then
- (external-param-reference tokenbuf coll external-callback)
- elseif (xml-space-p ch) then
- (push (compute-tag coll) contents-to-return)
- (clear-coll coll)
- (setf state state-dtd-!-entity5) ;; just looking for space, >
- elseif (eq #\> ch) then
- (push (compute-tag coll) contents-to-return)
- (clear-coll coll)
- (return)
- else (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error (concatenate 'string
- "illegal DTD <!ENTITY value for "
- (string (first (nreverse contents-to-return)))
- ": '"
- (compute-coll-string coll)
- "'"))
- ))
- (#.state-dtd-!-entity-value
- (if* (eq ch value-delim) then
- (let ((tmp (compute-coll-string coll)))
- (when (> (length tmp) 0)
- (when (null (first pending)) (setf pending (rest pending)))
- (push tmp pending)))
- (if* (> (length pending) 1) then
- (push (nreverse pending) contents-to-return)
- else (push (first pending) contents-to-return))
- (setf pending (list nil))
- (setf state state-dtd-!-entity5)
- (clear-coll coll)
- (if* pentityp then
- (when (not (assoc (third contents-to-return)
- (iostruct-parameter-entities tokenbuf)))
- (setf (iostruct-parameter-entities tokenbuf)
- (acons (third contents-to-return)
- (first contents-to-return)
- (iostruct-parameter-entities tokenbuf))))
- else
- (when (not (assoc (second contents-to-return)
- (iostruct-general-entities tokenbuf)))
- (setf (iostruct-general-entities tokenbuf)
- (acons (second contents-to-return)
- (first contents-to-return)
- (iostruct-general-entities tokenbuf)))))
- elseif (eq #\& ch) then
- (setf reference-save-state state-dtd-!-entity-value)
- (setf state state-dtd-!-attdef-decl-value3)
- elseif (eq #\% ch) then
- (setf prefp t)
- (setf reference-save-state state-dtd-!-entity-value)
- (setf state state-dtd-!-attdef-decl-value3)
- elseif (xml-char-p ch)
- then (add-to-coll coll ch)
- else (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error (concatenate 'string
- "illegal DTD <!ENTITY value for "
- (string (first contents-to-return))
- ": '"
- (compute-coll-string coll)
- "'"))
- ))
- (#.state-dtd-!-entity5
- (if* (xml-space-p ch) then nil
- elseif (and external (eq #\% ch)) then
- (external-param-reference tokenbuf coll external-callback)
- elseif (eq #\> ch) then (return)
- else (clear-coll coll)
- (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error (concatenate 'string
- "illegal DTD contents following <!ENTITY spec for "
- (string (first contents-to-return))
- ": '"
- (compute-coll-string coll)
- "'"))
- ))
- (#.state-dtd-!-attlist
- (if* (xml-name-start-char-p ch) then (setf state state-dtd-!-attlist-name)
- (un-next-char ch)
- elseif (xml-space-p ch) then nil
- elseif (and external (eq #\% ch)) then
- (external-param-reference tokenbuf coll external-callback)
- else (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error (concatenate 'string
- "illegal DTD characters, starting at: '<!ATTLIST "
- (compute-coll-string coll)
- "'"))))
- (#.state-dtd-!-attlist-name
- (if* (xml-name-char-p ch) then (add-to-coll coll ch)
- elseif (and external (eq #\% ch)) then
- (external-param-reference tokenbuf coll external-callback)
- elseif (xml-space-p ch) then
- (push (compute-tag coll *package*)
- contents-to-return)
- (clear-coll coll)
- (setf state state-dtd-!-attdef)
- elseif (eq #\> ch) then
- (push (compute-tag coll *package*)
- contents-to-return)
- (clear-coll coll)
- (return)
- else (push (compute-tag coll)
- contents-to-return)
- (clear-coll coll)
- (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error (concatenate 'string
- "illegal DTD <!ATTLIST content spec for "
- (string (first contents-to-return))
- ": '"
- (compute-coll-string coll)
- "'"))
- ))
- (#.state-dtd-!-attdef
- (if* (xml-space-p ch) then nil
- elseif (and external (eq #\% ch)) then
- (external-param-reference tokenbuf coll external-callback)
- elseif (xml-name-start-char-p ch) then
- (un-next-char ch)
- (setf state state-dtd-!-attdef-name)
- elseif (eq #\> ch) then (return)
- else (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error (concatenate 'string
- "illegal DTD <!ATTLIST content spec for "
- (string (first contents-to-return))
- ": '"
- (compute-coll-string coll)
- "'"))
- ))
- (#.state-dtd-!-attdef-name
- (if* (xml-name-char-p ch) then (add-to-coll coll ch)
- elseif (and external (eq #\% ch)) then
- (external-param-reference tokenbuf coll external-callback)
- elseif (xml-space-p ch) then
- (setf (first pending) (compute-tag coll *package*))
- (clear-coll coll)
- (setf state state-dtd-!-attdef-type)
- else (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error (concatenate 'string
- "illegal DTD <!ATTLIST type spec for "
- (string (first contents-to-return))
- ": '"
- (compute-coll-string coll)
- "'"))
- ))
- (#.state-dtd-!-attdef-type
- (if* (xml-space-p ch) then nil
- elseif (and external (eq #\% ch)) then
- (external-param-reference tokenbuf coll external-callback)
- else (un-next-char ch)
- ;; let next state do all other checking
- (setf state state-dtd-!-attdef-type2)))
- (#.state-dtd-!-attdef-type2
- ;; can only be one of a few tokens, but wait until token built to check
- (if* (xml-name-char-p ch) then (add-to-coll coll ch)
- elseif (and (eq #\( ch) (= 0 (length (compute-coll-string coll)))) then
- (push (list :enumeration) pending)
- (setf state state-dtd-!-attdef-notation2)
- elseif (and external (eq #\% ch)) then
- (external-param-reference tokenbuf coll external-callback)
- elseif (xml-space-p ch) then
- (let ((token (compute-tag coll)))
- (when (and (not (eq :CDATA token))
- (not (eq :ID token))
- (not (eq :IDREF token))
- (not (eq :IDREFS token))
- (not (eq :ENTITY token))
- (not (eq :ENTITIES token))
- (not (eq :NMTOKEN token))
- (not (eq :NMTOKENS token))
- (not (eq :NOTATION token)))
- (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error (concatenate 'string
- "illegal DTD <!ATTLIST type spec for "
- (string (first contents-to-return))
- ": '"
- (compute-coll-string coll)
- "'")))
- (if* (eq token :NOTATION) then
- (push (list token) pending)
- (setf state state-dtd-!-attdef-notation)
- else
- (push token pending)
- (setf state state-dtd-!-attdef-decl))
- )
- (clear-coll coll)
- else (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error (concatenate 'string
- "illegal DTD <!ATTLIST type spec for "
- (string (first contents-to-return))
- ": '"
- (compute-coll-string coll)
- "'"))
- ))
- (#.state-dtd-!-attdef-notation
- (if* (xml-space-p ch) then nil
- elseif (and external (eq #\% ch)) then
- (external-param-reference tokenbuf coll external-callback)
- elseif (eq #\( ch) then (setf state state-dtd-!-attdef-notation2)
- else (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error (concatenate 'string
- "illegal DTD <!ATTLIST type spec for "
- (string (first contents-to-return))
- ": '"
- (compute-coll-string coll)
- "'"))
- ))
- (#.state-dtd-!-attdef-notation2
- (if* (xml-space-p ch) then nil
- elseif (and external (eq #\% ch)) then
- (external-param-reference tokenbuf coll external-callback)
- elseif (xml-name-start-char-p ch) then
- (setf state state-dtd-!-attdef-notation3)
- (add-to-coll coll ch)
- elseif (and (xml-name-char-p ch) (listp (first pending))
- (eq :enumeration (first (reverse (first pending))))) then
- (setf state state-dtd-!-attdef-notation3)
- (add-to-coll coll ch)
- else (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error (concatenate 'string
- "illegal DTD <!ATTLIST type spec for "
- (string (first contents-to-return))
- ": '"
- (compute-coll-string coll)
- "'"))
- ))
- (#.state-dtd-!-attdef-notation3
- (if* (xml-name-char-p ch) then (add-to-coll coll ch)
- elseif (and external (eq #\% ch)) then
- (external-param-reference tokenbuf coll external-callback)
- elseif (and external (eq #\% ch)) then
- (external-param-reference tokenbuf coll external-callback)
- elseif (xml-space-p ch) then
- (push (compute-tag coll) (first pending))
- (clear-coll coll)
- (setf state state-dtd-!-attdef-notation4)
- elseif (eq #\| ch) then
- (push (compute-tag coll) (first pending))
- (clear-coll coll)
- (setf state state-dtd-!-attdef-notation2)
- elseif (eq #\) ch) then
- (push (compute-tag coll) (first pending))
- (clear-coll coll)
- (setf (first pending) (nreverse (first pending)))
- ;;(setf state state-dtd-!-attdef-decl)
- (setf state state-dtd-!-attdef-notation5)
- else (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error (concatenate 'string
- "illegal DTD <!ATTLIST type spec for "
- (string (first contents-to-return))
- ": '"
- (compute-coll-string coll)
- "'"))
- ))
- (#.state-dtd-!-attdef-notation5
- (if* (xml-space-p ch) then (setf state state-dtd-!-attdef-decl)
- elseif (and external (eq #\% ch)) then
- (external-param-reference tokenbuf coll external-callback)
- else
- (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error
- (concatenate 'string
- "Expected space before: '"
- (compute-coll-string coll) "'"))))
- (#.state-dtd-!-attdef-notation4
- (if* (xml-space-p ch) then nil
- elseif (and external (eq #\% ch)) then
- (external-param-reference tokenbuf coll external-callback)
- elseif (xml-name-char-p ch) then (add-to-coll coll ch)
- (setf state state-dtd-!-attdef-notation3)
- elseif (eq #\| ch) then (setf state state-dtd-!-attdef-notation2)
- elseif (eq #\) ch) then (setf state state-dtd-!-attdef-decl)
- (setf (first pending) (nreverse (first pending)))
- else (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error (concatenate 'string
- "illegal DTD <!ATTLIST type spec for "
- (string (first contents-to-return))
- ": '"
- (compute-coll-string coll)
- "'"))
- ))
- (#.state-dtd-!-attdef-decl
- (if* (eq #\# ch) then
- (setf state state-dtd-!-attdef-decl-type)
- elseif (or (eq #\' ch) (eq #\" ch)) then
- (setf value-delim ch)
- (setf state state-dtd-!-attdef-decl-value)
- elseif (xml-space-p ch) then nil
- elseif (and external (eq #\% ch)) then
- (external-param-reference tokenbuf coll external-callback)
- else (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error (concatenate 'string
- "illegal DTD <!ATTLIST type spec for "
- (string (first contents-to-return))
- ": '"
- (compute-coll-string coll)
- "'"))
- ))
- (#.state-dtd-!-attdef-decl-value
- (if* (eq ch value-delim) then
- #-ignore
- (push (first (parse-default-value (list (compute-coll-string coll))
- tokenbuf external-callback))
- pending)
- #+ignore
- (push (compute-coll-string coll) pending)
- (setf contents-to-return
- (append contents-to-return
- (if* entityp then
- (nreverse pending)
- else (list (nreverse pending)))))
- (setf pending (list nil))
- (setf state state-dtd-!-attdef)
- (clear-coll coll)
- elseif (eq #\& ch) then (setf state state-dtd-!-attdef-decl-value3)
- (setf reference-save-state state-dtd-!-attdef-decl-value)
- elseif (and (xml-char-p ch) (not (eq #\< ch)))
- then (add-to-coll coll ch)
- else (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error (concatenate 'string
- "illegal DTD <!ATTLIST type spec for "
- (string (first contents-to-return))
- ": '"
- (compute-coll-string coll)
- "'"))
- ))
- (#.state-dtd-!-attdef-decl-value3
- (if* (and (not prefp) (eq #\# ch))
- then (setf state state-dtd-!-attdef-decl-value4)
- elseif (xml-name-start-char-p ch)
- then (setf state state-dtd-!-attdef-decl-value5)
- (when (not prefp) (add-to-coll coll #\&))
- (un-next-char ch)
- else (clear-coll coll)
- (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error (concatenate 'string
- "illegal reference name, starting at: '&"
- (compute-coll-string coll)
- "'"))))
- (#.state-dtd-!-attdef-decl-value4
- (if* (eq #\x ch)
- then (setf state state-dtd-!-attdef-decl-value6)
- elseif (<= (char-code #\0) (char-code ch) (char-code #\9))
- then (setf state state-dtd-!-attdef-decl-value7)
- (un-next-char ch)
- else (clear-coll coll)
- (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error (concatenate 'string
- "illegal character reference code, starting at: '&#"
- (compute-coll-string coll)
- "'"))
- ))
- (#.state-dtd-!-attdef-decl-value5
- (if* (xml-name-char-p ch)
- then (add-to-coll entity ch)
- (when (not prefp) (add-to-coll coll ch))
- elseif (eq #\; ch)
- then
- (if* (not prefp) then (add-to-coll coll ch)
- elseif (not external) then
- (xml-error
- (concatenate 'string
- "internal dtd subset cannot reference parameter entity within a token; entity: "
- (compute-coll-string entity)))
- else
- (let* ((entity-symbol (compute-tag entity))
- (p-value
- (assoc entity-symbol (iostruct-parameter-entities tokenbuf))))
- (clear-coll entity)
- (if* (and (iostruct-do-entity tokenbuf)
- (setf p-value
- (assoc entity-symbol
- (iostruct-parameter-entities tokenbuf)))) then
- (setf p-value (rest p-value))
- (when (member entity-symbol (iostruct-entity-names tokenbuf))
- (xml-error (concatenate 'string
- "entity:"
- (string entity-symbol)
- " in recursive reference")))
- (push entity-symbol (iostruct-entity-names tokenbuf))
- (if* (stringp p-value) then
- (dotimes (i (length p-value))
- (add-to-coll coll (schar p-value i)))
- elseif p-value then
- (if* (null external-callback) then
- (setf (iostruct-do-entity tokenbuf) nil)
- else
- (let ((count 0) (string "<?xml ") last-ch
- save-ch save-unget
- (tmp-count 0)
- (entity-stream
- (apply external-callback p-value)))
- (when entity-stream
- (let ((tmp-buf (get-tokenbuf)))
- (setf (tokenbuf-stream tmp-buf)
- entity-stream)
- (setf save-unget
- (iostruct-unget-char tokenbuf))
- (setf (iostruct-unget-char tokenbuf) nil)
- (unicode-check entity-stream tokenbuf)
- (when (iostruct-unget-char tokenbuf)
- (setf save-ch (first (iostruct-unget-char tokenbuf))))
- (setf (iostruct-unget-char tokenbuf) save-unget)
- (loop
- (let ((cch
- (if* save-ch
- then
- (let ((s2 save-ch))
- (setf save-ch nil)
- s2)
- else
- (next-char
- tmp-buf
- (iostruct-read-sequence-func
- tokenbuf)))))
- (when (null cch) (return))
- (when *debug-dtd*
- (format t "dtd-char: ~s~%" cch))
- (if* (< count 0) then
- (if* (and (eq last-ch #\?)
- (eq cch #\>)) then
- (setf count 6)
- else (setf last-ch cch))
- elseif (< count 6) then
- (when (and (= count 5)
- (xml-space-p cch))
- (setf cch #\space))
- (if* (not (eq cch
- (schar string count)
- )) then
- (loop
- (when (= tmp-count count)
- (return))
- (add-to-coll coll
- (schar string
- tmp-count))
- (incf tmp-count))
- (add-to-coll coll cch)
- (setf count 10)
- else (incf count))
- elseif (= count 6) then
- (dotimes (i 6)
- (add-to-coll coll (schar string i)))
- (setf count 10)
- else (add-to-coll coll cch))))
- (setf (iostruct-entity-names tokenbuf)
- (rest (iostruct-entity-names tokenbuf)))
- (close entity-stream)
- (put-back-tokenbuf tmp-buf)))))
- )
- (setf state state-dtdstart)
- else nil
- )))
- (setf state reference-save-state)
- else (let ((tmp (compute-coll-string entity)))
- (clear-coll coll)
- (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error (concatenate 'string
- "reference not terminated by ';', starting at: '&"
- tmp
- (compute-coll-string coll)
- "'")))
- ))
- (#.state-dtd-!-attdef-decl-value6
- (let ((code (char-code ch)))
- (if* (eq #\; ch)
- then (add-to-coll coll (code-char char-code))
- (setf char-code 0)
- (setq state state-dtd-!-attdef-decl-value)
- elseif (<= (char-code #\0) code (char-code #\9))
- then (setf char-code (+ (* char-code 16) (- code (char-code #\0))))
- elseif (<= (char-code #\A) code (char-code #\F))
- then (setf char-code (+ 10 (* char-code 16) (- code (char-code #\A))))
- elseif (<= (char-code #\a) code (char-code #\f))
- then (setf char-code (+ 10 (* char-code 16) (- code (char-code #\a))))
- else (clear-coll coll)
- (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error (concatenate 'string
- "illegal hexidecimal character reference code, starting at: '"
- (compute-coll-string coll)
- "', calculated char code: "
- (format nil "~s" char-code)))
- )))
- (#.state-dtd-!-attdef-decl-value7
- (let ((code (char-code ch)))
- (if* (eq #\; ch)
- then (add-to-coll coll (code-char char-code))
- (setf char-code 0)
- (setq state reference-save-state)
- elseif (<= (char-code #\0) code (char-code #\9))
- then (setf char-code (+ (* char-code 10) (- code (char-code #\0))))
- else (clear-coll coll)
- (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error (concatenate 'string
- "illegal decimal character reference code, starting at: '"
- (compute-coll-string coll)
- "', calculated char code: "
- (format nil "~s" char-code)))
- )))
- (#.state-dtd-!-attdef-decl-type
- (if* (xml-name-char-p ch) then (add-to-coll coll ch)
- elseif (and external (eq #\% ch)) then
- (external-param-reference tokenbuf coll external-callback)
- elseif (or (xml-space-p ch) (eq #\> ch)) then
- (let ((token (compute-tag coll)))
- (when (and (not (eq :REQUIRED token))
- (not (eq :IMPLIED token))
- (not (eq :FIXED token)))
- (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error (concatenate 'string
- "illegal DTD <!ATTLIST type spec for "
- (string (first contents-to-return))
- ": '"
- (compute-coll-string coll)
- "'")))
- (push token pending)
- (if* (eq :FIXED token) then
- (when (eq #\> ch)
- (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error (concatenate 'string
- "illegal DTD <!ATTLIST type spec for "
- (string (first contents-to-return))
- ": '"
- (compute-coll-string coll)
- "'")))
- (setf state state-dtd-!-attdef-decl-value2)
- elseif (eq #\> ch) then
- (setf contents-to-return
- (append contents-to-return (list (nreverse pending))))
- (return)
- else (setf contents-to-return
- (append contents-to-return (list (nreverse pending))))
- (setf pending (list nil))
- (setf state state-dtd-!-attdef)))
- (clear-coll coll)
- else (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error (concatenate 'string
- "illegal DTD <!ATTLIST type spec for "
- (string (first contents-to-return))
- ": '"
- (compute-coll-string coll)
- "'"))
- ))
- (#. state-dtd-!-attdef-decl-value2
- (if* (xml-space-p ch) then nil
- elseif (and external (eq #\% ch)) then
- (external-param-reference tokenbuf coll external-callback)
- elseif (or (eq #\' ch) (eq #\" ch)) then
- (setf value-delim ch)
- (setf state state-dtd-!-attdef-decl-value)
- else (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error (concatenate 'string
- "illegal DTD <!ATTLIST type spec for "
- (string (first contents-to-return))
- ": '"
- (compute-coll-string coll)
- "'"))
- ))
- (#.state-dtd-!-element
- (if* (xml-space-p ch) then nil
- elseif (and external (eq #\% ch)) then
- (external-param-reference tokenbuf coll external-callback)
- elseif (xml-name-start-char-p ch) then (setf state state-dtd-!-element-name)
- (un-next-char ch)
- else (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error (concatenate 'string
- "illegal DTD characters, starting at: '<!ELEMENT "
- (compute-coll-string coll)
- "'"))))
- (#.state-dtd-!-element-name
- (if* (xml-name-char-p ch) then (add-to-coll coll ch)
- elseif (and external (eq #\% ch)) then
- (external-param-reference tokenbuf coll external-callback)
- elseif (xml-space-p ch) then
- (push (compute-tag coll)
- contents-to-return)
- (clear-coll coll)
- (setf state state-dtd-!-element-type)
- else (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error (concatenate 'string
- "illegal DTD <!ELEMENT name: '"
- (compute-coll-string coll)
- "'"))
- ))
- (#.state-dtd-!-element-type
- (if* (eq #\( ch) then (setf state state-dtd-!-element-type-paren)
- elseif (xml-space-p ch) then nil
- elseif (and external (eq #\% ch)) then
- (external-param-reference tokenbuf coll external-callback)
- elseif (xml-name-start-char-p ch) then
- (un-next-char ch)
- (setf state state-dtd-!-element-type-token)
- else (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error (concatenate 'string
- "illegal DTD <!ELEMENT content spec for "
- (string (first contents-to-return))
- ": '"
- (compute-coll-string coll)
- "'"))
- ))
- (#.state-dtd-!-element-type-paren
- (if* (xml-space-p ch) then nil
- elseif (and external (eq #\% ch)) then
- (external-param-reference tokenbuf coll external-callback)
- elseif (xml-name-start-char-p ch) then
- (un-next-char ch)
- (setf state state-dtd-!-element-type-paren-name)
- elseif (eq #\# ch) then
- (setf state state-dtd-!-element-type-paren-pcd)
- elseif (eq #\( ch) then
- (push nil pending)
- (setf state state-dtd-!-element-type-paren-choice-paren)
- else (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error (concatenate 'string
- "illegal DTD <!ELEMENT content spec for "
- (string (first contents-to-return))
- ": '"
- (compute-coll-string coll)
- "'"))))
- (#.state-dtd-!-element-type-paren2
- (if* (eq #\> ch) then
- ;; there only one name...
- (setf (first contents-to-return) (first (first contents-to-return)))
- (return)
- elseif (eq #\* ch) then
- (setf state state-dtd-!-element-type-paren-pcd5)
- (setf (first contents-to-return) (nreverse (first contents-to-return)))
- (if* (> (length (first contents-to-return)) 1) then
- (setf (first contents-to-return)
- (list (append (list :choice)
- (first contents-to-return))))
- elseif (listp (first (first contents-to-return))) then
- (setf (first contents-to-return)
- (first (first contents-to-return))))
- (push :* (first contents-to-return))
- elseif (eq #\? ch) then
- (setf state state-dtd-!-element-type-paren-pcd5)
- (setf (first contents-to-return) (nreverse (first contents-to-return)))
- (if* (> (length (first contents-to-return)) 1) then
- (setf (first contents-to-return)
- (list (append (list :choice)
- (first contents-to-return))))
- elseif (listp (first (first contents-to-return))) then
- (setf (first contents-to-return)
- (first (first contents-to-return))))
- (push :? (first contents-to-return))
- elseif (eq #\+ ch) then
- (setf state state-dtd-!-element-type-paren-pcd5)
- (setf (first contents-to-return) (nreverse (first contents-to-return)))
- (if* (> (length (first contents-to-return)) 1) then
- (setf (first contents-to-return)
- (list (append (list :choice)
- (first contents-to-return))))
- elseif (listp (first (first contents-to-return))) then
- (setf (first contents-to-return)
- (first (first contents-to-return))))
- (push :+ (first contents-to-return))
- elseif (and external (eq #\% ch)) then
- (external-param-reference tokenbuf coll external-callback)
- elseif (xml-space-p ch) then
- (setf state state-dtd-!-element-type-paren-pcd5)
- (setf (first contents-to-return) (nreverse (first contents-to-return)))
- (when (> (length (first contents-to-return)) 1)
- (setf (first contents-to-return)
- (list (append (list :\choice)
- (first contents-to-return)))))
- else (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error (concatenate 'string
- "illegal DTD <!ELEMENT content spec for "
- (string (first (reverse contents-to-return)))
- ": '"
- (compute-coll-string coll)
- "'"))
- ))
- (#.state-dtd-!-element-type-paren-name
- (if* (xml-name-char-p ch) then (add-to-coll coll ch)
- elseif (and external (eq #\% ch)) then
- (external-param-reference tokenbuf coll external-callback)
- elseif (xml-space-p ch) then
- (push (compute-tag coll) (first pending))
- (clear-coll coll)
- (setf state state-dtd-!-element-type-paren-name2)
- elseif (eq #\? ch) then
- (push (compute-tag coll) (first pending))
- (setf (first pending)
- (list (push :? (first pending))))
- (clear-coll coll)
- (setf state state-dtd-!-element-type-paren-name2)
- elseif (eq #\* ch) then
- (push (compute-tag coll) (first pending))
- (setf (first pending)
- (list (push :* (first pending))))
- (clear-coll coll)
- (setf state state-dtd-!-element-type-paren-name2)
- elseif (eq #\+ ch) then
- (push (compute-tag coll) (first pending))
- (setf (first pending)
- (list (push :+ (first pending))))
- (clear-coll coll)
- (setf state state-dtd-!-element-type-paren-name2)
- elseif (eq #\) ch) then
- (push (compute-tag coll) (first pending))
- (clear-coll coll)
- (if* (= (length pending) 1) then
- (push (first pending) contents-to-return)
- (setf state state-dtd-!-element-type-paren2)
- else ;; this is (xxx)
- (if* (second pending) then
- (push (first pending) (second pending))
- else (setf (second pending) (first pending)))
- (setf pending (rest pending))
- (setf state state-dtd-!-element-type-paren-choice-name3)
- )
- elseif (eq #\, ch) then
- (when (and (first pending) (not (eq :seq (first pending-type))))
- (clear-coll coll)
- (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error (concatenate 'string
- "illegal '|' and ',' mix starting at '"
- (compute-coll-string coll)
- "'")))
- (push (compute-tag coll) (first pending))
- (push :seq pending-type)
- (clear-coll coll)
- (setf state state-dtd-!-element-type-paren-choice)
- elseif (eq #\| ch) then
- (when (and (first pending) (not (eq :choice (first pending-type))))
- (clear-coll coll)
- (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error (concatenate 'string
- "illegal '|' and ',' mix starting at '"
- (compute-coll-string coll)
- "'")))
- (push (compute-tag coll) (first pending))
- (push :choice pending-type)
- (clear-coll coll)
- (setf state state-dtd-!-element-type-paren-choice)
- else (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error (concatenate 'string
- "illegal DTD <!ELEMENT content spec for "
- (string (first contents-to-return))
- ": '"
- (compute-coll-string coll)
- "'"))
- ))
- (#.state-dtd-!-element-type-paren-name2
- (if* (xml-space-p ch) then nil
- elseif (and external (eq #\% ch)) then
- (external-param-reference tokenbuf coll external-callback)
- elseif (eq #\| ch) then
- (when (and (rest (first pending)) (not (eq :choice (first pending-type))))
- (clear-coll coll)
- (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error (concatenate 'string
- "illegal '|' and ',' mix starting at '"
- (compute-coll-string coll)
- "'")))
- (push :choice pending-type)
- (setf state state-dtd-!-element-type-paren-choice)
- elseif (eq #\, ch) then
- (when (and (rest (first pending)) (not (eq :seq (first pending-type))))
- (clear-coll coll)
- (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error (concatenate 'string
- "illegal '|' and ',' mix starting at '"
- (compute-coll-string coll)
- "'")))
- (push :seq pending-type)
- (setf state state-dtd-!-element-type-paren-choice)
- elseif (eq #\) ch) then
- (if* (= (length pending) 1) then
- (push (list (first pending)) contents-to-return)
- (setf state state-dtd-!-element-type-paren2)
- else (setf pending (reverse (rest (reverse pending))))
- )
- else (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error (concatenate 'string
- "illegal DTD <!ELEMENT content spec for "
- (string (first (reverse contents-to-return)))
- ": '"
- (compute-coll-string coll)
- "'"))
- ))
+ (case state
+ (#.state-dtdstart
+ (if* (and (eq #\] ch)
+ external (> include-count 0)) then
+ (setf state state-dtd-!-include3)
+ elseif (and (eq #\] ch) (not external)) then (return)
+ elseif (eq #\< ch) then (setf state state-tokenstart)
+ elseif (xml-space-p ch) then nil
+ elseif (eq #\% ch) then (external-param-reference tokenbuf coll external-callback)
+ else (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD characters, starting at: '"
+ (compute-coll-string coll)
+ "'"))
+ ))
+ (#.state-dtd-!-include3
+ (if* (eq #\] ch) then (setf state state-dtd-!-include4)
+ else
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD token, starting at: ']"
+ (compute-coll-string coll)
+ "'"))))
+ (#.state-dtd-!-include4
+ (if* (eq #\> ch) then (return)
+ else
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD token, starting at: ']]"
+ (compute-coll-string coll)
+ "'"))))
+ #+ignore
+ (#.state-dtd-pref
+ (if* (xml-name-start-char-p ch) then
+ (add-to-coll coll ch)
+ (setf state state-dtd-pref2)
+ else (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD parameter reference name, starting at: '"
+ (compute-coll-string coll)
+ "'"))
+ ))
+ (#.state-tokenstart
+ (if* (eq #\? ch) then (setf state state-dtd-?)
+ elseif (eq #\! ch) then (setf state state-dtd-!)
+ else (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD characters, starting at: '<"
+ (compute-coll-string coll)
+ "'"))
+ ))
+ (#.state-dtd-?
+ (if* (xml-name-char-p ch)
+ then
+ (add-to-coll coll ch)
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ else
+ (when (not (xml-space-p ch))
+ (xml-error (concatenate 'string
+ "expecting name following: '<?"
+ (compute-coll-string coll)
+ "' ; got: '" (string ch) "'"))
+ )
+ (when (= (collector-next coll) 0)
+ (xml-error "null <? token"))
+ (if* (and (= (collector-next coll) 3)
+ (or (eq (elt (collector-data coll) 0) #\X)
+ (eq (elt (collector-data coll) 0) #\x))
+ (or (eq (elt (collector-data coll) 1) #\M)
+ (eq (elt (collector-data coll) 1) #\m))
+ (or (eq (elt (collector-data coll) 2) #\L)
+ (eq (elt (collector-data coll) 2) #\l)))
+ then
+ (xml-error "<?xml not allowed in dtd")
+ else
+ (setq tag-to-return (compute-tag coll))
+ (setf state state-dtd-?-2))
+ (clear-coll coll)))
+ (#.state-dtd-?-2
+ (if* (xml-space-p ch)
+ then nil
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ elseif (not (xml-char-p ch))
+ then (xml-error "XML is not well formed") ;; no test
+ else (add-to-coll coll ch)
+ (setf state state-dtd-?-3)))
+ (#.state-dtd-?-3
+ (if* (eq #\? ch)
+ then (setf state state-dtd-?-4)
+ elseif (not (xml-char-p ch))
+ then (xml-error "XML is not well formed") ;; no test
+ else (add-to-coll coll ch)))
+ (#.state-dtd-?-4
+ (if* (eq #\> ch)
+ then
+ (push (compute-coll-string coll) contents-to-return)
+ (clear-coll coll)
+ (return)
+ else (setf state state-dtd-?-3)
+ (add-to-coll coll #\?)
+ (add-to-coll coll ch)))
+ (#.state-dtd-!
+ (if* (eq #\- ch) then (setf state state-dtd-comment)
+ elseif (xml-name-start-char-p ch) then (setf state state-dtd-!-token)
+ (un-next-char ch)
+ elseif (and (eq #\[ ch) external) then
+ (setf state state-dtd-!-cond)
+ else (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD characters, starting at: '<!"
+ (compute-coll-string coll)
+ "'"))
+ ))
+ (#.state-dtd-!-cond
+ (if* (xml-space-p ch) then nil
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ elseif (eq #\I ch) then (setf state state-dtd-!-cond2)
+ else (error "this should not happen")
+ ))
+ (#.state-dtd-!-cond2
+ (if* (eq #\N ch) then (setf state state-dtd-!-include)
+ (setf check-count 2)
+ elseif (eq #\G ch) then (setf state state-dtd-!-ignore)
+ (setf check-count 2)
+ else (xml-error "<![ external DTD token not INCLUDE or IGNORE")
+ ))
+ (#.state-dtd-!-ignore
+ (if* (and (eq check-count 5) (eq ch #\E)) then
+ (setf state state-dtd-!-ignore2)
+ elseif (eq ch (elt "IGNORE" check-count)) then
+ (incf check-count)
+ else (xml-error "<![ external DTD token not INCLUDE or IGNORE")
+ ))
+ (#.state-dtd-!-ignore2
+ (if* (xml-space-p ch) then nil
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ elseif (eq #\[ ch) then (setf state state-dtd-!-ignore3)
+ (incf ignore-count)
+ else (xml-error "'[' missing after '<![Ignore'")))
+ (#.state-dtd-!-ignore3
+ (if* (eq #\< ch) then (setf state state-dtd-!-ignore4)
+ elseif (eq #\] ch) then (setf state state-dtd-!-ignore5)))
+ (#.state-dtd-!-ignore4
+ (if* (eq #\! ch) then (setf state state-dtd-!-ignore6)
+ else (un-next-char ch)
+ (setf state state-dtd-!-ignore3)))
+ (#.state-dtd-!-ignore5
+ (if* (eq #\] ch) then (setf state state-dtd-!-ignore7)
+ else (un-next-char ch)
+ (setf state state-dtd-!-ignore3)))
+ (#.state-dtd-!-ignore6
+ (if* (eq #\[ ch) then (incf ignore-count)
+ (setf state state-dtd-!-ignore3)
+ else (un-next-char ch)
+ (setf state state-dtd-!-ignore3)))
+ (#.state-dtd-!-ignore7
+ (if* (eq #\> ch) then (decf ignore-count)
+ (when (= ignore-count 0) (return))
+ else (un-next-char ch)
+ (setf state state-dtd-!-ignore3)))
+ (#.state-dtd-!-include
+ (if* (and (eq check-count 6) (eq ch #\E)) then
+ (setf state state-dtd-!-include2)
+ elseif (eq ch (elt "INCLUD" check-count)) then
+ (incf check-count)
+ else (xml-error "<![ external DTD token not INCLUDE or IGNORE")
+ ))
+ (#.state-dtd-!-include2
+ (if* (xml-space-p ch) then nil
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ elseif (eq #\[ ch) then (return)
+ else (xml-error "'[' missing after '<![INCLUDE'")))
+ (#.state-dtd-comment
+ (if* (eq #\- ch)
+ then (setf state state-dtd-comment2)
+ (setf tag-to-return :comment)
+ else (clear-coll coll)
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal token following '<![-', starting at '<!-"
+ (compute-coll-string coll)
+ "'"))
+ ))
+ (#.state-dtd-comment2
+ (if* (eq #\- ch)
+ then (setf state state-dtd-comment3)
+ else (add-to-coll coll ch)))
+ (#.state-dtd-comment3
+ (if* (eq #\- ch)
+ then (setf state state-dtd-comment4)
+ else (setf state state-dtd-comment2)
+ (add-to-coll coll #\-) (add-to-coll coll ch)))
+ (#.state-dtd-comment4
+ (if* (eq #\> ch)
+ then (push (compute-coll-string coll) contents-to-return)
+ (clear-coll coll)
+ (return)
+ else (clear-coll coll)
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal token following '--' comment terminator, starting at '--"
+ (compute-coll-string coll)
+ "'"))
+ ))
+ (#.state-dtd-!-token
+ (if* (xml-name-char-p ch) then (add-to-coll coll ch)
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ elseif (xml-space-p ch) then
+ (setf tag-to-return (compute-tag coll))
+ (clear-coll coll)
+ (if* (eq tag-to-return :ELEMENT) then (setf state state-dtd-!-element)
+ elseif (eq tag-to-return :ATTLIST) then
+ (setf state state-dtd-!-attlist)
+ elseif (eq tag-to-return :ENTITY) then
+ (setf entityp t)
+ (setf state state-dtd-!-entity)
+ elseif (eq tag-to-return :NOTATION) then
+ (setf state state-dtd-!-notation)
+ else
+ (xml-error (concatenate 'string
+ "illegal DTD characters, starting at: '<!"
+ (string tag-to-return)
+ "'")))
+ else (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD characters, starting at: '<!"
+ (compute-coll-string coll)
+ "'"))
+ ))
+ (#.state-dtd-!-notation
+ (if* (xml-space-p ch) then nil
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ elseif (xml-name-start-char-p ch) then
+ (add-to-coll coll ch)
+ (setf state state-dtd-!-notation2)
+ else (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD characters, starting at: '<!NOTATION "
+ (compute-coll-string coll)
+ "'"))
+ ))
+ (#.state-dtd-!-notation2
+ (if* (xml-name-char-p ch) then (add-to-coll coll ch)
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ elseif (xml-space-p ch) then
+ (push (compute-tag coll) contents-to-return)
+ (clear-coll coll)
+ (setf state state-dtd-!-notation3)
+ else (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD <!NOTATION name: "
+ (compute-coll-string coll)
+ "'"))
+ ))
+ (#.state-dtd-!-notation3
+ (if* (xml-space-p ch) then nil
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ elseif (xml-name-char-p ch) then
+ (add-to-coll coll ch)
+ (setf state state-dtd-!-entity6)
+ else (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD <!NOTATION spec for "
+ (string (first contents-to-return))
+ ": '"
+ (compute-coll-string coll)
+ "'"))
+ ))
+ (#.state-dtd-!-entity
+ (if* (eq #\% ch) then (push :param contents-to-return)
+ (setf pentityp t)
+ (setf state state-dtd-!-entity2)
+ elseif (xml-name-start-char-p ch) then
+ (add-to-coll coll ch)
+ (setf pending nil)
+ (setf state state-dtd-!-entity3)
+ elseif (xml-space-p ch) then nil
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ else (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD characters, starting at: '<!ENTITY "
+ (compute-coll-string coll)
+ "'"))
+ ))
+ (#.state-dtd-!-entity2
+ (if* (xml-space-p ch) then (setf state state-dtd-!-entity7)
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ else (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD <!ENTITY spec for "
+ (string (first contents-to-return))
+ ": '"
+ (compute-coll-string coll)
+ "'"))
+ ))
+ (#.state-dtd-!-entity3
+ (if* (xml-name-char-p ch) then (add-to-coll coll ch)
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ elseif (xml-space-p ch) then
+ (push (compute-tag coll) contents-to-return)
+ (setf contents-to-return
+ (nreverse contents-to-return))
+ (clear-coll coll)
+ (setf state state-dtd-!-entity4)
+ else (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD <!ENTITY name: "
+ (compute-coll-string coll)
+ "'"))
+ ))
+ (#.state-dtd-!-entity4
+ (if* (xml-space-p ch) then nil
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ elseif (or (eq #\' ch) (eq #\" ch)) then
+ (setf value-delim ch)
+ (setf state state-dtd-!-entity-value)
+ elseif (xml-name-start-char-p ch) then
+ (add-to-coll coll ch)
+ (setf state state-dtd-!-entity6)
+ else (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD <!ENTITY spec: '"
+ (compute-coll-string coll)
+ "'"))
+ ))
+ (#.state-dtd-!-entity6
+ (if* (xml-name-char-p ch) ;; starting char already passed more restrictive test
+ then
+ (add-to-coll coll ch)
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ else
+ (when (not (xml-space-p ch))
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error
+ (concatenate 'string
+ "illegal character in '"
+ (compute-coll-string coll)
+ "' in <! tag: " (string tag-to-return) " "
+ (string (first contents-to-return))
+ ))
+ )
+ (let ((token (compute-tag coll)))
+ (push token contents-to-return)
+ (clear-coll coll)
+ (if* (eq :SYSTEM token) then (setf state state-!-dtd-system)
+ elseif (eq :PUBLIC token) then (setf state state-!-dtd-public)
+ else (xml-error
+ (concatenate 'string
+ "expected 'SYSTEM' or 'PUBLIC' got '"
+ (string (first contents-to-return))
+ "' in <! tag: " (string tag-to-return) " "
+ (string (second contents-to-return))))
+ )
+ )))
+ (#.state-dtd-!-entity7
+ (if* (xml-space-p ch) then nil
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ elseif (xml-name-start-char-p ch) then
+ (add-to-coll coll ch)
+ (setf state state-dtd-!-entity3)
+ else (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD <!ENTITY % name: "
+ (compute-coll-string coll)
+ "'"))
+ ))
+ (#.state-!-dtd-public
+ (if* (xml-space-p ch) then nil
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ elseif (or (eq #\" ch) (eq #\' ch)) then
+ (setf state state-!-dtd-public2)
+ (setf value-delim ch)
+ else (xml-error
+ (concatenate 'string
+ "expected quote or double-quote got: '"
+ (string ch)
+ "' in <! tag: " (string tag-to-return) " "
+ (string (second contents-to-return)) " "
+ (string (first contents-to-return))
+ ))))
+ (#.state-!-dtd-public2
+ (if* (eq value-delim ch) then
+ (push (setf public-string
+ (normalize-public-value
+ (compute-coll-string coll))) contents-to-return)
+ (clear-coll coll)
+ (setf state state-!-dtd-public3)
+ elseif (pub-id-char-p ch) then (add-to-coll coll ch)
+ else (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error
+ (concatenate 'string
+ "illegal character in string: '"
+ (compute-coll-string coll) "'"))
+ ))
+ (#.state-!-dtd-public3
+ (if* (xml-space-p ch) then (setf state state-!-dtd-system)
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ elseif (and (not entityp)
+ (eq #\> ch)) then
+ (setf state state-!-dtd-system)
+ (return)
+ else
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error
+ (concatenate 'string
+ "Expected space before: '"
+ (compute-coll-string coll) "'"))
+ ))
+ (#.state-!-dtd-system
+ (if* (xml-space-p ch) then nil
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ elseif (or (eq #\" ch) (eq #\' ch)) then
+ (setf state state-!-dtd-system2)
+ (setf value-delim ch)
+ elseif (and (not entityp)
+ (eq #\> ch)) then (return)
+ else (xml-error
+ (concatenate 'string
+ "expected quote or double-quote got: '"
+ (string ch)
+ "' in <! tag: " (string tag-to-return) " "
+ (string (second contents-to-return)) " "
+ (string (first contents-to-return))
+ ))))
+ (#.state-!-dtd-system2
+ (when (not (xml-char-p ch))
+ (xml-error "XML is not well formed")) ;; not tested
+ (if* (eq value-delim ch) then
+ (let ((entity-symbol (first (last contents-to-return)))
+ (system-string (compute-coll-string coll)))
+ (if* pentityp then
+ (when (not (assoc entity-symbol (iostruct-parameter-entities tokenbuf)))
+ (setf (iostruct-parameter-entities tokenbuf)
+ (acons entity-symbol (list (parse-uri system-string)
+ tag-to-return
+ public-string)
+ (iostruct-parameter-entities tokenbuf)))
+ )
+ else
+ (when (not (assoc entity-symbol (iostruct-general-entities tokenbuf)))
+ (setf (iostruct-general-entities tokenbuf)
+ (acons entity-symbol (list (parse-uri system-string)
+ tag-to-return
+ public-string
+ )
+ (iostruct-general-entities tokenbuf)))
+ (when (not (assoc entity-symbol (iostruct-general-entities tokenbuf)))
+ (setf (iostruct-general-entities tokenbuf)
+ (acons entity-symbol (list (parse-uri system-string)
+ tag-to-return
+ public-string
+ )
+ (iostruct-general-entities tokenbuf))))
+ )
+ )
+ (push system-string contents-to-return))
+ (clear-coll coll)
+ (setf state state-!-dtd-system3)
+ else (add-to-coll coll ch)))
+ (#.state-!-dtd-system3
+ (if* (xml-space-p ch) then (setf state state-!-dtd-system4)
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ elseif (eq #\> ch) then (return)
+ else
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD <!ENTITY value for "
+ (string (first (nreverse contents-to-return)))
+ ": '"
+ (compute-coll-string coll)
+ "'"))
+ ))
+ (#.state-!-dtd-system4
+ (if* (xml-space-p ch) then nil
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ elseif (and (not pentityp) (xml-name-start-char-p ch)) then
+ (add-to-coll coll ch)
+ (setf state state-!-dtd-system5)
+ elseif (eq #\> ch) then (return)
+ else (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD <!ENTITY value for "
+ (string (first (nreverse contents-to-return)))
+ ": '"
+ (compute-coll-string coll)
+ "'"))
+ ))
+ (#.state-!-dtd-system5
+ (if* (xml-name-char-p ch) then
+ (add-to-coll coll ch)
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ elseif (xml-space-p ch) then
+ (let ((token (compute-tag coll)))
+ (when (not (eq :NDATA token))
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD <!ENTITY value for "
+ (string (first (nreverse contents-to-return)))
+ ": '"
+ (compute-coll-string coll)
+ "'"))
+ )
+ (clear-coll coll)
+ (push token contents-to-return)
+ (setf state state-!-dtd-system6))
+ else (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD <!ENTITY value for "
+ (string (first (nreverse contents-to-return)))
+ ": '"
+ (compute-coll-string coll)
+ "'"))
+ ))
+ (#.state-!-dtd-system6
+ (if* (xml-space-p ch) then nil
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ elseif (xml-name-start-char-p ch) then
+ (add-to-coll coll ch)
+ (setf state state-!-dtd-system7)
+ else (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD <!ENTITY value for "
+ (string (first (nreverse contents-to-return)))
+ ": '"
+ (compute-coll-string coll)
+ "'"))
+ ))
+ (#.state-!-dtd-system7
+ (if* (xml-name-char-p ch) then
+ (add-to-coll coll ch)
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ elseif (xml-space-p ch) then
+ (push (compute-tag coll) contents-to-return)
+ (clear-coll coll)
+ (setf state state-dtd-!-entity5) ;; just looking for space, >
+ elseif (eq #\> ch) then
+ (push (compute-tag coll) contents-to-return)
+ (clear-coll coll)
+ (return)
+ else (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD <!ENTITY value for "
+ (string (first (nreverse contents-to-return)))
+ ": '"
+ (compute-coll-string coll)
+ "'"))
+ ))
+ (#.state-dtd-!-entity-value
+ (if* (eq ch value-delim) then
+ (let ((tmp (compute-coll-string coll)))
+ (when (> (length tmp) 0)
+ (when (null (first pending)) (setf pending (rest pending)))
+ (push tmp pending)))
+ (if* (> (length pending) 1) then
+ (push (nreverse pending) contents-to-return)
+ else (push (first pending) contents-to-return))
+ (setf pending (list nil))
+ (setf state state-dtd-!-entity5)
+ (clear-coll coll)
+ (if* pentityp then
+ (when (not (assoc (third contents-to-return)
+ (iostruct-parameter-entities tokenbuf)))
+ (setf (iostruct-parameter-entities tokenbuf)
+ (acons (third contents-to-return)
+ (first contents-to-return)
+ (iostruct-parameter-entities tokenbuf))))
+ else
+ (when (not (assoc (second contents-to-return)
+ (iostruct-general-entities tokenbuf)))
+ (setf (iostruct-general-entities tokenbuf)
+ (acons (second contents-to-return)
+ (first contents-to-return)
+ (iostruct-general-entities tokenbuf)))))
+ elseif (eq #\& ch) then
+ (setf reference-save-state state-dtd-!-entity-value)
+ (setf state state-dtd-!-attdef-decl-value3)
+ elseif (eq #\% ch) then
+ (setf prefp t)
+ (setf reference-save-state state-dtd-!-entity-value)
+ (setf state state-dtd-!-attdef-decl-value3)
+ elseif (xml-char-p ch)
+ then (add-to-coll coll ch)
+ else (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD <!ENTITY value for "
+ (string (first contents-to-return))
+ ": '"
+ (compute-coll-string coll)
+ "'"))
+ ))
+ (#.state-dtd-!-entity5
+ (if* (xml-space-p ch) then nil
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ elseif (eq #\> ch) then (return)
+ else (clear-coll coll)
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD contents following <!ENTITY spec for "
+ (string (first contents-to-return))
+ ": '"
+ (compute-coll-string coll)
+ "'"))
+ ))
+ (#.state-dtd-!-attlist
+ (if* (xml-name-start-char-p ch) then (setf state state-dtd-!-attlist-name)
+ (un-next-char ch)
+ elseif (xml-space-p ch) then nil
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ else (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD characters, starting at: '<!ATTLIST "
+ (compute-coll-string coll)
+ "'"))))
+ (#.state-dtd-!-attlist-name
+ (if* (xml-name-char-p ch) then (add-to-coll coll ch)
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ elseif (xml-space-p ch) then
+ (push (compute-tag coll *package*)
+ contents-to-return)
+ (clear-coll coll)
+ (setf state state-dtd-!-attdef)
+ elseif (eq #\> ch) then
+ (push (compute-tag coll *package*)
+ contents-to-return)
+ (clear-coll coll)
+ (return)
+ else (push (compute-tag coll)
+ contents-to-return)
+ (clear-coll coll)
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD <!ATTLIST content spec for "
+ (string (first contents-to-return))
+ ": '"
+ (compute-coll-string coll)
+ "'"))
+ ))
+ (#.state-dtd-!-attdef
+ (if* (xml-space-p ch) then nil
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ elseif (xml-name-start-char-p ch) then
+ (un-next-char ch)
+ (setf state state-dtd-!-attdef-name)
+ elseif (eq #\> ch) then (return)
+ else (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD <!ATTLIST content spec for "
+ (string (first contents-to-return))
+ ": '"
+ (compute-coll-string coll)
+ "'"))
+ ))
+ (#.state-dtd-!-attdef-name
+ (if* (xml-name-char-p ch) then (add-to-coll coll ch)
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ elseif (xml-space-p ch) then
+ (setf (first pending) (compute-tag coll *package*))
+ (clear-coll coll)
+ (setf state state-dtd-!-attdef-type)
+ else (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD <!ATTLIST type spec for "
+ (string (first contents-to-return))
+ ": '"
+ (compute-coll-string coll)
+ "'"))
+ ))
+ (#.state-dtd-!-attdef-type
+ (if* (xml-space-p ch) then nil
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ else (un-next-char ch)
+ ;; let next state do all other checking
+ (setf state state-dtd-!-attdef-type2)))
+ (#.state-dtd-!-attdef-type2
+ ;; can only be one of a few tokens, but wait until token built to check
+ (if* (xml-name-char-p ch) then (add-to-coll coll ch)
+ elseif (and (eq #\( ch) (= 0 (length (compute-coll-string coll)))) then
+ (push (list :enumeration) pending)
+ (setf state state-dtd-!-attdef-notation2)
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ elseif (xml-space-p ch) then
+ (let ((token (compute-tag coll)))
+ (when (and (not (eq :CDATA token))
+ (not (eq :ID token))
+ (not (eq :IDREF token))
+ (not (eq :IDREFS token))
+ (not (eq :ENTITY token))
+ (not (eq :ENTITIES token))
+ (not (eq :NMTOKEN token))
+ (not (eq :NMTOKENS token))
+ (not (eq :NOTATION token)))
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD <!ATTLIST type spec for "
+ (string (first contents-to-return))
+ ": '"
+ (compute-coll-string coll)
+ "'")))
+ (if* (eq token :NOTATION) then
+ (push (list token) pending)
+ (setf state state-dtd-!-attdef-notation)
+ else
+ (push token pending)
+ (setf state state-dtd-!-attdef-decl))
+ )
+ (clear-coll coll)
+ else (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD <!ATTLIST type spec for "
+ (string (first contents-to-return))
+ ": '"
+ (compute-coll-string coll)
+ "'"))
+ ))
+ (#.state-dtd-!-attdef-notation
+ (if* (xml-space-p ch) then nil
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ elseif (eq #\( ch) then (setf state state-dtd-!-attdef-notation2)
+ else (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD <!ATTLIST type spec for "
+ (string (first contents-to-return))
+ ": '"
+ (compute-coll-string coll)
+ "'"))
+ ))
+ (#.state-dtd-!-attdef-notation2
+ (if* (xml-space-p ch) then nil
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ elseif (xml-name-start-char-p ch) then
+ (setf state state-dtd-!-attdef-notation3)
+ (add-to-coll coll ch)
+ elseif (and (xml-name-char-p ch) (listp (first pending))
+ (eq :enumeration (first (reverse (first pending))))) then
+ (setf state state-dtd-!-attdef-notation3)
+ (add-to-coll coll ch)
+ else (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD <!ATTLIST type spec for "
+ (string (first contents-to-return))
+ ": '"
+ (compute-coll-string coll)
+ "'"))
+ ))
+ (#.state-dtd-!-attdef-notation3
+ (if* (xml-name-char-p ch) then (add-to-coll coll ch)
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ elseif (xml-space-p ch) then
+ (push (compute-tag coll) (first pending))
+ (clear-coll coll)
+ (setf state state-dtd-!-attdef-notation4)
+ elseif (eq #\| ch) then
+ (push (compute-tag coll) (first pending))
+ (clear-coll coll)
+ (setf state state-dtd-!-attdef-notation2)
+ elseif (eq #\) ch) then
+ (push (compute-tag coll) (first pending))
+ (clear-coll coll)
+ (setf (first pending) (nreverse (first pending)))
+ ;;(setf state state-dtd-!-attdef-decl)
+ (setf state state-dtd-!-attdef-notation5)
+ else (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD <!ATTLIST type spec for "
+ (string (first contents-to-return))
+ ": '"
+ (compute-coll-string coll)
+ "'"))
+ ))
+ (#.state-dtd-!-attdef-notation5
+ (if* (xml-space-p ch) then (setf state state-dtd-!-attdef-decl)
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ else
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error
+ (concatenate 'string
+ "Expected space before: '"
+ (compute-coll-string coll) "'"))))
+ (#.state-dtd-!-attdef-notation4
+ (if* (xml-space-p ch) then nil
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ elseif (xml-name-char-p ch) then (add-to-coll coll ch)
+ (setf state state-dtd-!-attdef-notation3)
+ elseif (eq #\| ch) then (setf state state-dtd-!-attdef-notation2)
+ elseif (eq #\) ch) then (setf state state-dtd-!-attdef-decl)
+ (setf (first pending) (nreverse (first pending)))
+ else (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD <!ATTLIST type spec for "
+ (string (first contents-to-return))
+ ": '"
+ (compute-coll-string coll)
+ "'"))
+ ))
+ (#.state-dtd-!-attdef-decl
+ (if* (eq #\# ch) then
+ (setf state state-dtd-!-attdef-decl-type)
+ elseif (or (eq #\' ch) (eq #\" ch)) then
+ (setf value-delim ch)
+ (setf state state-dtd-!-attdef-decl-value)
+ elseif (xml-space-p ch) then nil
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ else (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD <!ATTLIST type spec for "
+ (string (first contents-to-return))
+ ": '"
+ (compute-coll-string coll)
+ "'"))
+ ))
+ (#.state-dtd-!-attdef-decl-value
+ (if* (eq ch value-delim) then
+ #-ignore
+ (push (first (parse-default-value (list (compute-coll-string coll))
+ tokenbuf external-callback))
+ pending)
+ #+ignore
+ (push (compute-coll-string coll) pending)
+ (setf contents-to-return
+ (append contents-to-return
+ (if* entityp then
+ (nreverse pending)
+ else (list (nreverse pending)))))
+ (setf pending (list nil))
+ (setf state state-dtd-!-attdef)
+ (clear-coll coll)
+ elseif (eq #\& ch) then (setf state state-dtd-!-attdef-decl-value3)
+ (setf reference-save-state state-dtd-!-attdef-decl-value)
+ elseif (and (xml-char-p ch) (not (eq #\< ch)))
+ then (add-to-coll coll ch)
+ else (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD <!ATTLIST type spec for "
+ (string (first contents-to-return))
+ ": '"
+ (compute-coll-string coll)
+ "'"))
+ ))
+ (#.state-dtd-!-attdef-decl-value3
+ (if* (and (not prefp) (eq #\# ch))
+ then (setf state state-dtd-!-attdef-decl-value4)
+ elseif (xml-name-start-char-p ch)
+ then (setf state state-dtd-!-attdef-decl-value5)
+ (when (not prefp) (add-to-coll coll #\&))
+ (un-next-char ch)
+ else (clear-coll coll)
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal reference name, starting at: '&"
+ (compute-coll-string coll)
+ "'"))))
+ (#.state-dtd-!-attdef-decl-value4
+ (if* (eq #\x ch)
+ then (setf state state-dtd-!-attdef-decl-value6)
+ elseif (<= (char-code #\0) (char-code ch) (char-code #\9))
+ then (setf state state-dtd-!-attdef-decl-value7)
+ (un-next-char ch)
+ else (clear-coll coll)
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal character reference code, starting at: '&#"
+ (compute-coll-string coll)
+ "'"))
+ ))
+ (#.state-dtd-!-attdef-decl-value5
+ (if* (xml-name-char-p ch)
+ then (add-to-coll entity ch)
+ (when (not prefp) (add-to-coll coll ch))
+ elseif (eq #\; ch)
+ then
+ (if* (not prefp) then (add-to-coll coll ch)
+ elseif (not external) then
+ (xml-error
+ (concatenate 'string
+ "internal dtd subset cannot reference parameter entity within a token; entity: "
+ (compute-coll-string entity)))
+ else
+ (let* ((entity-symbol (compute-tag entity))
+ (p-value
+ (assoc entity-symbol (iostruct-parameter-entities tokenbuf))))
+ (clear-coll entity)
+ (if* (and (iostruct-do-entity tokenbuf)
+ (setf p-value
+ (assoc entity-symbol
+ (iostruct-parameter-entities tokenbuf)))) then
+ (setf p-value (rest p-value))
+ (when (member entity-symbol (iostruct-entity-names tokenbuf))
+ (xml-error (concatenate 'string
+ "entity:"
+ (string entity-symbol)
+ " in recursive reference")))
+ (push entity-symbol (iostruct-entity-names tokenbuf))
+ (if* (stringp p-value) then
+ (dotimes (i (length p-value))
+ (add-to-coll coll (schar p-value i)))
+ elseif p-value then
+ (if* (null external-callback) then
+ (setf (iostruct-do-entity tokenbuf) nil)
+ else
+ (let ((count 0) (string "<?xml ") last-ch
+ save-ch save-unget
+ (tmp-count 0)
+ (entity-stream
+ (apply external-callback p-value)))
+ (when entity-stream
+ (let ((tmp-buf (get-tokenbuf)))
+ (setf (tokenbuf-stream tmp-buf)
+ entity-stream)
+ (setf save-unget
+ (iostruct-unget-char tokenbuf))
+ (setf (iostruct-unget-char tokenbuf) nil)
+ (unicode-check entity-stream tokenbuf)
+ (when (iostruct-unget-char tokenbuf)
+ (setf save-ch (first (iostruct-unget-char tokenbuf))))
+ (setf (iostruct-unget-char tokenbuf) save-unget)
+ (loop
+ (let ((cch
+ (if* save-ch
+ then
+ (let ((s2 save-ch))
+ (setf save-ch nil)
+ s2)
+ else
+ (next-char
+ tmp-buf
+ (iostruct-read-sequence-func
+ tokenbuf)))))
+ (when (null cch) (return))
+ (when *debug-dtd*
+ (format t "dtd-char: ~s~%" cch))
+ (if* (< count 0) then
+ (if* (and (eq last-ch #\?)
+ (eq cch #\>)) then
+ (setf count 6)
+ else (setf last-ch cch))
+ elseif (< count 6) then
+ (when (and (= count 5)
+ (xml-space-p cch))
+ (setf cch #\space))
+ (if* (not (eq cch
+ (schar string count)
+ )) then
+ (loop
+ (when (= tmp-count count)
+ (return))
+ (add-to-coll coll
+ (schar string
+ tmp-count))
+ (incf tmp-count))
+ (add-to-coll coll cch)
+ (setf count 10)
+ else (incf count))
+ elseif (= count 6) then
+ (dotimes (i 6)
+ (add-to-coll coll (schar string i)))
+ (setf count 10)
+ else (add-to-coll coll cch))))
+ (setf (iostruct-entity-names tokenbuf)
+ (rest (iostruct-entity-names tokenbuf)))
+ (close entity-stream)
+ (put-back-tokenbuf tmp-buf)))))
+ )
+ (setf state state-dtdstart)
+ else nil
+ )))
+ (setf state reference-save-state)
+ else (let ((tmp (compute-coll-string entity)))
+ (clear-coll coll)
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "reference not terminated by ';', starting at: '&"
+ tmp
+ (compute-coll-string coll)
+ "'")))
+ ))
+ (#.state-dtd-!-attdef-decl-value6
+ (let ((code (char-code ch)))
+ (if* (eq #\; ch)
+ then (add-to-coll coll (code-char char-code))
+ (setf char-code 0)
+ (setq state state-dtd-!-attdef-decl-value)
+ elseif (<= (char-code #\0) code (char-code #\9))
+ then (setf char-code (+ (* char-code 16) (- code (char-code #\0))))
+ elseif (<= (char-code #\A) code (char-code #\F))
+ then (setf char-code (+ 10 (* char-code 16) (- code (char-code #\A))))
+ elseif (<= (char-code #\a) code (char-code #\f))
+ then (setf char-code (+ 10 (* char-code 16) (- code (char-code #\a))))
+ else (clear-coll coll)
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal hexidecimal character reference code, starting at: '"
+ (compute-coll-string coll)
+ "', calculated char code: "
+ (format nil "~s" char-code)))
+ )))
+ (#.state-dtd-!-attdef-decl-value7
+ (let ((code (char-code ch)))
+ (if* (eq #\; ch)
+ then (add-to-coll coll (code-char char-code))
+ (setf char-code 0)
+ (setq state reference-save-state)
+ elseif (<= (char-code #\0) code (char-code #\9))
+ then (setf char-code (+ (* char-code 10) (- code (char-code #\0))))
+ else (clear-coll coll)
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal decimal character reference code, starting at: '"
+ (compute-coll-string coll)
+ "', calculated char code: "
+ (format nil "~s" char-code)))
+ )))
+ (#.state-dtd-!-attdef-decl-type
+ (if* (xml-name-char-p ch) then (add-to-coll coll ch)
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ elseif (or (xml-space-p ch) (eq #\> ch)) then
+ (let ((token (compute-tag coll)))
+ (when (and (not (eq :REQUIRED token))
+ (not (eq :IMPLIED token))
+ (not (eq :FIXED token)))
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD <!ATTLIST type spec for "
+ (string (first contents-to-return))
+ ": '"
+ (compute-coll-string coll)
+ "'")))
+ (push token pending)
+ (if* (eq :FIXED token) then
+ (when (eq #\> ch)
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD <!ATTLIST type spec for "
+ (string (first contents-to-return))
+ ": '"
+ (compute-coll-string coll)
+ "'")))
+ (setf state state-dtd-!-attdef-decl-value2)
+ elseif (eq #\> ch) then
+ (setf contents-to-return
+ (append contents-to-return (list (nreverse pending))))
+ (return)
+ else (setf contents-to-return
+ (append contents-to-return (list (nreverse pending))))
+ (setf pending (list nil))
+ (setf state state-dtd-!-attdef)))
+ (clear-coll coll)
+ else (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD <!ATTLIST type spec for "
+ (string (first contents-to-return))
+ ": '"
+ (compute-coll-string coll)
+ "'"))
+ ))
+ (#. state-dtd-!-attdef-decl-value2
+ (if* (xml-space-p ch) then nil
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ elseif (or (eq #\' ch) (eq #\" ch)) then
+ (setf value-delim ch)
+ (setf state state-dtd-!-attdef-decl-value)
+ else (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD <!ATTLIST type spec for "
+ (string (first contents-to-return))
+ ": '"
+ (compute-coll-string coll)
+ "'"))
+ ))
+ (#.state-dtd-!-element
+ (if* (xml-space-p ch) then nil
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ elseif (xml-name-start-char-p ch) then (setf state state-dtd-!-element-name)
+ (un-next-char ch)
+ else (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD characters, starting at: '<!ELEMENT "
+ (compute-coll-string coll)
+ "'"))))
+ (#.state-dtd-!-element-name
+ (if* (xml-name-char-p ch) then (add-to-coll coll ch)
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ elseif (xml-space-p ch) then
+ (push (compute-tag coll)
+ contents-to-return)
+ (clear-coll coll)
+ (setf state state-dtd-!-element-type)
+ else (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD <!ELEMENT name: '"
+ (compute-coll-string coll)
+ "'"))
+ ))
+ (#.state-dtd-!-element-type
+ (if* (eq #\( ch) then (setf state state-dtd-!-element-type-paren)
+ elseif (xml-space-p ch) then nil
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ elseif (xml-name-start-char-p ch) then
+ (un-next-char ch)
+ (setf state state-dtd-!-element-type-token)
+ else (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD <!ELEMENT content spec for "
+ (string (first contents-to-return))
+ ": '"
+ (compute-coll-string coll)
+ "'"))
+ ))
+ (#.state-dtd-!-element-type-paren
+ (if* (xml-space-p ch) then nil
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ elseif (xml-name-start-char-p ch) then
+ (un-next-char ch)
+ (setf state state-dtd-!-element-type-paren-name)
+ elseif (eq #\# ch) then
+ (setf state state-dtd-!-element-type-paren-pcd)
+ elseif (eq #\( ch) then
+ (push nil pending)
+ (setf state state-dtd-!-element-type-paren-choice-paren)
+ else (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD <!ELEMENT content spec for "
+ (string (first contents-to-return))
+ ": '"
+ (compute-coll-string coll)
+ "'"))))
+ (#.state-dtd-!-element-type-paren2
+ (if* (eq #\> ch) then
+ ;; there only one name...
+ (setf (first contents-to-return) (first (first contents-to-return)))
+ (return)
+ elseif (eq #\* ch) then
+ (setf state state-dtd-!-element-type-paren-pcd5)
+ (setf (first contents-to-return) (nreverse (first contents-to-return)))
+ (if* (> (length (first contents-to-return)) 1) then
+ (setf (first contents-to-return)
+ (list (append (list :choice)
+ (first contents-to-return))))
+ elseif (listp (first (first contents-to-return))) then
+ (setf (first contents-to-return)
+ (first (first contents-to-return))))
+ (push :* (first contents-to-return))
+ elseif (eq #\? ch) then
+ (setf state state-dtd-!-element-type-paren-pcd5)
+ (setf (first contents-to-return) (nreverse (first contents-to-return)))
+ (if* (> (length (first contents-to-return)) 1) then
+ (setf (first contents-to-return)
+ (list (append (list :choice)
+ (first contents-to-return))))
+ elseif (listp (first (first contents-to-return))) then
+ (setf (first contents-to-return)
+ (first (first contents-to-return))))
+ (push :? (first contents-to-return))
+ elseif (eq #\+ ch) then
+ (setf state state-dtd-!-element-type-paren-pcd5)
+ (setf (first contents-to-return) (nreverse (first contents-to-return)))
+ (if* (> (length (first contents-to-return)) 1) then
+ (setf (first contents-to-return)
+ (list (append (list :choice)
+ (first contents-to-return))))
+ elseif (listp (first (first contents-to-return))) then
+ (setf (first contents-to-return)
+ (first (first contents-to-return))))
+ (push :+ (first contents-to-return))
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ elseif (xml-space-p ch) then
+ (setf state state-dtd-!-element-type-paren-pcd5)
+ (setf (first contents-to-return) (nreverse (first contents-to-return)))
+ (when (> (length (first contents-to-return)) 1)
+ (setf (first contents-to-return)
+ (list (append (list :\choice)
+ (first contents-to-return)))))
+ else (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD <!ELEMENT content spec for "
+ (string (first (reverse contents-to-return)))
+ ": '"
+ (compute-coll-string coll)
+ "'"))
+ ))
+ (#.state-dtd-!-element-type-paren-name
+ (if* (xml-name-char-p ch) then (add-to-coll coll ch)
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ elseif (xml-space-p ch) then
+ (push (compute-tag coll) (first pending))
+ (clear-coll coll)
+ (setf state state-dtd-!-element-type-paren-name2)
+ elseif (eq #\? ch) then
+ (push (compute-tag coll) (first pending))
+ (setf (first pending)
+ (list (push :? (first pending))))
+ (clear-coll coll)
+ (setf state state-dtd-!-element-type-paren-name2)
+ elseif (eq #\* ch) then
+ (push (compute-tag coll) (first pending))
+ (setf (first pending)
+ (list (push :* (first pending))))
+ (clear-coll coll)
+ (setf state state-dtd-!-element-type-paren-name2)
+ elseif (eq #\+ ch) then
+ (push (compute-tag coll) (first pending))
+ (setf (first pending)
+ (list (push :+ (first pending))))
+ (clear-coll coll)
+ (setf state state-dtd-!-element-type-paren-name2)
+ elseif (eq #\) ch) then
+ (push (compute-tag coll) (first pending))
+ (clear-coll coll)
+ (if* (= (length pending) 1) then
+ (push (first pending) contents-to-return)
+ (setf state state-dtd-!-element-type-paren2)
+ else ;; this is (xxx)
+ (if* (second pending) then
+ (push (first pending) (second pending))
+ else (setf (second pending) (first pending)))
+ (setf pending (rest pending))
+ (setf state state-dtd-!-element-type-paren-choice-name3)
+ )
+ elseif (eq #\, ch) then
+ (when (and (first pending) (not (eq :seq (first pending-type))))
+ (clear-coll coll)
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal '|' and ',' mix starting at '"
+ (compute-coll-string coll)
+ "'")))
+ (push (compute-tag coll) (first pending))
+ (push :seq pending-type)
+ (clear-coll coll)
+ (setf state state-dtd-!-element-type-paren-choice)
+ elseif (eq #\| ch) then
+ (when (and (first pending) (not (eq :choice (first pending-type))))
+ (clear-coll coll)
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal '|' and ',' mix starting at '"
+ (compute-coll-string coll)
+ "'")))
+ (push (compute-tag coll) (first pending))
+ (push :choice pending-type)
+ (clear-coll coll)
+ (setf state state-dtd-!-element-type-paren-choice)
+ else (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD <!ELEMENT content spec for "
+ (string (first contents-to-return))
+ ": '"
+ (compute-coll-string coll)
+ "'"))
+ ))
+ (#.state-dtd-!-element-type-paren-name2
+ (if* (xml-space-p ch) then nil
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ elseif (eq #\| ch) then
+ (when (and (rest (first pending)) (not (eq :choice (first pending-type))))
+ (clear-coll coll)
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal '|' and ',' mix starting at '"
+ (compute-coll-string coll)
+ "'")))
+ (push :choice pending-type)
+ (setf state state-dtd-!-element-type-paren-choice)
+ elseif (eq #\, ch) then
+ (when (and (rest (first pending)) (not (eq :seq (first pending-type))))
+ (clear-coll coll)
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal '|' and ',' mix starting at '"
+ (compute-coll-string coll)
+ "'")))
+ (push :seq pending-type)
+ (setf state state-dtd-!-element-type-paren-choice)
+ elseif (eq #\) ch) then
+ (if* (= (length pending) 1) then
+ (push (list (first pending)) contents-to-return)
+ (setf state state-dtd-!-element-type-paren2)
+ else (setf pending (reverse (rest (reverse pending))))
+ )
+ else (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD <!ELEMENT content spec for "
+ (string (first (reverse contents-to-return)))
+ ": '"
+ (compute-coll-string coll)
+ "'"))
+ ))
- (#.state-dtd-!-element-type-paren-choice
- (if* (xml-name-start-char-p ch) then
- (un-next-char ch)
- (setf state state-dtd-!-element-type-paren-choice-name)
- elseif (xml-space-p ch) then nil
- elseif (and external (eq #\% ch)) then
- (external-param-reference tokenbuf coll external-callback)
- elseif (eq #\( ch) then
- (push nil pending)
- (setf state state-dtd-!-element-type-paren-choice-paren)
- elseif (eq #\) ch) then
- (if* (= (length pending) 1) then
- (setf (first pending) (nreverse (first pending)))
- (if* (> (length (first pending)) 1) then
- (push (first pending-type) (first pending))
- (setf pending-type (rest pending-type))
- else (setf (first pending) (first (first pending))))
- (push (first pending) contents-to-return)
- (setf state state-dtd-!-element-type-paren3)
- else (setf (first pending) (nreverse (first pending)))
- (if* (> (length (first pending)) 1) then
- (push (first pending-type) (first pending))
- (setf pending-type (rest pending-type))
- else (setf (first pending) (first (first pending))))
- (if* (second pending) then
- (push (first pending) (second pending))
- else (setf (second pending) (list (first pending))))
- (setf pending (rest pending))
- (setf state state-dtd-!-element-type-paren-choice-name3)
- )
- else (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error (concatenate 'string
- "illegal DTD <!ELEMENT content spec for "
- (string (first (reverse contents-to-return)))
- ": '"
- (compute-coll-string coll)
- "'"))
- ))
+ (#.state-dtd-!-element-type-paren-choice
+ (if* (xml-name-start-char-p ch) then
+ (un-next-char ch)
+ (setf state state-dtd-!-element-type-paren-choice-name)
+ elseif (xml-space-p ch) then nil
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ elseif (eq #\( ch) then
+ (push nil pending)
+ (setf state state-dtd-!-element-type-paren-choice-paren)
+ elseif (eq #\) ch) then
+ (if* (= (length pending) 1) then
+ (setf (first pending) (nreverse (first pending)))
+ (if* (> (length (first pending)) 1) then
+ (push (first pending-type) (first pending))
+ (setf pending-type (rest pending-type))
+ else (setf (first pending) (first (first pending))))
+ (push (first pending) contents-to-return)
+ (setf state state-dtd-!-element-type-paren3)
+ else (setf (first pending) (nreverse (first pending)))
+ (if* (> (length (first pending)) 1) then
+ (push (first pending-type) (first pending))
+ (setf pending-type (rest pending-type))
+ else (setf (first pending) (first (first pending))))
+ (if* (second pending) then
+ (push (first pending) (second pending))
+ else (setf (second pending) (list (first pending))))
+ (setf pending (rest pending))
+ (setf state state-dtd-!-element-type-paren-choice-name3)
+ )
+ else (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD <!ELEMENT content spec for "
+ (string (first (reverse contents-to-return)))
+ ": '"
+ (compute-coll-string coll)
+ "'"))
+ ))
- (#.state-dtd-!-element-type-paren-choice-paren
- (if* (xml-name-start-char-p ch) then
- (setf state state-dtd-!-element-type-paren-name)
- (un-next-char ch)
- elseif (eq #\( ch) then (push nil pending)
- elseif (xml-space-p ch) then nil
- elseif (and external (eq #\% ch)) then
- (external-param-reference tokenbuf coll external-callback)
- else (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error (concatenate 'string
- "illegal DTD <!ELEMENT content spec for "
- (string (first contents-to-return))
- ": '"
- (compute-coll-string coll)
- "'"))
- ))
- (#.state-dtd-!-element-type-paren-choice-name
- (if* (xml-name-char-p ch) then (add-to-coll coll ch)
- elseif (and external (eq #\% ch)) then
- (external-param-reference tokenbuf coll external-callback)
- elseif (xml-space-p ch) then
- (push (compute-tag coll) (first pending))
- (clear-coll coll)
- (setf state state-dtd-!-element-type-paren-choice-name2)
- elseif (eq #\? ch) then
- (push (list :? (compute-tag coll)) (first pending))
- (clear-coll coll)
- (setf state state-dtd-!-element-type-paren-choice-name2)
- elseif (eq #\* ch) then
- (push (list :* (compute-tag coll)) (first pending))
- (clear-coll coll)
- (setf state state-dtd-!-element-type-paren-choice-name2)
- elseif (eq #\+ ch) then
- (push (list :+ (compute-tag coll)) (first pending))
- (clear-coll coll)
- (setf state state-dtd-!-element-type-paren-choice-name2)
- elseif (eq #\) ch) then
- (push (compute-tag coll) (first pending))
- (clear-coll coll)
- (if* (= (length pending) 1) then
- (setf (first pending) (nreverse (first pending)))
- (if* (> (length (first pending)) 1) then
- (push (first pending-type) (first pending))
- (setf pending-type (rest pending-type))
- else (setf (first pending) (first (first pending))))
- (push (first pending) contents-to-return)
- (setf state state-dtd-!-element-type-paren3)
- else (setf (first pending) (nreverse (first pending)))
- (push (first pending-type) (first pending))
- (setf pending-type (rest pending-type))
- (if* (second pending) then
- (push (first pending) (second pending))
- else (setf (second pending)
- ;; (list (first pending)) ;2001-03-22
- (first pending) ;2001-03-22
- ))
- (setf pending (rest pending))
- (setf state state-dtd-!-element-type-paren-choice-name3)
- )
- elseif (eq #\, ch) then
- (when (and (first pending) (not (eq :seq (first pending-type))))
- (clear-coll coll)
- (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error (concatenate 'string
- "illegal '|' and ',' mix starting at '"
- (compute-coll-string coll)
- "'")))
- (push (compute-tag coll) (first pending))
- (clear-coll coll)
- (push :seq pending-type)
- (setf state state-dtd-!-element-type-paren-choice)
- elseif (eq #\| ch) then
- (when (and (first pending) (not (eq :choice (first pending-type))))
- (clear-coll coll)
- (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error (concatenate 'string
- "illegal '|' and ',' mix starting at '"
- (compute-coll-string coll)
- "'")))
- (push (compute-tag coll) (first pending))
- (clear-coll coll)
- (push :choice pending-type)
- (setf state state-dtd-!-element-type-paren-choice)
- else (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error (concatenate 'string
- "illegal DTD <!ELEMENT content spec for "
- (string (first contents-to-return))
- ": '"
- (compute-coll-string coll)
- "'"))
- ))
- (#.state-dtd-!-element-type-paren-choice-name2
- (if* (eq #\| ch)
- ;; begin changes 2001-03-22
- then (setf state state-dtd-!-element-type-paren-choice)
- (push :choice pending-type)
- elseif (eq #\, ch)
- then (setf state state-dtd-!-element-type-paren-choice)
- (push :seq pending-type)
- ;; end changes 2001-03-22
- elseif (xml-space-p ch) then nil
- elseif (and external (eq #\% ch)) then
- (external-param-reference tokenbuf coll external-callback)
- elseif (eq #\) ch) then
- (if* (= (length pending) 1) then
- (setf (first pending) (nreverse (first pending)))
- (if* (> (length (first pending)) 1) then
- (push (first pending-type) (first pending))
- (setf pending-type (rest pending-type))
- else (setf (first pending) (first (first pending))))
- (push (first pending) contents-to-return)
- (setf state state-dtd-!-element-type-paren3)
- else (setf (first pending) (nreverse (first pending)))
- (push (first pending-type) (first pending))
- (setf pending-type (rest pending-type))
- (if* (second pending) then
- (push (first pending) (second pending))
- else (setf (second pending) (list (first pending))))
- (setf state state-dtd-!-element-type-paren-choice-name3)
- )
- (setf pending (rest pending))
- else (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error (concatenate 'string
- "illegal DTD <!ELEMENT content spec for "
- (string (first contents-to-return))
- ": '"
- (compute-coll-string coll)
- "'"))
- ))
- (#.state-dtd-!-element-type-paren-choice-name3
- (if* (xml-space-p ch) then nil
- elseif (and external (eq #\% ch)) then
- (external-param-reference tokenbuf coll external-callback)
- elseif (eq #\? ch) then
- (setf (first pending) (list :? (first pending)))
- (setf state state-dtd-!-element-type-paren-choice-name2)
- elseif (eq #\* ch) then
- (setf (first pending) (list :* (first pending)))
- (setf state state-dtd-!-element-type-paren-choice-name2)
- elseif (eq #\+ ch) then
- (setf (first pending) (list :+ (first pending)))
- (setf state state-dtd-!-element-type-paren-choice-name2)
- elseif (eq #\) ch) then
- (if* (= (length pending) 1) then
- (setf (first pending) (nreverse (first pending)))
- (if* (> (length (first pending)) 1) then
- (push (first pending-type) (first pending))
- (setf pending-type (rest pending-type))
- else (setf (first pending) (first (first pending))))
- (push (first pending) contents-to-return)
- (setf pending (rest pending))
- (setf state state-dtd-!-element-type-paren3)
- else (setf (first pending) (nreverse (first pending)))
- (if* (> (length (first pending)) 1) then
- (push (first pending-type) (first pending))
- (setf pending-type (rest pending-type))
- else (setf (first pending) (first (first pending))))
- (if* (second pending) then
- (push (first pending) (second pending))
- else (setf (second pending) (list (first pending))))
- (setf pending (rest pending))
- (setf state state-dtd-!-element-type-paren-choice)
- )
- elseif (eq #\, ch) then
- (when (and (rest (first pending)) (not (eq :seq (first pending-type))))
- (clear-coll coll)
- (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error (concatenate 'string
- "illegal '|' and ',' mix starting at '"
- (compute-coll-string coll)
- "'")))
- (push :seq pending-type)
- (setf state state-dtd-!-element-type-paren-choice)
- elseif (eq #\| ch) then
- (when (and (rest (first pending)) (not (eq :choice (first pending-type))))
- (clear-coll coll)
- (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error (concatenate 'string
- "illegal '|' and ',' mix starting at '"
- (compute-coll-string coll)
- "'")))
- (push :choice pending-type)
- (setf state state-dtd-!-element-type-paren-choice)
- else (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error (concatenate 'string
- "illegal DTD <!ELEMENT content spec for "
- (string (first contents-to-return))
- ": '"
- (compute-coll-string coll)
- "'"))
- ))
- (#.state-dtd-!-element-type-paren3
- (if* (eq #\+ ch) then
- (setf (first contents-to-return)
- (append (list :+) (list (first contents-to-return))))
- (setf state state-dtd-!-element-type-paren-pcd5)
- elseif (eq #\? ch) then
- (setf (first contents-to-return)
- (append (list :?) (list (first contents-to-return))))
- (setf state state-dtd-!-element-type-paren-pcd5)
- elseif (eq #\* ch) then
- (setf (first contents-to-return)
- (append (list :*) (list (first contents-to-return))))
- (setf state state-dtd-!-element-type-paren-pcd5)
- elseif (and external (eq #\% ch)) then
- (external-param-reference tokenbuf coll external-callback)
- elseif (xml-space-p ch) then
- (setf state state-dtd-!-element-type-paren-pcd5)
- elseif (eq #\> ch) then (return)
- else (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error (concatenate 'string
- "illegal DTD <!ELEMENT content spec for "
- (string (first (reverse contents-to-return)))
- ": '"
- (compute-coll-string coll)
- "'"))
- ))
- (#.state-dtd-!-element-type-paren-pcd
- (if* (xml-name-char-p ch) then (add-to-coll coll ch)
- elseif (and external (eq #\% ch)) then
- (external-param-reference tokenbuf coll external-callback)
- elseif (xml-space-p ch) then
- (let ((token (compute-tag coll)))
- (when (not (eq token :PCDATA))
- (xml-error (concatenate 'string
- "illegal DTD <!ELEMENT content spec for "
- (string (first contents-to-return))
- ": '"
- (compute-coll-string coll)
- "'")))
- (clear-coll coll)
- (push token contents-to-return))
- (setf state state-dtd-!-element-type-paren-pcd2)
- elseif (eq #\| ch) then
- (let ((token (compute-tag coll)))
- (when (not (eq token :PCDATA))
- (xml-error (concatenate 'string
- "illegal DTD <!ELEMENT content spec for "
- (string (first contents-to-return))
- ": '"
- (compute-coll-string coll)
- "'")))
- (push token contents-to-return))
- (clear-coll coll)
- (setf state state-dtd-!-element-type-paren-pcd3)
- elseif (eq #\) ch) then
- (let ((token (compute-tag coll)))
- (when (not (eq token :PCDATA))
- (xml-error (concatenate 'string
- "illegal DTD <!ELEMENT content spec for "
- (string (first contents-to-return))
- ": '"
- (compute-coll-string coll)
- "'")))
- (push token contents-to-return))
- (setf state state-dtd-!-element-type-paren-pcd4)
- else (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error (concatenate 'string
- "illegal DTD <!ELEMENT content spec for "
- (string (first contents-to-return))
- ": '"
- (compute-coll-string coll)
- "'"))
- ))
- (#.state-dtd-!-element-type-paren-pcd2
- (if* (xml-space-p ch) then nil
- elseif (and external (eq #\% ch)) then
- (external-param-reference tokenbuf coll external-callback)
- elseif (eq #\) ch) then
- (setf state state-dtd-!-element-type-paren-pcd4)
- elseif (eq #\| ch) then (setf state state-dtd-!-element-type-paren-pcd3)
- else (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error (concatenate 'string
- "illegal DTD <!ELEMENT content spec for "
- (string (first (reverse contents-to-return)))
- ": '"
- (compute-coll-string coll)
- "'"))
- ))
- (#.state-dtd-!-element-type-paren-pcd3
- (if* (xml-space-p ch) then nil
- elseif (and external (eq #\% ch)) then
- (external-param-reference tokenbuf coll external-callback)
- elseif (xml-name-start-char-p ch) then
- (un-next-char ch)
- (setf state state-dtd-!-element-type-paren-pcd7)
- else (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error (concatenate 'string
- "illegal DTD <!ELEMENT content spec for "
- (string (first (reverse contents-to-return)))
- ": '"
- (compute-coll-string coll)
- "'"))
- ))
- (#.state-dtd-!-element-type-paren-pcd4
- (if* (xml-space-p ch) then
- (setf state state-dtd-!-element-type-paren-pcd6)
- elseif (and external (eq #\% ch)) then
- (external-param-reference tokenbuf coll external-callback)
- elseif (eq #\* ch) then
- (setf (first contents-to-return) '(:* :PCDATA))
- (setf state state-dtd-!-element-type-paren-pcd5)
- elseif (eq #\> ch) then (return)
- else (clear-coll coll)
- (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error (concatenate 'string
- "illegal DTD contents following <!ELEMENT content spec for "
- (string (first (reverse contents-to-return)))
- ": '"
- (compute-coll-string coll)
- "'"))
- ))
- (#.state-dtd-!-element-type-paren-pcd5
- (if* (xml-space-p ch) then nil
- elseif (and external (eq #\% ch)) then
- (external-param-reference tokenbuf coll external-callback)
- elseif (eq #\> ch) then (return)
- else (clear-coll coll)
- (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error (concatenate 'string
- "illegal DTD contents following <!ELEMENT content spec for "
- (string (first (reverse contents-to-return)))
- ": '"
- (compute-coll-string coll)
- "'"))
- ))
- (#.state-dtd-!-element-type-paren-pcd6
- (if* (xml-space-p ch) then nil
- elseif (and external (eq #\% ch)) then
- (external-param-reference tokenbuf coll external-callback)
- elseif (eq #\> ch) then (return)
- else (clear-coll coll)
- (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error (concatenate 'string
- "illegal DTD contents following <!ELEMENT content spec for "
- (string (first (reverse contents-to-return)))
- ": '"
- (compute-coll-string coll)
- "'"))
- ))
- (#.state-dtd-!-element-type-paren-pcd7
- (if* (xml-name-char-p ch) then (add-to-coll coll ch)
- elseif (and external (eq #\% ch)) then
- (external-param-reference tokenbuf coll external-callback)
- elseif (xml-space-p ch) then
- (setf state state-dtd-!-element-type-paren-pcd8)
- (let ((token (compute-tag coll)))
- (clear-coll coll)
- (if* (listp (first contents-to-return)) then
- (push token (first contents-to-return))
- else (setf (first contents-to-return)
- (list token (first contents-to-return)))))
- elseif (eq #\) ch) then
- (setf state state-dtd-!-element-type-paren-pcd9)
- (let ((token (compute-tag coll)))
- (clear-coll coll)
- (if* (listp (first contents-to-return)) then
- (push token (first contents-to-return))
- else (setf (first contents-to-return)
- (list token (first contents-to-return)))))
- else (clear-coll coll)
- (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error (concatenate 'string
- "illegal DTD contents in <!ELEMENT content spec for "
- (string (first (reverse contents-to-return)))
- ": '"
- (compute-coll-string coll)
- "'"))
- ))
- (#.state-dtd-!-element-type-paren-pcd8
- (if* (xml-space-p ch) then nil
- elseif (and external (eq #\% ch)) then
- (external-param-reference tokenbuf coll external-callback)
- elseif (eq #\| ch) then (setf state state-dtd-!-element-type-paren-pcd3)
- elseif (eq #\) ch) then (setf state state-dtd-!-element-type-paren-pcd9)
- else (clear-coll coll)
- (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error (concatenate 'string
- "illegal DTD contents in <!ELEMENT content spec for "
- (string (first (reverse contents-to-return)))
- ": '"
- (compute-coll-string coll)
- "'"))
- ))
- (#.state-dtd-!-element-type-paren-pcd9
- (if* (eq #\* ch) then (setf state state-dtd-!-element-type-paren-pcd5)
- (setf (first contents-to-return) (nreverse (first contents-to-return)))
- (when (> (length (first contents-to-return)) 1)
- (setf (first contents-to-return)
- (list (append (list :choice)
- (first contents-to-return)))))
- (push :* (first contents-to-return))
- else (clear-coll coll)
- (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error (concatenate 'string
- "illegal DTD contents in <!ELEMENT content spec for "
- (string (first (reverse contents-to-return)))
- ": '"
- (compute-coll-string coll)
- "'"))
- ))
- (#.state-dtd-!-element-type-token
- (if* (xml-name-char-p ch) then (add-to-coll coll ch)
- elseif (and external (eq #\% ch)) then
- (external-param-reference tokenbuf coll external-callback)
- elseif (xml-space-p ch) then
- (let ((token (compute-tag coll)))
- (when (not (or (eq token :EMPTY) (eq token :ANY)))
- (xml-error (concatenate 'string
- "illegal DTD <!ELEMENT content spec for "
- (string (first contents-to-return))
- ": '"
- (compute-coll-string coll)
- "'")))
- (push token contents-to-return)
- (setf state state-dtd-!-element-type-end))
- elseif (eq #\> ch) then
- (let ((token (compute-tag coll)))
- (when (not (or (eq token :EMPTY) (eq token :ANY)))
- (xml-error (concatenate 'string
- "illegal DTD <!ELEMENT content spec for "
- (string (first contents-to-return))
- ": '"
- (compute-coll-string coll)
- "'")))
- (push token contents-to-return)
- (return))
- else (add-to-coll coll ch)
- (xml-error (concatenate 'string
- "illegal DTD <!ELEMENT content spec for "
- (string (first contents-to-return))
- ": '"
- (compute-coll-string coll)
- "'"))
- )
- )
- (#.state-dtd-!-element-type-end
- (if* (xml-space-p ch) then nil
- elseif (and external (eq #\% ch)) then
- (external-param-reference tokenbuf coll external-callback)
- elseif (eq #\> ch) then (return)
- else (xml-error (concatenate 'string
- "expected '>', got '"
- (string ch)
- "' in DTD <! ELEMENT "
- (string (first contents-to-return))
- " for "
- (string (second contents-to-return))))
- ))
- (t
- (error "need to support dtd state:~s" state))))
+ (#.state-dtd-!-element-type-paren-choice-paren
+ (if* (xml-name-start-char-p ch) then
+ (setf state state-dtd-!-element-type-paren-name)
+ (un-next-char ch)
+ elseif (eq #\( ch) then (push nil pending)
+ elseif (xml-space-p ch) then nil
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ else (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD <!ELEMENT content spec for "
+ (string (first contents-to-return))
+ ": '"
+ (compute-coll-string coll)
+ "'"))
+ ))
+ (#.state-dtd-!-element-type-paren-choice-name
+ (if* (xml-name-char-p ch) then (add-to-coll coll ch)
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ elseif (xml-space-p ch) then
+ (push (compute-tag coll) (first pending))
+ (clear-coll coll)
+ (setf state state-dtd-!-element-type-paren-choice-name2)
+ elseif (eq #\? ch) then
+ (push (list :? (compute-tag coll)) (first pending))
+ (clear-coll coll)
+ (setf state state-dtd-!-element-type-paren-choice-name2)
+ elseif (eq #\* ch) then
+ (push (list :* (compute-tag coll)) (first pending))
+ (clear-coll coll)
+ (setf state state-dtd-!-element-type-paren-choice-name2)
+ elseif (eq #\+ ch) then
+ (push (list :+ (compute-tag coll)) (first pending))
+ (clear-coll coll)
+ (setf state state-dtd-!-element-type-paren-choice-name2)
+ elseif (eq #\) ch) then
+ (push (compute-tag coll) (first pending))
+ (clear-coll coll)
+ (if* (= (length pending) 1) then
+ (setf (first pending) (nreverse (first pending)))
+ (if* (> (length (first pending)) 1) then
+ (push (first pending-type) (first pending))
+ (setf pending-type (rest pending-type))
+ else (setf (first pending) (first (first pending))))
+ (push (first pending) contents-to-return)
+ (setf state state-dtd-!-element-type-paren3)
+ else (setf (first pending) (nreverse (first pending)))
+ (push (first pending-type) (first pending))
+ (setf pending-type (rest pending-type))
+ (if* (second pending) then
+ (push (first pending) (second pending))
+ else (setf (second pending)
+ ;; (list (first pending)) ;2001-03-22
+ (first pending) ;2001-03-22
+ ))
+ (setf pending (rest pending))
+ (setf state state-dtd-!-element-type-paren-choice-name3)
+ )
+ elseif (eq #\, ch) then
+ (when (and (first pending) (not (eq :seq (first pending-type))))
+ (clear-coll coll)
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal '|' and ',' mix starting at '"
+ (compute-coll-string coll)
+ "'")))
+ (push (compute-tag coll) (first pending))
+ (clear-coll coll)
+ (push :seq pending-type)
+ (setf state state-dtd-!-element-type-paren-choice)
+ elseif (eq #\| ch) then
+ (when (and (first pending) (not (eq :choice (first pending-type))))
+ (clear-coll coll)
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal '|' and ',' mix starting at '"
+ (compute-coll-string coll)
+ "'")))
+ (push (compute-tag coll) (first pending))
+ (clear-coll coll)
+ (push :choice pending-type)
+ (setf state state-dtd-!-element-type-paren-choice)
+ else (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD <!ELEMENT content spec for "
+ (string (first contents-to-return))
+ ": '"
+ (compute-coll-string coll)
+ "'"))
+ ))
+ (#.state-dtd-!-element-type-paren-choice-name2
+ (if* (eq #\| ch)
+ ;; begin changes 2001-03-22
+ then (setf state state-dtd-!-element-type-paren-choice)
+ (push :choice pending-type)
+ elseif (eq #\, ch)
+ then (setf state state-dtd-!-element-type-paren-choice)
+ (push :seq pending-type)
+ ;; end changes 2001-03-22
+ elseif (xml-space-p ch) then nil
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ elseif (eq #\) ch) then
+ (if* (= (length pending) 1) then
+ (setf (first pending) (nreverse (first pending)))
+ (if* (> (length (first pending)) 1) then
+ (push (first pending-type) (first pending))
+ (setf pending-type (rest pending-type))
+ else (setf (first pending) (first (first pending))))
+ (push (first pending) contents-to-return)
+ (setf state state-dtd-!-element-type-paren3)
+ else (setf (first pending) (nreverse (first pending)))
+ (push (first pending-type) (first pending))
+ (setf pending-type (rest pending-type))
+ (if* (second pending) then
+ (push (first pending) (second pending))
+ else (setf (second pending) (list (first pending))))
+ (setf state state-dtd-!-element-type-paren-choice-name3)
+ )
+ (setf pending (rest pending))
+ else (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD <!ELEMENT content spec for "
+ (string (first contents-to-return))
+ ": '"
+ (compute-coll-string coll)
+ "'"))
+ ))
+ (#.state-dtd-!-element-type-paren-choice-name3
+ (if* (xml-space-p ch) then nil
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ elseif (eq #\? ch) then
+ (setf (first pending) (list :? (first pending)))
+ (setf state state-dtd-!-element-type-paren-choice-name2)
+ elseif (eq #\* ch) then
+ (setf (first pending) (list :* (first pending)))
+ (setf state state-dtd-!-element-type-paren-choice-name2)
+ elseif (eq #\+ ch) then
+ (setf (first pending) (list :+ (first pending)))
+ (setf state state-dtd-!-element-type-paren-choice-name2)
+ elseif (eq #\) ch) then
+ (if* (= (length pending) 1) then
+ (setf (first pending) (nreverse (first pending)))
+ (if* (> (length (first pending)) 1) then
+ (push (first pending-type) (first pending))
+ (setf pending-type (rest pending-type))
+ else (setf (first pending) (first (first pending))))
+ (push (first pending) contents-to-return)
+ (setf pending (rest pending))
+ (setf state state-dtd-!-element-type-paren3)
+ else (setf (first pending) (nreverse (first pending)))
+ (if* (> (length (first pending)) 1) then
+ (push (first pending-type) (first pending))
+ (setf pending-type (rest pending-type))
+ else (setf (first pending) (first (first pending))))
+ (if* (second pending) then
+ (push (first pending) (second pending))
+ else (setf (second pending) (list (first pending))))
+ (setf pending (rest pending))
+ (setf state state-dtd-!-element-type-paren-choice)
+ )
+ elseif (eq #\, ch) then
+ (when (and (rest (first pending)) (not (eq :seq (first pending-type))))
+ (clear-coll coll)
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal '|' and ',' mix starting at '"
+ (compute-coll-string coll)
+ "'")))
+ (push :seq pending-type)
+ (setf state state-dtd-!-element-type-paren-choice)
+ elseif (eq #\| ch) then
+ (when (and (rest (first pending)) (not (eq :choice (first pending-type))))
+ (clear-coll coll)
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal '|' and ',' mix starting at '"
+ (compute-coll-string coll)
+ "'")))
+ (push :choice pending-type)
+ (setf state state-dtd-!-element-type-paren-choice)
+ else (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD <!ELEMENT content spec for "
+ (string (first contents-to-return))
+ ": '"
+ (compute-coll-string coll)
+ "'"))
+ ))
+ (#.state-dtd-!-element-type-paren3
+ (if* (eq #\+ ch) then
+ (setf (first contents-to-return)
+ (append (list :+) (list (first contents-to-return))))
+ (setf state state-dtd-!-element-type-paren-pcd5)
+ elseif (eq #\? ch) then
+ (setf (first contents-to-return)
+ (append (list :?) (list (first contents-to-return))))
+ (setf state state-dtd-!-element-type-paren-pcd5)
+ elseif (eq #\* ch) then
+ (setf (first contents-to-return)
+ (append (list :*) (list (first contents-to-return))))
+ (setf state state-dtd-!-element-type-paren-pcd5)
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ elseif (xml-space-p ch) then
+ (setf state state-dtd-!-element-type-paren-pcd5)
+ elseif (eq #\> ch) then (return)
+ else (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD <!ELEMENT content spec for "
+ (string (first (reverse contents-to-return)))
+ ": '"
+ (compute-coll-string coll)
+ "'"))
+ ))
+ (#.state-dtd-!-element-type-paren-pcd
+ (if* (xml-name-char-p ch) then (add-to-coll coll ch)
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ elseif (xml-space-p ch) then
+ (let ((token (compute-tag coll)))
+ (when (not (eq token :PCDATA))
+ (xml-error (concatenate 'string
+ "illegal DTD <!ELEMENT content spec for "
+ (string (first contents-to-return))
+ ": '"
+ (compute-coll-string coll)
+ "'")))
+ (clear-coll coll)
+ (push token contents-to-return))
+ (setf state state-dtd-!-element-type-paren-pcd2)
+ elseif (eq #\| ch) then
+ (let ((token (compute-tag coll)))
+ (when (not (eq token :PCDATA))
+ (xml-error (concatenate 'string
+ "illegal DTD <!ELEMENT content spec for "
+ (string (first contents-to-return))
+ ": '"
+ (compute-coll-string coll)
+ "'")))
+ (push token contents-to-return))
+ (clear-coll coll)
+ (setf state state-dtd-!-element-type-paren-pcd3)
+ elseif (eq #\) ch) then
+ (let ((token (compute-tag coll)))
+ (when (not (eq token :PCDATA))
+ (xml-error (concatenate 'string
+ "illegal DTD <!ELEMENT content spec for "
+ (string (first contents-to-return))
+ ": '"
+ (compute-coll-string coll)
+ "'")))
+ (push token contents-to-return))
+ (setf state state-dtd-!-element-type-paren-pcd4)
+ else (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD <!ELEMENT content spec for "
+ (string (first contents-to-return))
+ ": '"
+ (compute-coll-string coll)
+ "'"))
+ ))
+ (#.state-dtd-!-element-type-paren-pcd2
+ (if* (xml-space-p ch) then nil
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ elseif (eq #\) ch) then
+ (setf state state-dtd-!-element-type-paren-pcd4)
+ elseif (eq #\| ch) then (setf state state-dtd-!-element-type-paren-pcd3)
+ else (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD <!ELEMENT content spec for "
+ (string (first (reverse contents-to-return)))
+ ": '"
+ (compute-coll-string coll)
+ "'"))
+ ))
+ (#.state-dtd-!-element-type-paren-pcd3
+ (if* (xml-space-p ch) then nil
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ elseif (xml-name-start-char-p ch) then
+ (un-next-char ch)
+ (setf state state-dtd-!-element-type-paren-pcd7)
+ else (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD <!ELEMENT content spec for "
+ (string (first (reverse contents-to-return)))
+ ": '"
+ (compute-coll-string coll)
+ "'"))
+ ))
+ (#.state-dtd-!-element-type-paren-pcd4
+ (if* (xml-space-p ch) then
+ (setf state state-dtd-!-element-type-paren-pcd6)
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ elseif (eq #\* ch) then
+ (setf (first contents-to-return) '(:* :PCDATA))
+ (setf state state-dtd-!-element-type-paren-pcd5)
+ elseif (eq #\> ch) then (return)
+ else (clear-coll coll)
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD contents following <!ELEMENT content spec for "
+ (string (first (reverse contents-to-return)))
+ ": '"
+ (compute-coll-string coll)
+ "'"))
+ ))
+ (#.state-dtd-!-element-type-paren-pcd5
+ (if* (xml-space-p ch) then nil
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ elseif (eq #\> ch) then (return)
+ else (clear-coll coll)
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD contents following <!ELEMENT content spec for "
+ (string (first (reverse contents-to-return)))
+ ": '"
+ (compute-coll-string coll)
+ "'"))
+ ))
+ (#.state-dtd-!-element-type-paren-pcd6
+ (if* (xml-space-p ch) then nil
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ elseif (eq #\> ch) then (return)
+ else (clear-coll coll)
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD contents following <!ELEMENT content spec for "
+ (string (first (reverse contents-to-return)))
+ ": '"
+ (compute-coll-string coll)
+ "'"))
+ ))
+ (#.state-dtd-!-element-type-paren-pcd7
+ (if* (xml-name-char-p ch) then (add-to-coll coll ch)
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ elseif (xml-space-p ch) then
+ (setf state state-dtd-!-element-type-paren-pcd8)
+ (let ((token (compute-tag coll)))
+ (clear-coll coll)
+ (if* (listp (first contents-to-return)) then
+ (push token (first contents-to-return))
+ else (setf (first contents-to-return)
+ (list token (first contents-to-return)))))
+ elseif (eq #\) ch) then
+ (setf state state-dtd-!-element-type-paren-pcd9)
+ (let ((token (compute-tag coll)))
+ (clear-coll coll)
+ (if* (listp (first contents-to-return)) then
+ (push token (first contents-to-return))
+ else (setf (first contents-to-return)
+ (list token (first contents-to-return)))))
+ else (clear-coll coll)
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD contents in <!ELEMENT content spec for "
+ (string (first (reverse contents-to-return)))
+ ": '"
+ (compute-coll-string coll)
+ "'"))
+ ))
+ (#.state-dtd-!-element-type-paren-pcd8
+ (if* (xml-space-p ch) then nil
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ elseif (eq #\| ch) then (setf state state-dtd-!-element-type-paren-pcd3)
+ elseif (eq #\) ch) then (setf state state-dtd-!-element-type-paren-pcd9)
+ else (clear-coll coll)
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD contents in <!ELEMENT content spec for "
+ (string (first (reverse contents-to-return)))
+ ": '"
+ (compute-coll-string coll)
+ "'"))
+ ))
+ (#.state-dtd-!-element-type-paren-pcd9
+ (if* (eq #\* ch) then (setf state state-dtd-!-element-type-paren-pcd5)
+ (setf (first contents-to-return) (nreverse (first contents-to-return)))
+ (when (> (length (first contents-to-return)) 1)
+ (setf (first contents-to-return)
+ (list (append (list :choice)
+ (first contents-to-return)))))
+ (push :* (first contents-to-return))
+ else (clear-coll coll)
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD contents in <!ELEMENT content spec for "
+ (string (first (reverse contents-to-return)))
+ ": '"
+ (compute-coll-string coll)
+ "'"))
+ ))
+ (#.state-dtd-!-element-type-token
+ (if* (xml-name-char-p ch) then (add-to-coll coll ch)
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ elseif (xml-space-p ch) then
+ (let ((token (compute-tag coll)))
+ (when (not (or (eq token :EMPTY) (eq token :ANY)))
+ (xml-error (concatenate 'string
+ "illegal DTD <!ELEMENT content spec for "
+ (string (first contents-to-return))
+ ": '"
+ (compute-coll-string coll)
+ "'")))
+ (push token contents-to-return)
+ (setf state state-dtd-!-element-type-end))
+ elseif (eq #\> ch) then
+ (let ((token (compute-tag coll)))
+ (when (not (or (eq token :EMPTY) (eq token :ANY)))
+ (xml-error (concatenate 'string
+ "illegal DTD <!ELEMENT content spec for "
+ (string (first contents-to-return))
+ ": '"
+ (compute-coll-string coll)
+ "'")))
+ (push token contents-to-return)
+ (return))
+ else (add-to-coll coll ch)
+ (xml-error (concatenate 'string
+ "illegal DTD <!ELEMENT content spec for "
+ (string (first contents-to-return))
+ ": '"
+ (compute-coll-string coll)
+ "'"))
+ )
+ )
+ (#.state-dtd-!-element-type-end
+ (if* (xml-space-p ch) then nil
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ elseif (eq #\> ch) then (return)
+ else (xml-error (concatenate 'string
+ "expected '>', got '"
+ (string ch)
+ "' in DTD <! ELEMENT "
+ (string (first contents-to-return))
+ " for "
+ (string (second contents-to-return))))
+ ))
+ (t
+ (error "need to support dtd state:~s" state))))
(put-back-collector entity)
(put-back-collector coll)
(case state
- (#.state-dtdstart
- (when (and (null ch) (not external))
- (xml-error "unexpected end of input while parsing DTD"))
- (if* (null tag-to-return) then (values nil :end-dtd)
- else (error "process other return state")))
- ((#.state-dtd-!-element-type-end #.state-dtd-!-element-type-token
- #.state-dtd-!-element-type-paren-pcd4 #.state-dtd-!-element-type-paren-pcd6
- #.state-dtd-!-element-type-paren-pcd5 #.state-dtd-!-element-type-paren2
- #.state-dtd-!-element-type-paren3)
- (values (append (list tag-to-return) (nreverse contents-to-return))
- nil))
- ((#.state-dtd-!-attdef-decl-type #.state-dtd-!-attlist-name
- #.state-dtd-!-attdef)
- (values (append (list tag-to-return) contents-to-return)
- nil))
- ((#.state-dtd-!-entity5 #.state-!-dtd-system3
- #.state-!-dtd-system7 #.state-!-dtd-system4
- #.state-!-dtd-system ;; this is actually a !NOTATION
- #.state-dtd-?-4 ;; PI
- #.state-dtd-comment4 ;; comment
- )
- (let ((ret (append (list tag-to-return) (nreverse contents-to-return))))
- (values ret
- nil)))
- #+ignore
- (#.state-dtd-pref2
- (values (nreverse contents-to-return) nil))
- (#.state-dtd-!-include2
- (values nil :include))
- (#.state-dtd-!-include4
- (values nil :include-end))
- (#.state-dtd-!-ignore7
- (values nil :ignore))
- (:eof
- (if* (not external) then
- (xml-error "unexpected end of input while processing DTD internal subset")
- elseif (or (> include-count 0) (not (eq prev-state state-dtdstart))) then
- (xml-error "unexpected end of input while processing external DTD"))
- (values nil :end-dtd))
- (t
- (print (list tag-to-return contents-to-return))
- (error "need to support dtd <post> state:~s" state)))
+ (#.state-dtdstart
+ (when (and (null ch) (not external))
+ (xml-error "unexpected end of input while parsing DTD"))
+ (if* (null tag-to-return) then (values nil :end-dtd)
+ else (error "process other return state")))
+ ((#.state-dtd-!-element-type-end #.state-dtd-!-element-type-token
+ #.state-dtd-!-element-type-paren-pcd4 #.state-dtd-!-element-type-paren-pcd6
+ #.state-dtd-!-element-type-paren-pcd5 #.state-dtd-!-element-type-paren2
+ #.state-dtd-!-element-type-paren3)
+ (values (append (list tag-to-return) (nreverse contents-to-return))
+ nil))
+ ((#.state-dtd-!-attdef-decl-type #.state-dtd-!-attlist-name
+ #.state-dtd-!-attdef)
+ (values (append (list tag-to-return) contents-to-return)
+ nil))
+ ((#.state-dtd-!-entity5 #.state-!-dtd-system3
+ #.state-!-dtd-system7 #.state-!-dtd-system4
+ #.state-!-dtd-system ;; this is actually a !NOTATION
+ #.state-dtd-?-4 ;; PI
+ #.state-dtd-comment4 ;; comment
+ )
+ (let ((ret (append (list tag-to-return) (nreverse contents-to-return))))
+ (values ret
+ nil)))
+ #+ignore
+ (#.state-dtd-pref2
+ (values (nreverse contents-to-return) nil))
+ (#.state-dtd-!-include2
+ (values nil :include))
+ (#.state-dtd-!-include4
+ (values nil :include-end))
+ (#.state-dtd-!-ignore7
+ (values nil :ignore))
+ (:eof
+ (if* (not external) then
+ (xml-error "unexpected end of input while processing DTD internal subset")
+ elseif (or (> include-count 0) (not (eq prev-state state-dtdstart))) then
+ (xml-error "unexpected end of input while processing external DTD"))
+ (values nil :end-dtd))
+ (t
+ (print (list tag-to-return contents-to-return))
+ (error "need to support dtd <post> state:~s" state)))
)
))
(defun external-param-reference (tokenbuf old-coll external-callback)
(declare #+allegro (:fbound next-token)
- #+lispworks (optimize (safety 0) (debug 3))
- (ignorable old-coll)
- #-lispworks (optimize (speed 3) (safety 1)))
+ #+lispworks (optimize (safety 0) (debug 3))
+ (ignorable old-coll)
+ #-lispworks (optimize (speed 3) (safety 1)))
(setf (iostruct-seen-parameter-reference tokenbuf) t)
(macrolet ((add-to-entity-buf (entity-symbol p-value)
- `(progn
- (push (make-tokenbuf :cur 0 :max (length ,p-value) :data ,p-value)
- (iostruct-entity-bufs tokenbuf))))
- (clear-coll (coll)
- `(setf (collector-next ,coll) 0))
- (un-next-char (ch)
- `(push ,ch (iostruct-unget-char tokenbuf)))
- (add-to-coll (coll ch)
- `(let ((.next. (collector-next ,coll)))
- (if* (>= .next. (collector-max ,coll))
- then (grow-and-add ,coll ,ch)
- else (setf (schar (collector-data ,coll) .next.)
- ,ch)
- (setf (collector-next ,coll) (1+ .next.))))))
+ `(progn
+ (push (make-tokenbuf :cur 0 :max (length ,p-value) :data ,p-value)
+ (iostruct-entity-bufs tokenbuf))))
+ (clear-coll (coll)
+ `(setf (collector-next ,coll) 0))
+ (un-next-char (ch)
+ `(push ,ch (iostruct-unget-char tokenbuf)))
+ (add-to-coll (coll ch)
+ `(let ((.next. (collector-next ,coll)))
+ (if* (>= .next. (collector-max ,coll))
+ then (grow-and-add ,coll ,ch)
+ else (setf (schar (collector-data ,coll) .next.)
+ ,ch)
+ (setf (collector-next ,coll) (1+ .next.))))))
(let ((ch (get-next-char tokenbuf))
- (coll (get-collector))
- p-value entity-symbol)
+ (coll (get-collector))
+ p-value entity-symbol)
(add-to-coll coll ch)
(when (not (xml-name-start-char-p ch))
- (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error (concatenate 'string
- "Illegal DTD parameter entity name starting at: "
- (compute-coll-string coll))))
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "Illegal DTD parameter entity name starting at: "
+ (compute-coll-string coll))))
(loop
- (setf ch (get-next-char tokenbuf))
- (if* (eq #\; ch) then
- (setf entity-symbol (compute-tag coll))
- (clear-coll coll)
- #+ignore (format t "entity symbol: ~s entities: ~s match: ~s~%"
- entity-symbol (iostruct-parameter-entities tokenbuf)
- (assoc entity-symbol
- (iostruct-parameter-entities tokenbuf)))
- (if* (and (iostruct-do-entity tokenbuf)
- (setf p-value
- (assoc entity-symbol
- (iostruct-parameter-entities tokenbuf)))) then
- (setf p-value (rest p-value))
- (when (member entity-symbol (iostruct-entity-names tokenbuf))
- (xml-error (concatenate 'string
- "entity:"
- (string entity-symbol)
- " in recursive reference")))
- (push entity-symbol (iostruct-entity-names tokenbuf))
- (if* (stringp p-value) then
- (setf p-value (concatenate 'string " " p-value " "))
- (add-to-entity-buf entity-symbol p-value)
- elseif (null external-callback) then
- (setf (iostruct-do-entity tokenbuf) nil)
- elseif p-value then
- (let ((entity-stream (apply external-callback p-value)))
- (when entity-stream
- (let ((entity-buf (get-tokenbuf)))
- (setf (tokenbuf-stream entity-buf) entity-stream)
- (unicode-check entity-stream tokenbuf)
- (add-to-entity-buf entity-symbol " ")
- (push entity-buf
- (iostruct-entity-bufs tokenbuf))
- (let ((count 0) cch
- (string "<?xml "))
- (if* (dotimes (i (length string) t)
- (setf cch (get-next-char tokenbuf))
- (when (and (= i 5)
- (xml-space-p cch))
- (setf cch #\space))
- (when (not (eq cch
- (schar string count)))
- (return nil))
- (incf count)) then
- (setf count 5)
- (loop
- (when (< count 0) (return))
- (un-next-char (schar string count))
- (decf count))
- ;; swallow <?xml token
- (next-token tokenbuf external-callback nil)
- else
- (un-next-char cch)
- (decf count)
- (loop
- (when (< count 0) (return))
- (un-next-char (schar string count))
- (decf count))))
- (push #\space (iostruct-unget-char tokenbuf))
- )
- )))
- else (xml-error
- (concatenate 'string
- (string entity-symbol)
- " parameter entity referenced but not declared"))
- )
- (put-back-collector coll)
- (return)
- elseif (xml-name-char-p ch) then (add-to-coll coll ch)
- else
- (dotimes (i 15)
- (add-to-coll coll ch)
- (setq ch (get-next-char tokenbuf))
- (if* (null ch)
- then (return)))
- (xml-error (concatenate 'string
- "Illegal DTD parameter entity name stating at: "
- (compute-coll-string coll))))))))
+ (setf ch (get-next-char tokenbuf))
+ (if* (eq #\; ch) then
+ (setf entity-symbol (compute-tag coll))
+ (clear-coll coll)
+ #+ignore (format t "entity symbol: ~s entities: ~s match: ~s~%"
+ entity-symbol (iostruct-parameter-entities tokenbuf)
+ (assoc entity-symbol
+ (iostruct-parameter-entities tokenbuf)))
+ (if* (and (iostruct-do-entity tokenbuf)
+ (setf p-value
+ (assoc entity-symbol
+ (iostruct-parameter-entities tokenbuf)))) then
+ (setf p-value (rest p-value))
+ (when (member entity-symbol (iostruct-entity-names tokenbuf))
+ (xml-error (concatenate 'string
+ "entity:"
+ (string entity-symbol)
+ " in recursive reference")))
+ (push entity-symbol (iostruct-entity-names tokenbuf))
+ (if* (stringp p-value) then
+ (setf p-value (concatenate 'string " " p-value " "))
+ (add-to-entity-buf entity-symbol p-value)
+ elseif (null external-callback) then
+ (setf (iostruct-do-entity tokenbuf) nil)
+ elseif p-value then
+ (let ((entity-stream (apply external-callback p-value)))
+ (when entity-stream
+ (let ((entity-buf (get-tokenbuf)))
+ (setf (tokenbuf-stream entity-buf) entity-stream)
+ (unicode-check entity-stream tokenbuf)
+ (add-to-entity-buf entity-symbol " ")
+ (push entity-buf
+ (iostruct-entity-bufs tokenbuf))
+ (let ((count 0) cch
+ (string "<?xml "))
+ (if* (dotimes (i (length string) t)
+ (setf cch (get-next-char tokenbuf))
+ (when (and (= i 5)
+ (xml-space-p cch))
+ (setf cch #\space))
+ (when (not (eq cch
+ (schar string count)))
+ (return nil))
+ (incf count)) then
+ (setf count 5)
+ (loop
+ (when (< count 0) (return))
+ (un-next-char (schar string count))
+ (decf count))
+ ;; swallow <?xml token
+ (next-token tokenbuf external-callback nil)
+ else
+ (un-next-char cch)
+ (decf count)
+ (loop
+ (when (< count 0) (return))
+ (un-next-char (schar string count))
+ (decf count))))
+ (push #\space (iostruct-unget-char tokenbuf))
+ )
+ )))
+ else (xml-error
+ (concatenate 'string
+ (string entity-symbol)
+ " parameter entity referenced but not declared"))
+ )
+ (put-back-collector coll)
+ (return)
+ elseif (xml-name-char-p ch) then (add-to-coll coll ch)
+ else
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "Illegal DTD parameter entity name stating at: "
+ (compute-coll-string coll))))))))