From 2e566ae3baa533146fbdb77af653adfda5356b76 Mon Sep 17 00:00:00 2001 From: "Kevin M. Rosenberg" Date: Fri, 31 Aug 2007 18:04:31 +0000 Subject: [PATCH] r11859: Canonicalize whitespace --- build.cl | 32 +- phtml-test.cl | 318 ++-- phtml.cl | 2136 +++++++++++----------- pxml-test.cl | 42 +- pxml0.cl | 270 +-- pxml1.cl | 498 +++--- pxml2.cl | 3806 +++++++++++++++++++-------------------- pxml3.cl | 4706 ++++++++++++++++++++++++------------------------- 8 files changed, 5904 insertions(+), 5904 deletions(-) diff --git a/build.cl b/build.cl index 8958fe5..5a42ca1 100644 --- a/build.cl +++ b/build.cl @@ -2,27 +2,27 @@ (in-package :user) -(let ((filenames +(let ((filenames (list - "pxml0" - "pxml1" - "pxml3" - "pxml2"))) + "pxml0" + "pxml1" + "pxml3" + "pxml2"))) (dolist (f filenames) (compile-file-if-needed (concatenate 'string f ".cl")) (load (concatenate 'string f ".fasl"))) - + (with-open-file (out "pxml.fasl" - :element-type '(unsigned-byte 8) - :direction :output - :if-exists :supersede - :if-does-not-exist :create) + :element-type '(unsigned-byte 8) + :direction :output + :if-exists :supersede + :if-does-not-exist :create) (dolist (file filenames) (with-open-file (in (concatenate 'string file ".fasl") - :element-type '(unsigned-byte 8)) + :element-type '(unsigned-byte 8)) (format t "~%; ~s" file) - (let ((buf (make-array 2048 :element-type '(unsigned-byte 8)))) - (loop as x = (read-sequence buf in) - until (= x 0) - do (write-sequence buf out :end x))))))) - + (let ((buf (make-array 2048 :element-type '(unsigned-byte 8)))) + (loop as x = (read-sequence buf in) + until (= x 0) + do (write-sequence buf out :end x))))))) + diff --git a/phtml-test.cl b/phtml-test.cl index 8b47684..d85a84f 100644 --- a/phtml-test.cl +++ b/phtml-test.cl @@ -1,8 +1,8 @@ -;; copyright (c) 1986-2000 Franz Inc, Berkeley, CA +;; copyright (c) 1986-2000 Franz Inc, Berkeley, CA ;; ;; This code is free software; you can redistribute it and/or ;; modify it under the terms of the version 2.1 of -;; the GNU Lesser General Public License as published by +;; the GNU Lesser General Public License as published by ;; the Free Software Foundation, as clarified by the AllegroServe ;; prequel found in license-allegroserve.txt. ;; @@ -11,11 +11,11 @@ ;; merchantability or fitness for a particular purpose. See the GNU ;; Lesser General Public License for more details. ;; -;; Version 2.1 of the GNU Lesser General Public License is in the file +;; Version 2.1 of the GNU Lesser General Public License is in the file ;; license-lgpl.txt that was distributed with this file. ;; If it is not present, you can access it from ;; http://www.gnu.org/copyleft/lesser.txt (until superseded by a newer -;; version) or write to the Free Software Foundation, Inc., 59 Temple Place, +;; version) or write to the Free Software Foundation, Inc., 59 Temple Place, ;; Suite 330, Boston, MA 02111-1307 USA ;; @@ -41,7 +41,7 @@ - this is some title text + this is some title text this is some body text with some text @@ -95,59 +95,59 @@ '((:html (:comment "this should be

one

string") (:head - (:style "this should be

one

string") - (:title "this is some title text")) - (:body - "this is some body text" + (:style "this should be

one

string") + (:title "this is some title text")) + (:body + "this is some body text" ((:a :name "this is an anchor") "with some text") - (:comment "testing allowing looser attribute parsing") - ((:a :href "mailto:lmcelroy@performigence.com") - "lmcelroy@performigence.com") - :br - "this is some more text" - (:bogus "tests parser 'looseness'") - (:select - (:option "1") - (:option "2")) - (:ul - (:li "item 1") - (:li "item 2")) - (:dl - (:dt "a term") - (:dd "its definition") - (:dt "another term") - (:dd "another definition")) - (:table - (:colgroup - ((:col :align "right")) - ((:col :align "center"))) - (:thead - (:tr - (:th "this cell is aligned right") - (:th "this cell is centered"))) - (:tfoot - (:tr - (:th "this cell is aligned right") - (:th "this cell is centered"))) - (:tbody - (:tr - (:td "this cell is aligned right") - (:td "this cell is centered"))) - (:tbody - (:tr - (:td "this cell is aligned right") - (:td "this cell is centered")))) - (:pp - (:object - (:pp "Navigate the site:" - ((:map :name "mainmap") - ((:area :shape "rect" :coords "0,100,100,200")) - ((:area :shape "rect" :coords "100,100,100,200")))))) - (:abbr "WWW") - "is an abbreviation" - (:b "force") - (:pp "whitespace only") - )))) + (:comment "testing allowing looser attribute parsing") + ((:a :href "mailto:lmcelroy@performigence.com") + "lmcelroy@performigence.com") + :br + "this is some more text" + (:bogus "tests parser 'looseness'") + (:select + (:option "1") + (:option "2")) + (:ul + (:li "item 1") + (:li "item 2")) + (:dl + (:dt "a term") + (:dd "its definition") + (:dt "another term") + (:dd "another definition")) + (:table + (:colgroup + ((:col :align "right")) + ((:col :align "center"))) + (:thead + (:tr + (:th "this cell is aligned right") + (:th "this cell is centered"))) + (:tfoot + (:tr + (:th "this cell is aligned right") + (:th "this cell is centered"))) + (:tbody + (:tr + (:td "this cell is aligned right") + (:td "this cell is centered"))) + (:tbody + (:tr + (:td "this cell is aligned right") + (:td "this cell is centered")))) + (:pp + (:object + (:pp "Navigate the site:" + ((:map :name "mainmap") + ((:area :shape "rect" :coords "0,100,100,200")) + ((:area :shape "rect" :coords "100,100,100,200")))))) + (:abbr "WWW") + "is an abbreviation" + (:b "force") + (:pp "whitespace only") + )))) (setf *test-string2* "text more text @@ -186,7 +186,7 @@ (setf *test-string3* " - @@ -204,10 +204,10 @@ vlink='4' link='6'> ((:table :border "0" :cellspacing "0" :cellpadding "0") (:tr ((:td :bgcolor "0" :rowspan "4" :width "126" :align "left" :valign "center") - ((:nyt_ad :version "1.0" :location "") - ((:a :href "ads.gif" :target "top") - ((:img :src "http://ads2.gif" :border "0" :width "120" :height "90" :alt - "E-Mail Updates from NYTimes.com")))))))))) + ((:nyt_ad :version "1.0" :location "") + ((:a :href "ads.gif" :target "top") + ((:img :src "http://ads2.gif" :border "0" :width "120" :height "90" :alt + "E-Mail Updates from NYTimes.com")))))))))) (defmethod lhtml-equal ((a t) (b t)) @@ -218,70 +218,70 @@ vlink='4' link='6'> (loop (if* (and (= i (length a)) (= j (length b))) then (return t) elseif (and (< i (length a)) (white-space-p (nth i a))) then - (incf i) + (incf i) elseif (white-space-p (nth j b)) then - (incf j) + (incf j) elseif (and (= i (length a)) (/= j (length b))) then - (return - (loop - (when (= j (length b)) (return t)) - (when (not (white-space-p (nth j b))) (return nil)) - (incf j))) + (return + (loop + (when (= j (length b)) (return t)) + (when (not (white-space-p (nth j b))) (return nil)) + (incf j))) elseif (and (/= i (length a)) (= j (length b))) then - (return - (loop - (when (= i (length a)) (return t)) - (when (not (white-space-p (nth i a))) (return nil)) - (incf i))) + (return + (loop + (when (= i (length a)) (return t)) + (when (not (white-space-p (nth i a))) (return nil)) + (incf i))) elseif (not (lhtml-equal (nth i a) (nth j b))) then - (return nil) - else - (incf i) - (incf j))))) + (return nil) + else + (incf i) + (incf j))))) (defmethod lhtml-equal ((a string) (b string)) (let ((i 0) (j 0)) ;; skip white space in beginning (loop (let ((char (elt a i))) - (when (and (not (eq char #\space)) - (not (eq char #\tab)) - (not (eq char #\return)) - (not (eq char #\linefeed))) - (return))) + (when (and (not (eq char #\space)) + (not (eq char #\tab)) + (not (eq char #\return)) + (not (eq char #\linefeed))) + (return))) (incf i)) (loop (let ((char (elt b j))) - (when (and (not (eq char #\space)) - (not (eq char #\tab)) - (not (eq char #\return)) - (not (eq char #\linefeed))) - (return))) + (when (and (not (eq char #\space)) + (not (eq char #\tab)) + (not (eq char #\return)) + (not (eq char #\linefeed))) + (return))) (incf j)) (loop (when (and (= i (length a)) (= j (length b))) (return t)) (when (and (= i (length a)) (/= j (length b))) - (return - (loop - (when (= j (length b)) (return t)) - (let ((char (elt b j))) - (when (and (not (eq char #\space)) - (not (eq char #\tab)) - (not (eq char #\return)) - (not (eq char #\linefeed))) - (return t))) - (incf j)))) + (return + (loop + (when (= j (length b)) (return t)) + (let ((char (elt b j))) + (when (and (not (eq char #\space)) + (not (eq char #\tab)) + (not (eq char #\return)) + (not (eq char #\linefeed))) + (return t))) + (incf j)))) (when (and (/= i (length a)) (= j (length b))) - (return - (loop - (when (= i (length a)) (return t)) - (let ((char (elt a i))) - (when (and (not (eq char #\space)) - (not (eq char #\tab)) - (not (eq char #\return)) - (not (eq char #\linefeed))) - (return t))) - (incf i)))) + (return + (loop + (when (= i (length a)) (return t)) + (let ((char (elt a i))) + (when (and (not (eq char #\space)) + (not (eq char #\tab)) + (not (eq char #\return)) + (not (eq char #\linefeed))) + (return t))) + (incf i)))) (when (not (eq (elt a i) (elt b j))) (return nil)) (incf i) (incf j)))) @@ -291,15 +291,15 @@ vlink='4' link='6'> (defmethod white-space-p ((a string)) (let ((i 0) - (length (length a))) + (length (length a))) (loop (when (= i length) (return t)) (let ((char (elt a i))) - (when (and (not (eq char #\space)) - (not (eq char #\tab)) - (not (eq char #\return)) - (not (eq char #\linefeed))) - (return nil))) + (when (and (not (eq char #\space)) + (not (eq char #\tab)) + (not (eq char #\return)) + (not (eq char #\linefeed))) + (return nil))) (incf i)))) ;;------------------------------------------------ @@ -313,16 +313,16 @@ vlink='4' link='6'> (incf *callback-called*) (if* (= *pass* 0) then - (incf *pass*) - (test t (lhtml-equal arg - '((:a :name "this is an anchor") - "with some text"))) + (incf *pass*) + (test t (lhtml-equal arg + '((:a :name "this is an anchor") + "with some text"))) else - (setf *pass* 0) - (test t (lhtml-equal arg - '((:a :href - "mailto:lmcelroy@performigence.com") - "lmcelroy@performigence.com")))))) + (setf *pass* 0) + (test t (lhtml-equal arg + '((:a :href + "mailto:lmcelroy@performigence.com") + "lmcelroy@performigence.com")))))) (let ((*pass* 0)) (defun nested-callback (arg) @@ -331,40 +331,40 @@ vlink='4' link='6'> (incf *callback-called*) (if* (= *pass* 0) then - (incf *pass*) - (test t (lhtml-equal arg - '(:pp "Navigate the site:" - ((:map :name "mainmap") - ((:area :shape "rect" :coords "0,100,100,200")) - ((:area :shape "rect" :coords "100,100,100,200")))))) + (incf *pass*) + (test t (lhtml-equal arg + '(:pp "Navigate the site:" + ((:map :name "mainmap") + ((:area :shape "rect" :coords "0,100,100,200")) + ((:area :shape "rect" :coords "100,100,100,200")))))) elseif (= *pass* 1) then - (incf *pass*) - (test t (lhtml-equal arg - '(:pp - (:object - (:pp "Navigate the site:" - ((:map :name "mainmap") - ((:area :shape "rect" :coords "0,100,100,200")) - ((:area :shape "rect" - :coords "100,100,100,200")))))))) + (incf *pass*) + (test t (lhtml-equal arg + '(:pp + (:object + (:pp "Navigate the site:" + ((:map :name "mainmap") + ((:area :shape "rect" :coords "0,100,100,200")) + ((:area :shape "rect" + :coords "100,100,100,200")))))))) else - (setf *pass* 0) - (test t (lhtml-equal arg - '(:pp "whitespace only")))))) + (setf *pass* 0) + (test t (lhtml-equal arg + '(:pp "whitespace only")))))) (defun testit () (let ((util.test:*test-errors* 0) - (util.test:*test-successes* 0)) + (util.test:*test-successes* 0)) (test t (lhtml-equal (parse-html *test-string2*) *expected-result2*)) (setf *callback-called* 0) (test t (lhtml-equal (parse-html *test-string*) *expected-result*)) (test 0 *callback-called*) ;;(setf (element-callback :a) 'callback-test-func) (setf *callback-called* 0) - (test t (lhtml-equal (parse-html *test-string* - :callbacks (acons :a 'callback-test-func nil)) - *expected-result*)) + (test t (lhtml-equal (parse-html *test-string* + :callbacks (acons :a 'callback-test-func nil)) + *expected-result*)) (test 2 *callback-called*) (setf *callback-called* 0) (test t (lhtml-equal (parse-html *test-string*) *expected-result*)) @@ -372,35 +372,35 @@ vlink='4' link='6'> (setf *callback-called* 0) ;; make sure function is OK arg ;;(setf (element-callback :a) (symbol-function 'callback-test-func)) - (test t (lhtml-equal - (parse-html *test-string* - :callbacks (acons :a (symbol-function 'callback-test-func) nil)) - *expected-result*)) + (test t (lhtml-equal + (parse-html *test-string* + :callbacks (acons :a (symbol-function 'callback-test-func) nil)) + *expected-result*)) (test 2 *callback-called*) ;; try with :callback-only t (setf *callback-called* 0) ;;(setf (element-callback :a) 'callback-test-func) (parse-html *test-string* :callback-only t - :callbacks (acons :a 'callback-test-func nil)) ;; won't return parse output + :callbacks (acons :a 'callback-test-func nil)) ;; won't return parse output (test 2 *callback-called*) ;; try nested callback (setf *callback-called* 0) ;;(setf (element-callback :p) 'nested-callback) (test t (lhtml-equal (parse-html *test-string* - :callbacks (acons :pp 'nested-callback nil)) - *expected-result*)) + :callbacks (acons :pp 'nested-callback nil)) + *expected-result*)) (test 3 *callback-called*) (setf *callback-called* 0) (parse-html *test-string* :callback-only t - :callbacks (acons :pp 'nested-callback nil)) + :callbacks (acons :pp 'nested-callback nil)) (test 3 *callback-called*) (test-error (parse-html "b ,var mmax)) - ,@body)) - - (addit (index charistic) - `(setf (svref arr ,index) - (logior (svref arr ,index) - ,charistic))) - ) - - (with-range (i #\A #\Z) - (addit i (+ char-tagcharacter - char-attribnamechar - char-attribundelimattribvalue))) - - (with-range (i #\a #\z) - (addit i (+ char-tagcharacter - char-attribnamechar - char-attribundelimattribvalue))) - - (with-range (i #\0 #\9) - (addit i (+ char-tagcharacter - char-attribnamechar - char-attribundelimattribvalue))) - - ;; let colon be legal tag character - (addit (char-code #\:) (+ char-attribnamechar - char-tagcharacter)) - - ;; NY times special tags have _ - (addit (char-code #\_) (+ char-attribnamechar - char-tagcharacter)) - - ; now the unusual cases - (addit (char-code #\-) (+ char-attribnamechar - char-attribundelimattribvalue)) - (addit (char-code #\.) (+ char-attribnamechar - char-attribundelimattribvalue)) - - ;; adding all typeable chars except for whitespace and > - (addit (char-code #\:) char-attribundelimattribvalue) - (addit (char-code #\@) char-attribundelimattribvalue) - (addit (char-code #\/) char-attribundelimattribvalue) - (addit (char-code #\!) char-attribundelimattribvalue) - (addit (char-code #\#) char-attribundelimattribvalue) - (addit (char-code #\$) char-attribundelimattribvalue) - (addit (char-code #\%) char-attribundelimattribvalue) - (addit (char-code #\^) char-attribundelimattribvalue) - (addit (char-code #\&) char-attribundelimattribvalue) - (addit (char-code #\() char-attribundelimattribvalue) - (addit (char-code #\)) char-attribundelimattribvalue) - (addit (char-code #\_) char-attribundelimattribvalue) - (addit (char-code #\=) char-attribundelimattribvalue) - (addit (char-code #\+) char-attribundelimattribvalue) - (addit (char-code #\\) char-attribundelimattribvalue) - (addit (char-code #\|) char-attribundelimattribvalue) - (addit (char-code #\{) char-attribundelimattribvalue) - (addit (char-code #\}) char-attribundelimattribvalue) - (addit (char-code #\[) char-attribundelimattribvalue) - (addit (char-code #\]) char-attribundelimattribvalue) - (addit (char-code #\;) char-attribundelimattribvalue) - (addit (char-code #\') char-attribundelimattribvalue) - (addit (char-code #\") char-attribundelimattribvalue) - (addit (char-code #\,) char-attribundelimattribvalue) - (addit (char-code #\<) char-attribundelimattribvalue) - (addit (char-code #\?) char-attribundelimattribvalue) - - ; i'm not sure what can be in a tag name but we know that - ; ! and - must be there since it's used in comments - - (addit (char-code #\-) char-tagcharacter) - (addit (char-code #\!) char-tagcharacter) - - ; spaces - (addit (char-code #\space) char-spacechar) - (addit (char-code #\tab) char-spacechar) - (addit (char-code #\return) char-spacechar) - (addit (char-code #\linefeed) char-spacechar) - - ) - - - + `(do ((,var (char-code ,from) (1+ ,var)) + (mmax (char-code ,to))) + ((> ,var mmax)) + ,@body)) + + (addit (index charistic) + `(setf (svref arr ,index) + (logior (svref arr ,index) + ,charistic))) + ) + + (with-range (i #\A #\Z) + (addit i (+ char-tagcharacter + char-attribnamechar + char-attribundelimattribvalue))) + + (with-range (i #\a #\z) + (addit i (+ char-tagcharacter + char-attribnamechar + char-attribundelimattribvalue))) + + (with-range (i #\0 #\9) + (addit i (+ char-tagcharacter + char-attribnamechar + char-attribundelimattribvalue))) + + ;; let colon be legal tag character + (addit (char-code #\:) (+ char-attribnamechar + char-tagcharacter)) + + ;; NY times special tags have _ + (addit (char-code #\_) (+ char-attribnamechar + char-tagcharacter)) + + ; now the unusual cases + (addit (char-code #\-) (+ char-attribnamechar + char-attribundelimattribvalue)) + (addit (char-code #\.) (+ char-attribnamechar + char-attribundelimattribvalue)) + + ;; adding all typeable chars except for whitespace and > + (addit (char-code #\:) char-attribundelimattribvalue) + (addit (char-code #\@) char-attribundelimattribvalue) + (addit (char-code #\/) char-attribundelimattribvalue) + (addit (char-code #\!) char-attribundelimattribvalue) + (addit (char-code #\#) char-attribundelimattribvalue) + (addit (char-code #\$) char-attribundelimattribvalue) + (addit (char-code #\%) char-attribundelimattribvalue) + (addit (char-code #\^) char-attribundelimattribvalue) + (addit (char-code #\&) char-attribundelimattribvalue) + (addit (char-code #\() char-attribundelimattribvalue) + (addit (char-code #\)) char-attribundelimattribvalue) + (addit (char-code #\_) char-attribundelimattribvalue) + (addit (char-code #\=) char-attribundelimattribvalue) + (addit (char-code #\+) char-attribundelimattribvalue) + (addit (char-code #\\) char-attribundelimattribvalue) + (addit (char-code #\|) char-attribundelimattribvalue) + (addit (char-code #\{) char-attribundelimattribvalue) + (addit (char-code #\}) char-attribundelimattribvalue) + (addit (char-code #\[) char-attribundelimattribvalue) + (addit (char-code #\]) char-attribundelimattribvalue) + (addit (char-code #\;) char-attribundelimattribvalue) + (addit (char-code #\') char-attribundelimattribvalue) + (addit (char-code #\") char-attribundelimattribvalue) + (addit (char-code #\,) char-attribundelimattribvalue) + (addit (char-code #\<) char-attribundelimattribvalue) + (addit (char-code #\?) char-attribundelimattribvalue) + + ; i'm not sure what can be in a tag name but we know that + ; ! and - must be there since it's used in comments + + (addit (char-code #\-) char-tagcharacter) + (addit (char-code #\!) char-tagcharacter) + + ; spaces + (addit (char-code #\space) char-spacechar) + (addit (char-code #\tab) char-spacechar) + (addit (char-code #\return) char-spacechar) + (addit (char-code #\linefeed) char-spacechar) + + ) + + + arr)) - + (defun char-characteristic (char bit) (declare (optimize (speed 3) (safety 1))) - ;; return true if the given char has the given bit set in + ;; return true if the given char has the given bit set in ;; the characteristic array (let ((code (char-code char))) (if* (<= 0 code 127) then ; in range - (not (zerop (logand (svref *characteristics* code) bit)))))) + (not (zerop (logand (svref *characteristics* code) bit)))))) -(defvar *html-entity-to-code* +(defvar *html-entity-to-code* (let ((table (make-hash-table :test #'equal))) (dolist (ent '(("nbsp" . 160) - ("iexcl" . 161) - ("cent" . 162) - ("pound" . 163) - ("curren" . 164) - ("yen" . 165) - ("brvbar" . 166) - ("sect" . 167) - ("uml" . 168) - ("copy" . 169) - ("ordf" . 170) - ("laquo" . 171) - ("not" . 172) - ("shy" . 173) - ("reg" . 174) - ("macr" . 175) - ("deg" . 176) - ("plusmn" . 177) - ("sup2" . 178) - ("sup3" . 179) - ("acute" . 180) - ("micro" . 181) - ("para" . 182) - ("middot" . 183) - ("cedil" . 184) - ("sup1" . 185) - ("ordm" . 186) - ("raquo" . 187) - ("frac14" . 188) - ("frac12" . 189) - ("frac34" . 190) - ("iquest" . 191) - ("Agrave" . 192) - ("Aacute" . 193) - ("Acirc" . 194) - ("Atilde" . 195) - ("Auml" . 196) - ("Aring" . 197) - ("AElig" . 198) - ("Ccedil" . 199) - ("Egrave" . 200) - ("Eacute" . 201) - ("Ecirc" . 202) - ("Euml" . 203) - ("Igrave" . 204) - ("Iacute" . 205) - ("Icirc" . 206) - ("Iuml" . 207) - ("ETH" . 208) - ("Ntilde" . 209) - ("Ograve" . 210) - ("Oacute" . 211) - ("Ocirc" . 212) - ("Otilde" . 213) - ("Ouml" . 214) - ("times" . 215) - ("Oslash" . 216) - ("Ugrave" . 217) - ("Uacute" . 218) - ("Ucirc" . 219) - ("Uuml" . 220) - ("Yacute" . 221) - ("THORN" . 222) - ("szlig" . 223) - ("agrave" . 224) - ("aacute" . 225) - ("acirc" . 226) - ("atilde" . 227) - ("auml" . 228) - ("aring" . 229) - ("aelig" . 230) - ("ccedil" . 231) - ("egrave" . 232) - ("eacute" . 233) - ("ecirc" . 234) - ("euml" . 235) - ("igrave" . 236) - ("iacute" . 237) - ("icirc" . 238) - ("iuml" . 239) - ("eth" . 240) - ("ntilde" . 241) - ("ograve" . 242) - ("oacute" . 243) - ("ocirc" . 244) - ("otilde" . 245) - ("ouml" . 246) - ("divide" . 247) - ("oslash" . 248) - ("ugrave" . 249) - ("uacute" . 250) - ("ucirc" . 251) - ("uuml" . 252) - ("yacute" . 253) - ("thorn" . 254) - ("yuml" . 255) - ("fnof" . 402) - ("Alpha" . 913) - ("Beta" . 914) - ("Gamma" . 915) - ("Delta" . 916) - ("Epsilon" . 917) - ("Zeta" . 918) - ("Eta" . 919) - ("Theta" . 920) - ("Iota" . 921) - ("Kappa" . 922) - ("Lambda" . 923) - ("Mu" . 924) - ("Nu" . 925) - ("Xi" . 926) - ("Omicron" . 927) - ("Pi" . 928) - ("Rho" . 929) - ("Sigma" . 931) - ("Tau" . 932) - ("Upsilon" . 933) - ("Phi" . 934) - ("Chi" . 935) - ("Psi" . 936) - ("Omega" . 937) - ("alpha" . 945) - ("beta" . 946) - ("gamma" . 947) - ("delta" . 948) - ("epsilon" . 949) - ("zeta" . 950) - ("eta" . 951) - ("theta" . 952) - ("iota" . 953) - ("kappa" . 954) - ("lambda" . 955) - ("mu" . 956) - ("nu" . 957) - ("xi" . 958) - ("omicron" . 959) - ("pi" . 960) - ("rho" . 961) - ("sigmaf" . 962) - ("sigma" . 963) - ("tau" . 964) - ("upsilon" . 965) - ("phi" . 966) - ("chi" . 967) - ("psi" . 968) - ("omega" . 969) - ("thetasym" . 977) - ("upsih" . 978) - ("piv" . 982) - ("bull" . 8226) - ("hellip" . 8230) - ("prime" . 8242) - ("Prime" . 8243) - ("oline" . 8254) - ("frasl" . 8260) - ("weierp" . 8472) - ("image" . 8465) - ("real" . 8476) - ("trade" . 8482) - ("alefsym" . 8501) - ("larr" . 8592) - ("uarr" . 8593) - ("rarr" . 8594) - ("darr" . 8595) - ("harr" . 8596) - ("crarr" . 8629) - ("lArr" . 8656) - ("uArr" . 8657) - ("rArr" . 8658) - ("dArr" . 8659) - ("hArr" . 8660) - ("forall" . 8704) - ("part" . 8706) - ("exist" . 8707) - ("empty" . 8709) - ("nabla" . 8711) - ("isin" . 8712) - ("notin" . 8713) - ("ni" . 8715) - ("prod" . 8719) - ("sum" . 8721) - ("minus" . 8722) - ("lowast" . 8727) - ("radic" . 8730) - ("prop" . 8733) - ("infin" . 8734) - ("ang" . 8736) - ("and" . 8743) - ("or" . 8744) - ("cap" . 8745) - ("cup" . 8746) - ("int" . 8747) - ("there4" . 8756) - ("sim" . 8764) - ("cong" . 8773) - ("asymp" . 8776) - ("ne" . 8800) - ("equiv" . 8801) - ("le" . 8804) - ("ge" . 8805) - ("sub" . 8834) - ("sup" . 8835) - ("nsub" . 8836) - ("sube" . 8838) - ("supe" . 8839) - ("oplus" . 8853) - ("otimes" . 8855) - ("perp" . 8869) - ("sdot" . 8901) - ("lceil" . 8968) - ("rceil" . 8969) - ("lfloor" . 8970) - ("rfloor" . 8971) - ("lang" . 9001) - ("rang" . 9002) - ("loz" . 9674) - ("spades" . 9824) - ("clubs" . 9827) - ("hearts" . 9829) - ("diams" . 9830) - ("quot" . 34) - ("amp" . 38) - ("lt" . 60) - ("gt" . 62) - ("OElig" . 338) - ("oelig" . 339) - ("Scaron" . 352) - ("scaron" . 353) - ("Yuml" . 376) - ("circ" . 710) - ("tilde" . 732) - ("ensp" . 8194) - ("emsp" . 8195) - ("thinsp" . 8201) - ("zwnj" . 8204) - ("zwj" . 8205) - ("lrm" . 8206) - ("rlm" . 8207) - ("ndash" . 8211) - ("mdash" . 8212) - ("lsquo" . 8216) - ("rsquo" . 8217) - ("sbquo" . 8218) - ("ldquo" . 8220) - ("rdquo" . 8221) - ("bdquo" . 8222) - ("dagger" . 8224) - ("Dagger" . 8225) - ("permil" . 8240) - ("lsaquo" . 8249) - ("rsaquo" . 8250) - ("euro" . 8364) - )) - (setf (gethash (car ent) table) (cdr ent))) + ("iexcl" . 161) + ("cent" . 162) + ("pound" . 163) + ("curren" . 164) + ("yen" . 165) + ("brvbar" . 166) + ("sect" . 167) + ("uml" . 168) + ("copy" . 169) + ("ordf" . 170) + ("laquo" . 171) + ("not" . 172) + ("shy" . 173) + ("reg" . 174) + ("macr" . 175) + ("deg" . 176) + ("plusmn" . 177) + ("sup2" . 178) + ("sup3" . 179) + ("acute" . 180) + ("micro" . 181) + ("para" . 182) + ("middot" . 183) + ("cedil" . 184) + ("sup1" . 185) + ("ordm" . 186) + ("raquo" . 187) + ("frac14" . 188) + ("frac12" . 189) + ("frac34" . 190) + ("iquest" . 191) + ("Agrave" . 192) + ("Aacute" . 193) + ("Acirc" . 194) + ("Atilde" . 195) + ("Auml" . 196) + ("Aring" . 197) + ("AElig" . 198) + ("Ccedil" . 199) + ("Egrave" . 200) + ("Eacute" . 201) + ("Ecirc" . 202) + ("Euml" . 203) + ("Igrave" . 204) + ("Iacute" . 205) + ("Icirc" . 206) + ("Iuml" . 207) + ("ETH" . 208) + ("Ntilde" . 209) + ("Ograve" . 210) + ("Oacute" . 211) + ("Ocirc" . 212) + ("Otilde" . 213) + ("Ouml" . 214) + ("times" . 215) + ("Oslash" . 216) + ("Ugrave" . 217) + ("Uacute" . 218) + ("Ucirc" . 219) + ("Uuml" . 220) + ("Yacute" . 221) + ("THORN" . 222) + ("szlig" . 223) + ("agrave" . 224) + ("aacute" . 225) + ("acirc" . 226) + ("atilde" . 227) + ("auml" . 228) + ("aring" . 229) + ("aelig" . 230) + ("ccedil" . 231) + ("egrave" . 232) + ("eacute" . 233) + ("ecirc" . 234) + ("euml" . 235) + ("igrave" . 236) + ("iacute" . 237) + ("icirc" . 238) + ("iuml" . 239) + ("eth" . 240) + ("ntilde" . 241) + ("ograve" . 242) + ("oacute" . 243) + ("ocirc" . 244) + ("otilde" . 245) + ("ouml" . 246) + ("divide" . 247) + ("oslash" . 248) + ("ugrave" . 249) + ("uacute" . 250) + ("ucirc" . 251) + ("uuml" . 252) + ("yacute" . 253) + ("thorn" . 254) + ("yuml" . 255) + ("fnof" . 402) + ("Alpha" . 913) + ("Beta" . 914) + ("Gamma" . 915) + ("Delta" . 916) + ("Epsilon" . 917) + ("Zeta" . 918) + ("Eta" . 919) + ("Theta" . 920) + ("Iota" . 921) + ("Kappa" . 922) + ("Lambda" . 923) + ("Mu" . 924) + ("Nu" . 925) + ("Xi" . 926) + ("Omicron" . 927) + ("Pi" . 928) + ("Rho" . 929) + ("Sigma" . 931) + ("Tau" . 932) + ("Upsilon" . 933) + ("Phi" . 934) + ("Chi" . 935) + ("Psi" . 936) + ("Omega" . 937) + ("alpha" . 945) + ("beta" . 946) + ("gamma" . 947) + ("delta" . 948) + ("epsilon" . 949) + ("zeta" . 950) + ("eta" . 951) + ("theta" . 952) + ("iota" . 953) + ("kappa" . 954) + ("lambda" . 955) + ("mu" . 956) + ("nu" . 957) + ("xi" . 958) + ("omicron" . 959) + ("pi" . 960) + ("rho" . 961) + ("sigmaf" . 962) + ("sigma" . 963) + ("tau" . 964) + ("upsilon" . 965) + ("phi" . 966) + ("chi" . 967) + ("psi" . 968) + ("omega" . 969) + ("thetasym" . 977) + ("upsih" . 978) + ("piv" . 982) + ("bull" . 8226) + ("hellip" . 8230) + ("prime" . 8242) + ("Prime" . 8243) + ("oline" . 8254) + ("frasl" . 8260) + ("weierp" . 8472) + ("image" . 8465) + ("real" . 8476) + ("trade" . 8482) + ("alefsym" . 8501) + ("larr" . 8592) + ("uarr" . 8593) + ("rarr" . 8594) + ("darr" . 8595) + ("harr" . 8596) + ("crarr" . 8629) + ("lArr" . 8656) + ("uArr" . 8657) + ("rArr" . 8658) + ("dArr" . 8659) + ("hArr" . 8660) + ("forall" . 8704) + ("part" . 8706) + ("exist" . 8707) + ("empty" . 8709) + ("nabla" . 8711) + ("isin" . 8712) + ("notin" . 8713) + ("ni" . 8715) + ("prod" . 8719) + ("sum" . 8721) + ("minus" . 8722) + ("lowast" . 8727) + ("radic" . 8730) + ("prop" . 8733) + ("infin" . 8734) + ("ang" . 8736) + ("and" . 8743) + ("or" . 8744) + ("cap" . 8745) + ("cup" . 8746) + ("int" . 8747) + ("there4" . 8756) + ("sim" . 8764) + ("cong" . 8773) + ("asymp" . 8776) + ("ne" . 8800) + ("equiv" . 8801) + ("le" . 8804) + ("ge" . 8805) + ("sub" . 8834) + ("sup" . 8835) + ("nsub" . 8836) + ("sube" . 8838) + ("supe" . 8839) + ("oplus" . 8853) + ("otimes" . 8855) + ("perp" . 8869) + ("sdot" . 8901) + ("lceil" . 8968) + ("rceil" . 8969) + ("lfloor" . 8970) + ("rfloor" . 8971) + ("lang" . 9001) + ("rang" . 9002) + ("loz" . 9674) + ("spades" . 9824) + ("clubs" . 9827) + ("hearts" . 9829) + ("diams" . 9830) + ("quot" . 34) + ("amp" . 38) + ("lt" . 60) + ("gt" . 62) + ("OElig" . 338) + ("oelig" . 339) + ("Scaron" . 352) + ("scaron" . 353) + ("Yuml" . 376) + ("circ" . 710) + ("tilde" . 732) + ("ensp" . 8194) + ("emsp" . 8195) + ("thinsp" . 8201) + ("zwnj" . 8204) + ("zwj" . 8205) + ("lrm" . 8206) + ("rlm" . 8207) + ("ndash" . 8211) + ("mdash" . 8212) + ("lsquo" . 8216) + ("rsquo" . 8217) + ("sbquo" . 8218) + ("ldquo" . 8220) + ("rdquo" . 8221) + ("bdquo" . 8222) + ("dagger" . 8224) + ("Dagger" . 8225) + ("permil" . 8240) + ("lsaquo" . 8249) + ("rsaquo" . 8250) + ("euro" . 8364) + )) + (setf (gethash (car ent) table) (cdr ent))) table)) @@ -536,392 +536,392 @@ (let (buf) (without-scheduling (do* ((bufs *tokenbufs* (cdr bufs)) - (this (car bufs) (car bufs))) - ((null bufs)) - (if* this - then (setf (car bufs) nil) - (setq buf this) - (return)))) + (this (car bufs) (car bufs))) + ((null bufs)) + (if* this + then (setf (car bufs) nil) + (setq buf this) + (return)))) (if* buf then (setf (tokenbuf-cur buf) 0) - (setf (tokenbuf-max buf) 0) - buf + (setf (tokenbuf-max buf) 0) + buf else (make-tokenbuf - :cur 0 - :max 0 - :data (make-array 1024 :element-type 'character))))) + :cur 0 + :max 0 + :data (make-array 1024 :element-type 'character))))) (defun put-back-tokenbuf (buf) (declare (optimize (speed 3) (safety 1))) - (without-scheduling + (without-scheduling (do ((bufs *tokenbufs* (cdr bufs))) - ((null bufs) - ; toss it away - nil) + ((null bufs) + ; toss it away + nil) (if* (null (car bufs)) - then (setf (car bufs) buf) - (return))))) + then (setf (car bufs) buf) + (return))))) (defun to-preferred-case (ch) (if* (eq excl:*current-case-mode* :CASE-INSENSITIVE-UPPER) then (char-upcase ch) else (char-downcase ch))) - - + + (defun next-token (stream ignore-strings raw-mode-delimiter - read-sequence-func tokenbuf parse-entities) + read-sequence-func tokenbuf parse-entities) (declare (optimize (speed 3) (safety 1))) - ;; return two values: + ;; return two values: ;; the next token from the stream. - ;; the kind of token (:pcdata, :start-tag, :end-tag, :eof) + ;; the kind of token (:pcdata, :start-tag, :end-tag, :eof) ;; ;; if read-sequence-func is non-nil, ;; read-sequence-func is called to fetch the next character (macrolet ((next-char (stream) - `(let ((cur (tokenbuf-cur tokenbuf)) - (tb (tokenbuf-data tokenbuf))) - (if* (>= cur (tokenbuf-max tokenbuf)) - then ; fill buffer - (if* (zerop (setf (tokenbuf-max tokenbuf) - (if* read-sequence-func - then (funcall read-sequence-func tb stream) - else (read-sequence tb stream)))) - then (setq cur nil) ; eof - else (setq cur 0))) - (if* cur - then (prog1 (schar tb cur) - (setf (tokenbuf-cur tokenbuf) (1+ cur)))))) - - - (un-next-char (stream ch) - `(decf (tokenbuf-cur tokenbuf))) - - (clear-coll (coll) - `(setf (collector-next coll) 0)) - - (add-to-coll (coll ch) - `(let ((.next. (collector-next ,coll))) - (if* (>= .next. (collector-max ,coll)) - then (grow-and-add ,coll ,ch) - else (setf (schar (collector-data ,coll) .next.) - ,ch) - (setf (collector-next ,coll) (1+ .next.))))) - - ) - + `(let ((cur (tokenbuf-cur tokenbuf)) + (tb (tokenbuf-data tokenbuf))) + (if* (>= cur (tokenbuf-max tokenbuf)) + then ; fill buffer + (if* (zerop (setf (tokenbuf-max tokenbuf) + (if* read-sequence-func + then (funcall read-sequence-func tb stream) + else (read-sequence tb stream)))) + then (setq cur nil) ; eof + else (setq cur 0))) + (if* cur + then (prog1 (schar tb cur) + (setf (tokenbuf-cur tokenbuf) (1+ cur)))))) + + + (un-next-char (stream ch) + `(decf (tokenbuf-cur tokenbuf))) + + (clear-coll (coll) + `(setf (collector-next coll) 0)) + + (add-to-coll (coll ch) + `(let ((.next. (collector-next ,coll))) + (if* (>= .next. (collector-max ,coll)) + then (grow-and-add ,coll ,ch) + else (setf (schar (collector-data ,coll) .next.) + ,ch) + (setf (collector-next ,coll) (1+ .next.))))) + + ) + (let ((state (if* raw-mode-delimiter then state-rawdata else state-pcdata)) - (coll (get-collector)) - (ch) - - (value-delim) - - (tag-to-return) - (attribs-to-return) - - (end-tag) - - (attrib-name) - (attrib-value) - - (name-length 0) ;; count only when it could be a comment - - (raw-length 0) + (coll (get-collector)) + (ch) + + (value-delim) + + (tag-to-return) + (attribs-to-return) + + (end-tag) + + (attrib-name) + (attrib-value) + + (name-length 0) ;; count only when it could be a comment + + (raw-length 0) (xml-bailout) - ) - + ) + (loop - - (setq ch (next-char stream)) - ;;(format t "ch: ~s state: ~s~%" ch state) - - (if* (null ch) - then (return) ; eof -- exit loop - ) - - - (case state - (#.state-pcdata - ; collect everything until we see a < - (if* (eq ch #\<) - then ; if we've collected nothing then get a tag - (if* (> (collector-next coll) 0) - then ; have collected something, return this string - (un-next-char stream ch) ; push back the < - (return) - else ; collect a tag - (setq state state-readtagfirst)) - elseif (and parse-entities (eq ch #\&)) - then ; reading an entity. entity ends at semicolon - (let (res (max 10)) - (loop (let ((ch (next-char stream))) - (if* (null ch) - then (error "End of file after & entity marker") - elseif (eq ch #\;) - then (return) - elseif (zerop (decf max)) - then (error "No semicolon found after entity starting: &~{~a~}" (nreverse res)) - else (push ch res)))) - (setq res (nreverse res)) - (if* (eq (car res) #\#) - then ; decimal entity - (let ((count 0)) - (dolist (ch (cdr res)) - (let ((code (char-code ch))) - (if* (<= #.(char-code #\0) - code - #.(char-code #\9)) - then (setq count - (+ (* 10 count) - (- code - #.(char-code #\0)))) - else (error "non decimal digit after &# - ~s" ch) - ))) - (add-to-coll coll (code-char count))) - else (let ((name (make-array (length res) - :element-type 'character - :initial-contents res))) - (let ((ch (gethash name *html-entity-to-code*))) - (if* ch - then (add-to-coll coll (code-char ch)) - else (error "No such entity as ~s" name)))))) - - else ; we will check for & here eventually - (if* (not (eq ch #\return)) - then (add-to-coll coll ch)))) - - (#.state-readtagfirst - ; starting to read a tag name - (if* (eq #\/ ch) - then ; end tag - (setq end-tag t) - else (if* (eq #\! ch) ; possible comment - then (setf xml-bailout t) - (setq name-length 0)) - (un-next-char stream ch)) - (setq state state-readtag)) - - (#.state-readtag - ;; reading the whole tag name - (if* (char-characteristic ch char-tagcharacter) - then (add-to-coll coll (to-preferred-case ch)) - (incf name-length) - (if* (and (eq name-length 3) - (coll-has-comment coll)) - then (clear-coll coll) - (setq state state-readcomment)) - - else (setq tag-to-return (compute-tag coll)) - (clear-coll coll) - (if* (eq ch #\>) - then (return) ; we're done - elseif xml-bailout then - (un-next-char stream ch) - (return) - else (if* (eq tag-to-return :!--) - then ; a comment - (setq state state-readcomment) - else (un-next-char stream ch) - (setq state state-findattribname))))) - - (#.state-findattribname - ;; search until we find the start of an attribute name - ;; or the end of the tag - (if* (eq ch #\>) - then ; end of the line - (return) - elseif (eq ch #\=) - then ; value for previous attribute name - ; (syntax "foo = bar" is bogus I think but it's - ; used some places, here is where we handle this - (pop attribs-to-return) - (setq attrib-name (pop attribs-to-return)) - (setq state state-findvalue) - elseif (char-characteristic ch char-attribnamechar) - then (un-next-char stream ch) - (setq state state-attribname) - else nil ; ignore other things - )) - - (#.state-findvalue - ;; find the start of the value - (if* (char-characteristic ch char-spacechar) - thenret ; keep looking - elseif (eq ch #\>) - then ; no value, set the value to be the - ; name as a string - (setq attrib-value - (string-downcase (string attrib-name))) - - (push attrib-name attribs-to-return) - (push attrib-value attribs-to-return) - (un-next-char stream ch) - (setq state state-findattribname) - else (un-next-char stream ch) - (setq state state-attribstartvalue))) - - - (#.state-attribname - ;; collect attribute name - - (if* (char-characteristic ch char-attribnamechar) - then (add-to-coll coll (to-preferred-case ch)) - elseif (eq #\= ch) - then ; end of attribute name, value is next - (setq attrib-name (compute-tag coll)) - (clear-coll coll) - (setq state state-attribstartvalue) - else ; end of attribute name with no value, - (setq attrib-name (compute-tag coll)) - (clear-coll coll) - (setq attrib-value - (string-downcase (string attrib-name))) - (push attrib-name attribs-to-return) - (push attrib-value attribs-to-return) - (un-next-char stream ch) - (setq state state-findattribname))) - - (#.state-attribstartvalue - ;; begin to collect value - (if* (or (eq ch #\") - (eq ch #\')) - then (setq value-delim ch) - (setq state state-attribvaluedelim) - ;; gobble spaces; assume since we've seen a '=' there really is a value - elseif (eq #\space ch) then nil - else (un-next-char stream ch) - (setq state state-attribvaluenodelim))) - - (#.state-attribvaluedelim - (if* (eq ch value-delim) - then (setq attrib-value (compute-coll-string coll)) - (clear-coll coll) - (push attrib-name attribs-to-return) - (push attrib-value attribs-to-return) - (setq state state-findattribname) - else (add-to-coll coll ch))) - - (#.state-attribvaluenodelim - ;; an attribute value not delimited by ' or " and thus restricted - ;; in the possible characters - (if* (char-characteristic ch char-attribundelimattribvalue) - then (add-to-coll coll ch) - else (un-next-char stream ch) - (setq attrib-value (compute-coll-string coll)) - (clear-coll coll) - (push attrib-name attribs-to-return) - (push attrib-value attribs-to-return) - (setq state state-findattribname))) - - (#.state-readcomment - ;; a comment ends on the first --, but we'll look for --> - ;; since that's what most people expect - (if* (eq ch #\-) - then (setq state state-readcomment-one) - else (add-to-coll coll ch))) - - (#.state-readcomment-one - ;; seen one -, looking for -> - - (if* (eq ch #\-) - then (setq state state-readcomment-two) - else ; not a comment end, put back the -'s - (add-to-coll coll #\-) - (add-to-coll coll ch) - (setq state state-readcomment))) - - (#.state-readcomment-two - ;; seen two -'s, looking for > - - (if* (eq ch #\>) - then ; end of the line - (return) - elseif (eq ch #\-) - then ; still at two -'s, have to put out first - (add-to-coll coll #\-) - else ; put out two hypens and back to looking for a hypen - (add-to-coll coll #\-) - (add-to-coll coll #\-) - (setq state state-readcomment))) - - (#.state-rawdata - ;; collect everything until we see the delimiter - (if* (eq (to-preferred-case ch) (elt raw-mode-delimiter raw-length)) - then - (incf raw-length) - (when (= raw-length (length raw-mode-delimiter)) - ;; push the end tag back so it can then be lexed - ;; but don't do it for xml stuff - (when (/= (length raw-mode-delimiter) 1) - (push :end-tag (tokenbuf-first-pass tokenbuf)) - (if* (equal raw-mode-delimiter "") - then (push :STYLE (tokenbuf-first-pass tokenbuf)) - elseif (equal raw-mode-delimiter "") - then (push :style (tokenbuf-first-pass tokenbuf)) - elseif (equal raw-mode-delimiter "") - then (push :SCRIPT (tokenbuf-first-pass tokenbuf)) - elseif (equal raw-mode-delimiter "") - then (push :script (tokenbuf-first-pass tokenbuf)) - else (error "unexpected raw-mode-delimiter")) - ) - ;; set state to state-pcdata for next section - (return)) - else - ;; push partial matches into data string - (dotimes (i raw-length) - (add-to-coll coll (elt raw-mode-delimiter i))) - (setf raw-length 0) - (add-to-coll coll ch))) - - )) - - - ;; out of the loop. + + (setq ch (next-char stream)) + ;;(format t "ch: ~s state: ~s~%" ch state) + + (if* (null ch) + then (return) ; eof -- exit loop + ) + + + (case state + (#.state-pcdata + ; collect everything until we see a < + (if* (eq ch #\<) + then ; if we've collected nothing then get a tag + (if* (> (collector-next coll) 0) + then ; have collected something, return this string + (un-next-char stream ch) ; push back the < + (return) + else ; collect a tag + (setq state state-readtagfirst)) + elseif (and parse-entities (eq ch #\&)) + then ; reading an entity. entity ends at semicolon + (let (res (max 10)) + (loop (let ((ch (next-char stream))) + (if* (null ch) + then (error "End of file after & entity marker") + elseif (eq ch #\;) + then (return) + elseif (zerop (decf max)) + then (error "No semicolon found after entity starting: &~{~a~}" (nreverse res)) + else (push ch res)))) + (setq res (nreverse res)) + (if* (eq (car res) #\#) + then ; decimal entity + (let ((count 0)) + (dolist (ch (cdr res)) + (let ((code (char-code ch))) + (if* (<= #.(char-code #\0) + code + #.(char-code #\9)) + then (setq count + (+ (* 10 count) + (- code + #.(char-code #\0)))) + else (error "non decimal digit after &# - ~s" ch) + ))) + (add-to-coll coll (code-char count))) + else (let ((name (make-array (length res) + :element-type 'character + :initial-contents res))) + (let ((ch (gethash name *html-entity-to-code*))) + (if* ch + then (add-to-coll coll (code-char ch)) + else (error "No such entity as ~s" name)))))) + + else ; we will check for & here eventually + (if* (not (eq ch #\return)) + then (add-to-coll coll ch)))) + + (#.state-readtagfirst + ; starting to read a tag name + (if* (eq #\/ ch) + then ; end tag + (setq end-tag t) + else (if* (eq #\! ch) ; possible comment + then (setf xml-bailout t) + (setq name-length 0)) + (un-next-char stream ch)) + (setq state state-readtag)) + + (#.state-readtag + ;; reading the whole tag name + (if* (char-characteristic ch char-tagcharacter) + then (add-to-coll coll (to-preferred-case ch)) + (incf name-length) + (if* (and (eq name-length 3) + (coll-has-comment coll)) + then (clear-coll coll) + (setq state state-readcomment)) + + else (setq tag-to-return (compute-tag coll)) + (clear-coll coll) + (if* (eq ch #\>) + then (return) ; we're done + elseif xml-bailout then + (un-next-char stream ch) + (return) + else (if* (eq tag-to-return :!--) + then ; a comment + (setq state state-readcomment) + else (un-next-char stream ch) + (setq state state-findattribname))))) + + (#.state-findattribname + ;; search until we find the start of an attribute name + ;; or the end of the tag + (if* (eq ch #\>) + then ; end of the line + (return) + elseif (eq ch #\=) + then ; value for previous attribute name + ; (syntax "foo = bar" is bogus I think but it's + ; used some places, here is where we handle this + (pop attribs-to-return) + (setq attrib-name (pop attribs-to-return)) + (setq state state-findvalue) + elseif (char-characteristic ch char-attribnamechar) + then (un-next-char stream ch) + (setq state state-attribname) + else nil ; ignore other things + )) + + (#.state-findvalue + ;; find the start of the value + (if* (char-characteristic ch char-spacechar) + thenret ; keep looking + elseif (eq ch #\>) + then ; no value, set the value to be the + ; name as a string + (setq attrib-value + (string-downcase (string attrib-name))) + + (push attrib-name attribs-to-return) + (push attrib-value attribs-to-return) + (un-next-char stream ch) + (setq state state-findattribname) + else (un-next-char stream ch) + (setq state state-attribstartvalue))) + + + (#.state-attribname + ;; collect attribute name + + (if* (char-characteristic ch char-attribnamechar) + then (add-to-coll coll (to-preferred-case ch)) + elseif (eq #\= ch) + then ; end of attribute name, value is next + (setq attrib-name (compute-tag coll)) + (clear-coll coll) + (setq state state-attribstartvalue) + else ; end of attribute name with no value, + (setq attrib-name (compute-tag coll)) + (clear-coll coll) + (setq attrib-value + (string-downcase (string attrib-name))) + (push attrib-name attribs-to-return) + (push attrib-value attribs-to-return) + (un-next-char stream ch) + (setq state state-findattribname))) + + (#.state-attribstartvalue + ;; begin to collect value + (if* (or (eq ch #\") + (eq ch #\')) + then (setq value-delim ch) + (setq state state-attribvaluedelim) + ;; gobble spaces; assume since we've seen a '=' there really is a value + elseif (eq #\space ch) then nil + else (un-next-char stream ch) + (setq state state-attribvaluenodelim))) + + (#.state-attribvaluedelim + (if* (eq ch value-delim) + then (setq attrib-value (compute-coll-string coll)) + (clear-coll coll) + (push attrib-name attribs-to-return) + (push attrib-value attribs-to-return) + (setq state state-findattribname) + else (add-to-coll coll ch))) + + (#.state-attribvaluenodelim + ;; an attribute value not delimited by ' or " and thus restricted + ;; in the possible characters + (if* (char-characteristic ch char-attribundelimattribvalue) + then (add-to-coll coll ch) + else (un-next-char stream ch) + (setq attrib-value (compute-coll-string coll)) + (clear-coll coll) + (push attrib-name attribs-to-return) + (push attrib-value attribs-to-return) + (setq state state-findattribname))) + + (#.state-readcomment + ;; a comment ends on the first --, but we'll look for --> + ;; since that's what most people expect + (if* (eq ch #\-) + then (setq state state-readcomment-one) + else (add-to-coll coll ch))) + + (#.state-readcomment-one + ;; seen one -, looking for -> + + (if* (eq ch #\-) + then (setq state state-readcomment-two) + else ; not a comment end, put back the -'s + (add-to-coll coll #\-) + (add-to-coll coll ch) + (setq state state-readcomment))) + + (#.state-readcomment-two + ;; seen two -'s, looking for > + + (if* (eq ch #\>) + then ; end of the line + (return) + elseif (eq ch #\-) + then ; still at two -'s, have to put out first + (add-to-coll coll #\-) + else ; put out two hypens and back to looking for a hypen + (add-to-coll coll #\-) + (add-to-coll coll #\-) + (setq state state-readcomment))) + + (#.state-rawdata + ;; collect everything until we see the delimiter + (if* (eq (to-preferred-case ch) (elt raw-mode-delimiter raw-length)) + then + (incf raw-length) + (when (= raw-length (length raw-mode-delimiter)) + ;; push the end tag back so it can then be lexed + ;; but don't do it for xml stuff + (when (/= (length raw-mode-delimiter) 1) + (push :end-tag (tokenbuf-first-pass tokenbuf)) + (if* (equal raw-mode-delimiter "") + then (push :STYLE (tokenbuf-first-pass tokenbuf)) + elseif (equal raw-mode-delimiter "") + then (push :style (tokenbuf-first-pass tokenbuf)) + elseif (equal raw-mode-delimiter "") + then (push :SCRIPT (tokenbuf-first-pass tokenbuf)) + elseif (equal raw-mode-delimiter "") + then (push :script (tokenbuf-first-pass tokenbuf)) + else (error "unexpected raw-mode-delimiter")) + ) + ;; set state to state-pcdata for next section + (return)) + else + ;; push partial matches into data string + (dotimes (i raw-length) + (add-to-coll coll (elt raw-mode-delimiter i))) + (setf raw-length 0) + (add-to-coll coll ch))) + + )) + + + ;; out of the loop. ;; if we're in certain states then it means we should return a value ;; (case state - ((#.state-pcdata #.state-rawdata) - ;; return the buffer as a string - (if* (zerop (collector-next coll)) - then (values nil (if (eq state state-pcdata) :eof :pcdata)) - else (values (prog1 - (if* (null ignore-strings) - then (compute-coll-string coll)) - (put-back-collector coll)) - :pcdata))) - - (#.state-readtag - (when (null tag-to-return) - (error "unexpected end of input encountered")) - ;; we've read a tag with no attributes - (put-back-collector coll) - (values tag-to-return - (if* end-tag - then :end-tag - else (if* xml-bailout then :xml else :start-tag)) - )) - - (#.state-findattribname - ;; returning a tag with possible attributes - (put-back-collector coll) - (if* end-tag - then ; ignore any attributes - (values tag-to-return :end-tag) - elseif attribs-to-return - then (values (cons tag-to-return - (nreverse attribs-to-return)) - :start-tag) - else (values tag-to-return :start-tag))) - - (#.state-readcomment-two - ;; returning a comment - (values (prog1 (if* (null ignore-strings) - then (compute-coll-string coll)) - (put-back-collector coll)) - :comment)) - - (t - (if* (null ch) then (error "unexpected end of input encountered") - else (error "internal error, can't be here in state ~d" state))))))) + ((#.state-pcdata #.state-rawdata) + ;; return the buffer as a string + (if* (zerop (collector-next coll)) + then (values nil (if (eq state state-pcdata) :eof :pcdata)) + else (values (prog1 + (if* (null ignore-strings) + then (compute-coll-string coll)) + (put-back-collector coll)) + :pcdata))) + + (#.state-readtag + (when (null tag-to-return) + (error "unexpected end of input encountered")) + ;; we've read a tag with no attributes + (put-back-collector coll) + (values tag-to-return + (if* end-tag + then :end-tag + else (if* xml-bailout then :xml else :start-tag)) + )) + + (#.state-findattribname + ;; returning a tag with possible attributes + (put-back-collector coll) + (if* end-tag + then ; ignore any attributes + (values tag-to-return :end-tag) + elseif attribs-to-return + then (values (cons tag-to-return + (nreverse attribs-to-return)) + :start-tag) + else (values tag-to-return :start-tag))) + + (#.state-readcomment-two + ;; returning a comment + (values (prog1 (if* (null ignore-strings) + then (compute-coll-string coll)) + (put-back-collector coll)) + :comment)) + + (t + (if* (null ch) then (error "unexpected end of input encountered") + else (error "internal error, can't be here in state ~d" state))))))) (defvar *kwd-package* (find-package :keyword)) @@ -937,10 +937,10 @@ (declare (optimize (speed 3) (safety 1))) ;; return the string that's in the collection (let ((str (make-string (collector-next coll))) - (from (collector-data coll))) + (from (collector-data coll))) (dotimes (i (collector-next coll)) (setf (schar str i) (schar from i))) - + str)) (defun coll-has-comment (coll) @@ -948,39 +948,39 @@ ;; true if the collector has exactly "!--" in it (and (eq 3 (collector-next coll)) (let ((data (collector-data coll))) - (and (eq #\! (schar data 0)) - (eq #\- (schar data 1)) - (eq #\- (schar data 2)))))) - + (and (eq #\! (schar data 0)) + (eq #\- (schar data 1)) + (eq #\- (schar data 2)))))) + ;;;;;;;;;;; quick and dirty parse ; the elements with no body and thus no end tag -(dolist (opt '(:area :base :basefont :bgsound :br :button :col - ;;:colgroup - no, this is an element with contents - :embed :hr :img :frame - :input :isindex :keygen :link :meta - :plaintext :spacer :wbr)) +(dolist (opt '(:area :base :basefont :bgsound :br :button :col + ;;:colgroup - no, this is an element with contents + :embed :hr :img :frame + :input :isindex :keygen :link :meta + :plaintext :spacer :wbr)) (setf (tag-no-end opt) t)) (defvar *in-line* '(:tt :i :b :big :small :em :strong :dfn :code :samp :kbd - :var :cite :abbr :acronym :a :img :object :br :map - :q :sub :sup :span :bdo :input :select :textarea :label :button :font)) + :var :cite :abbr :acronym :a :img :object :br :map + :q :sub :sup :span :bdo :input :select :textarea :label :button :font)) (defvar *ch-format* '(:i :b :tt :big :small :strike :s :u - :em :strong :font)) + :em :strong :font)) (defvar *known-tags* '(:!doctype :a :acronym :address :applet :area :b :base :basefont - :bdo :bgsound :big :blink :blockquote :body :br :button :caption - :center :cite :code :col :colgroup :comment :dd :del :dfn :dir - :div :dl :dt :em :embed :fieldset :font :form :frame :frameset - :h1 :h2 :h3 :h4 :h5 :h6 :head :hr :html :i :iframe :img :input - :ins :isindex :kbd :label :layer :legend :li :link :listing :map - :marquee :menu :meta :multicol :nobr :noframes :noscript :object - :ol :option :p :param :plaintext :pre :q :samp :script :select - :small :spacer :span :s :strike :strong :style :sub :sup :table - :tbody :td :textarea :tfoot :th :thead :title :tr :tt :u :ul :var - :wbr :xmp)) + :bdo :bgsound :big :blink :blockquote :body :br :button :caption + :center :cite :code :col :colgroup :comment :dd :del :dfn :dir + :div :dl :dt :em :embed :fieldset :font :form :frame :frameset + :h1 :h2 :h3 :h4 :h5 :h6 :head :hr :html :i :iframe :img :input + :ins :isindex :kbd :label :layer :legend :li :link :listing :map + :marquee :menu :meta :multicol :nobr :noframes :noscript :object + :ol :option :p :param :plaintext :pre :q :samp :script :select + :small :spacer :span :s :strike :strong :style :sub :sup :table + :tbody :td :textarea :tfoot :th :thead :title :tr :tt :u :ul :var + :wbr :xmp)) ; the elements whose start tag can end a previous tag @@ -1046,339 +1046,339 @@ (defmethod parse-html ((p stream) &key callback-only callbacks collect-rogue-tags - no-body-tags - parse-entities) + no-body-tags + parse-entities) (declare (optimize (speed 3) (safety 1))) (phtml-internal p nil callback-only callbacks collect-rogue-tags - no-body-tags parse-entities)) + no-body-tags parse-entities)) (defmacro tag-callback (tag) `(rest (assoc ,tag callbacks))) -(defun phtml-internal (p read-sequence-func callback-only - callbacks collect-rogue-tags - no-body-tags - parse-entities) +(defun phtml-internal (p read-sequence-func callback-only + callbacks collect-rogue-tags + no-body-tags + parse-entities) (declare (optimize (speed 3) (safety 1))) (let ((raw-mode-delimiter nil) - (pending nil) - (current-tag :start-parse) - (last-tag :start-parse) - (current-callback-tags nil) - (pending-ch-format nil) - (closed-pending-ch-format nil) - (new-opens nil) - (tokenbuf (get-tokenbuf)) - (guts) - (rogue-tags) - ) + (pending nil) + (current-tag :start-parse) + (last-tag :start-parse) + (current-callback-tags nil) + (pending-ch-format nil) + (closed-pending-ch-format nil) + (new-opens nil) + (tokenbuf (get-tokenbuf)) + (guts) + (rogue-tags) + ) (labels ((close-off-tags (name stop-at collect-rogues once-only) - ;; close off an open 'name' tag, but search no further - ;; than a 'stop-at' tag. - #+ignore (format t "close off name ~s, stop at ~s, ct ~s~%" - name stop-at current-tag) - (if* (member (tag-name current-tag) name :test #'eq) - then ;; close current tag(s) - (loop - (when (and collect-rogues - (not (member (tag-name current-tag) - *known-tags*))) - (push (tag-name current-tag) rogue-tags)) - (close-current-tag) - (if* (or once-only - (member (tag-name current-tag) - *ch-format*) - (not (member - (tag-name current-tag) name :test #'eq))) - then (return))) - elseif (member (tag-name current-tag) stop-at :test #'eq) - then nil - else ; search if there is a tag to close - (dolist (ent pending) - (if* (member (tag-name (car ent)) name :test #'eq) - then ; found one to close - (loop - (when (and collect-rogues - (not (member (tag-name current-tag) - *known-tags*))) - (push (tag-name current-tag) rogue-tags)) - (close-current-tag) - (if* (member (tag-name current-tag) name - :test #'eq) - then (close-current-tag) - (return))) - (return) - elseif (member (tag-name (car ent)) stop-at - :test #'eq) - then (return) ;; do nothing - )))) - - (close-current-tag () - ;; close off the current tag and open the pending tag - (when (member (tag-name current-tag) *ch-format* :test #'eq) - (push current-tag closed-pending-ch-format) - ) - (let (element) - (if* (tag-no-pcdata (tag-name current-tag)) - then (setq element `(,current-tag - ,@(strip-rev-pcdata guts))) - else (setq element `(,current-tag ,@(nreverse guts)))) - (let ((callback (tag-callback (tag-name current-tag)))) - (when callback - (setf current-callback-tags (rest current-callback-tags)) - (funcall callback element))) - (let* ((prev (pop pending))) - (setq current-tag (car prev) - guts (cdr prev)) - (push element guts)))) - - (save-state () - ;; push the current tag state since we're starting: - ;; a new open tag - (push (cons current-tag guts) pending) - #+ignore (format t "state saved, pending ~s~%" pending) - ) - - - (strip-rev-pcdata (stuff) - ;; reverse the list stuff, omitting all the strings - (let (res) - (dolist (st stuff) - (if* (not (stringp st)) then (push st res))) - res)) - (check-in-line (check-tag) - (setf new-opens nil) - (let (val kind (i 0) - (length (length (tokenbuf-first-pass tokenbuf)))) - (loop - (if* (< i length) then - (setf val (nth i (tokenbuf-first-pass tokenbuf))) - (setf kind (nth (+ i 1) (tokenbuf-first-pass tokenbuf))) - (setf i (+ i 2)) - (if* (= i length) then (setf (tokenbuf-first-pass tokenbuf) - (nreverse (tokenbuf-first-pass tokenbuf)))) - else - (multiple-value-setq (val kind) - (get-next-token t)) - (push val (tokenbuf-first-pass tokenbuf)) - (push kind (tokenbuf-first-pass tokenbuf)) - ) - (when (eq kind :eof) - (if* (= i length) then - (setf (tokenbuf-first-pass tokenbuf) - (nreverse (tokenbuf-first-pass tokenbuf)))) - (return)) - (when (and (eq val check-tag) (eq kind :end-tag)) - (if* (= i length) then - (setf (tokenbuf-first-pass tokenbuf) - (nreverse (tokenbuf-first-pass tokenbuf)))) - (return)) - (when (member val *ch-format* :test #'eq) - (if* (eq kind :start-tag) then (push val new-opens) - elseif (member val new-opens :test #'eq) then - (setf new-opens (remove val new-opens :count 1)) - else (close-off-tags (list val) nil nil nil) - ))))) - - (get-next-token (force) - (if* (or force (null (tokenbuf-first-pass tokenbuf))) then - (multiple-value-bind (val kind) - (next-token p nil raw-mode-delimiter read-sequence-func - tokenbuf parse-entities) - (values val kind)) - else - (let ((val (first (tokenbuf-first-pass tokenbuf))) - (kind (second (tokenbuf-first-pass tokenbuf)))) - (setf (tokenbuf-first-pass tokenbuf) - (rest (rest (tokenbuf-first-pass tokenbuf)))) - (values val kind)))) - ) + ;; close off an open 'name' tag, but search no further + ;; than a 'stop-at' tag. + #+ignore (format t "close off name ~s, stop at ~s, ct ~s~%" + name stop-at current-tag) + (if* (member (tag-name current-tag) name :test #'eq) + then ;; close current tag(s) + (loop + (when (and collect-rogues + (not (member (tag-name current-tag) + *known-tags*))) + (push (tag-name current-tag) rogue-tags)) + (close-current-tag) + (if* (or once-only + (member (tag-name current-tag) + *ch-format*) + (not (member + (tag-name current-tag) name :test #'eq))) + then (return))) + elseif (member (tag-name current-tag) stop-at :test #'eq) + then nil + else ; search if there is a tag to close + (dolist (ent pending) + (if* (member (tag-name (car ent)) name :test #'eq) + then ; found one to close + (loop + (when (and collect-rogues + (not (member (tag-name current-tag) + *known-tags*))) + (push (tag-name current-tag) rogue-tags)) + (close-current-tag) + (if* (member (tag-name current-tag) name + :test #'eq) + then (close-current-tag) + (return))) + (return) + elseif (member (tag-name (car ent)) stop-at + :test #'eq) + then (return) ;; do nothing + )))) + + (close-current-tag () + ;; close off the current tag and open the pending tag + (when (member (tag-name current-tag) *ch-format* :test #'eq) + (push current-tag closed-pending-ch-format) + ) + (let (element) + (if* (tag-no-pcdata (tag-name current-tag)) + then (setq element `(,current-tag + ,@(strip-rev-pcdata guts))) + else (setq element `(,current-tag ,@(nreverse guts)))) + (let ((callback (tag-callback (tag-name current-tag)))) + (when callback + (setf current-callback-tags (rest current-callback-tags)) + (funcall callback element))) + (let* ((prev (pop pending))) + (setq current-tag (car prev) + guts (cdr prev)) + (push element guts)))) + + (save-state () + ;; push the current tag state since we're starting: + ;; a new open tag + (push (cons current-tag guts) pending) + #+ignore (format t "state saved, pending ~s~%" pending) + ) + + + (strip-rev-pcdata (stuff) + ;; reverse the list stuff, omitting all the strings + (let (res) + (dolist (st stuff) + (if* (not (stringp st)) then (push st res))) + res)) + (check-in-line (check-tag) + (setf new-opens nil) + (let (val kind (i 0) + (length (length (tokenbuf-first-pass tokenbuf)))) + (loop + (if* (< i length) then + (setf val (nth i (tokenbuf-first-pass tokenbuf))) + (setf kind (nth (+ i 1) (tokenbuf-first-pass tokenbuf))) + (setf i (+ i 2)) + (if* (= i length) then (setf (tokenbuf-first-pass tokenbuf) + (nreverse (tokenbuf-first-pass tokenbuf)))) + else + (multiple-value-setq (val kind) + (get-next-token t)) + (push val (tokenbuf-first-pass tokenbuf)) + (push kind (tokenbuf-first-pass tokenbuf)) + ) + (when (eq kind :eof) + (if* (= i length) then + (setf (tokenbuf-first-pass tokenbuf) + (nreverse (tokenbuf-first-pass tokenbuf)))) + (return)) + (when (and (eq val check-tag) (eq kind :end-tag)) + (if* (= i length) then + (setf (tokenbuf-first-pass tokenbuf) + (nreverse (tokenbuf-first-pass tokenbuf)))) + (return)) + (when (member val *ch-format* :test #'eq) + (if* (eq kind :start-tag) then (push val new-opens) + elseif (member val new-opens :test #'eq) then + (setf new-opens (remove val new-opens :count 1)) + else (close-off-tags (list val) nil nil nil) + ))))) + + (get-next-token (force) + (if* (or force (null (tokenbuf-first-pass tokenbuf))) then + (multiple-value-bind (val kind) + (next-token p nil raw-mode-delimiter read-sequence-func + tokenbuf parse-entities) + (values val kind)) + else + (let ((val (first (tokenbuf-first-pass tokenbuf))) + (kind (second (tokenbuf-first-pass tokenbuf)))) + (setf (tokenbuf-first-pass tokenbuf) + (rest (rest (tokenbuf-first-pass tokenbuf)))) + (values val kind)))) + ) (loop - (multiple-value-bind (val kind) - (get-next-token nil) - #+ignore (format t "val: ~s kind: ~s last-tag ~s pending ~s~%" val kind - last-tag pending) - (case kind - (:pcdata - (when (or (and callback-only current-callback-tags) - (not callback-only)) - (if* (member last-tag *in-line*) - then - (push val guts) - else - (when (dotimes (i (length val) nil) - (when (not (char-characteristic (elt val i) - char-spacechar)) - (return t))) - (push val guts)))) - (when (and (= (length raw-mode-delimiter) 1) ;; xml tag... - (or (and callback-only current-callback-tags) - (not callback-only))) - (close-off-tags (list last-tag) nil nil t)) - (setf raw-mode-delimiter nil) - ) - - (:xml - (setf last-tag val) - (setf raw-mode-delimiter ">") - (let* ((name (tag-name val))) - (when (and callback-only (tag-callback name)) - (push name current-callback-tags)) - (save-state) - (setq current-tag val) - (setq guts nil) - )) - - (:start-tag - (setf last-tag val) - (if* (or (eq last-tag :style) - (and (listp last-tag) (eq (first last-tag) :style))) - then - (setf raw-mode-delimiter - (if* (eq excl:*current-case-mode* :CASE-INSENSITIVE-UPPER) - then "" - else "")) - elseif (or (eq last-tag :script) - (and (listp last-tag) (eq (first last-tag) :script))) - then - (setf raw-mode-delimiter - (if* (eq excl:*current-case-mode* :CASE-INSENSITIVE-UPPER) - then "" - else ""))) - ; maybe this is an end tag too - (let* ((name (tag-name val)) - (auto-close (tag-auto-close name)) - (auto-close-stop nil) - (no-end (or (tag-no-end name) (member name no-body-tags)))) - (when (and callback-only (tag-callback name)) - (push name current-callback-tags)) - (when (or (and callback-only current-callback-tags) - (not callback-only)) - (if* auto-close - then (setq auto-close-stop (tag-auto-close-stop name)) - (close-off-tags auto-close auto-close-stop nil nil)) - (when (and pending-ch-format (not no-end)) - (if* (member name *ch-format* :test #'eq) then nil - elseif (member name *in-line* :test #'eq) then - ;; close off only tags that are within *in-line* block - (check-in-line name) - else ;; close ALL pending char tags and then reopen - (dolist (this-tag (reverse pending-ch-format)) - (close-off-tags (list (if (listp this-tag) (first this-tag) this-tag)) nil nil nil)) - )) - (if* no-end - then ; this is a singleton tag - (let ((callback (tag-callback (tag-name (if* (atom val) - then val - else (first val)))))) - (when callback - (funcall callback (if* (atom val) - then val - else (list val))))) - (push (if* (atom val) - then val - else (list val)) - guts) - else (save-state) - (setq current-tag val) - (setq guts nil)) - (if* (member name *ch-format* :test #'eq) - then (push val pending-ch-format) - else (when (not - (or (eq last-tag :style) - (and (listp last-tag) (eq (first last-tag) :style)) - (eq last-tag :script) - (and (listp last-tag) (eq (first last-tag) :script)))) - (dolist (tmp (reverse closed-pending-ch-format)) - (save-state) - (setf current-tag tmp) - (setf guts nil))) - ) - (when (not - (or (eq last-tag :style) - (and (listp last-tag) (eq (first last-tag) :style)) - (eq last-tag :script) - (and (listp last-tag) (eq (first last-tag) :script)))) - (setf closed-pending-ch-format nil)) - ))) - - (:end-tag - (setf raw-mode-delimiter nil) - (when (or (and callback-only current-callback-tags) - (not callback-only)) - (close-off-tags (list val) nil nil t) - (when (member val *ch-format* :test #'eq) - (setf pending-ch-format - (remove val pending-ch-format :count 1 - :test #'(lambda (x y) (eq x (if (listp y) (first y) y))))) - (setf closed-pending-ch-format - (remove val closed-pending-ch-format :count 1 - :test #'(lambda (x y) (eq x (if (listp y) (first y) y))))) - ) - (dolist (tmp (reverse closed-pending-ch-format)) - (save-state) - (setf current-tag tmp) - (setf guts nil)) - (setf closed-pending-ch-format nil) - )) - - (:comment - (setf raw-mode-delimiter nil) - (when (or (and callback-only current-callback-tags) - (not callback-only)) - (push `(:comment ,val) guts))) - - (:eof - (setf raw-mode-delimiter nil) - ;; close off all tags - (when (or (and callback-only current-callback-tags) - (not callback-only)) - (close-off-tags '(:start-parse) nil collect-rogue-tags nil)) - (put-back-tokenbuf tokenbuf) - (if collect-rogue-tags - (return (values (cdar guts) rogue-tags)) - (return (cdar guts)))))))))) - - + (multiple-value-bind (val kind) + (get-next-token nil) + #+ignore (format t "val: ~s kind: ~s last-tag ~s pending ~s~%" val kind + last-tag pending) + (case kind + (:pcdata + (when (or (and callback-only current-callback-tags) + (not callback-only)) + (if* (member last-tag *in-line*) + then + (push val guts) + else + (when (dotimes (i (length val) nil) + (when (not (char-characteristic (elt val i) + char-spacechar)) + (return t))) + (push val guts)))) + (when (and (= (length raw-mode-delimiter) 1) ;; xml tag... + (or (and callback-only current-callback-tags) + (not callback-only))) + (close-off-tags (list last-tag) nil nil t)) + (setf raw-mode-delimiter nil) + ) + + (:xml + (setf last-tag val) + (setf raw-mode-delimiter ">") + (let* ((name (tag-name val))) + (when (and callback-only (tag-callback name)) + (push name current-callback-tags)) + (save-state) + (setq current-tag val) + (setq guts nil) + )) + + (:start-tag + (setf last-tag val) + (if* (or (eq last-tag :style) + (and (listp last-tag) (eq (first last-tag) :style))) + then + (setf raw-mode-delimiter + (if* (eq excl:*current-case-mode* :CASE-INSENSITIVE-UPPER) + then "" + else "")) + elseif (or (eq last-tag :script) + (and (listp last-tag) (eq (first last-tag) :script))) + then + (setf raw-mode-delimiter + (if* (eq excl:*current-case-mode* :CASE-INSENSITIVE-UPPER) + then "" + else ""))) + ; maybe this is an end tag too + (let* ((name (tag-name val)) + (auto-close (tag-auto-close name)) + (auto-close-stop nil) + (no-end (or (tag-no-end name) (member name no-body-tags)))) + (when (and callback-only (tag-callback name)) + (push name current-callback-tags)) + (when (or (and callback-only current-callback-tags) + (not callback-only)) + (if* auto-close + then (setq auto-close-stop (tag-auto-close-stop name)) + (close-off-tags auto-close auto-close-stop nil nil)) + (when (and pending-ch-format (not no-end)) + (if* (member name *ch-format* :test #'eq) then nil + elseif (member name *in-line* :test #'eq) then + ;; close off only tags that are within *in-line* block + (check-in-line name) + else ;; close ALL pending char tags and then reopen + (dolist (this-tag (reverse pending-ch-format)) + (close-off-tags (list (if (listp this-tag) (first this-tag) this-tag)) nil nil nil)) + )) + (if* no-end + then ; this is a singleton tag + (let ((callback (tag-callback (tag-name (if* (atom val) + then val + else (first val)))))) + (when callback + (funcall callback (if* (atom val) + then val + else (list val))))) + (push (if* (atom val) + then val + else (list val)) + guts) + else (save-state) + (setq current-tag val) + (setq guts nil)) + (if* (member name *ch-format* :test #'eq) + then (push val pending-ch-format) + else (when (not + (or (eq last-tag :style) + (and (listp last-tag) (eq (first last-tag) :style)) + (eq last-tag :script) + (and (listp last-tag) (eq (first last-tag) :script)))) + (dolist (tmp (reverse closed-pending-ch-format)) + (save-state) + (setf current-tag tmp) + (setf guts nil))) + ) + (when (not + (or (eq last-tag :style) + (and (listp last-tag) (eq (first last-tag) :style)) + (eq last-tag :script) + (and (listp last-tag) (eq (first last-tag) :script)))) + (setf closed-pending-ch-format nil)) + ))) + + (:end-tag + (setf raw-mode-delimiter nil) + (when (or (and callback-only current-callback-tags) + (not callback-only)) + (close-off-tags (list val) nil nil t) + (when (member val *ch-format* :test #'eq) + (setf pending-ch-format + (remove val pending-ch-format :count 1 + :test #'(lambda (x y) (eq x (if (listp y) (first y) y))))) + (setf closed-pending-ch-format + (remove val closed-pending-ch-format :count 1 + :test #'(lambda (x y) (eq x (if (listp y) (first y) y))))) + ) + (dolist (tmp (reverse closed-pending-ch-format)) + (save-state) + (setf current-tag tmp) + (setf guts nil)) + (setf closed-pending-ch-format nil) + )) + + (:comment + (setf raw-mode-delimiter nil) + (when (or (and callback-only current-callback-tags) + (not callback-only)) + (push `(:comment ,val) guts))) + + (:eof + (setf raw-mode-delimiter nil) + ;; close off all tags + (when (or (and callback-only current-callback-tags) + (not callback-only)) + (close-off-tags '(:start-parse) nil collect-rogue-tags nil)) + (put-back-tokenbuf tokenbuf) + (if collect-rogue-tags + (return (values (cdar guts) rogue-tags)) + (return (cdar guts)))))))))) + + (defmethod parse-html (file &key callback-only callbacks collect-rogue-tags - no-body-tags parse-entities) + no-body-tags parse-entities) (declare (optimize (speed 3) (safety 1))) (with-open-file (p file :direction :input) (parse-html p :callback-only callback-only :callbacks callbacks - :collect-rogue-tags collect-rogue-tags - :no-body-tags no-body-tags - :parse-entities parse-entities - ))) - + :collect-rogue-tags collect-rogue-tags + :no-body-tags no-body-tags + :parse-entities parse-entities + ))) + (defmethod parse-html ((str string) &key callback-only callbacks collect-rogue-tags - no-body-tags parse-entities) + no-body-tags parse-entities) (declare (optimize (speed 3) (safety 1))) - (parse-html (make-string-input-stream str) - :callback-only callback-only :callbacks callbacks - :collect-rogue-tags collect-rogue-tags - :no-body-tags no-body-tags - :parse-entities parse-entities - )) - - - - - - - - - + (parse-html (make-string-input-stream str) + :callback-only callback-only :callbacks callbacks + :collect-rogue-tags collect-rogue-tags + :no-body-tags no-body-tags + :parse-entities parse-entities + )) + + + + + + + + + ;;;;;;;;;;;; test ;;;(defun doit (ignore-data) ;;; (with-open-file (p "readme.htm") ;;; (loop ;;; (multiple-value-bind (val kind) (next-token p ignore-data) -;;; ;(format t "~s -> ~s~%" kind val) -;;; -;;; (if* (eq kind :eof) then (return)))))) +;;; ;(format t "~s -> ~s~%" kind val) +;;; +;;; (if* (eq kind :eof) then (return)))))) ;;; ;;;(defun pdoit (&optional (file "testa.html")) ;;; (with-open-file (p file) @@ -1387,8 +1387,8 @@ ;;; ;;;;; requires http client module to work ;;;(defun getparse (host path) -;;; (parse-html (httpr-body -;;; (parse-response -;;; (simple-get host path))))) +;;; (parse-html (httpr-body +;;; (parse-response +;;; (simple-get host path))))) (provide :phtml) diff --git a/pxml-test.cl b/pxml-test.cl index 0392f70..733d864 100644 --- a/pxml-test.cl +++ b/pxml-test.cl @@ -1,9 +1,9 @@ ;; -;; copyright (c) 1986-2000 Franz Inc, Berkeley, CA +;; copyright (c) 1986-2000 Franz Inc, Berkeley, CA ;; ;; This code is free software; you can redistribute it and/or ;; modify it under the terms of the version 2.1 of -;; the GNU Lesser General Public License as published by +;; the GNU Lesser General Public License as published by ;; the Free Software Foundation, as clarified by the AllegroServe ;; prequel found in license-allegroserve.txt. ;; @@ -12,15 +12,15 @@ ;; merchantability or fitness for a particular purpose. See the GNU ;; Lesser General Public License for more details. ;; -;; Version 2.1 of the GNU Lesser General Public License is in the file +;; Version 2.1 of the GNU Lesser General Public License is in the file ;; license-lgpl.txt that was distributed with this file. ;; If it is not present, you can access it from ;; http://www.gnu.org/copyleft/lesser.txt (until superseded by a newer -;; version) or write to the Free Software Foundation, Inc., 59 Temple Place, +;; version) or write to the Free Software Foundation, Inc., 59 Temple Place, ;; Suite 330, Boston, MA 02111-1307 USA ;; -;; Change Log +;; Change Log ;; ;; 10/14/00 add namespace example; xml-error related change @@ -40,18 +40,18 @@ (defun test-one-file (int external-callback) (let ((filename (concatenate 'string (format nil "~3,'0d" int) ".xml"))) - (equalp (with-open-file (p filename) - (parse-xml p :external-callback external-callback - :content-only t)) - (with-open-file (p (concatenate 'string "out/" filename)) - (parse-xml p))))) + (equalp (with-open-file (p filename) + (parse-xml p :external-callback external-callback + :content-only t)) + (with-open-file (p (concatenate 'string "out/" filename)) + (parse-xml p))))) (defun test-some-files (max &key skip-list external-callback) (dotimes (i max) (if* (member (+ 1 i) skip-list) then - (format t "i: ~s skipping...~%" (+ 1 i)) + (format t "i: ~s skipping...~%" (+ 1 i)) else - (format t "i: ~s equalp: ~s~%" (+ 1 i) (test-one-file (+ 1 i) external-callback))))) + (format t "i: ~s equalp: ~s~%" (+ 1 i) (test-one-file (+ 1 i) external-callback))))) ;; have to be in valid/sa directory when this is run (defun test-sa-files () @@ -67,19 +67,19 @@ (defun test-one-bad-file (filename external-callback) (ignore-errors - (with-open-file (p filename) + (with-open-file (p filename) (parse-xml p :external-callback external-callback - :content-only t)))) + :content-only t)))) (defun test-some-bad-files (max external-callback) (dotimes (i max) (let* ((index (+ 1 i)) - (filename (concatenate 'string (format nil "~3,'0d" index) ".xml"))) + (filename (concatenate 'string (format nil "~3,'0d" index) ".xml"))) (multiple-value-bind (val error) - (test-one-bad-file filename external-callback) - (format t "i: ~s error: ~s~%" - index (if error - (simple-condition-format-arguments error) val)))))) + (test-one-bad-file filename external-callback) + (format t "i: ~s error: ~s~%" + index (if error + (simple-condition-format-arguments error) val)))))) ;; have to be in not-wf/sa directory when this is run (defun test-not-wf-sa-files () @@ -103,8 +103,8 @@ (setf var-name (uri-path var-name)) (if* (equal var-name "null") then nil else - (let ((string (eval (intern var-name (find-package :user))))) - (make-string-input-stream string)))) + (let ((string (eval (intern var-name (find-package :user))))) + (make-string-input-stream string)))) (defvar *xml-example-string* " diff --git a/pxml0.cl b/pxml0.cl index 47c0de6..f0fdb9a 100644 --- a/pxml0.cl +++ b/pxml0.cl @@ -45,10 +45,10 @@ (let ((pxml-version-strings nil)) (defun pxml-dribble-bug-hook (stream-or-string) (if (stringp stream-or-string) - (push stream-or-string pxml-version-strings) - (loop for string in (reverse pxml-version-strings) - do (write-string string stream-or-string) - (terpri stream-or-string)))) + (push stream-or-string pxml-version-strings) + (loop for string in (reverse pxml-version-strings) + do (write-string string stream-or-string) + (terpri stream-or-string)))) #+excl (push 'pxml-dribble-bug-hook excl:*dribble-bug-hooks*))) @@ -68,9 +68,9 @@ (declare (optimize (speed 3) (safety 1))) (let ((code (char-code char))) (or (eq code #x20) - (eq code #x9) - (eq code #xD) - (eq code #xA)))) + (eq code #x9) + (eq code #xD) + (eq code #xA)))) #+unused (defmacro xml-eql-char-p (char) @@ -80,87 +80,87 @@ (declare (optimize (speed 3) (safety 1))) (let ((code (char-code char))) (or (<= #x0041 code #x005A) (<= #x0061 code #x007A) - (<= #x00C0 code #x00D6) (<= #x00D8 code #x00F6) - (<= #x00F8 code #x00FF) (<= #x0100 code #x0131) - (<= #x0134 code #x013E) (<= #x0141 code #x0148) - (<= #x014A code #x017E) (<= #x0180 code #x01C3) - (<= #x01CD code #x01F0) (<= #x01F4 code #x01F5) - (<= #x01FA code #x0217) (<= #x0250 code #x02A8) - (<= #x02BB code #x02C1) (= code #x0386) (<= #x0388 code #x038A) - (= code #x038C) (<= #x038E code #x03A1) (<= #x03A3 code #x03CE) - (<= #x03D0 code #x03D6) (= code #x03DA) (= code #x03DC) (= code #x03DE) - (= code #x03E0) (<= #x03E2 code #x03F3) (<= #x0401 code #x040C) - (<= #x040E code #x044F) (<= #x0451 code #x045C) - (<= #x045E code #x0481) (<= #x0490 code #x04C4) - (<= #x04C7 code #x04C8) (<= #x04CB code #x04CC) - (<= #x04D0 code #x04EB) (<= #x04EE code #x04F5) - (<= #x04F8 code #x04F9) (<= #x0531 code #x0556) (= code #x0559) - (<= #x0561 code #x0586) (<= #x05D0 code #x05EA) - (<= #x05F0 code #x05F2) (<= #x0621 code #x063A) - (<= #x0641 code #x064A) (<= #x0671 code #x06B7) - (<= #x06BA code #x06BE) (<= #x06C0 code #x06CE) - (<= #x06D0 code #x06D3) (= code #x06D5) (<= #x06E5 code #x06E6) - (<= #x0905 code #x0939) (= code #x093D) (<= #x0958 code #x0961) - (<= #x0985 code #x098C) (<= #x098F code #x0990) - (<= #x0993 code #x09A8) (<= #x09AA code #x09B0) (= code #x09B2) - (<= #x09B6 code #x09B9) (<= #x09DC code #x09DD) - (<= #x09DF code #x09E1) (<= #x09F0 code #x09F1) - (<= #x0A05 code #x0A0A) (<= #x0A0F code #x0A10) - (<= #x0A13 code #x0A28) (<= #x0A2A code #x0A30) - (<= #x0A32 code #x0A33) (<= #x0A35 code #x0A36) - (<= #x0A38 code #x0A39) (<= #x0A59 code #x0A5C) (= code #x0A5E) - (<= #x0A72 code #x0A74) (<= #x0A85 code #x0A8B) (= code #x0A8D) - (<= #x0A8F code #x0A91) (<= #x0A93 code #x0AA8) - (<= #x0AAA code #x0AB0) (<= #x0AB2 code #x0AB3) - (<= #x0AB5 code #x0AB9) (<= #x0ABD code #x0AE0) - (<= #x0B05 code #x0B0C) (<= #x0B0F code #x0B10) - (<= #x0B13 code #x0B28) (<= #x0B2A code #x0B30) - (<= #x0B32 code #x0B33) (<= #x0B36 code #x0B39) (= code #x0B3D) - (<= #x0B5C code #x0B5D) (<= #x0B5F code #x0B61) - (<= #x0B85 code #x0B8A) (<= #x0B8E code #x0B90) - (<= #x0B92 code #x0B95) (<= #x0B99 code #x0B9A) (= code #x0B9C) - (<= #x0B9E code #x0B9F) (<= #x0BA3 code #x0BA4) - (<= #x0BA8 code #x0BAA) (<= #x0BAE code #x0BB5) - (<= #x0BB7 code #x0BB9) (<= #x0C05 code #x0C0C) - (<= #x0C0E code #x0C10) (<= #x0C12 code #x0C28) - (<= #x0C2A code #x0C33) (<= #x0C35 code #x0C39) - (<= #x0C60 code #x0C61) (<= #x0C85 code #x0C8C) - (<= #x0C8E code #x0C90) (<= #x0C92 code #x0CA8) - (<= #x0CAA code #x0CB3) (<= #x0CB5 code #x0CB9) (= code #x0CDE) - (<= #x0CE0 code #x0CE1) (<= #x0D05 code #x0D0C) - (<= #x0D0E code #x0D10) (<= #x0D12 code #x0D28) - (<= #x0D2A code #x0D39) (<= #x0D60 code #x0D61) - (<= #x0E01 code #x0E2E) (= code #x0E30) (<= #x0E32 code #x0E33) - (<= #x0E40 code #x0E45) (<= #x0E81 code #x0E82) (= code #x0E84) - (<= #x0E87 code #x0E88) (= code #x0E8A) (= code #x0E8D) - (<= #x0E94 code #x0E97) (<= #x0E99 code #x0E9F) - (<= #x0EA1 code #x0EA3) (= code #x0EA5) (= code #x0EA7) - (<= #x0EAA code #x0EAB) (<= #x0EAD code #x0EAE) (= code #x0EB0) - (<= #x0EB2 code #x0EB3) (= code #x0EBD) (<= #x0EC0 code #x0EC4) - (<= #x0F40 code #x0F47) (<= #x0F49 code #x0F69) - (<= #x10A0 code #x10C5) (<= #x10D0 code #x10F6) (= code #x1100) - (<= #x1102 code #x1103) (<= #x1105 code #x1107) (= code #x1109) - (<= #x110B code #x110C) (<= #x110E code #x1112) (= code #x113C) - (= code #x113E) (= code #x1140) (= code #x114C) (= code #x114E) (= code #x1150) - (<= #x1154 code #x1155) (= code #x1159) (<= #x115F code #x1161) - (= code #x1163) (= code #x1165) (= code #x1167) (= code #x1169) - (<= #x116D code #x116E) (<= #x1172 code #x1173) (= code #x1175) - (= code #x119E) (= code #x11A8) (= code #x11AB) (<= #x11AE code #x11AF) - (<= #x11B7 code #x11B8) (= code #x11BA) (<= #x11BC code #x11C2) - (= code #x11EB) (= code #x11F0) (= code #x11F9) (<= #x1E00 code #x1E9B) - (<= #x1EA0 code #x1EF9) (<= #x1F00 code #x1F15) - (<= #x1F18 code #x1F1D) (<= #x1F20 code #x1F45) - (<= #x1F48 code #x1F4D) (<= #x1F50 code #x1F57) (= code #x1F59) - (= code #x1F5B) (= code #x1F5D) (<= #x1F5F code #x1F7D) - (<= #x1F80 code #x1FB4) (<= #x1FB6 code #x1FBC) (= code #x1FBE) - (<= #x1FC2 code #x1FC4) (<= #x1FC6 code #x1FCC) - (<= #x1FD0 code #x1FD3) (<= #x1FD6 code #x1FDB) - (<= #x1FE0 code #x1FEC) (<= #x1FF2 code #x1FF4) - (<= #x1FF6 code #x1FFC) (= code #x2126) (<= #x212A code #x212B) - (= code #x212E) (<= #x2180 code #x2182) (<= #x3041 code #x3094) - (<= #x30A1 code #x30FA) (<= #x3105 code #x312C) - (<= #xAC00 code #xD7A3) - ))) + (<= #x00C0 code #x00D6) (<= #x00D8 code #x00F6) + (<= #x00F8 code #x00FF) (<= #x0100 code #x0131) + (<= #x0134 code #x013E) (<= #x0141 code #x0148) + (<= #x014A code #x017E) (<= #x0180 code #x01C3) + (<= #x01CD code #x01F0) (<= #x01F4 code #x01F5) + (<= #x01FA code #x0217) (<= #x0250 code #x02A8) + (<= #x02BB code #x02C1) (= code #x0386) (<= #x0388 code #x038A) + (= code #x038C) (<= #x038E code #x03A1) (<= #x03A3 code #x03CE) + (<= #x03D0 code #x03D6) (= code #x03DA) (= code #x03DC) (= code #x03DE) + (= code #x03E0) (<= #x03E2 code #x03F3) (<= #x0401 code #x040C) + (<= #x040E code #x044F) (<= #x0451 code #x045C) + (<= #x045E code #x0481) (<= #x0490 code #x04C4) + (<= #x04C7 code #x04C8) (<= #x04CB code #x04CC) + (<= #x04D0 code #x04EB) (<= #x04EE code #x04F5) + (<= #x04F8 code #x04F9) (<= #x0531 code #x0556) (= code #x0559) + (<= #x0561 code #x0586) (<= #x05D0 code #x05EA) + (<= #x05F0 code #x05F2) (<= #x0621 code #x063A) + (<= #x0641 code #x064A) (<= #x0671 code #x06B7) + (<= #x06BA code #x06BE) (<= #x06C0 code #x06CE) + (<= #x06D0 code #x06D3) (= code #x06D5) (<= #x06E5 code #x06E6) + (<= #x0905 code #x0939) (= code #x093D) (<= #x0958 code #x0961) + (<= #x0985 code #x098C) (<= #x098F code #x0990) + (<= #x0993 code #x09A8) (<= #x09AA code #x09B0) (= code #x09B2) + (<= #x09B6 code #x09B9) (<= #x09DC code #x09DD) + (<= #x09DF code #x09E1) (<= #x09F0 code #x09F1) + (<= #x0A05 code #x0A0A) (<= #x0A0F code #x0A10) + (<= #x0A13 code #x0A28) (<= #x0A2A code #x0A30) + (<= #x0A32 code #x0A33) (<= #x0A35 code #x0A36) + (<= #x0A38 code #x0A39) (<= #x0A59 code #x0A5C) (= code #x0A5E) + (<= #x0A72 code #x0A74) (<= #x0A85 code #x0A8B) (= code #x0A8D) + (<= #x0A8F code #x0A91) (<= #x0A93 code #x0AA8) + (<= #x0AAA code #x0AB0) (<= #x0AB2 code #x0AB3) + (<= #x0AB5 code #x0AB9) (<= #x0ABD code #x0AE0) + (<= #x0B05 code #x0B0C) (<= #x0B0F code #x0B10) + (<= #x0B13 code #x0B28) (<= #x0B2A code #x0B30) + (<= #x0B32 code #x0B33) (<= #x0B36 code #x0B39) (= code #x0B3D) + (<= #x0B5C code #x0B5D) (<= #x0B5F code #x0B61) + (<= #x0B85 code #x0B8A) (<= #x0B8E code #x0B90) + (<= #x0B92 code #x0B95) (<= #x0B99 code #x0B9A) (= code #x0B9C) + (<= #x0B9E code #x0B9F) (<= #x0BA3 code #x0BA4) + (<= #x0BA8 code #x0BAA) (<= #x0BAE code #x0BB5) + (<= #x0BB7 code #x0BB9) (<= #x0C05 code #x0C0C) + (<= #x0C0E code #x0C10) (<= #x0C12 code #x0C28) + (<= #x0C2A code #x0C33) (<= #x0C35 code #x0C39) + (<= #x0C60 code #x0C61) (<= #x0C85 code #x0C8C) + (<= #x0C8E code #x0C90) (<= #x0C92 code #x0CA8) + (<= #x0CAA code #x0CB3) (<= #x0CB5 code #x0CB9) (= code #x0CDE) + (<= #x0CE0 code #x0CE1) (<= #x0D05 code #x0D0C) + (<= #x0D0E code #x0D10) (<= #x0D12 code #x0D28) + (<= #x0D2A code #x0D39) (<= #x0D60 code #x0D61) + (<= #x0E01 code #x0E2E) (= code #x0E30) (<= #x0E32 code #x0E33) + (<= #x0E40 code #x0E45) (<= #x0E81 code #x0E82) (= code #x0E84) + (<= #x0E87 code #x0E88) (= code #x0E8A) (= code #x0E8D) + (<= #x0E94 code #x0E97) (<= #x0E99 code #x0E9F) + (<= #x0EA1 code #x0EA3) (= code #x0EA5) (= code #x0EA7) + (<= #x0EAA code #x0EAB) (<= #x0EAD code #x0EAE) (= code #x0EB0) + (<= #x0EB2 code #x0EB3) (= code #x0EBD) (<= #x0EC0 code #x0EC4) + (<= #x0F40 code #x0F47) (<= #x0F49 code #x0F69) + (<= #x10A0 code #x10C5) (<= #x10D0 code #x10F6) (= code #x1100) + (<= #x1102 code #x1103) (<= #x1105 code #x1107) (= code #x1109) + (<= #x110B code #x110C) (<= #x110E code #x1112) (= code #x113C) + (= code #x113E) (= code #x1140) (= code #x114C) (= code #x114E) (= code #x1150) + (<= #x1154 code #x1155) (= code #x1159) (<= #x115F code #x1161) + (= code #x1163) (= code #x1165) (= code #x1167) (= code #x1169) + (<= #x116D code #x116E) (<= #x1172 code #x1173) (= code #x1175) + (= code #x119E) (= code #x11A8) (= code #x11AB) (<= #x11AE code #x11AF) + (<= #x11B7 code #x11B8) (= code #x11BA) (<= #x11BC code #x11C2) + (= code #x11EB) (= code #x11F0) (= code #x11F9) (<= #x1E00 code #x1E9B) + (<= #x1EA0 code #x1EF9) (<= #x1F00 code #x1F15) + (<= #x1F18 code #x1F1D) (<= #x1F20 code #x1F45) + (<= #x1F48 code #x1F4D) (<= #x1F50 code #x1F57) (= code #x1F59) + (= code #x1F5B) (= code #x1F5D) (<= #x1F5F code #x1F7D) + (<= #x1F80 code #x1FB4) (<= #x1FB6 code #x1FBC) (= code #x1FBE) + (<= #x1FC2 code #x1FC4) (<= #x1FC6 code #x1FCC) + (<= #x1FD0 code #x1FD3) (<= #x1FD6 code #x1FDB) + (<= #x1FE0 code #x1FEC) (<= #x1FF2 code #x1FF4) + (<= #x1FF6 code #x1FFC) (= code #x2126) (<= #x212A code #x212B) + (= code #x212E) (<= #x2180 code #x2182) (<= #x3041 code #x3094) + (<= #x30A1 code #x30FA) (<= #x3105 code #x312C) + (<= #xAC00 code #xD7A3) + ))) (defun xml-ideographic-p (char) (declare (optimize (speed 3) (safety 1))) @@ -171,63 +171,63 @@ (declare (optimize (speed 3) (safety 1))) (let ((code (char-code char))) (or (<= #x0300 code #x0345) (<= #x0360 code #x0361) - (<= #x0483 code #x0486) (<= #x0591 code #x05A1) - (<= #x05A3 code #x05B9) (<= #x05BB code #x05BD) (= code #x05BF) - (<= #x05C1 code #x05C2) (= code #x05C4) (<= #x064B code #x0652) - (= code #x0670) (<= #x06D6 code #x06DC) (<= #x06DD code #x06DF) - (<= #x06E0 code #x06E4) (<= #x06E7 code #x06E8) - (<= #x06EA code #x06ED) (<= #x0901 code #x0903) (= code #x093C) - (<= #x093E code #x094C) (= code #x094D) (<= #x0951 code #x0954) - (<= #x0962 code #x0963) (<= #x0981 code #x0983) (= code #x09BC) - (<= #x09BE code #x09BF) (<= #x09C0 code #x09C4) - (<= #x09C7 code #x09C8) (<= #x09CB code #x09CD) (= code #x09D7) - (<= #x09E2 code #x09E3) (= code #x0A02) (= code #x0A3C) (= code #x0A3E) - (= code #x0A3F) (<= #x0A40 code #x0A42) (<= #x0A47 code #x0A48) - (<= #x0A4B code #x0A4D) (<= #x0A70 code #x0A71) - (<= #x0A81 code #x0A83) (= code #x0ABC) (<= #x0ABE code #x0AC5) - (<= #x0AC7 code #x0AC9) (<= #x0ACB code #x0ACD) - (<= #x0B01 code #x0B03) (= code #x0B3C) (<= #x0B3E code #x0B43) - (<= #x0B47 code #x0B48) (<= #x0B4B code #x0B4D) - (<= #x0B56 code #x0B57) (<= #x0B82 code #x0B83) - (<= #x0BBE code #x0BC2) (<= #x0BC6 code #x0BC8) - (<= #x0BCA code #x0BCD) (= code #x0BD7) (<= #x0C01 code #x0C03) - (<= #x0C3E code #x0C44) (<= #x0C46 code #x0C48) - (<= #x0C4A code #x0C4D) (<= #x0C55 code #x0C56) - (<= #x0C82 code #x0C83) (<= #x0CBE code #x0CC4) - (<= #x0CC6 code #x0CC8) (<= #x0CCA code #x0CCD) - (<= #x0CD5 code #x0CD6) (<= #x0D02 code #x0D03) - (<= #x0D3E code #x0D43) (<= #x0D46 code #x0D48) - (<= #x0D4A code #x0D4D) (= code #x0D57) (= code #x0E31) - (<= #x0E34 code #x0E3A) (<= #x0E47 code #x0E4E) (= code #x0EB1) - (<= #x0EB4 code #x0EB9) (<= #x0EBB code #x0EBC) - (<= #x0EC8 code #x0ECD) (<= #x0F18 code #x0F19) (= code #x0F35) - (= code #x0F37) (= code #x0F39) (= code #x0F3E) (= code #x0F3F) - (<= #x0F71 code #x0F84) (<= #x0F86 code #x0F8B) - (<= #x0F90 code #x0F95) (= code #x0F97) (<= #x0F99 code #x0FAD) - (<= #x0FB1 code #x0FB7) (= code #x0FB9) (<= #x20D0 code #x20DC) - (= code #x20E1) (<= #x302A code #x302F) (= code #x3099) (= code #x309A) - ))) + (<= #x0483 code #x0486) (<= #x0591 code #x05A1) + (<= #x05A3 code #x05B9) (<= #x05BB code #x05BD) (= code #x05BF) + (<= #x05C1 code #x05C2) (= code #x05C4) (<= #x064B code #x0652) + (= code #x0670) (<= #x06D6 code #x06DC) (<= #x06DD code #x06DF) + (<= #x06E0 code #x06E4) (<= #x06E7 code #x06E8) + (<= #x06EA code #x06ED) (<= #x0901 code #x0903) (= code #x093C) + (<= #x093E code #x094C) (= code #x094D) (<= #x0951 code #x0954) + (<= #x0962 code #x0963) (<= #x0981 code #x0983) (= code #x09BC) + (<= #x09BE code #x09BF) (<= #x09C0 code #x09C4) + (<= #x09C7 code #x09C8) (<= #x09CB code #x09CD) (= code #x09D7) + (<= #x09E2 code #x09E3) (= code #x0A02) (= code #x0A3C) (= code #x0A3E) + (= code #x0A3F) (<= #x0A40 code #x0A42) (<= #x0A47 code #x0A48) + (<= #x0A4B code #x0A4D) (<= #x0A70 code #x0A71) + (<= #x0A81 code #x0A83) (= code #x0ABC) (<= #x0ABE code #x0AC5) + (<= #x0AC7 code #x0AC9) (<= #x0ACB code #x0ACD) + (<= #x0B01 code #x0B03) (= code #x0B3C) (<= #x0B3E code #x0B43) + (<= #x0B47 code #x0B48) (<= #x0B4B code #x0B4D) + (<= #x0B56 code #x0B57) (<= #x0B82 code #x0B83) + (<= #x0BBE code #x0BC2) (<= #x0BC6 code #x0BC8) + (<= #x0BCA code #x0BCD) (= code #x0BD7) (<= #x0C01 code #x0C03) + (<= #x0C3E code #x0C44) (<= #x0C46 code #x0C48) + (<= #x0C4A code #x0C4D) (<= #x0C55 code #x0C56) + (<= #x0C82 code #x0C83) (<= #x0CBE code #x0CC4) + (<= #x0CC6 code #x0CC8) (<= #x0CCA code #x0CCD) + (<= #x0CD5 code #x0CD6) (<= #x0D02 code #x0D03) + (<= #x0D3E code #x0D43) (<= #x0D46 code #x0D48) + (<= #x0D4A code #x0D4D) (= code #x0D57) (= code #x0E31) + (<= #x0E34 code #x0E3A) (<= #x0E47 code #x0E4E) (= code #x0EB1) + (<= #x0EB4 code #x0EB9) (<= #x0EBB code #x0EBC) + (<= #x0EC8 code #x0ECD) (<= #x0F18 code #x0F19) (= code #x0F35) + (= code #x0F37) (= code #x0F39) (= code #x0F3E) (= code #x0F3F) + (<= #x0F71 code #x0F84) (<= #x0F86 code #x0F8B) + (<= #x0F90 code #x0F95) (= code #x0F97) (<= #x0F99 code #x0FAD) + (<= #x0FB1 code #x0FB7) (= code #x0FB9) (<= #x20D0 code #x20DC) + (= code #x20E1) (<= #x302A code #x302F) (= code #x3099) (= code #x309A) + ))) (defun xml-digit-p (char) (declare (optimize (speed 3) (safety 1))) (let ((code (char-code char))) (or (<= #x0030 code #x0039) (<= #x0660 code #x0669) - (<= #x06F0 code #x06F9) (<= #x0966 code #x096F) - (<= #x09E6 code #x09EF) (<= #x0A66 code #x0A6F) - (<= #x0AE6 code #x0AEF) (<= #x0B66 code #x0B6F) - (<= #x0BE7 code #x0BEF) (<= #x0C66 code #x0C6F) - (<= #x0CE6 code #x0CEF) (<= #x0D66 code #x0D6F) - (<= #x0E50 code #x0E59) (<= #x0ED0 code #x0ED9) - (<= #x0F20 code #x0F29) - ))) + (<= #x06F0 code #x06F9) (<= #x0966 code #x096F) + (<= #x09E6 code #x09EF) (<= #x0A66 code #x0A6F) + (<= #x0AE6 code #x0AEF) (<= #x0B66 code #x0B6F) + (<= #x0BE7 code #x0BEF) (<= #x0C66 code #x0C6F) + (<= #x0CE6 code #x0CEF) (<= #x0D66 code #x0D6F) + (<= #x0E50 code #x0E59) (<= #x0ED0 code #x0ED9) + (<= #x0F20 code #x0F29) + ))) (defun xml-extender-p (char) (declare (optimize (speed 3) (safety 1))) (let ((code (char-code char))) (or (= code #x00B7) (= code #x02D0) (= code #x02D1) (= code #x0387) (= code #x0640) - (= code #x0E46) (= code #x0EC6) (= code #x3005) (<= #x3031 code #x3035) - (<= #x309D code #x309E) (<= #x30FC code #x30FE) - ))) + (= code #x0E46) (= code #x0EC6) (= code #x3005) (<= #x3031 code #x3035) + (<= #x309D code #x309E) (<= #x30FC code #x30FE) + ))) (defmacro xml-letter-p (char) `(or (xml-base-char-p ,char) (xml-ideographic-p ,char))) diff --git a/pxml1.cl b/pxml1.cl index ce9fa65..3ddd101 100644 --- a/pxml1.cl +++ b/pxml1.cl @@ -33,22 +33,22 @@ (declare (optimize (speed 3) (safety 1))) (without-scheduling (do ((cols *collectors* (cdr cols))) - ((null cols) - ; toss it away - nil) + ((null cols) + ; toss it away + nil) (if* (null (car cols)) - then (setf (car cols) col) - (return))))) + then (setf (car cols) col) + (return))))) (defun pub-id-char-p (char) (declare (optimize (speed 3) (safety 1))) (let ((code (char-code char))) (or (= #x20 code) (= #xD code) (= #xA code) - (<= (char-code #\a) code (char-code #\z)) - (<= (char-code #\A) code (char-code #\Z)) - (<= (char-code #\0) code (char-code #\9)) - (member char '( #\- #\' #\( #\) #\+ #\, #\. #\/ #\: #\= #\? - #\; #\! #\* #\# #\@ #\$ #\_ #\%))))) + (<= (char-code #\a) code (char-code #\z)) + (<= (char-code #\A) code (char-code #\Z)) + (<= (char-code #\0) code (char-code #\9)) + (member char '( #\- #\' #\( #\) #\+ #\, #\. #\/ #\: #\= #\? + #\; #\! #\* #\# #\@ #\$ #\_ #\%))))) (defparameter *keyword-package* (find-package :keyword)) @@ -85,21 +85,21 @@ (let (buf) (without-scheduling (do* ((bufs *tokenbufs* (cdr bufs)) - (this (car bufs) (car bufs))) - ((null bufs)) - (if* this - then (setf (car bufs) nil) - (setq buf this) - (return)))) + (this (car bufs) (car bufs))) + ((null bufs)) + (if* this + then (setf (car bufs) nil) + (setq buf this) + (return)))) (if* buf then (setf (tokenbuf-cur buf) 0) - (setf (tokenbuf-max buf) 0) - (setf (tokenbuf-stream buf) nil) - buf + (setf (tokenbuf-max buf) 0) + (setf (tokenbuf-stream buf) nil) + buf else (make-tokenbuf - :cur 0 - :max 0 - :data (make-array 1024 :element-type 'character))))) + :cur 0 + :max 0 + :data (make-array 1024 :element-type 'character))))) (defstruct collector next ; next index to set @@ -113,57 +113,57 @@ (if* (not ns-to-package) then (excl::intern* (collector-data coll) (collector-next coll) package) else - (let (new-package (data (collector-data coll))) - (if* (and (eq (schar data 0) #\x) - (eq (schar data 1) #\m) - (eq (schar data 2) #\l) - (eq (schar data 3) #\n) - (eq (schar data 4) #\s) - (or (eq (schar data 5) #\:) - (= (collector-next coll) 5))) - then ;; putting xmlns: in :none namespace - (setf new-package (assoc :none ns-to-package)) - (when new-package (setf package (rest new-package))) - (excl::intern* (collector-data coll) (collector-next coll) package) - else - (let ((colon-index -1) - (data (collector-data coll))) - (dotimes (i (collector-next coll)) - (when (eq (schar data i) #\:) - (setf colon-index i) - (return))) - (if* (> colon-index -1) then - (let ((string1 (make-string colon-index)) - new-package string2) - (dotimes (i colon-index) - (setf (schar string1 i) (schar data i))) - (setf new-package (assoc string1 ns-to-package :test 'string=)) - (if* new-package - then - (setf string2 (make-string (- (collector-next coll) - (+ 1 colon-index)))) - (dotimes (i (- (collector-next coll) - (+ 1 colon-index))) - (setf (schar string2 i) - (schar data (+ colon-index 1 i)))) - (excl::intern string2 (rest new-package)) - else - (excl::intern* (collector-data coll) - (collector-next coll) package))) - else - (let ((new-package (assoc :none ns-to-package))) - (when new-package - (setf package (rest new-package)))) - (excl::intern* (collector-data coll) - (collector-next coll) package))) - )) - )) + (let (new-package (data (collector-data coll))) + (if* (and (eq (schar data 0) #\x) + (eq (schar data 1) #\m) + (eq (schar data 2) #\l) + (eq (schar data 3) #\n) + (eq (schar data 4) #\s) + (or (eq (schar data 5) #\:) + (= (collector-next coll) 5))) + then ;; putting xmlns: in :none namespace + (setf new-package (assoc :none ns-to-package)) + (when new-package (setf package (rest new-package))) + (excl::intern* (collector-data coll) (collector-next coll) package) + else + (let ((colon-index -1) + (data (collector-data coll))) + (dotimes (i (collector-next coll)) + (when (eq (schar data i) #\:) + (setf colon-index i) + (return))) + (if* (> colon-index -1) then + (let ((string1 (make-string colon-index)) + new-package string2) + (dotimes (i colon-index) + (setf (schar string1 i) (schar data i))) + (setf new-package (assoc string1 ns-to-package :test 'string=)) + (if* new-package + then + (setf string2 (make-string (- (collector-next coll) + (+ 1 colon-index)))) + (dotimes (i (- (collector-next coll) + (+ 1 colon-index))) + (setf (schar string2 i) + (schar data (+ colon-index 1 i)))) + (excl::intern string2 (rest new-package)) + else + (excl::intern* (collector-data coll) + (collector-next coll) package))) + else + (let ((new-package (assoc :none ns-to-package))) + (when new-package + (setf package (rest new-package)))) + (excl::intern* (collector-data coll) + (collector-next coll) package))) + )) + )) (defun compute-coll-string (coll) (declare (optimize (speed 3) (safety 1))) ;; return the string that's in the collection (let ((str (make-string (collector-next coll))) - (from (collector-data coll))) + (from (collector-data coll))) (dotimes (i (collector-next coll)) (setf (schar str i) (schar from i))) @@ -174,7 +174,7 @@ ;; increase the size of the data portion of the collector and then ;; add the given char at the end (let* ((odata (collector-data coll)) - (ndata (make-string (* 2 (length odata))))) + (ndata (make-string (* 2 (length odata))))) (dotimes (i (length odata)) (setf (schar ndata i) (schar odata i))) (setf (collector-data coll) ndata) @@ -187,82 +187,82 @@ (declare (optimize (speed 3) (safety 1))) (without-scheduling (do ((bufs *tokenbufs* (cdr bufs))) - ((null bufs) - ; toss it away - nil) + ((null bufs) + ; toss it away + nil) (if* (null (car bufs)) - then (setf (car bufs) buf) - (return))))) + then (setf (car bufs) buf) + (return))))) (defun get-collector () (declare (optimize (speed 3) (safety 1))) (let (col) (without-scheduling (do* ((cols *collectors* (cdr cols)) - (this (car cols) (car cols))) - ((null cols)) - (if* this - then (setf (car cols) nil) - (setq col this) - (return)))) + (this (car cols) (car cols))) + ((null cols)) + (if* this + then (setf (car cols) nil) + (setq col this) + (return)))) (if* col then (setf (collector-next col) 0) - col + col else (make-collector - :next 0 - :max 100 - :data (make-string 100))))) + :next 0 + :max 100 + :data (make-string 100))))) (defmacro next-char (tokenbuf read-sequence-func) `(let ((cur (tokenbuf-cur ,tokenbuf)) - (tb (tokenbuf-data ,tokenbuf))) + (tb (tokenbuf-data ,tokenbuf))) (if* (>= cur (tokenbuf-max ,tokenbuf)) - then ;; fill buffer - (if* (or (not (tokenbuf-stream ,tokenbuf)) - (zerop (setf (tokenbuf-max ,tokenbuf) - (if* ,read-sequence-func - then (funcall ,read-sequence-func tb - (tokenbuf-stream ,tokenbuf)) - else (read-sequence tb (tokenbuf-stream ,tokenbuf)))))) - then (setq cur nil) ;; eof - else (setq cur 0))) + then ;; fill buffer + (if* (or (not (tokenbuf-stream ,tokenbuf)) + (zerop (setf (tokenbuf-max ,tokenbuf) + (if* ,read-sequence-func + then (funcall ,read-sequence-func tb + (tokenbuf-stream ,tokenbuf)) + else (read-sequence tb (tokenbuf-stream ,tokenbuf)))))) + then (setq cur nil) ;; eof + else (setq cur 0))) (if* cur - then (prog1 - (let ((cc (schar tb cur))) - (if (and (tokenbuf-stream ,tokenbuf) (eq #\return cc)) #\newline cc)) - (setf (tokenbuf-cur ,tokenbuf) (1+ cur)))))) + then (prog1 + (let ((cc (schar tb cur))) + (if (and (tokenbuf-stream ,tokenbuf) (eq #\return cc)) #\newline cc)) + (setf (tokenbuf-cur ,tokenbuf) (1+ cur)))))) (defun get-next-char (iostruct) (declare (optimize (speed 3) (safety 1))) (let* (from-stream (tmp-char - (let (char) - (if* (iostruct-unget-char iostruct) then - ;; from-stream is used to do input CR/LF normalization - (setf from-stream t) - (setf char (first (iostruct-unget-char iostruct))) - (setf (iostruct-unget-char iostruct) (rest (iostruct-unget-char iostruct))) - char - elseif (iostruct-entity-bufs iostruct) then - (let (entity-buf) - (loop - (setf entity-buf (first (iostruct-entity-bufs iostruct))) - (if* (streamp (tokenbuf-stream entity-buf)) - then (setf from-stream t) - else (setf from-stream nil)) - (setf char (next-char entity-buf (iostruct-read-sequence-func iostruct))) - (when char (return)) - (when (streamp (tokenbuf-stream entity-buf)) - (close (tokenbuf-stream entity-buf)) - (put-back-tokenbuf entity-buf)) - (setf (iostruct-entity-bufs iostruct) (rest (iostruct-entity-bufs iostruct))) - (setf (iostruct-entity-names iostruct) (rest (iostruct-entity-names iostruct))) - (when (not (iostruct-entity-bufs iostruct)) (return)))) - (if* char then char - else (next-char (iostruct-tokenbuf iostruct) - (iostruct-read-sequence-func iostruct))) - else (setf from-stream t) - (next-char (iostruct-tokenbuf iostruct) - (iostruct-read-sequence-func iostruct)))))) + (let (char) + (if* (iostruct-unget-char iostruct) then + ;; from-stream is used to do input CR/LF normalization + (setf from-stream t) + (setf char (first (iostruct-unget-char iostruct))) + (setf (iostruct-unget-char iostruct) (rest (iostruct-unget-char iostruct))) + char + elseif (iostruct-entity-bufs iostruct) then + (let (entity-buf) + (loop + (setf entity-buf (first (iostruct-entity-bufs iostruct))) + (if* (streamp (tokenbuf-stream entity-buf)) + then (setf from-stream t) + else (setf from-stream nil)) + (setf char (next-char entity-buf (iostruct-read-sequence-func iostruct))) + (when char (return)) + (when (streamp (tokenbuf-stream entity-buf)) + (close (tokenbuf-stream entity-buf)) + (put-back-tokenbuf entity-buf)) + (setf (iostruct-entity-bufs iostruct) (rest (iostruct-entity-bufs iostruct))) + (setf (iostruct-entity-names iostruct) (rest (iostruct-entity-names iostruct))) + (when (not (iostruct-entity-bufs iostruct)) (return)))) + (if* char then char + else (next-char (iostruct-tokenbuf iostruct) + (iostruct-read-sequence-func iostruct))) + else (setf from-stream t) + (next-char (iostruct-tokenbuf iostruct) + (iostruct-read-sequence-func iostruct)))))) (if* (and from-stream (eq tmp-char #\return)) then #\newline else tmp-char))) (defun unicode-check (p tokenbuf) @@ -275,90 +275,90 @@ #+allegro (let ((format (ignore-errors (excl:sniff-for-unicode p)))) (if* (eq format (find-external-format :unicode)) - then - (setf (stream-external-format p) format) - else - (setf (stream-external-format p) (find-external-format :utf8)))) + then + (setf (stream-external-format p) format) + else + (setf (stream-external-format p) (find-external-format :utf8)))) #-allegro (let* ((c (read-char p nil)) c2 - (c-code (if c (char-code c) nil))) + (c-code (if c (char-code c) nil))) (if* (eq #xFF c-code) then - (setf c2 (read-char p nil)) - (setf c-code (if c (char-code c2) nil)) - (if* (eq #xFE c-code) then - (format t "set unicode~%") - (setf (stream-external-format p) - (find-external-format - #+allegro :unicode - #-allegro :fat-little)) - else - (xml-error "stream has incomplete Unicode marker")) - else (setf (stream-external-format p) - (find-external-format :utf8)) - (when c - (push c (iostruct-unget-char tokenbuf)) - #+ignore (unread-char c p) ;; bug when there is single ^M in file - ))))) + (setf c2 (read-char p nil)) + (setf c-code (if c (char-code c2) nil)) + (if* (eq #xFE c-code) then + (format t "set unicode~%") + (setf (stream-external-format p) + (find-external-format + #+allegro :unicode + #-allegro :fat-little)) + else + (xml-error "stream has incomplete Unicode marker")) + else (setf (stream-external-format p) + (find-external-format :utf8)) + (when c + (push c (iostruct-unget-char tokenbuf)) + #+ignore (unread-char c p) ;; bug when there is single ^M in file + ))))) (defun add-default-values (val attlist-data) (declare (ignorable old-coll) (optimize (speed 3) (safety 1))) (if* (symbolp val) then - (let* ((tag-defaults (assoc val attlist-data)) defaults) - (dolist (def (rest tag-defaults)) - (if* (stringp (third def)) then - (push (first def) defaults) - (push (if (eq (second def) :CDATA) (third def) - (normalize-attrib-value (third def))) defaults) - elseif (and (eq (third def) :FIXED) (stringp (fourth def))) then - (push (first def) defaults) - (push (if (eq (second def) :CDATA) (fourth def) - (normalize-attrib-value (fourth def))) defaults) - )) - (if* defaults then - (setf val (append (list val) (nreverse defaults))) - else val) - ) + (let* ((tag-defaults (assoc val attlist-data)) defaults) + (dolist (def (rest tag-defaults)) + (if* (stringp (third def)) then + (push (first def) defaults) + (push (if (eq (second def) :CDATA) (third def) + (normalize-attrib-value (third def))) defaults) + elseif (and (eq (third def) :FIXED) (stringp (fourth def))) then + (push (first def) defaults) + (push (if (eq (second def) :CDATA) (fourth def) + (normalize-attrib-value (fourth def))) defaults) + )) + (if* defaults then + (setf val (append (list val) (nreverse defaults))) + else val) + ) else - ;; first make sure there are no errors in given list - (let ((pairs (rest val))) - (loop - (when (null pairs) (return)) - (let ((this-one (first pairs))) - (setf pairs (rest (rest pairs))) - (when (member this-one pairs) - (xml-error (concatenate 'string "Entity: " - (string (first val)) - " has multiple " - (string this-one) - " attribute values")))))) - (let ((tag-defaults (assoc (first val) attlist-data)) defaults) - (dolist (def (rest tag-defaults)) - (let ((old (member (first def) (rest val)))) - (if* (not old) then - (if* (stringp (third def)) then - (push (first def) defaults) - (push (third def) defaults) - elseif (and (eq (third def) :FIXED) (stringp (fourth def))) then - (push (first def) defaults) - (push (fourth def) defaults)) - else - (push (first old) defaults) - (push (second old) defaults)))) - (if* defaults then - ;; now look for attributes in original list that weren't in dtd - (let ((tmp-val (rest val)) att att-val) - (loop - (when (null tmp-val) (return)) - (setf att (first tmp-val)) - (setf att-val (second tmp-val)) - (setf tmp-val (rest (rest tmp-val))) - (when (not (member att defaults)) - (push att defaults) - (push att-val defaults)))) - (setf val (append (list (first val)) (nreverse defaults))) - else val)) - )) + ;; first make sure there are no errors in given list + (let ((pairs (rest val))) + (loop + (when (null pairs) (return)) + (let ((this-one (first pairs))) + (setf pairs (rest (rest pairs))) + (when (member this-one pairs) + (xml-error (concatenate 'string "Entity: " + (string (first val)) + " has multiple " + (string this-one) + " attribute values")))))) + (let ((tag-defaults (assoc (first val) attlist-data)) defaults) + (dolist (def (rest tag-defaults)) + (let ((old (member (first def) (rest val)))) + (if* (not old) then + (if* (stringp (third def)) then + (push (first def) defaults) + (push (third def) defaults) + elseif (and (eq (third def) :FIXED) (stringp (fourth def))) then + (push (first def) defaults) + (push (fourth def) defaults)) + else + (push (first old) defaults) + (push (second old) defaults)))) + (if* defaults then + ;; now look for attributes in original list that weren't in dtd + (let ((tmp-val (rest val)) att att-val) + (loop + (when (null tmp-val) (return)) + (setf att (first tmp-val)) + (setf att-val (second tmp-val)) + (setf tmp-val (rest (rest tmp-val))) + (when (not (member att defaults)) + (push att defaults) + (push att-val defaults)))) + (setf val (append (list (first val)) (nreverse defaults))) + else val)) + )) (defun normalize-public-value (public-value) (setf public-value (string-trim '(#\space) public-value)) @@ -367,11 +367,11 @@ (when (= count stop) (return public-value)) (setf cch (schar public-value count)) (if* (and (eq cch #\space) (eq last-ch #\space)) then - (setf public-value - (remove #\space public-value :start count :count 1)) - (decf stop) - else (incf count) - (setf last-ch cch))))) + (setf public-value + (remove #\space public-value :start count :count 1)) + (decf stop) + else (incf count) + (setf last-ch cch))))) (defun normalize-attrib-value (attrib-value &optional first-pass) @@ -379,71 +379,71 @@ (when first-pass (let ((count 0) (stop (length attrib-value)) (last-ch nil) cch) (loop - (when (= count stop) (return)) - (setf cch (schar attrib-value count)) - (if* (or (eq cch #\return) (eq cch #\tab)) then (setf (schar attrib-value count) #\space) - elseif (and (eq cch #\newline) (not (eq last-ch #\return))) then - (setf (schar attrib-value count) #\space) - elseif (and (eq cch #\newline) (eq last-ch #\return)) then - (setf attrib-value - (remove #\space attrib-value :start count :count 1)) - (decf stop)) - (incf count) - (setf last-ch cch)))) + (when (= count stop) (return)) + (setf cch (schar attrib-value count)) + (if* (or (eq cch #\return) (eq cch #\tab)) then (setf (schar attrib-value count) #\space) + elseif (and (eq cch #\newline) (not (eq last-ch #\return))) then + (setf (schar attrib-value count) #\space) + elseif (and (eq cch #\newline) (eq last-ch #\return)) then + (setf attrib-value + (remove #\space attrib-value :start count :count 1)) + (decf stop)) + (incf count) + (setf last-ch cch)))) (setf attrib-value (string-trim '(#\space) attrib-value)) (let ((count 0) (stop (length attrib-value)) (last-ch nil) cch) (loop (when (= count stop) (return attrib-value)) (setf cch (schar attrib-value count)) (if* (and (eq cch #\space) (eq last-ch #\space)) then - (setf attrib-value - (remove #\space attrib-value :start count :count 1)) - (decf stop) - else (incf count) - (setf last-ch cch))))) + (setf attrib-value + (remove #\space attrib-value :start count :count 1)) + (decf stop) + else (incf count) + (setf last-ch cch))))) (defun check-xmldecl (val tokenbuf) (declare (ignorable old-coll) (optimize (speed 3) (safety 1))) (when (not (and (symbolp (second val)) (string= "version" (symbol-name (second val))))) (xml-error "XML declaration tag does not include correct 'version' attribute")) (when (and (fourth val) - (or (not (symbolp (fourth val))) - (and (not (string= "standalone" (symbol-name (fourth val)))) - (not (string= "encoding" (symbol-name (fourth val))))))) + (or (not (symbolp (fourth val))) + (and (not (string= "standalone" (symbol-name (fourth val)))) + (not (string= "encoding" (symbol-name (fourth val))))))) (xml-error "XML declaration tag does not include correct 'encoding' or 'standalone' attribute")) (when (and (fourth val) (string= "standalone" (symbol-name (fourth val)))) (if* (equal (fifth val) "yes") then - (setf (iostruct-standalonep tokenbuf) t) + (setf (iostruct-standalonep tokenbuf) t) elseif (not (equal (fifth val) "no")) then - (xml-error "XML declaration tag does not include correct 'standalone' attribute value"))) + (xml-error "XML declaration tag does not include correct 'standalone' attribute value"))) (dotimes (i (length (third val))) (let ((c (schar (third val) i))) (when (and (not (alpha-char-p c)) - (not (digit-char-p c)) - (not (member c '(#\. #\_ #\- #\:))) - ) - (xml-error "XML declaration tag does not include correct 'version' attribute value")))) + (not (digit-char-p c)) + (not (member c '(#\. #\_ #\- #\:))) + ) + (xml-error "XML declaration tag does not include correct 'version' attribute value")))) (if* (and (fourth val) (eql :encoding (fourth val))) then (dotimes (i (length (fifth val))) - (let ((c (schar (fifth val) i))) - (when (and (not (alpha-char-p c)) - (if* (> i 0) then - (and (not (digit-char-p c)) - (not (member c '(#\. #\_ #\-)))) - else t)) - (xml-error "XML declaration tag does not include correct 'encoding' attribute value")))) - ;; jkf 3/26/02 - ;; if we have a stream we're reading from set its external-format - ;; to the encoding - ;; note - tokenbuf is really an iostruct, not a tokenbuf + (let ((c (schar (fifth val) i))) + (when (and (not (alpha-char-p c)) + (if* (> i 0) then + (and (not (digit-char-p c)) + (not (member c '(#\. #\_ #\-)))) + else t)) + (xml-error "XML declaration tag does not include correct 'encoding' attribute value")))) + ;; jkf 3/26/02 + ;; if we have a stream we're reading from set its external-format + ;; to the encoding + ;; note - tokenbuf is really an iostruct, not a tokenbuf #+allegro - (if* (tokenbuf-stream (iostruct-tokenbuf tokenbuf)) - then (setf (stream-external-format - (tokenbuf-stream (iostruct-tokenbuf tokenbuf))) - (find-external-format (fifth val)))) - - - )) + (if* (tokenbuf-stream (iostruct-tokenbuf tokenbuf)) + then (setf (stream-external-format + (tokenbuf-stream (iostruct-tokenbuf tokenbuf))) + (find-external-format (fifth val)))) + + + )) (defun xml-error (text) (declare (optimize (speed 3) (safety 1))) diff --git a/pxml2.cl b/pxml2.cl index f1b88f3..dcb697b 100644 --- a/pxml2.cl +++ b/pxml2.cl @@ -32,18 +32,18 @@ (defvar *debug-xml* nil) (defmethod parse-xml ((str string) &key external-callback general-entities parameter-entities - content-only uri-to-package) + content-only uri-to-package) (declare (optimize (speed 3) (safety 1))) (parse-xml (make-string-input-stream str) :external-callback external-callback - :general-entities general-entities - :parameter-entities parameter-entities :content-only content-only - :uri-to-package uri-to-package)) + :general-entities general-entities + :parameter-entities parameter-entities :content-only content-only + :uri-to-package uri-to-package)) (defmethod parse-xml ((p stream) &key external-callback general-entities - parameter-entities content-only uri-to-package) + parameter-entities content-only uri-to-package) (declare (optimize (speed 3) (safety 1))) (pxml-internal0 p nil external-callback general-entities parameter-entities content-only - uri-to-package)) + uri-to-package)) (eval-when (compile load eval) (defconstant state-docstart 0) ;; looking for XMLdecl, Misc, doctypedecl, 1st element @@ -58,11 +58,11 @@ (when (not (xml-space-p (elt val i))) (return nil)))) (defun pxml-internal0 (p read-sequence-func external-callback - general-entities parameter-entities content-only uri-to-package) + general-entities parameter-entities content-only uri-to-package) (declare (optimize (speed 3) (safety 1))) (let ((tokenbuf (make-iostruct :tokenbuf (get-tokenbuf) - :do-entity t - :read-sequence-func read-sequence-func))) + :do-entity t + :read-sequence-func read-sequence-func))) ;; set up stream right (setf (tokenbuf-stream (iostruct-tokenbuf tokenbuf)) p) ;; set up user specified entities @@ -72,402 +72,402 @@ ;; look for Unicode file (unicode-check p tokenbuf) (unwind-protect - (values (pxml-internal tokenbuf external-callback content-only) - (iostruct-uri-to-package tokenbuf)) + (values (pxml-internal tokenbuf external-callback content-only) + (iostruct-uri-to-package tokenbuf)) (dolist (entity-buf (iostruct-entity-bufs tokenbuf)) - (when (streamp (tokenbuf-stream entity-buf)) - (close (tokenbuf-stream entity-buf)) - (put-back-tokenbuf entity-buf)))) + (when (streamp (tokenbuf-stream entity-buf)) + (close (tokenbuf-stream entity-buf)) + (put-back-tokenbuf entity-buf)))) )) (defun pxml-internal (tokenbuf external-callback content-only) (declare (optimize (speed 3) (safety 1))) (let ((state state-docstart) - (guts) - (pending) - (attlist-data) - (public-string) - (system-string) - (entity-open-tags) - ) + (guts) + (pending) + (attlist-data) + (public-string) + (system-string) + (entity-open-tags) + ) (loop (multiple-value-bind (val kind kind2) - (next-token tokenbuf external-callback attlist-data) - (when *debug-xml* - (format t "val: ~s kind: ~s kind2: ~s state: ~s~%" val kind kind2 state)) - (case state - (#.state-docstart - (if* (and (listp val) (eq :xml (first val)) (eq kind :xml) (eq kind2 :end-tag)) - then - (check-xmldecl val tokenbuf) - (when (not content-only) (push val guts)) - (setf state state-docstart-misc) - elseif (eq kind :comment) - then - (when (not content-only) (push val guts)) - (setf state state-docstart-misc) - elseif (and (listp val) (eq :DOCTYPE (first val))) - then - (if* (eq (third val) :SYSTEM) then - (setf system-string (fourth val)) - (setf val (remove (third val) val)) - (setf val (remove (third val) val)) - elseif (eq (third val) :PUBLIC) then - (setf public-string (normalize-public-value (fourth val))) - (setf system-string (fifth val)) - (setf val (remove (third val) val)) - (setf val (remove (third val) val)) - (setf val (remove (third val) val))) - (when system-string - (if* external-callback then - (let ((ext-stream (apply external-callback - (list (parse-uri system-string) - :DOCTYPE - public-string - )))) - (when ext-stream - (let (ext-io (entity-buf (get-tokenbuf))) - (setf (tokenbuf-stream entity-buf) ext-stream) - (setf ext-io (make-iostruct :tokenbuf entity-buf - :do-entity - (iostruct-do-entity tokenbuf) - :read-sequence-func - (iostruct-read-sequence-func tokenbuf))) - (unicode-check ext-stream ext-io) - (setf (iostruct-parameter-entities ext-io) - (iostruct-parameter-entities tokenbuf)) - (setf (iostruct-general-entities ext-io) - (iostruct-general-entities tokenbuf)) - (unwind-protect - (setf val (append val - (list (append - (list :external) - (parse-dtd - ext-io - t external-callback))))) - (setf (iostruct-seen-any-dtd tokenbuf) t) - (setf (iostruct-seen-external-dtd tokenbuf) t) - (setf (iostruct-seen-parameter-reference tokenbuf) - (iostruct-seen-parameter-reference ext-io)) - (setf (iostruct-general-entities tokenbuf) - (iostruct-general-entities ext-io)) - (setf (iostruct-parameter-entities tokenbuf) - (iostruct-parameter-entities ext-io)) - (setf (iostruct-do-entity tokenbuf) - (iostruct-do-entity ext-io)) - (dolist (entity-buf2 (iostruct-entity-bufs ext-io)) - (when (streamp (tokenbuf-stream entity-buf2)) - (close (tokenbuf-stream entity-buf2)) - (put-back-tokenbuf entity-buf2))) - (close (tokenbuf-stream entity-buf)) - (put-back-tokenbuf entity-buf)) - ))) - else - (setf (iostruct-do-entity tokenbuf) nil))) - (setf attlist-data - (process-attlist (rest (rest val)) attlist-data)) - (when (not content-only) (push val guts)) - (setf state state-docstart-misc2) - elseif (eq kind :pi) - then - (push val guts) - (setf state state-docstart-misc) - elseif (eq kind :pcdata) - then - (when (or (not kind2) (not (all-xml-whitespace-p val))) - (if* (not kind2) then - (xml-error "An entity reference occured where only whitespace or the first element may occur") - else - (xml-error (concatenate 'string - "unrecognized content '" - (subseq val 0 (min (length val) 40)) "'")))) - (setf state state-docstart-misc) - elseif (or (symbolp val) - (and (listp val) (symbolp (first val)))) - then - (when (eq kind :start-tag) - (setf val (add-default-values val attlist-data))) - (if* (and (eq kind :start-tag) (eq kind2 :end-tag)) - then (push (list val) guts) - (setf state state-element-done) - elseif (eq kind :start-tag) - then (push (list val) pending) - ;;(format t "pending: ~s guts: ~s <1>~%" pending guts) - (when (iostruct-entity-bufs tokenbuf) - (push (if (symbolp val) val (first val)) entity-open-tags)) - (setf state state-element-contents) - else (xml-error (concatenate 'string - "encountered token at illegal syntax position: '" - (string kind) "'" - (if* (null guts) then - " at start of contents" - else - (concatenate 'string - " following: '" - (format nil "~s" (first guts)) - "'"))))) - else - (print (list val kind kind2)) - (break "need to check for other allowable docstarts"))) - (#.state-docstart-misc2 - (if* (eq kind :pcdata) - then - (when (or (not kind2) (not (all-xml-whitespace-p val))) - (if* (not kind2) then - (xml-error "An entity reference occured where only whitespace or the first element may occur") - else - (xml-error (concatenate 'string - "unrecognized content '" - (subseq val 0 (min (length val) 40)) "'")))) - elseif (and (listp val) (eq :comment (first val))) - then - (when (not content-only) (push val guts)) - elseif (eq kind :pi) - then - (push val guts) - elseif (eq kind :eof) - then - (xml-error "unexpected end of file encountered") - elseif (or (symbolp val) - (and (listp val) (symbolp (first val)))) - then - (when (eq kind :start-tag) - (setf val (add-default-values val attlist-data))) - (if* (and (eq kind :start-tag) (eq kind2 :end-tag)) - then (push (list val) guts) - (setf state state-element-done) - elseif (eq kind :start-tag) - then (push (list val) pending) - ;;(format t "pending: ~s guts: ~s <2>~%" pending guts) - (when (iostruct-entity-bufs tokenbuf) - (push (if (symbolp val) val (first val)) entity-open-tags)) - (setf state state-element-contents) - else (xml-error (concatenate 'string - "encountered token at illegal syntax position: '" - (string kind) "'" - (if* (null guts) then - " at start of contents" - else - (concatenate 'string - " following: '" - (format nil "~s" (first guts)) - "'"))))) - else - (error "this branch unexpected <1>"))) - (#.state-docstart-misc - (if* (eq kind :pcdata) - then - (when (or (not kind2) (not (all-xml-whitespace-p val))) - (if* (not kind2) then - (xml-error "An entity reference occured where only whitespace or the first element may occur") - else - (xml-error (concatenate 'string - "unrecognized content '" - (subseq val 0 (min (length val) 40)) "'")))) - elseif (and (listp val) (eq :DOCTYPE (first val))) - then - (if* (eq (third val) :SYSTEM) then - (setf system-string (fourth val)) - (setf val (remove (third val) val)) - (setf val (remove (third val) val)) - elseif (eq (third val) :PUBLIC) then - (setf public-string (normalize-public-value (fourth val))) - (setf system-string (fifth val)) - (setf val (remove (third val) val)) - (setf val (remove (third val) val)) - (setf val (remove (third val) val))) - (when system-string - (if* external-callback then - (let ((ext-stream (apply external-callback - (list (parse-uri system-string) - :DOCTYPE - public-string - )))) - (when ext-stream - (let (ext-io (entity-buf (get-tokenbuf))) - (setf (tokenbuf-stream entity-buf) ext-stream) - (setf ext-io (make-iostruct :tokenbuf entity-buf - :do-entity - (iostruct-do-entity tokenbuf) - :read-sequence-func - (iostruct-read-sequence-func tokenbuf))) - (unicode-check ext-stream ext-io) - (setf (iostruct-parameter-entities ext-io) - (iostruct-parameter-entities tokenbuf)) - (setf (iostruct-general-entities ext-io) - (iostruct-general-entities tokenbuf)) - (unwind-protect - (setf val (append val - (list (append - (list :external) - (parse-dtd - ext-io - t external-callback))))) - (setf (iostruct-seen-any-dtd tokenbuf) t) - (setf (iostruct-seen-external-dtd tokenbuf) t) - (setf (iostruct-seen-parameter-reference tokenbuf) - (iostruct-seen-parameter-reference ext-io)) - (setf (iostruct-general-entities tokenbuf) - (iostruct-general-entities ext-io)) - (setf (iostruct-parameter-entities tokenbuf) - (iostruct-parameter-entities ext-io)) - (setf (iostruct-do-entity tokenbuf) - (iostruct-do-entity ext-io)) - (dolist (entity-buf2 (iostruct-entity-bufs ext-io)) - (when (streamp (tokenbuf-stream entity-buf2)) - (close (tokenbuf-stream entity-buf2)) - (put-back-tokenbuf entity-buf2))) - (close (tokenbuf-stream entity-buf)) - (put-back-tokenbuf entity-buf)) - ))) - else - (setf (iostruct-do-entity tokenbuf) nil))) - (setf attlist-data - (process-attlist (rest (rest val)) attlist-data)) - (when (not content-only) (push val guts)) - (setf state state-docstart-misc2) - elseif (and (listp val) (eq :comment (first val))) - then - (when (not content-only) (push val guts)) - elseif (eq kind :pi) - then - (push val guts) - elseif (or (symbolp val) - (and (listp val) (symbolp (first val)))) - then - (when (eq kind :start-tag) - (setf val (add-default-values val attlist-data))) - (if* (and (eq kind :start-tag) (eq kind2 :end-tag)) - then (push (list val) guts) - (setf state state-element-done) - elseif (eq kind :start-tag) - then (push (list val) pending) - ;;(format t "pending: ~s guts: ~s <3>~%" pending guts) - (when (iostruct-entity-bufs tokenbuf) - (push (if (symbolp val) val (first val)) entity-open-tags)) - (setf state state-element-contents) - else (xml-error (concatenate 'string - "encountered token at illegal syntax position: '" - (string kind) "'" - (concatenate 'string - " following: '" - (format nil "~s" (first guts)) - "'")))) - else - (print (list val kind kind2)) - (break "check for other docstart-misc states"))) - (#.state-element-contents - (if* (or (symbolp val) - (and (listp val) (symbolp (first val)))) - then - (when (eq kind :start-tag) - (setf val (add-default-values val attlist-data))) - (if* (eq kind :end-tag) - then (let ((candidate (first (first pending)))) - (when (listp candidate) (setf candidate (first candidate))) - (if* (eq candidate val) - then - (if* (iostruct-entity-bufs tokenbuf) then - (when (not (eq (first entity-open-tags) val)) - (xml-error - (concatenate 'string - (string val) - " element closed in entity that did not open it"))) - (setf entity-open-tags (rest entity-open-tags)) - else - (when (eq (first entity-open-tags) val) - (xml-error - (concatenate 'string - (string val) - " element closed outside of entity that did not open it"))) - ) - (if* (= (length pending) 1) - then - (push (first pending) guts) - (setf state state-element-done) - else - (setf (second pending) - (append (second pending) (list (first pending))))) - (setf pending (rest pending)) - ;;(format t "pending: ~s guts: ~s <4>~%" pending guts) - else (xml-error (format nil - "encountered end tag: ~s expected: ~s" - val candidate)))) - elseif (and (eq kind :start-tag) (eq kind2 :end-tag)) - then - (setf (first pending) - (append (first pending) (list (list val)))) - ;;(format t "pending: ~s guts: ~s <5>~%" pending guts) - elseif (eq kind :start-tag) - then - (push (list val) pending) - ;;(format t "pending: ~s guts: ~s <6>~%" pending guts) - (when (iostruct-entity-bufs tokenbuf) - (push (if (symbolp val) val (first val)) entity-open-tags)) - elseif (eq kind :cdata) then - (setf (first pending) - (append (first pending) (rest val))) - (let ((old (first pending)) - (new)) - (dolist (item old) - (if* (and (stringp (first new)) (stringp item)) then - (setf (first new) - (concatenate 'string (first new) item)) - else (push item new))) - (setf (first pending) (reverse new))) - elseif (eq kind :comment) then - (when (not content-only) (push val guts)) - elseif (eq kind :pi) - then - (setf (first pending) - (append (first pending) (list val))) - elseif (eq kind :eof) - then - (xml-error "unexpected end of file encountered") - else (xml-error (format nil "unexpected token: ~s" val))) - elseif (eq kind :pcdata) - then - (setf (first pending) - (append (first pending) (list val))) - (let ((old (first pending)) - (new)) - (dolist (item old) - (if* (and (stringp (first new)) (stringp item)) then - (setf (first new) - (concatenate 'string (first new) item)) - else (push item new))) - (setf (first pending) (reverse new))) - else (xml-error (format nil "unexpected token: ~s" val)))) - (#.state-element-done - (if* (eq kind :pcdata) - then - (when (or (not kind2) (not (all-xml-whitespace-p val))) - (if* (not kind2) then - (xml-error "An entity reference occured where only whitespace or the first element may occur") - else - (xml-error (concatenate 'string - "unrecognized content '" - (subseq val 0 (min (length val) 40)) "'")))) - elseif (eq kind :eof) then - (put-back-tokenbuf (iostruct-tokenbuf tokenbuf)) - (return (nreverse guts)) - elseif (eq kind :comment) then - (when (not content-only) (push val guts)) - elseif (eq kind :pi) - then (push val guts) - else - (xml-error (concatenate 'string - "encountered token at illegal syntax position: '" - (string kind) "'" - (concatenate 'string - " following: '" - (format nil "~s" (first guts)) - "'"))) - )) - (t - (error "need to support state:~s token:~s kind:~s kind2:~s " state val kind kind2))) - )))) + (next-token tokenbuf external-callback attlist-data) + (when *debug-xml* + (format t "val: ~s kind: ~s kind2: ~s state: ~s~%" val kind kind2 state)) + (case state + (#.state-docstart + (if* (and (listp val) (eq :xml (first val)) (eq kind :xml) (eq kind2 :end-tag)) + then + (check-xmldecl val tokenbuf) + (when (not content-only) (push val guts)) + (setf state state-docstart-misc) + elseif (eq kind :comment) + then + (when (not content-only) (push val guts)) + (setf state state-docstart-misc) + elseif (and (listp val) (eq :DOCTYPE (first val))) + then + (if* (eq (third val) :SYSTEM) then + (setf system-string (fourth val)) + (setf val (remove (third val) val)) + (setf val (remove (third val) val)) + elseif (eq (third val) :PUBLIC) then + (setf public-string (normalize-public-value (fourth val))) + (setf system-string (fifth val)) + (setf val (remove (third val) val)) + (setf val (remove (third val) val)) + (setf val (remove (third val) val))) + (when system-string + (if* external-callback then + (let ((ext-stream (apply external-callback + (list (parse-uri system-string) + :DOCTYPE + public-string + )))) + (when ext-stream + (let (ext-io (entity-buf (get-tokenbuf))) + (setf (tokenbuf-stream entity-buf) ext-stream) + (setf ext-io (make-iostruct :tokenbuf entity-buf + :do-entity + (iostruct-do-entity tokenbuf) + :read-sequence-func + (iostruct-read-sequence-func tokenbuf))) + (unicode-check ext-stream ext-io) + (setf (iostruct-parameter-entities ext-io) + (iostruct-parameter-entities tokenbuf)) + (setf (iostruct-general-entities ext-io) + (iostruct-general-entities tokenbuf)) + (unwind-protect + (setf val (append val + (list (append + (list :external) + (parse-dtd + ext-io + t external-callback))))) + (setf (iostruct-seen-any-dtd tokenbuf) t) + (setf (iostruct-seen-external-dtd tokenbuf) t) + (setf (iostruct-seen-parameter-reference tokenbuf) + (iostruct-seen-parameter-reference ext-io)) + (setf (iostruct-general-entities tokenbuf) + (iostruct-general-entities ext-io)) + (setf (iostruct-parameter-entities tokenbuf) + (iostruct-parameter-entities ext-io)) + (setf (iostruct-do-entity tokenbuf) + (iostruct-do-entity ext-io)) + (dolist (entity-buf2 (iostruct-entity-bufs ext-io)) + (when (streamp (tokenbuf-stream entity-buf2)) + (close (tokenbuf-stream entity-buf2)) + (put-back-tokenbuf entity-buf2))) + (close (tokenbuf-stream entity-buf)) + (put-back-tokenbuf entity-buf)) + ))) + else + (setf (iostruct-do-entity tokenbuf) nil))) + (setf attlist-data + (process-attlist (rest (rest val)) attlist-data)) + (when (not content-only) (push val guts)) + (setf state state-docstart-misc2) + elseif (eq kind :pi) + then + (push val guts) + (setf state state-docstart-misc) + elseif (eq kind :pcdata) + then + (when (or (not kind2) (not (all-xml-whitespace-p val))) + (if* (not kind2) then + (xml-error "An entity reference occured where only whitespace or the first element may occur") + else + (xml-error (concatenate 'string + "unrecognized content '" + (subseq val 0 (min (length val) 40)) "'")))) + (setf state state-docstart-misc) + elseif (or (symbolp val) + (and (listp val) (symbolp (first val)))) + then + (when (eq kind :start-tag) + (setf val (add-default-values val attlist-data))) + (if* (and (eq kind :start-tag) (eq kind2 :end-tag)) + then (push (list val) guts) + (setf state state-element-done) + elseif (eq kind :start-tag) + then (push (list val) pending) + ;;(format t "pending: ~s guts: ~s <1>~%" pending guts) + (when (iostruct-entity-bufs tokenbuf) + (push (if (symbolp val) val (first val)) entity-open-tags)) + (setf state state-element-contents) + else (xml-error (concatenate 'string + "encountered token at illegal syntax position: '" + (string kind) "'" + (if* (null guts) then + " at start of contents" + else + (concatenate 'string + " following: '" + (format nil "~s" (first guts)) + "'"))))) + else + (print (list val kind kind2)) + (break "need to check for other allowable docstarts"))) + (#.state-docstart-misc2 + (if* (eq kind :pcdata) + then + (when (or (not kind2) (not (all-xml-whitespace-p val))) + (if* (not kind2) then + (xml-error "An entity reference occured where only whitespace or the first element may occur") + else + (xml-error (concatenate 'string + "unrecognized content '" + (subseq val 0 (min (length val) 40)) "'")))) + elseif (and (listp val) (eq :comment (first val))) + then + (when (not content-only) (push val guts)) + elseif (eq kind :pi) + then + (push val guts) + elseif (eq kind :eof) + then + (xml-error "unexpected end of file encountered") + elseif (or (symbolp val) + (and (listp val) (symbolp (first val)))) + then + (when (eq kind :start-tag) + (setf val (add-default-values val attlist-data))) + (if* (and (eq kind :start-tag) (eq kind2 :end-tag)) + then (push (list val) guts) + (setf state state-element-done) + elseif (eq kind :start-tag) + then (push (list val) pending) + ;;(format t "pending: ~s guts: ~s <2>~%" pending guts) + (when (iostruct-entity-bufs tokenbuf) + (push (if (symbolp val) val (first val)) entity-open-tags)) + (setf state state-element-contents) + else (xml-error (concatenate 'string + "encountered token at illegal syntax position: '" + (string kind) "'" + (if* (null guts) then + " at start of contents" + else + (concatenate 'string + " following: '" + (format nil "~s" (first guts)) + "'"))))) + else + (error "this branch unexpected <1>"))) + (#.state-docstart-misc + (if* (eq kind :pcdata) + then + (when (or (not kind2) (not (all-xml-whitespace-p val))) + (if* (not kind2) then + (xml-error "An entity reference occured where only whitespace or the first element may occur") + else + (xml-error (concatenate 'string + "unrecognized content '" + (subseq val 0 (min (length val) 40)) "'")))) + elseif (and (listp val) (eq :DOCTYPE (first val))) + then + (if* (eq (third val) :SYSTEM) then + (setf system-string (fourth val)) + (setf val (remove (third val) val)) + (setf val (remove (third val) val)) + elseif (eq (third val) :PUBLIC) then + (setf public-string (normalize-public-value (fourth val))) + (setf system-string (fifth val)) + (setf val (remove (third val) val)) + (setf val (remove (third val) val)) + (setf val (remove (third val) val))) + (when system-string + (if* external-callback then + (let ((ext-stream (apply external-callback + (list (parse-uri system-string) + :DOCTYPE + public-string + )))) + (when ext-stream + (let (ext-io (entity-buf (get-tokenbuf))) + (setf (tokenbuf-stream entity-buf) ext-stream) + (setf ext-io (make-iostruct :tokenbuf entity-buf + :do-entity + (iostruct-do-entity tokenbuf) + :read-sequence-func + (iostruct-read-sequence-func tokenbuf))) + (unicode-check ext-stream ext-io) + (setf (iostruct-parameter-entities ext-io) + (iostruct-parameter-entities tokenbuf)) + (setf (iostruct-general-entities ext-io) + (iostruct-general-entities tokenbuf)) + (unwind-protect + (setf val (append val + (list (append + (list :external) + (parse-dtd + ext-io + t external-callback))))) + (setf (iostruct-seen-any-dtd tokenbuf) t) + (setf (iostruct-seen-external-dtd tokenbuf) t) + (setf (iostruct-seen-parameter-reference tokenbuf) + (iostruct-seen-parameter-reference ext-io)) + (setf (iostruct-general-entities tokenbuf) + (iostruct-general-entities ext-io)) + (setf (iostruct-parameter-entities tokenbuf) + (iostruct-parameter-entities ext-io)) + (setf (iostruct-do-entity tokenbuf) + (iostruct-do-entity ext-io)) + (dolist (entity-buf2 (iostruct-entity-bufs ext-io)) + (when (streamp (tokenbuf-stream entity-buf2)) + (close (tokenbuf-stream entity-buf2)) + (put-back-tokenbuf entity-buf2))) + (close (tokenbuf-stream entity-buf)) + (put-back-tokenbuf entity-buf)) + ))) + else + (setf (iostruct-do-entity tokenbuf) nil))) + (setf attlist-data + (process-attlist (rest (rest val)) attlist-data)) + (when (not content-only) (push val guts)) + (setf state state-docstart-misc2) + elseif (and (listp val) (eq :comment (first val))) + then + (when (not content-only) (push val guts)) + elseif (eq kind :pi) + then + (push val guts) + elseif (or (symbolp val) + (and (listp val) (symbolp (first val)))) + then + (when (eq kind :start-tag) + (setf val (add-default-values val attlist-data))) + (if* (and (eq kind :start-tag) (eq kind2 :end-tag)) + then (push (list val) guts) + (setf state state-element-done) + elseif (eq kind :start-tag) + then (push (list val) pending) + ;;(format t "pending: ~s guts: ~s <3>~%" pending guts) + (when (iostruct-entity-bufs tokenbuf) + (push (if (symbolp val) val (first val)) entity-open-tags)) + (setf state state-element-contents) + else (xml-error (concatenate 'string + "encountered token at illegal syntax position: '" + (string kind) "'" + (concatenate 'string + " following: '" + (format nil "~s" (first guts)) + "'")))) + else + (print (list val kind kind2)) + (break "check for other docstart-misc states"))) + (#.state-element-contents + (if* (or (symbolp val) + (and (listp val) (symbolp (first val)))) + then + (when (eq kind :start-tag) + (setf val (add-default-values val attlist-data))) + (if* (eq kind :end-tag) + then (let ((candidate (first (first pending)))) + (when (listp candidate) (setf candidate (first candidate))) + (if* (eq candidate val) + then + (if* (iostruct-entity-bufs tokenbuf) then + (when (not (eq (first entity-open-tags) val)) + (xml-error + (concatenate 'string + (string val) + " element closed in entity that did not open it"))) + (setf entity-open-tags (rest entity-open-tags)) + else + (when (eq (first entity-open-tags) val) + (xml-error + (concatenate 'string + (string val) + " element closed outside of entity that did not open it"))) + ) + (if* (= (length pending) 1) + then + (push (first pending) guts) + (setf state state-element-done) + else + (setf (second pending) + (append (second pending) (list (first pending))))) + (setf pending (rest pending)) + ;;(format t "pending: ~s guts: ~s <4>~%" pending guts) + else (xml-error (format nil + "encountered end tag: ~s expected: ~s" + val candidate)))) + elseif (and (eq kind :start-tag) (eq kind2 :end-tag)) + then + (setf (first pending) + (append (first pending) (list (list val)))) + ;;(format t "pending: ~s guts: ~s <5>~%" pending guts) + elseif (eq kind :start-tag) + then + (push (list val) pending) + ;;(format t "pending: ~s guts: ~s <6>~%" pending guts) + (when (iostruct-entity-bufs tokenbuf) + (push (if (symbolp val) val (first val)) entity-open-tags)) + elseif (eq kind :cdata) then + (setf (first pending) + (append (first pending) (rest val))) + (let ((old (first pending)) + (new)) + (dolist (item old) + (if* (and (stringp (first new)) (stringp item)) then + (setf (first new) + (concatenate 'string (first new) item)) + else (push item new))) + (setf (first pending) (reverse new))) + elseif (eq kind :comment) then + (when (not content-only) (push val guts)) + elseif (eq kind :pi) + then + (setf (first pending) + (append (first pending) (list val))) + elseif (eq kind :eof) + then + (xml-error "unexpected end of file encountered") + else (xml-error (format nil "unexpected token: ~s" val))) + elseif (eq kind :pcdata) + then + (setf (first pending) + (append (first pending) (list val))) + (let ((old (first pending)) + (new)) + (dolist (item old) + (if* (and (stringp (first new)) (stringp item)) then + (setf (first new) + (concatenate 'string (first new) item)) + else (push item new))) + (setf (first pending) (reverse new))) + else (xml-error (format nil "unexpected token: ~s" val)))) + (#.state-element-done + (if* (eq kind :pcdata) + then + (when (or (not kind2) (not (all-xml-whitespace-p val))) + (if* (not kind2) then + (xml-error "An entity reference occured where only whitespace or the first element may occur") + else + (xml-error (concatenate 'string + "unrecognized content '" + (subseq val 0 (min (length val) 40)) "'")))) + elseif (eq kind :eof) then + (put-back-tokenbuf (iostruct-tokenbuf tokenbuf)) + (return (nreverse guts)) + elseif (eq kind :comment) then + (when (not content-only) (push val guts)) + elseif (eq kind :pi) + then (push val guts) + else + (xml-error (concatenate 'string + "encountered token at illegal syntax position: '" + (string kind) "'" + (concatenate 'string + " following: '" + (format nil "~s" (first guts)) + "'"))) + )) + (t + (error "need to support state:~s token:~s kind:~s kind2:~s " state val kind kind2))) + )))) (eval-when (compile load eval) (defconstant state-pcdata 0) ;;looking for < (tag start), & (reference); all else is string data @@ -537,1510 +537,1510 @@ (declare (optimize (speed 3) (safety 1))) ;; return two values: ;; the next token from the stream. - ;; the kind of token + ;; the kind of token ;; ;; if read-sequence-func is non-nil, ;; read-sequence-func is called to fetch the next character (macrolet ((add-to-entity-buf (entity-symbol p-value) - `(progn - (push (make-tokenbuf :cur 0 :max (length p-value) :data p-value) - (iostruct-entity-bufs tokenbuf)))) + `(progn + (push (make-tokenbuf :cur 0 :max (length p-value) :data p-value) + (iostruct-entity-bufs tokenbuf)))) - (un-next-char (ch) - `(push ,ch (iostruct-unget-char tokenbuf))) + (un-next-char (ch) + `(push ,ch (iostruct-unget-char tokenbuf))) - (clear-coll (coll) - `(setf (collector-next ,coll) 0)) + (clear-coll (coll) + `(setf (collector-next ,coll) 0)) - (add-to-coll (coll ch) - `(let ((.next. (collector-next ,coll))) - (if* (>= .next. (collector-max ,coll)) - then (grow-and-add ,coll ,ch) - else (setf (schar (collector-data ,coll) .next.) - ,ch) - (setf (collector-next ,coll) (1+ .next.))))) + (add-to-coll (coll ch) + `(let ((.next. (collector-next ,coll))) + (if* (>= .next. (collector-max ,coll)) + then (grow-and-add ,coll ,ch) + else (setf (schar (collector-data ,coll) .next.) + ,ch) + (setf (collector-next ,coll) (1+ .next.))))) - (to-preferred-case (ch) - ;; should check the case mode - `(char-downcase ,ch)) + (to-preferred-case (ch) + ;; should check the case mode + `(char-downcase ,ch)) - ) + ) (let ((state state-pcdata) - (coll (get-collector)) - (entity (get-collector)) - (tag-to-return) - (tag-to-return-string) - (attrib-name) - (empty-delim) - (value-delim) - (attrib-value) - (attribs-to-return) - (contents-to-return) - (char-code 0) - (special-tag-count 0) - (attrib-value-tokenbuf) - (last-ch) - (cdatap t) - (pcdatap t) - (entity-source) - (ns-token) - (ch)) + (coll (get-collector)) + (entity (get-collector)) + (tag-to-return) + (tag-to-return-string) + (attrib-name) + (empty-delim) + (value-delim) + (attrib-value) + (attribs-to-return) + (contents-to-return) + (char-code 0) + (special-tag-count 0) + (attrib-value-tokenbuf) + (last-ch) + (cdatap t) + (pcdatap t) + (entity-source) + (ns-token) + (ch)) (loop - (setq ch (get-next-char tokenbuf)) - (when *debug-xml* (format t "ch: ~s code: ~x state:~s entity-names:~s~%" - ch (char-code ch) state (iostruct-entity-names tokenbuf))) - (if* (null ch) - then (return) ; eof -- exit loop - ) - - - (case state - (#.state-pcdata - (if* (eq ch #\<) - then - (setf entity-source (first (iostruct-entity-bufs tokenbuf))) - (if* (> (collector-next coll) 0) - then ; have collected something, return this string - (un-next-char ch) ; push back the < - (return) - else ; collect a tag - (setq state state-readtagfirst)) - elseif (eq #\& ch) - then (setf state state-pcdata2) - (setf entity-source (first (iostruct-entity-bufs tokenbuf))) - (setf pcdatap nil) - elseif (eq #\] ch) then (setf state state-pcdata7) - elseif (not (xml-char-p ch)) then - (xml-error (concatenate 'string - "Illegal character: " - (string ch) - " detected in input")) - else - (add-to-coll coll ch) - #+ignore - (if* (not (eq ch #\return)) - then (add-to-coll coll ch)))) - - (#.state-pcdata7 - (if* (eq #\] ch) then (setf state state-pcdata8) - else (setf state state-pcdata) - (add-to-coll coll #\]) (un-next-char ch))) - - (#.state-pcdata8 - (if* (eq #\> ch) then - (add-to-coll coll #\]) - (add-to-coll coll #\]) - (add-to-coll coll #\>) - (dotimes (i 15) - (add-to-coll coll ch) - (setq ch (get-next-char tokenbuf)) - (if* (null ch) - then (return))) - (xml-error (concatenate 'string - "content cannot contain ']]>':'" - (compute-coll-string coll) - "'")) - elseif (eq #\] ch) then - (add-to-coll coll #\]) - else (setf state state-pcdata) - (add-to-coll coll #\]) (add-to-coll coll #\]) (un-next-char ch))) - - (#.state-pcdata2 - (if* (eq #\# ch) - then (setf state state-pcdata3) - elseif (xml-name-start-char-p ch) - then (setf state state-pcdata4) - (un-next-char ch) - else (clear-coll coll) - (dotimes (i 15) - (add-to-coll coll ch) - (setq ch (get-next-char tokenbuf)) - (if* (null ch) - then (return))) - (xml-error (concatenate 'string - "illegal reference name, starting at: '&" - (compute-coll-string coll) - "'")) - )) - - (#.state-pcdata3 - (if* (eq #\x ch) - then (setf state state-pcdata5) - elseif (<= (char-code #\0) (char-code ch) (char-code #\9)) - then (setf state state-pcdata6) - (un-next-char ch) - else (clear-coll coll) - (dotimes (i 15) - (add-to-coll coll ch) - (setq ch (get-next-char tokenbuf)) - (if* (null ch) - then (return))) - (xml-error (concatenate 'string - "illegal character reference code, starting at: '&#" - (compute-coll-string coll) - "'")) - )) - - (#.state-pcdata4 - (if* (xml-name-char-p ch) - then (add-to-coll entity ch) - elseif (eq #\; ch) - then (let ((entity-symbol (compute-tag entity))) - (clear-coll entity) - (if* (not (eq entity-source (first (iostruct-entity-bufs tokenbuf)))) then - (xml-error - (concatenate 'string - (string entity-symbol) - " reference cannot be constructed from entity reference/character data sequence")) - else - (setf entity-source nil)) - (if* (string= (symbol-name entity-symbol) "amp") then (add-to-coll coll #\&) - elseif (string= (symbol-name entity-symbol) "lt") then (add-to-coll coll #\<) - elseif (string= (symbol-name entity-symbol) "gt") then (add-to-coll coll #\>) - elseif (string= (symbol-name entity-symbol) "apos") then (add-to-coll coll #\') - elseif (string= (symbol-name entity-symbol) "quot") then (add-to-coll coll #\") - else - (let (p-value) - (if* (and (iostruct-do-entity tokenbuf) - (setf p-value - (assoc entity-symbol - (iostruct-general-entities tokenbuf)))) then - (setf p-value (rest p-value)) - (when (member entity-symbol (iostruct-entity-names tokenbuf)) - (xml-error (concatenate 'string - "entity:" - (string entity-symbol) - " in recursive reference"))) - (push entity-symbol (iostruct-entity-names tokenbuf)) - (if* (stringp p-value) then - (add-to-entity-buf entity-symbol p-value) - elseif (null external-callback) then - (setf (iostruct-do-entity tokenbuf) nil) - elseif p-value then - (let ((entity-stream (apply external-callback p-value))) - (if* entity-stream then - (let ((entity-buf (get-tokenbuf))) - (setf (tokenbuf-stream entity-buf) entity-stream) - (unicode-check entity-stream tokenbuf) - (push entity-buf - (iostruct-entity-bufs tokenbuf)) - ;; check for possible external textdecl - (let ((count 0) cch - (string " ch) then - (let ((tag-string (compute-coll-string coll))) - (when (and (iostruct-ns-scope tokenbuf) - (string= tag-string - (first (first (iostruct-ns-scope tokenbuf))))) - (dolist (item (second (first (iostruct-ns-scope tokenbuf)))) - (setf (iostruct-ns-to-package tokenbuf) - (remove (assoc item (iostruct-ns-to-package tokenbuf)) - (iostruct-ns-to-package tokenbuf)))) - (setf (iostruct-ns-scope tokenbuf) - (rest (iostruct-ns-scope tokenbuf))))) - (setq tag-to-return (compute-tag coll *package* - (iostruct-ns-to-package tokenbuf))) - (return) - elseif (xml-space-p ch) then (setf state state-readtag-end3) - (let ((tag-string (compute-coll-string coll))) - (when (and (iostruct-ns-scope tokenbuf) - (string= tag-string - (first (first (iostruct-ns-scope tokenbuf))))) - (setf (iostruct-ns-scope tokenbuf) - (rest (iostruct-ns-scope tokenbuf))))) - (setq tag-to-return (compute-tag coll *package* - (iostruct-ns-to-package tokenbuf))) - else (let ((tmp (compute-coll-string coll))) - (clear-coll coll) - (dotimes (i 15) - (add-to-coll coll ch) - (setq ch (get-next-char tokenbuf)) - (if* (null ch) - then (return))) - (xml-error (concatenate 'string - "illegal end tag name, starting at: ' ch) then (return) - else (let ((tmp (compute-coll-string coll))) - (clear-coll coll) - (dotimes (i 15) - (add-to-coll coll ch) - (setq ch (get-next-char tokenbuf)) - (if* (null ch) - then (return))) - (xml-error (concatenate 'string - "illegal end tag name, starting at: '" - (compute-coll-string coll) - "' end tag name: " tmp ))) - )) - - (#.state-readtagfirst - ; starting to read a tag name - (if* (eq #\/ ch) - then (setf state state-readtag-end) - elseif (eq #\? ch) - then (setf state state-readtag-?) - (setf empty-delim #\?) - elseif (eq #\! ch) - then (setf state state-readtag-!) - (setf empty-delim nil) - elseif (xml-name-start-char-p ch) - then (setf state state-readtag) - (setf empty-delim #\/) - (un-next-char ch) - else (clear-coll coll) - (dotimes (i 15) - (add-to-coll coll ch) - (setq ch (get-next-char tokenbuf)) - (if* (null ch) - then (return))) - (xml-error (concatenate 'string - "illegal character following '<', starting at '" - (compute-coll-string coll) - "'")) - )) - - (#.state-readtag-! - (if* (xml-name-start-char-p ch) - then - (setf state state-readtag-!-name) - (un-next-char ch) - elseif (eq #\[ ch) - then - (setf state state-readtag-!-conditional) - elseif (eq #\- ch) - then - (setf state state-readtag-!-comment) - else - (clear-coll coll) - (dotimes (i 15) - (add-to-coll coll ch) - (setq ch (get-next-char tokenbuf)) - (if* (null ch) - then (return))) - (xml-error (concatenate 'string - "illegal character following ' ch) - then - (if* (not (eq entity-source (first (iostruct-entity-bufs tokenbuf)))) then - (xml-error - "CDATA cannot be constructed from entity reference/character data sequence") - else - (setf entity-source nil)) - (return) - elseif (eq #\] ch) then - (add-to-coll coll #\]) ;; come back here to check again - else (setf state state-readtag-!-conditional5) - (add-to-coll coll #\]) - (add-to-coll coll #\]) - (add-to-coll coll ch))) - - (#.state-readtag-!-comment - (if* (eq #\- ch) - then (setf state state-readtag-!-readcomment) - (setf tag-to-return :comment) - else (clear-coll coll) - (dotimes (i 15) - (add-to-coll coll ch) - (setq ch (get-next-char tokenbuf)) - (if* (null ch) - then (return))) - (xml-error (concatenate 'string - "illegal token following ' ch) - then (if* (not (eq entity-source (first (iostruct-entity-bufs tokenbuf)))) then - (xml-error - (concatenate 'string - (string tag-to-return) - " tag cannot be constructed from entity reference/character data sequence")) - else - (setf entity-source nil)) - (return) - else (clear-coll coll) - (dotimes (i 15) - (add-to-coll coll ch) - (setq ch (get-next-char tokenbuf)) - (if* (null ch) - then (return))) - (xml-error (concatenate 'string - "illegal token following '--' comment terminator, starting at '--" - (compute-coll-string coll) - "'")) - )) - - (#.state-readtag - (if* (xml-name-char-p ch) ;; starting char already passed more restrictive test - then - (add-to-coll coll ch) - else - (if* (xml-space-p ch) then - (setf tag-to-return-string (compute-coll-string coll)) - (setq tag-to-return - (compute-tag coll *package* - (iostruct-ns-to-package tokenbuf))) - (clear-coll coll) - (setf state state-readtag2) - elseif (eq #\> ch) then - (setq tag-to-return - (compute-tag coll *package* - (iostruct-ns-to-package tokenbuf))) - (clear-coll coll) - (return) - elseif (eq #\/ ch) then - (setq tag-to-return - (compute-tag coll *package* - (iostruct-ns-to-package tokenbuf))) - (clear-coll coll) - (setf state state-readtag3) - else (dotimes (i 15) - (add-to-coll coll ch) - (setq ch (get-next-char tokenbuf)) - (if* (null ch) - then (return))) - (xml-error - (concatenate 'string - "illegal token name, starting at '" - (compute-coll-string coll) - "'")) - ))) - - (#.state-readtag2 - (if* (xml-space-p ch) then nil - elseif (eq #\> ch) then (return) - elseif (eq #\/ ch) then (setf state state-readtag3) - elseif (xml-name-start-char-p ch) then - (un-next-char ch) - (setf state state-readtag4) - else (clear-coll coll) - (dotimes (i 15) - (add-to-coll coll ch) - (setq ch (get-next-char tokenbuf)) - (if* (null ch) - then (return))) - (xml-error - (concatenate 'string - "illegal token, starting at '" - (compute-coll-string coll) - "' following element token start: " (string tag-to-return))) - )) - - (#.state-readtag4 - (if* (xml-name-char-p ch) ;; starting char already passed more restrictive test - then - (add-to-coll coll ch) - elseif (eq #\= ch) then - (setq attrib-name (compute-tag coll *package* - (iostruct-ns-to-package tokenbuf))) - (clear-coll coll) - (let ((name (symbol-name attrib-name))) - (when (and (>= (length name) 5) - (string= name "xmlns" :end1 5)) - (if* (= (length name) 5) - then - (setf ns-token :none) - elseif (eq (schar name 5) #\:) - then - (setf ns-token (subseq name 6))))) - (setf state state-readtag5) - elseif (xml-space-p ch) then - (setq attrib-name (compute-tag coll *package* - (iostruct-ns-to-package tokenbuf))) - (clear-coll coll) - (let ((name (symbol-name attrib-name))) - (when (and (>= (length name) 5) - (string= name "xmlns" :end1 5)) - (if* (= (length name) 5) - then - (setf ns-token :none) - else - (setf ns-token (subseq name 6))))) - (setf state state-readtag12) - else (let ((tmp (compute-coll-string coll))) - (clear-coll coll) - (dotimes (i 15) - (add-to-coll coll ch) - (setq ch (get-next-char tokenbuf)) - (if* (null ch) - then (return))) - (xml-error - (concatenate 'string - "looking for attribute '=', found: '" - (compute-coll-string coll) - "' following attribute name: " tmp))) - )) - - (#.state-readtag12 - (if* (xml-space-p ch) then nil - elseif (eq #\= ch) then (setf state state-readtag5) - else - (dotimes (i 15) - (add-to-coll coll ch) - (setq ch (get-next-char tokenbuf)) - (if* (null ch) - then (return))) - (xml-error - (concatenate 'string - "looking for attribute '=', found: '" - (compute-coll-string coll) - "' following attribute name: " (string attrib-name))))) - - (#.state-readtag5 - ;; begin to collect attribute value - (if* (or (eq ch #\") - (eq ch #\')) - then (setq value-delim ch) - (let* ((tag-defaults (assoc tag-to-return attlist-data)) - (this-attrib (assoc attrib-name tag-defaults))) - (when (and (second this-attrib) (not (eq (second this-attrib) :CDATA))) - (setf cdatap nil)) - ) - (setq state state-readtag6) - elseif (xml-space-p ch) then nil - else - (dotimes (i 15) - (add-to-coll coll ch) - (setq ch (get-next-char tokenbuf)) - (if* (null ch) - then (return))) - (xml-error - (concatenate 'string - "attribute value not delimited by ' or \" : '" - (compute-coll-string coll) - "' following attribute: " (string attrib-name))) - )) - - (#.state-readtag6 - (let ((from-entity (and attrib-value-tokenbuf - (eq attrib-value-tokenbuf - (first (iostruct-entity-bufs tokenbuf)))))) - (when (not from-entity) (setf attrib-value-tokenbuf nil)) - (if* from-entity then - (if* (eq #\newline ch) then (setf ch #\space) - elseif (eq #\return ch) then (setf ch #\space) - elseif (eq #\tab ch) then (setf ch #\space) - )) - (if* (and (not from-entity) (eq ch value-delim)) - then (setq attrib-value (compute-coll-string coll)) - (when (not cdatap) - (setf attrib-value (normalize-attrib-value attrib-value))) - (clear-coll coll) - (push attrib-name attribs-to-return) - (push attrib-value attribs-to-return) - (when ns-token - (let ((package (assoc (parse-uri attrib-value) - (iostruct-uri-to-package tokenbuf) - :test 'uri=))) - (if* package then (setf package (rest package)) - else - (setf package - (let ((i 0) new-package) - (loop - (let* ((candidate (concatenate 'string - "net.xml.namespace." - (format nil "~s" i))) - (exists (find-package candidate))) - (if* exists - then (incf i) - else (setf new-package (make-package candidate)) - (setf (iostruct-uri-to-package tokenbuf) - (acons (parse-uri attrib-value) new-package - (iostruct-uri-to-package tokenbuf))) - (return new-package))))))) - (setf (iostruct-ns-to-package tokenbuf) - (acons ns-token package (iostruct-ns-to-package tokenbuf))) - ) - (if* (and (first (iostruct-ns-scope tokenbuf)) - (string= (first (first (iostruct-ns-scope tokenbuf))) - tag-to-return-string)) - then - (push ns-token (second (first (iostruct-ns-scope tokenbuf)))) - else - (push (list tag-to-return-string (list ns-token)) - (iostruct-ns-scope tokenbuf))) - (setf ns-token nil)) - (setq state state-readtag6a) - elseif (eq #\newline ch) then - (when (not (eq #\return last-ch)) (add-to-coll coll #\space)) - elseif (or (eq #\tab ch) (eq #\return ch)) then - (add-to-coll coll #\space) - elseif (eq #\& ch) - then (setq state state-readtag7) - (setf entity-source (first (iostruct-entity-bufs tokenbuf))) - elseif (and (xml-char-p ch) (not (eq #\< ch))) - then (add-to-coll coll ch) - else - (dotimes (i 15) - (add-to-coll coll ch) - (setq ch (get-next-char tokenbuf)) - (if* (null ch) - then (return))) - (xml-error - (concatenate 'string - "attribute value cannot contain '<': '" - (compute-coll-string coll) - "' following attribute: " (string attrib-name))) - ) - (setf last-ch ch))) - - (#.state-readtag6a - (if* (xml-space-p ch) then (setf state state-readtag2) - elseif (eq #\> ch) then (setf state state-readtag2) - (return) - elseif (eq #\/ ch) then (setf state state-readtag3) - else (clear-coll coll) - (dotimes (i 15) - (add-to-coll coll ch) - (setq ch (get-next-char tokenbuf)) - (if* (null ch) - then (return))) - (xml-error - (concatenate 'string - "illegal token, starting at '" - (compute-coll-string coll) - "' following element token start: " (string tag-to-return))) - )) - - (#.state-readtag7 - (if* (eq #\# ch) - then (setf state state-readtag8) - elseif (xml-name-start-char-p ch) - then (setf state state-readtag9) - (un-next-char ch) - else (clear-coll coll) - (dotimes (i 15) - (add-to-coll coll ch) - (setq ch (get-next-char tokenbuf)) - (if* (null ch) - then (return))) - (xml-error - (concatenate 'string - "attribute value contains illegal reference name: '&" - (compute-coll-string coll) - "' in attribute value for: " (string attrib-name))) - )) - - (#.state-readtag8 - (if* (eq #\x ch) - then (setf state state-readtag10) - elseif (<= (char-code #\0) (char-code ch) (char-code #\9)) - then (setf state state-readtag11) - (un-next-char ch) - else (clear-coll coll) - (dotimes (i 15) - (add-to-coll coll ch) - (setq ch (get-next-char tokenbuf)) - (if* (null ch) - then (return))) - (xml-error - (concatenate 'string - "attribute value contains illegal character reference code: '" - (compute-coll-string coll) - "' in attribute value for: " (string attrib-name))) - )) - - (#.state-readtag10 - (let ((code (char-code ch))) - (if* (eq #\; ch) - then (if* (not (eq entity-source (first (iostruct-entity-bufs tokenbuf)))) then - (xml-error - (concatenate 'string - (string (code-char char-code)) - " reference cannot be constructed from entity reference/character data sequence")) - else - (setf entity-source nil)) - (add-to-coll coll (code-char char-code)) - (setf char-code 0) - (setq state state-readtag6) - elseif (<= (char-code #\0) code (char-code #\9)) - then (setf char-code (+ (* char-code 16) (- code (char-code #\0)))) - elseif (<= (char-code #\A) code (char-code #\F)) - then (setf char-code (+ 10 (* char-code 16) (- code (char-code #\A)))) - elseif (<= (char-code #\a) code (char-code #\f)) - then (setf char-code (+ 10 (* char-code 16) (- code (char-code #\a)))) - else (clear-coll coll) - (dotimes (i 15) - (add-to-coll coll ch) - (setq ch (get-next-char tokenbuf)) - (if* (null ch) - then (return))) - (xml-error - (concatenate 'string - "attribute value contains illegal hexidecimal character reference code: '" - (compute-coll-string coll) - "' in attribute value for: " (string attrib-name))) - ))) - - (#.state-readtag11 - (let ((code (char-code ch))) - (if* (eq #\; ch) - then (if* (not (eq entity-source (first (iostruct-entity-bufs tokenbuf)))) then - (xml-error - (concatenate 'string - (string (code-char char-code)) - " reference cannot be constructed from entity reference/character data sequence")) - else - (setf entity-source nil)) - (add-to-coll coll (code-char char-code)) - (setf char-code 0) - (setq state state-readtag6) - elseif (<= (char-code #\0) code (char-code #\9)) - then (setf char-code (+ (* char-code 10) (- code (char-code #\0)))) - else (clear-coll coll) - (dotimes (i 15) - (add-to-coll coll ch) - (setq ch (get-next-char tokenbuf)) - (if* (null ch) - then (return))) - (xml-error - (concatenate 'string - "attribute value contains illegal decimal character reference code: '" - (compute-coll-string coll) - "' in attribute value for: " (string attrib-name))) - ))) - - (#.state-readtag9 - (if* (xml-name-char-p ch) - then (add-to-coll entity ch) - elseif (eq #\; ch) - then (let ((entity-symbol (compute-tag entity))) - (clear-coll entity) - (if* (not (eq entity-source (first (iostruct-entity-bufs tokenbuf)))) then - (xml-error - (concatenate 'string - (string entity-symbol) - " reference cannot be constructed from entity reference/character data sequence")) - else - (setf entity-source nil)) - (if* (string= (symbol-name entity-symbol) "amp") then (add-to-coll coll #\&) - elseif (string= (symbol-name entity-symbol) "lt") then (add-to-coll coll #\<) - elseif (string= (symbol-name entity-symbol) "gt") then (add-to-coll coll #\>) - elseif (string= (symbol-name entity-symbol) "apos") then (add-to-coll coll #\') - elseif (string= (symbol-name entity-symbol) "quot") then (add-to-coll coll #\") - else (let (p-value) - (if* (and (iostruct-do-entity tokenbuf) - (setf p-value - (assoc entity-symbol - (iostruct-general-entities tokenbuf)))) then - (setf p-value (rest p-value)) - (when (member entity-symbol (iostruct-entity-names tokenbuf)) - (xml-error (concatenate 'string - "entity:" - (string entity-symbol) - " in recursive reference"))) - (push entity-symbol (iostruct-entity-names tokenbuf)) - (if* (stringp p-value) then - (add-to-entity-buf entity-symbol p-value) - (when (not attrib-value-tokenbuf) - (setf attrib-value-tokenbuf - (first (iostruct-entity-bufs tokenbuf)))) - elseif (null external-callback) then - (setf (iostruct-do-entity tokenbuf) nil) - elseif p-value then - (let ((entity-stream (apply external-callback p-value))) - (if* entity-stream then - (let ((entity-buf (get-tokenbuf))) - (setf (tokenbuf-stream entity-buf) entity-stream) - (unicode-check entity-stream tokenbuf) - (push entity-buf - (iostruct-entity-bufs tokenbuf)) - ;; check for possible external textdecl - (let ((count 0) cch - (string " ch) then (return) - else (clear-coll coll) - (dotimes (i 15) - (add-to-coll coll ch) - (setq ch (get-next-char tokenbuf)) - (if* (null ch) - then (return))) - (xml-error - (concatenate 'string - "expected '>' found '" - (compute-coll-string coll) - "' in element: " (string tag-to-return))) - )) - - (#.state-readtag-!-name - (if* (xml-name-char-p ch) ;; starting char already passed more restrictive test - then - (add-to-coll coll ch) - else - (when (not (xml-space-p ch)) - (xml-error (concatenate 'string - "expecting whitespace following: ' ch) - then (return) - else (un-next-char ch) - (setf state state-!-contents))) - - (#.state-begin-dtd - (un-next-char ch) - (let ((val (parse-dtd tokenbuf nil external-callback))) - (setf (iostruct-seen-any-dtd tokenbuf) t) - (push (append (list :[) val) - contents-to-return)) - (setf state state-!-doctype-ext3)) - - (#.state-!-contents - (if* (xml-name-char-p ch) - then (add-to-coll coll ch) - elseif (eq #\> ch) - then (push (compute-coll-string coll) contents-to-return) - (clear-coll coll) - (return) - elseif (eq #\[ ch) - then (push (compute-tag coll) contents-to-return) - (clear-coll coll) - (setf state state-begin-dtd) - elseif (and (xml-space-p ch) (eq tag-to-return :DOCTYPE)) - ;; look at tag-to-return and set state accordingly - then (push (compute-tag coll) contents-to-return) - (clear-coll coll) - (setf state state-!-doctype) - else (xml-error - (concatenate 'string - "illegal name: '" - (string tag-to-return) - "' in ch) then (return) - elseif (eq #\[ ch) - then (setf state state-begin-dtd) - else - (dotimes (i 15) - (add-to-coll coll ch) - (setq ch (get-next-char tokenbuf)) - (if* (null ch) - then (return))) - (xml-error - (concatenate 'string - "illegal char in DOCTYPE token: '" - (compute-coll-string coll) "'")) - )) - - (#.state-!-doctype-ext3 - (if* (xml-space-p ch) then nil - elseif (eq #\> ch) then (return) - else - (dotimes (i 15) - (add-to-coll coll ch) - (setq ch (get-next-char tokenbuf)) - (if* (null ch) - then (return))) - (xml-error - (concatenate 'string - "illegal char in DOCTYPE token following dtd: '" - (compute-coll-string coll) "'")) - )) - - (#.state-!-doctype - ;; skip whitespace; possible exits: >, SYSTEM, PUBLIC, [ - (if* (xml-space-p ch) then nil - elseif (xml-name-start-char-p ch) - then - (setf state state-!-doctype-ext) - (un-next-char ch) - elseif (eq #\> ch) then (return) - elseif (eq #\[ ch) - then (setf state state-begin-dtd) - else (xml-error - (concatenate 'string - "illegal character: '" - (string ch) - "' in ch) - then (return) - elseif (eq #\? ch) then - (add-to-coll coll #\?) ;; come back here to try again - else (setf state state-readpi) - (add-to-coll coll #\?) - (add-to-coll coll ch))) - - (#.state-findattributename0 - (if* (xml-space-p ch) then (setf state state-findattributename) - elseif (eq ch empty-delim) - then (setf state state-noattributename) - else - (dotimes (i 15) - (add-to-coll coll ch) - (setq ch (get-next-char tokenbuf)) - (if* (null ch) - then (return))) - (xml-error - (concatenate 'string - "expected space or tag end before: '" - (compute-coll-string coll) "'")))) - (#.state-findattributename - ;; search until we find the start of an attribute name - ;; or the end of the tag - (if* (eq ch empty-delim) - then (setf state state-noattributename) - elseif (xml-space-p ch) - then nil ;; skip whitespace - elseif (xml-name-start-char-p ch) - then - (un-next-char ch) - (setf state state-attribname) - else - (dotimes (i 15) - (add-to-coll coll ch) - (setq ch (get-next-char tokenbuf)) - (if* (null ch) - then (return))) - (xml-error - (concatenate 'string - "illegal char in ch) - then - (return) ;; ready to build return token - else - (xml-error - (concatenate 'string - "expected '>' found: '" (string ch) "' in (collector-next coll) 0) + then ; have collected something, return this string + (un-next-char ch) ; push back the < + (return) + else ; collect a tag + (setq state state-readtagfirst)) + elseif (eq #\& ch) + then (setf state state-pcdata2) + (setf entity-source (first (iostruct-entity-bufs tokenbuf))) + (setf pcdatap nil) + elseif (eq #\] ch) then (setf state state-pcdata7) + elseif (not (xml-char-p ch)) then + (xml-error (concatenate 'string + "Illegal character: " + (string ch) + " detected in input")) + else + (add-to-coll coll ch) + #+ignore + (if* (not (eq ch #\return)) + then (add-to-coll coll ch)))) + + (#.state-pcdata7 + (if* (eq #\] ch) then (setf state state-pcdata8) + else (setf state state-pcdata) + (add-to-coll coll #\]) (un-next-char ch))) + + (#.state-pcdata8 + (if* (eq #\> ch) then + (add-to-coll coll #\]) + (add-to-coll coll #\]) + (add-to-coll coll #\>) + (dotimes (i 15) + (add-to-coll coll ch) + (setq ch (get-next-char tokenbuf)) + (if* (null ch) + then (return))) + (xml-error (concatenate 'string + "content cannot contain ']]>':'" + (compute-coll-string coll) + "'")) + elseif (eq #\] ch) then + (add-to-coll coll #\]) + else (setf state state-pcdata) + (add-to-coll coll #\]) (add-to-coll coll #\]) (un-next-char ch))) + + (#.state-pcdata2 + (if* (eq #\# ch) + then (setf state state-pcdata3) + elseif (xml-name-start-char-p ch) + then (setf state state-pcdata4) + (un-next-char ch) + else (clear-coll coll) + (dotimes (i 15) + (add-to-coll coll ch) + (setq ch (get-next-char tokenbuf)) + (if* (null ch) + then (return))) + (xml-error (concatenate 'string + "illegal reference name, starting at: '&" + (compute-coll-string coll) + "'")) + )) + + (#.state-pcdata3 + (if* (eq #\x ch) + then (setf state state-pcdata5) + elseif (<= (char-code #\0) (char-code ch) (char-code #\9)) + then (setf state state-pcdata6) + (un-next-char ch) + else (clear-coll coll) + (dotimes (i 15) + (add-to-coll coll ch) + (setq ch (get-next-char tokenbuf)) + (if* (null ch) + then (return))) + (xml-error (concatenate 'string + "illegal character reference code, starting at: '&#" + (compute-coll-string coll) + "'")) + )) + + (#.state-pcdata4 + (if* (xml-name-char-p ch) + then (add-to-coll entity ch) + elseif (eq #\; ch) + then (let ((entity-symbol (compute-tag entity))) + (clear-coll entity) + (if* (not (eq entity-source (first (iostruct-entity-bufs tokenbuf)))) then + (xml-error + (concatenate 'string + (string entity-symbol) + " reference cannot be constructed from entity reference/character data sequence")) + else + (setf entity-source nil)) + (if* (string= (symbol-name entity-symbol) "amp") then (add-to-coll coll #\&) + elseif (string= (symbol-name entity-symbol) "lt") then (add-to-coll coll #\<) + elseif (string= (symbol-name entity-symbol) "gt") then (add-to-coll coll #\>) + elseif (string= (symbol-name entity-symbol) "apos") then (add-to-coll coll #\') + elseif (string= (symbol-name entity-symbol) "quot") then (add-to-coll coll #\") + else + (let (p-value) + (if* (and (iostruct-do-entity tokenbuf) + (setf p-value + (assoc entity-symbol + (iostruct-general-entities tokenbuf)))) then + (setf p-value (rest p-value)) + (when (member entity-symbol (iostruct-entity-names tokenbuf)) + (xml-error (concatenate 'string + "entity:" + (string entity-symbol) + " in recursive reference"))) + (push entity-symbol (iostruct-entity-names tokenbuf)) + (if* (stringp p-value) then + (add-to-entity-buf entity-symbol p-value) + elseif (null external-callback) then + (setf (iostruct-do-entity tokenbuf) nil) + elseif p-value then + (let ((entity-stream (apply external-callback p-value))) + (if* entity-stream then + (let ((entity-buf (get-tokenbuf))) + (setf (tokenbuf-stream entity-buf) entity-stream) + (unicode-check entity-stream tokenbuf) + (push entity-buf + (iostruct-entity-bufs tokenbuf)) + ;; check for possible external textdecl + (let ((count 0) cch + (string " ch) then + (let ((tag-string (compute-coll-string coll))) + (when (and (iostruct-ns-scope tokenbuf) + (string= tag-string + (first (first (iostruct-ns-scope tokenbuf))))) + (dolist (item (second (first (iostruct-ns-scope tokenbuf)))) + (setf (iostruct-ns-to-package tokenbuf) + (remove (assoc item (iostruct-ns-to-package tokenbuf)) + (iostruct-ns-to-package tokenbuf)))) + (setf (iostruct-ns-scope tokenbuf) + (rest (iostruct-ns-scope tokenbuf))))) + (setq tag-to-return (compute-tag coll *package* + (iostruct-ns-to-package tokenbuf))) + (return) + elseif (xml-space-p ch) then (setf state state-readtag-end3) + (let ((tag-string (compute-coll-string coll))) + (when (and (iostruct-ns-scope tokenbuf) + (string= tag-string + (first (first (iostruct-ns-scope tokenbuf))))) + (setf (iostruct-ns-scope tokenbuf) + (rest (iostruct-ns-scope tokenbuf))))) + (setq tag-to-return (compute-tag coll *package* + (iostruct-ns-to-package tokenbuf))) + else (let ((tmp (compute-coll-string coll))) + (clear-coll coll) + (dotimes (i 15) + (add-to-coll coll ch) + (setq ch (get-next-char tokenbuf)) + (if* (null ch) + then (return))) + (xml-error (concatenate 'string + "illegal end tag name, starting at: ' ch) then (return) + else (let ((tmp (compute-coll-string coll))) + (clear-coll coll) + (dotimes (i 15) + (add-to-coll coll ch) + (setq ch (get-next-char tokenbuf)) + (if* (null ch) + then (return))) + (xml-error (concatenate 'string + "illegal end tag name, starting at: '" + (compute-coll-string coll) + "' end tag name: " tmp ))) + )) + + (#.state-readtagfirst + ; starting to read a tag name + (if* (eq #\/ ch) + then (setf state state-readtag-end) + elseif (eq #\? ch) + then (setf state state-readtag-?) + (setf empty-delim #\?) + elseif (eq #\! ch) + then (setf state state-readtag-!) + (setf empty-delim nil) + elseif (xml-name-start-char-p ch) + then (setf state state-readtag) + (setf empty-delim #\/) + (un-next-char ch) + else (clear-coll coll) + (dotimes (i 15) + (add-to-coll coll ch) + (setq ch (get-next-char tokenbuf)) + (if* (null ch) + then (return))) + (xml-error (concatenate 'string + "illegal character following '<', starting at '" + (compute-coll-string coll) + "'")) + )) + + (#.state-readtag-! + (if* (xml-name-start-char-p ch) + then + (setf state state-readtag-!-name) + (un-next-char ch) + elseif (eq #\[ ch) + then + (setf state state-readtag-!-conditional) + elseif (eq #\- ch) + then + (setf state state-readtag-!-comment) + else + (clear-coll coll) + (dotimes (i 15) + (add-to-coll coll ch) + (setq ch (get-next-char tokenbuf)) + (if* (null ch) + then (return))) + (xml-error (concatenate 'string + "illegal character following ' ch) + then + (if* (not (eq entity-source (first (iostruct-entity-bufs tokenbuf)))) then + (xml-error + "CDATA cannot be constructed from entity reference/character data sequence") + else + (setf entity-source nil)) + (return) + elseif (eq #\] ch) then + (add-to-coll coll #\]) ;; come back here to check again + else (setf state state-readtag-!-conditional5) + (add-to-coll coll #\]) + (add-to-coll coll #\]) + (add-to-coll coll ch))) + + (#.state-readtag-!-comment + (if* (eq #\- ch) + then (setf state state-readtag-!-readcomment) + (setf tag-to-return :comment) + else (clear-coll coll) + (dotimes (i 15) + (add-to-coll coll ch) + (setq ch (get-next-char tokenbuf)) + (if* (null ch) + then (return))) + (xml-error (concatenate 'string + "illegal token following ' ch) + then (if* (not (eq entity-source (first (iostruct-entity-bufs tokenbuf)))) then + (xml-error + (concatenate 'string + (string tag-to-return) + " tag cannot be constructed from entity reference/character data sequence")) + else + (setf entity-source nil)) + (return) + else (clear-coll coll) + (dotimes (i 15) + (add-to-coll coll ch) + (setq ch (get-next-char tokenbuf)) + (if* (null ch) + then (return))) + (xml-error (concatenate 'string + "illegal token following '--' comment terminator, starting at '--" + (compute-coll-string coll) + "'")) + )) + + (#.state-readtag + (if* (xml-name-char-p ch) ;; starting char already passed more restrictive test + then + (add-to-coll coll ch) + else + (if* (xml-space-p ch) then + (setf tag-to-return-string (compute-coll-string coll)) + (setq tag-to-return + (compute-tag coll *package* + (iostruct-ns-to-package tokenbuf))) + (clear-coll coll) + (setf state state-readtag2) + elseif (eq #\> ch) then + (setq tag-to-return + (compute-tag coll *package* + (iostruct-ns-to-package tokenbuf))) + (clear-coll coll) + (return) + elseif (eq #\/ ch) then + (setq tag-to-return + (compute-tag coll *package* + (iostruct-ns-to-package tokenbuf))) + (clear-coll coll) + (setf state state-readtag3) + else (dotimes (i 15) + (add-to-coll coll ch) + (setq ch (get-next-char tokenbuf)) + (if* (null ch) + then (return))) + (xml-error + (concatenate 'string + "illegal token name, starting at '" + (compute-coll-string coll) + "'")) + ))) + + (#.state-readtag2 + (if* (xml-space-p ch) then nil + elseif (eq #\> ch) then (return) + elseif (eq #\/ ch) then (setf state state-readtag3) + elseif (xml-name-start-char-p ch) then + (un-next-char ch) + (setf state state-readtag4) + else (clear-coll coll) + (dotimes (i 15) + (add-to-coll coll ch) + (setq ch (get-next-char tokenbuf)) + (if* (null ch) + then (return))) + (xml-error + (concatenate 'string + "illegal token, starting at '" + (compute-coll-string coll) + "' following element token start: " (string tag-to-return))) + )) + + (#.state-readtag4 + (if* (xml-name-char-p ch) ;; starting char already passed more restrictive test + then + (add-to-coll coll ch) + elseif (eq #\= ch) then + (setq attrib-name (compute-tag coll *package* + (iostruct-ns-to-package tokenbuf))) + (clear-coll coll) + (let ((name (symbol-name attrib-name))) + (when (and (>= (length name) 5) + (string= name "xmlns" :end1 5)) + (if* (= (length name) 5) + then + (setf ns-token :none) + elseif (eq (schar name 5) #\:) + then + (setf ns-token (subseq name 6))))) + (setf state state-readtag5) + elseif (xml-space-p ch) then + (setq attrib-name (compute-tag coll *package* + (iostruct-ns-to-package tokenbuf))) + (clear-coll coll) + (let ((name (symbol-name attrib-name))) + (when (and (>= (length name) 5) + (string= name "xmlns" :end1 5)) + (if* (= (length name) 5) + then + (setf ns-token :none) + else + (setf ns-token (subseq name 6))))) + (setf state state-readtag12) + else (let ((tmp (compute-coll-string coll))) + (clear-coll coll) + (dotimes (i 15) + (add-to-coll coll ch) + (setq ch (get-next-char tokenbuf)) + (if* (null ch) + then (return))) + (xml-error + (concatenate 'string + "looking for attribute '=', found: '" + (compute-coll-string coll) + "' following attribute name: " tmp))) + )) + + (#.state-readtag12 + (if* (xml-space-p ch) then nil + elseif (eq #\= ch) then (setf state state-readtag5) + else + (dotimes (i 15) + (add-to-coll coll ch) + (setq ch (get-next-char tokenbuf)) + (if* (null ch) + then (return))) + (xml-error + (concatenate 'string + "looking for attribute '=', found: '" + (compute-coll-string coll) + "' following attribute name: " (string attrib-name))))) + + (#.state-readtag5 + ;; begin to collect attribute value + (if* (or (eq ch #\") + (eq ch #\')) + then (setq value-delim ch) + (let* ((tag-defaults (assoc tag-to-return attlist-data)) + (this-attrib (assoc attrib-name tag-defaults))) + (when (and (second this-attrib) (not (eq (second this-attrib) :CDATA))) + (setf cdatap nil)) + ) + (setq state state-readtag6) + elseif (xml-space-p ch) then nil + else + (dotimes (i 15) + (add-to-coll coll ch) + (setq ch (get-next-char tokenbuf)) + (if* (null ch) + then (return))) + (xml-error + (concatenate 'string + "attribute value not delimited by ' or \" : '" + (compute-coll-string coll) + "' following attribute: " (string attrib-name))) + )) + + (#.state-readtag6 + (let ((from-entity (and attrib-value-tokenbuf + (eq attrib-value-tokenbuf + (first (iostruct-entity-bufs tokenbuf)))))) + (when (not from-entity) (setf attrib-value-tokenbuf nil)) + (if* from-entity then + (if* (eq #\newline ch) then (setf ch #\space) + elseif (eq #\return ch) then (setf ch #\space) + elseif (eq #\tab ch) then (setf ch #\space) + )) + (if* (and (not from-entity) (eq ch value-delim)) + then (setq attrib-value (compute-coll-string coll)) + (when (not cdatap) + (setf attrib-value (normalize-attrib-value attrib-value))) + (clear-coll coll) + (push attrib-name attribs-to-return) + (push attrib-value attribs-to-return) + (when ns-token + (let ((package (assoc (parse-uri attrib-value) + (iostruct-uri-to-package tokenbuf) + :test 'uri=))) + (if* package then (setf package (rest package)) + else + (setf package + (let ((i 0) new-package) + (loop + (let* ((candidate (concatenate 'string + "net.xml.namespace." + (format nil "~s" i))) + (exists (find-package candidate))) + (if* exists + then (incf i) + else (setf new-package (make-package candidate)) + (setf (iostruct-uri-to-package tokenbuf) + (acons (parse-uri attrib-value) new-package + (iostruct-uri-to-package tokenbuf))) + (return new-package))))))) + (setf (iostruct-ns-to-package tokenbuf) + (acons ns-token package (iostruct-ns-to-package tokenbuf))) + ) + (if* (and (first (iostruct-ns-scope tokenbuf)) + (string= (first (first (iostruct-ns-scope tokenbuf))) + tag-to-return-string)) + then + (push ns-token (second (first (iostruct-ns-scope tokenbuf)))) + else + (push (list tag-to-return-string (list ns-token)) + (iostruct-ns-scope tokenbuf))) + (setf ns-token nil)) + (setq state state-readtag6a) + elseif (eq #\newline ch) then + (when (not (eq #\return last-ch)) (add-to-coll coll #\space)) + elseif (or (eq #\tab ch) (eq #\return ch)) then + (add-to-coll coll #\space) + elseif (eq #\& ch) + then (setq state state-readtag7) + (setf entity-source (first (iostruct-entity-bufs tokenbuf))) + elseif (and (xml-char-p ch) (not (eq #\< ch))) + then (add-to-coll coll ch) + else + (dotimes (i 15) + (add-to-coll coll ch) + (setq ch (get-next-char tokenbuf)) + (if* (null ch) + then (return))) + (xml-error + (concatenate 'string + "attribute value cannot contain '<': '" + (compute-coll-string coll) + "' following attribute: " (string attrib-name))) + ) + (setf last-ch ch))) + + (#.state-readtag6a + (if* (xml-space-p ch) then (setf state state-readtag2) + elseif (eq #\> ch) then (setf state state-readtag2) + (return) + elseif (eq #\/ ch) then (setf state state-readtag3) + else (clear-coll coll) + (dotimes (i 15) + (add-to-coll coll ch) + (setq ch (get-next-char tokenbuf)) + (if* (null ch) + then (return))) + (xml-error + (concatenate 'string + "illegal token, starting at '" + (compute-coll-string coll) + "' following element token start: " (string tag-to-return))) + )) + + (#.state-readtag7 + (if* (eq #\# ch) + then (setf state state-readtag8) + elseif (xml-name-start-char-p ch) + then (setf state state-readtag9) + (un-next-char ch) + else (clear-coll coll) + (dotimes (i 15) + (add-to-coll coll ch) + (setq ch (get-next-char tokenbuf)) + (if* (null ch) + then (return))) + (xml-error + (concatenate 'string + "attribute value contains illegal reference name: '&" + (compute-coll-string coll) + "' in attribute value for: " (string attrib-name))) + )) + + (#.state-readtag8 + (if* (eq #\x ch) + then (setf state state-readtag10) + elseif (<= (char-code #\0) (char-code ch) (char-code #\9)) + then (setf state state-readtag11) + (un-next-char ch) + else (clear-coll coll) + (dotimes (i 15) + (add-to-coll coll ch) + (setq ch (get-next-char tokenbuf)) + (if* (null ch) + then (return))) + (xml-error + (concatenate 'string + "attribute value contains illegal character reference code: '" + (compute-coll-string coll) + "' in attribute value for: " (string attrib-name))) + )) + + (#.state-readtag10 + (let ((code (char-code ch))) + (if* (eq #\; ch) + then (if* (not (eq entity-source (first (iostruct-entity-bufs tokenbuf)))) then + (xml-error + (concatenate 'string + (string (code-char char-code)) + " reference cannot be constructed from entity reference/character data sequence")) + else + (setf entity-source nil)) + (add-to-coll coll (code-char char-code)) + (setf char-code 0) + (setq state state-readtag6) + elseif (<= (char-code #\0) code (char-code #\9)) + then (setf char-code (+ (* char-code 16) (- code (char-code #\0)))) + elseif (<= (char-code #\A) code (char-code #\F)) + then (setf char-code (+ 10 (* char-code 16) (- code (char-code #\A)))) + elseif (<= (char-code #\a) code (char-code #\f)) + then (setf char-code (+ 10 (* char-code 16) (- code (char-code #\a)))) + else (clear-coll coll) + (dotimes (i 15) + (add-to-coll coll ch) + (setq ch (get-next-char tokenbuf)) + (if* (null ch) + then (return))) + (xml-error + (concatenate 'string + "attribute value contains illegal hexidecimal character reference code: '" + (compute-coll-string coll) + "' in attribute value for: " (string attrib-name))) + ))) + + (#.state-readtag11 + (let ((code (char-code ch))) + (if* (eq #\; ch) + then (if* (not (eq entity-source (first (iostruct-entity-bufs tokenbuf)))) then + (xml-error + (concatenate 'string + (string (code-char char-code)) + " reference cannot be constructed from entity reference/character data sequence")) + else + (setf entity-source nil)) + (add-to-coll coll (code-char char-code)) + (setf char-code 0) + (setq state state-readtag6) + elseif (<= (char-code #\0) code (char-code #\9)) + then (setf char-code (+ (* char-code 10) (- code (char-code #\0)))) + else (clear-coll coll) + (dotimes (i 15) + (add-to-coll coll ch) + (setq ch (get-next-char tokenbuf)) + (if* (null ch) + then (return))) + (xml-error + (concatenate 'string + "attribute value contains illegal decimal character reference code: '" + (compute-coll-string coll) + "' in attribute value for: " (string attrib-name))) + ))) + + (#.state-readtag9 + (if* (xml-name-char-p ch) + then (add-to-coll entity ch) + elseif (eq #\; ch) + then (let ((entity-symbol (compute-tag entity))) + (clear-coll entity) + (if* (not (eq entity-source (first (iostruct-entity-bufs tokenbuf)))) then + (xml-error + (concatenate 'string + (string entity-symbol) + " reference cannot be constructed from entity reference/character data sequence")) + else + (setf entity-source nil)) + (if* (string= (symbol-name entity-symbol) "amp") then (add-to-coll coll #\&) + elseif (string= (symbol-name entity-symbol) "lt") then (add-to-coll coll #\<) + elseif (string= (symbol-name entity-symbol) "gt") then (add-to-coll coll #\>) + elseif (string= (symbol-name entity-symbol) "apos") then (add-to-coll coll #\') + elseif (string= (symbol-name entity-symbol) "quot") then (add-to-coll coll #\") + else (let (p-value) + (if* (and (iostruct-do-entity tokenbuf) + (setf p-value + (assoc entity-symbol + (iostruct-general-entities tokenbuf)))) then + (setf p-value (rest p-value)) + (when (member entity-symbol (iostruct-entity-names tokenbuf)) + (xml-error (concatenate 'string + "entity:" + (string entity-symbol) + " in recursive reference"))) + (push entity-symbol (iostruct-entity-names tokenbuf)) + (if* (stringp p-value) then + (add-to-entity-buf entity-symbol p-value) + (when (not attrib-value-tokenbuf) + (setf attrib-value-tokenbuf + (first (iostruct-entity-bufs tokenbuf)))) + elseif (null external-callback) then + (setf (iostruct-do-entity tokenbuf) nil) + elseif p-value then + (let ((entity-stream (apply external-callback p-value))) + (if* entity-stream then + (let ((entity-buf (get-tokenbuf))) + (setf (tokenbuf-stream entity-buf) entity-stream) + (unicode-check entity-stream tokenbuf) + (push entity-buf + (iostruct-entity-bufs tokenbuf)) + ;; check for possible external textdecl + (let ((count 0) cch + (string " ch) then (return) + else (clear-coll coll) + (dotimes (i 15) + (add-to-coll coll ch) + (setq ch (get-next-char tokenbuf)) + (if* (null ch) + then (return))) + (xml-error + (concatenate 'string + "expected '>' found '" + (compute-coll-string coll) + "' in element: " (string tag-to-return))) + )) + + (#.state-readtag-!-name + (if* (xml-name-char-p ch) ;; starting char already passed more restrictive test + then + (add-to-coll coll ch) + else + (when (not (xml-space-p ch)) + (xml-error (concatenate 'string + "expecting whitespace following: ' ch) + then (return) + else (un-next-char ch) + (setf state state-!-contents))) + + (#.state-begin-dtd + (un-next-char ch) + (let ((val (parse-dtd tokenbuf nil external-callback))) + (setf (iostruct-seen-any-dtd tokenbuf) t) + (push (append (list :[) val) + contents-to-return)) + (setf state state-!-doctype-ext3)) + + (#.state-!-contents + (if* (xml-name-char-p ch) + then (add-to-coll coll ch) + elseif (eq #\> ch) + then (push (compute-coll-string coll) contents-to-return) + (clear-coll coll) + (return) + elseif (eq #\[ ch) + then (push (compute-tag coll) contents-to-return) + (clear-coll coll) + (setf state state-begin-dtd) + elseif (and (xml-space-p ch) (eq tag-to-return :DOCTYPE)) + ;; look at tag-to-return and set state accordingly + then (push (compute-tag coll) contents-to-return) + (clear-coll coll) + (setf state state-!-doctype) + else (xml-error + (concatenate 'string + "illegal name: '" + (string tag-to-return) + "' in ch) then (return) + elseif (eq #\[ ch) + then (setf state state-begin-dtd) + else + (dotimes (i 15) + (add-to-coll coll ch) + (setq ch (get-next-char tokenbuf)) + (if* (null ch) + then (return))) + (xml-error + (concatenate 'string + "illegal char in DOCTYPE token: '" + (compute-coll-string coll) "'")) + )) + + (#.state-!-doctype-ext3 + (if* (xml-space-p ch) then nil + elseif (eq #\> ch) then (return) + else + (dotimes (i 15) + (add-to-coll coll ch) + (setq ch (get-next-char tokenbuf)) + (if* (null ch) + then (return))) + (xml-error + (concatenate 'string + "illegal char in DOCTYPE token following dtd: '" + (compute-coll-string coll) "'")) + )) + + (#.state-!-doctype + ;; skip whitespace; possible exits: >, SYSTEM, PUBLIC, [ + (if* (xml-space-p ch) then nil + elseif (xml-name-start-char-p ch) + then + (setf state state-!-doctype-ext) + (un-next-char ch) + elseif (eq #\> ch) then (return) + elseif (eq #\[ ch) + then (setf state state-begin-dtd) + else (xml-error + (concatenate 'string + "illegal character: '" + (string ch) + "' in ch) + then (return) + elseif (eq #\? ch) then + (add-to-coll coll #\?) ;; come back here to try again + else (setf state state-readpi) + (add-to-coll coll #\?) + (add-to-coll coll ch))) + + (#.state-findattributename0 + (if* (xml-space-p ch) then (setf state state-findattributename) + elseif (eq ch empty-delim) + then (setf state state-noattributename) + else + (dotimes (i 15) + (add-to-coll coll ch) + (setq ch (get-next-char tokenbuf)) + (if* (null ch) + then (return))) + (xml-error + (concatenate 'string + "expected space or tag end before: '" + (compute-coll-string coll) "'")))) + (#.state-findattributename + ;; search until we find the start of an attribute name + ;; or the end of the tag + (if* (eq ch empty-delim) + then (setf state state-noattributename) + elseif (xml-space-p ch) + then nil ;; skip whitespace + elseif (xml-name-start-char-p ch) + then + (un-next-char ch) + (setf state state-attribname) + else + (dotimes (i 15) + (add-to-coll coll ch) + (setq ch (get-next-char tokenbuf)) + (if* (null ch) + then (return))) + (xml-error + (concatenate 'string + "illegal char in ch) + then + (return) ;; ready to build return token + else + (xml-error + (concatenate 'string + "expected '>' found: '" (string ch) "' in :~s ~s ~s ~s" state - tag-to-return - contents-to-return - ret)))) + (#.state-noattributename ;; it's a bug if this state occurs with a non-empty element + (put-back-collector coll) + (if* attribs-to-return + then (values (cons tag-to-return + (nreverse attribs-to-return)) + (if (eq tag-to-return :xml) :xml :start-tag) :end-tag) + else + (values tag-to-return :start-tag :end-tag) + )) + (#.state-readtag-end-bracket + ;; this is a :commant tag + (let ((ret (compute-coll-string coll))) + (put-back-collector coll) + (values (cons tag-to-return (list ret)) :comment :nil))) + (#.state-pcdata + (let ((next-char (collector-next coll))) + (put-back-collector coll) + (if* (zerop next-char) + then (values nil :eof nil) + else (values (compute-coll-string coll) :pcdata pcdatap)))) + (#.state-readpi2 + (let ((ret (compute-coll-string coll))) + (put-back-collector coll) + (values (append (list :pi tag-to-return) (list ret)) :pi nil))) + ((#.state-readtag-!-conditional) + (put-back-collector coll) + (values (append (list tag-to-return) contents-to-return) :start-tag + :end-tag)) + ((#.state-!-contents + #.state-!-doctype + #.state-!-doctype-ext2 + #.state-!-doctype-ext3) + (put-back-collector coll) + (values (append (list tag-to-return) (nreverse contents-to-return)) :start-tag + :end-tag)) + (#.state-readtag3 + (put-back-collector coll) + (values (if* attribs-to-return + then (cons tag-to-return + (nreverse attribs-to-return)) + else tag-to-return) :start-tag :end-tag)) + ((#.state-readtag2 + #.state-readtag) + (put-back-collector coll) + (values (if* attribs-to-return + then (cons tag-to-return + (nreverse attribs-to-return)) + else tag-to-return) :start-tag nil)) + ((#.state-readtag-end2 + #.state-readtag-end3) + (put-back-collector coll) + (values tag-to-return :end-tag nil)) + (#.state-readtag-!-conditional7 + (let ((ret (compute-coll-string coll))) + (put-back-collector coll) + (values (append (list :cdata) (list ret)) :cdata nil))) + (t + ;; if ch is null that means we encountered unexpected EOF + (when (null ch) + (put-back-collector coll) + (xml-error "unexpected end of input")) + (print (list tag-to-return attribs-to-return)) + (let ((ret (compute-coll-string coll))) + (put-back-collector coll) + (error "need to support state :~s ~s ~s ~s" state + tag-to-return + contents-to-return + ret)))) ))) (defun swallow-xml-token (tokenbuf external-callback) (declare (ignorable old-coll) (optimize (speed 3) (safety 1))) (let ((xml (next-token tokenbuf external-callback nil))) (if* (and (eq (fourth xml) :standalone) (stringp (fifth xml)) - (equal (fifth xml) "yes")) then - (xml-error "external XML entity cannot be standalone document") + (equal (fifth xml) "yes")) then + (xml-error "external XML entity cannot be standalone document") elseif (and (eq (sixth xml) :standalone) (stringp (seventh xml)) - (equal (seventh xml) "yes")) then - (xml-error "external XML entity cannot be standalone document")))) + (equal (seventh xml) "yes")) then + (xml-error "external XML entity cannot be standalone document")))) ;; return the string with entity references replaced by text ;; normalizing will happen later @@ -2051,19 +2051,19 @@ (if* (stringp (first value-list)) then (setf value-string (first value-list)) elseif (eq (first value-list) :FIXED) then (setf value-string (second value-list))) (let ((tmp-result (parse-xml - (concatenate 'string - "") - :external-callback external-callback - :general-entities - (iostruct-general-entities tokenbuf)))) + (concatenate 'string + "") + :external-callback external-callback + :general-entities + (iostruct-general-entities tokenbuf)))) (if* (stringp (first value-list)) then - (setf (first value-list) - (third (first (first tmp-result)))) - elseif (eq (first value-list) :FIXED) then - (setf (second value-list) - (third (first (first tmp-result))))))) + (setf (first value-list) + (third (first (first tmp-result)))) + elseif (eq (first value-list) :FIXED) then + (setf (second value-list) + (third (first (first tmp-result))))))) value-list) (defun process-attlist (args attlist-data) @@ -2073,19 +2073,19 @@ (dolist (item (rest arg1)) ;;(format t "item: ~s~%" item) (when (eq :ATTLIST (first item)) - (let* ((name (second item)) - (name-data (assoc name attlist-data)) - (new-name-data (rest name-data))) - ;;(format t "name: ~s name-data: ~s new-name-data: ~s~%" name name-data new-name-data) - (dolist (attrib-data (rest (rest item))) - ;;(format t "attrib-data: ~s~%" attrib-data) - #+ignore - (setf (rest (rest attrib-data)) - (parse-default-value (rest (rest attrib-data)) tokenbuf external-callback)) - (when (not (assoc (first attrib-data) new-name-data)) - (setf new-name-data (acons (first attrib-data) (rest attrib-data) new-name-data)))) - (if* name-data then - (rplacd (assoc name attlist-data) (nreverse new-name-data)) - else (setf attlist-data (acons name (nreverse new-name-data) attlist-data)))))))) + (let* ((name (second item)) + (name-data (assoc name attlist-data)) + (new-name-data (rest name-data))) + ;;(format t "name: ~s name-data: ~s new-name-data: ~s~%" name name-data new-name-data) + (dolist (attrib-data (rest (rest item))) + ;;(format t "attrib-data: ~s~%" attrib-data) + #+ignore + (setf (rest (rest attrib-data)) + (parse-default-value (rest (rest attrib-data)) tokenbuf external-callback)) + (when (not (assoc (first attrib-data) new-name-data)) + (setf new-name-data (acons (first attrib-data) (rest attrib-data) new-name-data)))) + (if* name-data then + (rplacd (assoc name attlist-data) (nreverse new-name-data)) + else (setf attlist-data (acons name (nreverse new-name-data) attlist-data)))))))) (provide :pxml) diff --git a/pxml3.cl b/pxml3.cl index cad5557..bb83548 100644 --- a/pxml3.cl +++ b/pxml3.cl @@ -27,29 +27,29 @@ (defvar *debug-dtd* nil) (defun parse-dtd (tokenbuf - external external-callback) + external external-callback) (declare (optimize (speed 3) (safety 1))) (let ((guts) - (include-count 0)) + (include-count 0)) (loop (multiple-value-bind (val kind) - (next-dtd-token tokenbuf - external include-count external-callback) - (if* (eq kind :end-dtd) then - (return (nreverse guts)) - elseif (eq kind :include) then - (incf include-count) - elseif (eq kind :ignore) then nil - elseif (eq kind :include-end) then - (if* (> include-count 0) then (decf include-count) - else (xml-error "unexpected ']]>' token")) - else (when (iostruct-do-entity tokenbuf) (push val guts))))))) + (next-dtd-token tokenbuf + external include-count external-callback) + (if* (eq kind :end-dtd) then + (return (nreverse guts)) + elseif (eq kind :include) then + (incf include-count) + elseif (eq kind :ignore) then nil + elseif (eq kind :include-end) then + (if* (> include-count 0) then (decf include-count) + else (xml-error "unexpected ']]>' token")) + else (when (iostruct-do-entity tokenbuf) (push val guts))))))) (defparameter dtd-parser-states ()) (macrolet ((def-dtd-parser-state (var val) - `(progn (eval-when (compile load eval) (defconstant ,var ,val)) - (pushnew '(,val . ,var) dtd-parser-states :key #'car)))) + `(progn (eval-when (compile load eval) (defconstant ,var ,val)) + (pushnew '(,val . ,var) dtd-parser-states :key #'car)))) (def-dtd-parser-state state-dtdstart 0) (def-dtd-parser-state state-tokenstart 1) (def-dtd-parser-state state-dtd-? 2) @@ -146,2369 +146,2369 @@ ) (defun next-dtd-token (tokenbuf - external include-count external-callback) + external include-count external-callback) (declare #+allegro (:fbound parse-default-value) - #+lispworks (optimize (safety 0) (debug 3)) - #-lispworks (optimize (speed 3) (safety 1))) + #+lispworks (optimize (safety 0) (debug 3)) + #-lispworks (optimize (speed 3) (safety 1))) (macrolet ((add-to-entity-buf (entity-symbol p-value) - `(progn - (push (make-tokenbuf :cur 0 :max (length ,p-value) :data ,p-value) - (iostruct-entity-bufs tokenbuf)))) + `(progn + (push (make-tokenbuf :cur 0 :max (length ,p-value) :data ,p-value) + (iostruct-entity-bufs tokenbuf)))) - (un-next-char (ch) - `(push ,ch (iostruct-unget-char tokenbuf))) + (un-next-char (ch) + `(push ,ch (iostruct-unget-char tokenbuf))) - (clear-coll (coll) - `(setf (collector-next ,coll) 0)) + (clear-coll (coll) + `(setf (collector-next ,coll) 0)) - (add-to-coll (coll ch) - `(let ((.next. (collector-next ,coll))) - (if* (>= .next. (collector-max ,coll)) - then (grow-and-add ,coll ,ch) - else (setf (schar (collector-data ,coll) .next.) - ,ch) - (setf (collector-next ,coll) (1+ .next.))))) + (add-to-coll (coll ch) + `(let ((.next. (collector-next ,coll))) + (if* (>= .next. (collector-max ,coll)) + then (grow-and-add ,coll ,ch) + else (setf (schar (collector-data ,coll) .next.) + ,ch) + (setf (collector-next ,coll) (1+ .next.))))) - (to-preferred-case (ch) - ;; should check the case mode - `(char-downcase ,ch)) + (to-preferred-case (ch) + ;; should check the case mode + `(char-downcase ,ch)) - ) + ) (let ((state state-dtdstart) - (coll (get-collector)) - (entity (get-collector)) - (tag-to-return) - (contents-to-return) - (pending (list nil)) - (pending-type) - (value-delim) - (public-string) - (char-code 0) - (check-count 0) - (ignore-count 0) - (reference-save-state) - (prefp) - (entityp) - (pentityp) - (prev-state) - (ch)) + (coll (get-collector)) + (entity (get-collector)) + (tag-to-return) + (contents-to-return) + (pending (list nil)) + (pending-type) + (value-delim) + (public-string) + (char-code 0) + (check-count 0) + (ignore-count 0) + (reference-save-state) + (prefp) + (entityp) + (pentityp) + (prev-state) + (ch)) (loop - (setq ch (get-next-char tokenbuf)) - (when *debug-dtd* - (format t "~@~%" - ch (or (cdr (assoc state dtd-parser-states)) state) - contents-to-return pending pending-type - (iostruct-entity-names tokenbuf))) - (if* (null ch) - then (setf prev-state state) - (setf state :eof) - (return) ;; eof -- exit loop - ) + (setq ch (get-next-char tokenbuf)) + (when *debug-dtd* + (format t "~@~%" + ch (or (cdr (assoc state dtd-parser-states)) state) + contents-to-return pending pending-type + (iostruct-entity-names tokenbuf))) + (if* (null ch) + then (setf prev-state state) + (setf state :eof) + (return) ;; eof -- exit loop + ) - (case state - (#.state-dtdstart - (if* (and (eq #\] ch) - external (> include-count 0)) then - (setf state state-dtd-!-include3) - elseif (and (eq #\] ch) (not external)) then (return) - elseif (eq #\< ch) then (setf state state-tokenstart) - elseif (xml-space-p ch) then nil - elseif (eq #\% ch) then (external-param-reference tokenbuf coll external-callback) - else (dotimes (i 15) - (add-to-coll coll ch) - (setq ch (get-next-char tokenbuf)) - (if* (null ch) - then (return))) - (xml-error (concatenate 'string - "illegal DTD characters, starting at: '" - (compute-coll-string coll) - "'")) - )) - (#.state-dtd-!-include3 - (if* (eq #\] ch) then (setf state state-dtd-!-include4) - else - (dotimes (i 15) - (add-to-coll coll ch) - (setq ch (get-next-char tokenbuf)) - (if* (null ch) - then (return))) - (xml-error (concatenate 'string - "illegal DTD token, starting at: ']" - (compute-coll-string coll) - "'")))) - (#.state-dtd-!-include4 - (if* (eq #\> ch) then (return) - else - (dotimes (i 15) - (add-to-coll coll ch) - (setq ch (get-next-char tokenbuf)) - (if* (null ch) - then (return))) - (xml-error (concatenate 'string - "illegal DTD token, starting at: ']]" - (compute-coll-string coll) - "'")))) - #+ignore - (#.state-dtd-pref - (if* (xml-name-start-char-p ch) then - (add-to-coll coll ch) - (setf state state-dtd-pref2) - else (dotimes (i 15) - (add-to-coll coll ch) - (setq ch (get-next-char tokenbuf)) - (if* (null ch) - then (return))) - (xml-error (concatenate 'string - "illegal DTD parameter reference name, starting at: '" - (compute-coll-string coll) - "'")) - )) - (#.state-tokenstart - (if* (eq #\? ch) then (setf state state-dtd-?) - elseif (eq #\! ch) then (setf state state-dtd-!) - else (dotimes (i 15) - (add-to-coll coll ch) - (setq ch (get-next-char tokenbuf)) - (if* (null ch) - then (return))) - (xml-error (concatenate 'string - "illegal DTD characters, starting at: '<" - (compute-coll-string coll) - "'")) - )) - (#.state-dtd-? - (if* (xml-name-char-p ch) - then - (add-to-coll coll ch) - elseif (and external (eq #\% ch)) then - (external-param-reference tokenbuf coll external-callback) - else - (when (not (xml-space-p ch)) - (xml-error (concatenate 'string - "expecting name following: ' ch) - then - (push (compute-coll-string coll) contents-to-return) - (clear-coll coll) - (return) - else (setf state state-dtd-?-3) - (add-to-coll coll #\?) - (add-to-coll coll ch))) - (#.state-dtd-! - (if* (eq #\- ch) then (setf state state-dtd-comment) - elseif (xml-name-start-char-p ch) then (setf state state-dtd-!-token) - (un-next-char ch) - elseif (and (eq #\[ ch) external) then - (setf state state-dtd-!-cond) - else (dotimes (i 15) - (add-to-coll coll ch) - (setq ch (get-next-char tokenbuf)) - (if* (null ch) - then (return))) - (xml-error (concatenate 'string - "illegal DTD characters, starting at: ' ch) then (decf ignore-count) - (when (= ignore-count 0) (return)) - else (un-next-char ch) - (setf state state-dtd-!-ignore3))) - (#.state-dtd-!-include - (if* (and (eq check-count 6) (eq ch #\E)) then - (setf state state-dtd-!-include2) - elseif (eq ch (elt "INCLUD" check-count)) then - (incf check-count) - else (xml-error " ch) - then (push (compute-coll-string coll) contents-to-return) - (clear-coll coll) - (return) - else (clear-coll coll) - (dotimes (i 15) - (add-to-coll coll ch) - (setq ch (get-next-char tokenbuf)) - (if* (null ch) - then (return))) - (xml-error (concatenate 'string - "illegal token following '--' comment terminator, starting at '--" - (compute-coll-string coll) - "'")) - )) - (#.state-dtd-!-token - (if* (xml-name-char-p ch) then (add-to-coll coll ch) - elseif (and external (eq #\% ch)) then - (external-param-reference tokenbuf coll external-callback) - elseif (xml-space-p ch) then - (setf tag-to-return (compute-tag coll)) - (clear-coll coll) - (if* (eq tag-to-return :ELEMENT) then (setf state state-dtd-!-element) - elseif (eq tag-to-return :ATTLIST) then - (setf state state-dtd-!-attlist) - elseif (eq tag-to-return :ENTITY) then - (setf entityp t) - (setf state state-dtd-!-entity) - elseif (eq tag-to-return :NOTATION) then - (setf state state-dtd-!-notation) - else - (xml-error (concatenate 'string - "illegal DTD characters, starting at: ' ch)) then - (setf state state-!-dtd-system) - (return) - else - (dotimes (i 15) - (add-to-coll coll ch) - (setq ch (get-next-char tokenbuf)) - (if* (null ch) - then (return))) - (xml-error - (concatenate 'string - "Expected space before: '" - (compute-coll-string coll) "'")) - )) - (#.state-!-dtd-system - (if* (xml-space-p ch) then nil - elseif (and external (eq #\% ch)) then - (external-param-reference tokenbuf coll external-callback) - elseif (or (eq #\" ch) (eq #\' ch)) then - (setf state state-!-dtd-system2) - (setf value-delim ch) - elseif (and (not entityp) - (eq #\> ch)) then (return) - else (xml-error - (concatenate 'string - "expected quote or double-quote got: '" - (string ch) - "' in ch) then (return) - else - (dotimes (i 15) - (add-to-coll coll ch) - (setq ch (get-next-char tokenbuf)) - (if* (null ch) - then (return))) - (xml-error (concatenate 'string - "illegal DTD ch) then (return) - else (dotimes (i 15) - (add-to-coll coll ch) - (setq ch (get-next-char tokenbuf)) - (if* (null ch) - then (return))) - (xml-error (concatenate 'string - "illegal DTD - elseif (eq #\> ch) then - (push (compute-tag coll) contents-to-return) - (clear-coll coll) - (return) - else (dotimes (i 15) - (add-to-coll coll ch) - (setq ch (get-next-char tokenbuf)) - (if* (null ch) - then (return))) - (xml-error (concatenate 'string - "illegal DTD (length tmp) 0) - (when (null (first pending)) (setf pending (rest pending))) - (push tmp pending))) - (if* (> (length pending) 1) then - (push (nreverse pending) contents-to-return) - else (push (first pending) contents-to-return)) - (setf pending (list nil)) - (setf state state-dtd-!-entity5) - (clear-coll coll) - (if* pentityp then - (when (not (assoc (third contents-to-return) - (iostruct-parameter-entities tokenbuf))) - (setf (iostruct-parameter-entities tokenbuf) - (acons (third contents-to-return) - (first contents-to-return) - (iostruct-parameter-entities tokenbuf)))) - else - (when (not (assoc (second contents-to-return) - (iostruct-general-entities tokenbuf))) - (setf (iostruct-general-entities tokenbuf) - (acons (second contents-to-return) - (first contents-to-return) - (iostruct-general-entities tokenbuf))))) - elseif (eq #\& ch) then - (setf reference-save-state state-dtd-!-entity-value) - (setf state state-dtd-!-attdef-decl-value3) - elseif (eq #\% ch) then - (setf prefp t) - (setf reference-save-state state-dtd-!-entity-value) - (setf state state-dtd-!-attdef-decl-value3) - elseif (xml-char-p ch) - then (add-to-coll coll ch) - else (dotimes (i 15) - (add-to-coll coll ch) - (setq ch (get-next-char tokenbuf)) - (if* (null ch) - then (return))) - (xml-error (concatenate 'string - "illegal DTD ch) then (return) - else (clear-coll coll) - (dotimes (i 15) - (add-to-coll coll ch) - (setq ch (get-next-char tokenbuf)) - (if* (null ch) - then (return))) - (xml-error (concatenate 'string - "illegal DTD contents following ch) then - (push (compute-tag coll *package*) - contents-to-return) - (clear-coll coll) - (return) - else (push (compute-tag coll) - contents-to-return) - (clear-coll coll) - (dotimes (i 15) - (add-to-coll coll ch) - (setq ch (get-next-char tokenbuf)) - (if* (null ch) - then (return))) - (xml-error (concatenate 'string - "illegal DTD ch) then (return) - else (dotimes (i 15) - (add-to-coll coll ch) - (setq ch (get-next-char tokenbuf)) - (if* (null ch) - then (return))) - (xml-error (concatenate 'string - "illegal DTD )) then - (setf count 6) - else (setf last-ch cch)) - elseif (< count 6) then - (when (and (= count 5) - (xml-space-p cch)) - (setf cch #\space)) - (if* (not (eq cch - (schar string count) - )) then - (loop - (when (= tmp-count count) - (return)) - (add-to-coll coll - (schar string - tmp-count)) - (incf tmp-count)) - (add-to-coll coll cch) - (setf count 10) - else (incf count)) - elseif (= count 6) then - (dotimes (i 6) - (add-to-coll coll (schar string i))) - (setf count 10) - else (add-to-coll coll cch)))) - (setf (iostruct-entity-names tokenbuf) - (rest (iostruct-entity-names tokenbuf))) - (close entity-stream) - (put-back-tokenbuf tmp-buf))))) - ) - (setf state state-dtdstart) - else nil - ))) - (setf state reference-save-state) - else (let ((tmp (compute-coll-string entity))) - (clear-coll coll) - (dotimes (i 15) - (add-to-coll coll ch) - (setq ch (get-next-char tokenbuf)) - (if* (null ch) - then (return))) - (xml-error (concatenate 'string - "reference not terminated by ';', starting at: '&" - tmp - (compute-coll-string coll) - "'"))) - )) - (#.state-dtd-!-attdef-decl-value6 - (let ((code (char-code ch))) - (if* (eq #\; ch) - then (add-to-coll coll (code-char char-code)) - (setf char-code 0) - (setq state state-dtd-!-attdef-decl-value) - elseif (<= (char-code #\0) code (char-code #\9)) - then (setf char-code (+ (* char-code 16) (- code (char-code #\0)))) - elseif (<= (char-code #\A) code (char-code #\F)) - then (setf char-code (+ 10 (* char-code 16) (- code (char-code #\A)))) - elseif (<= (char-code #\a) code (char-code #\f)) - then (setf char-code (+ 10 (* char-code 16) (- code (char-code #\a)))) - else (clear-coll coll) - (dotimes (i 15) - (add-to-coll coll ch) - (setq ch (get-next-char tokenbuf)) - (if* (null ch) - then (return))) - (xml-error (concatenate 'string - "illegal hexidecimal character reference code, starting at: '" - (compute-coll-string coll) - "', calculated char code: " - (format nil "~s" char-code))) - ))) - (#.state-dtd-!-attdef-decl-value7 - (let ((code (char-code ch))) - (if* (eq #\; ch) - then (add-to-coll coll (code-char char-code)) - (setf char-code 0) - (setq state reference-save-state) - elseif (<= (char-code #\0) code (char-code #\9)) - then (setf char-code (+ (* char-code 10) (- code (char-code #\0)))) - else (clear-coll coll) - (dotimes (i 15) - (add-to-coll coll ch) - (setq ch (get-next-char tokenbuf)) - (if* (null ch) - then (return))) - (xml-error (concatenate 'string - "illegal decimal character reference code, starting at: '" - (compute-coll-string coll) - "', calculated char code: " - (format nil "~s" char-code))) - ))) - (#.state-dtd-!-attdef-decl-type - (if* (xml-name-char-p ch) then (add-to-coll coll ch) - elseif (and external (eq #\% ch)) then - (external-param-reference tokenbuf coll external-callback) - elseif (or (xml-space-p ch) (eq #\> ch)) then - (let ((token (compute-tag coll))) - (when (and (not (eq :REQUIRED token)) - (not (eq :IMPLIED token)) - (not (eq :FIXED token))) - (dotimes (i 15) - (add-to-coll coll ch) - (setq ch (get-next-char tokenbuf)) - (if* (null ch) - then (return))) - (xml-error (concatenate 'string - "illegal DTD ch) - (dotimes (i 15) - (add-to-coll coll ch) - (setq ch (get-next-char tokenbuf)) - (if* (null ch) - then (return))) - (xml-error (concatenate 'string - "illegal DTD ch) then - (setf contents-to-return - (append contents-to-return (list (nreverse pending)))) - (return) - else (setf contents-to-return - (append contents-to-return (list (nreverse pending)))) - (setf pending (list nil)) - (setf state state-dtd-!-attdef))) - (clear-coll coll) - else (dotimes (i 15) - (add-to-coll coll ch) - (setq ch (get-next-char tokenbuf)) - (if* (null ch) - then (return))) - (xml-error (concatenate 'string - "illegal DTD ch) then - ;; there only one name... - (setf (first contents-to-return) (first (first contents-to-return))) - (return) - elseif (eq #\* ch) then - (setf state state-dtd-!-element-type-paren-pcd5) - (setf (first contents-to-return) (nreverse (first contents-to-return))) - (if* (> (length (first contents-to-return)) 1) then - (setf (first contents-to-return) - (list (append (list :choice) - (first contents-to-return)))) - elseif (listp (first (first contents-to-return))) then - (setf (first contents-to-return) - (first (first contents-to-return)))) - (push :* (first contents-to-return)) - elseif (eq #\? ch) then - (setf state state-dtd-!-element-type-paren-pcd5) - (setf (first contents-to-return) (nreverse (first contents-to-return))) - (if* (> (length (first contents-to-return)) 1) then - (setf (first contents-to-return) - (list (append (list :choice) - (first contents-to-return)))) - elseif (listp (first (first contents-to-return))) then - (setf (first contents-to-return) - (first (first contents-to-return)))) - (push :? (first contents-to-return)) - elseif (eq #\+ ch) then - (setf state state-dtd-!-element-type-paren-pcd5) - (setf (first contents-to-return) (nreverse (first contents-to-return))) - (if* (> (length (first contents-to-return)) 1) then - (setf (first contents-to-return) - (list (append (list :choice) - (first contents-to-return)))) - elseif (listp (first (first contents-to-return))) then - (setf (first contents-to-return) - (first (first contents-to-return)))) - (push :+ (first contents-to-return)) - elseif (and external (eq #\% ch)) then - (external-param-reference tokenbuf coll external-callback) - elseif (xml-space-p ch) then - (setf state state-dtd-!-element-type-paren-pcd5) - (setf (first contents-to-return) (nreverse (first contents-to-return))) - (when (> (length (first contents-to-return)) 1) - (setf (first contents-to-return) - (list (append (list :\choice) - (first contents-to-return))))) - else (dotimes (i 15) - (add-to-coll coll ch) - (setq ch (get-next-char tokenbuf)) - (if* (null ch) - then (return))) - (xml-error (concatenate 'string - "illegal DTD include-count 0)) then + (setf state state-dtd-!-include3) + elseif (and (eq #\] ch) (not external)) then (return) + elseif (eq #\< ch) then (setf state state-tokenstart) + elseif (xml-space-p ch) then nil + elseif (eq #\% ch) then (external-param-reference tokenbuf coll external-callback) + else (dotimes (i 15) + (add-to-coll coll ch) + (setq ch (get-next-char tokenbuf)) + (if* (null ch) + then (return))) + (xml-error (concatenate 'string + "illegal DTD characters, starting at: '" + (compute-coll-string coll) + "'")) + )) + (#.state-dtd-!-include3 + (if* (eq #\] ch) then (setf state state-dtd-!-include4) + else + (dotimes (i 15) + (add-to-coll coll ch) + (setq ch (get-next-char tokenbuf)) + (if* (null ch) + then (return))) + (xml-error (concatenate 'string + "illegal DTD token, starting at: ']" + (compute-coll-string coll) + "'")))) + (#.state-dtd-!-include4 + (if* (eq #\> ch) then (return) + else + (dotimes (i 15) + (add-to-coll coll ch) + (setq ch (get-next-char tokenbuf)) + (if* (null ch) + then (return))) + (xml-error (concatenate 'string + "illegal DTD token, starting at: ']]" + (compute-coll-string coll) + "'")))) + #+ignore + (#.state-dtd-pref + (if* (xml-name-start-char-p ch) then + (add-to-coll coll ch) + (setf state state-dtd-pref2) + else (dotimes (i 15) + (add-to-coll coll ch) + (setq ch (get-next-char tokenbuf)) + (if* (null ch) + then (return))) + (xml-error (concatenate 'string + "illegal DTD parameter reference name, starting at: '" + (compute-coll-string coll) + "'")) + )) + (#.state-tokenstart + (if* (eq #\? ch) then (setf state state-dtd-?) + elseif (eq #\! ch) then (setf state state-dtd-!) + else (dotimes (i 15) + (add-to-coll coll ch) + (setq ch (get-next-char tokenbuf)) + (if* (null ch) + then (return))) + (xml-error (concatenate 'string + "illegal DTD characters, starting at: '<" + (compute-coll-string coll) + "'")) + )) + (#.state-dtd-? + (if* (xml-name-char-p ch) + then + (add-to-coll coll ch) + elseif (and external (eq #\% ch)) then + (external-param-reference tokenbuf coll external-callback) + else + (when (not (xml-space-p ch)) + (xml-error (concatenate 'string + "expecting name following: ' ch) + then + (push (compute-coll-string coll) contents-to-return) + (clear-coll coll) + (return) + else (setf state state-dtd-?-3) + (add-to-coll coll #\?) + (add-to-coll coll ch))) + (#.state-dtd-! + (if* (eq #\- ch) then (setf state state-dtd-comment) + elseif (xml-name-start-char-p ch) then (setf state state-dtd-!-token) + (un-next-char ch) + elseif (and (eq #\[ ch) external) then + (setf state state-dtd-!-cond) + else (dotimes (i 15) + (add-to-coll coll ch) + (setq ch (get-next-char tokenbuf)) + (if* (null ch) + then (return))) + (xml-error (concatenate 'string + "illegal DTD characters, starting at: ' ch) then (decf ignore-count) + (when (= ignore-count 0) (return)) + else (un-next-char ch) + (setf state state-dtd-!-ignore3))) + (#.state-dtd-!-include + (if* (and (eq check-count 6) (eq ch #\E)) then + (setf state state-dtd-!-include2) + elseif (eq ch (elt "INCLUD" check-count)) then + (incf check-count) + else (xml-error " ch) + then (push (compute-coll-string coll) contents-to-return) + (clear-coll coll) + (return) + else (clear-coll coll) + (dotimes (i 15) + (add-to-coll coll ch) + (setq ch (get-next-char tokenbuf)) + (if* (null ch) + then (return))) + (xml-error (concatenate 'string + "illegal token following '--' comment terminator, starting at '--" + (compute-coll-string coll) + "'")) + )) + (#.state-dtd-!-token + (if* (xml-name-char-p ch) then (add-to-coll coll ch) + elseif (and external (eq #\% ch)) then + (external-param-reference tokenbuf coll external-callback) + elseif (xml-space-p ch) then + (setf tag-to-return (compute-tag coll)) + (clear-coll coll) + (if* (eq tag-to-return :ELEMENT) then (setf state state-dtd-!-element) + elseif (eq tag-to-return :ATTLIST) then + (setf state state-dtd-!-attlist) + elseif (eq tag-to-return :ENTITY) then + (setf entityp t) + (setf state state-dtd-!-entity) + elseif (eq tag-to-return :NOTATION) then + (setf state state-dtd-!-notation) + else + (xml-error (concatenate 'string + "illegal DTD characters, starting at: ' ch)) then + (setf state state-!-dtd-system) + (return) + else + (dotimes (i 15) + (add-to-coll coll ch) + (setq ch (get-next-char tokenbuf)) + (if* (null ch) + then (return))) + (xml-error + (concatenate 'string + "Expected space before: '" + (compute-coll-string coll) "'")) + )) + (#.state-!-dtd-system + (if* (xml-space-p ch) then nil + elseif (and external (eq #\% ch)) then + (external-param-reference tokenbuf coll external-callback) + elseif (or (eq #\" ch) (eq #\' ch)) then + (setf state state-!-dtd-system2) + (setf value-delim ch) + elseif (and (not entityp) + (eq #\> ch)) then (return) + else (xml-error + (concatenate 'string + "expected quote or double-quote got: '" + (string ch) + "' in ch) then (return) + else + (dotimes (i 15) + (add-to-coll coll ch) + (setq ch (get-next-char tokenbuf)) + (if* (null ch) + then (return))) + (xml-error (concatenate 'string + "illegal DTD ch) then (return) + else (dotimes (i 15) + (add-to-coll coll ch) + (setq ch (get-next-char tokenbuf)) + (if* (null ch) + then (return))) + (xml-error (concatenate 'string + "illegal DTD + elseif (eq #\> ch) then + (push (compute-tag coll) contents-to-return) + (clear-coll coll) + (return) + else (dotimes (i 15) + (add-to-coll coll ch) + (setq ch (get-next-char tokenbuf)) + (if* (null ch) + then (return))) + (xml-error (concatenate 'string + "illegal DTD (length tmp) 0) + (when (null (first pending)) (setf pending (rest pending))) + (push tmp pending))) + (if* (> (length pending) 1) then + (push (nreverse pending) contents-to-return) + else (push (first pending) contents-to-return)) + (setf pending (list nil)) + (setf state state-dtd-!-entity5) + (clear-coll coll) + (if* pentityp then + (when (not (assoc (third contents-to-return) + (iostruct-parameter-entities tokenbuf))) + (setf (iostruct-parameter-entities tokenbuf) + (acons (third contents-to-return) + (first contents-to-return) + (iostruct-parameter-entities tokenbuf)))) + else + (when (not (assoc (second contents-to-return) + (iostruct-general-entities tokenbuf))) + (setf (iostruct-general-entities tokenbuf) + (acons (second contents-to-return) + (first contents-to-return) + (iostruct-general-entities tokenbuf))))) + elseif (eq #\& ch) then + (setf reference-save-state state-dtd-!-entity-value) + (setf state state-dtd-!-attdef-decl-value3) + elseif (eq #\% ch) then + (setf prefp t) + (setf reference-save-state state-dtd-!-entity-value) + (setf state state-dtd-!-attdef-decl-value3) + elseif (xml-char-p ch) + then (add-to-coll coll ch) + else (dotimes (i 15) + (add-to-coll coll ch) + (setq ch (get-next-char tokenbuf)) + (if* (null ch) + then (return))) + (xml-error (concatenate 'string + "illegal DTD ch) then (return) + else (clear-coll coll) + (dotimes (i 15) + (add-to-coll coll ch) + (setq ch (get-next-char tokenbuf)) + (if* (null ch) + then (return))) + (xml-error (concatenate 'string + "illegal DTD contents following ch) then + (push (compute-tag coll *package*) + contents-to-return) + (clear-coll coll) + (return) + else (push (compute-tag coll) + contents-to-return) + (clear-coll coll) + (dotimes (i 15) + (add-to-coll coll ch) + (setq ch (get-next-char tokenbuf)) + (if* (null ch) + then (return))) + (xml-error (concatenate 'string + "illegal DTD ch) then (return) + else (dotimes (i 15) + (add-to-coll coll ch) + (setq ch (get-next-char tokenbuf)) + (if* (null ch) + then (return))) + (xml-error (concatenate 'string + "illegal DTD )) then + (setf count 6) + else (setf last-ch cch)) + elseif (< count 6) then + (when (and (= count 5) + (xml-space-p cch)) + (setf cch #\space)) + (if* (not (eq cch + (schar string count) + )) then + (loop + (when (= tmp-count count) + (return)) + (add-to-coll coll + (schar string + tmp-count)) + (incf tmp-count)) + (add-to-coll coll cch) + (setf count 10) + else (incf count)) + elseif (= count 6) then + (dotimes (i 6) + (add-to-coll coll (schar string i))) + (setf count 10) + else (add-to-coll coll cch)))) + (setf (iostruct-entity-names tokenbuf) + (rest (iostruct-entity-names tokenbuf))) + (close entity-stream) + (put-back-tokenbuf tmp-buf))))) + ) + (setf state state-dtdstart) + else nil + ))) + (setf state reference-save-state) + else (let ((tmp (compute-coll-string entity))) + (clear-coll coll) + (dotimes (i 15) + (add-to-coll coll ch) + (setq ch (get-next-char tokenbuf)) + (if* (null ch) + then (return))) + (xml-error (concatenate 'string + "reference not terminated by ';', starting at: '&" + tmp + (compute-coll-string coll) + "'"))) + )) + (#.state-dtd-!-attdef-decl-value6 + (let ((code (char-code ch))) + (if* (eq #\; ch) + then (add-to-coll coll (code-char char-code)) + (setf char-code 0) + (setq state state-dtd-!-attdef-decl-value) + elseif (<= (char-code #\0) code (char-code #\9)) + then (setf char-code (+ (* char-code 16) (- code (char-code #\0)))) + elseif (<= (char-code #\A) code (char-code #\F)) + then (setf char-code (+ 10 (* char-code 16) (- code (char-code #\A)))) + elseif (<= (char-code #\a) code (char-code #\f)) + then (setf char-code (+ 10 (* char-code 16) (- code (char-code #\a)))) + else (clear-coll coll) + (dotimes (i 15) + (add-to-coll coll ch) + (setq ch (get-next-char tokenbuf)) + (if* (null ch) + then (return))) + (xml-error (concatenate 'string + "illegal hexidecimal character reference code, starting at: '" + (compute-coll-string coll) + "', calculated char code: " + (format nil "~s" char-code))) + ))) + (#.state-dtd-!-attdef-decl-value7 + (let ((code (char-code ch))) + (if* (eq #\; ch) + then (add-to-coll coll (code-char char-code)) + (setf char-code 0) + (setq state reference-save-state) + elseif (<= (char-code #\0) code (char-code #\9)) + then (setf char-code (+ (* char-code 10) (- code (char-code #\0)))) + else (clear-coll coll) + (dotimes (i 15) + (add-to-coll coll ch) + (setq ch (get-next-char tokenbuf)) + (if* (null ch) + then (return))) + (xml-error (concatenate 'string + "illegal decimal character reference code, starting at: '" + (compute-coll-string coll) + "', calculated char code: " + (format nil "~s" char-code))) + ))) + (#.state-dtd-!-attdef-decl-type + (if* (xml-name-char-p ch) then (add-to-coll coll ch) + elseif (and external (eq #\% ch)) then + (external-param-reference tokenbuf coll external-callback) + elseif (or (xml-space-p ch) (eq #\> ch)) then + (let ((token (compute-tag coll))) + (when (and (not (eq :REQUIRED token)) + (not (eq :IMPLIED token)) + (not (eq :FIXED token))) + (dotimes (i 15) + (add-to-coll coll ch) + (setq ch (get-next-char tokenbuf)) + (if* (null ch) + then (return))) + (xml-error (concatenate 'string + "illegal DTD ch) + (dotimes (i 15) + (add-to-coll coll ch) + (setq ch (get-next-char tokenbuf)) + (if* (null ch) + then (return))) + (xml-error (concatenate 'string + "illegal DTD ch) then + (setf contents-to-return + (append contents-to-return (list (nreverse pending)))) + (return) + else (setf contents-to-return + (append contents-to-return (list (nreverse pending)))) + (setf pending (list nil)) + (setf state state-dtd-!-attdef))) + (clear-coll coll) + else (dotimes (i 15) + (add-to-coll coll ch) + (setq ch (get-next-char tokenbuf)) + (if* (null ch) + then (return))) + (xml-error (concatenate 'string + "illegal DTD ch) then + ;; there only one name... + (setf (first contents-to-return) (first (first contents-to-return))) + (return) + elseif (eq #\* ch) then + (setf state state-dtd-!-element-type-paren-pcd5) + (setf (first contents-to-return) (nreverse (first contents-to-return))) + (if* (> (length (first contents-to-return)) 1) then + (setf (first contents-to-return) + (list (append (list :choice) + (first contents-to-return)))) + elseif (listp (first (first contents-to-return))) then + (setf (first contents-to-return) + (first (first contents-to-return)))) + (push :* (first contents-to-return)) + elseif (eq #\? ch) then + (setf state state-dtd-!-element-type-paren-pcd5) + (setf (first contents-to-return) (nreverse (first contents-to-return))) + (if* (> (length (first contents-to-return)) 1) then + (setf (first contents-to-return) + (list (append (list :choice) + (first contents-to-return)))) + elseif (listp (first (first contents-to-return))) then + (setf (first contents-to-return) + (first (first contents-to-return)))) + (push :? (first contents-to-return)) + elseif (eq #\+ ch) then + (setf state state-dtd-!-element-type-paren-pcd5) + (setf (first contents-to-return) (nreverse (first contents-to-return))) + (if* (> (length (first contents-to-return)) 1) then + (setf (first contents-to-return) + (list (append (list :choice) + (first contents-to-return)))) + elseif (listp (first (first contents-to-return))) then + (setf (first contents-to-return) + (first (first contents-to-return)))) + (push :+ (first contents-to-return)) + elseif (and external (eq #\% ch)) then + (external-param-reference tokenbuf coll external-callback) + elseif (xml-space-p ch) then + (setf state state-dtd-!-element-type-paren-pcd5) + (setf (first contents-to-return) (nreverse (first contents-to-return))) + (when (> (length (first contents-to-return)) 1) + (setf (first contents-to-return) + (list (append (list :\choice) + (first contents-to-return))))) + else (dotimes (i 15) + (add-to-coll coll ch) + (setq ch (get-next-char tokenbuf)) + (if* (null ch) + then (return))) + (xml-error (concatenate 'string + "illegal DTD (length (first pending)) 1) then - (push (first pending-type) (first pending)) - (setf pending-type (rest pending-type)) - else (setf (first pending) (first (first pending)))) - (push (first pending) contents-to-return) - (setf state state-dtd-!-element-type-paren3) - else (setf (first pending) (nreverse (first pending))) - (if* (> (length (first pending)) 1) then - (push (first pending-type) (first pending)) - (setf pending-type (rest pending-type)) - else (setf (first pending) (first (first pending)))) - (if* (second pending) then - (push (first pending) (second pending)) - else (setf (second pending) (list (first pending)))) - (setf pending (rest pending)) - (setf state state-dtd-!-element-type-paren-choice-name3) - ) - else (dotimes (i 15) - (add-to-coll coll ch) - (setq ch (get-next-char tokenbuf)) - (if* (null ch) - then (return))) - (xml-error (concatenate 'string - "illegal DTD (length (first pending)) 1) then + (push (first pending-type) (first pending)) + (setf pending-type (rest pending-type)) + else (setf (first pending) (first (first pending)))) + (push (first pending) contents-to-return) + (setf state state-dtd-!-element-type-paren3) + else (setf (first pending) (nreverse (first pending))) + (if* (> (length (first pending)) 1) then + (push (first pending-type) (first pending)) + (setf pending-type (rest pending-type)) + else (setf (first pending) (first (first pending)))) + (if* (second pending) then + (push (first pending) (second pending)) + else (setf (second pending) (list (first pending)))) + (setf pending (rest pending)) + (setf state state-dtd-!-element-type-paren-choice-name3) + ) + else (dotimes (i 15) + (add-to-coll coll ch) + (setq ch (get-next-char tokenbuf)) + (if* (null ch) + then (return))) + (xml-error (concatenate 'string + "illegal DTD (length (first pending)) 1) then - (push (first pending-type) (first pending)) - (setf pending-type (rest pending-type)) - else (setf (first pending) (first (first pending)))) - (push (first pending) contents-to-return) - (setf state state-dtd-!-element-type-paren3) - else (setf (first pending) (nreverse (first pending))) - (push (first pending-type) (first pending)) - (setf pending-type (rest pending-type)) - (if* (second pending) then - (push (first pending) (second pending)) - else (setf (second pending) - ;; (list (first pending)) ;2001-03-22 - (first pending) ;2001-03-22 - )) - (setf pending (rest pending)) - (setf state state-dtd-!-element-type-paren-choice-name3) - ) - elseif (eq #\, ch) then - (when (and (first pending) (not (eq :seq (first pending-type)))) - (clear-coll coll) - (dotimes (i 15) - (add-to-coll coll ch) - (setq ch (get-next-char tokenbuf)) - (if* (null ch) - then (return))) - (xml-error (concatenate 'string - "illegal '|' and ',' mix starting at '" - (compute-coll-string coll) - "'"))) - (push (compute-tag coll) (first pending)) - (clear-coll coll) - (push :seq pending-type) - (setf state state-dtd-!-element-type-paren-choice) - elseif (eq #\| ch) then - (when (and (first pending) (not (eq :choice (first pending-type)))) - (clear-coll coll) - (dotimes (i 15) - (add-to-coll coll ch) - (setq ch (get-next-char tokenbuf)) - (if* (null ch) - then (return))) - (xml-error (concatenate 'string - "illegal '|' and ',' mix starting at '" - (compute-coll-string coll) - "'"))) - (push (compute-tag coll) (first pending)) - (clear-coll coll) - (push :choice pending-type) - (setf state state-dtd-!-element-type-paren-choice) - else (dotimes (i 15) - (add-to-coll coll ch) - (setq ch (get-next-char tokenbuf)) - (if* (null ch) - then (return))) - (xml-error (concatenate 'string - "illegal DTD (length (first pending)) 1) then - (push (first pending-type) (first pending)) - (setf pending-type (rest pending-type)) - else (setf (first pending) (first (first pending)))) - (push (first pending) contents-to-return) - (setf state state-dtd-!-element-type-paren3) - else (setf (first pending) (nreverse (first pending))) - (push (first pending-type) (first pending)) - (setf pending-type (rest pending-type)) - (if* (second pending) then - (push (first pending) (second pending)) - else (setf (second pending) (list (first pending)))) - (setf state state-dtd-!-element-type-paren-choice-name3) - ) - (setf pending (rest pending)) - else (dotimes (i 15) - (add-to-coll coll ch) - (setq ch (get-next-char tokenbuf)) - (if* (null ch) - then (return))) - (xml-error (concatenate 'string - "illegal DTD (length (first pending)) 1) then - (push (first pending-type) (first pending)) - (setf pending-type (rest pending-type)) - else (setf (first pending) (first (first pending)))) - (push (first pending) contents-to-return) - (setf pending (rest pending)) - (setf state state-dtd-!-element-type-paren3) - else (setf (first pending) (nreverse (first pending))) - (if* (> (length (first pending)) 1) then - (push (first pending-type) (first pending)) - (setf pending-type (rest pending-type)) - else (setf (first pending) (first (first pending)))) - (if* (second pending) then - (push (first pending) (second pending)) - else (setf (second pending) (list (first pending)))) - (setf pending (rest pending)) - (setf state state-dtd-!-element-type-paren-choice) - ) - elseif (eq #\, ch) then - (when (and (rest (first pending)) (not (eq :seq (first pending-type)))) - (clear-coll coll) - (dotimes (i 15) - (add-to-coll coll ch) - (setq ch (get-next-char tokenbuf)) - (if* (null ch) - then (return))) - (xml-error (concatenate 'string - "illegal '|' and ',' mix starting at '" - (compute-coll-string coll) - "'"))) - (push :seq pending-type) - (setf state state-dtd-!-element-type-paren-choice) - elseif (eq #\| ch) then - (when (and (rest (first pending)) (not (eq :choice (first pending-type)))) - (clear-coll coll) - (dotimes (i 15) - (add-to-coll coll ch) - (setq ch (get-next-char tokenbuf)) - (if* (null ch) - then (return))) - (xml-error (concatenate 'string - "illegal '|' and ',' mix starting at '" - (compute-coll-string coll) - "'"))) - (push :choice pending-type) - (setf state state-dtd-!-element-type-paren-choice) - else (dotimes (i 15) - (add-to-coll coll ch) - (setq ch (get-next-char tokenbuf)) - (if* (null ch) - then (return))) - (xml-error (concatenate 'string - "illegal DTD ch) then (return) - else (dotimes (i 15) - (add-to-coll coll ch) - (setq ch (get-next-char tokenbuf)) - (if* (null ch) - then (return))) - (xml-error (concatenate 'string - "illegal DTD ch) then (return) - else (clear-coll coll) - (dotimes (i 15) - (add-to-coll coll ch) - (setq ch (get-next-char tokenbuf)) - (if* (null ch) - then (return))) - (xml-error (concatenate 'string - "illegal DTD contents following ch) then (return) - else (clear-coll coll) - (dotimes (i 15) - (add-to-coll coll ch) - (setq ch (get-next-char tokenbuf)) - (if* (null ch) - then (return))) - (xml-error (concatenate 'string - "illegal DTD contents following ch) then (return) - else (clear-coll coll) - (dotimes (i 15) - (add-to-coll coll ch) - (setq ch (get-next-char tokenbuf)) - (if* (null ch) - then (return))) - (xml-error (concatenate 'string - "illegal DTD contents following (length (first contents-to-return)) 1) - (setf (first contents-to-return) - (list (append (list :choice) - (first contents-to-return))))) - (push :* (first contents-to-return)) - else (clear-coll coll) - (dotimes (i 15) - (add-to-coll coll ch) - (setq ch (get-next-char tokenbuf)) - (if* (null ch) - then (return))) - (xml-error (concatenate 'string - "illegal DTD contents in ch) then - (let ((token (compute-tag coll))) - (when (not (or (eq token :EMPTY) (eq token :ANY))) - (xml-error (concatenate 'string - "illegal DTD ch) then (return) - else (xml-error (concatenate 'string - "expected '>', got '" - (string ch) - "' in DTD (length (first pending)) 1) then + (push (first pending-type) (first pending)) + (setf pending-type (rest pending-type)) + else (setf (first pending) (first (first pending)))) + (push (first pending) contents-to-return) + (setf state state-dtd-!-element-type-paren3) + else (setf (first pending) (nreverse (first pending))) + (push (first pending-type) (first pending)) + (setf pending-type (rest pending-type)) + (if* (second pending) then + (push (first pending) (second pending)) + else (setf (second pending) + ;; (list (first pending)) ;2001-03-22 + (first pending) ;2001-03-22 + )) + (setf pending (rest pending)) + (setf state state-dtd-!-element-type-paren-choice-name3) + ) + elseif (eq #\, ch) then + (when (and (first pending) (not (eq :seq (first pending-type)))) + (clear-coll coll) + (dotimes (i 15) + (add-to-coll coll ch) + (setq ch (get-next-char tokenbuf)) + (if* (null ch) + then (return))) + (xml-error (concatenate 'string + "illegal '|' and ',' mix starting at '" + (compute-coll-string coll) + "'"))) + (push (compute-tag coll) (first pending)) + (clear-coll coll) + (push :seq pending-type) + (setf state state-dtd-!-element-type-paren-choice) + elseif (eq #\| ch) then + (when (and (first pending) (not (eq :choice (first pending-type)))) + (clear-coll coll) + (dotimes (i 15) + (add-to-coll coll ch) + (setq ch (get-next-char tokenbuf)) + (if* (null ch) + then (return))) + (xml-error (concatenate 'string + "illegal '|' and ',' mix starting at '" + (compute-coll-string coll) + "'"))) + (push (compute-tag coll) (first pending)) + (clear-coll coll) + (push :choice pending-type) + (setf state state-dtd-!-element-type-paren-choice) + else (dotimes (i 15) + (add-to-coll coll ch) + (setq ch (get-next-char tokenbuf)) + (if* (null ch) + then (return))) + (xml-error (concatenate 'string + "illegal DTD (length (first pending)) 1) then + (push (first pending-type) (first pending)) + (setf pending-type (rest pending-type)) + else (setf (first pending) (first (first pending)))) + (push (first pending) contents-to-return) + (setf state state-dtd-!-element-type-paren3) + else (setf (first pending) (nreverse (first pending))) + (push (first pending-type) (first pending)) + (setf pending-type (rest pending-type)) + (if* (second pending) then + (push (first pending) (second pending)) + else (setf (second pending) (list (first pending)))) + (setf state state-dtd-!-element-type-paren-choice-name3) + ) + (setf pending (rest pending)) + else (dotimes (i 15) + (add-to-coll coll ch) + (setq ch (get-next-char tokenbuf)) + (if* (null ch) + then (return))) + (xml-error (concatenate 'string + "illegal DTD (length (first pending)) 1) then + (push (first pending-type) (first pending)) + (setf pending-type (rest pending-type)) + else (setf (first pending) (first (first pending)))) + (push (first pending) contents-to-return) + (setf pending (rest pending)) + (setf state state-dtd-!-element-type-paren3) + else (setf (first pending) (nreverse (first pending))) + (if* (> (length (first pending)) 1) then + (push (first pending-type) (first pending)) + (setf pending-type (rest pending-type)) + else (setf (first pending) (first (first pending)))) + (if* (second pending) then + (push (first pending) (second pending)) + else (setf (second pending) (list (first pending)))) + (setf pending (rest pending)) + (setf state state-dtd-!-element-type-paren-choice) + ) + elseif (eq #\, ch) then + (when (and (rest (first pending)) (not (eq :seq (first pending-type)))) + (clear-coll coll) + (dotimes (i 15) + (add-to-coll coll ch) + (setq ch (get-next-char tokenbuf)) + (if* (null ch) + then (return))) + (xml-error (concatenate 'string + "illegal '|' and ',' mix starting at '" + (compute-coll-string coll) + "'"))) + (push :seq pending-type) + (setf state state-dtd-!-element-type-paren-choice) + elseif (eq #\| ch) then + (when (and (rest (first pending)) (not (eq :choice (first pending-type)))) + (clear-coll coll) + (dotimes (i 15) + (add-to-coll coll ch) + (setq ch (get-next-char tokenbuf)) + (if* (null ch) + then (return))) + (xml-error (concatenate 'string + "illegal '|' and ',' mix starting at '" + (compute-coll-string coll) + "'"))) + (push :choice pending-type) + (setf state state-dtd-!-element-type-paren-choice) + else (dotimes (i 15) + (add-to-coll coll ch) + (setq ch (get-next-char tokenbuf)) + (if* (null ch) + then (return))) + (xml-error (concatenate 'string + "illegal DTD ch) then (return) + else (dotimes (i 15) + (add-to-coll coll ch) + (setq ch (get-next-char tokenbuf)) + (if* (null ch) + then (return))) + (xml-error (concatenate 'string + "illegal DTD ch) then (return) + else (clear-coll coll) + (dotimes (i 15) + (add-to-coll coll ch) + (setq ch (get-next-char tokenbuf)) + (if* (null ch) + then (return))) + (xml-error (concatenate 'string + "illegal DTD contents following ch) then (return) + else (clear-coll coll) + (dotimes (i 15) + (add-to-coll coll ch) + (setq ch (get-next-char tokenbuf)) + (if* (null ch) + then (return))) + (xml-error (concatenate 'string + "illegal DTD contents following ch) then (return) + else (clear-coll coll) + (dotimes (i 15) + (add-to-coll coll ch) + (setq ch (get-next-char tokenbuf)) + (if* (null ch) + then (return))) + (xml-error (concatenate 'string + "illegal DTD contents following (length (first contents-to-return)) 1) + (setf (first contents-to-return) + (list (append (list :choice) + (first contents-to-return))))) + (push :* (first contents-to-return)) + else (clear-coll coll) + (dotimes (i 15) + (add-to-coll coll ch) + (setq ch (get-next-char tokenbuf)) + (if* (null ch) + then (return))) + (xml-error (concatenate 'string + "illegal DTD contents in ch) then + (let ((token (compute-tag coll))) + (when (not (or (eq token :EMPTY) (eq token :ANY))) + (xml-error (concatenate 'string + "illegal DTD ch) then (return) + else (xml-error (concatenate 'string + "expected '>', got '" + (string ch) + "' in DTD include-count 0) (not (eq prev-state state-dtdstart))) then - (xml-error "unexpected end of input while processing external DTD")) - (values nil :end-dtd)) - (t - (print (list tag-to-return contents-to-return)) - (error "need to support dtd state:~s" state))) + (#.state-dtdstart + (when (and (null ch) (not external)) + (xml-error "unexpected end of input while parsing DTD")) + (if* (null tag-to-return) then (values nil :end-dtd) + else (error "process other return state"))) + ((#.state-dtd-!-element-type-end #.state-dtd-!-element-type-token + #.state-dtd-!-element-type-paren-pcd4 #.state-dtd-!-element-type-paren-pcd6 + #.state-dtd-!-element-type-paren-pcd5 #.state-dtd-!-element-type-paren2 + #.state-dtd-!-element-type-paren3) + (values (append (list tag-to-return) (nreverse contents-to-return)) + nil)) + ((#.state-dtd-!-attdef-decl-type #.state-dtd-!-attlist-name + #.state-dtd-!-attdef) + (values (append (list tag-to-return) contents-to-return) + nil)) + ((#.state-dtd-!-entity5 #.state-!-dtd-system3 + #.state-!-dtd-system7 #.state-!-dtd-system4 + #.state-!-dtd-system ;; this is actually a !NOTATION + #.state-dtd-?-4 ;; PI + #.state-dtd-comment4 ;; comment + ) + (let ((ret (append (list tag-to-return) (nreverse contents-to-return)))) + (values ret + nil))) + #+ignore + (#.state-dtd-pref2 + (values (nreverse contents-to-return) nil)) + (#.state-dtd-!-include2 + (values nil :include)) + (#.state-dtd-!-include4 + (values nil :include-end)) + (#.state-dtd-!-ignore7 + (values nil :ignore)) + (:eof + (if* (not external) then + (xml-error "unexpected end of input while processing DTD internal subset") + elseif (or (> include-count 0) (not (eq prev-state state-dtdstart))) then + (xml-error "unexpected end of input while processing external DTD")) + (values nil :end-dtd)) + (t + (print (list tag-to-return contents-to-return)) + (error "need to support dtd state:~s" state))) ) )) (defun external-param-reference (tokenbuf old-coll external-callback) (declare #+allegro (:fbound next-token) - #+lispworks (optimize (safety 0) (debug 3)) - (ignorable old-coll) - #-lispworks (optimize (speed 3) (safety 1))) + #+lispworks (optimize (safety 0) (debug 3)) + (ignorable old-coll) + #-lispworks (optimize (speed 3) (safety 1))) (setf (iostruct-seen-parameter-reference tokenbuf) t) (macrolet ((add-to-entity-buf (entity-symbol p-value) - `(progn - (push (make-tokenbuf :cur 0 :max (length ,p-value) :data ,p-value) - (iostruct-entity-bufs tokenbuf)))) - (clear-coll (coll) - `(setf (collector-next ,coll) 0)) - (un-next-char (ch) - `(push ,ch (iostruct-unget-char tokenbuf))) - (add-to-coll (coll ch) - `(let ((.next. (collector-next ,coll))) - (if* (>= .next. (collector-max ,coll)) - then (grow-and-add ,coll ,ch) - else (setf (schar (collector-data ,coll) .next.) - ,ch) - (setf (collector-next ,coll) (1+ .next.)))))) + `(progn + (push (make-tokenbuf :cur 0 :max (length ,p-value) :data ,p-value) + (iostruct-entity-bufs tokenbuf)))) + (clear-coll (coll) + `(setf (collector-next ,coll) 0)) + (un-next-char (ch) + `(push ,ch (iostruct-unget-char tokenbuf))) + (add-to-coll (coll ch) + `(let ((.next. (collector-next ,coll))) + (if* (>= .next. (collector-max ,coll)) + then (grow-and-add ,coll ,ch) + else (setf (schar (collector-data ,coll) .next.) + ,ch) + (setf (collector-next ,coll) (1+ .next.)))))) (let ((ch (get-next-char tokenbuf)) - (coll (get-collector)) - p-value entity-symbol) + (coll (get-collector)) + p-value entity-symbol) (add-to-coll coll ch) (when (not (xml-name-start-char-p ch)) - (dotimes (i 15) - (add-to-coll coll ch) - (setq ch (get-next-char tokenbuf)) - (if* (null ch) - then (return))) - (xml-error (concatenate 'string - "Illegal DTD parameter entity name starting at: " - (compute-coll-string coll)))) + (dotimes (i 15) + (add-to-coll coll ch) + (setq ch (get-next-char tokenbuf)) + (if* (null ch) + then (return))) + (xml-error (concatenate 'string + "Illegal DTD parameter entity name starting at: " + (compute-coll-string coll)))) (loop - (setf ch (get-next-char tokenbuf)) - (if* (eq #\; ch) then - (setf entity-symbol (compute-tag coll)) - (clear-coll coll) - #+ignore (format t "entity symbol: ~s entities: ~s match: ~s~%" - entity-symbol (iostruct-parameter-entities tokenbuf) - (assoc entity-symbol - (iostruct-parameter-entities tokenbuf))) - (if* (and (iostruct-do-entity tokenbuf) - (setf p-value - (assoc entity-symbol - (iostruct-parameter-entities tokenbuf)))) then - (setf p-value (rest p-value)) - (when (member entity-symbol (iostruct-entity-names tokenbuf)) - (xml-error (concatenate 'string - "entity:" - (string entity-symbol) - " in recursive reference"))) - (push entity-symbol (iostruct-entity-names tokenbuf)) - (if* (stringp p-value) then - (setf p-value (concatenate 'string " " p-value " ")) - (add-to-entity-buf entity-symbol p-value) - elseif (null external-callback) then - (setf (iostruct-do-entity tokenbuf) nil) - elseif p-value then - (let ((entity-stream (apply external-callback p-value))) - (when entity-stream - (let ((entity-buf (get-tokenbuf))) - (setf (tokenbuf-stream entity-buf) entity-stream) - (unicode-check entity-stream tokenbuf) - (add-to-entity-buf entity-symbol " ") - (push entity-buf - (iostruct-entity-bufs tokenbuf)) - (let ((count 0) cch - (string "