--- /dev/null
+*******************************************************************************
+merge from trunk to acl6 branch (for 6.1.beta)
+command: ../../join.sh trunk trunk_to_acl6_merge2 trunk_to_acl6_merge3 xmlutils
+*******************************************************************************
+
+*******************************************************************************
+merge from trunk to acl6 branch
+command: ../../join.sh trunk trunk_to_acl6_merge1 trunk_to_acl6_merge2 xmlutils
+*******************************************************************************
+
+*******************************************************************************
+merge from trunk to acl6 branch
+command: ../../join.sh trunk acl6 trunk_to_acl6_merge1 xmlutils
+*******************************************************************************
+
+2001-06-08 Steve Haflich <smh@romeo>
+
+ * pxml.htm: Added mention that it is necessary to load or require
+ the module. Cleaned up a little html formatting.
+
+2001-05-30 John Foderaro <jkf@tiger.franz.com>
+
+ * phtml.cl - add :,_,- and . to valid attribute name characters.
+
+2001-03-23 Steve Haflich <smh@romeo>
+
+ * pxml3.cl: state-dtd-!-element-type-paren-choice-name2 was
+ missing code to handle decls such as
+ <!ELEMENT item1 ((item2 | item3)+ , item4)>
+ which was presumably overlooked in the merge of
+ *-choice and *-seq into a single set of parser states
+ It is quite clear that the dtd parsr does not return correct
+ tree structure in all cases, but this fix may allow it at
+ least to accept legal dtds without signalling error.
+ Also made minor *debug-dtd* enhancements.
+ * pxml[0,1,2,3]: Added dribble-bug version number tracking.
+
+2001-02-05 Steve Jacobson <sdj>
+
+ phtml.cl: symbols mapped to preferred case at runtime (as opposed to
+ a compile time macro determining the case mapping)
+
+2000-12-20 Steve Jacobson <sdj>
+
+ pxml-test.cl: NameSpace example change to reflect URI module fix
+ pxml.htm: NameSpace example change
+ pxml2.cl: bug10165 fix
+ pxml3.cl: bug10165 fix
+
+2000-12-05 Steve Jacobson <sdj>
+
+ phtml.cl: add user visible change comment; fix comment spelling error
+ pxml.htm: change ANSI notes to reflect code changes
+ pxml1.cl: use symbol-name where needed to support ANSI case usage
+ pxml2.cl: use symbol-name where needed to support ANSI case usage
+ pxml3.cl: add two declarations to prevent forward referencing compile warnings
+
+2000-10-27 Steve Jacobson <sdj>
+
+ phtml.cl: check callbacks arg when tag has no body
+ REMEMBER TO ADD TEST TO PHTML-TEST!!!
+
+2000-10-14 Steve Jacobson <sdj>
+
+ phtml-test.cl: add test related to raw mode infinite loop
+ phtml.cl: remove risk of negative buffer index
+ caused by multiple un-next-char calls in raw mode
+ fixed it by moving first-pass parse buffer to tokenbuf
+ removed :script from *in-line* to prevent infinite loop
+ (it should not have been there, anyway)
+ fixed :table tag-auto-close-stop typo
+ don't reopen char format tags within raw mode tags
+ pxml1.cl: changes required by 6.0 unicode changes
+ pxml2.cl: show char code in debug output
+
+
+2000-09-05 Steve Jacobson <sdj>
+
+ New files:
+
+ phtml.htm: HTML version of doc file
+ pxml.htm: HTML version of doc file
+
+ Changed files:
+
+ pxml-test.cl: Add namespace example; change because of xml-error change
+ pxml.txt: Namespace changes and other edits
+ pxml1.cl: namespace support; add format string to xml-error 'error call
+ pxml2.cl: namespace support
+
+2000-08-16 Steve Jacobson <sdj>
+
+ pxml-test.cl: cleanup for distribution
+ *.cl: add AllegroServe license text
+
+2000-08-10 Steve Jacobson <sdj>
+
+ phtml-test.cl: add tests for latest changes
+ phtml.cl: allow underscore as tag character
+ fix <![if ..]> --> (:! "if ..]") bug ('[' lost)
+ add collect-rogue-tags & no-body-tags arguments to
+ support 2 pass parse for really bad pages (e.g. New
+ York Times page)
+ special inline character formatting close/reopen strategy
+ preserves any attributes (smh reported bug)
+ phtml.txt: new argument and <! processing documentation
+ pxml0.cl,pxml1.cl,pxml2.cl,pxml3.cl: continued development - now in a state
+ to be used by others
+ pxml-test.cl: new file (pending task: add xmltest directories to cvs -
+ it will take some work because
+ binary files must be indentified)
+ pxml.txt: new documentation file
+
+2000-07-24 Steve Jacobson <sdj>
+
+ * phtml.cl: handle <script></script> pair with no text between the tags
+ correctly. Change works for any "raw" mode tag.
+
+ * phtml-test.cl: add test for the above change
+
+2000-07-17 Kevin Layer <layer@ultra>
+
+ * *.cl: add rcs id's
+
+2000-07-17 Steve Jacobson
+
+ * phtml.cl: more robust handling of illegal attribute value HTML:
+ parse-html "<frame src= foo.html>") => ((frame :src "foo.html"))
+ (skip spaces after '=' to look for attribute value)
+
+ <frame> is not a paired tag; it's standalone like <img>
+
+ * phtml-test.cl: add tests for the above two changes
+
+
+2000-06-29 Steve Jacobson
+
+ * phtml: export phtml-internal
+
+ * phtml.txt: phtml-internal now exported;
+ describe read-sequence-func return value
+
+2000-06-26 Steve Jacobson <sdj>
+
+ * phtml.cl: let colon be permissable tag name character;
+ parse <!...> xml type tags in raw mode with no contents;
+ parse :script & :style correctly when there are attributes
+ present
+
+ * phtml-test.cl: add tests for above changes
+
+2000-06-23 Steve Jacobson <sdj>
+
+ * phtml.cl: add autoclose property to :p tag
+
+ * phtml-test.cl: adjust test to reflect that <P> tags can't nest
+
+ * phtml.txt: new file: preliminary documentation
+
+2000-06-20 Steve Jacobson <sdj>
+
+ * phtml.cl: identify end of input errors
+
+ * phtml-test.cl: add end of input error test
+
+2000-06-10 Steve Jacobson <sdj>
+
+ * phtml.cl: made input buffer usage thread-safe, without increasing
+ consing.
+
+Fri May 26 22:55:52 PST 2000 Duane Rettig <duane@beta>
+
+ * Makefile: set SHELL variable
+
+2000-05-24 Steve Jacobson <sdj>
+
+ * phtml-test.cl: added more character format (<b>,<i>,etc.) tests
+ test changes to reflect new callback API
+
+ * phtml.cl: removed element-callback support; replaced it with
+ parse-html :callbacks argument
+ took *entity-mapping* out of source until we decide to
+ add entity processing
+ added some more tags to "character formating" group
+ changed "character formating" tag parsing to both
+ coerce parse results to HTML 4.0 spec and also
+ prevent generating syntax equivalent yet unexpected
+ results
+
+2000-05-17 Steve Jacobson <sdj>
+
+ * phtml.cl: output keyword symbols in upper case when phtml.cl is compiled in
+ :CASE-INSENSITIVE-UPPER lisp.
+ runtime raw mode (<style>,<script>) change to be case insensitive
+
+2000-05-12 Steve Jacobson <sdj>
+
+ * phtml-test.cl: added tests to check things like <b><i>bold and italic</b>just italic</i>
+ * phtml.cl: accept all characters in undelimited attribute values except for whitespance
+ and >
+ handle "interleved" character style tags in a sensible way
+
+2000-05-09 Steve Jacobson <sdj>
+
+ * phtml-test.cl: added STYLE test
+ * phtml.cl: replaced declaim optimization with declarations in each function
+ allow / in undelimited attribute value
+ don't treat :comment as special, but treat :style as special "raw" mode
+
+
+2000-04-21 Steve Jacobson <sdj>
+
+ phtml.cl
+
+ * add :callback-only keyword to parse-html methods and change
+ code to speed processing when set to non-nil
+
+ phtml-test.cl
+
+ * complete transition to using test harness
+ * add :callback-only tests and callback test with nested elements
+
+2000-04-19 Steve Jacobson <sdj>
+
+ phtml.cl:
+
+ * take out stale comments
+ * use net.html.parser package - exported symbols are:
+ parse-html element-callback
+ * phtml methods renamed parse-html
+ * setf'able element-callback method registers function to be invoked
+ with parse tree output that starts with specified HTML tag
+ * support raw mode in <comment> and <script> tags
+ * parse incorrect HTML in a manner that reflects writer's intentions:
+ <a href=mailto:lmcelroy@performigence.com>lmcelroy@performigence.com</a>
+ is processed as if it read:
+ <a href="mailto:lmcelroy@performigence.com">lmcelroy@performigence.com</a>
+ * :colgroup was incorrectly identified as a tag that can't have a body
+ * fixes resulting from test suite
+
+ phtml-test.cl:
+
+ * reflect above package and symbol changes
+ * use test harness
+ * add tests for element-callback methods
+ * user::testit invokes test suite
+
+
+2000-03-28 Steve Jacobson <sdj>
+
+ * take out comments that are no longer relevant
+ * :object shouldn't have tag-no-end property
+ * :dd also closes :dt
+ * first *if clause in close-off-tags has to check that candidate
+ tag may close off more than just the current tag
+
+ * New file: phtml-test.cl - test by:
+ (lhtml-equal (phtml *test-string*) *expected-result*)
+
+2000-03-28 John Foderaro <jkf@tiger.franz.com>
+
+ * add stream when user supplied read-sequence function is called
+
+2000-03-23 Steve Jacobson <sdj>
+
+ * added state-rawdata lexer state, which can be triggered
+ by calling parser to put lexer in mode where subsequent text
+ is gobbled up into ocdata until the specified delimiter is seen
+
+ * added read-sequence-func arg to next token, so user can
+ optionally supply function to fetch next sequence of chars;
+ turned phtml stream method into internal function so method args
+ don't change - user who wishes to supply read-sequence-func can
+ either invoke phtml-internal or add a method that calls
+ phtml-internal with custome read-sequence-func argument
+
+ * added knowledge to parser to reduce number of pcdata outputs that
+ only contain whitespace
+
+ * added large number of tag-auto-close and some tag-auto-close-stop
+ properties to html symbols so parser properly handles all tags
+ with optional end tags
--- /dev/null
+;; $Id: build.cl,v 1.1 2002/10/15 12:23:03 kevin Exp $
+
+(in-package :user)
+
+(let ((filenames
+ (list
+ "pxml0"
+ "pxml1"
+ "pxml3"
+ "pxml2")))
+ (dolist (f filenames)
+ (compile-file-if-needed (concatenate 'string f ".cl"))
+ (load (concatenate 'string f ".fasl")))
+
+ (with-open-file (out "pxml.fasl"
+ :element-type '(unsigned-byte 8)
+ :direction :output
+ :if-exists :supersede
+ :if-does-not-exist :create)
+ (dolist (file filenames)
+ (with-open-file (in (concatenate 'string file ".fasl")
+ :element-type '(unsigned-byte 8))
+ (format t "~%; ~s" file)
+ (let ((buf (make-array 2048 :element-type '(unsigned-byte 8))))
+ (loop as x = (read-sequence buf in)
+ until (= x 0)
+ do (write-sequence buf out :end x)))))))
+
--- /dev/null
+;; copyright (c) 1986-2000 Franz Inc, Berkeley, CA
+;;
+;; This code is free software; you can redistribute it and/or
+;; modify it under the terms of the version 2.1 of
+;; the GNU Lesser General Public License as published by
+;; the Free Software Foundation, as clarified by the AllegroServe
+;; prequel found in license-allegroserve.txt.
+;;
+;; This code is distributed in the hope that it will be useful,
+;; but without any warranty; without even the implied warranty of
+;; merchantability or fitness for a particular purpose. See the GNU
+;; Lesser General Public License for more details.
+;;
+;; Version 2.1 of the GNU Lesser General Public License is in the file
+;; license-lgpl.txt that was distributed with this file.
+;; If it is not present, you can access it from
+;; http://www.gnu.org/copyleft/lesser.txt (until superseded by a newer
+;; version) or write to the Free Software Foundation, Inc., 59 Temple Place,
+;; Suite 330, Boston, MA 02111-1307 USA
+;;
+
+;; $Id: phtml-test.cl,v 1.1 2002/10/15 12:23:03 kevin Exp $
+
+(eval-when (compile load eval)
+ (require :tester))
+
+(defpackage :user (:use :util.test :net.html.parser)) ;; assumes phtml.cl loaded
+(in-package :user)
+
+(defvar *test-string*)
+(defvar *test-string2*)
+(defvar *test-string3*)
+(defvar *expected-result*)
+(defvar *expected-result2*)
+(defvar *expected-result3*)
+
+
+;; it uses a fake pp tag to test nesting for callbacks...
+(setf *test-string*
+ "<html>
+ <!-- this should be <h1>one</h1> string -->
+ <head>
+ <style> this should be <h1>one</h1> string </STYLE>
+ <title> this is some title text </title>
+ <body> this is some body text
+ <a name=\"this is an anchor\">with some text </a>
+ <!-- testing allowing looser attribute parsing -->
+ <a href= mailto:lmcelroy@performigence.com>lmcelroy@performigence.com
+ </a>
+ <br>
+ this is some more text
+ <bogus> tests parser 'looseness'</bogus>
+ <select>
+ <option>1
+ <option>2 </select>
+ <ul>
+ <li>item 1
+ <li>item 2 </ul>
+ <dl>
+ <dt>a term
+ <dd>its definition
+ <dt>another term
+ <dd>another definition</dl>
+ <table>
+ <colgroup>
+ <col align=\"right\">
+ <col align=\"center\">
+ <thead>
+ <tr>
+ <th> this cell is aligned right
+ <th> this cell is centered
+ <tfoot>
+ <tr>
+ <th> this cell is aligned right
+ <th> this cell is centered
+ <tbody>
+ <tr>
+ <td> this cell is aligned right
+ <td> this cell is centered
+ <tbody>
+ <tr>
+ <td> this cell is aligned right
+ <td> this cell is centered </table>
+ <pp>
+ <object>
+ <pp>Navigate the site:
+ <map name=\"mainmap\">
+ <area shape=rect coords=\"0,100,100,200\">
+ <area shape=rect coords=\"100,100,100,200\"> </map> </object> </pp>
+ <abbr>WWW</abbr> is an abbreviation
+ <b>force</b>
+ <pp>whitespace only")
+
+(setf *expected-result*
+ '((:html
+ (:comment "this should be <h1>one</h1> string")
+ (:head
+ (:style "this should be <h1>one</h1> string")
+ (:title "this is some title text"))
+ (:body
+ "this is some body text"
+ ((:a :name "this is an anchor") "with some text")
+ (:comment "testing allowing looser attribute parsing")
+ ((:a :href "mailto:lmcelroy@performigence.com")
+ "lmcelroy@performigence.com")
+ :br
+ "this is some more text"
+ (:bogus "tests parser 'looseness'")
+ (:select
+ (:option "1")
+ (:option "2"))
+ (:ul
+ (:li "item 1")
+ (:li "item 2"))
+ (:dl
+ (:dt "a term")
+ (:dd "its definition")
+ (:dt "another term")
+ (:dd "another definition"))
+ (:table
+ (:colgroup
+ ((:col :align "right"))
+ ((:col :align "center")))
+ (:thead
+ (:tr
+ (:th "this cell is aligned right")
+ (:th "this cell is centered")))
+ (:tfoot
+ (:tr
+ (:th "this cell is aligned right")
+ (:th "this cell is centered")))
+ (:tbody
+ (:tr
+ (:td "this cell is aligned right")
+ (:td "this cell is centered")))
+ (:tbody
+ (:tr
+ (:td "this cell is aligned right")
+ (:td "this cell is centered"))))
+ (:pp
+ (:object
+ (:pp "Navigate the site:"
+ ((:map :name "mainmap")
+ ((:area :shape "rect" :coords "0,100,100,200"))
+ ((:area :shape "rect" :coords "100,100,100,200"))))))
+ (:abbr "WWW")
+ "is an abbreviation"
+ (:b "force")
+ (:pp "whitespace only")
+ ))))
+
+(setf *test-string2*
+ "<i><b id=1>text</i> more text</b>
+ <!doctype this is some text>
+ <![if xxx]>
+ <i><b>text</i></b> more text
+ <b>text<p>more text</b> yet more text</p>
+ <ul><li><b>text<li>more text</ul></b>
+ prev<b><a href=foo>bar</a>baz</b>
+ <b>foo<a>bar</a>baz</b>
+ <b>foo<a>bar</b>baz</a>
+ <b>foo<script>bar</script><a>baz</a></b>
+ <b>foo<i>bar</i>baz</b>
+ <script a=b> some text if (year < 1000) year += 1900; more text </script>
+ <script a=b></script>
+ <frameset><frame foo><frame bar></frameset>"
+ )
+
+(setf *expected-result2*
+ '((:i ((:b :id "1") "text")) ((:b :id "1") " more text")
+ (:!doctype "this is some text")
+ (:! "[if xxx]")
+ (:i (:b "text")) (:b) " more text"
+ (:b "text") (:p (:b "more text") " yet more text")
+ (:ul (:li (:b "text")) (:li (:b "more text"))) (:b)
+ "prev" (:b ((:a :href "foo") "bar") "baz")
+ (:b "foo" (:a "bar") "baz")
+ (:b "foo") (:a (:b "bar") "baz")
+ (:b "foo") (:script "bar") (:b (:a "baz"))
+ (:b "foo" (:i "bar") "baz")
+ ((:script :a "b") " some text if (year < 1000) year += 1900; more text ")
+ ((:script :a "b"))
+ (:frameset ((:frame :foo "foo")) ((:frame :bar "bar")))
+ ))
+
+(setf *test-string3*
+ "<ICMETA URL='nytimes.html'>
+<NYT_HEADER version='1.0' type='homepage'>
+<body bgcolor='#ffffff' background='back5.gif'
+vlink='4' link='6'>
+<NYT_BANNER version='1.0' type='homepage'>
+<table border=0 cellspacing=0 cellpadding=0>
+<tr>
+<td bgcolor=0 rowspan=4 width=126 align=left valign=center>
+<NYT_AD version='1.0' location=''>
+<A HREF='ads.gif' target='top'>
+<IMG SRC='http://ads2.gif' BORDER=0 WIDTH=120 HEIGHT=90 ALT='E-Mail Updates from NYTimes.com' ></A>
+</NYT_AD>")
+
+(setf *expected-result3*
+ '(((:icmeta :url "nytimes.html")) ((:nyt_header :version "1.0" :type "homepage"))
+ ((:body :bgcolor "#ffffff" :background "back5.gif" :vlink "4" :link "6")
+ ((:nyt_banner :version "1.0" :type "homepage"))
+ ((:table :border "0" :cellspacing "0" :cellpadding "0")
+ (:tr
+ ((:td :bgcolor "0" :rowspan "4" :width "126" :align "left" :valign "center")
+ ((:nyt_ad :version "1.0" :location "")
+ ((:a :href "ads.gif" :target "top")
+ ((:img :src "http://ads2.gif" :border "0" :width "120" :height "90" :alt
+ "E-Mail Updates from NYTimes.com"))))))))))
+
+
+(defmethod lhtml-equal ((a t) (b t))
+ (equal a b))
+
+(defmethod lhtml-equal ((a list) (b list))
+ (let ((i 0) (j 0))
+ (loop
+ (if* (and (= i (length a)) (= j (length b))) then (return t)
+ elseif (and (< i (length a)) (white-space-p (nth i a))) then
+ (incf i)
+ elseif (white-space-p (nth j b)) then
+ (incf j)
+ elseif (and (= i (length a)) (/= j (length b))) then
+ (return
+ (loop
+ (when (= j (length b)) (return t))
+ (when (not (white-space-p (nth j b))) (return nil))
+ (incf j)))
+ elseif (and (/= i (length a)) (= j (length b))) then
+ (return
+ (loop
+ (when (= i (length a)) (return t))
+ (when (not (white-space-p (nth i a))) (return nil))
+ (incf i)))
+ elseif (not (lhtml-equal (nth i a) (nth j b))) then
+ (return nil)
+ else
+ (incf i)
+ (incf j)))))
+
+(defmethod lhtml-equal ((a string) (b string))
+ (let ((i 0) (j 0))
+ ;; skip white space in beginning
+ (loop
+ (let ((char (elt a i)))
+ (when (and (not (eq char #\space))
+ (not (eq char #\tab))
+ (not (eq char #\return))
+ (not (eq char #\linefeed)))
+ (return)))
+ (incf i))
+ (loop
+ (let ((char (elt b j)))
+ (when (and (not (eq char #\space))
+ (not (eq char #\tab))
+ (not (eq char #\return))
+ (not (eq char #\linefeed)))
+ (return)))
+ (incf j))
+ (loop
+ (when (and (= i (length a)) (= j (length b))) (return t))
+ (when (and (= i (length a)) (/= j (length b)))
+ (return
+ (loop
+ (when (= j (length b)) (return t))
+ (let ((char (elt b j)))
+ (when (and (not (eq char #\space))
+ (not (eq char #\tab))
+ (not (eq char #\return))
+ (not (eq char #\linefeed)))
+ (return t)))
+ (incf j))))
+ (when (and (/= i (length a)) (= j (length b)))
+ (return
+ (loop
+ (when (= i (length a)) (return t))
+ (let ((char (elt a i)))
+ (when (and (not (eq char #\space))
+ (not (eq char #\tab))
+ (not (eq char #\return))
+ (not (eq char #\linefeed)))
+ (return t)))
+ (incf i))))
+ (when (not (eq (elt a i) (elt b j))) (return nil))
+ (incf i)
+ (incf j))))
+
+(defmethod white-space-p ((a t))
+ nil)
+
+(defmethod white-space-p ((a string))
+ (let ((i 0)
+ (length (length a)))
+ (loop
+ (when (= i length) (return t))
+ (let ((char (elt a i)))
+ (when (and (not (eq char #\space))
+ (not (eq char #\tab))
+ (not (eq char #\return))
+ (not (eq char #\linefeed)))
+ (return nil)))
+ (incf i))))
+
+;;------------------------------------------------
+
+(defvar *callback-called* 0)
+
+(let ((*pass* 0))
+ (defun callback-test-func (arg)
+ ;; incf *callback-called* so we know exactly how many times this is
+ ;; called
+ (incf *callback-called*)
+ (if* (= *pass* 0)
+ then
+ (incf *pass*)
+ (test t (lhtml-equal arg
+ '((:a :name "this is an anchor")
+ "with some text")))
+ else
+ (setf *pass* 0)
+ (test t (lhtml-equal arg
+ '((:a :href
+ "mailto:lmcelroy@performigence.com")
+ "lmcelroy@performigence.com"))))))
+
+(let ((*pass* 0))
+ (defun nested-callback (arg)
+ ;; incf *callback-called* so we know exactly how many times this is
+ ;; called
+ (incf *callback-called*)
+ (if* (= *pass* 0)
+ then
+ (incf *pass*)
+ (test t (lhtml-equal arg
+ '(:pp "Navigate the site:"
+ ((:map :name "mainmap")
+ ((:area :shape "rect" :coords "0,100,100,200"))
+ ((:area :shape "rect" :coords "100,100,100,200"))))))
+ elseif (= *pass* 1)
+ then
+ (incf *pass*)
+ (test t (lhtml-equal arg
+ '(:pp
+ (:object
+ (:pp "Navigate the site:"
+ ((:map :name "mainmap")
+ ((:area :shape "rect" :coords "0,100,100,200"))
+ ((:area :shape "rect"
+ :coords "100,100,100,200"))))))))
+ else
+ (setf *pass* 0)
+ (test t (lhtml-equal arg
+ '(:pp "whitespace only"))))))
+
+(defun testit ()
+ (let ((util.test:*test-errors* 0)
+ (util.test:*test-successes* 0))
+ (test t (lhtml-equal (parse-html *test-string2*) *expected-result2*))
+ (setf *callback-called* 0)
+ (test t (lhtml-equal (parse-html *test-string*) *expected-result*))
+ (test 0 *callback-called*)
+ ;;(setf (element-callback :a) 'callback-test-func)
+ (setf *callback-called* 0)
+ (test t (lhtml-equal (parse-html *test-string*
+ :callbacks (acons :a 'callback-test-func nil))
+ *expected-result*))
+ (test 2 *callback-called*)
+ (setf *callback-called* 0)
+ (test t (lhtml-equal (parse-html *test-string*) *expected-result*))
+ (test 0 *callback-called*)
+ (setf *callback-called* 0)
+ ;; make sure function is OK arg
+ ;;(setf (element-callback :a) (symbol-function 'callback-test-func))
+ (test t (lhtml-equal
+ (parse-html *test-string*
+ :callbacks (acons :a (symbol-function 'callback-test-func) nil))
+ *expected-result*))
+ (test 2 *callback-called*)
+ ;; try with :callback-only t
+ (setf *callback-called* 0)
+ ;;(setf (element-callback :a) 'callback-test-func)
+ (parse-html *test-string* :callback-only t
+ :callbacks (acons :a 'callback-test-func nil)) ;; won't return parse output
+ (test 2 *callback-called*)
+ ;; try nested callback
+ (setf *callback-called* 0)
+ ;;(setf (element-callback :p) 'nested-callback)
+ (test t (lhtml-equal (parse-html *test-string*
+ :callbacks (acons :pp 'nested-callback nil))
+ *expected-result*))
+ (test 3 *callback-called*)
+ (setf *callback-called* 0)
+ (parse-html *test-string* :callback-only t
+ :callbacks (acons :pp 'nested-callback nil))
+ (test 3 *callback-called*)
+ (test-error (parse-html "b<a"))
+ (test t (lhtml-equal
+ (multiple-value-bind (res rogues)
+ (parse-html *test-string3* :collect-rogue-tags t)
+ (declare (ignorable res))
+ (parse-html *test-string3* :no-body-tags rogues))
+ *expected-result3*))
+ (format t "End test: ~s, ~d errors, ~d successes~%"
+ "parse-html" util.test:*test-errors* util.test:*test-successes*)
+ ))
--- /dev/null
+;; copyright (c) 1986-2000 Franz Inc, Berkeley, CA
+;;
+;; This code is free software; you can redistribute it and/or
+;; modify it under the terms of the version 2.1 of
+;; the GNU Lesser General Public License as published by
+;; the Free Software Foundation, as clarified by the AllegroServe
+;; prequel found in license-allegroserve.txt.
+;;
+;; This code is distributed in the hope that it will be useful,
+;; but without any warranty; without even the implied warranty of
+;; merchantability or fitness for a particular purpose. See the GNU
+;; Lesser General Public License for more details.
+;;
+;; Version 2.1 of the GNU Lesser General Public License is in the file
+;; license-lgpl.txt that was distributed with this file.
+;; If it is not present, you can access it from
+;; http://www.gnu.org/copyleft/lesser.txt (until superseded by a newer
+;; version) or write to the Free Software Foundation, Inc., 59 Temple Place,
+;; Suite 330, Boston, MA 02111-1307 USA
+;;
+
+;; $Id: phtml.cl,v 1.1 2002/10/15 12:23:03 kevin Exp $
+
+;; phtml.cl - parse html
+
+;; Change Log
+;;
+;; 02/05/01 symbols mapped to preferred case at runtime (as opposed to
+;; a compile time macro determining the case mapping)
+;;
+;; 10/27/00 :callbacks arg now processed correctly for tags with no body
+;;
+;; 10/14/00 add first-pass member to tokenbuf structure; used to remove
+;; multiple un-next-char calls in raw mode
+;; removed :script from *in-line* (incorect and led to infinite loop)
+;; char format reopen not done in :script and :style
+;; fixed :table/:th tag-auto-close-stop typo
+
+
+; do character entity stuff
+;
+
+(defpackage net.html.parser
+ (:use :lisp :clos :excl)
+ (:export
+ #:phtml-internal
+ #:parse-html))
+
+(in-package :net.html.parser)
+
+(defmacro tag-auto-close (tag) `(get ,tag 'tag-auto-close))
+(defmacro tag-auto-close-stop (tag) `(get ,tag 'tag-auto-close-stop))
+(defmacro tag-no-end (tag) `(get ,tag 'tag-no-end))
+
+; only subelements allowed in this element, no strings
+(defmacro tag-no-pcdata (tag) `(get ,tag 'tag-no-pcdata))
+
+;; given :foo or (:foo ...) return :foo
+(defmacro tag-name (expr)
+ `(let ((.xx. ,expr))
+ (if* (consp .xx.)
+ then (car .xx.)
+ else .xx.)))
+
+
+
+
+
+(eval-when (compile load eval)
+ (defconstant state-pcdata 0) ; scanning for chars or a tag
+ (defconstant state-readtagfirst 1)
+ (defconstant state-readtag 2)
+ (defconstant state-findattribname 3)
+ (defconstant state-attribname 4)
+ (defconstant state-attribstartvalue 5)
+ (defconstant state-attribvaluedelim 6)
+ (defconstant state-attribvaluenodelim 7)
+ (defconstant state-readcomment 8)
+ (defconstant state-readcomment-one 9)
+ (defconstant state-readcomment-two 10)
+ (defconstant state-findvalue 11)
+ (defconstant state-rawdata 12)
+)
+
+
+(defstruct collector
+ next ; next index to set
+ max ; 1+max index to set
+ data ; string vector
+ )
+
+;; keep a cache of collectors on this list
+
+(defparameter *collectors* (list nil nil nil nil))
+
+(defun get-collector ()
+ (declare (optimize (speed 3) (safety 1)))
+ (let (col)
+ (mp::without-scheduling
+ (do* ((cols *collectors* (cdr cols))
+ (this (car cols) (car cols)))
+ ((null cols))
+ (if* this
+ then (setf (car cols) nil)
+ (setq col this)
+ (return))))
+ (if* col
+ then (setf (collector-next col) 0)
+ col
+ else (make-collector
+ :next 0
+ :max 100
+ :data (make-string 100)))))
+
+(defun put-back-collector (col)
+ (declare (optimize (speed 3) (safety 1)))
+ (mp::without-scheduling
+ (do ((cols *collectors* (cdr cols)))
+ ((null cols)
+ ; toss it away
+ nil)
+ (if* (null (car cols))
+ then (setf (car cols) col)
+ (return)))))
+
+
+
+(defun grow-and-add (coll ch)
+ (declare (optimize (speed 3) (safety 1)))
+ ;; increase the size of the data portion of the collector and then
+ ;; add the given char at the end
+ (let* ((odata (collector-data coll))
+ (ndata (make-string (* 2 (length odata)))))
+ (dotimes (i (length odata))
+ (setf (schar ndata i) (schar odata i)))
+ (setf (collector-data coll) ndata)
+ (setf (collector-max coll) (length ndata))
+ (let ((next (collector-next coll)))
+ (setf (schar ndata next) ch)
+ (setf (collector-next coll) (1+ next)))))
+
+
+
+
+
+
+
+;; character characteristics
+(defconstant char-tagcharacter 1) ; valid char for a tag
+(defconstant char-attribnamechar 2) ; valid char for an attribute name
+(defconstant char-attribundelimattribvalue 4) ; valid for undelimited value
+(defconstant char-spacechar 8)
+
+(defparameter *characteristics*
+ ;; array of bits describing character characteristics
+ (let ((arr (make-array 128 :initial-element 0)))
+ (declare (optimize (speed 3) (safety 1)))
+ (macrolet ((with-range ((var from to) &rest body)
+ `(do ((,var (char-code ,from) (1+ ,var))
+ (mmax (char-code ,to)))
+ ((> ,var mmax))
+ ,@body))
+
+ (addit (index charistic)
+ `(setf (svref arr ,index)
+ (logior (svref arr ,index)
+ ,charistic)))
+ )
+
+ (with-range (i #\A #\Z)
+ (addit i (+ char-tagcharacter
+ char-attribnamechar
+ char-attribundelimattribvalue)))
+
+ (with-range (i #\a #\z)
+ (addit i (+ char-tagcharacter
+ char-attribnamechar
+ char-attribundelimattribvalue)))
+
+ (with-range (i #\0 #\9)
+ (addit i (+ char-tagcharacter
+ char-attribnamechar
+ char-attribundelimattribvalue)))
+
+ ;; let colon be legal tag character
+ (addit (char-code #\:) (+ char-attribnamechar
+ char-tagcharacter))
+
+ ;; NY times special tags have _
+ (addit (char-code #\_) (+ char-attribnamechar
+ char-tagcharacter))
+
+ ; now the unusual cases
+ (addit (char-code #\-) (+ char-attribnamechar
+ char-attribundelimattribvalue))
+ (addit (char-code #\.) (+ char-attribnamechar
+ char-attribundelimattribvalue))
+
+ ;; adding all typeable chars except for whitespace and >
+ (addit (char-code #\:) char-attribundelimattribvalue)
+ (addit (char-code #\@) char-attribundelimattribvalue)
+ (addit (char-code #\/) char-attribundelimattribvalue)
+ (addit (char-code #\!) char-attribundelimattribvalue)
+ (addit (char-code #\#) char-attribundelimattribvalue)
+ (addit (char-code #\$) char-attribundelimattribvalue)
+ (addit (char-code #\%) char-attribundelimattribvalue)
+ (addit (char-code #\^) char-attribundelimattribvalue)
+ (addit (char-code #\&) char-attribundelimattribvalue)
+ (addit (char-code #\() char-attribundelimattribvalue)
+ (addit (char-code #\)) char-attribundelimattribvalue)
+ (addit (char-code #\_) char-attribundelimattribvalue)
+ (addit (char-code #\=) char-attribundelimattribvalue)
+ (addit (char-code #\+) char-attribundelimattribvalue)
+ (addit (char-code #\\) char-attribundelimattribvalue)
+ (addit (char-code #\|) char-attribundelimattribvalue)
+ (addit (char-code #\{) char-attribundelimattribvalue)
+ (addit (char-code #\}) char-attribundelimattribvalue)
+ (addit (char-code #\[) char-attribundelimattribvalue)
+ (addit (char-code #\]) char-attribundelimattribvalue)
+ (addit (char-code #\;) char-attribundelimattribvalue)
+ (addit (char-code #\') char-attribundelimattribvalue)
+ (addit (char-code #\") char-attribundelimattribvalue)
+ (addit (char-code #\,) char-attribundelimattribvalue)
+ (addit (char-code #\<) char-attribundelimattribvalue)
+ (addit (char-code #\?) char-attribundelimattribvalue)
+
+ ; i'm not sure what can be in a tag name but we know that
+ ; ! and - must be there since it's used in comments
+
+ (addit (char-code #\-) char-tagcharacter)
+ (addit (char-code #\!) char-tagcharacter)
+
+ ; spaces
+ (addit (char-code #\space) char-spacechar)
+ (addit (char-code #\tab) char-spacechar)
+ (addit (char-code #\return) char-spacechar)
+ (addit (char-code #\linefeed) char-spacechar)
+
+ )
+
+
+
+ arr))
+
+
+(defun char-characteristic (char bit)
+ (declare (optimize (speed 3) (safety 1)))
+ ;; return true if the given char has the given bit set in
+ ;; the characteristic array
+ (let ((code (char-code char)))
+ (if* (<= 0 code 127)
+ then ; in range
+ (not (zerop (logand (svref *characteristics* code) bit))))))
+
+
+(defstruct tokenbuf
+ cur ;; next index to use to grab from tokenbuf
+ max ;; index one beyond last character
+ data ;; character array
+ first-pass ;; previously parsed tokens
+ )
+
+;; cache of tokenbuf structs
+(defparameter *tokenbufs* (list nil nil nil nil))
+
+(defun get-tokenbuf ()
+ (declare (optimize (speed 3) (safety 1)))
+ (let (buf)
+ (mp::without-scheduling
+ (do* ((bufs *tokenbufs* (cdr bufs))
+ (this (car bufs) (car bufs)))
+ ((null bufs))
+ (if* this
+ then (setf (car bufs) nil)
+ (setq buf this)
+ (return))))
+ (if* buf
+ then (setf (tokenbuf-cur buf) 0)
+ (setf (tokenbuf-max buf) 0)
+ buf
+ else (make-tokenbuf
+ :cur 0
+ :max 0
+ :data (make-array 1024 :element-type 'character)))))
+
+(defun put-back-tokenbuf (buf)
+ (declare (optimize (speed 3) (safety 1)))
+ (mp::without-scheduling
+ (do ((bufs *tokenbufs* (cdr bufs)))
+ ((null bufs)
+ ; toss it away
+ nil)
+ (if* (null (car bufs))
+ then (setf (car bufs) buf)
+ (return)))))
+
+(defun to-preferred-case (ch)
+ (if* (eq excl:*current-case-mode* :CASE-INSENSITIVE-UPPER)
+ then (char-upcase ch)
+ else (char-downcase ch)))
+
+
+(defun next-token (stream ignore-strings raw-mode-delimiter
+ read-sequence-func tokenbuf)
+ (declare (optimize (speed 3) (safety 1)))
+ ;; return two values:
+ ;; the next token from the stream.
+ ;; the kind of token (:pcdata, :start-tag, :end-tag, :eof)
+ ;;
+ ;; if read-sequence-func is non-nil,
+ ;; read-sequence-func is called to fetch the next character
+ (macrolet ((next-char (stream)
+ `(let ((cur (tokenbuf-cur tokenbuf))
+ (tb (tokenbuf-data tokenbuf)))
+ (if* (>= cur (tokenbuf-max tokenbuf))
+ then ; fill buffer
+ (if* (zerop (setf (tokenbuf-max tokenbuf)
+ (if* read-sequence-func
+ then (funcall read-sequence-func tb stream)
+ else (read-sequence tb stream))))
+ then (setq cur nil) ; eof
+ else (setq cur 0)))
+ (if* cur
+ then (prog1 (schar tb cur)
+ (setf (tokenbuf-cur tokenbuf) (1+ cur))))))
+
+
+ (un-next-char (stream ch)
+ `(decf (tokenbuf-cur tokenbuf)))
+
+ (clear-coll (coll)
+ `(setf (collector-next coll) 0))
+
+ (add-to-coll (coll ch)
+ `(let ((.next. (collector-next ,coll)))
+ (if* (>= .next. (collector-max ,coll))
+ then (grow-and-add ,coll ,ch)
+ else (setf (schar (collector-data ,coll) .next.)
+ ,ch)
+ (setf (collector-next ,coll) (1+ .next.)))))
+
+ )
+
+ (let ((state (if* raw-mode-delimiter then state-rawdata else state-pcdata))
+ (coll (get-collector))
+ (ch)
+
+ (value-delim)
+
+ (tag-to-return)
+ (attribs-to-return)
+
+ (end-tag)
+
+ (attrib-name)
+ (attrib-value)
+
+ (name-length 0) ;; count only when it could be a comment
+
+ (raw-length 0)
+ (xml-bailout)
+ )
+
+ (loop
+
+ (setq ch (next-char stream))
+ ;;(format t "ch: ~s state: ~s~%" ch state)
+
+ (if* (null ch)
+ then (return) ; eof -- exit loop
+ )
+
+
+ (case state
+ (#.state-pcdata
+ ; collect everything until we see a <
+ (if* (eq ch #\<)
+ then ; if we've collected nothing then get a tag
+ (if* (> (collector-next coll) 0)
+ then ; have collected something, return this string
+ (un-next-char stream ch) ; push back the <
+ (return)
+ else ; collect a tag
+ (setq state state-readtagfirst))
+ else ; we will check for & here eventually
+ (if* (not (eq ch #\return))
+ then (add-to-coll coll ch))))
+
+ (#.state-readtagfirst
+ ; starting to read a tag name
+ (if* (eq #\/ ch)
+ then ; end tag
+ (setq end-tag t)
+ else (if* (eq #\! ch) ; possible comment
+ then (setf xml-bailout t)
+ (setq name-length 0))
+ (un-next-char stream ch))
+ (setq state state-readtag))
+
+ (#.state-readtag
+ ;; reading the whole tag name
+ (if* (char-characteristic ch char-tagcharacter)
+ then (add-to-coll coll (to-preferred-case ch))
+ (incf name-length)
+ (if* (and (eq name-length 3)
+ (coll-has-comment coll))
+ then (clear-coll coll)
+ (setq state state-readcomment))
+
+ else (setq tag-to-return (compute-tag coll))
+ (clear-coll coll)
+ (if* (eq ch #\>)
+ then (return) ; we're done
+ elseif xml-bailout then
+ (un-next-char stream ch)
+ (return)
+ else (if* (eq tag-to-return :!--)
+ then ; a comment
+ (setq state state-readcomment)
+ else (un-next-char stream ch)
+ (setq state state-findattribname)))))
+
+ (#.state-findattribname
+ ;; search until we find the start of an attribute name
+ ;; or the end of the tag
+ (if* (eq ch #\>)
+ then ; end of the line
+ (return)
+ elseif (eq ch #\=)
+ then ; value for previous attribute name
+ ; (syntax "foo = bar" is bogus I think but it's
+ ; used some places, here is where we handle this
+ (pop attribs-to-return)
+ (setq attrib-name (pop attribs-to-return))
+ (setq state state-findvalue)
+ elseif (char-characteristic ch char-attribnamechar)
+ then (un-next-char stream ch)
+ (setq state state-attribname)
+ else nil ; ignore other things
+ ))
+
+ (#.state-findvalue
+ ;; find the start of the value
+ (if* (char-characteristic ch char-spacechar)
+ thenret ; keep looking
+ elseif (eq ch #\>)
+ then ; no value, set the value to be the
+ ; name as a string
+ (setq attrib-value
+ (string-downcase (string attrib-name)))
+
+ (push attrib-name attribs-to-return)
+ (push attrib-value attribs-to-return)
+ (un-next-char stream ch)
+ (setq state state-findattribname)
+ else (un-next-char stream ch)
+ (setq state state-attribstartvalue)))
+
+
+ (#.state-attribname
+ ;; collect attribute name
+
+ (if* (char-characteristic ch char-attribnamechar)
+ then (add-to-coll coll (to-preferred-case ch))
+ elseif (eq #\= ch)
+ then ; end of attribute name, value is next
+ (setq attrib-name (compute-tag coll))
+ (clear-coll coll)
+ (setq state state-attribstartvalue)
+ else ; end of attribute name with no value,
+ (setq attrib-name (compute-tag coll))
+ (clear-coll coll)
+ (setq attrib-value
+ (string-downcase (string attrib-name)))
+ (push attrib-name attribs-to-return)
+ (push attrib-value attribs-to-return)
+ (un-next-char stream ch)
+ (setq state state-findattribname)))
+
+ (#.state-attribstartvalue
+ ;; begin to collect value
+ (if* (or (eq ch #\")
+ (eq ch #\'))
+ then (setq value-delim ch)
+ (setq state state-attribvaluedelim)
+ ;; gobble spaces; assume since we've seen a '=' there really is a value
+ elseif (eq #\space ch) then nil
+ else (un-next-char stream ch)
+ (setq state state-attribvaluenodelim)))
+
+ (#.state-attribvaluedelim
+ (if* (eq ch value-delim)
+ then (setq attrib-value (compute-coll-string coll))
+ (clear-coll coll)
+ (push attrib-name attribs-to-return)
+ (push attrib-value attribs-to-return)
+ (setq state state-findattribname)
+ else (add-to-coll coll ch)))
+
+ (#.state-attribvaluenodelim
+ ;; an attribute value not delimited by ' or " and thus restricted
+ ;; in the possible characters
+ (if* (char-characteristic ch char-attribundelimattribvalue)
+ then (add-to-coll coll ch)
+ else (un-next-char stream ch)
+ (setq attrib-value (compute-coll-string coll))
+ (clear-coll coll)
+ (push attrib-name attribs-to-return)
+ (push attrib-value attribs-to-return)
+ (setq state state-findattribname)))
+
+ (#.state-readcomment
+ ;; a comment ends on the first --, but we'll look for -->
+ ;; since that's what most people expect
+ (if* (eq ch #\-)
+ then (setq state state-readcomment-one)
+ else (add-to-coll coll ch)))
+
+ (#.state-readcomment-one
+ ;; seen one -, looking for ->
+
+ (if* (eq ch #\-)
+ then (setq state state-readcomment-two)
+ else ; not a comment end, put back the -'s
+ (add-to-coll coll #\-)
+ (add-to-coll coll ch)
+ (setq state state-readcomment)))
+
+ (#.state-readcomment-two
+ ;; seen two -'s, looking for >
+
+ (if* (eq ch #\>)
+ then ; end of the line
+ (return)
+ elseif (eq ch #\-)
+ then ; still at two -'s, have to put out first
+ (add-to-coll coll #\-)
+ else ; put out two hypens and back to looking for a hypen
+ (add-to-coll coll #\-)
+ (add-to-coll coll #\-)
+ (setq state state-readcomment)))
+
+ (#.state-rawdata
+ ;; collect everything until we see the delimiter
+ (if* (eq (to-preferred-case ch) (elt raw-mode-delimiter raw-length))
+ then
+ (incf raw-length)
+ (when (= raw-length (length raw-mode-delimiter))
+ ;; push the end tag back so it can then be lexed
+ ;; but don't do it for xml stuff
+ (when (/= (length raw-mode-delimiter) 1)
+ (push :end-tag (tokenbuf-first-pass tokenbuf))
+ (if* (equal raw-mode-delimiter "</STYLE>")
+ then (push :STYLE (tokenbuf-first-pass tokenbuf))
+ elseif (equal raw-mode-delimiter "</style>")
+ then (push :style (tokenbuf-first-pass tokenbuf))
+ elseif (equal raw-mode-delimiter "</SCRIPT>")
+ then (push :SCRIPT (tokenbuf-first-pass tokenbuf))
+ elseif (equal raw-mode-delimiter "</script>")
+ then (push :script (tokenbuf-first-pass tokenbuf))
+ else (error "unexpected raw-mode-delimiter"))
+ )
+ ;; set state to state-pcdata for next section
+ (return))
+ else
+ ;; push partial matches into data string
+ (dotimes (i raw-length)
+ (add-to-coll coll (elt raw-mode-delimiter i)))
+ (setf raw-length 0)
+ (add-to-coll coll ch)))
+
+ ))
+
+
+ ;; out of the loop.
+ ;; if we're in certain states then it means we should return a value
+ ;;
+ (case state
+ ((#.state-pcdata #.state-rawdata)
+ ;; return the buffer as a string
+ (if* (zerop (collector-next coll))
+ then (values nil (if (eq state state-pcdata) :eof :pcdata))
+ else (values (prog1
+ (if* (null ignore-strings)
+ then (compute-coll-string coll))
+ (put-back-collector coll))
+ :pcdata)))
+
+ (#.state-readtag
+ (when (null tag-to-return)
+ (error "unexpected end of input encountered"))
+ ;; we've read a tag with no attributes
+ (put-back-collector coll)
+ (values tag-to-return
+ (if* end-tag
+ then :end-tag
+ else (if* xml-bailout then :xml else :start-tag))
+ ))
+
+ (#.state-findattribname
+ ;; returning a tag with possible attributes
+ (put-back-collector coll)
+ (if* end-tag
+ then ; ignore any attributes
+ (values tag-to-return :end-tag)
+ elseif attribs-to-return
+ then (values (cons tag-to-return
+ (nreverse attribs-to-return))
+ :start-tag)
+ else (values tag-to-return :start-tag)))
+
+ (#.state-readcomment-two
+ ;; returning a comment
+ (values (prog1 (if* (null ignore-strings)
+ then (compute-coll-string coll))
+ (put-back-collector coll))
+ :comment))
+
+ (t
+ (if* (null ch) then (error "unexpected end of input encountered")
+ else (error "internal error, can't be here in state ~d" state)))))))
+
+
+(defvar *kwd-package* (find-package :keyword))
+
+(defun compute-tag (coll)
+ (declare (optimize (speed 3) (safety 1)))
+ ;; compute the symbol named by what's in the collector
+ (excl::intern* (collector-data coll) (collector-next coll) *kwd-package*))
+
+
+
+(defun compute-coll-string (coll)
+ (declare (optimize (speed 3) (safety 1)))
+ ;; return the string that's in the collection
+ (let ((str (make-string (collector-next coll)))
+ (from (collector-data coll)))
+ (dotimes (i (collector-next coll))
+ (setf (schar str i) (schar from i)))
+
+ str))
+
+(defun coll-has-comment (coll)
+ (declare (optimize (speed 3) (safety 1)))
+ ;; true if the collector has exactly "!--" in it
+ (and (eq 3 (collector-next coll))
+ (let ((data (collector-data coll)))
+ (and (eq #\! (schar data 0))
+ (eq #\- (schar data 1))
+ (eq #\- (schar data 2))))))
+
+
+;;;;;;;;;;; quick and dirty parse
+
+; the elements with no body and thus no end tag
+(dolist (opt '(:area :base :basefont :bgsound :br :button :col
+ ;;:colgroup - no, this is an element with contents
+ :embed :hr :img :frame
+ :input :isindex :keygen :link :meta
+ :plaintext :spacer :wbr))
+ (setf (tag-no-end opt) t))
+
+(defvar *in-line* '(:tt :i :b :big :small :em :strong :dfn :code :samp :kbd
+ :var :cite :abbr :acronym :a :img :object :br :map
+ :q :sub :sup :span :bdo :input :select :textarea :label :button :font))
+
+(defvar *ch-format* '(:i :b :tt :big :small :strike :s :u
+ :em :strong :font))
+
+(defvar *known-tags* '(:!doctype :a :acronym :address :applet :area :b :base :basefont
+ :bdo :bgsound :big :blink :blockquote :body :br :button :caption
+ :center :cite :code :col :colgroup :comment :dd :del :dfn :dir
+ :div :dl :dt :em :embed :fieldset :font :form :frame :frameset
+ :h1 :h2 :h3 :h4 :h5 :h6 :head :hr :html :i :iframe :img :input
+ :ins :isindex :kbd :label :layer :legend :li :link :listing :map
+ :marquee :menu :meta :multicol :nobr :noframes :noscript :object
+ :ol :option :p :param :plaintext :pre :q :samp :script :select
+ :small :spacer :span :s :strike :strong :style :sub :sup :table
+ :tbody :td :textarea :tfoot :th :thead :title :tr :tt :u :ul :var
+ :wbr :xmp))
+
+; the elements whose start tag can end a previous tag
+
+(setf (tag-auto-close :tr) '(:tr :td :th :colgroup))
+(setf (tag-auto-close-stop :tr) '(:table))
+
+(setf (tag-auto-close :td) '(:td :th))
+(setf (tag-auto-close-stop :td) '(:table))
+
+(setf (tag-auto-close :th) '(:td :th))
+(setf (tag-auto-close-stop :th) '(:table))
+
+(setf (tag-auto-close :dt) '(:dt :dd))
+(setf (tag-auto-close-stop :dt) '(:dl))
+
+(setf (tag-auto-close :li) '(:li))
+(setf (tag-auto-close-stop :li) '(:ul :ol))
+
+;; new stuff to close off tags with optional close tags
+(setf (tag-auto-close :address) '(:head :p))
+(setf (tag-auto-close :blockquote) '(:head :p))
+(setf (tag-auto-close :body) '(:body :frameset :head))
+
+(setf (tag-auto-close :dd) '(:dd :dt))
+(setf (tag-auto-close-stop :dd) '(:dl))
+
+(setf (tag-auto-close :dl) '(:head :p))
+(setf (tag-auto-close :div) '(:head :p))
+(setf (tag-auto-close :fieldset) '(:head :p))
+(setf (tag-auto-close :form) '(:head :p))
+(setf (tag-auto-close :frameset) '(:body :frameset :head))
+(setf (tag-auto-close :hr) '(:head :p))
+(setf (tag-auto-close :h1) '(:head :p))
+(setf (tag-auto-close :h2) '(:head :p))
+(setf (tag-auto-close :h3) '(:head :p))
+(setf (tag-auto-close :h4) '(:head :p))
+(setf (tag-auto-close :h5) '(:head :p))
+(setf (tag-auto-close :h6) '(:head :p))
+(setf (tag-auto-close :noscript) '(:head :p))
+(setf (tag-auto-close :ol) '(:head :p))
+
+(setf (tag-auto-close :option) '(:option))
+(setf (tag-auto-close-stop :option) '(:select))
+
+(setf (tag-auto-close :p) '(:head :p))
+
+(setf (tag-auto-close :pre) '(:head :p))
+(setf (tag-auto-close :table) '(:head :p))
+
+(setf (tag-auto-close :tbody) '(:colgroup :tfoot :tbody :thead))
+(setf (tag-auto-close-stop :tbody) '(:table))
+
+(setf (tag-auto-close :tfoot) '(:colgroup :tfoot :tbody :thead))
+(setf (tag-auto-close-stop :tfoot) '(:table))
+
+(setf (tag-auto-close :thead) '(:colgroup :tfoot :tbody :thead))
+(setf (tag-auto-close-stop :thead) '(:table))
+
+(setf (tag-auto-close :ul) '(:head :p))
+
+(setf (tag-no-pcdata :table) t)
+(setf (tag-no-pcdata :tr) t)
+
+
+(defmethod parse-html ((p stream) &key callback-only callbacks collect-rogue-tags
+ no-body-tags)
+ (declare (optimize (speed 3) (safety 1)))
+ (phtml-internal p nil callback-only callbacks collect-rogue-tags
+ no-body-tags))
+
+(defmacro tag-callback (tag)
+ `(rest (assoc ,tag callbacks)))
+
+(defun phtml-internal (p read-sequence-func callback-only callbacks collect-rogue-tags
+
+ no-body-tags)
+ (declare (optimize (speed 3) (safety 1)))
+ (let ((raw-mode-delimiter nil)
+ (pending nil)
+ (current-tag :start-parse)
+ (last-tag :start-parse)
+ (current-callback-tags nil)
+ (pending-ch-format nil)
+ (closed-pending-ch-format nil)
+ (new-opens nil)
+ (tokenbuf (get-tokenbuf))
+ (guts)
+ (rogue-tags)
+ )
+ (labels ((close-off-tags (name stop-at collect-rogues)
+ ;; close off an open 'name' tag, but search no further
+ ;; than a 'stop-at' tag.
+ (if* (member (tag-name current-tag) name :test #'eq)
+ then ;; close current tag(s)
+ (loop
+ (when (and collect-rogues
+ (not (member (tag-name current-tag)
+ *known-tags*)))
+ (push (tag-name current-tag) rogue-tags))
+ (close-current-tag)
+ (when (or (member (tag-name current-tag)
+ *ch-format*)
+ (not (member
+ (tag-name current-tag) name :test #'eq)))
+ (return)))
+ elseif (member (tag-name current-tag) stop-at :test #'eq)
+ then nil
+ else ; search if there is a tag to close
+ (dolist (ent pending)
+ (if* (member (tag-name (car ent)) name :test #'eq)
+ then ; found one to close
+ (loop
+ (when (and collect-rogues
+ (not (member (tag-name current-tag)
+ *known-tags*)))
+ (push (tag-name current-tag) rogue-tags))
+ (close-current-tag)
+ (if* (member (tag-name current-tag) name
+ :test #'eq)
+ then (close-current-tag)
+ (return)))
+ (return)
+ elseif (member (tag-name (car ent)) stop-at
+ :test #'eq)
+ then (return) ;; do nothing
+ ))))
+
+ (close-current-tag ()
+ ;; close off the current tag and open the pending tag
+ (when (member (tag-name current-tag) *ch-format* :test #'eq)
+ (push current-tag closed-pending-ch-format)
+ )
+ (let (element)
+ (if* (tag-no-pcdata (tag-name current-tag))
+ then (setq element `(,current-tag
+ ,@(strip-rev-pcdata guts)))
+ else (setq element `(,current-tag ,@(nreverse guts))))
+ (let ((callback (tag-callback (tag-name current-tag))))
+ (when callback
+ (setf current-callback-tags (rest current-callback-tags))
+ (funcall callback element)))
+ (let* ((prev (pop pending)))
+ (setq current-tag (car prev)
+ guts (cdr prev))
+ (push element guts))))
+
+ (save-state ()
+ ;; push the current tag state since we're starting
+ ;; a new open tag
+ (push (cons current-tag guts) pending))
+
+
+ (strip-rev-pcdata (stuff)
+ ;; reverse the list stuff, omitting all the strings
+ (let (res)
+ (dolist (st stuff)
+ (if* (not (stringp st)) then (push st res)))
+ res))
+ (check-in-line (check-tag)
+ (setf new-opens nil)
+ (let (val kind (i 0)
+ (length (length (tokenbuf-first-pass tokenbuf))))
+ (loop
+ (if* (< i length) then
+ (setf val (nth i (tokenbuf-first-pass tokenbuf)))
+ (setf kind (nth (+ i 1) (tokenbuf-first-pass tokenbuf)))
+ (setf i (+ i 2))
+ (if* (= i length) then (setf (tokenbuf-first-pass tokenbuf)
+ (nreverse (tokenbuf-first-pass tokenbuf))))
+ else
+ (multiple-value-setq (val kind)
+ (get-next-token t))
+ (push val (tokenbuf-first-pass tokenbuf))
+ (push kind (tokenbuf-first-pass tokenbuf))
+ )
+ (when (eq kind :eof)
+ (if* (= i length) then
+ (setf (tokenbuf-first-pass tokenbuf)
+ (nreverse (tokenbuf-first-pass tokenbuf))))
+ (return))
+ (when (and (eq val check-tag) (eq kind :end-tag))
+ (if* (= i length) then
+ (setf (tokenbuf-first-pass tokenbuf)
+ (nreverse (tokenbuf-first-pass tokenbuf))))
+ (return))
+ (when (member val *ch-format* :test #'eq)
+ (if* (eq kind :start-tag) then (push val new-opens)
+ elseif (member val new-opens :test #'eq) then
+ (setf new-opens (remove val new-opens :count 1))
+ else (close-off-tags (list val) nil nil)
+ )))))
+
+ (get-next-token (force)
+ (if* (or force (null (tokenbuf-first-pass tokenbuf))) then
+ (multiple-value-bind (val kind)
+ (next-token p nil raw-mode-delimiter read-sequence-func
+ tokenbuf)
+ (values val kind))
+ else
+ (let ((val (first (tokenbuf-first-pass tokenbuf)))
+ (kind (second (tokenbuf-first-pass tokenbuf))))
+ (setf (tokenbuf-first-pass tokenbuf)
+ (rest (rest (tokenbuf-first-pass tokenbuf))))
+ (values val kind))))
+ )
+ (loop
+ (multiple-value-bind (val kind)
+ (get-next-token nil)
+ ;;(format t "val: ~s kind: ~s~%" val kind)
+ (case kind
+ (:pcdata
+ (when (or (and callback-only current-callback-tags)
+ (not callback-only))
+ (if* (member last-tag *in-line*)
+ then
+ (push val guts)
+ else
+ (when (dotimes (i (length val) nil)
+ (when (not (char-characteristic (elt val i)
+ char-spacechar))
+ (return t)))
+ (push val guts))))
+ (when (and (= (length raw-mode-delimiter) 1) ;; xml tag...
+ (or (and callback-only current-callback-tags)
+ (not callback-only)))
+ (close-off-tags (list last-tag) nil nil))
+ (setf raw-mode-delimiter nil)
+ )
+
+ (:xml
+ (setf last-tag val)
+ (setf raw-mode-delimiter ">")
+ (let* ((name (tag-name val)))
+ (when (and callback-only (tag-callback name))
+ (push name current-callback-tags))
+ (save-state)
+ (setq current-tag val)
+ (setq guts nil)
+ ))
+
+ (:start-tag
+ (setf last-tag val)
+ (if* (or (eq last-tag :style)
+ (and (listp last-tag) (eq (first last-tag) :style)))
+ then
+ (setf raw-mode-delimiter
+ (if* (eq excl:*current-case-mode* :CASE-INSENSITIVE-UPPER)
+ then "</STYLE>"
+ else "</style>"))
+ elseif (or (eq last-tag :script)
+ (and (listp last-tag) (eq (first last-tag) :script)))
+ then
+ (setf raw-mode-delimiter
+ (if* (eq excl:*current-case-mode* :CASE-INSENSITIVE-UPPER)
+ then "</SCRIPT>"
+ else "</script>")))
+ ; maybe this is an end tag too
+ (let* ((name (tag-name val))
+ (auto-close (tag-auto-close name))
+ (auto-close-stop nil)
+ (no-end (or (tag-no-end name) (member name no-body-tags))))
+ (when (and callback-only (tag-callback name))
+ (push name current-callback-tags))
+ (when (or (and callback-only current-callback-tags)
+ (not callback-only))
+ (if* auto-close
+ then (setq auto-close-stop (tag-auto-close-stop name))
+ (close-off-tags auto-close auto-close-stop nil))
+ (when (and pending-ch-format (not no-end))
+ (if* (member name *ch-format* :test #'eq) then nil
+ elseif (member name *in-line* :test #'eq) then
+ ;; close off only tags that are within *in-line* block
+ (check-in-line name)
+ else ;; close ALL pending char tags and then reopen
+ (dolist (this-tag (reverse pending-ch-format))
+ (close-off-tags (list (if (listp this-tag) (first this-tag) this-tag)) nil nil))
+ ))
+ (if* no-end
+ then ; this is a singleton tag
+ (let ((callback (tag-callback (tag-name (if* (atom val)
+ then val
+ else (first val))))))
+ (when callback
+ (funcall callback (if* (atom val)
+ then val
+ else (list val)))))
+ (push (if* (atom val)
+ then val
+ else (list val))
+ guts)
+ else (save-state)
+ (setq current-tag val)
+ (setq guts nil))
+ (if* (member name *ch-format* :test #'eq)
+ then (push val pending-ch-format)
+ else (when (not
+ (or (eq last-tag :style)
+ (and (listp last-tag) (eq (first last-tag) :style))
+ (eq last-tag :script)
+ (and (listp last-tag) (eq (first last-tag) :script))))
+ (dolist (tmp (reverse closed-pending-ch-format))
+ (save-state)
+ (setf current-tag tmp)
+ (setf guts nil)))
+ )
+ (when (not
+ (or (eq last-tag :style)
+ (and (listp last-tag) (eq (first last-tag) :style))
+ (eq last-tag :script)
+ (and (listp last-tag) (eq (first last-tag) :script))))
+ (setf closed-pending-ch-format nil))
+ )))
+
+ (:end-tag
+ (setf raw-mode-delimiter nil)
+ (when (or (and callback-only current-callback-tags)
+ (not callback-only))
+ (close-off-tags (list val) nil nil)
+ (when (member val *ch-format* :test #'eq)
+ (setf pending-ch-format
+ (remove val pending-ch-format :count 1
+ :test #'(lambda (x y) (eq x (if (listp y) (first y) y)))))
+ (setf closed-pending-ch-format
+ (remove val closed-pending-ch-format :count 1
+ :test #'(lambda (x y) (eq x (if (listp y) (first y) y)))))
+ )
+ (dolist (tmp (reverse closed-pending-ch-format))
+ (save-state)
+ (setf current-tag tmp)
+ (setf guts nil))
+ (setf closed-pending-ch-format nil)
+ ))
+
+ (:comment
+ (setf raw-mode-delimiter nil)
+ (when (or (and callback-only current-callback-tags)
+ (not callback-only))
+ (push `(:comment ,val) guts)))
+
+ (:eof
+ (setf raw-mode-delimiter nil)
+ ;; close off all tags
+ (when (or (and callback-only current-callback-tags)
+ (not callback-only))
+ (close-off-tags '(:start-parse) nil collect-rogue-tags))
+ (put-back-tokenbuf tokenbuf)
+ (if collect-rogue-tags
+ (return (values (cdar guts) rogue-tags))
+ (return (cdar guts))))))))))
+
+
+
+(defmethod parse-html (file &key callback-only callbacks collect-rogue-tags
+ no-body-tags)
+ (declare (optimize (speed 3) (safety 1)))
+ (with-open-file (p file :direction :input)
+ (parse-html p :callback-only callback-only :callbacks callbacks
+ :collect-rogue-tags collect-rogue-tags
+ :no-body-tags no-body-tags)))
+
+
+(defmethod parse-html ((str string) &key callback-only callbacks collect-rogue-tags
+ no-body-tags)
+ (declare (optimize (speed 3) (safety 1)))
+ (parse-html (make-string-input-stream str)
+ :callback-only callback-only :callbacks callbacks
+ :collect-rogue-tags collect-rogue-tags
+ :no-body-tags no-body-tags))
+
+
+
+
+
+
+
+
+
+;;;;;;;;;;;; test
+
+;;;(defun doit (ignore-data)
+;;; (with-open-file (p "readme.htm")
+;;; (loop
+;;; (multiple-value-bind (val kind) (next-token p ignore-data)
+;;; ;(format t "~s -> ~s~%" kind val)
+;;;
+;;; (if* (eq kind :eof) then (return))))))
+;;;
+;;;(defun pdoit (&optional (file "testa.html"))
+;;; (with-open-file (p file)
+;;; (parse-html p)))
+;;;
+;;;
+;;;;; requires http client module to work
+;;;(defun getparse (host path)
+;;; (parse-html (httpr-body
+;;; (parse-response
+;;; (simple-get host path)))))
+
+(provide :phtml)
--- /dev/null
+<html>
+
+<head>
+<title>A Lisp Based HTML Parser</title>
+<meta name="GENERATOR" content="Microsoft FrontPage 3.0">
+</head>
+
+<body>
+
+<p><big><strong><big>A Lisp Based HTML Parser</big></strong></big></p>
+
+<p><a href="#intro">Introduction/Simple Example</a><br>
+<a href="#lhtml">LHTML parse output format</a><br>
+<a href="#case">Case mode notes</a><br>
+<a href="#comment">Parsing HTML comments</a><br>
+<a href="#script">Parsing <SCRIPT> and <STYLE> tags</a><br>
+<a href="#sgml">Parsing SGML <! tags</a><br>
+<a href="#illegal">Parsing Illegal and Deprecated Tags</a><br>
+<a href="#default">Default Attribute Values</a><br>
+<a href="#char">Parsing Interleaved Character Formatting Tags</a><br>
+<a href="#reference">parse-html reference</a><br>
+ <a href="#methods">methods</a><br>
+ <a href="#internal">phtml-internal</a></p>
+
+<p><a name="intro"></a>The <strong>parse-html</strong> generic function processes HTML
+input, returning a list of HTML tags, attributes, and text. Here is a simple example:<br>
+<br>
+(parse-html "<HTML><br>
+
+<HEAD><br>
+
+<TITLE>Example HTML input</TITLE><br>
+
+<BODY><br>
+
+<P>Here is some text with a <B>bold</B> word<br>and a <A
+HREF=\"help.html\">link</P><br>
+
+</HTML>")</p>
+
+<p>generates:<br>
+<br>
+((:html (:head (:title "Example HTML input"))<br>
+ (:body (:p "Here is some text with a " (:b "bold") "
+word" :br "and a " <br>
+
+((:a :href "help.html") "link")))))<br>
+</p>
+
+<p>The output format is known as LHTML format; it is the same format that the<br>
+aserve htmlgen macro accepts. <br>
+<br>
+<a name="lhtml"></a><strong><big>LHTML format</big></strong><br>
+<br>
+LHTML is a list representation of HTML tags and content.<br>
+<br>
+Each list member may be:
+
+<ol>
+ <li>a string containing text content, such as "Here is some text with a "<br>
+ </li>
+ <li>a keyword package symbol representing a HTML tag with no associated attributes <br>
+ or content, such as :br.<br>
+ </li>
+ <li>a list representing an HTML tag with associated attributes and/or content,<br>
+ such as (:b "bold") or ((:a :href "help.html") "link"). If
+ the HTML tag<br>
+ does not have associated attributes, then the first list member will be a<br>
+ keyword package symbol representing the HTML tag, and the other elements will <br>
+ represent the content, which can be a string (text content), a keyword package symbol
+ (HTML<br>
+ tag with no attributes or content), or list (nested HTML tag with<br>
+ associated attributes and/or content). If there are associated attributes,<br>
+ then the first list member will be a list containing a keyword package symbol<br>
+ followed by two list members for each associated attribute; the first member is a keyword<br>
+ package symbol representing the attribute, and the next member is a string corresponding<br>
+ to the attribute value.<br>
+ </li>
+</ol>
+
+<p><a name="case"></a><strong>Case Mode and LHTML</strong></p>
+
+<p>If excl:*current-case-mode* is :CASE-INSENSITIVE-UPPER, keyword package symbols will be<br>
+in upper case; otherwise, they will be in lower case.</p>
+
+<p><a name="comment"></a><strong>HTML Comments</strong></p>
+
+<p>HTML comments are represented use a :comment symbol. For example,<br>
+<br>
+(parse-html "<!-- this is a comment-->")<br>
+<br>
+--> ((:comment " this is a comment"))</p>
+
+<p><a name="script"></a><strong>HTML <SCRIPT> and <STYLE> tags</strong></p>
+
+<p>All <SCRIPT> and <STYLE> content is not parsed; it is returned as text
+content.<br>
+<br>
+For example,<br>
+<br>
+(parse-html "<SCRIPT>this <B>will not</B> be
+parsed</SCRIPT>")<br>
+<br>
+--> ((:script "this <B>will not</B> be parsed"))</p>
+
+<p><a name="sgml"></a><strong>XML and SGML <! tags</strong></p>
+
+<p>Since, some HTML pages contain special XML/SGML tags, non-comment tags<br>
+starting with '<!' are treated specially:<br>
+<br>
+(parse-html "<!doctype this is some text>")<br>
+<br>
+--> ((:!doctype " this is some text"))</p>
+
+<p><a name="illegal"></a><strong>Illegal and Deprecated HTML</strong></p>
+
+<p>There is plenty of illegal and deprecated HTML on the web that popular browsers<br>
+nonetheless successfully display. The parse-html parser is generous - it will not<br>
+raise an error condition upon encountering most input. In particular, it does not<br>
+maintain a list of legal HTML tags and will successfully parse nonsense input.<br>
+<br>
+For example,<br>
+<br>
+(parse-html "<this> <is> <some> <nonsense>
+<input>")<br>
+<br>
+--> ((:this (:is (:some (:nonsense :input)))))<br>
+<br>
+In some situations, you may prefer a two-pass parse that results in a parse where<br>
+deep nesting related to unrecognized tags is minimized:<br>
+<br>
+(let ((string "<this> <is> <some> <nonsense> </some>
+<input>"))<br>
+ (multiple-value-bind (res rogues)<br>
+ (parse-html string
+:collect-rogue-tags t)<br>
+ (declare (ignorable
+res))<br>
+ (parse-html string
+:no-body-tags rogues)))<br>
+<br>
+--> (:this :is (:some (:nonsense)) :input)<br>
+<br>
+See the <strong>:collect-rogue-tags</strong> and <strong>:no-body-tags</strong> argument
+descriptions in the reference<br>
+section below for more information.</p>
+
+<p><a name="default"></a><strong>Default Attribute values</strong></p>
+
+<p>As per the HTML 4.0 specification, attributes without specified values are given a
+lower case<br>
+string value that matches the attribute name.<br>
+<br>
+For example,<br>
+<br>
+(parse-html "<P here ARE some attributes>")<br>
+<br>
+--> (((:p :here "here" :are "are" :some "some"
+:attributes "attributes")))</p>
+
+<p><a name="char"></a><strong>Interleaved Character Formatting Tags</strong></p>
+
+<p>Existing HTML pages often have character format tags that are interleaved among<br>
+other tags. Such interleaving is removed in a manner consistent with the HTML 4.0<br>
+specification.<br>
+<br>
+For example,<br>
+<br>
+(parse-html "<P>Here is <B>bold text<P>that spans</B>two
+paragraphs")<br>
+<br>
+--> ((:p "Here is " (:b "bold text")) (:p (:b "that
+spans") "two paragraphs"))</p>
+
+<hr>
+
+<p><a name="reference"></a><strong><big>parse-html Reference</big></strong><br>
+<br>
+parse-html [Generic function]<br>
+<br>
+Arguments: input-source &key callbacks callback-only<br>
+ collect-rogue-tags
+no-body-tags<br>
+<br>
+Returns LHTML output, as described above.<br>
+<br>
+The callbacks argument, if non-nil, should be an association list. Each list member's<br>
+car (first) element specifies a keyword package symbol, and each list member's cdr (rest)<br>
+element specifies a function object or a symbol naming a function. The function should<br>
+expect one argument. The function will be invoked once for each time the HTML tag<br>
+corresponding to the specified keyword package symbol is encountered in the HTML input;
+the<br>
+argument will be an LHTML list containing the tag, along with associated attributes and<br>
+content. The default callbacks argument value is nil.<br>
+<br>
+The callback-only argument, if non-nil, directs parse-html to not generate a complete
+LHTML<br>
+output. Instead, LHTML lists will only be generated when necessary as arguments for
+functions<br>
+specified in the callbacks association list. This results in faster parser execution. The
+default<br>
+callback-only argument value is nil.<br>
+<br>
+The collect-rogue-tags argument, if non-nil, directs parse-html to return an additional
+value, <br>
+a list containing any unrecognized tags closed by the end of input.<br>
+<br>
+The no-body-tags argument, if non-nil, should be a list containing unknown tags that, if<br>
+encountered, will be treated as a tag with no body or content, and thus, no associated end<br>
+tag. Typically, the argument is a list or modified list resulting from an earlier
+parse-html<br>
+execution with the :collect-rogue-tags argument specified as non-nil.<br>
+<br>
+<a name="methods"></a><strong>parse-html Methods</strong><br>
+<br>
+parse-html (p stream) &key callbacks callback-only<br>
+ collect-rogue-tags
+no-body-tags<br>
+<br>
+parse-html (str string) &key callbacks callback-only<br>
+ collect-rogue-tags
+no-body-tags<br>
+<br>
+parse-html (file t) &key callbacks callback-only<br>
+ collect-rogue-tags
+no-body-tags<br>
+<br>
+The t method assumes the argument is a pathname suitable<br>
+for use with the with-open-file macro.<br>
+<br>
+<br>
+<a name="internal"></a><strong>phtml-internal [Function]</strong><br>
+<br>
+Arguments: stream read-sequence-func callback-only callbacks<br>
+collect-rogue-tags no-body-tags<br>
+<br>
+This function may be used when more control is needed for supplying<br>
+the HTML input. The read-sequence-func argument, if non-nil, should be a function<br>
+object or a symbol naming a function. When phtml-internal requires another buffer<br>
+of HTML input, it will invoke the read-sequence-func function with two arguments -<br>
+the first argument is an internal buffer character array and the second argument is<br>
+the phtml-internal stream argument. If read-sequence-fun is nil, phtml-internal<br>
+will invoke read-sequence to fill the buffer. The read-sequence-func function must<br>
+return the number of character array elements successfully stored in the buffer.<br>
+<br>
+<br>
+<br>
+<br>
+<br>
+<br>
+<br>
+</p>
+</body>
+</html>
--- /dev/null
+<html>
+
+<head>
+<title>A Lisp Based HTML Parser</title>
+<meta name="GENERATOR" content="Microsoft FrontPage 3.0">
+</head>
+
+<body>
+
+<p><big><strong><big>A Lisp Based HTML Parser</big></strong></big></p>
+
+<p><a href="#intro">Introduction/Simple Example</a><br>
+<a href="#lhtml">LHTML parse output format</a><br>
+<a href="#case">Case mode notes</a><br>
+<a href="#comment">Parsing HTML comments</a><br>
+<a href="#script">Parsing <SCRIPT> and <STYLE> tags</a><br>
+<a href="#sgml">Parsing SGML <! tags</a><br>
+<a href="#illegal">Parsing Illegal and Deprecated Tags</a><br>
+<a href="#default">Default Attribute Values</a><br>
+<a href="#char">Parsing Interleaved Character Formatting Tags</a><br>
+<a href="#reference">parse-html reference</a><br>
+ <a href="#methods">methods</a><br>
+ <a href="#internal">phtml-internal</a></p>
+
+<p><a name="intro"></a>The <strong>parse-html</strong> generic function processes HTML
+input, returning a list of HTML tags, attributes, and text. Here is a simple example:<br>
+<br>
+(parse-html "<HTML><br>
+
+<HEAD><br>
+
+<TITLE>Example HTML input</TITLE><br>
+
+<BODY><br>
+
+<P>Here is some text with a <B>bold</B> word<br>and a <A
+HREF=\"help.html\">link</P><br>
+
+</HTML>")</p>
+
+<p>generates:<br>
+<br>
+((:html (:head (:title "Example HTML input"))<br>
+ (:body (:p "Here is some text with a " (:b "bold") "
+word" :br "and a " <br>
+
+((:a :href "help.html") "link")))))<br>
+</p>
+
+<p>The output format is known as LHTML format; it is the same format that the<br>
+aserve htmlgen macro accepts. <br>
+<br>
+<a name="lhtml"></a><strong><big>LHTML format</big></strong><br>
+<br>
+LHTML is a list representation of HTML tags and content.<br>
+<br>
+Each list member may be:
+
+<ol>
+ <li>a string containing text content, such as "Here is some text with a "<br>
+ </li>
+ <li>a keyword package symbol representing a HTML tag with no associated attributes <br>
+ or content, such as :br.<br>
+ </li>
+ <li>a list representing an HTML tag with associated attributes and/or content,<br>
+ such as (:b "bold") or ((:a :href "help.html") "link"). If
+ the HTML tag<br>
+ does not have associated attributes, then the first list member will be a<br>
+ keyword package symbol representing the HTML tag, and the other elements will <br>
+ represent the content, which can be a string (text content), a keyword package symbol
+ (HTML<br>
+ tag with no attributes or content), or list (nested HTML tag with<br>
+ associated attributes and/or content). If there are associated attributes,<br>
+ then the first list member will be a list containing a keyword package symbol<br>
+ followed by two list members for each associated attribute; the first member is a keyword<br>
+ package symbol representing the attribute, and the next member is a string corresponding<br>
+ to the attribute value.<br>
+ </li>
+</ol>
+
+<p><a name="case"></a><strong>Case Mode and LHTML</strong></p>
+
+<p>If excl:*current-case-mode* is :CASE-INSENSITIVE-UPPER, keyword package symbols will be<br>
+in upper case; otherwise, they will be in lower case.</p>
+
+<p><a name="comment"></a><strong>HTML Comments</strong></p>
+
+<p>HTML comments are represented use a :comment symbol. For example,<br>
+<br>
+(parse-html "<!-- this is a comment-->")<br>
+<br>
+--> ((:comment " this is a comment"))</p>
+
+<p><a name="script"></a><strong>HTML <SCRIPT> and <STYLE> tags</strong></p>
+
+<p>All <SCRIPT> and <STYLE> content is not parsed; it is returned as text
+content.<br>
+<br>
+For example,<br>
+<br>
+(parse-html "<SCRIPT>this <B>will not</B> be
+parsed</SCRIPT>")<br>
+<br>
+--> ((:script "this <B>will not</B> be parsed"))</p>
+
+<p><a name="sgml"></a><strong>XML and SGML <! tags</strong></p>
+
+<p>Since, some HTML pages contain special XML/SGML tags, non-comment tags<br>
+starting with '<!' are treated specially:<br>
+<br>
+(parse-html "<!doctype this is some text>")<br>
+<br>
+--> ((:!doctype " this is some text"))</p>
+
+<p><a name="illegal"></a><strong>Illegal and Deprecated HTML</strong></p>
+
+<p>There is plenty of illegal and deprecated HTML on the web that popular browsers<br>
+nonetheless successfully display. The parse-html parser is generous - it will not<br>
+raise an error condition upon encountering most input. In particular, it does not<br>
+maintain a list of legal HTML tags and will successfully parse nonsense input.<br>
+<br>
+For example,<br>
+<br>
+(parse-html "<this> <is> <some> <nonsense>
+<input>")<br>
+<br>
+--> ((:this (:is (:some (:nonsense :input)))))<br>
+<br>
+In some situations, you may prefer a two-pass parse that results in a parse where<br>
+deep nesting related to unrecognized tags is minimized:<br>
+<br>
+(let ((string "<this> <is> <some> <nonsense> </some>
+<input>"))<br>
+ (multiple-value-bind (res rogues)<br>
+ (parse-html string
+:collect-rogue-tags t)<br>
+ (declare (ignorable
+res))<br>
+ (parse-html string
+:no-body-tags rogues)))<br>
+<br>
+--> (:this :is (:some (:nonsense)) :input)<br>
+<br>
+See the <strong>:collect-rogue-tags</strong> and <strong>:no-body-tags</strong> argument
+descriptions in the reference<br>
+section below for more information.</p>
+
+<p><a name="default"></a><strong>Default Attribute values</strong></p>
+
+<p>As per the HTML 4.0 specification, attributes without specified values are given a
+lower case<br>
+string value that matches the attribute name.<br>
+<br>
+For example,<br>
+<br>
+(parse-html "<P here ARE some attributes>")<br>
+<br>
+--> (((:p :here "here" :are "are" :some "some"
+:attributes "attributes")))</p>
+
+<p><a name="char"></a><strong>Interleaved Character Formatting Tags</strong></p>
+
+<p>Existing HTML pages often have character format tags that are interleaved among<br>
+other tags. Such interleaving is removed in a manner consistent with the HTML 4.0<br>
+specification.<br>
+<br>
+For example,<br>
+<br>
+(parse-html "<P>Here is <B>bold text<P>that spans</B>two
+paragraphs")<br>
+<br>
+--> ((:p "Here is " (:b "bold text")) (:p (:b "that
+spans") "two paragraphs"))</p>
+
+<hr>
+
+<p><a name="reference"></a><strong><big>parse-html Reference</big></strong><br>
+<br>
+parse-html [Generic function]<br>
+<br>
+Arguments: input-source &key callbacks callback-only<br>
+ collect-rogue-tags
+no-body-tags<br>
+<br>
+Returns LHTML output, as described above.<br>
+<br>
+The callbacks argument, if non-nil, should be an association list. Each list member's<br>
+car (first) element specifies a keyword package symbol, and each list member's cdr (rest)<br>
+element specifies a function object or a symbol naming a function. The function should<br>
+expect one argument. The function will be invoked once for each time the HTML tag<br>
+corresponding to the specified keyword package symbol is encountered in the HTML input;
+the<br>
+argument will be an LHTML list containing the tag, along with associated attributes and<br>
+content. The default callbacks argument value is nil.<br>
+<br>
+The callback-only argument, if non-nil, directs parse-html to not generate a complete
+LHTML<br>
+output. Instead, LHTML lists will only be generated when necessary as arguments for
+functions<br>
+specified in the callbacks association list. This results in faster parser execution. The
+default<br>
+callback-only argument value is nil.<br>
+<br>
+The collect-rogue-tags argument, if non-nil, directs parse-html to return an additional
+value, <br>
+a list containing any unrecognized tags closed by the end of input.<br>
+<br>
+The no-body-tags argument, if non-nil, should be a list containing unknown tags that, if<br>
+encountered, will be treated as a tag with no body or content, and thus, no associated end<br>
+tag. Typically, the argument is a list or modified list resulting from an earlier
+parse-html<br>
+execution with the :collect-rogue-tags argument specified as non-nil.<br>
+<br>
+<a name="methods"></a><strong>parse-html Methods</strong><br>
+<br>
+parse-html (p stream) &key callbacks callback-only<br>
+ collect-rogue-tags
+no-body-tags<br>
+<br>
+parse-html (str string) &key callbacks callback-only<br>
+ collect-rogue-tags
+no-body-tags<br>
+<br>
+parse-html (file t) &key callbacks callback-only<br>
+ collect-rogue-tags
+no-body-tags<br>
+<br>
+The t method assumes the argument is a pathname suitable<br>
+for use with the with-open-file macro.<br>
+<br>
+<br>
+<a name="internal"></a><strong>phtml-internal [Function]</strong><br>
+<br>
+Arguments: stream read-sequence-func callback-only callbacks<br>
+collect-rogue-tags no-body-tags<br>
+<br>
+This function may be used when more control is needed for supplying<br>
+the HTML input. The read-sequence-func argument, if non-nil, should be a function<br>
+object or a symbol naming a function. When phtml-internal requires another buffer<br>
+of HTML input, it will invoke the read-sequence-func function with two arguments -<br>
+the first argument is an internal buffer character array and the second argument is<br>
+the phtml-internal stream argument. If read-sequence-fun is nil, phtml-internal<br>
+will invoke read-sequence to fill the buffer. The read-sequence-func function must<br>
+return the number of character array elements successfully stored in the buffer.<br>
+<br>
+<br>
+<br>
+<br>
+<br>
+<br>
+<br>
+</p>
+</body>
+</html>
--- /dev/null
+Preliminary HTML Parser documentation
+
+Pending tasks:
+
+ . integrate with aserve components, such as htmlgen and LHTML description
+
+
+Description
+
+The parse-html function processes HTML input, returning a list of HTML tags,
+attributes, and text. Here is a simple example:
+
+(parse-html "<HTML>
+ <HEAD>
+ <TITLE>Example HTML input</TITLE>
+ <BODY>
+ <P>Here is some text with a <B>bold</B> word<br>and a <A HREF=\"help.html\">link</P>
+ </HTML>")
+
+-->
+
+((:html (:head (:title "Example HTML input"))
+ (:body (:p "Here is some text with a " (:b "bold") " word" :br "and a "
+ ((:a :href "help.html") "link")))))
+
+
+The output format is known as LHTML format; it is the same format that the
+aserve htmlgen macro accepts.
+
+Here is a description of LHTML:
+
+LHTML is a list representation of HTML tags and content.
+
+Each list member may be:
+
+a. a string containing text content, such as "Here is some text with a "
+
+b. a keyword package symbol representing a HTML tag with no associated attributes
+ or content, such as :br.
+
+c. a list representing an HTML tag with associated attributes and/or content,
+ such as (:b "bold") or ((:a :href "help.html") "link"). If the HTML tag
+ does not have associated attributes, then the first list member will be a
+ keyword package symbol representing the HTML tag, and the other elements will
+ represent the content, which can be a string (text content), a keyword package symbol (HTML
+ tag with no attributes or content), or list (nested HTML tag with
+ associated attributes and/or content). If there are associated attributes,
+ then the first list member will be a list containing a keyword package symbol
+ followed by two list members for each associated attribute; the first member is a keyword
+ package symbol representing the attribute, and the next member is a string corresponding
+ to the attribute value.
+
+Here are some additional details about parse-html output:
+
+1. If excl:*current-case-mode* is :CASE-INSENSITIVE-UPPER, keyword package symbols will be
+ in upper case; otherwise, they will be in lower case.
+
+2. HTML comments are represented use a :comment symbol. For example,
+
+ (parse-html "<!-- this is a comment-->")
+
+--> ((:comment " this is a comment"))
+
+3. All <SCRIPT> and <STYLE> content is not parsed; it is returned as text content.
+
+ For example,
+
+ (parse-html "<SCRIPT>this <B>will not</B> be parsed</SCRIPT>")
+
+--> ((:script "this <B>will not</B> be parsed"))
+
+4. Since, some HTML pages contain special XML/SGML tags, non-comment tags
+ starting with '<!' are treated specially:
+
+ (parse-html "<!doctype this is some text>")
+
+--> ((:!doctype " this is some text"))
+
+5. There is plenty of illegal and deprecated HTML on the web that popular browsers
+ nonetheless successfully display. The parse-html parser is generous - it will not
+ raise an error condition upon encountering most input. In particular, it does not
+ maintain a list of legal HTML tags and will successfully parse nonsense input.
+
+ For example,
+
+ (parse-html "<this> <is> <some> <nonsense> <input>")
+
+--> ((:this (:is (:some (:nonsense :input)))))
+
+ In some situations, you may prefer a two-pass parse that results in a parse where
+ deep nesting related to unrecognized tags is minimized:
+
+ (let ((string "<this> <is> <some> <nonsense> </some> <input>"))
+ (multiple-value-bind (res rogues)
+ (parse-html string :collect-rogue-tags t)
+ (declare (ignorable res))
+ (parse-html string :no-body-tags rogues)))
+
+--> (:this :is (:some (:nonsense)) :input)
+
+ See the :collect-rogue-tags and :no-body-tags argument descriptions in the reference
+ section below for more information.
+
+6. As per the HTML 4.0 specification, attributes without specified values are given a lower case
+ string value that matches the attribute name.
+
+ For example,
+
+ (parse-html "<P here ARE some attributes>")
+
+--> (((:p :here "here" :are "are" :some "some" :attributes "attributes")))
+
+7. Existing HTML pages often have character format tags that are interleaved among
+ other tags. Such interleaving is removed in a manner consistent with the HTML 4.0
+ specification.
+
+ For example,
+
+ (parse-html "<P>Here is <B>bold text<P>that spans</B>two paragraphs")
+
+--> ((:p "Here is " (:b "bold text")) (:p (:b "that spans") "two paragraphs"))
+
+-----------------------------------------------------
+
+parse-html reference
+
+parse-html [Generic function]
+
+Arguments: input-source &key callbacks callback-only
+ collect-rogue-tags no-body-tags
+
+Returns LHTML output, as described above.
+
+The callbacks argument, if non-nil, should be an association list. Each list member's
+car (first) element specifies a keyword package symbol, and each list member's cdr (rest)
+element specifies a function object or a symbol naming a function. The function should
+expect one argument. The function will be invoked once for each time the HTML tag
+corresponding to the specified keyword package symbol is encountered in the HTML input; the
+argument will be an LHTML list containing the tag, along with associated attributes and
+content. The default callbacks argument value is nil.
+
+The callback-only argument, if non-nil, directs parse-html to not generate a complete LHTML
+output. Instead, LHTML lists will only be generated when necessary as arguments for functions
+specified in the callbacks association list. This results in faster parser execution. The default
+callback-only argument value is nil.
+
+The collect-rogue-tags argument, if non-nil, directs parse-html to return an additional value,
+a list containing any unrecognized tags closed by the end of input.
+
+The no-body-tags argument, if non-nil, should be a list containing unknown tags that, if
+encountered, will be treated as a tag with no body or content, and thus, no associated end
+tag. Typically, the argument is a list or modified list resulting from an earlier parse-html
+execution with the :collect-rogue-tags argument specified as non-nil.
+
+
+
+parse-html Methods
+
+parse-html (p stream) &key callbacks callback-only
+ collect-rogue-tags no-body-tags
+
+parse-html (str string) &key callbacks callback-only
+ collect-rogue-tags no-body-tags
+
+parse-html (file t) &key callbacks callback-only
+ collect-rogue-tags no-body-tags
+
+The t method assumes the argument is a pathname suitable
+for use with the with-open-file macro.
+
+
+phtml-internal [Function]
+
+Arguments: stream read-sequence-func callback-only callbacks
+ collect-rogue-tags no-body-tags
+
+This function may be used when more control is needed for supplying
+the HTML input. The read-sequence-func argument, if non-nil, should be a function
+object or a symbol naming a function. When phtml-internal requires another buffer
+of HTML input, it will invoke the read-sequence-func function with two arguments -
+the first argument is an internal buffer character array and the second argument is
+the phtml-internal stream argument. If read-sequence-fun is nil, phtml-internal
+will invoke read-sequence to fill the buffer. The read-sequence-func function must
+return the number of character array elements successfully stored in the buffer.
+
+
+
+
+
+
+
--- /dev/null
+;;
+;; copyright (c) 1986-2000 Franz Inc, Berkeley, CA
+;;
+;; This code is free software; you can redistribute it and/or
+;; modify it under the terms of the version 2.1 of
+;; the GNU Lesser General Public License as published by
+;; the Free Software Foundation, as clarified by the AllegroServe
+;; prequel found in license-allegroserve.txt.
+;;
+;; This code is distributed in the hope that it will be useful,
+;; but without any warranty; without even the implied warranty of
+;; merchantability or fitness for a particular purpose. See the GNU
+;; Lesser General Public License for more details.
+;;
+;; Version 2.1 of the GNU Lesser General Public License is in the file
+;; license-lgpl.txt that was distributed with this file.
+;; If it is not present, you can access it from
+;; http://www.gnu.org/copyleft/lesser.txt (until superseded by a newer
+;; version) or write to the Free Software Foundation, Inc., 59 Temple Place,
+;; Suite 330, Boston, MA 02111-1307 USA
+;;
+
+;; Change Log
+;;
+;; 10/14/00 add namespace example; xml-error related change
+
+(eval-when (compile load eval)
+ (require :tester))
+
+(defpackage :user (:use :net.uri :net.xml.parser)) ;; assumes pxml.cl loaded
+(in-package :user)
+
+;; these functions are used in the OASIS xmltest subdirectories
+;; see pxml.txt for more information
+
+(defun file-callback (filename token &optional public)
+ (declare (ignorable token public))
+ ;;(format t "filename: ~s token: ~s public: ~s~%" filename token public)
+ (ignore-errors (open (uri-path filename))))
+
+(defun test-one-file (int external-callback)
+ (let ((filename (concatenate 'string (format nil "~3,'0d" int) ".xml")))
+ (equalp (with-open-file (p filename)
+ (parse-xml p :external-callback external-callback
+ :content-only t))
+ (with-open-file (p (concatenate 'string "out/" filename))
+ (parse-xml p)))))
+
+(defun test-some-files (max &key skip-list external-callback)
+ (dotimes (i max)
+ (if* (member (+ 1 i) skip-list) then
+ (format t "i: ~s skipping...~%" (+ 1 i))
+ else
+ (format t "i: ~s equalp: ~s~%" (+ 1 i) (test-one-file (+ 1 i) external-callback)))))
+
+;; have to be in valid/sa directory when this is run
+(defun test-sa-files ()
+ (test-some-files 119 :external-callback 'file-callback :skip-list (list 52 64 89)))
+
+;; have to be in valid/ext-sa directory when this is run
+(defun test-ext-sa-files ()
+ (test-some-files 14 :external-callback 'file-callback ))
+
+;; have to be in valid/not-sa directory when this is run
+(defun test-not-sa-files ()
+ (test-some-files 31 :external-callback 'file-callback ))
+
+(defun test-one-bad-file (filename external-callback)
+ (ignore-errors
+ (with-open-file (p filename)
+ (parse-xml p :external-callback external-callback
+ :content-only t))))
+
+(defun test-some-bad-files (max external-callback)
+ (dotimes (i max)
+ (let* ((index (+ 1 i))
+ (filename (concatenate 'string (format nil "~3,'0d" index) ".xml")))
+ (multiple-value-bind (val error)
+ (test-one-bad-file filename external-callback)
+ (format t "i: ~s error: ~s~%"
+ index (if error
+ (simple-condition-format-arguments error) val))))))
+
+;; have to be in not-wf/sa directory when this is run
+(defun test-not-wf-sa-files ()
+ (test-some-bad-files 186 'file-callback))
+
+;; have to be in not-wf/ext-sa directory when this is run
+(defun test-not-wf-ext-sa-files ()
+ (test-some-bad-files 3 'file-callback))
+
+;; have to be in not-wf/not-sa directory when this is run
+(defun test-not-wf-not-sa-files ()
+ (test-some-bad-files 8 'file-callback))
+
+;; the next stuff is used in the .txt file for documentation
+
+(defvar *xml-example-external-url*
+ "<!ENTITY ext1 'this is some external entity %param1;'>")
+
+(defun example-callback (var-name token &optional public)
+ (declare (ignorable token public))
+ (setf var-name (uri-path var-name))
+ (if* (equal var-name "null") then nil
+ else
+ (let ((string (eval (intern var-name (find-package :user)))))
+ (make-string-input-stream string))))
+
+(defvar *xml-example-string*
+ "<?xml version='1.0' encoding='utf-8'?>
+<!-- the following XML input is well-formed but its validity has not been checked ... -->
+<?piexample this is an example processing instruction tag ?>
+<!DOCTYPE example SYSTEM '*xml-example-external-url*' [
+ <!ELEMENT item1 (item2* | (item3+ , item4))>
+ <!ELEMENT item2 ANY>
+ <!ELEMENT item3 (#PCDATA)>
+ <!ELEMENT item4 (#PCDATA)>
+ <!ATTLIST item1
+ att1 CDATA #FIXED 'att1-default'
+ att2 ID #REQUIRED
+ att3 ( one | two | three ) 'one'
+ att4 NOTATION ( four | five ) 'four' >
+ <!ENTITY % param1 'text'>
+ <!ENTITY nentity SYSTEM 'null' NDATA somedata>
+ <!NOTATION notation SYSTEM 'notation-processor'>
+]>
+<item1 att2='1'><item3>&ext1;</item3></item1>")
+
+(defvar *xml-example-string2*)
+(defvar *xml-example-string3*)
+
+;; bug fix testing
+(setf *xml-example-string2*
+ "<!DOCTYPE example [
+<!ELEMENT item1 (item2* | (item3+ , item4))>
+]>
+<item1/>")
+
+(setf *xml-example-string3*
+ "<!DOCTYPE example [
+<!ELEMENT item1 (item2* | (item3+ , item4*))>
+]>
+<item1/>")
+
+(defvar *xml-example-string4*)
+
+(setf *xml-example-string4*
+ "<bibliography
+ xmlns:bib='http://www.bibliography.org/XML/bib.ns'
+ xmlns='urn:com:books-r-us'>
+ <bib:book owner='Smith'>
+ <bib:title>A Tale of Two Cities</bib:title>
+ <bib:bibliography
+ xmlns:bib='http://www.franz.com/XML/bib.ns'
+ xmlns='urn:com:books-r-us'>
+ <bib:library branch='Main'>UK Library</bib:library>
+ <bib:date calendar='Julian'>1999</bib:date>
+ </bib:bibliography>
+ <bib:date calendar='Julian'>1999</bib:date>
+ </bib:book>
+ </bibliography>")
\ No newline at end of file
--- /dev/null
+<html>
+
+<head>
+<title>A Lisp Based XML Parser</title>
+<meta name="GENERATOR" content="Microsoft FrontPage 3.0">
+</head>
+
+<body>
+
+<p><strong><big><big>A Lisp Based XML Parser</big></big></strong></p>
+
+<p><a href="#intro">Introduction/Simple Example</a><br>
+<a href="#lxml">LXML parse output format</a><br>
+<a href="#props">parse-xml non-validating parser properties</a><br>
+<a href="#modern">case and international character support issues</a><br>
+<a href="#keyword">parse-xml and packages</a><br>
+<a href="#namespace">parse-xml, the XML Namespace specification, and packages</a><br>
+<a href="#unicode-scalar">ACL does not support Unicode 4 byte scalar values</a><br>
+<a href="#big-endian">only little-endian Unicode tested in ACL 6.0 beta</a><br>
+<a href="#debug">debugging aids</a><br>
+<a href="#conformance">XML Conformance test results</a><br>
+<a href="#build">Compiling and Loading the parser</a><br>
+<a href="#reference">parse-xml reference</a></p>
+
+<p><a name="intro"></a>The <strong>parse-xml </strong>generic function processes XML
+input, returning a list of XML tags,<br>
+attributes, and text. Here is a simple example:<br>
+<br>
+(parse-xml "<item1><item2 att1='one'/>this is some
+text</item1>")<br>
+<br>
+--><br>
+<br>
+((item1 ((item2 att1 "one")) "this is some text"))<br>
+<br>
+The output format is known as LXML format.<br>
+<br>
+<a name="lxml"></a><strong>LXML Format</strong><br>
+<br>
+LXML is a list representation of XML tags and content.<br>
+<br>
+Each list member may be:<br>
+<br>
+a. a string containing text content, such as "Here is some text with a "<br>
+<br>
+b. a list representing a XML tag with associated attributes and/or content,
+such as ('item1 "text") or (('item1 :att1 "help.html")
+"link"). If the XML tag
+does not have associated attributes, then the first list member will be a
+symbol representing the XML tag, and the other elements will
+represent the content, which can be a string (text content), a symbol (XML
+tag with no attributes or content), or list (nested XML tag with
+associated attributes and/or content). If there are associated attributes,
+then the first list member will be a list containing a symbol
+followed by two list members for each associated attribute; the first member is a
+symbol representing the attribute, and the next member is a string corresponding
+to the attribute value.<br>
+<br>
+c. XML comments and or processing instructions - see the more detailed example below for
+further information.</p>
+
+<p><a name="props"></a><strong>Non Validating Parser Properties</strong></p>
+
+<p>Parse-xml is a non-validating XML parser. It will detect non-well-formed XML input.
+When<br>
+processing valid XML input, parse-xml will optionally produce the same output as a
+validating <br>
+parser would, including the processing of an external DTD subset and external entity
+declarations.<br>
+<br>
+By default, parse-xml outputs a DTD parse along with the parsed XML contents. The DTD
+parse may<br>
+be optionally suppressed. The following example shows DTD parsed output components:</p>
+
+<p>(defvar *xml-example-external-url*<br>
+ "<!ENTITY ext1 'this is some external entity %param1;'>")<br>
+<br>
+(defun example-callback (var-name token &optional public)<br>
+ (declare (ignorable token public))<br>
+ (setf var-name (uri-path var-name))<br>
+ (if* (equal var-name "null") then nil<br>
+ else<br>
+ (let ((string (eval (intern var-name (find-package
+:user)))))<br>
+ (make-string-input-stream string))))<br>
+<br>
+(defvar *xml-example-string*<br>
+"<?xml version='1.0' encoding='utf-8'?><br>
+<!-- the following XML input is well-formed but its validity has not been checked ...
+--><br>
+<?piexample this is an example processing instruction tag ?><br>
+<!DOCTYPE example SYSTEM '*xml-example-external-url*' [<br>
+ <!ELEMENT item1 (item2* | (item3+ , item4))><br>
+ <!ELEMENT item2 ANY><br>
+ <!ELEMENT item3 (#PCDATA)><br>
+ <!ELEMENT item4 (#PCDATA)><br>
+ <!ATTLIST item1<br>
+ att1 CDATA #FIXED 'att1-default'<br>
+ att2 ID #REQUIRED<br>
+ att3 ( one | two | three ) 'one'<br>
+ att4 NOTATION ( four | five ) 'four' ><br>
+ <!ENTITY % param1 'text'><br>
+ <!ENTITY nentity SYSTEM 'null' NDATA somedata><br>
+ <!NOTATION notation SYSTEM 'notation-processor'><br>
+ ]><br>
+<item1 att2='1'><item3>&ext1;</item3></item1>")<br>
+<br>
+(pprint (parse-xml *xml-example-string* :external-callback 'example-callback))<br>
+<br>
+--><br>
+<br>
+((:xml :version "1.0" :encoding "utf-8")<br>
+ (:comment " the following XML input is well-formed but may or may not be valid
+")<br>
+ (:pi :piexample "this is an example processing instruction tag ")<br>
+ (:DOCTYPE :example<br>
+ (:[ (:ELEMENT :item1 (:choice (:* :item2) (:seq (:+ :item3) :item4))) <br>
+ (:ELEMENT :item2 :ANY)<br>
+ (:ELEMENT :item3 :PCDATA) (:ELEMENT :item4
+:PCDATA)<br>
+ (:ATTLIST item1 (att1 :CDATA :FIXED
+"att1-default") (att2 :ID :REQUIRED)<br>
+ (att3
+(:enumeration :one :two :three) "one") <br>
+ (att4 (:NOTATION
+:four :five) "four"))<br>
+ (:ENTITY :param1 :param "text") <br>
+ (:ENTITY :nentity :SYSTEM "null"
+:NDATA :somedata)<br>
+ (:NOTATION :notation :SYSTEM
+"notation-processor"))<br>
+ (:external (:ENTITY :ext1 "this is some external entity
+text")))<br>
+ ((item1 att1 "att1-default" att2 "1" att3 "one"
+att4 "four") <br>
+ (item3 "this is some external entity
+text")))<br>
+<br>
+<br>
+<strong><big>Usage Notes</big></strong><br>
+<br>
+<ol>
+<li><a name="modern"></a>The parse-xml function has been primarily compiled and tested in a
+modern ACL. However, in an ANSI Lisp with wide character support, it DOES pass the valid
+component of the conformance suite in the same manner as it does in a Modern Lisp. The
+parser's successful operation in all potential situations depends on wide character support.
+<br><br>
+</li>
+<li><a name="keyword"></a>The parser uses the keyword package for DTD tokens and other
+special XML tokens. Since element and attribute token symbols are usually interned
+in the current package, it is not recommended to execute parse-xml
+when the current package is the keyword package.
+<br><br>
+</li>
+<li><a name="namespace"></a>The XML parser supports the XML Namespaces specification. The
+parser recognizes a "xmlns" attribute and attribute names starting with
+"xmlns:".
+As per the specification, the parser expects that the associated value
+is an URI string. The parser then associates XML Namespace prefixes with a
+Lisp package provided via the parse-xml :uri-to-package option or, if
+necessary, a package created on the fly. The following example demonstrates
+this behavior:<br>
+
+<p>(setf *xml-example-string4*<br>
+ "<bibliography<br>
+ xmlns:bib='http://www.bibliography.org/XML/bib.ns'<br>
+ xmlns='urn:com:books-r-us'><br>
+ <bib:book owner='Smith'><br>
+ <bib:title>A Tale of Two Cities</bib:title><br>
+ <bib:bibliography<br>
+ xmlns:bib='http://www.franz.com/XML/bib.ns'<br>
+ xmlns='urn:com:books-r-us'><br>
+ <bib:library branch='Main'>UK
+Library</bib:library><br>
+ <bib:date calendar='Julian'>1999</bib:date><br>
+ </bib:bibliography><br>
+ <bib:date calendar='Julian'>1999</bib:date><br>
+ </bib:book><br>
+</bibliography>")<br>
+<br>
+(setf *uri-to-package* nil)<br>
+(setf *uri-to-package*<br>
+ (acons (parse-uri <a href="http://www.bibliography.org/XML/bib.ns">"http://www.bibliography.org/XML/bib.ns"</a>)<br>
+ (make-package "bib") *uri-to-package*))<br>
+(setf *uri-to-package*<br>
+ (acons (parse-uri <a href="http://www.bibliography.org/XML/bib.ns">"</a>urn:com:books-r-us<a
+href="http://www.bibliography.org/XML/bib.ns">"</a>)<br>
+ (make-package "royal") *uri-to-package*))<br>
+(setf *uri-to-package*<br>
+ (acons (parse-uri <a href="http://www.bibliography.org/XML/bib.ns">"</a>http://www.franz.com/XML/bib.ns<a
+href="http://www.bibliography.org/XML/bib.ns">"</a>)<br>
+ (make-package "franz-ns") *uri-to-package*))<br>
+(pprint (multiple-value-list<br>
+ (parse-xml
+*xml-example-string4*<br>
+ :uri-to-package
+*uri-to-package*)))<br>
+<br>
+--><br>
+((((bibliography |xmlns:bib| <a href="http://www.bibliography.org/XML/bib.ns">"http://www.bibliography.org/XML/bib.ns"</a><br>
+ xmlns "urn:com:books-r-us")<br>
+ "<br>
+ "<br>
+ ((bib::book royal::owner "Smith") "<br>
+ " (bib::title "A Tale of Two
+Cities") "<br>
+ "<br>
+ ((bib::bibliography royal::|xmlns:bib|<br>
+ "http://www.franz.com/XML/bib.ns" royal::xmlns<br>
+ "urn:com:books-r-us")<br>
+ "<br>
+ " ((franz-ns::library royal::branch
+"Main") "UK Library") "<br>
+ " ((franz-ns::date royal::calendar
+"Julian") "1999") "<br>
+ ")<br>
+ "<br>
+ " ((bib::date royal::calendar
+"Julian") "1999") "<br>
+ ")<br>
+ "<br>
+ "))<br>
+((#<uri http://www.franz.com/XML/bib.ns> . #<The franz-ns package>)<br>
+ (#<uri urn:com:books-r-us> . #<The royal package>)<br>
+ (#<uri http://www.bibliography.org/XML/bib.ns> . #<The bib package>)))<br>
+<br>
+</li>
+<li>In the absence of XML Namespace attributes, element and attribute symbols are interned
+in the current package. Note that this implies that attributes and elements referenced
+in DTD content will be interned in the current package.
+</li>
+<li>The parse-xml function has been tested using the OASIS conformance test suite (see
+details below). The test suite has wide coverage across possible XML and DTD syntax,
+but there may be some syntax paths that have not yet been tested or completely
+supported. Here is a list of currently known syntax parsing issues:
+<ul>
+<li><a name="unicode-scalar"></a>ACL does not support 4 byte Unicode scalar values, so
+input containing such data
+will not be processed correctly. (Note, however, that parse-xml does correctly detect
+and process wide Unicode input.)
+</li>
+<li><a name="big-endian"></a>The OASIS tests that contain wide Unicode all use a
+little-endian encoded Unicode.
+Changes to the unicode-check function are required to also support big-endian encoded
+Unicode. (Note also that this issue may be resolved by an ACL 6.0 final release change.)
+</li>
+<li>An initial <?xml declaration in external entity files is skipped without a check
+being made to see if the <?xml declaration is itself incorrect.
+</li>
+</ul>
+</li>
+<li><a name="debug"></a>When investigating possible parser errors or examining more closely
+where the parser
+determined that the input was non-well-formed, the net.xml.parser internal symbols
+*debug-xml* and *debug-dtd* are useful. When not bound to nil, these variables cause
+lexical analysis and intermediate parsing results to be output to *standard-output*.
+</li>
+<li><a name="loading"></a>It is necessary to load the <b>pxml</b> module before using it.
+Typically this can be done by evaluating <b>(require :pxml)</b>.
+</li>
+</ol>
+<a name="conformance"></a><strong>XML Conformance Test Suite</strong><br>
+<br>
+Using the OASIS test suite <a href="http://www.oasis-open.org">(http://www.oasis-open.org)</a>,
+here are the current parse-xml results:<br>
+<br>
+xmltest/invalid: Not tested, since parse-xml is a non-validating parser<br>
+<br>
+not-wf/<br>
+<br>
+ ext.sa: 3 tests; all pass<br>
+ not-sa: 8 tests; all pass<br>
+ sa: 186 tests; the following fail:<br>
+<br>
+ 170.xml: fails because ACL does not support 4
+byte Unicode scalar values<br>
+<br>
+valid/<br>
+<br>
+ ext-sa: 14 tests; all pass<br>
+ not-sa: 31 tests; all pass<br>
+ sa: 119 tests: the following fail:<br>
+<br>
+ 052.xml, 064.xml, 089.xml: fails because ACL
+does not support 4 byte <br>
+
+Unicode scalar values<br>
+<br>
+<a name="build"></a><big><strong>Compiling and Loading</strong></big><br>
+<br>
+Load build.cl into a modern ACL session will result in a pxml.fasl file that can
+subsequently be<br>
+loaded in a modern ACL to provide XML parsing functionality.<br>
+<br>
+-------------------------------------------------------------------------------------------<br>
+<br>
+<a name="reference"></a><big><strong>parse-xml reference</strong></big><br>
+<br>
+parse-xml [Generic
+function]<br>
+<br>
+Arguments: input-source &key external-callback content-only <br>
+ general-entities
+parameter-entities<br>
+ uri-to-package<br>
+<br>
+Returns multiple values:<br>
+<ol>
+<li>LXML and parsed DTD output, as described above.</li>
+<li>An association list containing the uri-to-package argument conses (if any)
+and conses associated with any XML Namespace packages created during the
+parse (see uri-to-package argument description, below).</li>
+</ol>
+The external-callback argument, if specified, is a function object or symbol
+that parse-xml will execute when encountering an external DTD subset
+or external entity DTD declaration. Here is an example which shows that
+arguments the function should expect, and the value it should return:
+<br><pre>
+(defun file-callback (uri-object token &optional public)
+ ;; The uri-object is an ACL URI object created from
+ ;; the XML input. In this example, this function
+ ;; assumes that all uri's will be file specifications.
+ ;;
+ ;; The token argument identifies what token is associated
+ ;; with the external parse (for example :DOCTYPE for external
+ ;; DTD subset
+ ;;
+ ;; The public argument contains the associated PUBLIC string,
+ ;; when present
+ ;;
+ (declare (ignorable token public))
+ ;; An open stream is returned on success,
+ ;; a nil return value indicates that the external
+ ;; parse should not occur.
+ ;; Note that parse-xml will close the open stream before exiting.
+ (ignore-errors (open (uri-path uri-object))))
+</pre>
+<p>
+The general-entities argument is an association list containing general entity symbol
+and replacement text pairs. The entity symbols should be in the keyword package.
+Note that this option may be useful in generating desirable parse results in
+situations where you do not wish to parse external entities or the external DTD subset.
+<p>
+The parameter-entities argument is an association list containing parameter entity symbol
+and replacement text pairs. The entity symbols should be in the keyword package.
+Note that this option may be useful in generating desirable parse results in
+situations where you do not wish to parse external entities or the external DTD subset.
+<p>
+The uri-to-package argument is an association list containing uri objects and package
+objects. Typically, the uri objects correspond to XML Namespace attribute values, and
+the package objects correspond to the desired package for interning symbols associated
+with the uri namespace. If the parser encounters an uri object not contained in this list,
+it will generate a new package. The first generated package will be named
+net.xml.namespace.0,
+the second will be named net.xml.namespace.1, and so on.
+<h3>parse-xml methods</h3>
+<pre>
+(parse-xml (p stream) &key
+ external-callback content-only
+ general-entities
+ parameter-entities
+ uri-to-package)
+
+(parse-xml (str string) &key
+ external-callback content-only
+ general-entities
+ parameter-entities
+ uri-to-package)
+</pre>
+An easy way to parse a file containing XML input:
+<pre>
+(with-open-file (p "example.xml")
+ (parse-xml p :content-only p))
+</pre>
+<h3>net.xml.parser unexported special variables:</h3>
+<p>
+*debug-xml*<br>
+<br>
+When true, parse-xml generates XML lexical state and intermediary
+parse result debugging output.
+<p>
+*debug-dtd*<br>
+<br>
+When true, parse-xml generates DTD lexical state and intermediary
+parse result debugging output.
+</body>
+</html>
--- /dev/null
+<html>
+
+<head>
+<title>A Lisp Based XML Parser</title>
+<meta name="GENERATOR" content="Microsoft FrontPage 3.0">
+</head>
+
+<body>
+
+<p><strong><big><big>A Lisp Based XML Parser</big></big></strong></p>
+
+<p><a href="#intro">Introduction/Simple Example</a><br>
+<a href="#lxml">LXML parse output format</a><br>
+<a href="#props">parse-xml non-validating parser properties</a><br>
+<a href="#modern">case and international character support issues</a><br>
+<a href="#keyword">parse-xml and packages</a><br>
+<a href="#namespace">parse-xml, the XML Namespace specification, and packages</a><br>
+<a href="#unicode-scalar">ACL does not support Unicode 4 byte scalar values</a><br>
+<a href="#big-endian">only little-endian Unicode tested in ACL 6.0 beta</a><br>
+<a href="#debug">debugging aids</a><br>
+<a href="#conformance">XML Conformance test results</a><br>
+<a href="#build">Compiling and Loading the parser</a><br>
+<a href="#reference">parse-xml reference</a></p>
+
+<p><a name="intro"></a>The <strong>parse-xml </strong>generic function processes XML
+input, returning a list of XML tags,<br>
+attributes, and text. Here is a simple example:<br>
+<br>
+(parse-xml "<item1><item2 att1='one'/>this is some
+text</item1>")<br>
+<br>
+--><br>
+<br>
+((item1 ((item2 att1 "one")) "this is some text"))<br>
+<br>
+The output format is known as LXML format.<br>
+<br>
+<a name="lxml"></a><strong>LXML Format</strong><br>
+<br>
+LXML is a list representation of XML tags and content.<br>
+<br>
+Each list member may be:<br>
+<br>
+a. a string containing text content, such as "Here is some text with a "<br>
+<br>
+b. a list representing a XML tag with associated attributes and/or content,
+such as ('item1 "text") or (('item1 :att1 "help.html")
+"link"). If the XML tag
+does not have associated attributes, then the first list member will be a
+symbol representing the XML tag, and the other elements will
+represent the content, which can be a string (text content), a symbol (XML
+tag with no attributes or content), or list (nested XML tag with
+associated attributes and/or content). If there are associated attributes,
+then the first list member will be a list containing a symbol
+followed by two list members for each associated attribute; the first member is a
+symbol representing the attribute, and the next member is a string corresponding
+to the attribute value.<br>
+<br>
+c. XML comments and or processing instructions - see the more detailed example below for
+further information.</p>
+
+<p><a name="props"></a><strong>Non Validating Parser Properties</strong></p>
+
+<p>Parse-xml is a non-validating XML parser. It will detect non-well-formed XML input.
+When<br>
+processing valid XML input, parse-xml will optionally produce the same output as a
+validating <br>
+parser would, including the processing of an external DTD subset and external entity
+declarations.<br>
+<br>
+By default, parse-xml outputs a DTD parse along with the parsed XML contents. The DTD
+parse may<br>
+be optionally suppressed. The following example shows DTD parsed output components:</p>
+
+<p>(defvar *xml-example-external-url*<br>
+ "<!ENTITY ext1 'this is some external entity %param1;'>")<br>
+<br>
+(defun example-callback (var-name token &optional public)<br>
+ (declare (ignorable token public))<br>
+ (setf var-name (uri-path var-name))<br>
+ (if* (equal var-name "null") then nil<br>
+ else<br>
+ (let ((string (eval (intern var-name (find-package
+:user)))))<br>
+ (make-string-input-stream string))))<br>
+<br>
+(defvar *xml-example-string*<br>
+"<?xml version='1.0' encoding='utf-8'?><br>
+<!-- the following XML input is well-formed but its validity has not been checked ...
+--><br>
+<?piexample this is an example processing instruction tag ?><br>
+<!DOCTYPE example SYSTEM '*xml-example-external-url*' [<br>
+ <!ELEMENT item1 (item2* | (item3+ , item4))><br>
+ <!ELEMENT item2 ANY><br>
+ <!ELEMENT item3 (#PCDATA)><br>
+ <!ELEMENT item4 (#PCDATA)><br>
+ <!ATTLIST item1<br>
+ att1 CDATA #FIXED 'att1-default'<br>
+ att2 ID #REQUIRED<br>
+ att3 ( one | two | three ) 'one'<br>
+ att4 NOTATION ( four | five ) 'four' ><br>
+ <!ENTITY % param1 'text'><br>
+ <!ENTITY nentity SYSTEM 'null' NDATA somedata><br>
+ <!NOTATION notation SYSTEM 'notation-processor'><br>
+ ]><br>
+<item1 att2='1'><item3>&ext1;</item3></item1>")<br>
+<br>
+(pprint (parse-xml *xml-example-string* :external-callback 'example-callback))<br>
+<br>
+--><br>
+<br>
+((:xml :version "1.0" :encoding "utf-8")<br>
+ (:comment " the following XML input is well-formed but may or may not be valid
+")<br>
+ (:pi :piexample "this is an example processing instruction tag ")<br>
+ (:DOCTYPE :example<br>
+ (:[ (:ELEMENT :item1 (:choice (:* :item2) (:seq (:+ :item3) :item4))) <br>
+ (:ELEMENT :item2 :ANY)<br>
+ (:ELEMENT :item3 :PCDATA) (:ELEMENT :item4
+:PCDATA)<br>
+ (:ATTLIST item1 (att1 :CDATA :FIXED
+"att1-default") (att2 :ID :REQUIRED)<br>
+ (att3
+(:enumeration :one :two :three) "one") <br>
+ (att4 (:NOTATION
+:four :five) "four"))<br>
+ (:ENTITY :param1 :param "text") <br>
+ (:ENTITY :nentity :SYSTEM "null"
+:NDATA :somedata)<br>
+ (:NOTATION :notation :SYSTEM
+"notation-processor"))<br>
+ (:external (:ENTITY :ext1 "this is some external entity
+text")))<br>
+ ((item1 att1 "att1-default" att2 "1" att3 "one"
+att4 "four") <br>
+ (item3 "this is some external entity
+text")))<br>
+<br>
+<br>
+<strong><big>Usage Notes</big></strong><br>
+<br>
+<ol>
+<li><a name="modern"></a>The parse-xml function has been primarily compiled and tested in a
+modern ACL. However, in an ANSI Lisp with wide character support, it DOES pass the valid
+component of the conformance suite in the same manner as it does in a Modern Lisp. The
+parser's successful operation in all potential situations depends on wide character support.
+<br><br>
+</li>
+<li><a name="keyword"></a>The parser uses the keyword package for DTD tokens and other
+special XML tokens. Since element and attribute token symbols are usually interned
+in the current package, it is not recommended to execute parse-xml
+when the current package is the keyword package.
+<br><br>
+</li>
+<li><a name="namespace"></a>The XML parser supports the XML Namespaces specification. The
+parser recognizes a "xmlns" attribute and attribute names starting with
+"xmlns:".
+As per the specification, the parser expects that the associated value
+is an URI string. The parser then associates XML Namespace prefixes with a
+Lisp package provided via the parse-xml :uri-to-package option or, if
+necessary, a package created on the fly. The following example demonstrates
+this behavior:<br>
+
+<p>(setf *xml-example-string4*<br>
+ "<bibliography<br>
+ xmlns:bib='http://www.bibliography.org/XML/bib.ns'<br>
+ xmlns='urn:com:books-r-us'><br>
+ <bib:book owner='Smith'><br>
+ <bib:title>A Tale of Two Cities</bib:title><br>
+ <bib:bibliography<br>
+ xmlns:bib='http://www.franz.com/XML/bib.ns'<br>
+ xmlns='urn:com:books-r-us'><br>
+ <bib:library branch='Main'>UK
+Library</bib:library><br>
+ <bib:date calendar='Julian'>1999</bib:date><br>
+ </bib:bibliography><br>
+ <bib:date calendar='Julian'>1999</bib:date><br>
+ </bib:book><br>
+</bibliography>")<br>
+<br>
+(setf *uri-to-package* nil)<br>
+(setf *uri-to-package*<br>
+ (acons (parse-uri <a href="http://www.bibliography.org/XML/bib.ns">"http://www.bibliography.org/XML/bib.ns"</a>)<br>
+ (make-package "bib") *uri-to-package*))<br>
+(setf *uri-to-package*<br>
+ (acons (parse-uri <a href="http://www.bibliography.org/XML/bib.ns">"</a>urn:com:books-r-us<a
+href="http://www.bibliography.org/XML/bib.ns">"</a>)<br>
+ (make-package "royal") *uri-to-package*))<br>
+(setf *uri-to-package*<br>
+ (acons (parse-uri <a href="http://www.bibliography.org/XML/bib.ns">"</a>http://www.franz.com/XML/bib.ns<a
+href="http://www.bibliography.org/XML/bib.ns">"</a>)<br>
+ (make-package "franz-ns") *uri-to-package*))<br>
+(pprint (multiple-value-list<br>
+ (parse-xml
+*xml-example-string4*<br>
+ :uri-to-package
+*uri-to-package*)))<br>
+<br>
+--><br>
+((((bibliography |xmlns:bib| <a href="http://www.bibliography.org/XML/bib.ns">"http://www.bibliography.org/XML/bib.ns"</a><br>
+ xmlns "urn:com:books-r-us")<br>
+ "<br>
+ "<br>
+ ((bib::book royal::owner "Smith") "<br>
+ " (bib::title "A Tale of Two
+Cities") "<br>
+ "<br>
+ ((bib::bibliography royal::|xmlns:bib|<br>
+ "http://www.franz.com/XML/bib.ns" royal::xmlns<br>
+ "urn:com:books-r-us")<br>
+ "<br>
+ " ((franz-ns::library royal::branch
+"Main") "UK Library") "<br>
+ " ((franz-ns::date royal::calendar
+"Julian") "1999") "<br>
+ ")<br>
+ "<br>
+ " ((bib::date royal::calendar
+"Julian") "1999") "<br>
+ ")<br>
+ "<br>
+ "))<br>
+((#<uri http://www.franz.com/XML/bib.ns> . #<The franz-ns package>)<br>
+ (#<uri urn:com:books-r-us> . #<The royal package>)<br>
+ (#<uri http://www.bibliography.org/XML/bib.ns> . #<The bib package>)))<br>
+<br>
+</li>
+<li>In the absence of XML Namespace attributes, element and attribute symbols are interned
+in the current package. Note that this implies that attributes and elements referenced
+in DTD content will be interned in the current package.
+</li>
+<li>The parse-xml function has been tested using the OASIS conformance test suite (see
+details below). The test suite has wide coverage across possible XML and DTD syntax,
+but there may be some syntax paths that have not yet been tested or completely
+supported. Here is a list of currently known syntax parsing issues:
+<ul>
+<li><a name="unicode-scalar"></a>ACL does not support 4 byte Unicode scalar values, so
+input containing such data
+will not be processed correctly. (Note, however, that parse-xml does correctly detect
+and process wide Unicode input.)
+</li>
+<li><a name="big-endian"></a>The OASIS tests that contain wide Unicode all use a
+little-endian encoded Unicode.
+Changes to the unicode-check function are required to also support big-endian encoded
+Unicode. (Note also that this issue may be resolved by an ACL 6.0 final release change.)
+</li>
+<li>An initial <?xml declaration in external entity files is skipped without a check
+being made to see if the <?xml declaration is itself incorrect.
+</li>
+</ul>
+</li>
+<li><a name="debug"></a>When investigating possible parser errors or examining more closely
+where the parser
+determined that the input was non-well-formed, the net.xml.parser internal symbols
+*debug-xml* and *debug-dtd* are useful. When not bound to nil, these variables cause
+lexical analysis and intermediate parsing results to be output to *standard-output*.
+</li>
+<li><a name="loading"></a>It is necessary to load the <b>pxml</b> module before using it.
+Typically this can be done by evaluating <b>(require :pxml)</b>.
+</li>
+</ol>
+<a name="conformance"></a><strong>XML Conformance Test Suite</strong><br>
+<br>
+Using the OASIS test suite <a href="http://www.oasis-open.org">(http://www.oasis-open.org)</a>,
+here are the current parse-xml results:<br>
+<br>
+xmltest/invalid: Not tested, since parse-xml is a non-validating parser<br>
+<br>
+not-wf/<br>
+<br>
+ ext.sa: 3 tests; all pass<br>
+ not-sa: 8 tests; all pass<br>
+ sa: 186 tests; the following fail:<br>
+<br>
+ 170.xml: fails because ACL does not support 4
+byte Unicode scalar values<br>
+<br>
+valid/<br>
+<br>
+ ext-sa: 14 tests; all pass<br>
+ not-sa: 31 tests; all pass<br>
+ sa: 119 tests: the following fail:<br>
+<br>
+ 052.xml, 064.xml, 089.xml: fails because ACL
+does not support 4 byte <br>
+
+Unicode scalar values<br>
+<br>
+<a name="build"></a><big><strong>Compiling and Loading</strong></big><br>
+<br>
+Load build.cl into a modern ACL session will result in a pxml.fasl file that can
+subsequently be<br>
+loaded in a modern ACL to provide XML parsing functionality.<br>
+<br>
+-------------------------------------------------------------------------------------------<br>
+<br>
+<a name="reference"></a><big><strong>parse-xml reference</strong></big><br>
+<br>
+parse-xml [Generic
+function]<br>
+<br>
+Arguments: input-source &key external-callback content-only <br>
+ general-entities
+parameter-entities<br>
+ uri-to-package<br>
+<br>
+Returns multiple values:<br>
+<ol>
+<li>LXML and parsed DTD output, as described above.</li>
+<li>An association list containing the uri-to-package argument conses (if any)
+and conses associated with any XML Namespace packages created during the
+parse (see uri-to-package argument description, below).</li>
+</ol>
+The external-callback argument, if specified, is a function object or symbol
+that parse-xml will execute when encountering an external DTD subset
+or external entity DTD declaration. Here is an example which shows that
+arguments the function should expect, and the value it should return:
+<br><pre>
+(defun file-callback (uri-object token &optional public)
+ ;; The uri-object is an ACL URI object created from
+ ;; the XML input. In this example, this function
+ ;; assumes that all uri's will be file specifications.
+ ;;
+ ;; The token argument identifies what token is associated
+ ;; with the external parse (for example :DOCTYPE for external
+ ;; DTD subset
+ ;;
+ ;; The public argument contains the associated PUBLIC string,
+ ;; when present
+ ;;
+ (declare (ignorable token public))
+ ;; An open stream is returned on success,
+ ;; a nil return value indicates that the external
+ ;; parse should not occur.
+ ;; Note that parse-xml will close the open stream before exiting.
+ (ignore-errors (open (uri-path uri-object))))
+</pre>
+<p>
+The general-entities argument is an association list containing general entity symbol
+and replacement text pairs. The entity symbols should be in the keyword package.
+Note that this option may be useful in generating desirable parse results in
+situations where you do not wish to parse external entities or the external DTD subset.
+<p>
+The parameter-entities argument is an association list containing parameter entity symbol
+and replacement text pairs. The entity symbols should be in the keyword package.
+Note that this option may be useful in generating desirable parse results in
+situations where you do not wish to parse external entities or the external DTD subset.
+<p>
+The uri-to-package argument is an association list containing uri objects and package
+objects. Typically, the uri objects correspond to XML Namespace attribute values, and
+the package objects correspond to the desired package for interning symbols associated
+with the uri namespace. If the parser encounters an uri object not contained in this list,
+it will generate a new package. The first generated package will be named
+net.xml.namespace.0,
+the second will be named net.xml.namespace.1, and so on.
+<h3>parse-xml methods</h3>
+<pre>
+(parse-xml (p stream) &key
+ external-callback content-only
+ general-entities
+ parameter-entities
+ uri-to-package)
+
+(parse-xml (str string) &key
+ external-callback content-only
+ general-entities
+ parameter-entities
+ uri-to-package)
+</pre>
+An easy way to parse a file containing XML input:
+<pre>
+(with-open-file (p "example.xml")
+ (parse-xml p :content-only p))
+</pre>
+<h3>net.xml.parser unexported special variables:</h3>
+<p>
+*debug-xml*<br>
+<br>
+When true, parse-xml generates XML lexical state and intermediary
+parse result debugging output.
+<p>
+*debug-dtd*<br>
+<br>
+When true, parse-xml generates DTD lexical state and intermediary
+parse result debugging output.
+</body>
+</html>
--- /dev/null
+Description
+
+The parse-xml function processes XML input, returning a list of XML tags,
+attributes, and text. Here is a simple example:
+
+(parse-xml "<item1><item2 att1='one'/>this is some text</item1>")
+
+-->
+
+((item1 ((item2 att1 "one")) "this is some text"))
+
+The output format is known as LXML format.
+
+Here is a description of LXML:
+
+LXML is a list representation of XML tags and content.
+
+Each list member may be:
+
+a. a string containing text content, such as "Here is some text with a "
+
+b. a list representing a XML tag with associated attributes and/or content,
+ such as ('item1 "text") or (('item1 :att1 "help.html") "link"). If the XML tag
+ does not have associated attributes, then the first list member will be a
+ symbol representing the XML tag, and the other elements will
+ represent the content, which can be a string (text content), a symbol (XML
+ tag with no attributes or content), or list (nested XML tag with
+ associated attributes and/or content). If there are associated attributes,
+ then the first list member will be a list containing a symbol
+ followed by two list members for each associated attribute; the first member is a
+ symbol representing the attribute, and the next member is a string corresponding
+ to the attribute value.
+
+c. XML comments and or processing instructions - see the more detailed example below for
+ further information.
+
+Parse-xml is a non-validating XML parser. It will detect non-well-formed XML input. When
+processing valid XML input, parse-xml will optionally produce the same output as a validating
+parser would, including the processing of an external DTD subset and external entity declarations.
+
+By default, parse-xml outputs a DTD parse along with the parsed XML contents. The DTD parse may
+be optionally suppressed. The following example shows DTD parsed output components:
+
+(defvar *xml-example-external-url*
+ "<!ENTITY ext1 'this is some external entity %param1;'>")
+
+(defun example-callback (var-name token &optional public)
+ (declare (ignorable token public))
+ (setf var-name (uri-path var-name))
+ (if* (equal var-name "null") then nil
+ else
+ (let ((string (eval (intern var-name (find-package :user)))))
+ (make-string-input-stream string))))
+
+(defvar *xml-example-string*
+ "<?xml version='1.0' encoding='utf-8'?>
+<!-- the following XML input is well-formed but its validity has not been checked ... -->
+<?piexample this is an example processing instruction tag ?>
+<!DOCTYPE example SYSTEM '*xml-example-external-url*' [
+ <!ELEMENT item1 (item2* | (item3+ , item4))>
+ <!ELEMENT item2 ANY>
+ <!ELEMENT item3 (#PCDATA)>
+ <!ELEMENT item4 (#PCDATA)>
+ <!ATTLIST item1
+ att1 CDATA #FIXED 'att1-default'
+ att2 ID #REQUIRED
+ att3 ( one | two | three ) 'one'
+ att4 NOTATION ( four | five ) 'four' >
+ <!ENTITY % param1 'text'>
+ <!ENTITY nentity SYSTEM 'null' NDATA somedata>
+ <!NOTATION notation SYSTEM 'notation-processor'>
+]>
+<item1 att2='1'><item3>&ext1;</item3></item1>")
+
+(pprint (parse-xml *xml-example-string* :external-callback 'example-callback))
+
+-->
+
+((:xml :version "1.0" :encoding "utf-8")
+ (:comment " the following XML input is well-formed but may or may not be valid ")
+ (:pi :piexample "this is an example processing instruction tag ")
+ (:DOCTYPE :example
+ (:[ (:ELEMENT :item1 (:choice (:* :item2) (:seq (:+ :item3) :item4)))
+ (:ELEMENT :item2 :ANY)
+ (:ELEMENT :item3 :PCDATA) (:ELEMENT :item4 :PCDATA)
+ (:ATTLIST item1 (att1 :CDATA :FIXED "att1-default") (att2 :ID :REQUIRED)
+ (att3 (:enumeration :one :two :three) "one")
+ (att4 (:NOTATION :four :five) "four"))
+ (:ENTITY :param1 :param "text")
+ (:ENTITY :nentity :SYSTEM "null" :NDATA :somedata)
+ (:NOTATION :notation :SYSTEM "notation-processor"))
+ (:external (:ENTITY :ext1 "this is some external entity text")))
+ ((item1 att1 "att1-default" att2 "1" att3 "one" att4 "four")
+ (item3 "this is some external entity text")))
+
+
+Usage Notes:
+
+1. The parse-xml function has been compiled and tested only in a
+ modern ACL. Its successful operation depends on both the mixed
+ case support and wide character support found in modern ACL.
+
+2. The parser uses the keyword package for DTD tokens and other
+ special XML tokens. Since element and attribute token symbols are usually interned
+ in the current package, it is not recommended to execute parse-xml
+ when the current package is the keyword package.
+
+3. The XML parser supports the XML Namespaces specification. The parser
+ recognizes a "xmlns" attribute and attribute names starting with "xmlns:".
+ As per the specification, the parser expects that the associated value
+ is an URI string. The parser then associates XML Namespace prefixes with a
+ Lisp package provided via the parse-xml :uri-to-package option or, if
+ necessary, a package created on the fly. The following example demonstrates
+ this behavior:
+
+ (setf *xml-example-string4*
+ "<bibliography
+ xmlns:bib='http://www.bibliography.org/XML/bib.ns'
+ xmlns='urn:royal-mail.gov.uk/XML/ns/postal.ns,1999'>
+ <bib:book owner='Smith'>
+ <bib:title>A Tale of Two Cities</bib:title>
+ <bib:bibliography
+ xmlns:bib='http://www.franz.com/XML/bib.ns'
+ xmlns='urn:royal-mail2.gov.uk/XML/ns/postal.ns,1999'>
+ <bib:library branch='Main'>UK Library</bib:library>
+ <bib:date calendar='Julian'>1999</bib:date>
+ </bib:bibliography>
+ <bib:date calendar='Julian'>1999</bib:date>
+ </bib:book>
+ </bibliography>")
+
+ (setf *uri-to-package* nil)
+ (setf *uri-to-package*
+ (acons (parse-uri "http://www.bibliography.org/XML/bib.ns")
+ (make-package "bib") *uri-to-package*))
+ (setf *uri-to-package*
+ (acons (parse-uri "urn:royal-mail.gov.uk/XML/ns/postal.ns,1999")
+ (make-package "royal") *uri-to-package*))
+ (setf *uri-to-package*
+ (acons (parse-uri "http://www.franz.com/XML/bib.ns")
+ (make-package "franz-ns") *uri-to-package*))
+ (pprint (multiple-value-list
+ (parse-xml *xml-example-string4*
+ :uri-to-package *uri-to-package*)))
+
+-->
+
+((((bibliography |xmlns:bib| "http://www.bibliography.org/XML/bib.ns" xmlns
+ "urn:royal-mail.gov.uk/XML/ns/postal.ns,1999")
+ "
+ "
+ ((bib::book royal::owner "Smith") "
+ " (bib::title "A Tale of Two Cities") "
+ "
+ ((bib::bibliography royal::|xmlns:bib| "http://www.franz.com/XML/bib.ns" royal::xmlns
+ "urn:royal-mail2.gov.uk/XML/ns/postal.ns,1999")
+ "
+ " ((franz-ns::library net.xml.namespace.0::branch "Main") "UK Library") "
+ " ((franz-ns::date net.xml.namespace.0::calendar "Julian") "1999") "
+ ")
+ "
+ " ((bib::date royal::calendar "Julian") "1999") "
+ ")
+ "
+ "))
+ ((#<uri urn:royal-mail2.gov.ukXML/ns/postal.ns,1999> . #<The net.xml.namespace.0 package>)
+ (#<uri http://www.franz.com/XML/bib.ns> . #<The franz-ns package>)
+ (#<uri urn:royal-mail.gov.ukXML/ns/postal.ns,1999> . #<The royal package>)
+ (#<uri http://www.bibliography.org/XML/bib.ns> . #<The bib package>)))
+
+ In the absence of XML Namespace attributes, element and attribute symbols are interned
+ in the current package. Note that this implies that attributes and elements referenced
+ in DTD content will be interned in the current package.
+
+4. The ACL 6.0 beta does not contain a little-endian Unicode external format. To
+ process XML input containing Unicode characters correctly:
+
+ a. Place the following in a file called ef-fat-little.cl in the ACL code
+ directory:
+
+(provide :ef-fat-little)
+
+(in-package :excl)
+
+(def-external-format :fat-little-base
+ :size 2)
+
+(def-char-to-octets-macro :fat-little-base (char
+ state
+ &key put-next-octet external-format)
+ (declare (ignore external-format state))
+ `(let ((code (char-code ,char)))
+ (,put-next-octet (ldb (byte 8 0) code))
+ (,put-next-octet (ldb (byte 8 8) code))))
+
+(def-octets-to-char-macro :fat-little-base (state-loc
+ &key get-next-octet external-format
+ octets-count-loc unget-octets)
+ (declare (ignore external-format state-loc unget-octets))
+ `(let ((lo ,get-next-octet)
+ (hi (progn (incf ,octets-count-loc)
+ ,get-next-octet)))
+ (code-char (+ (ash hi 8) lo))))
+
+(create-newline-ef :name :fat-little :base-name :fat-little-base
+ :nicknames '(:unicode-little))
+
+
+ b. Compile the file using a modern ACL.
+
+5. The parse-xml function has been tested using the OASIS conformance test suite (see
+ details below). The test suite has wide coverage across possible XML and DTD syntax,
+ but there may be some syntax paths that have not yet been tested or completely
+ supported. Here is a list of currently known syntax parsing issues:
+
+ a. ACL does not support 4 byte Unicode scalar values, so input containing such data
+ will not be processed correctly. (Note, however, that parse-xml does correctly detect
+ and process wide Unicode input.)
+
+ b. The OASIS tests that contain wide Unicode all use a little-endian encoded Unicode.
+ Changes to the unicode-check function are required to also support big-endian encoded
+ Unicode. (Note also that this issue may be resolved by an ACL 6.0 final release change.)
+
+ c. An initial <?xml declaration in external entity files is skipped without a check
+ being made to see if the <?xml declaration is itself incorrect.
+
+6. When investigating possible parser errors or examining more closely where the parser
+ determined that the input was non-well-formed, the net.xml.parser internal symbols
+ *debug-xml* and *debug-dtd* are useful. When not bound to nil, these variables cause
+ lexical analysis and intermediate parsing results to be output to *standard-output*.
+
+XML Conformance Test Suite
+
+Using the OASIS test suite (http://www.oasis-open.org),
+here are the current parse-xml results:
+
+xmltest/invalid: Not tested, since parse-xml is a non-validating parser
+
+ not-wf/
+
+ ext.sa: 3 tests; all pass
+ not-sa: 8 tests; all pass
+ sa: 186 tests; the following fail:
+
+ 170.xml: fails because ACL does not support 4 byte Unicode scalar values
+
+ valid/
+
+ ext-sa: 14 tests; all pass
+ not-sa: 31 tests; all pass
+ sa: 119 tests: the following fail:
+
+ 052.xml, 064.xml, 089.xml: fails because ACL does not support 4 byte
+ Unicode scalar values
+
+Compiling and Loading
+
+Load build.cl into a modern ACL session will result in a pxml.fasl file that can subsequently be
+loaded in a modern ACL to provide XML parsing functionality.
+
+-------------------------------------------------------------------------------------------
+
+parse-xml reference
+
+parse-xml [Generic function]
+
+Arguments: input-source &key external-callback content-only
+ general-entities parameter-entities
+ uri-to-package
+
+Returns multiple values:
+
+ 1) LXML and parsed DTD output, as described above.
+ 2) An association list containing the uri-to-package argument conses (if any)
+ and conses associated with any XML Namespace packages created during the
+ parse (see uri-to-package argument description, below).
+
+The external-callback argument, if specified, is a function object or symbol
+that parse-xml will execute when encountering an external DTD subset
+or external entity DTD declaration. Here is an example which shows that
+arguments the function should expect, and the value it should return:
+
+(defun file-callback (uri-object token &optional public)
+ ;; the uri-object is an ACL URI object created from
+ ;; the XML input. In this example, this function
+ ;; assumes that all uri's will be file specifications.
+ ;;
+ ;; the token argument identifies what token is associated
+ ;; with the external parse (for example :DOCTYPE for external
+ ;; DTD subset
+ ;;
+ ;; the public argument contains the associated PUBLIC string,
+ ;; when present
+ ;;
+ (declare (ignorable token public))
+ ;; an open stream is returned on success
+ ;; a nil return value indicates that the external
+ ;; parse should not occur
+ ;; Note that parse-xml will close the open stream before
+ ;; exiting
+ (ignore-errors (open (uri-path uri-object))))
+
+The general-entities argument is an association list containing general entity symbol
+and replacement text pairs. The entity symbols should be in the keyword package.
+Note that this option may be useful in generating desirable parse results in
+situations where you do not wish to parse external entities or the external DTD subset.
+
+The parameter-entities argument is an association list containing parameter entity symbol
+and replacement text pairs. The entity symbols should be in the keyword package.
+Note that this option may be useful in generating desirable parse results in
+situations where you do not wish to parse external entities or the external DTD subset.
+
+The uri-to-package argument is an association list containing uri objects and package
+objects. Typically, the uri objects correspond to XML Namespace attribute values, and
+the package objects correspond to the desired package for interning symbols associated
+with the uri namespace. If the parser encounters an uri object not contained in this list,
+it will generate a new package. The first generated package will be named net.xml.namespace.0,
+the second will be named net.xml.namespace.1, and so on.
+
+parse-xml Methods
+
+(parse-xml (p stream) &key external-callback content-only
+ general-entities parameter-entities
+ uri-to-package)
+
+(parse-xml (str string) &key external-callback content-only
+ general-entities parameter-entities
+ uri-to-package)
+
+An easy way to parse a file containing XML input:
+
+(with-open-file (p "example.xml")
+ (parse-xml p :content-only p))
+
+net.xml.parser unexported special variables:
+
+*debug-xml*
+
+When not bound to nil, generates XML lexical state and intermediary
+parse result debugging output.
+
+*debug-dtd*
+
+When not bound to nil, generates DTD lexical state and intermediary
+parse result debugging output.
\ No newline at end of file
--- /dev/null
+;;
+;; copyright (c) 1986-2000 Franz Inc, Berkeley, CA
+;;
+;; This code is free software; you can redistribute it and/or
+;; modify it under the terms of the version 2.1 of
+;; the GNU Lesser General Public License as published by
+;; the Free Software Foundation, as clarified by the AllegroServe
+;; prequel found in license-allegroserve.txt.
+;;
+;; This code is distributed in the hope that it will be useful,
+;; but without any warranty; without even the implied warranty of
+;; merchantability or fitness for a particular purpose. See the GNU
+;; Lesser General Public License for more details.
+;;
+;; Version 2.1 of the GNU Lesser General Public License is in the file
+;; license-lgpl.txt that was distributed with this file.
+;; If it is not present, you can access it from
+;; http://www.gnu.org/copyleft/lesser.txt (until superseded by a newer
+;; version) or write to the Free Software Foundation, Inc., 59 Temple Place,
+;; Suite 330, Boston, MA 02111-1307 USA
+;;
+;; $Id: pxml0.cl,v 1.1 2002/10/15 12:23:03 kevin Exp $
+
+;; pxml.cl - parse xml
+;;
+;; Change Log
+;;
+;; 12/05/00 changes to allow using in ANSI mode lisp
+;; 12/20/00 namespace example fix; correct whitespace bug when
+;; looking for xml? tag in external entity files
+;;
+
+(defpackage net.xml.parser
+ (:use :lisp :clos :excl :net.uri)
+ (:export
+ #:parse-xml)
+ )
+
+(in-package :net.xml.parser)
+
+(unless (fboundp 'pxml-dribble-bug-hook)
+ (let ((pxml-version-strings nil))
+ (defun pxml-dribble-bug-hook (stream-or-string)
+ (if (stringp stream-or-string)
+ (push stream-or-string pxml-version-strings)
+ (loop for string in (reverse pxml-version-strings)
+ do (write-string string stream-or-string)
+ (terpri stream-or-string))))
+
+ (push 'pxml-dribble-bug-hook excl:*dribble-bug-hooks*)))
+
+(funcall 'pxml-dribble-bug-hook "$Id: pxml0.cl,v 1.1 2002/10/15 12:23:03 kevin Exp $")
+
+(defun xml-char-p (char)
+ (declare (optimize (speed 3) (safety 1)))
+ (let ((code (char-code char)))
+ (if* (eq code #x9) then t
+ elseif (eq code #xA) then t
+ elseif (eq code #xD) then t
+ elseif (<= #x20 code #xD7FF) then t
+ elseif (<= #xE000 code #xFFFD) then t
+ else nil)))
+
+(defun xml-space-p (char)
+ (declare (optimize (speed 3) (safety 1)))
+ (let ((code (char-code char)))
+ (or (eq code #x20)
+ (eq code #x9)
+ (eq code #xD)
+ (eq code #xA))))
+
+#+unused
+(defmacro xml-eql-char-p (char)
+ `(eq ,char #\=))
+
+(defun xml-base-char-p (char)
+ (declare (optimize (speed 3) (safety 1)))
+ (let ((code (char-code char)))
+ (or (<= #x0041 code #x005A) (<= #x0061 code #x007A)
+ (<= #x00C0 code #x00D6) (<= #x00D8 code #x00F6)
+ (<= #x00F8 code #x00FF) (<= #x0100 code #x0131)
+ (<= #x0134 code #x013E) (<= #x0141 code #x0148)
+ (<= #x014A code #x017E) (<= #x0180 code #x01C3)
+ (<= #x01CD code #x01F0) (<= #x01F4 code #x01F5)
+ (<= #x01FA code #x0217) (<= #x0250 code #x02A8)
+ (<= #x02BB code #x02C1) (= code #x0386) (<= #x0388 code #x038A)
+ (= code #x038C) (<= #x038E code #x03A1) (<= #x03A3 code #x03CE)
+ (<= #x03D0 code #x03D6) (= code #x03DA) (= code #x03DC) (= code #x03DE)
+ (= code #x03E0) (<= #x03E2 code #x03F3) (<= #x0401 code #x040C)
+ (<= #x040E code #x044F) (<= #x0451 code #x045C)
+ (<= #x045E code #x0481) (<= #x0490 code #x04C4)
+ (<= #x04C7 code #x04C8) (<= #x04CB code #x04CC)
+ (<= #x04D0 code #x04EB) (<= #x04EE code #x04F5)
+ (<= #x04F8 code #x04F9) (<= #x0531 code #x0556) (= code #x0559)
+ (<= #x0561 code #x0586) (<= #x05D0 code #x05EA)
+ (<= #x05F0 code #x05F2) (<= #x0621 code #x063A)
+ (<= #x0641 code #x064A) (<= #x0671 code #x06B7)
+ (<= #x06BA code #x06BE) (<= #x06C0 code #x06CE)
+ (<= #x06D0 code #x06D3) (= code #x06D5) (<= #x06E5 code #x06E6)
+ (<= #x0905 code #x0939) (= code #x093D) (<= #x0958 code #x0961)
+ (<= #x0985 code #x098C) (<= #x098F code #x0990)
+ (<= #x0993 code #x09A8) (<= #x09AA code #x09B0) (= code #x09B2)
+ (<= #x09B6 code #x09B9) (<= #x09DC code #x09DD)
+ (<= #x09DF code #x09E1) (<= #x09F0 code #x09F1)
+ (<= #x0A05 code #x0A0A) (<= #x0A0F code #x0A10)
+ (<= #x0A13 code #x0A28) (<= #x0A2A code #x0A30)
+ (<= #x0A32 code #x0A33) (<= #x0A35 code #x0A36)
+ (<= #x0A38 code #x0A39) (<= #x0A59 code #x0A5C) (= code #x0A5E)
+ (<= #x0A72 code #x0A74) (<= #x0A85 code #x0A8B) (= code #x0A8D)
+ (<= #x0A8F code #x0A91) (<= #x0A93 code #x0AA8)
+ (<= #x0AAA code #x0AB0) (<= #x0AB2 code #x0AB3)
+ (<= #x0AB5 code #x0AB9) (<= #x0ABD code #x0AE0)
+ (<= #x0B05 code #x0B0C) (<= #x0B0F code #x0B10)
+ (<= #x0B13 code #x0B28) (<= #x0B2A code #x0B30)
+ (<= #x0B32 code #x0B33) (<= #x0B36 code #x0B39) (= code #x0B3D)
+ (<= #x0B5C code #x0B5D) (<= #x0B5F code #x0B61)
+ (<= #x0B85 code #x0B8A) (<= #x0B8E code #x0B90)
+ (<= #x0B92 code #x0B95) (<= #x0B99 code #x0B9A) (= code #x0B9C)
+ (<= #x0B9E code #x0B9F) (<= #x0BA3 code #x0BA4)
+ (<= #x0BA8 code #x0BAA) (<= #x0BAE code #x0BB5)
+ (<= #x0BB7 code #x0BB9) (<= #x0C05 code #x0C0C)
+ (<= #x0C0E code #x0C10) (<= #x0C12 code #x0C28)
+ (<= #x0C2A code #x0C33) (<= #x0C35 code #x0C39)
+ (<= #x0C60 code #x0C61) (<= #x0C85 code #x0C8C)
+ (<= #x0C8E code #x0C90) (<= #x0C92 code #x0CA8)
+ (<= #x0CAA code #x0CB3) (<= #x0CB5 code #x0CB9) (= code #x0CDE)
+ (<= #x0CE0 code #x0CE1) (<= #x0D05 code #x0D0C)
+ (<= #x0D0E code #x0D10) (<= #x0D12 code #x0D28)
+ (<= #x0D2A code #x0D39) (<= #x0D60 code #x0D61)
+ (<= #x0E01 code #x0E2E) (= code #x0E30) (<= #x0E32 code #x0E33)
+ (<= #x0E40 code #x0E45) (<= #x0E81 code #x0E82) (= code #x0E84)
+ (<= #x0E87 code #x0E88) (= code #x0E8A) (= code #x0E8D)
+ (<= #x0E94 code #x0E97) (<= #x0E99 code #x0E9F)
+ (<= #x0EA1 code #x0EA3) (= code #x0EA5) (= code #x0EA7)
+ (<= #x0EAA code #x0EAB) (<= #x0EAD code #x0EAE) (= code #x0EB0)
+ (<= #x0EB2 code #x0EB3) (= code #x0EBD) (<= #x0EC0 code #x0EC4)
+ (<= #x0F40 code #x0F47) (<= #x0F49 code #x0F69)
+ (<= #x10A0 code #x10C5) (<= #x10D0 code #x10F6) (= code #x1100)
+ (<= #x1102 code #x1103) (<= #x1105 code #x1107) (= code #x1109)
+ (<= #x110B code #x110C) (<= #x110E code #x1112) (= code #x113C)
+ (= code #x113E) (= code #x1140) (= code #x114C) (= code #x114E) (= code #x1150)
+ (<= #x1154 code #x1155) (= code #x1159) (<= #x115F code #x1161)
+ (= code #x1163) (= code #x1165) (= code #x1167) (= code #x1169)
+ (<= #x116D code #x116E) (<= #x1172 code #x1173) (= code #x1175)
+ (= code #x119E) (= code #x11A8) (= code #x11AB) (<= #x11AE code #x11AF)
+ (<= #x11B7 code #x11B8) (= code #x11BA) (<= #x11BC code #x11C2)
+ (= code #x11EB) (= code #x11F0) (= code #x11F9) (<= #x1E00 code #x1E9B)
+ (<= #x1EA0 code #x1EF9) (<= #x1F00 code #x1F15)
+ (<= #x1F18 code #x1F1D) (<= #x1F20 code #x1F45)
+ (<= #x1F48 code #x1F4D) (<= #x1F50 code #x1F57) (= code #x1F59)
+ (= code #x1F5B) (= code #x1F5D) (<= #x1F5F code #x1F7D)
+ (<= #x1F80 code #x1FB4) (<= #x1FB6 code #x1FBC) (= code #x1FBE)
+ (<= #x1FC2 code #x1FC4) (<= #x1FC6 code #x1FCC)
+ (<= #x1FD0 code #x1FD3) (<= #x1FD6 code #x1FDB)
+ (<= #x1FE0 code #x1FEC) (<= #x1FF2 code #x1FF4)
+ (<= #x1FF6 code #x1FFC) (= code #x2126) (<= #x212A code #x212B)
+ (= code #x212E) (<= #x2180 code #x2182) (<= #x3041 code #x3094)
+ (<= #x30A1 code #x30FA) (<= #x3105 code #x312C)
+ (<= #xAC00 code #xD7A3)
+ )))
+
+(defun xml-ideographic-p (char)
+ (declare (optimize (speed 3) (safety 1)))
+ (let ((code (char-code char)))
+ (or (<= #x4E00 code #x9FA5) (= code #x3007) (<= #x3021 code #x3029))))
+
+(defun xml-combining-char-p (char)
+ (declare (optimize (speed 3) (safety 1)))
+ (let ((code (char-code char)))
+ (or (<= #x0300 code #x0345) (<= #x0360 code #x0361)
+ (<= #x0483 code #x0486) (<= #x0591 code #x05A1)
+ (<= #x05A3 code #x05B9) (<= #x05BB code #x05BD) (= code #x05BF)
+ (<= #x05C1 code #x05C2) (= code #x05C4) (<= #x064B code #x0652)
+ (= code #x0670) (<= #x06D6 code #x06DC) (<= #x06DD code #x06DF)
+ (<= #x06E0 code #x06E4) (<= #x06E7 code #x06E8)
+ (<= #x06EA code #x06ED) (<= #x0901 code #x0903) (= code #x093C)
+ (<= #x093E code #x094C) (= code #x094D) (<= #x0951 code #x0954)
+ (<= #x0962 code #x0963) (<= #x0981 code #x0983) (= code #x09BC)
+ (<= #x09BE code #x09BF) (<= #x09C0 code #x09C4)
+ (<= #x09C7 code #x09C8) (<= #x09CB code #x09CD) (= code #x09D7)
+ (<= #x09E2 code #x09E3) (= code #x0A02) (= code #x0A3C) (= code #x0A3E)
+ (= code #x0A3F) (<= #x0A40 code #x0A42) (<= #x0A47 code #x0A48)
+ (<= #x0A4B code #x0A4D) (<= #x0A70 code #x0A71)
+ (<= #x0A81 code #x0A83) (= code #x0ABC) (<= #x0ABE code #x0AC5)
+ (<= #x0AC7 code #x0AC9) (<= #x0ACB code #x0ACD)
+ (<= #x0B01 code #x0B03) (= code #x0B3C) (<= #x0B3E code #x0B43)
+ (<= #x0B47 code #x0B48) (<= #x0B4B code #x0B4D)
+ (<= #x0B56 code #x0B57) (<= #x0B82 code #x0B83)
+ (<= #x0BBE code #x0BC2) (<= #x0BC6 code #x0BC8)
+ (<= #x0BCA code #x0BCD) (= code #x0BD7) (<= #x0C01 code #x0C03)
+ (<= #x0C3E code #x0C44) (<= #x0C46 code #x0C48)
+ (<= #x0C4A code #x0C4D) (<= #x0C55 code #x0C56)
+ (<= #x0C82 code #x0C83) (<= #x0CBE code #x0CC4)
+ (<= #x0CC6 code #x0CC8) (<= #x0CCA code #x0CCD)
+ (<= #x0CD5 code #x0CD6) (<= #x0D02 code #x0D03)
+ (<= #x0D3E code #x0D43) (<= #x0D46 code #x0D48)
+ (<= #x0D4A code #x0D4D) (= code #x0D57) (= code #x0E31)
+ (<= #x0E34 code #x0E3A) (<= #x0E47 code #x0E4E) (= code #x0EB1)
+ (<= #x0EB4 code #x0EB9) (<= #x0EBB code #x0EBC)
+ (<= #x0EC8 code #x0ECD) (<= #x0F18 code #x0F19) (= code #x0F35)
+ (= code #x0F37) (= code #x0F39) (= code #x0F3E) (= code #x0F3F)
+ (<= #x0F71 code #x0F84) (<= #x0F86 code #x0F8B)
+ (<= #x0F90 code #x0F95) (= code #x0F97) (<= #x0F99 code #x0FAD)
+ (<= #x0FB1 code #x0FB7) (= code #x0FB9) (<= #x20D0 code #x20DC)
+ (= code #x20E1) (<= #x302A code #x302F) (= code #x3099) (= code #x309A)
+ )))
+
+(defun xml-digit-p (char)
+ (declare (optimize (speed 3) (safety 1)))
+ (let ((code (char-code char)))
+ (or (<= #x0030 code #x0039) (<= #x0660 code #x0669)
+ (<= #x06F0 code #x06F9) (<= #x0966 code #x096F)
+ (<= #x09E6 code #x09EF) (<= #x0A66 code #x0A6F)
+ (<= #x0AE6 code #x0AEF) (<= #x0B66 code #x0B6F)
+ (<= #x0BE7 code #x0BEF) (<= #x0C66 code #x0C6F)
+ (<= #x0CE6 code #x0CEF) (<= #x0D66 code #x0D6F)
+ (<= #x0E50 code #x0E59) (<= #x0ED0 code #x0ED9)
+ (<= #x0F20 code #x0F29)
+ )))
+
+(defun xml-extender-p (char)
+ (declare (optimize (speed 3) (safety 1)))
+ (let ((code (char-code char)))
+ (or (= code #x00B7) (= code #x02D0) (= code #x02D1) (= code #x0387) (= code #x0640)
+ (= code #x0E46) (= code #x0EC6) (= code #x3005) (<= #x3031 code #x3035)
+ (<= #x309D code #x309E) (<= #x30FC code #x30FE)
+ )))
+
+(defmacro xml-letter-p (char)
+ `(or (xml-base-char-p ,char) (xml-ideographic-p ,char)))
+
+(defmacro xml-name-char-p (char)
+ `(or (xml-letter-p ,char) (xml-digit-p ,char) (eq ,char #\.)
+ (eq ,char #\-) (eq ,char #\_) (eq ,char #\:)
+ (xml-combining-char-p ,char) (xml-extender-p ,char)))
+
+(defmacro xml-name-start-char-p (char)
+ `(or (xml-letter-p ,char)
+ (eq #\_ ,char) (eq #\: ,char)
+ ))
+
--- /dev/null
+;;
+;; copyright (c) 1986-2000 Franz Inc, Berkeley, CA
+;;
+;; This code is free software; you can redistribute it and/or
+;; modify it under the terms of the version 2.1 of
+;; the GNU Lesser General Public License as published by
+;; the Free Software Foundation, as clarified by the AllegroServe
+;; prequel found in license-allegroserve.txt.
+;;
+;; This code is distributed in the hope that it will be useful,
+;; but without any warranty; without even the implied warranty of
+;; merchantability or fitness for a particular purpose. See the GNU
+;; Lesser General Public License for more details.
+;;
+;; Version 2.1 of the GNU Lesser General Public License is in the file
+;; license-lgpl.txt that was distributed with this file.
+;; If it is not present, you can access it from
+;; http://www.gnu.org/copyleft/lesser.txt (until superseded by a newer
+;; version) or write to the Free Software Foundation, Inc., 59 Temple Place,
+;; Suite 330, Boston, MA 02111-1307 USA
+;;
+;; $Id: pxml1.cl,v 1.1 2002/10/15 12:23:03 kevin Exp $
+
+;; Change Log
+;;
+;; 10/14/00 add namespace support; xml-error fix
+
+(in-package :net.xml.parser)
+
+(pxml-dribble-bug-hook "$Id: pxml1.cl,v 1.1 2002/10/15 12:23:03 kevin Exp $")
+
+(defparameter *collectors* (list nil nil nil nil nil nil nil nil))
+
+(defun put-back-collector (col)
+ (declare (optimize (speed 3) (safety 1)))
+ (mp::without-scheduling
+ (do ((cols *collectors* (cdr cols)))
+ ((null cols)
+ ; toss it away
+ nil)
+ (if* (null (car cols))
+ then (setf (car cols) col)
+ (return)))))
+
+(defun pub-id-char-p (char)
+ (declare (optimize (speed 3) (safety 1)))
+ (let ((code (char-code char)))
+ (or (= #x20 code) (= #xD code) (= #xA code)
+ (<= (char-code #\a) code (char-code #\z))
+ (<= (char-code #\A) code (char-code #\Z))
+ (<= (char-code #\0) code (char-code #\9))
+ (member char '( #\- #\' #\( #\) #\+ #\, #\. #\/ #\: #\= #\?
+ #\; #\! #\* #\# #\@ #\$ #\_ #\%)))))
+
+(defparameter *keyword-package* (find-package :keyword))
+
+;; cache of tokenbuf structs
+(defparameter *tokenbufs* (list nil nil nil nil))
+
+(defstruct iostruct
+ unget-char ;; character pushed back
+ tokenbuf ;; main input tokenbuf
+ read-sequence-func ;; optional alternative to read-sequence
+ entity-bufs ;; active entity tokenbufs
+ entity-names ;; active entity names
+ parameter-entities
+ general-entities
+ do-entity ;; still substituting entity text
+ seen-any-dtd
+ seen-external-dtd
+ seen-parameter-reference
+ standalonep
+ uri-to-package
+ ns-to-package
+ ns-scope
+ )
+
+(defstruct tokenbuf
+ cur ;; next index to use to grab from tokenbuf
+ max ;; index one beyond last character
+ data ;; character array
+ stream ;; for external sources
+ )
+
+(defun get-tokenbuf ()
+ (declare (optimize (speed 3) (safety 1)))
+ (let (buf)
+ (mp::without-scheduling
+ (do* ((bufs *tokenbufs* (cdr bufs))
+ (this (car bufs) (car bufs)))
+ ((null bufs))
+ (if* this
+ then (setf (car bufs) nil)
+ (setq buf this)
+ (return))))
+ (if* buf
+ then (setf (tokenbuf-cur buf) 0)
+ (setf (tokenbuf-max buf) 0)
+ (setf (tokenbuf-stream buf) nil)
+ buf
+ else (make-tokenbuf
+ :cur 0
+ :max 0
+ :data (make-array 1024 :element-type 'character)))))
+
+(defstruct collector
+ next ; next index to set
+ max ; 1+max index to set
+ data ; string vector
+ )
+
+(defun compute-tag (coll &optional (package *keyword-package*) ns-to-package)
+ (declare (optimize (speed 3) (safety 1)))
+ ;; compute the symbol named by what's in the collector
+ (if* (not ns-to-package)
+ then (excl::intern* (collector-data coll) (collector-next coll) package)
+ else
+ (let (new-package (data (collector-data coll)))
+ (if* (and (eq (schar data 0) #\x)
+ (eq (schar data 1) #\m)
+ (eq (schar data 2) #\l)
+ (eq (schar data 3) #\n)
+ (eq (schar data 4) #\s)
+ (or (eq (schar data 5) #\:)
+ (= (collector-next coll) 5)))
+ then ;; putting xmlns: in :none namespace
+ (setf new-package (assoc :none ns-to-package))
+ (when new-package (setf package (rest new-package)))
+ (excl::intern* (collector-data coll) (collector-next coll) package)
+ else
+ (let ((colon-index -1)
+ (data (collector-data coll)))
+ (dotimes (i (collector-next coll))
+ (when (eq (schar data i) #\:)
+ (setf colon-index i)
+ (return)))
+ (if* (> colon-index -1) then
+ (let ((string1 (make-string colon-index))
+ new-package string2)
+ (dotimes (i colon-index)
+ (setf (schar string1 i) (schar data i)))
+ (setf new-package (assoc string1 ns-to-package :test 'string=))
+ (if* new-package
+ then
+ (setf string2 (make-string (- (collector-next coll)
+ (+ 1 colon-index))))
+ (dotimes (i (- (collector-next coll)
+ (+ 1 colon-index)))
+ (setf (schar string2 i)
+ (schar data (+ colon-index 1 i))))
+ (excl::intern string2 (rest new-package))
+ else
+ (excl::intern* (collector-data coll)
+ (collector-next coll) package)))
+ else
+ (let ((new-package (assoc :none ns-to-package)))
+ (when new-package
+ (setf package (rest new-package))))
+ (excl::intern* (collector-data coll)
+ (collector-next coll) package)))
+ ))
+ ))
+
+(defun compute-coll-string (coll)
+ (declare (optimize (speed 3) (safety 1)))
+ ;; return the string that's in the collection
+ (let ((str (make-string (collector-next coll)))
+ (from (collector-data coll)))
+ (dotimes (i (collector-next coll))
+ (setf (schar str i) (schar from i)))
+
+ str))
+
+(defun grow-and-add (coll ch)
+ (declare (optimize (speed 3) (safety 1)))
+ ;; increase the size of the data portion of the collector and then
+ ;; add the given char at the end
+ (let* ((odata (collector-data coll))
+ (ndata (make-string (* 2 (length odata)))))
+ (dotimes (i (length odata))
+ (setf (schar ndata i) (schar odata i)))
+ (setf (collector-data coll) ndata)
+ (setf (collector-max coll) (length ndata))
+ (let ((next (collector-next coll)))
+ (setf (schar ndata next) ch)
+ (setf (collector-next coll) (1+ next)))))
+
+(defun put-back-tokenbuf (buf)
+ (declare (optimize (speed 3) (safety 1)))
+ (mp::without-scheduling
+ (do ((bufs *tokenbufs* (cdr bufs)))
+ ((null bufs)
+ ; toss it away
+ nil)
+ (if* (null (car bufs))
+ then (setf (car bufs) buf)
+ (return)))))
+
+(defun get-collector ()
+ (declare (optimize (speed 3) (safety 1)))
+ (let (col)
+ (mp::without-scheduling
+ (do* ((cols *collectors* (cdr cols))
+ (this (car cols) (car cols)))
+ ((null cols))
+ (if* this
+ then (setf (car cols) nil)
+ (setq col this)
+ (return))))
+ (if* col
+ then (setf (collector-next col) 0)
+ col
+ else (make-collector
+ :next 0
+ :max 100
+ :data (make-string 100)))))
+
+(defmacro next-char (tokenbuf read-sequence-func)
+ `(let ((cur (tokenbuf-cur ,tokenbuf))
+ (tb (tokenbuf-data ,tokenbuf)))
+ (if* (>= cur (tokenbuf-max ,tokenbuf))
+ then ;; fill buffer
+ (if* (or (not (tokenbuf-stream ,tokenbuf))
+ (zerop (setf (tokenbuf-max ,tokenbuf)
+ (if* ,read-sequence-func
+ then (funcall ,read-sequence-func tb
+ (tokenbuf-stream ,tokenbuf))
+ else (read-sequence tb (tokenbuf-stream ,tokenbuf))))))
+ then (setq cur nil) ;; eof
+ else (setq cur 0)))
+ (if* cur
+ then (prog1
+ (let ((cc (schar tb cur)))
+ (if (and (tokenbuf-stream ,tokenbuf) (eq #\return cc)) #\newline cc))
+ (setf (tokenbuf-cur ,tokenbuf) (1+ cur))))))
+
+(defun get-next-char (iostruct)
+ (declare (optimize (speed 3) (safety 1)))
+ (let* (from-stream (tmp-char
+ (let (char)
+ (if* (iostruct-unget-char iostruct) then
+ ;; from-stream is used to do input CR/LF normalization
+ (setf from-stream t)
+ (setf char (first (iostruct-unget-char iostruct)))
+ (setf (iostruct-unget-char iostruct) (rest (iostruct-unget-char iostruct)))
+ char
+ elseif (iostruct-entity-bufs iostruct) then
+ (let (entity-buf)
+ (loop
+ (setf entity-buf (first (iostruct-entity-bufs iostruct)))
+ (if* (streamp (tokenbuf-stream entity-buf))
+ then (setf from-stream t)
+ else (setf from-stream nil))
+ (setf char (next-char entity-buf (iostruct-read-sequence-func iostruct)))
+ (when char (return))
+ (when (streamp (tokenbuf-stream entity-buf))
+ (close (tokenbuf-stream entity-buf))
+ (put-back-tokenbuf entity-buf))
+ (setf (iostruct-entity-bufs iostruct) (rest (iostruct-entity-bufs iostruct)))
+ (setf (iostruct-entity-names iostruct) (rest (iostruct-entity-names iostruct)))
+ (when (not (iostruct-entity-bufs iostruct)) (return))))
+ (if* char then char
+ else (next-char (iostruct-tokenbuf iostruct)
+ (iostruct-read-sequence-func iostruct)))
+ else (setf from-stream t)
+ (next-char (iostruct-tokenbuf iostruct)
+ (iostruct-read-sequence-func iostruct))))))
+ (if* (and from-stream (eq tmp-char #\return)) then #\newline else tmp-char)))
+
+(defun unicode-check (p tokenbuf)
+ (declare (ignorable tokenbuf) (optimize (speed 3) (safety 1)))
+ ;; need no-OO check because external format support isn't completely done yet
+ (when (not (typep p 'string-input-simple-stream))
+ #+(version>= 6 0 pre-final 1)
+ (let ((format (ignore-errors (excl:sniff-for-unicode p))))
+ (if* (eq format (find-external-format :unicode))
+ then
+ (setf (stream-external-format p) format)
+ else
+ (setf (stream-external-format p) (find-external-format :utf8))))
+ #-(version>= 6 0 pre-final 1)
+ (let* ((c (read-char p nil)) c2
+ (c-code (if c (char-code c) nil)))
+ (if* (eq #xFF c-code) then
+ (setf c2 (read-char p nil))
+ (setf c-code (if c (char-code c2) nil))
+ (if* (eq #xFE c-code) then
+ (format t "set unicode~%")
+ (setf (stream-external-format p)
+ (find-external-format #+(version>= 6 0 pre-final 1) :unicode
+ #-(version>= 6 0 pre-final 1) :fat-little))
+ else
+ (xml-error "stream has incomplete Unicode marker"))
+ else (setf (stream-external-format p)
+ (find-external-format :utf8))
+ (when c
+ (push c (iostruct-unget-char tokenbuf))
+ #+ignore (unread-char c p) ;; bug when there is single ^M in file
+ )))))
+
+(defun add-default-values (val attlist-data)
+ (declare (ignorable old-coll) (optimize (speed 3) (safety 1)))
+ (if* (symbolp val)
+ then
+ (let* ((tag-defaults (assoc val attlist-data)) defaults)
+ (dolist (def (rest tag-defaults))
+ (if* (stringp (third def)) then
+ (push (first def) defaults)
+ (push (if (eq (second def) :CDATA) (third def)
+ (normalize-attrib-value (third def))) defaults)
+ elseif (and (eq (third def) :FIXED) (stringp (fourth def))) then
+ (push (first def) defaults)
+ (push (if (eq (second def) :CDATA) (fourth def)
+ (normalize-attrib-value (fourth def))) defaults)
+ ))
+ (if* defaults then
+ (setf val (append (list val) (nreverse defaults)))
+ else val)
+ )
+ else
+ ;; first make sure there are no errors in given list
+ (let ((pairs (rest val)))
+ (loop
+ (when (null pairs) (return))
+ (let ((this-one (first pairs)))
+ (setf pairs (rest (rest pairs)))
+ (when (member this-one pairs)
+ (xml-error (concatenate 'string "Entity: "
+ (string (first val))
+ " has multiple "
+ (string this-one)
+ " attribute values"))))))
+ (let ((tag-defaults (assoc (first val) attlist-data)) defaults)
+ (dolist (def (rest tag-defaults))
+ (let ((old (member (first def) (rest val))))
+ (if* (not old) then
+ (if* (stringp (third def)) then
+ (push (first def) defaults)
+ (push (third def) defaults)
+ elseif (and (eq (third def) :FIXED) (stringp (fourth def))) then
+ (push (first def) defaults)
+ (push (fourth def) defaults))
+ else
+ (push (first old) defaults)
+ (push (second old) defaults))))
+ (if* defaults then
+ ;; now look for attributes in original list that weren't in dtd
+ (let ((tmp-val (rest val)) att att-val)
+ (loop
+ (when (null tmp-val) (return))
+ (setf att (first tmp-val))
+ (setf att-val (second tmp-val))
+ (setf tmp-val (rest (rest tmp-val)))
+ (when (not (member att defaults))
+ (push att defaults)
+ (push att-val defaults))))
+ (setf val (append (list (first val)) (nreverse defaults)))
+ else val))
+ ))
+
+(defun normalize-public-value (public-value)
+ (setf public-value (string-trim '(#\space) public-value))
+ (let ((count 0) (stop (length public-value)) (last-ch nil) cch)
+ (loop
+ (when (= count stop) (return public-value))
+ (setf cch (schar public-value count))
+ (if* (and (eq cch #\space) (eq last-ch #\space)) then
+ (setf public-value
+ (remove #\space public-value :start count :count 1))
+ (decf stop)
+ else (incf count)
+ (setf last-ch cch)))))
+
+
+(defun normalize-attrib-value (attrib-value &optional first-pass)
+ (declare (optimize (speed 3) (safety 1)))
+ (when first-pass
+ (let ((count 0) (stop (length attrib-value)) (last-ch nil) cch)
+ (loop
+ (when (= count stop) (return))
+ (setf cch (schar attrib-value count))
+ (if* (or (eq cch #\return) (eq cch #\tab)) then (setf (schar attrib-value count) #\space)
+ elseif (and (eq cch #\newline) (not (eq last-ch #\return))) then
+ (setf (schar attrib-value count) #\space)
+ elseif (and (eq cch #\newline) (eq last-ch #\return)) then
+ (setf attrib-value
+ (remove #\space attrib-value :start count :count 1))
+ (decf stop))
+ (incf count)
+ (setf last-ch cch))))
+ (setf attrib-value (string-trim '(#\space) attrib-value))
+ (let ((count 0) (stop (length attrib-value)) (last-ch nil) cch)
+ (loop
+ (when (= count stop) (return attrib-value))
+ (setf cch (schar attrib-value count))
+ (if* (and (eq cch #\space) (eq last-ch #\space)) then
+ (setf attrib-value
+ (remove #\space attrib-value :start count :count 1))
+ (decf stop)
+ else (incf count)
+ (setf last-ch cch)))))
+
+(defun check-xmldecl (val tokenbuf)
+ (declare (ignorable old-coll) (optimize (speed 3) (safety 1)))
+ (when (not (and (symbolp (second val)) (string= "version" (symbol-name (second val)))))
+ (xml-error "XML declaration tag does not include correct 'version' attribute"))
+ (when (and (fourth val)
+ (or (not (symbolp (fourth val)))
+ (and (not (string= "standalone" (symbol-name (fourth val))))
+ (not (string= "encoding" (symbol-name (fourth val)))))))
+ (xml-error "XML declaration tag does not include correct 'encoding' or 'standalone' attribute"))
+ (when (and (fourth val) (string= "standalone" (symbol-name (fourth val))))
+ (if* (equal (fifth val) "yes") then
+ (setf (iostruct-standalonep tokenbuf) t)
+ elseif (not (equal (fifth val) "no")) then
+ (xml-error "XML declaration tag does not include correct 'standalone' attribute value")))
+ (dotimes (i (length (third val)))
+ (let ((c (schar (third val) i)))
+ (when (and (not (alpha-char-p c))
+ (not (digit-char-p c))
+ (not (member c '(#\. #\_ #\- #\:)))
+ )
+ (xml-error "XML declaration tag does not include correct 'version' attribute value"))))
+ (when (and (fourth val) (eql :encoding (fourth val)))
+ (dotimes (i (length (fifth val)))
+ (let ((c (schar (fifth val) i)))
+ (when (and (not (alpha-char-p c))
+ (if* (> i 0) then
+ (and (not (digit-char-p c))
+ (not (member c '(#\. #\_ #\-))))
+ else t))
+ (xml-error "XML declaration tag does not include correct 'encoding' attribute value")))))
+ )
+
+(defun xml-error (text)
+ (declare (optimize (speed 3) (safety 1)))
+ (funcall 'error "~a" (concatenate 'string "XML not well-formed - " text)))
--- /dev/null
+;;
+;; copyright (c) 1986-2000 Franz Inc, Berkeley, CA
+;;
+;; This code is free software; you can redistribute it and/or
+;; modify it under the terms of the version 2.1 of
+;; the GNU Lesser General Public License as published by
+;; the Free Software Foundation, as clarified by the AllegroServe
+;; prequel found in license-allegroserve.txt.
+;;
+;; This code is distributed in the hope that it will be useful,
+;; but without any warranty; without even the implied warranty of
+;; merchantability or fitness for a particular purpose. See the GNU
+;; Lesser General Public License for more details.
+;;
+;; Version 2.1 of the GNU Lesser General Public License is in the file
+;; license-lgpl.txt that was distributed with this file.
+;; If it is not present, you can access it from
+;; http://www.gnu.org/copyleft/lesser.txt (until superseded by a newer
+;; version) or write to the Free Software Foundation, Inc., 59 Temple Place,
+;; Suite 330, Boston, MA 02111-1307 USA
+;;
+;; $Id: pxml2.cl,v 1.1 2002/10/15 12:23:03 kevin Exp $
+
+;; Change Log
+;;
+;; 10/14/00 add namespace support
+
+(in-package :net.xml.parser)
+
+(pxml-dribble-bug-hook "$Id: pxml2.cl,v 1.1 2002/10/15 12:23:03 kevin Exp $")
+
+;; state titles can be better chosen and explained
+
+(defvar *debug-xml* nil)
+
+(defmethod parse-xml ((str string) &key external-callback general-entities parameter-entities
+ content-only uri-to-package)
+ (declare (optimize (speed 3) (safety 1)))
+ (parse-xml (make-string-input-stream str) :external-callback external-callback
+ :general-entities general-entities
+ :parameter-entities parameter-entities :content-only content-only
+ :uri-to-package uri-to-package))
+
+(defmethod parse-xml ((p stream) &key external-callback general-entities
+ parameter-entities content-only uri-to-package)
+ (declare (optimize (speed 3) (safety 1)))
+ (pxml-internal0 p nil external-callback general-entities parameter-entities content-only
+ uri-to-package))
+
+(eval-when (compile load eval)
+ (defconstant state-docstart 0) ;; looking for XMLdecl, Misc, doctypedecl, 1st element
+ (defconstant state-docstart-misc 1) ;; looking for Misc, doctypedecl, 1st element
+ (defconstant state-docstart-misc2 2) ;; looking for Misc, 1st element
+ (defconstant state-element-done 3) ;; looking for Misc
+ (defconstant state-element-contents 4) ;; looking for element content
+ )
+
+(defun all-xml-whitespace-p (val)
+ (dotimes (i (length val) t)
+ (when (not (xml-space-p (elt val i))) (return nil))))
+
+(defun pxml-internal0 (p read-sequence-func external-callback
+ general-entities parameter-entities content-only uri-to-package)
+ (declare (optimize (speed 3) (safety 1)))
+ (let ((tokenbuf (make-iostruct :tokenbuf (get-tokenbuf)
+ :do-entity t
+ :read-sequence-func read-sequence-func)))
+ ;; set up stream right
+ (setf (tokenbuf-stream (iostruct-tokenbuf tokenbuf)) p)
+ ;; set up user specified entities
+ (setf (iostruct-parameter-entities tokenbuf) parameter-entities)
+ (setf (iostruct-general-entities tokenbuf) general-entities)
+ (setf (iostruct-uri-to-package tokenbuf) uri-to-package)
+ ;; look for Unicode file
+ (unicode-check p tokenbuf)
+ (unwind-protect
+ (values (pxml-internal tokenbuf external-callback content-only)
+ (iostruct-uri-to-package tokenbuf))
+ (dolist (entity-buf (iostruct-entity-bufs tokenbuf))
+ (when (streamp (tokenbuf-stream entity-buf))
+ (close (tokenbuf-stream entity-buf))
+ (put-back-tokenbuf entity-buf))))
+ ))
+
+(defun pxml-internal (tokenbuf external-callback content-only)
+ (declare (optimize (speed 3) (safety 1)))
+ (let ((state state-docstart)
+ (guts)
+ (pending)
+ (attlist-data)
+ (public-string)
+ (system-string)
+ (entity-open-tags)
+ )
+
+ (loop
+ (multiple-value-bind (val kind kind2)
+ (next-token tokenbuf external-callback attlist-data)
+ (when *debug-xml*
+ (format t "val: ~s kind: ~s kind2: ~s state: ~s~%" val kind kind2 state))
+ (case state
+ (#.state-docstart
+ (if* (and (listp val) (eq :xml (first val)) (eq kind :xml) (eq kind2 :end-tag))
+ then
+ (check-xmldecl val tokenbuf)
+ (when (not content-only) (push val guts))
+ (setf state state-docstart-misc)
+ elseif (eq kind :comment)
+ then
+ (when (not content-only) (push val guts))
+ (setf state state-docstart-misc)
+ elseif (and (listp val) (eq :DOCTYPE (first val)))
+ then
+ (if* (eq (third val) :SYSTEM) then
+ (setf system-string (fourth val))
+ (setf val (remove (third val) val))
+ (setf val (remove (third val) val))
+ elseif (eq (third val) :PUBLIC) then
+ (setf public-string (normalize-public-value (fourth val)))
+ (setf system-string (fifth val))
+ (setf val (remove (third val) val))
+ (setf val (remove (third val) val))
+ (setf val (remove (third val) val)))
+ (when system-string
+ (if* external-callback then
+ (let ((ext-stream (apply external-callback
+ (list (parse-uri system-string)
+ :DOCTYPE
+ public-string
+ ))))
+ (when ext-stream
+ (let (ext-io (entity-buf (get-tokenbuf)))
+ (setf (tokenbuf-stream entity-buf) ext-stream)
+ (setf ext-io (make-iostruct :tokenbuf entity-buf
+ :do-entity
+ (iostruct-do-entity tokenbuf)
+ :read-sequence-func
+ (iostruct-read-sequence-func tokenbuf)))
+ (unicode-check ext-stream ext-io)
+ (setf (iostruct-parameter-entities ext-io)
+ (iostruct-parameter-entities tokenbuf))
+ (setf (iostruct-general-entities ext-io)
+ (iostruct-general-entities tokenbuf))
+ (unwind-protect
+ (setf val (append val
+ (list (append
+ (list :external)
+ (parse-dtd
+ ext-io
+ t external-callback)))))
+ (setf (iostruct-seen-any-dtd tokenbuf) t)
+ (setf (iostruct-seen-external-dtd tokenbuf) t)
+ (setf (iostruct-seen-parameter-reference tokenbuf)
+ (iostruct-seen-parameter-reference ext-io))
+ (setf (iostruct-general-entities tokenbuf)
+ (iostruct-general-entities ext-io))
+ (setf (iostruct-parameter-entities tokenbuf)
+ (iostruct-parameter-entities ext-io))
+ (setf (iostruct-do-entity tokenbuf)
+ (iostruct-do-entity ext-io))
+ (dolist (entity-buf2 (iostruct-entity-bufs ext-io))
+ (when (streamp (tokenbuf-stream entity-buf2))
+ (close (tokenbuf-stream entity-buf2))
+ (put-back-tokenbuf entity-buf2)))
+ (close (tokenbuf-stream entity-buf))
+ (put-back-tokenbuf entity-buf))
+ )))
+ else
+ (setf (iostruct-do-entity tokenbuf) nil)))
+ (setf attlist-data
+ (process-attlist (rest (rest val)) attlist-data))
+ (when (not content-only) (push val guts))
+ (setf state state-docstart-misc2)
+ elseif (eq kind :pi)
+ then
+ (push val guts)
+ (setf state state-docstart-misc)
+ elseif (eq kind :pcdata)
+ then
+ (when (or (not kind2) (not (all-xml-whitespace-p val)))
+ (if* (not kind2) then
+ (xml-error "An entity reference occured where only whitespace or the first element may occur")
+ else
+ (xml-error (concatenate 'string
+ "unrecognized content '"
+ (subseq val 0 (min (length val) 40)) "'"))))
+ (setf state state-docstart-misc)
+ elseif (or (symbolp val)
+ (and (listp val) (symbolp (first val))))
+ then
+ (when (eq kind :start-tag)
+ (setf val (add-default-values val attlist-data)))
+ (if* (and (eq kind :start-tag) (eq kind2 :end-tag))
+ then (push (list val) guts)
+ (setf state state-element-done)
+ elseif (eq kind :start-tag)
+ then (push (list val) pending)
+ ;;(format t "pending: ~s guts: ~s <1>~%" pending guts)
+ (when (iostruct-entity-bufs tokenbuf)
+ (push (if (symbolp val) val (first val)) entity-open-tags))
+ (setf state state-element-contents)
+ else (xml-error (concatenate 'string
+ "encountered token at illegal syntax position: '"
+ (string kind) "'"
+ (if* (null guts) then
+ " at start of contents"
+ else
+ (concatenate 'string
+ " following: '"
+ (format nil "~s" (first guts))
+ "'")))))
+ else
+ (print (list val kind kind2))
+ (break "need to check for other allowable docstarts")))
+ (#.state-docstart-misc2
+ (if* (eq kind :pcdata)
+ then
+ (when (or (not kind2) (not (all-xml-whitespace-p val)))
+ (if* (not kind2) then
+ (xml-error "An entity reference occured where only whitespace or the first element may occur")
+ else
+ (xml-error (concatenate 'string
+ "unrecognized content '"
+ (subseq val 0 (min (length val) 40)) "'"))))
+ elseif (and (listp val) (eq :comment (first val)))
+ then
+ (when (not content-only) (push val guts))
+ elseif (eq kind :pi)
+ then
+ (push val guts)
+ elseif (eq kind :eof)
+ then
+ (xml-error "unexpected end of file encountered")
+ elseif (or (symbolp val)
+ (and (listp val) (symbolp (first val))))
+ then
+ (when (eq kind :start-tag)
+ (setf val (add-default-values val attlist-data)))
+ (if* (and (eq kind :start-tag) (eq kind2 :end-tag))
+ then (push (list val) guts)
+ (setf state state-element-done)
+ elseif (eq kind :start-tag)
+ then (push (list val) pending)
+ ;;(format t "pending: ~s guts: ~s <2>~%" pending guts)
+ (when (iostruct-entity-bufs tokenbuf)
+ (push (if (symbolp val) val (first val)) entity-open-tags))
+ (setf state state-element-contents)
+ else (xml-error (concatenate 'string
+ "encountered token at illegal syntax position: '"
+ (string kind) "'"
+ (if* (null guts) then
+ " at start of contents"
+ else
+ (concatenate 'string
+ " following: '"
+ (format nil "~s" (first guts))
+ "'")))))
+ else
+ (error "this branch unexpected <1>")))
+ (#.state-docstart-misc
+ (if* (eq kind :pcdata)
+ then
+ (when (or (not kind2) (not (all-xml-whitespace-p val)))
+ (if* (not kind2) then
+ (xml-error "An entity reference occured where only whitespace or the first element may occur")
+ else
+ (xml-error (concatenate 'string
+ "unrecognized content '"
+ (subseq val 0 (min (length val) 40)) "'"))))
+ elseif (and (listp val) (eq :DOCTYPE (first val)))
+ then
+ (if* (eq (third val) :SYSTEM) then
+ (setf system-string (fourth val))
+ (setf val (remove (third val) val))
+ (setf val (remove (third val) val))
+ elseif (eq (third val) :PUBLIC) then
+ (setf public-string (normalize-public-value (fourth val)))
+ (setf system-string (fifth val))
+ (setf val (remove (third val) val))
+ (setf val (remove (third val) val))
+ (setf val (remove (third val) val)))
+ (when system-string
+ (if* external-callback then
+ (let ((ext-stream (apply external-callback
+ (list (parse-uri system-string)
+ :DOCTYPE
+ public-string
+ ))))
+ (when ext-stream
+ (let (ext-io (entity-buf (get-tokenbuf)))
+ (setf (tokenbuf-stream entity-buf) ext-stream)
+ (setf ext-io (make-iostruct :tokenbuf entity-buf
+ :do-entity
+ (iostruct-do-entity tokenbuf)
+ :read-sequence-func
+ (iostruct-read-sequence-func tokenbuf)))
+ (unicode-check ext-stream ext-io)
+ (setf (iostruct-parameter-entities ext-io)
+ (iostruct-parameter-entities tokenbuf))
+ (setf (iostruct-general-entities ext-io)
+ (iostruct-general-entities tokenbuf))
+ (unwind-protect
+ (setf val (append val
+ (list (append
+ (list :external)
+ (parse-dtd
+ ext-io
+ t external-callback)))))
+ (setf (iostruct-seen-any-dtd tokenbuf) t)
+ (setf (iostruct-seen-external-dtd tokenbuf) t)
+ (setf (iostruct-seen-parameter-reference tokenbuf)
+ (iostruct-seen-parameter-reference ext-io))
+ (setf (iostruct-general-entities tokenbuf)
+ (iostruct-general-entities ext-io))
+ (setf (iostruct-parameter-entities tokenbuf)
+ (iostruct-parameter-entities ext-io))
+ (setf (iostruct-do-entity tokenbuf)
+ (iostruct-do-entity ext-io))
+ (dolist (entity-buf2 (iostruct-entity-bufs ext-io))
+ (when (streamp (tokenbuf-stream entity-buf2))
+ (close (tokenbuf-stream entity-buf2))
+ (put-back-tokenbuf entity-buf2)))
+ (close (tokenbuf-stream entity-buf))
+ (put-back-tokenbuf entity-buf))
+ )))
+ else
+ (setf (iostruct-do-entity tokenbuf) nil)))
+ (setf attlist-data
+ (process-attlist (rest (rest val)) attlist-data))
+ (when (not content-only) (push val guts))
+ (setf state state-docstart-misc2)
+ elseif (and (listp val) (eq :comment (first val)))
+ then
+ (when (not content-only) (push val guts))
+ elseif (eq kind :pi)
+ then
+ (push val guts)
+ elseif (or (symbolp val)
+ (and (listp val) (symbolp (first val))))
+ then
+ (when (eq kind :start-tag)
+ (setf val (add-default-values val attlist-data)))
+ (if* (and (eq kind :start-tag) (eq kind2 :end-tag))
+ then (push (list val) guts)
+ (setf state state-element-done)
+ elseif (eq kind :start-tag)
+ then (push (list val) pending)
+ ;;(format t "pending: ~s guts: ~s <3>~%" pending guts)
+ (when (iostruct-entity-bufs tokenbuf)
+ (push (if (symbolp val) val (first val)) entity-open-tags))
+ (setf state state-element-contents)
+ else (xml-error (concatenate 'string
+ "encountered token at illegal syntax position: '"
+ (string kind) "'"
+ (concatenate 'string
+ " following: '"
+ (format nil "~s" (first guts))
+ "'"))))
+ else
+ (print (list val kind kind2))
+ (break "check for other docstart-misc states")))
+ (#.state-element-contents
+ (if* (or (symbolp val)
+ (and (listp val) (symbolp (first val))))
+ then
+ (when (eq kind :start-tag)
+ (setf val (add-default-values val attlist-data)))
+ (if* (eq kind :end-tag)
+ then (let ((candidate (first (first pending))))
+ (when (listp candidate) (setf candidate (first candidate)))
+ (if* (eq candidate val)
+ then
+ (if* (iostruct-entity-bufs tokenbuf) then
+ (when (not (eq (first entity-open-tags) val))
+ (xml-error
+ (concatenate 'string
+ (string val)
+ " element closed in entity that did not open it")))
+ (setf entity-open-tags (rest entity-open-tags))
+ else
+ (when (eq (first entity-open-tags) val)
+ (xml-error
+ (concatenate 'string
+ (string val)
+ " element closed outside of entity that did not open it")))
+ )
+ (if* (= (length pending) 1)
+ then
+ (push (first pending) guts)
+ (setf state state-element-done)
+ else
+ (setf (second pending)
+ (append (second pending) (list (first pending)))))
+ (setf pending (rest pending))
+ ;;(format t "pending: ~s guts: ~s <4>~%" pending guts)
+ else (xml-error (format nil
+ "encountered end tag: ~s expected: ~s"
+ val candidate))))
+ elseif (and (eq kind :start-tag) (eq kind2 :end-tag))
+ then
+ (setf (first pending)
+ (append (first pending) (list (list val))))
+ ;;(format t "pending: ~s guts: ~s <5>~%" pending guts)
+ elseif (eq kind :start-tag)
+ then
+ (push (list val) pending)
+ ;;(format t "pending: ~s guts: ~s <6>~%" pending guts)
+ (when (iostruct-entity-bufs tokenbuf)
+ (push (if (symbolp val) val (first val)) entity-open-tags))
+ elseif (eq kind :cdata) then
+ (setf (first pending)
+ (append (first pending) (rest val)))
+ (let ((old (first pending))
+ (new))
+ (dolist (item old)
+ (if* (and (stringp (first new)) (stringp item)) then
+ (setf (first new)
+ (concatenate 'string (first new) item))
+ else (push item new)))
+ (setf (first pending) (reverse new)))
+ elseif (eq kind :comment) then
+ (when (not content-only) (push val guts))
+ elseif (eq kind :pi)
+ then
+ (setf (first pending)
+ (append (first pending) (list val)))
+ elseif (eq kind :eof)
+ then
+ (xml-error "unexpected end of file encountered")
+ else (xml-error (format nil "unexpected token: ~s" val)))
+ elseif (eq kind :pcdata)
+ then
+ (setf (first pending)
+ (append (first pending) (list val)))
+ (let ((old (first pending))
+ (new))
+ (dolist (item old)
+ (if* (and (stringp (first new)) (stringp item)) then
+ (setf (first new)
+ (concatenate 'string (first new) item))
+ else (push item new)))
+ (setf (first pending) (reverse new)))
+ else (xml-error (format nil "unexpected token: ~s" val))))
+ (#.state-element-done
+ (if* (eq kind :pcdata)
+ then
+ (when (or (not kind2) (not (all-xml-whitespace-p val)))
+ (if* (not kind2) then
+ (xml-error "An entity reference occured where only whitespace or the first element may occur")
+ else
+ (xml-error (concatenate 'string
+ "unrecognized content '"
+ (subseq val 0 (min (length val) 40)) "'"))))
+ elseif (eq kind :eof) then
+ (put-back-tokenbuf (iostruct-tokenbuf tokenbuf))
+ (return (nreverse guts))
+ elseif (eq kind :comment) then
+ (when (not content-only) (push val guts))
+ elseif (eq kind :pi)
+ then (push val guts)
+ else
+ (xml-error (concatenate 'string
+ "encountered token at illegal syntax position: '"
+ (string kind) "'"
+ (concatenate 'string
+ " following: '"
+ (format nil "~s" (first guts))
+ "'")))
+ ))
+ (t
+ (error "need to support state:~s token:~s kind:~s kind2:~s <parse>" state val kind kind2)))
+ ))))
+
+(eval-when (compile load eval)
+ (defconstant state-pcdata 0) ;;looking for < (tag start), & (reference); all else is string data
+ (defconstant state-readtagfirst 1) ;; seen < looking for /,?,!,name start
+ (defconstant state-readtag-? 2) ;; seen <? looking for space,char
+ (defconstant state-readtag-! 3) ;; seen <! looking for name,[,-
+ (defconstant state-readtag-end 4) ;; found </ looking for tag name
+ (defconstant state-readtag 5) ;; found < name start looking for more name, /, >
+ (defconstant state-findattributename 6) ;; found <?xml space looking for ?,>,space,name start
+ (defconstant state-readpi 7)
+ (defconstant state-noattributename 8)
+ (defconstant state-attribname 9) ;; found <?xml space name start looking for more name,=
+ (defconstant state-attribstartvalue 10) ;; found <?xml space name= looking for ',"
+ (defconstant state-attribvaluedelim 11)
+ (defconstant state-readtag-!-name 12) ;; seen <!name(start) looking for more name chars or space
+ (defconstant state-readtag-!-conditional 13) ;; found <![ looking for CDATA, INCLUDE, IGNORE
+ (defconstant state-readtag-!-comment 14)
+ (defconstant state-readtag-!-readcomment 15)
+ (defconstant state-readtag-!-readcomment2 16)
+ (defconstant state-readtag-end-bracket 17)
+ (defconstant state-readpi2 18) ;; found <?name space char looking for char,?
+ (defconstant state-prereadpi 19);; found <?name space looking for space,character
+ (defconstant state-pre-!-contents 20) ;; found <!name space looking for > or contents
+ (defconstant state-!-contents 21) ;; found <!name space name start looking for more name,>,[,space
+ (defconstant state-!-doctype 22) ;; found <!DOCTYPE space looking for space,>,[,name
+ (defconstant state-begin-dtd 23)
+ (defconstant state-!-doctype-ext 24) ;; found <!DOCTYPE space name space name start looking for name,space
+ (defconstant state-!-doctype-system 25) ;; found <!DOCTYPE name SYSTEM looking for ',"
+ (defconstant state-!-doctype-public 26) ;; found <!DOCTYPE name PUBLIC looking for ',"
+ (defconstant state-!-doctype-system2 27) ;; found <!DOCTYPE name SYSTEM " looking for chars,"
+ (defconstant state-!-doctype-system3 28) ;; found <!DOCTYPE name SYSTEM ' looking for chars,'
+ (defconstant state-!-doctype-ext2 29) ;; found <!DOCTYPE name SYSTEM/PUBLIC etc. looking for space,>,[
+ (defconstant state-!-doctype-ext3 30) ;; processed DTD looking for space,>
+ (defconstant state-!-doctype-public2 31) ;; found <!DOCTYPE name PUBLIC " looking for text or "
+ (defconstant state-!-doctype-public3 32) ;; found <!DOCTYPE name PUBLIC ' looking for text or '
+ (defconstant state-readtag2 33) ;; found <name looking for space,/,>,attrib name
+ (defconstant state-readtag3 34) ;; found <name/ or <name / looking for >
+ (defconstant state-readtag4 35) ;; found <name attrib-name start looking for more name,=
+ (defconstant state-readtag5 36) ;; found attrib= looking for ',"
+ (defconstant state-readtag6 37) ;; found attrib=['"] looking for end delimiter,value,reference
+ (defconstant state-readtag7 38) ;; found & inside attribute value, looking for # or name start
+ (defconstant state-readtag8 39) ;; found &# in attribute value looking for char code
+ (defconstant state-readtag9 40) ;; found &name start looking for more name,;
+ (defconstant state-readtag10 41) ;; found &#x in attribute value looking for hex code
+ (defconstant state-readtag11 42) ;; found &#[0-9] looking for more digits,;
+ (defconstant state-readtag-end2 43) ;; found </ & tag name start looking for more tag, space, >
+ (defconstant state-readtag-end3 44) ;; found </ end tag name space looking for >
+ (defconstant state-pcdata2 45) ;; seen & looking for name start
+ (defconstant state-pcdata3 46) ;; seen &# looking for character reference code
+ (defconstant state-pcdata4 47) ;; working on entity reference name looking for ;
+ (defconstant state-pcdata5 48) ;; working on hex character code reference
+ (defconstant state-pcdata6 49) ;; working on decimal character code reference
+ (defconstant state-findattributename0 50)
+ (defconstant state-readtag6a 51)
+ (defconstant state-readtag-!-conditional4 52)
+ (defconstant state-readtag-!-conditional5 53)
+ (defconstant state-readtag-!-conditional6 54)
+ (defconstant state-readtag-!-conditional7 55)
+ ;;(defconstant state-pcdata-parsed 56)
+ (defconstant state-pcdata7 57)
+ (defconstant state-pcdata8 58)
+ (defconstant state-readtag12 59)
+ (defconstant state-attribname2 60)
+ )
+
+(defun next-token (tokenbuf external-callback attlist-data)
+ (declare (optimize (speed 3) (safety 1)))
+ ;; return two values:
+ ;; the next token from the stream.
+ ;; the kind of token
+ ;;
+ ;; if read-sequence-func is non-nil,
+ ;; read-sequence-func is called to fetch the next character
+ (macrolet ((add-to-entity-buf (entity-symbol p-value)
+ `(progn
+ (push (make-tokenbuf :cur 0 :max (length p-value) :data p-value)
+ (iostruct-entity-bufs tokenbuf))))
+
+ (un-next-char (ch)
+ `(push ,ch (iostruct-unget-char tokenbuf)))
+
+ (clear-coll (coll)
+ `(setf (collector-next ,coll) 0))
+
+ (add-to-coll (coll ch)
+ `(let ((.next. (collector-next ,coll)))
+ (if* (>= .next. (collector-max ,coll))
+ then (grow-and-add ,coll ,ch)
+ else (setf (schar (collector-data ,coll) .next.)
+ ,ch)
+ (setf (collector-next ,coll) (1+ .next.)))))
+
+ (to-preferred-case (ch)
+ ;; should check the case mode
+ `(char-downcase ,ch))
+
+ )
+
+ (let ((state state-pcdata)
+ (coll (get-collector))
+ (entity (get-collector))
+ (tag-to-return)
+ (tag-to-return-string)
+ (attrib-name)
+ (empty-delim)
+ (value-delim)
+ (attrib-value)
+ (attribs-to-return)
+ (contents-to-return)
+ (char-code 0)
+ (special-tag-count 0)
+ (attrib-value-tokenbuf)
+ (last-ch)
+ (cdatap t)
+ (pcdatap t)
+ (entity-source)
+ (ns-token)
+ (ch))
+
+ (loop
+
+ (setq ch (get-next-char tokenbuf))
+ (when *debug-xml* (format t "ch: ~s code: ~x state:~s entity-names:~s~%"
+ ch (char-code ch) state (iostruct-entity-names tokenbuf)))
+ (if* (null ch)
+ then (return) ; eof -- exit loop
+ )
+
+
+ (case state
+ (#.state-pcdata
+ (if* (eq ch #\<)
+ then
+ (setf entity-source (first (iostruct-entity-bufs tokenbuf)))
+ (if* (> (collector-next coll) 0)
+ then ; have collected something, return this string
+ (un-next-char ch) ; push back the <
+ (return)
+ else ; collect a tag
+ (setq state state-readtagfirst))
+ elseif (eq #\& ch)
+ then (setf state state-pcdata2)
+ (setf entity-source (first (iostruct-entity-bufs tokenbuf)))
+ (setf pcdatap nil)
+ elseif (eq #\] ch) then (setf state state-pcdata7)
+ elseif (not (xml-char-p ch)) then
+ (xml-error (concatenate 'string
+ "Illegal character: "
+ (string ch)
+ " detected in input"))
+ else
+ (add-to-coll coll ch)
+ #+ignore
+ (if* (not (eq ch #\return))
+ then (add-to-coll coll ch))))
+
+ (#.state-pcdata7
+ (if* (eq #\] ch) then (setf state state-pcdata8)
+ else (setf state state-pcdata)
+ (add-to-coll coll #\]) (un-next-char ch)))
+
+ (#.state-pcdata8
+ (if* (eq #\> ch) then
+ (add-to-coll coll #\])
+ (add-to-coll coll #\])
+ (add-to-coll coll #\>)
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "content cannot contain ']]>':'"
+ (compute-coll-string coll)
+ "'"))
+ elseif (eq #\] ch) then
+ (add-to-coll coll #\])
+ else (setf state state-pcdata)
+ (add-to-coll coll #\]) (add-to-coll coll #\]) (un-next-char ch)))
+
+ (#.state-pcdata2
+ (if* (eq #\# ch)
+ then (setf state state-pcdata3)
+ elseif (xml-name-start-char-p ch)
+ then (setf state state-pcdata4)
+ (un-next-char ch)
+ else (clear-coll coll)
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal reference name, starting at: '&"
+ (compute-coll-string coll)
+ "'"))
+ ))
+
+ (#.state-pcdata3
+ (if* (eq #\x ch)
+ then (setf state state-pcdata5)
+ elseif (<= (char-code #\0) (char-code ch) (char-code #\9))
+ then (setf state state-pcdata6)
+ (un-next-char ch)
+ else (clear-coll coll)
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal character reference code, starting at: '&#"
+ (compute-coll-string coll)
+ "'"))
+ ))
+
+ (#.state-pcdata4
+ (if* (xml-name-char-p ch)
+ then (add-to-coll entity ch)
+ elseif (eq #\; ch)
+ then (let ((entity-symbol (compute-tag entity)))
+ (clear-coll entity)
+ (if* (not (eq entity-source (first (iostruct-entity-bufs tokenbuf)))) then
+ (xml-error
+ (concatenate 'string
+ (string entity-symbol)
+ " reference cannot be constructed from entity reference/character data sequence"))
+ else
+ (setf entity-source nil))
+ (if* (string= (symbol-name entity-symbol) "amp") then (add-to-coll coll #\&)
+ elseif (string= (symbol-name entity-symbol) "lt") then (add-to-coll coll #\<)
+ elseif (string= (symbol-name entity-symbol) "gt") then (add-to-coll coll #\>)
+ elseif (string= (symbol-name entity-symbol) "apos") then (add-to-coll coll #\')
+ elseif (string= (symbol-name entity-symbol) "quot") then (add-to-coll coll #\")
+ else
+ (let (p-value)
+ (if* (and (iostruct-do-entity tokenbuf)
+ (setf p-value
+ (assoc entity-symbol
+ (iostruct-general-entities tokenbuf)))) then
+ (setf p-value (rest p-value))
+ (when (member entity-symbol (iostruct-entity-names tokenbuf))
+ (xml-error (concatenate 'string
+ "entity:"
+ (string entity-symbol)
+ " in recursive reference")))
+ (push entity-symbol (iostruct-entity-names tokenbuf))
+ (if* (stringp p-value) then
+ (add-to-entity-buf entity-symbol p-value)
+ elseif (null external-callback) then
+ (setf (iostruct-do-entity tokenbuf) nil)
+ elseif p-value then
+ (let ((entity-stream (apply external-callback p-value)))
+ (if* entity-stream then
+ (let ((entity-buf (get-tokenbuf)))
+ (setf (tokenbuf-stream entity-buf) entity-stream)
+ (unicode-check entity-stream tokenbuf)
+ (push entity-buf
+ (iostruct-entity-bufs tokenbuf))
+ ;; check for possible external textdecl
+ (let ((count 0) cch
+ (string "<?xml "))
+ (if* (dotimes (i (length string) t)
+ (setf cch (get-next-char tokenbuf))
+ (when (and (= i 5)
+ (xml-space-p cch))
+ (setf cch #\space))
+ (when (not (eq cch
+ (schar string count)))
+ (return nil))
+ (incf count)) then
+ (setf count 5)
+ (loop
+ (when (< count 0) (return))
+ (un-next-char (schar string count))
+ (decf count))
+ ;; swallow <?xml token
+ (swallow-xml-token
+ tokenbuf
+ external-callback)
+ else
+ (un-next-char cch)
+ (decf count)
+ (loop
+ (when (< count 0) (return))
+ (un-next-char (schar string count))
+ (decf count))))
+ )
+ else
+ (xml-error (concatenate 'string
+ "Reference to unparsed entity "
+ (string entity-symbol)))
+ ))
+ )
+ elseif (or (not (iostruct-seen-any-dtd tokenbuf))
+ (iostruct-standalonep tokenbuf)
+ (and (iostruct-seen-any-dtd tokenbuf)
+ (not (iostruct-seen-external-dtd tokenbuf))
+ (not (iostruct-seen-parameter-reference tokenbuf))))
+ then
+ (xml-error (concatenate 'string
+ (string entity-symbol)
+ " must have entity declaration before being referenced"))
+ ))
+ ))
+ (setq state state-pcdata)
+ else (let ((tmp (compute-coll-string entity)))
+ (clear-coll coll)
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "reference not terminated by ';', starting at: '&"
+ tmp
+ (compute-coll-string coll)
+ "'")))
+ ))
+
+ (#.state-pcdata5
+ (let ((code (char-code ch)))
+ (if* (eq #\; ch)
+ then (if* (not (eq entity-source (first (iostruct-entity-bufs tokenbuf)))) then
+ (xml-error
+ (concatenate 'string
+ (string (code-char char-code))
+ " reference cannot be constructed from entity reference/character data sequence"))
+ else
+ (setf entity-source nil))
+ (when (not (xml-char-p (code-char char-code)))
+ (xml-error
+ (concatenate 'string
+ "Character reference: "
+ (format nil "~s" char-code)
+ " (decimal) is not valid XML input character")))
+ (add-to-coll coll (code-char char-code))
+ (setf char-code 0)
+ (setq state state-pcdata)
+ elseif (<= (char-code #\0) code (char-code #\9))
+ then (setf char-code (+ (* char-code 16) (- code (char-code #\0))))
+ elseif (<= (char-code #\A) code (char-code #\F))
+ then (setf char-code (+ 10 (* char-code 16) (- code (char-code #\A))))
+ elseif (<= (char-code #\a) code (char-code #\f))
+ then (setf char-code (+ 10 (* char-code 16) (- code (char-code #\a))))
+ else (clear-coll coll)
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal hexidecimal character reference code, starting at: '"
+ (compute-coll-string coll)
+ "', calculated char code: "
+ (format nil "~s" char-code)))
+ )))
+
+ (#.state-pcdata6
+ (let ((code (char-code ch)))
+ (if* (eq #\; ch)
+ then (if* (not (eq entity-source (first (iostruct-entity-bufs tokenbuf)))) then
+ (xml-error
+ (concatenate 'string
+ (string (code-char char-code))
+ " reference cannot be constructed from entity reference/character data sequence"))
+ else
+ (setf entity-source nil))
+ (when (not (xml-char-p (code-char char-code)))
+ (xml-error
+ (concatenate 'string
+ "Character reference: "
+ (format nil "~s" char-code)
+ " (decimal) is not valid XML input character")))
+ (add-to-coll coll (code-char char-code))
+ (setf char-code 0)
+ (setq state state-pcdata)
+ elseif (<= (char-code #\0) code (char-code #\9))
+ then (setf char-code (+ (* char-code 10) (- code (char-code #\0))))
+ else (clear-coll coll)
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal decimal character reference code, starting at: '"
+ (compute-coll-string coll)
+ "', calculated char code: "
+ (format nil "~s" char-code)))
+ )))
+
+ (#.state-readtag-end
+ (if* (xml-name-start-char-p ch)
+ then (setf state state-readtag-end2)
+ (un-next-char ch)
+ else (clear-coll coll)
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal end tag name, starting at: '</"
+ (compute-coll-string coll)
+ "'"))
+ ))
+
+ (#.state-readtag-end2
+ (if* (xml-name-char-p ch)
+ then (add-to-coll coll ch)
+ elseif (eq #\> ch) then
+ (let ((tag-string (compute-coll-string coll)))
+ (when (and (iostruct-ns-scope tokenbuf)
+ (string= tag-string
+ (first (first (iostruct-ns-scope tokenbuf)))))
+ (dolist (item (second (first (iostruct-ns-scope tokenbuf))))
+ (setf (iostruct-ns-to-package tokenbuf)
+ (remove (assoc item (iostruct-ns-to-package tokenbuf))
+ (iostruct-ns-to-package tokenbuf))))
+ (setf (iostruct-ns-scope tokenbuf)
+ (rest (iostruct-ns-scope tokenbuf)))))
+ (setq tag-to-return (compute-tag coll *package*
+ (iostruct-ns-to-package tokenbuf)))
+ (return)
+ elseif (xml-space-p ch) then (setf state state-readtag-end3)
+ (let ((tag-string (compute-coll-string coll)))
+ (when (and (iostruct-ns-scope tokenbuf)
+ (string= tag-string
+ (first (first (iostruct-ns-scope tokenbuf)))))
+ (setf (iostruct-ns-scope tokenbuf)
+ (rest (iostruct-ns-scope tokenbuf)))))
+ (setq tag-to-return (compute-tag coll *package*
+ (iostruct-ns-to-package tokenbuf)))
+ else (let ((tmp (compute-coll-string coll)))
+ (clear-coll coll)
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal end tag name, starting at: '</"
+ tmp
+ (compute-coll-string coll)
+ "'")))
+ ))
+
+ (#.state-readtag-end3
+ (if* (xml-space-p ch) then nil
+ elseif (eq #\> ch) then (return)
+ else (let ((tmp (compute-coll-string coll)))
+ (clear-coll coll)
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal end tag name, starting at: '"
+ (compute-coll-string coll)
+ "' end tag name: " tmp )))
+ ))
+
+ (#.state-readtagfirst
+ ; starting to read a tag name
+ (if* (eq #\/ ch)
+ then (setf state state-readtag-end)
+ elseif (eq #\? ch)
+ then (setf state state-readtag-?)
+ (setf empty-delim #\?)
+ elseif (eq #\! ch)
+ then (setf state state-readtag-!)
+ (setf empty-delim nil)
+ elseif (xml-name-start-char-p ch)
+ then (setf state state-readtag)
+ (setf empty-delim #\/)
+ (un-next-char ch)
+ else (clear-coll coll)
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal character following '<', starting at '"
+ (compute-coll-string coll)
+ "'"))
+ ))
+
+ (#.state-readtag-!
+ (if* (xml-name-start-char-p ch)
+ then
+ (setf state state-readtag-!-name)
+ (un-next-char ch)
+ elseif (eq #\[ ch)
+ then
+ (setf state state-readtag-!-conditional)
+ elseif (eq #\- ch)
+ then
+ (setf state state-readtag-!-comment)
+ else
+ (clear-coll coll)
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal character following '<!', starting at '<!"
+ (compute-coll-string coll)
+ "'"))
+ ))
+
+ (#.state-readtag-!-conditional
+ (if* (eq #\C ch) then
+ (setf state state-readtag-!-conditional4)
+ (setf special-tag-count 1)
+ else (clear-coll coll)
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal character following '<![', starting at '<!["
+ (compute-coll-string coll)
+ "'"))
+ ))
+
+ (#.state-readtag-!-conditional4
+ (if* (not (eq (elt "CDATA[" special-tag-count) ch))
+ then (clear-coll coll)
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal token following '<![', starting at '<!["
+ (subseq "CDATA[" 0 special-tag-count)
+ (compute-coll-string coll)
+ "'"))
+ elseif (eq #\[ ch) then (setf state state-readtag-!-conditional5)
+ else (incf special-tag-count)))
+
+ (#.state-readtag-!-conditional5
+ (if* (eq #\] ch)
+ then (setf state state-readtag-!-conditional6)
+ elseif (not (xml-char-p ch)) then
+ (xml-error (concatenate 'string
+ "Illegal character: "
+ (string ch)
+ " detected in CDATA input"))
+ else (add-to-coll coll ch)))
+
+ (#.state-readtag-!-conditional6
+ (if* (eq #\] ch)
+ then (setf state state-readtag-!-conditional7)
+ else (setf state state-readtag-!-conditional5)
+ (add-to-coll coll #\])
+ (add-to-coll coll ch)))
+
+ (#.state-readtag-!-conditional7
+ (if* (eq #\> ch)
+ then
+ (if* (not (eq entity-source (first (iostruct-entity-bufs tokenbuf)))) then
+ (xml-error
+ "CDATA cannot be constructed from entity reference/character data sequence")
+ else
+ (setf entity-source nil))
+ (return)
+ elseif (eq #\] ch) then
+ (add-to-coll coll #\]) ;; come back here to check again
+ else (setf state state-readtag-!-conditional5)
+ (add-to-coll coll #\])
+ (add-to-coll coll #\])
+ (add-to-coll coll ch)))
+
+ (#.state-readtag-!-comment
+ (if* (eq #\- ch)
+ then (setf state state-readtag-!-readcomment)
+ (setf tag-to-return :comment)
+ else (clear-coll coll)
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal token following '<![-', starting at '<!-"
+ (compute-coll-string coll)
+ "'"))
+ ))
+
+ (#.state-readtag-!-readcomment
+ (if* (eq #\- ch)
+ then (setf state state-readtag-!-readcomment2)
+ elseif (not (xml-char-p ch)) then
+ (xml-error (concatenate 'string
+ "Illegal character: "
+ (string ch)
+ " detected in input"))
+ else (add-to-coll coll ch)))
+
+ (#.state-readtag-!-readcomment2
+ (if* (eq #\- ch)
+ then (setf state state-readtag-end-bracket)
+ else (setf state state-readtag-!-readcomment)
+ (add-to-coll coll #\-) (add-to-coll coll ch)))
+
+ (#.state-readtag-end-bracket
+ (if* (eq #\> ch)
+ then (if* (not (eq entity-source (first (iostruct-entity-bufs tokenbuf)))) then
+ (xml-error
+ (concatenate 'string
+ (string tag-to-return)
+ " tag cannot be constructed from entity reference/character data sequence"))
+ else
+ (setf entity-source nil))
+ (return)
+ else (clear-coll coll)
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal token following '--' comment terminator, starting at '--"
+ (compute-coll-string coll)
+ "'"))
+ ))
+
+ (#.state-readtag
+ (if* (xml-name-char-p ch) ;; starting char already passed more restrictive test
+ then
+ (add-to-coll coll ch)
+ else
+ (if* (xml-space-p ch) then
+ (setf tag-to-return-string (compute-coll-string coll))
+ (setq tag-to-return
+ (compute-tag coll *package*
+ (iostruct-ns-to-package tokenbuf)))
+ (clear-coll coll)
+ (setf state state-readtag2)
+ elseif (eq #\> ch) then
+ (setq tag-to-return
+ (compute-tag coll *package*
+ (iostruct-ns-to-package tokenbuf)))
+ (clear-coll coll)
+ (return)
+ elseif (eq #\/ ch) then
+ (setq tag-to-return
+ (compute-tag coll *package*
+ (iostruct-ns-to-package tokenbuf)))
+ (clear-coll coll)
+ (setf state state-readtag3)
+ else (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error
+ (concatenate 'string
+ "illegal token name, starting at '"
+ (compute-coll-string coll)
+ "'"))
+ )))
+
+ (#.state-readtag2
+ (if* (xml-space-p ch) then nil
+ elseif (eq #\> ch) then (return)
+ elseif (eq #\/ ch) then (setf state state-readtag3)
+ elseif (xml-name-start-char-p ch) then
+ (un-next-char ch)
+ (setf state state-readtag4)
+ else (clear-coll coll)
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error
+ (concatenate 'string
+ "illegal token, starting at '"
+ (compute-coll-string coll)
+ "' following element token start: " (string tag-to-return)))
+ ))
+
+ (#.state-readtag4
+ (if* (xml-name-char-p ch) ;; starting char already passed more restrictive test
+ then
+ (add-to-coll coll ch)
+ elseif (eq #\= ch) then
+ (setq attrib-name (compute-tag coll *package*
+ (iostruct-ns-to-package tokenbuf)))
+ (clear-coll coll)
+ (let ((name (symbol-name attrib-name)))
+ (when (and (>= (length name) 5)
+ (string= name "xmlns" :end1 5))
+ (if* (= (length name) 5)
+ then
+ (setf ns-token :none)
+ elseif (eq (schar name 5) #\:)
+ then
+ (setf ns-token (subseq name 6)))))
+ (setf state state-readtag5)
+ elseif (xml-space-p ch) then
+ (setq attrib-name (compute-tag coll *package*
+ (iostruct-ns-to-package tokenbuf)))
+ (clear-coll coll)
+ (let ((name (symbol-name attrib-name)))
+ (when (and (>= (length name) 5)
+ (string= name "xmlns" :end1 5))
+ (if* (= (length name) 5)
+ then
+ (setf ns-token :none)
+ else
+ (setf ns-token (subseq name 6)))))
+ (setf state state-readtag12)
+ else (let ((tmp (compute-coll-string coll)))
+ (clear-coll coll)
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error
+ (concatenate 'string
+ "looking for attribute '=', found: '"
+ (compute-coll-string coll)
+ "' following attribute name: " tmp)))
+ ))
+
+ (#.state-readtag12
+ (if* (xml-space-p ch) then nil
+ elseif (eq #\= ch) then (setf state state-readtag5)
+ else
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error
+ (concatenate 'string
+ "looking for attribute '=', found: '"
+ (compute-coll-string coll)
+ "' following attribute name: " (string attrib-name)))))
+
+ (#.state-readtag5
+ ;; begin to collect attribute value
+ (if* (or (eq ch #\")
+ (eq ch #\'))
+ then (setq value-delim ch)
+ (let* ((tag-defaults (assoc tag-to-return attlist-data))
+ (this-attrib (assoc attrib-name tag-defaults)))
+ (when (and (second this-attrib) (not (eq (second this-attrib) :CDATA)))
+ (setf cdatap nil))
+ )
+ (setq state state-readtag6)
+ elseif (xml-space-p ch) then nil
+ else
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error
+ (concatenate 'string
+ "attribute value not delimited by ' or \" : '"
+ (compute-coll-string coll)
+ "' following attribute: " (string attrib-name)))
+ ))
+
+ (#.state-readtag6
+ (let ((from-entity (and attrib-value-tokenbuf
+ (eq attrib-value-tokenbuf
+ (first (iostruct-entity-bufs tokenbuf))))))
+ (when (not from-entity) (setf attrib-value-tokenbuf nil))
+ (if* from-entity then
+ (if* (eq #\newline ch) then (setf ch #\space)
+ elseif (eq #\return ch) then (setf ch #\space)
+ elseif (eq #\tab ch) then (setf ch #\space)
+ ))
+ (if* (and (not from-entity) (eq ch value-delim))
+ then (setq attrib-value (compute-coll-string coll))
+ (when (not cdatap)
+ (setf attrib-value (normalize-attrib-value attrib-value)))
+ (clear-coll coll)
+ (push attrib-name attribs-to-return)
+ (push attrib-value attribs-to-return)
+ (when ns-token
+ (let ((package (assoc (parse-uri attrib-value)
+ (iostruct-uri-to-package tokenbuf)
+ :test 'uri=)))
+ (if* package then (setf package (rest package))
+ else
+ (setf package
+ (let ((i 0) new-package)
+ (loop
+ (let* ((candidate (concatenate 'string
+ "net.xml.namespace."
+ (format nil "~s" i)))
+ (exists (find-package candidate)))
+ (if* exists
+ then (incf i)
+ else (setf new-package (make-package candidate))
+ (setf (iostruct-uri-to-package tokenbuf)
+ (acons (parse-uri attrib-value) new-package
+ (iostruct-uri-to-package tokenbuf)))
+ (return new-package)))))))
+ (setf (iostruct-ns-to-package tokenbuf)
+ (acons ns-token package (iostruct-ns-to-package tokenbuf)))
+ )
+ (if* (and (first (iostruct-ns-scope tokenbuf))
+ (string= (first (first (iostruct-ns-scope tokenbuf)))
+ tag-to-return-string))
+ then
+ (push ns-token (second (first (iostruct-ns-scope tokenbuf))))
+ else
+ (push (list tag-to-return-string (list ns-token))
+ (iostruct-ns-scope tokenbuf)))
+ (setf ns-token nil))
+ (setq state state-readtag6a)
+ elseif (eq #\newline ch) then
+ (when (not (eq #\return last-ch)) (add-to-coll coll #\space))
+ elseif (or (eq #\tab ch) (eq #\return ch)) then
+ (add-to-coll coll #\space)
+ elseif (eq #\& ch)
+ then (setq state state-readtag7)
+ (setf entity-source (first (iostruct-entity-bufs tokenbuf)))
+ elseif (and (xml-char-p ch) (not (eq #\< ch)))
+ then (add-to-coll coll ch)
+ else
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error
+ (concatenate 'string
+ "attribute value cannot contain '<': '"
+ (compute-coll-string coll)
+ "' following attribute: " (string attrib-name)))
+ )
+ (setf last-ch ch)))
+
+ (#.state-readtag6a
+ (if* (xml-space-p ch) then (setf state state-readtag2)
+ elseif (eq #\> ch) then (setf state state-readtag2)
+ (return)
+ elseif (eq #\/ ch) then (setf state state-readtag3)
+ else (clear-coll coll)
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error
+ (concatenate 'string
+ "illegal token, starting at '"
+ (compute-coll-string coll)
+ "' following element token start: " (string tag-to-return)))
+ ))
+
+ (#.state-readtag7
+ (if* (eq #\# ch)
+ then (setf state state-readtag8)
+ elseif (xml-name-start-char-p ch)
+ then (setf state state-readtag9)
+ (un-next-char ch)
+ else (clear-coll coll)
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error
+ (concatenate 'string
+ "attribute value contains illegal reference name: '&"
+ (compute-coll-string coll)
+ "' in attribute value for: " (string attrib-name)))
+ ))
+
+ (#.state-readtag8
+ (if* (eq #\x ch)
+ then (setf state state-readtag10)
+ elseif (<= (char-code #\0) (char-code ch) (char-code #\9))
+ then (setf state state-readtag11)
+ (un-next-char ch)
+ else (clear-coll coll)
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error
+ (concatenate 'string
+ "attribute value contains illegal character reference code: '"
+ (compute-coll-string coll)
+ "' in attribute value for: " (string attrib-name)))
+ ))
+
+ (#.state-readtag10
+ (let ((code (char-code ch)))
+ (if* (eq #\; ch)
+ then (if* (not (eq entity-source (first (iostruct-entity-bufs tokenbuf)))) then
+ (xml-error
+ (concatenate 'string
+ (string (code-char char-code))
+ " reference cannot be constructed from entity reference/character data sequence"))
+ else
+ (setf entity-source nil))
+ (add-to-coll coll (code-char char-code))
+ (setf char-code 0)
+ (setq state state-readtag6)
+ elseif (<= (char-code #\0) code (char-code #\9))
+ then (setf char-code (+ (* char-code 16) (- code (char-code #\0))))
+ elseif (<= (char-code #\A) code (char-code #\F))
+ then (setf char-code (+ 10 (* char-code 16) (- code (char-code #\A))))
+ elseif (<= (char-code #\a) code (char-code #\f))
+ then (setf char-code (+ 10 (* char-code 16) (- code (char-code #\a))))
+ else (clear-coll coll)
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error
+ (concatenate 'string
+ "attribute value contains illegal hexidecimal character reference code: '"
+ (compute-coll-string coll)
+ "' in attribute value for: " (string attrib-name)))
+ )))
+
+ (#.state-readtag11
+ (let ((code (char-code ch)))
+ (if* (eq #\; ch)
+ then (if* (not (eq entity-source (first (iostruct-entity-bufs tokenbuf)))) then
+ (xml-error
+ (concatenate 'string
+ (string (code-char char-code))
+ " reference cannot be constructed from entity reference/character data sequence"))
+ else
+ (setf entity-source nil))
+ (add-to-coll coll (code-char char-code))
+ (setf char-code 0)
+ (setq state state-readtag6)
+ elseif (<= (char-code #\0) code (char-code #\9))
+ then (setf char-code (+ (* char-code 10) (- code (char-code #\0))))
+ else (clear-coll coll)
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error
+ (concatenate 'string
+ "attribute value contains illegal decimal character reference code: '"
+ (compute-coll-string coll)
+ "' in attribute value for: " (string attrib-name)))
+ )))
+
+ (#.state-readtag9
+ (if* (xml-name-char-p ch)
+ then (add-to-coll entity ch)
+ elseif (eq #\; ch)
+ then (let ((entity-symbol (compute-tag entity)))
+ (clear-coll entity)
+ (if* (not (eq entity-source (first (iostruct-entity-bufs tokenbuf)))) then
+ (xml-error
+ (concatenate 'string
+ (string entity-symbol)
+ " reference cannot be constructed from entity reference/character data sequence"))
+ else
+ (setf entity-source nil))
+ (if* (string= (symbol-name entity-symbol) "amp") then (add-to-coll coll #\&)
+ elseif (string= (symbol-name entity-symbol) "lt") then (add-to-coll coll #\<)
+ elseif (string= (symbol-name entity-symbol) "gt") then (add-to-coll coll #\>)
+ elseif (string= (symbol-name entity-symbol) "apos") then (add-to-coll coll #\')
+ elseif (string= (symbol-name entity-symbol) "quot") then (add-to-coll coll #\")
+ else (let (p-value)
+ (if* (and (iostruct-do-entity tokenbuf)
+ (setf p-value
+ (assoc entity-symbol
+ (iostruct-general-entities tokenbuf)))) then
+ (setf p-value (rest p-value))
+ (when (member entity-symbol (iostruct-entity-names tokenbuf))
+ (xml-error (concatenate 'string
+ "entity:"
+ (string entity-symbol)
+ " in recursive reference")))
+ (push entity-symbol (iostruct-entity-names tokenbuf))
+ (if* (stringp p-value) then
+ (add-to-entity-buf entity-symbol p-value)
+ (when (not attrib-value-tokenbuf)
+ (setf attrib-value-tokenbuf
+ (first (iostruct-entity-bufs tokenbuf))))
+ elseif (null external-callback) then
+ (setf (iostruct-do-entity tokenbuf) nil)
+ elseif p-value then
+ (let ((entity-stream (apply external-callback p-value)))
+ (if* entity-stream then
+ (let ((entity-buf (get-tokenbuf)))
+ (setf (tokenbuf-stream entity-buf) entity-stream)
+ (unicode-check entity-stream tokenbuf)
+ (push entity-buf
+ (iostruct-entity-bufs tokenbuf))
+ ;; check for possible external textdecl
+ (let ((count 0) cch
+ (string "<?xml "))
+ (if* (dotimes (i (length string) t)
+ (setf cch (get-next-char tokenbuf))
+ (when (and (= i 5)
+ (xml-space-p cch))
+ (setf cch #\space))
+ (when (not (eq cch
+ (schar string count)))
+ (return nil))
+ (incf count)) then
+ (setf count 5)
+ (loop
+ (when (< count 0) (return))
+ (un-next-char (schar string count))
+ (decf count))
+ ;; swallow <?xml token
+ (swallow-xml-token
+ tokenbuf
+ external-callback)
+ else
+ (un-next-char cch)
+ (decf count)
+ (loop
+ (when (< count 0) (return))
+ (un-next-char (schar string count))
+ (decf count))))
+ )
+ else
+ (xml-error (concatenate 'string
+ "Reference to unparsed entity "
+ (string entity-symbol)))
+ ))
+ )
+ elseif (or (not (iostruct-seen-any-dtd tokenbuf))
+ (and (iostruct-seen-any-dtd tokenbuf)
+ (not (iostruct-seen-external-dtd tokenbuf))
+ (not (iostruct-seen-parameter-reference tokenbuf))))
+ then
+ (xml-error (concatenate 'string
+ (string entity-symbol)
+ " must have entity declaration before being referenced"))
+ ))
+ ))
+ (setq state state-readtag6)
+ else (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error
+ (concatenate 'string
+ "attribute value contains illegal reference name: '&"
+ (compute-coll-string coll)
+ "' in attribute value for: " (string attrib-name)))
+ ))
+
+ (#.state-readtag3
+ (if* (eq #\> ch) then (return)
+ else (clear-coll coll)
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error
+ (concatenate 'string
+ "expected '>' found '"
+ (compute-coll-string coll)
+ "' in element: " (string tag-to-return)))
+ ))
+
+ (#.state-readtag-!-name
+ (if* (xml-name-char-p ch) ;; starting char already passed more restrictive test
+ then
+ (add-to-coll coll ch)
+ else
+ (when (not (xml-space-p ch))
+ (xml-error (concatenate 'string
+ "expecting whitespace following: '<!"
+ (compute-coll-string coll)
+ "' ; got: '" (string ch) "'")))
+ (setq tag-to-return (compute-tag coll))
+ (clear-coll coll)
+ (setf state state-pre-!-contents)))
+
+ (#.state-readtag-?
+ (if* (xml-name-char-p ch)
+ then
+ (add-to-coll coll ch)
+ else
+ (when (and (not (xml-space-p ch)) (not (eq #\? ch)))
+ (xml-error (concatenate 'string
+ "expecting name following: '<?"
+ (compute-coll-string coll)
+ "' ; got: '" (string ch) "'"))
+ )
+ (when (= (collector-next coll) 0)
+ (xml-error "null <? token"))
+ (if* (and (= (collector-next coll) 3)
+ (eq (elt (collector-data coll) 0) #\x)
+ (eq (elt (collector-data coll) 1) #\m)
+ (eq (elt (collector-data coll) 2) #\l)
+ )
+ then
+ (when (eq #\? ch) (xml-error "null <?xml token"))
+ (setq tag-to-return :xml)
+ (setf state state-findattributename)
+ elseif (and (= (collector-next coll) 3)
+ (or (eq (elt (collector-data coll) 0) #\x)
+ (eq (elt (collector-data coll) 0) #\X))
+ (or (eq (elt (collector-data coll) 1) #\m)
+ (eq (elt (collector-data coll) 1) #\M))
+ (or (eq (elt (collector-data coll) 2) #\l)
+ (eq (elt (collector-data coll) 2) #\L))
+ ) then
+ (xml-error "<?xml tag must be all lower case")
+ else
+ (setq tag-to-return (compute-tag coll))
+ (when (eq #\? ch) (un-next-char ch))
+ (setf state state-prereadpi))
+ (clear-coll coll)))
+
+ (#.state-pre-!-contents
+ (if* (xml-space-p ch)
+ then nil
+ elseif (not (xml-char-p ch))
+ then (xml-error (concatenate 'string ;; no test for this...
+ "illegal character '"
+ (string ch)
+ " following <!" (string tag-to-return)))
+ elseif (eq #\> ch)
+ then (return)
+ else (un-next-char ch)
+ (setf state state-!-contents)))
+
+ (#.state-begin-dtd
+ (un-next-char ch)
+ (let ((val (parse-dtd tokenbuf nil external-callback)))
+ (setf (iostruct-seen-any-dtd tokenbuf) t)
+ (push (append (list :[) val)
+ contents-to-return))
+ (setf state state-!-doctype-ext3))
+
+ (#.state-!-contents
+ (if* (xml-name-char-p ch)
+ then (add-to-coll coll ch)
+ elseif (eq #\> ch)
+ then (push (compute-coll-string coll) contents-to-return)
+ (clear-coll coll)
+ (return)
+ elseif (eq #\[ ch)
+ then (push (compute-tag coll) contents-to-return)
+ (clear-coll coll)
+ (setf state state-begin-dtd)
+ elseif (and (xml-space-p ch) (eq tag-to-return :DOCTYPE))
+ ;; look at tag-to-return and set state accordingly
+ then (push (compute-tag coll) contents-to-return)
+ (clear-coll coll)
+ (setf state state-!-doctype)
+ else (xml-error
+ (concatenate 'string
+ "illegal name: '"
+ (string tag-to-return)
+ "' in <! tag: "))
+ ))
+
+ (#.state-!-doctype-ext
+ (if* (xml-name-char-p ch) ;; starting char already passed more restrictive test
+ then
+ (add-to-coll coll ch)
+ else
+ (when (not (xml-space-p ch))
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error
+ (concatenate 'string
+ "illegal character in '"
+ (compute-coll-string coll)
+ "' in <! tag: " (string tag-to-return) " "
+ (string (first contents-to-return))
+ ))
+ )
+ (let ((token (compute-tag coll)))
+ (push token contents-to-return)
+ (clear-coll coll)
+ (if* (eq :SYSTEM token) then (setf state state-!-doctype-system)
+ elseif (eq :PUBLIC token) then (setf state state-!-doctype-public)
+ else (xml-error
+ (concatenate 'string
+ "expected 'SYSTEM' or 'PUBLIC' got '"
+ (string (first contents-to-return))
+ "' in <! tag: " (string tag-to-return) " "
+ (string (second contents-to-return))))
+ )
+ )))
+
+ (#.state-!-doctype-public
+ (if* (xml-space-p ch) then nil
+ elseif (eq #\" ch) then (setf state state-!-doctype-public2)
+ elseif (eq #\' ch) then (setf state state-!-doctype-public3)
+ else (xml-error
+ (concatenate 'string
+ "expected quote or double-quote got: '"
+ (string ch)
+ "' in <! tag: " (string tag-to-return) " "
+ (string (second contents-to-return)) " "
+ (string (first contents-to-return))
+ ))
+ ))
+
+ (#.state-!-doctype-system
+ (if* (xml-space-p ch) then nil
+ elseif (eq #\" ch) then (setf state state-!-doctype-system2)
+ elseif (eq #\' ch) then (setf state state-!-doctype-system3)
+ else (xml-error
+ (concatenate 'string
+ "expected quote or double-quote got: '"
+ (string ch)
+ "' in <! tag: " (string tag-to-return) " "
+ (string (second contents-to-return)) " "
+ (string (first contents-to-return))
+ ))
+ ))
+
+ (#.state-!-doctype-public2
+ (if* (eq #\" ch) then (push (compute-coll-string coll)
+ contents-to-return)
+ (clear-coll coll)
+ (setf state state-!-doctype-system)
+ elseif (pub-id-char-p ch) then (add-to-coll coll ch)
+ else (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error
+ (concatenate 'string
+ "illegal character in DOCTYPE PUBLIC string: '"
+ (compute-coll-string coll) "'"))
+ ))
+
+ (#.state-!-doctype-public3
+ (if* (eq #\' ch) then (push (compute-coll-string coll)
+ contents-to-return)
+ (clear-coll coll)
+ (setf state state-!-doctype-system)
+ elseif (pub-id-char-p ch) then (add-to-coll coll ch)
+ else (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error
+ (concatenate 'string
+ "illegal character in DOCTYPE PUBLIC string: '"
+ (compute-coll-string coll) "'"))
+ ))
+
+ (#.state-!-doctype-system2
+ (when (not (xml-char-p ch))
+ (xml-error "XML is not well formed")) ;; not tested
+ (if* (eq #\" ch) then (push (compute-coll-string coll)
+ contents-to-return)
+ (clear-coll coll)
+ (setf state state-!-doctype-ext2)
+ else (add-to-coll coll ch)))
+
+ (#.state-!-doctype-system3
+ (when (not (xml-char-p ch))
+ (xml-error "XML is not well formed")) ;; not tested
+ (if* (eq #\' ch) then (push (compute-coll-string coll)
+ contents-to-return)
+ (clear-coll coll)
+ (setf state state-!-doctype-ext2)
+ else (add-to-coll coll ch)))
+
+ (#.state-!-doctype-ext2
+ (if* (xml-space-p ch) then nil
+ elseif (eq #\> ch) then (return)
+ elseif (eq #\[ ch)
+ then (setf state state-begin-dtd)
+ else
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error
+ (concatenate 'string
+ "illegal char in DOCTYPE token: '"
+ (compute-coll-string coll) "'"))
+ ))
+
+ (#.state-!-doctype-ext3
+ (if* (xml-space-p ch) then nil
+ elseif (eq #\> ch) then (return)
+ else
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error
+ (concatenate 'string
+ "illegal char in DOCTYPE token following dtd: '"
+ (compute-coll-string coll) "'"))
+ ))
+
+ (#.state-!-doctype
+ ;; skip whitespace; possible exits: >, SYSTEM, PUBLIC, [
+ (if* (xml-space-p ch) then nil
+ elseif (xml-name-start-char-p ch)
+ then
+ (setf state state-!-doctype-ext)
+ (un-next-char ch)
+ elseif (eq #\> ch) then (return)
+ elseif (eq #\[ ch)
+ then (setf state state-begin-dtd)
+ else (xml-error
+ (concatenate 'string
+ "illegal character: '"
+ (string ch)
+ "' in <! tag: " (string tag-to-return) " "
+ (string (first contents-to-return))))
+ ))
+
+ (#.state-prereadpi
+ (if* (xml-space-p ch)
+ then nil
+ elseif (not (xml-char-p ch))
+ then (xml-error "XML is not well formed") ;; no test
+ else (un-next-char ch)
+ (setf state state-readpi)))
+
+ (#.state-readpi
+ (if* (eq #\? ch)
+ then (setf state state-readpi2)
+ elseif (not (xml-char-p ch))
+ then (xml-error "XML is not well formed") ;; no test
+ else (add-to-coll coll ch)))
+
+ (#.state-readpi2
+ (if* (eq #\> ch)
+ then (return)
+ elseif (eq #\? ch) then
+ (add-to-coll coll #\?) ;; come back here to try again
+ else (setf state state-readpi)
+ (add-to-coll coll #\?)
+ (add-to-coll coll ch)))
+
+ (#.state-findattributename0
+ (if* (xml-space-p ch) then (setf state state-findattributename)
+ elseif (eq ch empty-delim)
+ then (setf state state-noattributename)
+ else
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error
+ (concatenate 'string
+ "expected space or tag end before: '"
+ (compute-coll-string coll) "'"))))
+ (#.state-findattributename
+ ;; search until we find the start of an attribute name
+ ;; or the end of the tag
+ (if* (eq ch empty-delim)
+ then (setf state state-noattributename)
+ elseif (xml-space-p ch)
+ then nil ;; skip whitespace
+ elseif (xml-name-start-char-p ch)
+ then
+ (un-next-char ch)
+ (setf state state-attribname)
+ else
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error
+ (concatenate 'string
+ "illegal char in <?xml token: '"
+ (compute-coll-string coll) "'"))
+ ))
+
+ (#.state-attribname
+ ;; collect attribute name
+ (if* (xml-name-char-p ch) ;; starting char already passed more restrictive test
+ then
+ (add-to-coll coll ch)
+ elseif (xml-space-p ch) then
+ (setq attrib-name (compute-tag coll))
+ (clear-coll coll)
+ (setq state state-attribname2)
+ else
+ (when (not (eq #\= ch))
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error
+ (concatenate 'string
+ "illegal char in <?xml attribute token: '"
+ (compute-coll-string coll) "'"))
+ )
+ (setq attrib-name (compute-tag coll))
+ (clear-coll coll)
+ (setq state state-attribstartvalue)))
+
+ (#.state-attribname2
+ (if* (eq #\= ch) then (setq state state-attribstartvalue)
+ elseif (xml-space-p ch) then nil
+ else
+ (un-next-char ch)
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error
+ (concatenate 'string
+ "illegal char in <?xml attribute token: '"
+ (compute-coll-string coll) "'"))))
+ (#.state-attribstartvalue
+ ;; begin to collect value
+ (if* (or (eq ch #\")
+ (eq ch #\'))
+ then (setq value-delim ch)
+ (setq state state-attribvaluedelim)
+ elseif (xml-space-p ch) then nil
+ else
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error
+ (concatenate 'string
+ "expected ' or \" before <?xml attribute token value: '"
+ (compute-coll-string coll) "'"))
+ ))
+
+ (#.state-attribvaluedelim
+ (if* (eq ch value-delim)
+ then (setq attrib-value (compute-coll-string coll))
+ (clear-coll coll)
+ (push attrib-name attribs-to-return)
+ (push attrib-value attribs-to-return)
+ (setq state state-findattributename0)
+ elseif (and (xml-char-p ch) (not (eq #\< ch)))
+ then (add-to-coll coll ch)
+ else
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error
+ (concatenate 'string
+ "illegal character in attribute token value: '"
+ (compute-coll-string coll) "'"))
+ ))
+
+ (#.state-noattributename
+ (if* (eq #\> ch)
+ then
+ (return) ;; ready to build return token
+ else
+ (xml-error
+ (concatenate 'string
+ "expected '>' found: '" (string ch) "' in <?xml token"))
+ ))
+
+ (t
+ (error "need to support state:~s" state))
+ ))
+ (put-back-collector entity)
+ (case state
+ (#.state-noattributename ;; it's a bug if this state occurs with a non-empty element
+ (put-back-collector coll)
+ (if* attribs-to-return
+ then (values (cons tag-to-return
+ (nreverse attribs-to-return))
+ (if (eq tag-to-return :xml) :xml :start-tag) :end-tag)
+ else
+ (values tag-to-return :start-tag :end-tag)
+ ))
+ (#.state-readtag-end-bracket
+ ;; this is a :commant tag
+ (let ((ret (compute-coll-string coll)))
+ (put-back-collector coll)
+ (values (cons tag-to-return (list ret)) :comment :nil)))
+ (#.state-pcdata
+ (let ((next-char (collector-next coll)))
+ (put-back-collector coll)
+ (if* (zerop next-char)
+ then (values nil :eof nil)
+ else (values (compute-coll-string coll) :pcdata pcdatap))))
+ (#.state-readpi2
+ (let ((ret (compute-coll-string coll)))
+ (put-back-collector coll)
+ (values (append (list :pi tag-to-return) (list ret)) :pi nil)))
+ ((#.state-readtag-!-conditional)
+ (put-back-collector coll)
+ (values (append (list tag-to-return) contents-to-return) :start-tag
+ :end-tag))
+ ((#.state-!-contents
+ #.state-!-doctype
+ #.state-!-doctype-ext2
+ #.state-!-doctype-ext3)
+ (put-back-collector coll)
+ (values (append (list tag-to-return) (nreverse contents-to-return)) :start-tag
+ :end-tag))
+ (#.state-readtag3
+ (put-back-collector coll)
+ (values (if* attribs-to-return
+ then (cons tag-to-return
+ (nreverse attribs-to-return))
+ else tag-to-return) :start-tag :end-tag))
+ ((#.state-readtag2
+ #.state-readtag)
+ (put-back-collector coll)
+ (values (if* attribs-to-return
+ then (cons tag-to-return
+ (nreverse attribs-to-return))
+ else tag-to-return) :start-tag nil))
+ ((#.state-readtag-end2
+ #.state-readtag-end3)
+ (put-back-collector coll)
+ (values tag-to-return :end-tag nil))
+ (#.state-readtag-!-conditional7
+ (let ((ret (compute-coll-string coll)))
+ (put-back-collector coll)
+ (values (append (list :cdata) (list ret)) :cdata nil)))
+ (t
+ ;; if ch is null that means we encountered unexpected EOF
+ (when (null ch)
+ (put-back-collector coll)
+ (xml-error "unexpected end of input"))
+ (print (list tag-to-return attribs-to-return))
+ (let ((ret (compute-coll-string coll)))
+ (put-back-collector coll)
+ (error "need to support state <post>:~s ~s ~s ~s" state
+ tag-to-return
+ contents-to-return
+ ret))))
+ )))
+
+(defun swallow-xml-token (tokenbuf external-callback)
+ (declare (ignorable old-coll) (optimize (speed 3) (safety 1)))
+ (let ((xml (next-token tokenbuf external-callback nil)))
+ (if* (and (eq (fourth xml) :standalone) (stringp (fifth xml))
+ (equal (fifth xml) "yes")) then
+ (xml-error "external XML entity cannot be standalone document")
+ elseif (and (eq (sixth xml) :standalone) (stringp (seventh xml))
+ (equal (seventh xml) "yes")) then
+ (xml-error "external XML entity cannot be standalone document"))))
+
+;; return the string with entity references replaced by text
+;; normalizing will happen later
+;; we're ok on different types - just ignore IMPLIED & REQUIRED; and possibly skip FIXED
+(defun parse-default-value (value-list tokenbuf external-callback)
+ (declare (optimize (speed 3) (safety 1)))
+ (let (value-string)
+ (if* (stringp (first value-list)) then (setf value-string (first value-list))
+ elseif (eq (first value-list) :FIXED) then (setf value-string (second value-list)))
+ (let ((tmp-result (parse-xml
+ (concatenate 'string
+ "<item x='"
+ value-string
+ "'/>")
+ :external-callback external-callback
+ :general-entities
+ (iostruct-general-entities tokenbuf))))
+ (if* (stringp (first value-list)) then
+ (setf (first value-list)
+ (third (first (first tmp-result))))
+ elseif (eq (first value-list) :FIXED) then
+ (setf (second value-list)
+ (third (first (first tmp-result)))))))
+ value-list)
+
+(defun process-attlist (args attlist-data)
+ (declare (optimize (speed 3) (safety 1)))
+ (dolist (arg1 args attlist-data)
+ ;;(format t "arg1: ~s~%" arg1)
+ (dolist (item (rest arg1))
+ ;;(format t "item: ~s~%" item)
+ (when (eq :ATTLIST (first item))
+ (let* ((name (second item))
+ (name-data (assoc name attlist-data))
+ (new-name-data (rest name-data)))
+ ;;(format t "name: ~s name-data: ~s new-name-data: ~s~%" name name-data new-name-data)
+ (dolist (attrib-data (rest (rest item)))
+ ;;(format t "attrib-data: ~s~%" attrib-data)
+ #+ignore
+ (setf (rest (rest attrib-data))
+ (parse-default-value (rest (rest attrib-data)) tokenbuf external-callback))
+ (when (not (assoc (first attrib-data) new-name-data))
+ (setf new-name-data (acons (first attrib-data) (rest attrib-data) new-name-data))))
+ (if* name-data then
+ (rplacd (assoc name attlist-data) (nreverse new-name-data))
+ else (setf attlist-data (acons name (nreverse new-name-data) attlist-data))))))))
+
+(provide :pxml)
--- /dev/null
+;;
+;; copyright (c) 1986-2000 Franz Inc, Berkeley, CA
+;;
+;; This code is free software; you can redistribute it and/or
+;; modify it under the terms of the version 2.1 of
+;; the GNU Lesser General Public License as published by
+;; the Free Software Foundation, as clarified by the AllegroServe
+;; prequel found in license-allegroserve.txt.
+;;
+;; This code is distributed in the hope that it will be useful,
+;; but without any warranty; without even the implied warranty of
+;; merchantability or fitness for a particular purpose. See the GNU
+;; Lesser General Public License for more details.
+;;
+;; Version 2.1 of the GNU Lesser General Public License is in the file
+;; license-lgpl.txt that was distributed with this file.
+;; If it is not present, you can access it from
+;; http://www.gnu.org/copyleft/lesser.txt (until superseded by a newer
+;; version) or write to the Free Software Foundation, Inc., 59 Temple Place,
+;; Suite 330, Boston, MA 02111-1307 USA
+;;
+;; $Id: pxml3.cl,v 1.1 2002/10/15 12:23:03 kevin Exp $
+
+(in-package :net.xml.parser)
+
+(pxml-dribble-bug-hook "$Id: pxml3.cl,v 1.1 2002/10/15 12:23:03 kevin Exp $")
+
+(defvar *debug-dtd* nil)
+
+(defun parse-dtd (tokenbuf
+ external external-callback)
+ (declare (optimize (speed 3) (safety 1)))
+ (let ((guts)
+ (include-count 0))
+ (loop
+ (multiple-value-bind (val kind)
+ (next-dtd-token tokenbuf
+ external include-count external-callback)
+ (if* (eq kind :end-dtd) then
+ (return (nreverse guts))
+ elseif (eq kind :include) then
+ (incf include-count)
+ elseif (eq kind :ignore) then nil
+ elseif (eq kind :include-end) then
+ (if* (> include-count 0) then (decf include-count)
+ else (xml-error "unexpected ']]>' token"))
+ else (when (iostruct-do-entity tokenbuf) (push val guts)))))))
+
+(defparameter dtd-parser-states ())
+
+(macrolet ((def-dtd-parser-state (var val)
+ `(progn (eval-when (compile load eval) (defconstant ,var ,val))
+ (pushnew '(,val . ,var) dtd-parser-states :key #'car))))
+ (def-dtd-parser-state state-dtdstart 0)
+ (def-dtd-parser-state state-tokenstart 1)
+ (def-dtd-parser-state state-dtd-? 2)
+ (def-dtd-parser-state state-dtd-! 3)
+ (def-dtd-parser-state state-dtd-comment 4)
+ (def-dtd-parser-state state-dtd-!-token 5)
+ (def-dtd-parser-state state-dtd-!-element 6)
+ (def-dtd-parser-state state-dtd-!-element-name 7)
+ (def-dtd-parser-state state-dtd-!-element-content 8)
+ (def-dtd-parser-state state-dtd-!-element-type 9)
+ (def-dtd-parser-state state-dtd-!-element-type-paren 10)
+ (def-dtd-parser-state state-dtd-!-element-type-token 11)
+ (def-dtd-parser-state state-dtd-!-element-type-end 12)
+ (def-dtd-parser-state state-dtd-!-element-type-paren-name 13)
+ (def-dtd-parser-state state-dtd-!-element-type-paren-pcd 14)
+ (def-dtd-parser-state state-dtd-!-element-type-paren-pcd2 15)
+ (def-dtd-parser-state state-dtd-!-element-type-paren-pcd3 16)
+ (def-dtd-parser-state state-dtd-!-element-type-paren-pcd4 17)
+ (def-dtd-parser-state state-dtd-!-element-type-paren-pcd5 18)
+ (def-dtd-parser-state state-dtd-!-element-type-paren-pcd6 19)
+ (def-dtd-parser-state state-dtd-!-element-type-paren-pcd7 20)
+ (def-dtd-parser-state state-dtd-!-element-type-paren-pcd8 21)
+ (def-dtd-parser-state state-dtd-!-element-type-paren-pcd9 22)
+ (def-dtd-parser-state state-dtd-!-element-type-paren-name2 23)
+ ;;(def-dtd-parser-state state-dtd-!-element-type-paren-seq 24) folded into choice
+ (def-dtd-parser-state state-dtd-!-element-type-paren-choice 25)
+ (def-dtd-parser-state state-dtd-!-element-type-paren2 26)
+ (def-dtd-parser-state state-dtd-!-element-type-paren-choice-name 27)
+ (def-dtd-parser-state state-dtd-!-element-type-paren-choice-paren 28)
+ (def-dtd-parser-state state-dtd-!-element-type-paren-choice-name2 29)
+ (def-dtd-parser-state state-dtd-!-element-type-paren3 30)
+ (def-dtd-parser-state state-dtd-!-element-type-paren-choice-name3 31)
+ (def-dtd-parser-state state-dtd-!-attlist 32)
+ (def-dtd-parser-state state-dtd-!-attlist-name 33)
+ (def-dtd-parser-state state-dtd-!-attdef 34)
+ (def-dtd-parser-state state-dtd-!-attdef-name 35)
+ (def-dtd-parser-state state-dtd-!-attdef-type 36)
+ ;;(def-dtd-parser-state state-dtd-!-attdef-enumeration 37)
+ (def-dtd-parser-state state-dtd-!-attdef-decl 38)
+ (def-dtd-parser-state state-dtd-!-attdef-decl-type 39)
+ (def-dtd-parser-state state-dtd-!-attdef-decl-value 40)
+ (def-dtd-parser-state state-dtd-!-attdef-decl-value2 41)
+ (def-dtd-parser-state state-dtd-!-attdef-decl-value3 42)
+ (def-dtd-parser-state state-dtd-!-attdef-decl-value4 43)
+ (def-dtd-parser-state state-dtd-!-attdef-decl-value5 44)
+ (def-dtd-parser-state state-dtd-!-attdef-decl-value6 45)
+ (def-dtd-parser-state state-dtd-!-attdef-decl-value7 46)
+ (def-dtd-parser-state state-dtd-!-attdef-notation 47)
+ (def-dtd-parser-state state-dtd-!-attdef-notation2 48)
+ (def-dtd-parser-state state-dtd-!-attdef-notation3 49)
+ (def-dtd-parser-state state-dtd-!-attdef-notation4 50)
+ (def-dtd-parser-state state-dtd-!-attdef-type2 51)
+ (def-dtd-parser-state state-dtd-!-entity 52)
+ (def-dtd-parser-state state-dtd-!-entity2 53)
+ (def-dtd-parser-state state-dtd-!-entity3 54)
+ (def-dtd-parser-state state-dtd-!-entity4 55)
+ (def-dtd-parser-state state-dtd-!-entity-value 56)
+ (def-dtd-parser-state state-dtd-!-entity5 57)
+ (def-dtd-parser-state state-dtd-!-entity6 58)
+ (def-dtd-parser-state state-!-dtd-system 59)
+ (def-dtd-parser-state state-!-dtd-public 60)
+ (def-dtd-parser-state state-!-dtd-system2 61)
+ (def-dtd-parser-state state-!-dtd-system3 62)
+ (def-dtd-parser-state state-!-dtd-system4 63)
+ (def-dtd-parser-state state-!-dtd-system5 64)
+ (def-dtd-parser-state state-!-dtd-system6 65)
+ (def-dtd-parser-state state-!-dtd-system7 66)
+ (def-dtd-parser-state state-!-dtd-public2 67)
+ (def-dtd-parser-state state-dtd-!-notation 68)
+ (def-dtd-parser-state state-dtd-!-notation2 69)
+ (def-dtd-parser-state state-dtd-!-notation3 70)
+ (def-dtd-parser-state state-dtd-?-2 71)
+ (def-dtd-parser-state state-dtd-?-3 72)
+ (def-dtd-parser-state state-dtd-?-4 73)
+ (def-dtd-parser-state state-dtd-comment2 74)
+ (def-dtd-parser-state state-dtd-comment3 75)
+ (def-dtd-parser-state state-dtd-comment4 76)
+ (def-dtd-parser-state state-dtd-!-entity7 77)
+ (def-dtd-parser-state state-dtd-!-attdef-notation5 78)
+ (def-dtd-parser-state state-!-dtd-public3 79)
+ (def-dtd-parser-state state-dtd-!-cond 80)
+ (def-dtd-parser-state state-dtd-!-cond2 81)
+ (def-dtd-parser-state state-dtd-!-include 82)
+ (def-dtd-parser-state state-dtd-!-ignore 83)
+ (def-dtd-parser-state state-dtd-!-include2 84)
+ (def-dtd-parser-state state-dtd-!-include3 85)
+ (def-dtd-parser-state state-dtd-!-include4 86)
+ (def-dtd-parser-state state-dtd-!-ignore2 87)
+ (def-dtd-parser-state state-dtd-!-ignore3 88)
+ (def-dtd-parser-state state-dtd-!-ignore4 89)
+ (def-dtd-parser-state state-dtd-!-ignore5 90)
+ (def-dtd-parser-state state-dtd-!-ignore6 91)
+ (def-dtd-parser-state state-dtd-!-ignore7 92)
+ )
+
+(defun next-dtd-token (tokenbuf
+ external include-count external-callback)
+ (declare (:fbound parse-default-value) (optimize (speed 3) (safety 1)))
+ (macrolet ((add-to-entity-buf (entity-symbol p-value)
+ `(progn
+ (push (make-tokenbuf :cur 0 :max (length ,p-value) :data ,p-value)
+ (iostruct-entity-bufs tokenbuf))))
+
+ (un-next-char (ch)
+ `(push ,ch (iostruct-unget-char tokenbuf)))
+
+ (clear-coll (coll)
+ `(setf (collector-next ,coll) 0))
+
+ (add-to-coll (coll ch)
+ `(let ((.next. (collector-next ,coll)))
+ (if* (>= .next. (collector-max ,coll))
+ then (grow-and-add ,coll ,ch)
+ else (setf (schar (collector-data ,coll) .next.)
+ ,ch)
+ (setf (collector-next ,coll) (1+ .next.)))))
+
+ (to-preferred-case (ch)
+ ;; should check the case mode
+ `(char-downcase ,ch))
+
+ )
+ (let ((state state-dtdstart)
+ (coll (get-collector))
+ (entity (get-collector))
+ (tag-to-return)
+ (contents-to-return)
+ (pending (list nil))
+ (pending-type)
+ (value-delim)
+ (public-string)
+ (char-code 0)
+ (check-count 0)
+ (ignore-count 0)
+ (reference-save-state)
+ (prefp)
+ (entityp)
+ (pentityp)
+ (prev-state)
+ (ch))
+ (loop
+ (setq ch (get-next-char tokenbuf))
+ (when *debug-dtd*
+ (format t "~@<dtd ~:Ichar: ~s ~:_state: ~s ~:_contents: ~s ~:_pending: ~s ~:_pending-type: ~s ~:_entity-names ~s~:>~%"
+ ch (or (cdr (assoc state dtd-parser-states)) state)
+ contents-to-return pending pending-type
+ (iostruct-entity-names tokenbuf)))
+ (if* (null ch)
+ then (setf prev-state state)
+ (setf state :eof)
+ (return) ;; eof -- exit loop
+ )
+
+ (case state
+ (#.state-dtdstart
+ (if* (and (eq #\] ch)
+ external (> include-count 0)) then
+ (setf state state-dtd-!-include3)
+ elseif (and (eq #\] ch) (not external)) then (return)
+ elseif (eq #\< ch) then (setf state state-tokenstart)
+ elseif (xml-space-p ch) then nil
+ elseif (eq #\% ch) then (external-param-reference tokenbuf coll external-callback)
+ else (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD characters, starting at: '"
+ (compute-coll-string coll)
+ "'"))
+ ))
+ (#.state-dtd-!-include3
+ (if* (eq #\] ch) then (setf state state-dtd-!-include4)
+ else
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD token, starting at: ']"
+ (compute-coll-string coll)
+ "'"))))
+ (#.state-dtd-!-include4
+ (if* (eq #\> ch) then (return)
+ else
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD token, starting at: ']]"
+ (compute-coll-string coll)
+ "'"))))
+ #+ignore
+ (#.state-dtd-pref
+ (if* (xml-name-start-char-p ch) then
+ (add-to-coll coll ch)
+ (setf state state-dtd-pref2)
+ else (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD parameter reference name, starting at: '"
+ (compute-coll-string coll)
+ "'"))
+ ))
+ (#.state-tokenstart
+ (if* (eq #\? ch) then (setf state state-dtd-?)
+ elseif (eq #\! ch) then (setf state state-dtd-!)
+ else (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD characters, starting at: '<"
+ (compute-coll-string coll)
+ "'"))
+ ))
+ (#.state-dtd-?
+ (if* (xml-name-char-p ch)
+ then
+ (add-to-coll coll ch)
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ else
+ (when (not (xml-space-p ch))
+ (xml-error (concatenate 'string
+ "expecting name following: '<?"
+ (compute-coll-string coll)
+ "' ; got: '" (string ch) "'"))
+ )
+ (when (= (collector-next coll) 0)
+ (xml-error "null <? token"))
+ (if* (and (= (collector-next coll) 3)
+ (or (eq (elt (collector-data coll) 0) #\X)
+ (eq (elt (collector-data coll) 0) #\x))
+ (or (eq (elt (collector-data coll) 1) #\M)
+ (eq (elt (collector-data coll) 1) #\m))
+ (or (eq (elt (collector-data coll) 2) #\L)
+ (eq (elt (collector-data coll) 2) #\l)))
+ then
+ (xml-error "<?xml not allowed in dtd")
+ else
+ (setq tag-to-return (compute-tag coll))
+ (setf state state-dtd-?-2))
+ (clear-coll coll)))
+ (#.state-dtd-?-2
+ (if* (xml-space-p ch)
+ then nil
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ elseif (not (xml-char-p ch))
+ then (xml-error "XML is not well formed") ;; no test
+ else (add-to-coll coll ch)
+ (setf state state-dtd-?-3)))
+ (#.state-dtd-?-3
+ (if* (eq #\? ch)
+ then (setf state state-dtd-?-4)
+ elseif (not (xml-char-p ch))
+ then (xml-error "XML is not well formed") ;; no test
+ else (add-to-coll coll ch)))
+ (#.state-dtd-?-4
+ (if* (eq #\> ch)
+ then
+ (push (compute-coll-string coll) contents-to-return)
+ (clear-coll coll)
+ (return)
+ else (setf state state-dtd-?-3)
+ (add-to-coll coll #\?)
+ (add-to-coll coll ch)))
+ (#.state-dtd-!
+ (if* (eq #\- ch) then (setf state state-dtd-comment)
+ elseif (xml-name-start-char-p ch) then (setf state state-dtd-!-token)
+ (un-next-char ch)
+ elseif (and (eq #\[ ch) external) then
+ (setf state state-dtd-!-cond)
+ else (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD characters, starting at: '<!"
+ (compute-coll-string coll)
+ "'"))
+ ))
+ (#.state-dtd-!-cond
+ (if* (xml-space-p ch) then nil
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ elseif (eq #\I ch) then (setf state state-dtd-!-cond2)
+ else (error "this should not happen")
+ ))
+ (#.state-dtd-!-cond2
+ (if* (eq #\N ch) then (setf state state-dtd-!-include)
+ (setf check-count 2)
+ elseif (eq #\G ch) then (setf state state-dtd-!-ignore)
+ (setf check-count 2)
+ else (xml-error "<![ external DTD token not INCLUDE or IGNORE")
+ ))
+ (#.state-dtd-!-ignore
+ (if* (and (eq check-count 5) (eq ch #\E)) then
+ (setf state state-dtd-!-ignore2)
+ elseif (eq ch (elt "IGNORE" check-count)) then
+ (incf check-count)
+ else (xml-error "<![ external DTD token not INCLUDE or IGNORE")
+ ))
+ (#.state-dtd-!-ignore2
+ (if* (xml-space-p ch) then nil
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ elseif (eq #\[ ch) then (setf state state-dtd-!-ignore3)
+ (incf ignore-count)
+ else (xml-error "'[' missing after '<![Ignore'")))
+ (#.state-dtd-!-ignore3
+ (if* (eq #\< ch) then (setf state state-dtd-!-ignore4)
+ elseif (eq #\] ch) then (setf state state-dtd-!-ignore5)))
+ (#.state-dtd-!-ignore4
+ (if* (eq #\! ch) then (setf state state-dtd-!-ignore6)
+ else (un-next-char ch)
+ (setf state state-dtd-!-ignore3)))
+ (#.state-dtd-!-ignore5
+ (if* (eq #\] ch) then (setf state state-dtd-!-ignore7)
+ else (un-next-char ch)
+ (setf state state-dtd-!-ignore3)))
+ (#.state-dtd-!-ignore6
+ (if* (eq #\[ ch) then (incf ignore-count)
+ (setf state state-dtd-!-ignore3)
+ else (un-next-char ch)
+ (setf state state-dtd-!-ignore3)))
+ (#.state-dtd-!-ignore7
+ (if* (eq #\> ch) then (decf ignore-count)
+ (when (= ignore-count 0) (return))
+ else (un-next-char ch)
+ (setf state state-dtd-!-ignore3)))
+ (#.state-dtd-!-include
+ (if* (and (eq check-count 6) (eq ch #\E)) then
+ (setf state state-dtd-!-include2)
+ elseif (eq ch (elt "INCLUD" check-count)) then
+ (incf check-count)
+ else (xml-error "<![ external DTD token not INCLUDE or IGNORE")
+ ))
+ (#.state-dtd-!-include2
+ (if* (xml-space-p ch) then nil
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ elseif (eq #\[ ch) then (return)
+ else (xml-error "'[' missing after '<![INCLUDE'")))
+ (#.state-dtd-comment
+ (if* (eq #\- ch)
+ then (setf state state-dtd-comment2)
+ (setf tag-to-return :comment)
+ else (clear-coll coll)
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal token following '<![-', starting at '<!-"
+ (compute-coll-string coll)
+ "'"))
+ ))
+ (#.state-dtd-comment2
+ (if* (eq #\- ch)
+ then (setf state state-dtd-comment3)
+ else (add-to-coll coll ch)))
+ (#.state-dtd-comment3
+ (if* (eq #\- ch)
+ then (setf state state-dtd-comment4)
+ else (setf state state-dtd-comment2)
+ (add-to-coll coll #\-) (add-to-coll coll ch)))
+ (#.state-dtd-comment4
+ (if* (eq #\> ch)
+ then (push (compute-coll-string coll) contents-to-return)
+ (clear-coll coll)
+ (return)
+ else (clear-coll coll)
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal token following '--' comment terminator, starting at '--"
+ (compute-coll-string coll)
+ "'"))
+ ))
+ (#.state-dtd-!-token
+ (if* (xml-name-char-p ch) then (add-to-coll coll ch)
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ elseif (xml-space-p ch) then
+ (setf tag-to-return (compute-tag coll))
+ (clear-coll coll)
+ (if* (eq tag-to-return :ELEMENT) then (setf state state-dtd-!-element)
+ elseif (eq tag-to-return :ATTLIST) then
+ (setf state state-dtd-!-attlist)
+ elseif (eq tag-to-return :ENTITY) then
+ (setf entityp t)
+ (setf state state-dtd-!-entity)
+ elseif (eq tag-to-return :NOTATION) then
+ (setf state state-dtd-!-notation)
+ else
+ (xml-error (concatenate 'string
+ "illegal DTD characters, starting at: '<!"
+ (string tag-to-return)
+ "'")))
+ else (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD characters, starting at: '<!"
+ (compute-coll-string coll)
+ "'"))
+ ))
+ (#.state-dtd-!-notation
+ (if* (xml-space-p ch) then nil
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ elseif (xml-name-start-char-p ch) then
+ (add-to-coll coll ch)
+ (setf state state-dtd-!-notation2)
+ else (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD characters, starting at: '<!NOTATION "
+ (compute-coll-string coll)
+ "'"))
+ ))
+ (#.state-dtd-!-notation2
+ (if* (xml-name-char-p ch) then (add-to-coll coll ch)
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ elseif (xml-space-p ch) then
+ (push (compute-tag coll) contents-to-return)
+ (clear-coll coll)
+ (setf state state-dtd-!-notation3)
+ else (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD <!NOTATION name: "
+ (compute-coll-string coll)
+ "'"))
+ ))
+ (#.state-dtd-!-notation3
+ (if* (xml-space-p ch) then nil
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ elseif (xml-name-char-p ch) then
+ (add-to-coll coll ch)
+ (setf state state-dtd-!-entity6)
+ else (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD <!NOTATION spec for "
+ (string (first contents-to-return))
+ ": '"
+ (compute-coll-string coll)
+ "'"))
+ ))
+ (#.state-dtd-!-entity
+ (if* (eq #\% ch) then (push :param contents-to-return)
+ (setf pentityp t)
+ (setf state state-dtd-!-entity2)
+ elseif (xml-name-start-char-p ch) then
+ (add-to-coll coll ch)
+ (setf pending nil)
+ (setf state state-dtd-!-entity3)
+ elseif (xml-space-p ch) then nil
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ else (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD characters, starting at: '<!ENTITY "
+ (compute-coll-string coll)
+ "'"))
+ ))
+ (#.state-dtd-!-entity2
+ (if* (xml-space-p ch) then (setf state state-dtd-!-entity7)
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ else (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD <!ENTITY spec for "
+ (string (first contents-to-return))
+ ": '"
+ (compute-coll-string coll)
+ "'"))
+ ))
+ (#.state-dtd-!-entity3
+ (if* (xml-name-char-p ch) then (add-to-coll coll ch)
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ elseif (xml-space-p ch) then
+ (push (compute-tag coll) contents-to-return)
+ (setf contents-to-return
+ (nreverse contents-to-return))
+ (clear-coll coll)
+ (setf state state-dtd-!-entity4)
+ else (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD <!ENTITY name: "
+ (compute-coll-string coll)
+ "'"))
+ ))
+ (#.state-dtd-!-entity4
+ (if* (xml-space-p ch) then nil
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ elseif (or (eq #\' ch) (eq #\" ch)) then
+ (setf value-delim ch)
+ (setf state state-dtd-!-entity-value)
+ elseif (xml-name-start-char-p ch) then
+ (add-to-coll coll ch)
+ (setf state state-dtd-!-entity6)
+ else (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD <!ENTITY spec: '"
+ (compute-coll-string coll)
+ "'"))
+ ))
+ (#.state-dtd-!-entity6
+ (if* (xml-name-char-p ch) ;; starting char already passed more restrictive test
+ then
+ (add-to-coll coll ch)
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ else
+ (when (not (xml-space-p ch))
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error
+ (concatenate 'string
+ "illegal character in '"
+ (compute-coll-string coll)
+ "' in <! tag: " (string tag-to-return) " "
+ (string (first contents-to-return))
+ ))
+ )
+ (let ((token (compute-tag coll)))
+ (push token contents-to-return)
+ (clear-coll coll)
+ (if* (eq :SYSTEM token) then (setf state state-!-dtd-system)
+ elseif (eq :PUBLIC token) then (setf state state-!-dtd-public)
+ else (xml-error
+ (concatenate 'string
+ "expected 'SYSTEM' or 'PUBLIC' got '"
+ (string (first contents-to-return))
+ "' in <! tag: " (string tag-to-return) " "
+ (string (second contents-to-return))))
+ )
+ )))
+ (#.state-dtd-!-entity7
+ (if* (xml-space-p ch) then nil
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ elseif (xml-name-start-char-p ch) then
+ (add-to-coll coll ch)
+ (setf state state-dtd-!-entity3)
+ else (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD <!ENTITY % name: "
+ (compute-coll-string coll)
+ "'"))
+ ))
+ (#.state-!-dtd-public
+ (if* (xml-space-p ch) then nil
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ elseif (or (eq #\" ch) (eq #\' ch)) then
+ (setf state state-!-dtd-public2)
+ (setf value-delim ch)
+ else (xml-error
+ (concatenate 'string
+ "expected quote or double-quote got: '"
+ (string ch)
+ "' in <! tag: " (string tag-to-return) " "
+ (string (second contents-to-return)) " "
+ (string (first contents-to-return))
+ ))))
+ (#.state-!-dtd-public2
+ (if* (eq value-delim ch) then
+ (push (setf public-string
+ (normalize-public-value
+ (compute-coll-string coll))) contents-to-return)
+ (clear-coll coll)
+ (setf state state-!-dtd-public3)
+ elseif (pub-id-char-p ch) then (add-to-coll coll ch)
+ else (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error
+ (concatenate 'string
+ "illegal character in string: '"
+ (compute-coll-string coll) "'"))
+ ))
+ (#.state-!-dtd-public3
+ (if* (xml-space-p ch) then (setf state state-!-dtd-system)
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ elseif (and (not entityp)
+ (eq #\> ch)) then
+ (setf state state-!-dtd-system)
+ (return)
+ else
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error
+ (concatenate 'string
+ "Expected space before: '"
+ (compute-coll-string coll) "'"))
+ ))
+ (#.state-!-dtd-system
+ (if* (xml-space-p ch) then nil
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ elseif (or (eq #\" ch) (eq #\' ch)) then
+ (setf state state-!-dtd-system2)
+ (setf value-delim ch)
+ elseif (and (not entityp)
+ (eq #\> ch)) then (return)
+ else (xml-error
+ (concatenate 'string
+ "expected quote or double-quote got: '"
+ (string ch)
+ "' in <! tag: " (string tag-to-return) " "
+ (string (second contents-to-return)) " "
+ (string (first contents-to-return))
+ ))))
+ (#.state-!-dtd-system2
+ (when (not (xml-char-p ch))
+ (xml-error "XML is not well formed")) ;; not tested
+ (if* (eq value-delim ch) then
+ (let ((entity-symbol (first (last contents-to-return)))
+ (system-string (compute-coll-string coll)))
+ (if* pentityp then
+ (when (not (assoc entity-symbol (iostruct-parameter-entities tokenbuf)))
+ (setf (iostruct-parameter-entities tokenbuf)
+ (acons entity-symbol (list (parse-uri system-string)
+ tag-to-return
+ public-string)
+ (iostruct-parameter-entities tokenbuf)))
+ )
+ else
+ (when (not (assoc entity-symbol (iostruct-general-entities tokenbuf)))
+ (setf (iostruct-general-entities tokenbuf)
+ (acons entity-symbol (list (parse-uri system-string)
+ tag-to-return
+ public-string
+ )
+ (iostruct-general-entities tokenbuf)))
+ (when (not (assoc entity-symbol (iostruct-general-entities tokenbuf)))
+ (setf (iostruct-general-entities tokenbuf)
+ (acons entity-symbol (list (parse-uri system-string)
+ tag-to-return
+ public-string
+ )
+ (iostruct-general-entities tokenbuf))))
+ )
+ )
+ (push system-string contents-to-return))
+ (clear-coll coll)
+ (setf state state-!-dtd-system3)
+ else (add-to-coll coll ch)))
+ (#.state-!-dtd-system3
+ (if* (xml-space-p ch) then (setf state state-!-dtd-system4)
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ elseif (eq #\> ch) then (return)
+ else
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD <!ENTITY value for "
+ (string (first (nreverse contents-to-return)))
+ ": '"
+ (compute-coll-string coll)
+ "'"))
+ ))
+ (#.state-!-dtd-system4
+ (if* (xml-space-p ch) then nil
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ elseif (and (not pentityp) (xml-name-start-char-p ch)) then
+ (add-to-coll coll ch)
+ (setf state state-!-dtd-system5)
+ elseif (eq #\> ch) then (return)
+ else (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD <!ENTITY value for "
+ (string (first (nreverse contents-to-return)))
+ ": '"
+ (compute-coll-string coll)
+ "'"))
+ ))
+ (#.state-!-dtd-system5
+ (if* (xml-name-char-p ch) then
+ (add-to-coll coll ch)
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ elseif (xml-space-p ch) then
+ (let ((token (compute-tag coll)))
+ (when (not (eq :NDATA token))
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD <!ENTITY value for "
+ (string (first (nreverse contents-to-return)))
+ ": '"
+ (compute-coll-string coll)
+ "'"))
+ )
+ (clear-coll coll)
+ (push token contents-to-return)
+ (setf state state-!-dtd-system6))
+ else (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD <!ENTITY value for "
+ (string (first (nreverse contents-to-return)))
+ ": '"
+ (compute-coll-string coll)
+ "'"))
+ ))
+ (#.state-!-dtd-system6
+ (if* (xml-space-p ch) then nil
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ elseif (xml-name-start-char-p ch) then
+ (add-to-coll coll ch)
+ (setf state state-!-dtd-system7)
+ else (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD <!ENTITY value for "
+ (string (first (nreverse contents-to-return)))
+ ": '"
+ (compute-coll-string coll)
+ "'"))
+ ))
+ (#.state-!-dtd-system7
+ (if* (xml-name-char-p ch) then
+ (add-to-coll coll ch)
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ elseif (xml-space-p ch) then
+ (push (compute-tag coll) contents-to-return)
+ (clear-coll coll)
+ (setf state state-dtd-!-entity5) ;; just looking for space, >
+ elseif (eq #\> ch) then
+ (push (compute-tag coll) contents-to-return)
+ (clear-coll coll)
+ (return)
+ else (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD <!ENTITY value for "
+ (string (first (nreverse contents-to-return)))
+ ": '"
+ (compute-coll-string coll)
+ "'"))
+ ))
+ (#.state-dtd-!-entity-value
+ (if* (eq ch value-delim) then
+ (let ((tmp (compute-coll-string coll)))
+ (when (> (length tmp) 0)
+ (when (null (first pending)) (setf pending (rest pending)))
+ (push tmp pending)))
+ (if* (> (length pending) 1) then
+ (push (nreverse pending) contents-to-return)
+ else (push (first pending) contents-to-return))
+ (setf pending (list nil))
+ (setf state state-dtd-!-entity5)
+ (clear-coll coll)
+ (if* pentityp then
+ (when (not (assoc (third contents-to-return)
+ (iostruct-parameter-entities tokenbuf)))
+ (setf (iostruct-parameter-entities tokenbuf)
+ (acons (third contents-to-return)
+ (first contents-to-return)
+ (iostruct-parameter-entities tokenbuf))))
+ else
+ (when (not (assoc (second contents-to-return)
+ (iostruct-general-entities tokenbuf)))
+ (setf (iostruct-general-entities tokenbuf)
+ (acons (second contents-to-return)
+ (first contents-to-return)
+ (iostruct-general-entities tokenbuf)))))
+ elseif (eq #\& ch) then
+ (setf reference-save-state state-dtd-!-entity-value)
+ (setf state state-dtd-!-attdef-decl-value3)
+ elseif (eq #\% ch) then
+ (setf prefp t)
+ (setf reference-save-state state-dtd-!-entity-value)
+ (setf state state-dtd-!-attdef-decl-value3)
+ elseif (xml-char-p ch)
+ then (add-to-coll coll ch)
+ else (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD <!ENTITY value for "
+ (string (first contents-to-return))
+ ": '"
+ (compute-coll-string coll)
+ "'"))
+ ))
+ (#.state-dtd-!-entity5
+ (if* (xml-space-p ch) then nil
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ elseif (eq #\> ch) then (return)
+ else (clear-coll coll)
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD contents following <!ENTITY spec for "
+ (string (first contents-to-return))
+ ": '"
+ (compute-coll-string coll)
+ "'"))
+ ))
+ (#.state-dtd-!-attlist
+ (if* (xml-name-start-char-p ch) then (setf state state-dtd-!-attlist-name)
+ (un-next-char ch)
+ elseif (xml-space-p ch) then nil
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ else (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD characters, starting at: '<!ATTLIST "
+ (compute-coll-string coll)
+ "'"))))
+ (#.state-dtd-!-attlist-name
+ (if* (xml-name-char-p ch) then (add-to-coll coll ch)
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ elseif (xml-space-p ch) then
+ (push (compute-tag coll *package*)
+ contents-to-return)
+ (clear-coll coll)
+ (setf state state-dtd-!-attdef)
+ elseif (eq #\> ch) then
+ (push (compute-tag coll *package*)
+ contents-to-return)
+ (clear-coll coll)
+ (return)
+ else (push (compute-tag coll)
+ contents-to-return)
+ (clear-coll coll)
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD <!ATTLIST content spec for "
+ (string (first contents-to-return))
+ ": '"
+ (compute-coll-string coll)
+ "'"))
+ ))
+ (#.state-dtd-!-attdef
+ (if* (xml-space-p ch) then nil
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ elseif (xml-name-start-char-p ch) then
+ (un-next-char ch)
+ (setf state state-dtd-!-attdef-name)
+ elseif (eq #\> ch) then (return)
+ else (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD <!ATTLIST content spec for "
+ (string (first contents-to-return))
+ ": '"
+ (compute-coll-string coll)
+ "'"))
+ ))
+ (#.state-dtd-!-attdef-name
+ (if* (xml-name-char-p ch) then (add-to-coll coll ch)
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ elseif (xml-space-p ch) then
+ (setf (first pending) (compute-tag coll *package*))
+ (clear-coll coll)
+ (setf state state-dtd-!-attdef-type)
+ else (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD <!ATTLIST type spec for "
+ (string (first contents-to-return))
+ ": '"
+ (compute-coll-string coll)
+ "'"))
+ ))
+ (#.state-dtd-!-attdef-type
+ (if* (xml-space-p ch) then nil
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ else (un-next-char ch)
+ ;; let next state do all other checking
+ (setf state state-dtd-!-attdef-type2)))
+ (#.state-dtd-!-attdef-type2
+ ;; can only be one of a few tokens, but wait until token built to check
+ (if* (xml-name-char-p ch) then (add-to-coll coll ch)
+ elseif (and (eq #\( ch) (= 0 (length (compute-coll-string coll)))) then
+ (push (list :enumeration) pending)
+ (setf state state-dtd-!-attdef-notation2)
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ elseif (xml-space-p ch) then
+ (let ((token (compute-tag coll)))
+ (when (and (not (eq :CDATA token))
+ (not (eq :ID token))
+ (not (eq :IDREF token))
+ (not (eq :IDREFS token))
+ (not (eq :ENTITY token))
+ (not (eq :ENTITIES token))
+ (not (eq :NMTOKEN token))
+ (not (eq :NMTOKENS token))
+ (not (eq :NOTATION token)))
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD <!ATTLIST type spec for "
+ (string (first contents-to-return))
+ ": '"
+ (compute-coll-string coll)
+ "'")))
+ (if* (eq token :NOTATION) then
+ (push (list token) pending)
+ (setf state state-dtd-!-attdef-notation)
+ else
+ (push token pending)
+ (setf state state-dtd-!-attdef-decl))
+ )
+ (clear-coll coll)
+ else (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD <!ATTLIST type spec for "
+ (string (first contents-to-return))
+ ": '"
+ (compute-coll-string coll)
+ "'"))
+ ))
+ (#.state-dtd-!-attdef-notation
+ (if* (xml-space-p ch) then nil
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ elseif (eq #\( ch) then (setf state state-dtd-!-attdef-notation2)
+ else (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD <!ATTLIST type spec for "
+ (string (first contents-to-return))
+ ": '"
+ (compute-coll-string coll)
+ "'"))
+ ))
+ (#.state-dtd-!-attdef-notation2
+ (if* (xml-space-p ch) then nil
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ elseif (xml-name-start-char-p ch) then
+ (setf state state-dtd-!-attdef-notation3)
+ (add-to-coll coll ch)
+ elseif (and (xml-name-char-p ch) (listp (first pending))
+ (eq :enumeration (first (reverse (first pending))))) then
+ (setf state state-dtd-!-attdef-notation3)
+ (add-to-coll coll ch)
+ else (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD <!ATTLIST type spec for "
+ (string (first contents-to-return))
+ ": '"
+ (compute-coll-string coll)
+ "'"))
+ ))
+ (#.state-dtd-!-attdef-notation3
+ (if* (xml-name-char-p ch) then (add-to-coll coll ch)
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ elseif (xml-space-p ch) then
+ (push (compute-tag coll) (first pending))
+ (clear-coll coll)
+ (setf state state-dtd-!-attdef-notation4)
+ elseif (eq #\| ch) then
+ (push (compute-tag coll) (first pending))
+ (clear-coll coll)
+ (setf state state-dtd-!-attdef-notation2)
+ elseif (eq #\) ch) then
+ (push (compute-tag coll) (first pending))
+ (clear-coll coll)
+ (setf (first pending) (nreverse (first pending)))
+ ;;(setf state state-dtd-!-attdef-decl)
+ (setf state state-dtd-!-attdef-notation5)
+ else (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD <!ATTLIST type spec for "
+ (string (first contents-to-return))
+ ": '"
+ (compute-coll-string coll)
+ "'"))
+ ))
+ (#.state-dtd-!-attdef-notation5
+ (if* (xml-space-p ch) then (setf state state-dtd-!-attdef-decl)
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ else
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error
+ (concatenate 'string
+ "Expected space before: '"
+ (compute-coll-string coll) "'"))))
+ (#.state-dtd-!-attdef-notation4
+ (if* (xml-space-p ch) then nil
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ elseif (xml-name-char-p ch) then (add-to-coll coll ch)
+ (setf state state-dtd-!-attdef-notation3)
+ elseif (eq #\| ch) then (setf state state-dtd-!-attdef-notation2)
+ elseif (eq #\) ch) then (setf state state-dtd-!-attdef-decl)
+ (setf (first pending) (nreverse (first pending)))
+ else (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD <!ATTLIST type spec for "
+ (string (first contents-to-return))
+ ": '"
+ (compute-coll-string coll)
+ "'"))
+ ))
+ (#.state-dtd-!-attdef-decl
+ (if* (eq #\# ch) then
+ (setf state state-dtd-!-attdef-decl-type)
+ elseif (or (eq #\' ch) (eq #\" ch)) then
+ (setf value-delim ch)
+ (setf state state-dtd-!-attdef-decl-value)
+ elseif (xml-space-p ch) then nil
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ else (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD <!ATTLIST type spec for "
+ (string (first contents-to-return))
+ ": '"
+ (compute-coll-string coll)
+ "'"))
+ ))
+ (#.state-dtd-!-attdef-decl-value
+ (if* (eq ch value-delim) then
+ #-ignore
+ (push (first (parse-default-value (list (compute-coll-string coll))
+ tokenbuf external-callback))
+ pending)
+ #+ignore
+ (push (compute-coll-string coll) pending)
+ (setf contents-to-return
+ (append contents-to-return
+ (if* entityp then
+ (nreverse pending)
+ else (list (nreverse pending)))))
+ (setf pending (list nil))
+ (setf state state-dtd-!-attdef)
+ (clear-coll coll)
+ elseif (eq #\& ch) then (setf state state-dtd-!-attdef-decl-value3)
+ (setf reference-save-state state-dtd-!-attdef-decl-value)
+ elseif (and (xml-char-p ch) (not (eq #\< ch)))
+ then (add-to-coll coll ch)
+ else (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD <!ATTLIST type spec for "
+ (string (first contents-to-return))
+ ": '"
+ (compute-coll-string coll)
+ "'"))
+ ))
+ (#.state-dtd-!-attdef-decl-value3
+ (if* (and (not prefp) (eq #\# ch))
+ then (setf state state-dtd-!-attdef-decl-value4)
+ elseif (xml-name-start-char-p ch)
+ then (setf state state-dtd-!-attdef-decl-value5)
+ (when (not prefp) (add-to-coll coll #\&))
+ (un-next-char ch)
+ else (clear-coll coll)
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal reference name, starting at: '&"
+ (compute-coll-string coll)
+ "'"))))
+ (#.state-dtd-!-attdef-decl-value4
+ (if* (eq #\x ch)
+ then (setf state state-dtd-!-attdef-decl-value6)
+ elseif (<= (char-code #\0) (char-code ch) (char-code #\9))
+ then (setf state state-dtd-!-attdef-decl-value7)
+ (un-next-char ch)
+ else (clear-coll coll)
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal character reference code, starting at: '&#"
+ (compute-coll-string coll)
+ "'"))
+ ))
+ (#.state-dtd-!-attdef-decl-value5
+ (if* (xml-name-char-p ch)
+ then (add-to-coll entity ch)
+ (when (not prefp) (add-to-coll coll ch))
+ elseif (eq #\; ch)
+ then
+ (if* (not prefp) then (add-to-coll coll ch)
+ elseif (not external) then
+ (xml-error
+ (concatenate 'string
+ "internal dtd subset cannot reference parameter entity within a token; entity: "
+ (compute-coll-string entity)))
+ else
+ (let* ((entity-symbol (compute-tag entity))
+ (p-value
+ (assoc entity-symbol (iostruct-parameter-entities tokenbuf))))
+ (clear-coll entity)
+ (if* (and (iostruct-do-entity tokenbuf)
+ (setf p-value
+ (assoc entity-symbol
+ (iostruct-parameter-entities tokenbuf)))) then
+ (setf p-value (rest p-value))
+ (when (member entity-symbol (iostruct-entity-names tokenbuf))
+ (xml-error (concatenate 'string
+ "entity:"
+ (string entity-symbol)
+ " in recursive reference")))
+ (push entity-symbol (iostruct-entity-names tokenbuf))
+ (if* (stringp p-value) then
+ (dotimes (i (length p-value))
+ (add-to-coll coll (schar p-value i)))
+ elseif p-value then
+ (if* (null external-callback) then
+ (setf (iostruct-do-entity tokenbuf) nil)
+ else
+ (let ((count 0) (string "<?xml ") last-ch
+ save-ch save-unget
+ (tmp-count 0)
+ (entity-stream
+ (apply external-callback p-value)))
+ (when entity-stream
+ (let ((tmp-buf (get-tokenbuf)))
+ (setf (tokenbuf-stream tmp-buf)
+ entity-stream)
+ (setf save-unget
+ (iostruct-unget-char tokenbuf))
+ (setf (iostruct-unget-char tokenbuf) nil)
+ (unicode-check entity-stream tokenbuf)
+ (when (iostruct-unget-char tokenbuf)
+ (setf save-ch (first (iostruct-unget-char tokenbuf))))
+ (setf (iostruct-unget-char tokenbuf) save-unget)
+ (loop
+ (let ((cch
+ (if* save-ch
+ then
+ (let ((s2 save-ch))
+ (setf save-ch nil)
+ s2)
+ else
+ (next-char
+ tmp-buf
+ (iostruct-read-sequence-func
+ tokenbuf)))))
+ (when (null cch) (return))
+ (when *debug-dtd*
+ (format t "dtd-char: ~s~%" cch))
+ (if* (< count 0) then
+ (if* (and (eq last-ch #\?)
+ (eq cch #\>)) then
+ (setf count 6)
+ else (setf last-ch cch))
+ elseif (< count 6) then
+ (when (and (= count 5)
+ (xml-space-p cch))
+ (setf cch #\space))
+ (if* (not (eq cch
+ (schar string count)
+ )) then
+ (loop
+ (when (= tmp-count count)
+ (return))
+ (add-to-coll coll
+ (schar string
+ tmp-count))
+ (incf tmp-count))
+ (add-to-coll coll cch)
+ (setf count 10)
+ else (incf count))
+ elseif (= count 6) then
+ (dotimes (i 6)
+ (add-to-coll coll (schar string i)))
+ (setf count 10)
+ else (add-to-coll coll cch))))
+ (setf (iostruct-entity-names tokenbuf)
+ (rest (iostruct-entity-names tokenbuf)))
+ (close entity-stream)
+ (put-back-tokenbuf tmp-buf)))))
+ )
+ (setf state state-dtdstart)
+ else nil
+ )))
+ (setf state reference-save-state)
+ else (let ((tmp (compute-coll-string entity)))
+ (clear-coll coll)
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "reference not terminated by ';', starting at: '&"
+ tmp
+ (compute-coll-string coll)
+ "'")))
+ ))
+ (#.state-dtd-!-attdef-decl-value6
+ (let ((code (char-code ch)))
+ (if* (eq #\; ch)
+ then (add-to-coll coll (code-char char-code))
+ (setf char-code 0)
+ (setq state state-dtd-!-attdef-decl-value)
+ elseif (<= (char-code #\0) code (char-code #\9))
+ then (setf char-code (+ (* char-code 16) (- code (char-code #\0))))
+ elseif (<= (char-code #\A) code (char-code #\F))
+ then (setf char-code (+ 10 (* char-code 16) (- code (char-code #\A))))
+ elseif (<= (char-code #\a) code (char-code #\f))
+ then (setf char-code (+ 10 (* char-code 16) (- code (char-code #\a))))
+ else (clear-coll coll)
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal hexidecimal character reference code, starting at: '"
+ (compute-coll-string coll)
+ "', calculated char code: "
+ (format nil "~s" char-code)))
+ )))
+ (#.state-dtd-!-attdef-decl-value7
+ (let ((code (char-code ch)))
+ (if* (eq #\; ch)
+ then (add-to-coll coll (code-char char-code))
+ (setf char-code 0)
+ (setq state reference-save-state)
+ elseif (<= (char-code #\0) code (char-code #\9))
+ then (setf char-code (+ (* char-code 10) (- code (char-code #\0))))
+ else (clear-coll coll)
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal decimal character reference code, starting at: '"
+ (compute-coll-string coll)
+ "', calculated char code: "
+ (format nil "~s" char-code)))
+ )))
+ (#.state-dtd-!-attdef-decl-type
+ (if* (xml-name-char-p ch) then (add-to-coll coll ch)
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ elseif (or (xml-space-p ch) (eq #\> ch)) then
+ (let ((token (compute-tag coll)))
+ (when (and (not (eq :REQUIRED token))
+ (not (eq :IMPLIED token))
+ (not (eq :FIXED token)))
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD <!ATTLIST type spec for "
+ (string (first contents-to-return))
+ ": '"
+ (compute-coll-string coll)
+ "'")))
+ (push token pending)
+ (if* (eq :FIXED token) then
+ (when (eq #\> ch)
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD <!ATTLIST type spec for "
+ (string (first contents-to-return))
+ ": '"
+ (compute-coll-string coll)
+ "'")))
+ (setf state state-dtd-!-attdef-decl-value2)
+ elseif (eq #\> ch) then
+ (setf contents-to-return
+ (append contents-to-return (list (nreverse pending))))
+ (return)
+ else (setf contents-to-return
+ (append contents-to-return (list (nreverse pending))))
+ (setf pending (list nil))
+ (setf state state-dtd-!-attdef)))
+ (clear-coll coll)
+ else (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD <!ATTLIST type spec for "
+ (string (first contents-to-return))
+ ": '"
+ (compute-coll-string coll)
+ "'"))
+ ))
+ (#. state-dtd-!-attdef-decl-value2
+ (if* (xml-space-p ch) then nil
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ elseif (or (eq #\' ch) (eq #\" ch)) then
+ (setf value-delim ch)
+ (setf state state-dtd-!-attdef-decl-value)
+ else (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD <!ATTLIST type spec for "
+ (string (first contents-to-return))
+ ": '"
+ (compute-coll-string coll)
+ "'"))
+ ))
+ (#.state-dtd-!-element
+ (if* (xml-space-p ch) then nil
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ elseif (xml-name-start-char-p ch) then (setf state state-dtd-!-element-name)
+ (un-next-char ch)
+ else (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD characters, starting at: '<!ELEMENT "
+ (compute-coll-string coll)
+ "'"))))
+ (#.state-dtd-!-element-name
+ (if* (xml-name-char-p ch) then (add-to-coll coll ch)
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ elseif (xml-space-p ch) then
+ (push (compute-tag coll)
+ contents-to-return)
+ (clear-coll coll)
+ (setf state state-dtd-!-element-type)
+ else (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD <!ELEMENT name: '"
+ (compute-coll-string coll)
+ "'"))
+ ))
+ (#.state-dtd-!-element-type
+ (if* (eq #\( ch) then (setf state state-dtd-!-element-type-paren)
+ elseif (xml-space-p ch) then nil
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ elseif (xml-name-start-char-p ch) then
+ (un-next-char ch)
+ (setf state state-dtd-!-element-type-token)
+ else (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD <!ELEMENT content spec for "
+ (string (first contents-to-return))
+ ": '"
+ (compute-coll-string coll)
+ "'"))
+ ))
+ (#.state-dtd-!-element-type-paren
+ (if* (xml-space-p ch) then nil
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ elseif (xml-name-start-char-p ch) then
+ (un-next-char ch)
+ (setf state state-dtd-!-element-type-paren-name)
+ elseif (eq #\# ch) then
+ (setf state state-dtd-!-element-type-paren-pcd)
+ elseif (eq #\( ch) then
+ (push nil pending)
+ (setf state state-dtd-!-element-type-paren-choice-paren)
+ else (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD <!ELEMENT content spec for "
+ (string (first contents-to-return))
+ ": '"
+ (compute-coll-string coll)
+ "'"))))
+ (#.state-dtd-!-element-type-paren2
+ (if* (eq #\> ch) then
+ ;; there only one name...
+ (setf (first contents-to-return) (first (first contents-to-return)))
+ (return)
+ elseif (eq #\* ch) then
+ (setf state state-dtd-!-element-type-paren-pcd5)
+ (setf (first contents-to-return) (nreverse (first contents-to-return)))
+ (if* (> (length (first contents-to-return)) 1) then
+ (setf (first contents-to-return)
+ (list (append (list :choice)
+ (first contents-to-return))))
+ elseif (listp (first (first contents-to-return))) then
+ (setf (first contents-to-return)
+ (first (first contents-to-return))))
+ (push :* (first contents-to-return))
+ elseif (eq #\? ch) then
+ (setf state state-dtd-!-element-type-paren-pcd5)
+ (setf (first contents-to-return) (nreverse (first contents-to-return)))
+ (if* (> (length (first contents-to-return)) 1) then
+ (setf (first contents-to-return)
+ (list (append (list :choice)
+ (first contents-to-return))))
+ elseif (listp (first (first contents-to-return))) then
+ (setf (first contents-to-return)
+ (first (first contents-to-return))))
+ (push :? (first contents-to-return))
+ elseif (eq #\+ ch) then
+ (setf state state-dtd-!-element-type-paren-pcd5)
+ (setf (first contents-to-return) (nreverse (first contents-to-return)))
+ (if* (> (length (first contents-to-return)) 1) then
+ (setf (first contents-to-return)
+ (list (append (list :choice)
+ (first contents-to-return))))
+ elseif (listp (first (first contents-to-return))) then
+ (setf (first contents-to-return)
+ (first (first contents-to-return))))
+ (push :+ (first contents-to-return))
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ elseif (xml-space-p ch) then
+ (setf state state-dtd-!-element-type-paren-pcd5)
+ (setf (first contents-to-return) (nreverse (first contents-to-return)))
+ (when (> (length (first contents-to-return)) 1)
+ (setf (first contents-to-return)
+ (list (append (list :\choice)
+ (first contents-to-return)))))
+ else (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD <!ELEMENT content spec for "
+ (string (first (reverse contents-to-return)))
+ ": '"
+ (compute-coll-string coll)
+ "'"))
+ ))
+ (#.state-dtd-!-element-type-paren-name
+ (if* (xml-name-char-p ch) then (add-to-coll coll ch)
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ elseif (xml-space-p ch) then
+ (push (compute-tag coll) (first pending))
+ (clear-coll coll)
+ (setf state state-dtd-!-element-type-paren-name2)
+ elseif (eq #\? ch) then
+ (push (compute-tag coll) (first pending))
+ (setf (first pending)
+ (list (push :? (first pending))))
+ (clear-coll coll)
+ (setf state state-dtd-!-element-type-paren-name2)
+ elseif (eq #\* ch) then
+ (push (compute-tag coll) (first pending))
+ (setf (first pending)
+ (list (push :* (first pending))))
+ (clear-coll coll)
+ (setf state state-dtd-!-element-type-paren-name2)
+ elseif (eq #\+ ch) then
+ (push (compute-tag coll) (first pending))
+ (setf (first pending)
+ (list (push :+ (first pending))))
+ (clear-coll coll)
+ (setf state state-dtd-!-element-type-paren-name2)
+ elseif (eq #\) ch) then
+ (push (compute-tag coll) (first pending))
+ (clear-coll coll)
+ (if* (= (length pending) 1) then
+ (push (first pending) contents-to-return)
+ (setf state state-dtd-!-element-type-paren2)
+ else ;; this is (xxx)
+ (if* (second pending) then
+ (push (first pending) (second pending))
+ else (setf (second pending) (first pending)))
+ (setf pending (rest pending))
+ (setf state state-dtd-!-element-type-paren-choice-name3)
+ )
+ elseif (eq #\, ch) then
+ (when (and (first pending) (not (eq :seq (first pending-type))))
+ (clear-coll coll)
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal '|' and ',' mix starting at '"
+ (compute-coll-string coll)
+ "'")))
+ (push (compute-tag coll) (first pending))
+ (push :seq pending-type)
+ (clear-coll coll)
+ (setf state state-dtd-!-element-type-paren-choice)
+ elseif (eq #\| ch) then
+ (when (and (first pending) (not (eq :choice (first pending-type))))
+ (clear-coll coll)
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal '|' and ',' mix starting at '"
+ (compute-coll-string coll)
+ "'")))
+ (push (compute-tag coll) (first pending))
+ (push :choice pending-type)
+ (clear-coll coll)
+ (setf state state-dtd-!-element-type-paren-choice)
+ else (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD <!ELEMENT content spec for "
+ (string (first contents-to-return))
+ ": '"
+ (compute-coll-string coll)
+ "'"))
+ ))
+ (#.state-dtd-!-element-type-paren-name2
+ (if* (xml-space-p ch) then nil
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ elseif (eq #\| ch) then
+ (when (and (rest (first pending)) (not (eq :choice (first pending-type))))
+ (clear-coll coll)
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal '|' and ',' mix starting at '"
+ (compute-coll-string coll)
+ "'")))
+ (push :choice pending-type)
+ (setf state state-dtd-!-element-type-paren-choice)
+ elseif (eq #\, ch) then
+ (when (and (rest (first pending)) (not (eq :seq (first pending-type))))
+ (clear-coll coll)
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal '|' and ',' mix starting at '"
+ (compute-coll-string coll)
+ "'")))
+ (push :seq pending-type)
+ (setf state state-dtd-!-element-type-paren-choice)
+ elseif (eq #\) ch) then
+ (if* (= (length pending) 1) then
+ (push (list (first pending)) contents-to-return)
+ (setf state state-dtd-!-element-type-paren2)
+ else (setf pending (reverse (rest (reverse pending))))
+ )
+ else (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD <!ELEMENT content spec for "
+ (string (first (reverse contents-to-return)))
+ ": '"
+ (compute-coll-string coll)
+ "'"))
+ ))
+
+ (#.state-dtd-!-element-type-paren-choice
+ (if* (xml-name-start-char-p ch) then
+ (un-next-char ch)
+ (setf state state-dtd-!-element-type-paren-choice-name)
+ elseif (xml-space-p ch) then nil
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ elseif (eq #\( ch) then
+ (push nil pending)
+ (setf state state-dtd-!-element-type-paren-choice-paren)
+ elseif (eq #\) ch) then
+ (if* (= (length pending) 1) then
+ (setf (first pending) (nreverse (first pending)))
+ (if* (> (length (first pending)) 1) then
+ (push (first pending-type) (first pending))
+ (setf pending-type (rest pending-type))
+ else (setf (first pending) (first (first pending))))
+ (push (first pending) contents-to-return)
+ (setf state state-dtd-!-element-type-paren3)
+ else (setf (first pending) (nreverse (first pending)))
+ (if* (> (length (first pending)) 1) then
+ (push (first pending-type) (first pending))
+ (setf pending-type (rest pending-type))
+ else (setf (first pending) (first (first pending))))
+ (if* (second pending) then
+ (push (first pending) (second pending))
+ else (setf (second pending) (list (first pending))))
+ (setf pending (rest pending))
+ (setf state state-dtd-!-element-type-paren-choice-name3)
+ )
+ else (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD <!ELEMENT content spec for "
+ (string (first (reverse contents-to-return)))
+ ": '"
+ (compute-coll-string coll)
+ "'"))
+ ))
+
+ (#.state-dtd-!-element-type-paren-choice-paren
+ (if* (xml-name-start-char-p ch) then
+ (setf state state-dtd-!-element-type-paren-name)
+ (un-next-char ch)
+ elseif (eq #\( ch) then (push nil pending)
+ elseif (xml-space-p ch) then nil
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ else (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD <!ELEMENT content spec for "
+ (string (first contents-to-return))
+ ": '"
+ (compute-coll-string coll)
+ "'"))
+ ))
+ (#.state-dtd-!-element-type-paren-choice-name
+ (if* (xml-name-char-p ch) then (add-to-coll coll ch)
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ elseif (xml-space-p ch) then
+ (push (compute-tag coll) (first pending))
+ (clear-coll coll)
+ (setf state state-dtd-!-element-type-paren-choice-name2)
+ elseif (eq #\? ch) then
+ (push (list :? (compute-tag coll)) (first pending))
+ (clear-coll coll)
+ (setf state state-dtd-!-element-type-paren-choice-name2)
+ elseif (eq #\* ch) then
+ (push (list :* (compute-tag coll)) (first pending))
+ (clear-coll coll)
+ (setf state state-dtd-!-element-type-paren-choice-name2)
+ elseif (eq #\+ ch) then
+ (push (list :+ (compute-tag coll)) (first pending))
+ (clear-coll coll)
+ (setf state state-dtd-!-element-type-paren-choice-name2)
+ elseif (eq #\) ch) then
+ (push (compute-tag coll) (first pending))
+ (clear-coll coll)
+ (if* (= (length pending) 1) then
+ (setf (first pending) (nreverse (first pending)))
+ (if* (> (length (first pending)) 1) then
+ (push (first pending-type) (first pending))
+ (setf pending-type (rest pending-type))
+ else (setf (first pending) (first (first pending))))
+ (push (first pending) contents-to-return)
+ (setf state state-dtd-!-element-type-paren3)
+ else (setf (first pending) (nreverse (first pending)))
+ (push (first pending-type) (first pending))
+ (setf pending-type (rest pending-type))
+ (if* (second pending) then
+ (push (first pending) (second pending))
+ else (setf (second pending)
+ ;; (list (first pending)) ;2001-03-22
+ (first pending) ;2001-03-22
+ ))
+ (setf pending (rest pending))
+ (setf state state-dtd-!-element-type-paren-choice-name3)
+ )
+ elseif (eq #\, ch) then
+ (when (and (first pending) (not (eq :seq (first pending-type))))
+ (clear-coll coll)
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal '|' and ',' mix starting at '"
+ (compute-coll-string coll)
+ "'")))
+ (push (compute-tag coll) (first pending))
+ (clear-coll coll)
+ (push :seq pending-type)
+ (setf state state-dtd-!-element-type-paren-choice)
+ elseif (eq #\| ch) then
+ (when (and (first pending) (not (eq :choice (first pending-type))))
+ (clear-coll coll)
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal '|' and ',' mix starting at '"
+ (compute-coll-string coll)
+ "'")))
+ (push (compute-tag coll) (first pending))
+ (clear-coll coll)
+ (push :choice pending-type)
+ (setf state state-dtd-!-element-type-paren-choice)
+ else (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD <!ELEMENT content spec for "
+ (string (first contents-to-return))
+ ": '"
+ (compute-coll-string coll)
+ "'"))
+ ))
+ (#.state-dtd-!-element-type-paren-choice-name2
+ (if* (eq #\| ch)
+ ;; begin changes 2001-03-22
+ then (setf state state-dtd-!-element-type-paren-choice)
+ (push :choice pending-type)
+ elseif (eq #\, ch)
+ then (setf state state-dtd-!-element-type-paren-choice)
+ (push :seq pending-type)
+ ;; end changes 2001-03-22
+ elseif (xml-space-p ch) then nil
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ elseif (eq #\) ch) then
+ (if* (= (length pending) 1) then
+ (setf (first pending) (nreverse (first pending)))
+ (if* (> (length (first pending)) 1) then
+ (push (first pending-type) (first pending))
+ (setf pending-type (rest pending-type))
+ else (setf (first pending) (first (first pending))))
+ (push (first pending) contents-to-return)
+ (setf state state-dtd-!-element-type-paren3)
+ else (setf (first pending) (nreverse (first pending)))
+ (push (first pending-type) (first pending))
+ (setf pending-type (rest pending-type))
+ (if* (second pending) then
+ (push (first pending) (second pending))
+ else (setf (second pending) (list (first pending))))
+ (setf state state-dtd-!-element-type-paren-choice-name3)
+ )
+ (setf pending (rest pending))
+ else (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD <!ELEMENT content spec for "
+ (string (first contents-to-return))
+ ": '"
+ (compute-coll-string coll)
+ "'"))
+ ))
+ (#.state-dtd-!-element-type-paren-choice-name3
+ (if* (xml-space-p ch) then nil
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ elseif (eq #\? ch) then
+ (setf (first pending) (list :? (first pending)))
+ (setf state state-dtd-!-element-type-paren-choice-name2)
+ elseif (eq #\* ch) then
+ (setf (first pending) (list :* (first pending)))
+ (setf state state-dtd-!-element-type-paren-choice-name2)
+ elseif (eq #\+ ch) then
+ (setf (first pending) (list :+ (first pending)))
+ (setf state state-dtd-!-element-type-paren-choice-name2)
+ elseif (eq #\) ch) then
+ (if* (= (length pending) 1) then
+ (setf (first pending) (nreverse (first pending)))
+ (if* (> (length (first pending)) 1) then
+ (push (first pending-type) (first pending))
+ (setf pending-type (rest pending-type))
+ else (setf (first pending) (first (first pending))))
+ (push (first pending) contents-to-return)
+ (setf pending (rest pending))
+ (setf state state-dtd-!-element-type-paren3)
+ else (setf (first pending) (nreverse (first pending)))
+ (if* (> (length (first pending)) 1) then
+ (push (first pending-type) (first pending))
+ (setf pending-type (rest pending-type))
+ else (setf (first pending) (first (first pending))))
+ (if* (second pending) then
+ (push (first pending) (second pending))
+ else (setf (second pending) (list (first pending))))
+ (setf pending (rest pending))
+ (setf state state-dtd-!-element-type-paren-choice)
+ )
+ elseif (eq #\, ch) then
+ (when (and (rest (first pending)) (not (eq :seq (first pending-type))))
+ (clear-coll coll)
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal '|' and ',' mix starting at '"
+ (compute-coll-string coll)
+ "'")))
+ (push :seq pending-type)
+ (setf state state-dtd-!-element-type-paren-choice)
+ elseif (eq #\| ch) then
+ (when (and (rest (first pending)) (not (eq :choice (first pending-type))))
+ (clear-coll coll)
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal '|' and ',' mix starting at '"
+ (compute-coll-string coll)
+ "'")))
+ (push :choice pending-type)
+ (setf state state-dtd-!-element-type-paren-choice)
+ else (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD <!ELEMENT content spec for "
+ (string (first contents-to-return))
+ ": '"
+ (compute-coll-string coll)
+ "'"))
+ ))
+ (#.state-dtd-!-element-type-paren3
+ (if* (eq #\+ ch) then
+ (setf (first contents-to-return)
+ (append (list :+) (list (first contents-to-return))))
+ (setf state state-dtd-!-element-type-paren-pcd5)
+ elseif (eq #\? ch) then
+ (setf (first contents-to-return)
+ (append (list :?) (list (first contents-to-return))))
+ (setf state state-dtd-!-element-type-paren-pcd5)
+ elseif (eq #\* ch) then
+ (setf (first contents-to-return)
+ (append (list :*) (list (first contents-to-return))))
+ (setf state state-dtd-!-element-type-paren-pcd5)
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ elseif (xml-space-p ch) then
+ (setf state state-dtd-!-element-type-paren-pcd5)
+ elseif (eq #\> ch) then (return)
+ else (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD <!ELEMENT content spec for "
+ (string (first (reverse contents-to-return)))
+ ": '"
+ (compute-coll-string coll)
+ "'"))
+ ))
+ (#.state-dtd-!-element-type-paren-pcd
+ (if* (xml-name-char-p ch) then (add-to-coll coll ch)
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ elseif (xml-space-p ch) then
+ (let ((token (compute-tag coll)))
+ (when (not (eq token :PCDATA))
+ (xml-error (concatenate 'string
+ "illegal DTD <!ELEMENT content spec for "
+ (string (first contents-to-return))
+ ": '"
+ (compute-coll-string coll)
+ "'")))
+ (clear-coll coll)
+ (push token contents-to-return))
+ (setf state state-dtd-!-element-type-paren-pcd2)
+ elseif (eq #\| ch) then
+ (let ((token (compute-tag coll)))
+ (when (not (eq token :PCDATA))
+ (xml-error (concatenate 'string
+ "illegal DTD <!ELEMENT content spec for "
+ (string (first contents-to-return))
+ ": '"
+ (compute-coll-string coll)
+ "'")))
+ (push token contents-to-return))
+ (clear-coll coll)
+ (setf state state-dtd-!-element-type-paren-pcd3)
+ elseif (eq #\) ch) then
+ (let ((token (compute-tag coll)))
+ (when (not (eq token :PCDATA))
+ (xml-error (concatenate 'string
+ "illegal DTD <!ELEMENT content spec for "
+ (string (first contents-to-return))
+ ": '"
+ (compute-coll-string coll)
+ "'")))
+ (push token contents-to-return))
+ (setf state state-dtd-!-element-type-paren-pcd4)
+ else (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD <!ELEMENT content spec for "
+ (string (first contents-to-return))
+ ": '"
+ (compute-coll-string coll)
+ "'"))
+ ))
+ (#.state-dtd-!-element-type-paren-pcd2
+ (if* (xml-space-p ch) then nil
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ elseif (eq #\) ch) then
+ (setf state state-dtd-!-element-type-paren-pcd4)
+ elseif (eq #\| ch) then (setf state state-dtd-!-element-type-paren-pcd3)
+ else (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD <!ELEMENT content spec for "
+ (string (first (reverse contents-to-return)))
+ ": '"
+ (compute-coll-string coll)
+ "'"))
+ ))
+ (#.state-dtd-!-element-type-paren-pcd3
+ (if* (xml-space-p ch) then nil
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ elseif (xml-name-start-char-p ch) then
+ (un-next-char ch)
+ (setf state state-dtd-!-element-type-paren-pcd7)
+ else (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD <!ELEMENT content spec for "
+ (string (first (reverse contents-to-return)))
+ ": '"
+ (compute-coll-string coll)
+ "'"))
+ ))
+ (#.state-dtd-!-element-type-paren-pcd4
+ (if* (xml-space-p ch) then
+ (setf state state-dtd-!-element-type-paren-pcd6)
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ elseif (eq #\* ch) then
+ (setf (first contents-to-return) '(:* :PCDATA))
+ (setf state state-dtd-!-element-type-paren-pcd5)
+ elseif (eq #\> ch) then (return)
+ else (clear-coll coll)
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD contents following <!ELEMENT content spec for "
+ (string (first (reverse contents-to-return)))
+ ": '"
+ (compute-coll-string coll)
+ "'"))
+ ))
+ (#.state-dtd-!-element-type-paren-pcd5
+ (if* (xml-space-p ch) then nil
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ elseif (eq #\> ch) then (return)
+ else (clear-coll coll)
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD contents following <!ELEMENT content spec for "
+ (string (first (reverse contents-to-return)))
+ ": '"
+ (compute-coll-string coll)
+ "'"))
+ ))
+ (#.state-dtd-!-element-type-paren-pcd6
+ (if* (xml-space-p ch) then nil
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ elseif (eq #\> ch) then (return)
+ else (clear-coll coll)
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD contents following <!ELEMENT content spec for "
+ (string (first (reverse contents-to-return)))
+ ": '"
+ (compute-coll-string coll)
+ "'"))
+ ))
+ (#.state-dtd-!-element-type-paren-pcd7
+ (if* (xml-name-char-p ch) then (add-to-coll coll ch)
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ elseif (xml-space-p ch) then
+ (setf state state-dtd-!-element-type-paren-pcd8)
+ (let ((token (compute-tag coll)))
+ (clear-coll coll)
+ (if* (listp (first contents-to-return)) then
+ (push token (first contents-to-return))
+ else (setf (first contents-to-return)
+ (list token (first contents-to-return)))))
+ elseif (eq #\) ch) then
+ (setf state state-dtd-!-element-type-paren-pcd9)
+ (let ((token (compute-tag coll)))
+ (clear-coll coll)
+ (if* (listp (first contents-to-return)) then
+ (push token (first contents-to-return))
+ else (setf (first contents-to-return)
+ (list token (first contents-to-return)))))
+ else (clear-coll coll)
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD contents in <!ELEMENT content spec for "
+ (string (first (reverse contents-to-return)))
+ ": '"
+ (compute-coll-string coll)
+ "'"))
+ ))
+ (#.state-dtd-!-element-type-paren-pcd8
+ (if* (xml-space-p ch) then nil
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ elseif (eq #\| ch) then (setf state state-dtd-!-element-type-paren-pcd3)
+ elseif (eq #\) ch) then (setf state state-dtd-!-element-type-paren-pcd9)
+ else (clear-coll coll)
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD contents in <!ELEMENT content spec for "
+ (string (first (reverse contents-to-return)))
+ ": '"
+ (compute-coll-string coll)
+ "'"))
+ ))
+ (#.state-dtd-!-element-type-paren-pcd9
+ (if* (eq #\* ch) then (setf state state-dtd-!-element-type-paren-pcd5)
+ (setf (first contents-to-return) (nreverse (first contents-to-return)))
+ (when (> (length (first contents-to-return)) 1)
+ (setf (first contents-to-return)
+ (list (append (list :choice)
+ (first contents-to-return)))))
+ (push :* (first contents-to-return))
+ else (clear-coll coll)
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "illegal DTD contents in <!ELEMENT content spec for "
+ (string (first (reverse contents-to-return)))
+ ": '"
+ (compute-coll-string coll)
+ "'"))
+ ))
+ (#.state-dtd-!-element-type-token
+ (if* (xml-name-char-p ch) then (add-to-coll coll ch)
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ elseif (xml-space-p ch) then
+ (let ((token (compute-tag coll)))
+ (when (not (or (eq token :EMPTY) (eq token :ANY)))
+ (xml-error (concatenate 'string
+ "illegal DTD <!ELEMENT content spec for "
+ (string (first contents-to-return))
+ ": '"
+ (compute-coll-string coll)
+ "'")))
+ (push token contents-to-return)
+ (setf state state-dtd-!-element-type-end))
+ elseif (eq #\> ch) then
+ (let ((token (compute-tag coll)))
+ (when (not (or (eq token :EMPTY) (eq token :ANY)))
+ (xml-error (concatenate 'string
+ "illegal DTD <!ELEMENT content spec for "
+ (string (first contents-to-return))
+ ": '"
+ (compute-coll-string coll)
+ "'")))
+ (push token contents-to-return)
+ (return))
+ else (add-to-coll coll ch)
+ (xml-error (concatenate 'string
+ "illegal DTD <!ELEMENT content spec for "
+ (string (first contents-to-return))
+ ": '"
+ (compute-coll-string coll)
+ "'"))
+ )
+ )
+ (#.state-dtd-!-element-type-end
+ (if* (xml-space-p ch) then nil
+ elseif (and external (eq #\% ch)) then
+ (external-param-reference tokenbuf coll external-callback)
+ elseif (eq #\> ch) then (return)
+ else (xml-error (concatenate 'string
+ "expected '>', got '"
+ (string ch)
+ "' in DTD <! ELEMENT "
+ (string (first contents-to-return))
+ " for "
+ (string (second contents-to-return))))
+ ))
+ (t
+ (error "need to support dtd state:~s" state))))
+ (put-back-collector entity)
+ (put-back-collector coll)
+ (case state
+ (#.state-dtdstart
+ (when (and (null ch) (not external))
+ (xml-error "unexpected end of input while parsing DTD"))
+ (if* (null tag-to-return) then (values nil :end-dtd)
+ else (error "process other return state")))
+ ((#.state-dtd-!-element-type-end #.state-dtd-!-element-type-token
+ #.state-dtd-!-element-type-paren-pcd4 #.state-dtd-!-element-type-paren-pcd6
+ #.state-dtd-!-element-type-paren-pcd5 #.state-dtd-!-element-type-paren2
+ #.state-dtd-!-element-type-paren3)
+ (values (append (list tag-to-return) (nreverse contents-to-return))
+ nil))
+ ((#.state-dtd-!-attdef-decl-type #.state-dtd-!-attlist-name
+ #.state-dtd-!-attdef)
+ (values (append (list tag-to-return) contents-to-return)
+ nil))
+ ((#.state-dtd-!-entity5 #.state-!-dtd-system3
+ #.state-!-dtd-system7 #.state-!-dtd-system4
+ #.state-!-dtd-system ;; this is actually a !NOTATION
+ #.state-dtd-?-4 ;; PI
+ #.state-dtd-comment4 ;; comment
+ )
+ (let ((ret (append (list tag-to-return) (nreverse contents-to-return))))
+ (values ret
+ nil)))
+ #+ignore
+ (#.state-dtd-pref2
+ (values (nreverse contents-to-return) nil))
+ (#.state-dtd-!-include2
+ (values nil :include))
+ (#.state-dtd-!-include4
+ (values nil :include-end))
+ (#.state-dtd-!-ignore7
+ (values nil :ignore))
+ (:eof
+ (if* (not external) then
+ (xml-error "unexpected end of input while processing DTD internal subset")
+ elseif (or (> include-count 0) (not (eq prev-state state-dtdstart))) then
+ (xml-error "unexpected end of input while processing external DTD"))
+ (values nil :end-dtd))
+ (t
+ (print (list tag-to-return contents-to-return))
+ (error "need to support dtd <post> state:~s" state)))
+ )
+ ))
+
+(defun external-param-reference (tokenbuf old-coll external-callback)
+ (declare (:fbound next-token) (ignorable old-coll) (optimize (speed 3) (safety 1)))
+ (setf (iostruct-seen-parameter-reference tokenbuf) t)
+ (macrolet ((add-to-entity-buf (entity-symbol p-value)
+ `(progn
+ (push (make-tokenbuf :cur 0 :max (length ,p-value) :data ,p-value)
+ (iostruct-entity-bufs tokenbuf))))
+ (clear-coll (coll)
+ `(setf (collector-next ,coll) 0))
+ (un-next-char (ch)
+ `(push ,ch (iostruct-unget-char tokenbuf)))
+ (add-to-coll (coll ch)
+ `(let ((.next. (collector-next ,coll)))
+ (if* (>= .next. (collector-max ,coll))
+ then (grow-and-add ,coll ,ch)
+ else (setf (schar (collector-data ,coll) .next.)
+ ,ch)
+ (setf (collector-next ,coll) (1+ .next.))))))
+ (let ((ch (get-next-char tokenbuf))
+ (coll (get-collector))
+ p-value entity-symbol)
+ (add-to-coll coll ch)
+ (when (not (xml-name-start-char-p ch))
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "Illegal DTD parameter entity name starting at: "
+ (compute-coll-string coll))))
+ (loop
+ (setf ch (get-next-char tokenbuf))
+ (if* (eq #\; ch) then
+ (setf entity-symbol (compute-tag coll))
+ (clear-coll coll)
+ #+ignore (format t "entity symbol: ~s entities: ~s match: ~s~%"
+ entity-symbol (iostruct-parameter-entities tokenbuf)
+ (assoc entity-symbol
+ (iostruct-parameter-entities tokenbuf)))
+ (if* (and (iostruct-do-entity tokenbuf)
+ (setf p-value
+ (assoc entity-symbol
+ (iostruct-parameter-entities tokenbuf)))) then
+ (setf p-value (rest p-value))
+ (when (member entity-symbol (iostruct-entity-names tokenbuf))
+ (xml-error (concatenate 'string
+ "entity:"
+ (string entity-symbol)
+ " in recursive reference")))
+ (push entity-symbol (iostruct-entity-names tokenbuf))
+ (if* (stringp p-value) then
+ (setf p-value (concatenate 'string " " p-value " "))
+ (add-to-entity-buf entity-symbol p-value)
+ elseif (null external-callback) then
+ (setf (iostruct-do-entity tokenbuf) nil)
+ elseif p-value then
+ (let ((entity-stream (apply external-callback p-value)))
+ (when entity-stream
+ (let ((entity-buf (get-tokenbuf)))
+ (setf (tokenbuf-stream entity-buf) entity-stream)
+ (unicode-check entity-stream tokenbuf)
+ (add-to-entity-buf entity-symbol " ")
+ (push entity-buf
+ (iostruct-entity-bufs tokenbuf))
+ (let ((count 0) cch
+ (string "<?xml "))
+ (if* (dotimes (i (length string) t)
+ (setf cch (get-next-char tokenbuf))
+ (when (and (= i 5)
+ (xml-space-p cch))
+ (setf cch #\space))
+ (when (not (eq cch
+ (schar string count)))
+ (return nil))
+ (incf count)) then
+ (setf count 5)
+ (loop
+ (when (< count 0) (return))
+ (un-next-char (schar string count))
+ (decf count))
+ ;; swallow <?xml token
+ (next-token tokenbuf external-callback nil)
+ else
+ (un-next-char cch)
+ (decf count)
+ (loop
+ (when (< count 0) (return))
+ (un-next-char (schar string count))
+ (decf count))))
+ (push #\space (iostruct-unget-char tokenbuf))
+ )
+ )))
+ else (xml-error
+ (concatenate 'string
+ (string entity-symbol)
+ " parameter entity referenced but not declared"))
+ )
+ (put-back-collector coll)
+ (return)
+ elseif (xml-name-char-p ch) then (add-to-coll coll ch)
+ else
+ (dotimes (i 15)
+ (add-to-coll coll ch)
+ (setq ch (get-next-char tokenbuf))
+ (if* (null ch)
+ then (return)))
+ (xml-error (concatenate 'string
+ "Illegal DTD parameter entity name stating at: "
+ (compute-coll-string coll))))))))
+
+