2 ;; copyright (c) 1986-2000 Franz Inc, Berkeley, CA
4 ;; This code is free software; you can redistribute it and/or
5 ;; modify it under the terms of the version 2.1 of
6 ;; the GNU Lesser General Public License as published by
7 ;; the Free Software Foundation, as clarified by the AllegroServe
8 ;; prequel found in license-allegroserve.txt.
10 ;; This code is distributed in the hope that it will be useful,
11 ;; but without any warranty; without even the implied warranty of
12 ;; merchantability or fitness for a particular purpose. See the GNU
13 ;; Lesser General Public License for more details.
15 ;; Version 2.1 of the GNU Lesser General Public License is in the file
16 ;; license-lgpl.txt that was distributed with this file.
17 ;; If it is not present, you can access it from
18 ;; http://www.gnu.org/copyleft/lesser.txt (until superseded by a newer
19 ;; version) or write to the Free Software Foundation, Inc., 59 Temple Place,
20 ;; Suite 330, Boston, MA 02111-1307 USA
22 ;; $Id: pxml2.cl,v 1.1 2002/10/15 12:23:03 kevin Exp $
26 ;; 10/14/00 add namespace support
28 (in-package :net.xml.parser)
30 (pxml-dribble-bug-hook "$Id: pxml2.cl,v 1.1 2002/10/15 12:23:03 kevin Exp $")
32 ;; state titles can be better chosen and explained
34 (defvar *debug-xml* nil)
36 (defmethod parse-xml ((str string) &key external-callback general-entities parameter-entities
37 content-only uri-to-package)
38 (declare (optimize (speed 3) (safety 1)))
39 (parse-xml (make-string-input-stream str) :external-callback external-callback
40 :general-entities general-entities
41 :parameter-entities parameter-entities :content-only content-only
42 :uri-to-package uri-to-package))
44 (defmethod parse-xml ((p stream) &key external-callback general-entities
45 parameter-entities content-only uri-to-package)
46 (declare (optimize (speed 3) (safety 1)))
47 (pxml-internal0 p nil external-callback general-entities parameter-entities content-only
50 (eval-when (compile load eval)
51 (defconstant state-docstart 0) ;; looking for XMLdecl, Misc, doctypedecl, 1st element
52 (defconstant state-docstart-misc 1) ;; looking for Misc, doctypedecl, 1st element
53 (defconstant state-docstart-misc2 2) ;; looking for Misc, 1st element
54 (defconstant state-element-done 3) ;; looking for Misc
55 (defconstant state-element-contents 4) ;; looking for element content
58 (defun all-xml-whitespace-p (val)
59 (dotimes (i (length val) t)
60 (when (not (xml-space-p (elt val i))) (return nil))))
62 (defun pxml-internal0 (p read-sequence-func external-callback
63 general-entities parameter-entities content-only uri-to-package)
64 (declare (optimize (speed 3) (safety 1)))
65 (let ((tokenbuf (make-iostruct :tokenbuf (get-tokenbuf)
67 :read-sequence-func read-sequence-func)))
68 ;; set up stream right
69 (setf (tokenbuf-stream (iostruct-tokenbuf tokenbuf)) p)
70 ;; set up user specified entities
71 (setf (iostruct-parameter-entities tokenbuf) parameter-entities)
72 (setf (iostruct-general-entities tokenbuf) general-entities)
73 (setf (iostruct-uri-to-package tokenbuf) uri-to-package)
74 ;; look for Unicode file
75 (unicode-check p tokenbuf)
77 (values (pxml-internal tokenbuf external-callback content-only)
78 (iostruct-uri-to-package tokenbuf))
79 (dolist (entity-buf (iostruct-entity-bufs tokenbuf))
80 (when (streamp (tokenbuf-stream entity-buf))
81 (close (tokenbuf-stream entity-buf))
82 (put-back-tokenbuf entity-buf))))
85 (defun pxml-internal (tokenbuf external-callback content-only)
86 (declare (optimize (speed 3) (safety 1)))
87 (let ((state state-docstart)
97 (multiple-value-bind (val kind kind2)
98 (next-token tokenbuf external-callback attlist-data)
100 (format t "val: ~s kind: ~s kind2: ~s state: ~s~%" val kind kind2 state))
103 (if* (and (listp val) (eq :xml (first val)) (eq kind :xml) (eq kind2 :end-tag))
105 (check-xmldecl val tokenbuf)
106 (when (not content-only) (push val guts))
107 (setf state state-docstart-misc)
108 elseif (eq kind :comment)
110 (when (not content-only) (push val guts))
111 (setf state state-docstart-misc)
112 elseif (and (listp val) (eq :DOCTYPE (first val)))
114 (if* (eq (third val) :SYSTEM) then
115 (setf system-string (fourth val))
116 (setf val (remove (third val) val))
117 (setf val (remove (third val) val))
118 elseif (eq (third val) :PUBLIC) then
119 (setf public-string (normalize-public-value (fourth val)))
120 (setf system-string (fifth val))
121 (setf val (remove (third val) val))
122 (setf val (remove (third val) val))
123 (setf val (remove (third val) val)))
125 (if* external-callback then
126 (let ((ext-stream (apply external-callback
127 (list (parse-uri system-string)
132 (let (ext-io (entity-buf (get-tokenbuf)))
133 (setf (tokenbuf-stream entity-buf) ext-stream)
134 (setf ext-io (make-iostruct :tokenbuf entity-buf
136 (iostruct-do-entity tokenbuf)
138 (iostruct-read-sequence-func tokenbuf)))
139 (unicode-check ext-stream ext-io)
140 (setf (iostruct-parameter-entities ext-io)
141 (iostruct-parameter-entities tokenbuf))
142 (setf (iostruct-general-entities ext-io)
143 (iostruct-general-entities tokenbuf))
145 (setf val (append val
150 t external-callback)))))
151 (setf (iostruct-seen-any-dtd tokenbuf) t)
152 (setf (iostruct-seen-external-dtd tokenbuf) t)
153 (setf (iostruct-seen-parameter-reference tokenbuf)
154 (iostruct-seen-parameter-reference ext-io))
155 (setf (iostruct-general-entities tokenbuf)
156 (iostruct-general-entities ext-io))
157 (setf (iostruct-parameter-entities tokenbuf)
158 (iostruct-parameter-entities ext-io))
159 (setf (iostruct-do-entity tokenbuf)
160 (iostruct-do-entity ext-io))
161 (dolist (entity-buf2 (iostruct-entity-bufs ext-io))
162 (when (streamp (tokenbuf-stream entity-buf2))
163 (close (tokenbuf-stream entity-buf2))
164 (put-back-tokenbuf entity-buf2)))
165 (close (tokenbuf-stream entity-buf))
166 (put-back-tokenbuf entity-buf))
169 (setf (iostruct-do-entity tokenbuf) nil)))
171 (process-attlist (rest (rest val)) attlist-data))
172 (when (not content-only) (push val guts))
173 (setf state state-docstart-misc2)
177 (setf state state-docstart-misc)
178 elseif (eq kind :pcdata)
180 (when (or (not kind2) (not (all-xml-whitespace-p val)))
181 (if* (not kind2) then
182 (xml-error "An entity reference occured where only whitespace or the first element may occur")
184 (xml-error (concatenate 'string
185 "unrecognized content '"
186 (subseq val 0 (min (length val) 40)) "'"))))
187 (setf state state-docstart-misc)
188 elseif (or (symbolp val)
189 (and (listp val) (symbolp (first val))))
191 (when (eq kind :start-tag)
192 (setf val (add-default-values val attlist-data)))
193 (if* (and (eq kind :start-tag) (eq kind2 :end-tag))
194 then (push (list val) guts)
195 (setf state state-element-done)
196 elseif (eq kind :start-tag)
197 then (push (list val) pending)
198 ;;(format t "pending: ~s guts: ~s <1>~%" pending guts)
199 (when (iostruct-entity-bufs tokenbuf)
200 (push (if (symbolp val) val (first val)) entity-open-tags))
201 (setf state state-element-contents)
202 else (xml-error (concatenate 'string
203 "encountered token at illegal syntax position: '"
205 (if* (null guts) then
206 " at start of contents"
210 (format nil "~s" (first guts))
213 (print (list val kind kind2))
214 (break "need to check for other allowable docstarts")))
215 (#.state-docstart-misc2
216 (if* (eq kind :pcdata)
218 (when (or (not kind2) (not (all-xml-whitespace-p val)))
219 (if* (not kind2) then
220 (xml-error "An entity reference occured where only whitespace or the first element may occur")
222 (xml-error (concatenate 'string
223 "unrecognized content '"
224 (subseq val 0 (min (length val) 40)) "'"))))
225 elseif (and (listp val) (eq :comment (first val)))
227 (when (not content-only) (push val guts))
231 elseif (eq kind :eof)
233 (xml-error "unexpected end of file encountered")
234 elseif (or (symbolp val)
235 (and (listp val) (symbolp (first val))))
237 (when (eq kind :start-tag)
238 (setf val (add-default-values val attlist-data)))
239 (if* (and (eq kind :start-tag) (eq kind2 :end-tag))
240 then (push (list val) guts)
241 (setf state state-element-done)
242 elseif (eq kind :start-tag)
243 then (push (list val) pending)
244 ;;(format t "pending: ~s guts: ~s <2>~%" pending guts)
245 (when (iostruct-entity-bufs tokenbuf)
246 (push (if (symbolp val) val (first val)) entity-open-tags))
247 (setf state state-element-contents)
248 else (xml-error (concatenate 'string
249 "encountered token at illegal syntax position: '"
251 (if* (null guts) then
252 " at start of contents"
256 (format nil "~s" (first guts))
259 (error "this branch unexpected <1>")))
260 (#.state-docstart-misc
261 (if* (eq kind :pcdata)
263 (when (or (not kind2) (not (all-xml-whitespace-p val)))
264 (if* (not kind2) then
265 (xml-error "An entity reference occured where only whitespace or the first element may occur")
267 (xml-error (concatenate 'string
268 "unrecognized content '"
269 (subseq val 0 (min (length val) 40)) "'"))))
270 elseif (and (listp val) (eq :DOCTYPE (first val)))
272 (if* (eq (third val) :SYSTEM) then
273 (setf system-string (fourth val))
274 (setf val (remove (third val) val))
275 (setf val (remove (third val) val))
276 elseif (eq (third val) :PUBLIC) then
277 (setf public-string (normalize-public-value (fourth val)))
278 (setf system-string (fifth val))
279 (setf val (remove (third val) val))
280 (setf val (remove (third val) val))
281 (setf val (remove (third val) val)))
283 (if* external-callback then
284 (let ((ext-stream (apply external-callback
285 (list (parse-uri system-string)
290 (let (ext-io (entity-buf (get-tokenbuf)))
291 (setf (tokenbuf-stream entity-buf) ext-stream)
292 (setf ext-io (make-iostruct :tokenbuf entity-buf
294 (iostruct-do-entity tokenbuf)
296 (iostruct-read-sequence-func tokenbuf)))
297 (unicode-check ext-stream ext-io)
298 (setf (iostruct-parameter-entities ext-io)
299 (iostruct-parameter-entities tokenbuf))
300 (setf (iostruct-general-entities ext-io)
301 (iostruct-general-entities tokenbuf))
303 (setf val (append val
308 t external-callback)))))
309 (setf (iostruct-seen-any-dtd tokenbuf) t)
310 (setf (iostruct-seen-external-dtd tokenbuf) t)
311 (setf (iostruct-seen-parameter-reference tokenbuf)
312 (iostruct-seen-parameter-reference ext-io))
313 (setf (iostruct-general-entities tokenbuf)
314 (iostruct-general-entities ext-io))
315 (setf (iostruct-parameter-entities tokenbuf)
316 (iostruct-parameter-entities ext-io))
317 (setf (iostruct-do-entity tokenbuf)
318 (iostruct-do-entity ext-io))
319 (dolist (entity-buf2 (iostruct-entity-bufs ext-io))
320 (when (streamp (tokenbuf-stream entity-buf2))
321 (close (tokenbuf-stream entity-buf2))
322 (put-back-tokenbuf entity-buf2)))
323 (close (tokenbuf-stream entity-buf))
324 (put-back-tokenbuf entity-buf))
327 (setf (iostruct-do-entity tokenbuf) nil)))
329 (process-attlist (rest (rest val)) attlist-data))
330 (when (not content-only) (push val guts))
331 (setf state state-docstart-misc2)
332 elseif (and (listp val) (eq :comment (first val)))
334 (when (not content-only) (push val guts))
338 elseif (or (symbolp val)
339 (and (listp val) (symbolp (first val))))
341 (when (eq kind :start-tag)
342 (setf val (add-default-values val attlist-data)))
343 (if* (and (eq kind :start-tag) (eq kind2 :end-tag))
344 then (push (list val) guts)
345 (setf state state-element-done)
346 elseif (eq kind :start-tag)
347 then (push (list val) pending)
348 ;;(format t "pending: ~s guts: ~s <3>~%" pending guts)
349 (when (iostruct-entity-bufs tokenbuf)
350 (push (if (symbolp val) val (first val)) entity-open-tags))
351 (setf state state-element-contents)
352 else (xml-error (concatenate 'string
353 "encountered token at illegal syntax position: '"
357 (format nil "~s" (first guts))
360 (print (list val kind kind2))
361 (break "check for other docstart-misc states")))
362 (#.state-element-contents
363 (if* (or (symbolp val)
364 (and (listp val) (symbolp (first val))))
366 (when (eq kind :start-tag)
367 (setf val (add-default-values val attlist-data)))
368 (if* (eq kind :end-tag)
369 then (let ((candidate (first (first pending))))
370 (when (listp candidate) (setf candidate (first candidate)))
371 (if* (eq candidate val)
373 (if* (iostruct-entity-bufs tokenbuf) then
374 (when (not (eq (first entity-open-tags) val))
378 " element closed in entity that did not open it")))
379 (setf entity-open-tags (rest entity-open-tags))
381 (when (eq (first entity-open-tags) val)
385 " element closed outside of entity that did not open it")))
387 (if* (= (length pending) 1)
389 (push (first pending) guts)
390 (setf state state-element-done)
392 (setf (second pending)
393 (append (second pending) (list (first pending)))))
394 (setf pending (rest pending))
395 ;;(format t "pending: ~s guts: ~s <4>~%" pending guts)
396 else (xml-error (format nil
397 "encountered end tag: ~s expected: ~s"
399 elseif (and (eq kind :start-tag) (eq kind2 :end-tag))
401 (setf (first pending)
402 (append (first pending) (list (list val))))
403 ;;(format t "pending: ~s guts: ~s <5>~%" pending guts)
404 elseif (eq kind :start-tag)
406 (push (list val) pending)
407 ;;(format t "pending: ~s guts: ~s <6>~%" pending guts)
408 (when (iostruct-entity-bufs tokenbuf)
409 (push (if (symbolp val) val (first val)) entity-open-tags))
410 elseif (eq kind :cdata) then
411 (setf (first pending)
412 (append (first pending) (rest val)))
413 (let ((old (first pending))
416 (if* (and (stringp (first new)) (stringp item)) then
418 (concatenate 'string (first new) item))
419 else (push item new)))
420 (setf (first pending) (reverse new)))
421 elseif (eq kind :comment) then
422 (when (not content-only) (push val guts))
425 (setf (first pending)
426 (append (first pending) (list val)))
427 elseif (eq kind :eof)
429 (xml-error "unexpected end of file encountered")
430 else (xml-error (format nil "unexpected token: ~s" val)))
431 elseif (eq kind :pcdata)
433 (setf (first pending)
434 (append (first pending) (list val)))
435 (let ((old (first pending))
438 (if* (and (stringp (first new)) (stringp item)) then
440 (concatenate 'string (first new) item))
441 else (push item new)))
442 (setf (first pending) (reverse new)))
443 else (xml-error (format nil "unexpected token: ~s" val))))
444 (#.state-element-done
445 (if* (eq kind :pcdata)
447 (when (or (not kind2) (not (all-xml-whitespace-p val)))
448 (if* (not kind2) then
449 (xml-error "An entity reference occured where only whitespace or the first element may occur")
451 (xml-error (concatenate 'string
452 "unrecognized content '"
453 (subseq val 0 (min (length val) 40)) "'"))))
454 elseif (eq kind :eof) then
455 (put-back-tokenbuf (iostruct-tokenbuf tokenbuf))
456 (return (nreverse guts))
457 elseif (eq kind :comment) then
458 (when (not content-only) (push val guts))
462 (xml-error (concatenate 'string
463 "encountered token at illegal syntax position: '"
467 (format nil "~s" (first guts))
471 (error "need to support state:~s token:~s kind:~s kind2:~s <parse>" state val kind kind2)))
474 (eval-when (compile load eval)
475 (defconstant state-pcdata 0) ;;looking for < (tag start), & (reference); all else is string data
476 (defconstant state-readtagfirst 1) ;; seen < looking for /,?,!,name start
477 (defconstant state-readtag-? 2) ;; seen <? looking for space,char
478 (defconstant state-readtag-! 3) ;; seen <! looking for name,[,-
479 (defconstant state-readtag-end 4) ;; found </ looking for tag name
480 (defconstant state-readtag 5) ;; found < name start looking for more name, /, >
481 (defconstant state-findattributename 6) ;; found <?xml space looking for ?,>,space,name start
482 (defconstant state-readpi 7)
483 (defconstant state-noattributename 8)
484 (defconstant state-attribname 9) ;; found <?xml space name start looking for more name,=
485 (defconstant state-attribstartvalue 10) ;; found <?xml space name= looking for ',"
486 (defconstant state-attribvaluedelim 11)
487 (defconstant state-readtag-!-name 12) ;; seen <!name(start) looking for more name chars or space
488 (defconstant state-readtag-!-conditional 13) ;; found <![ looking for CDATA, INCLUDE, IGNORE
489 (defconstant state-readtag-!-comment 14)
490 (defconstant state-readtag-!-readcomment 15)
491 (defconstant state-readtag-!-readcomment2 16)
492 (defconstant state-readtag-end-bracket 17)
493 (defconstant state-readpi2 18) ;; found <?name space char looking for char,?
494 (defconstant state-prereadpi 19);; found <?name space looking for space,character
495 (defconstant state-pre-!-contents 20) ;; found <!name space looking for > or contents
496 (defconstant state-!-contents 21) ;; found <!name space name start looking for more name,>,[,space
497 (defconstant state-!-doctype 22) ;; found <!DOCTYPE space looking for space,>,[,name
498 (defconstant state-begin-dtd 23)
499 (defconstant state-!-doctype-ext 24) ;; found <!DOCTYPE space name space name start looking for name,space
500 (defconstant state-!-doctype-system 25) ;; found <!DOCTYPE name SYSTEM looking for ',"
501 (defconstant state-!-doctype-public 26) ;; found <!DOCTYPE name PUBLIC looking for ',"
502 (defconstant state-!-doctype-system2 27) ;; found <!DOCTYPE name SYSTEM " looking for chars,"
503 (defconstant state-!-doctype-system3 28) ;; found <!DOCTYPE name SYSTEM ' looking for chars,'
504 (defconstant state-!-doctype-ext2 29) ;; found <!DOCTYPE name SYSTEM/PUBLIC etc. looking for space,>,[
505 (defconstant state-!-doctype-ext3 30) ;; processed DTD looking for space,>
506 (defconstant state-!-doctype-public2 31) ;; found <!DOCTYPE name PUBLIC " looking for text or "
507 (defconstant state-!-doctype-public3 32) ;; found <!DOCTYPE name PUBLIC ' looking for text or '
508 (defconstant state-readtag2 33) ;; found <name looking for space,/,>,attrib name
509 (defconstant state-readtag3 34) ;; found <name/ or <name / looking for >
510 (defconstant state-readtag4 35) ;; found <name attrib-name start looking for more name,=
511 (defconstant state-readtag5 36) ;; found attrib= looking for ',"
512 (defconstant state-readtag6 37) ;; found attrib=['"] looking for end delimiter,value,reference
513 (defconstant state-readtag7 38) ;; found & inside attribute value, looking for # or name start
514 (defconstant state-readtag8 39) ;; found &# in attribute value looking for char code
515 (defconstant state-readtag9 40) ;; found &name start looking for more name,;
516 (defconstant state-readtag10 41) ;; found &#x in attribute value looking for hex code
517 (defconstant state-readtag11 42) ;; found &#[0-9] looking for more digits,;
518 (defconstant state-readtag-end2 43) ;; found </ & tag name start looking for more tag, space, >
519 (defconstant state-readtag-end3 44) ;; found </ end tag name space looking for >
520 (defconstant state-pcdata2 45) ;; seen & looking for name start
521 (defconstant state-pcdata3 46) ;; seen &# looking for character reference code
522 (defconstant state-pcdata4 47) ;; working on entity reference name looking for ;
523 (defconstant state-pcdata5 48) ;; working on hex character code reference
524 (defconstant state-pcdata6 49) ;; working on decimal character code reference
525 (defconstant state-findattributename0 50)
526 (defconstant state-readtag6a 51)
527 (defconstant state-readtag-!-conditional4 52)
528 (defconstant state-readtag-!-conditional5 53)
529 (defconstant state-readtag-!-conditional6 54)
530 (defconstant state-readtag-!-conditional7 55)
531 ;;(defconstant state-pcdata-parsed 56)
532 (defconstant state-pcdata7 57)
533 (defconstant state-pcdata8 58)
534 (defconstant state-readtag12 59)
535 (defconstant state-attribname2 60)
538 (defun next-token (tokenbuf external-callback attlist-data)
539 (declare (optimize (speed 3) (safety 1)))
540 ;; return two values:
541 ;; the next token from the stream.
544 ;; if read-sequence-func is non-nil,
545 ;; read-sequence-func is called to fetch the next character
546 (macrolet ((add-to-entity-buf (entity-symbol p-value)
548 (push (make-tokenbuf :cur 0 :max (length p-value) :data p-value)
549 (iostruct-entity-bufs tokenbuf))))
552 `(push ,ch (iostruct-unget-char tokenbuf)))
555 `(setf (collector-next ,coll) 0))
557 (add-to-coll (coll ch)
558 `(let ((.next. (collector-next ,coll)))
559 (if* (>= .next. (collector-max ,coll))
560 then (grow-and-add ,coll ,ch)
561 else (setf (schar (collector-data ,coll) .next.)
563 (setf (collector-next ,coll) (1+ .next.)))))
565 (to-preferred-case (ch)
566 ;; should check the case mode
567 `(char-downcase ,ch))
571 (let ((state state-pcdata)
572 (coll (get-collector))
573 (entity (get-collector))
575 (tag-to-return-string)
583 (special-tag-count 0)
584 (attrib-value-tokenbuf)
594 (setq ch (get-next-char tokenbuf))
595 (when *debug-xml* (format t "ch: ~s code: ~x state:~s entity-names:~s~%"
596 ch (char-code ch) state (iostruct-entity-names tokenbuf)))
598 then (return) ; eof -- exit loop
606 (setf entity-source (first (iostruct-entity-bufs tokenbuf)))
607 (if* (> (collector-next coll) 0)
608 then ; have collected something, return this string
609 (un-next-char ch) ; push back the <
612 (setq state state-readtagfirst))
614 then (setf state state-pcdata2)
615 (setf entity-source (first (iostruct-entity-bufs tokenbuf)))
617 elseif (eq #\] ch) then (setf state state-pcdata7)
618 elseif (not (xml-char-p ch)) then
619 (xml-error (concatenate 'string
620 "Illegal character: "
622 " detected in input"))
624 (add-to-coll coll ch)
626 (if* (not (eq ch #\return))
627 then (add-to-coll coll ch))))
630 (if* (eq #\] ch) then (setf state state-pcdata8)
631 else (setf state state-pcdata)
632 (add-to-coll coll #\]) (un-next-char ch)))
635 (if* (eq #\> ch) then
636 (add-to-coll coll #\])
637 (add-to-coll coll #\])
638 (add-to-coll coll #\>)
640 (add-to-coll coll ch)
641 (setq ch (get-next-char tokenbuf))
644 (xml-error (concatenate 'string
645 "content cannot contain ']]>':'"
646 (compute-coll-string coll)
648 elseif (eq #\] ch) then
649 (add-to-coll coll #\])
650 else (setf state state-pcdata)
651 (add-to-coll coll #\]) (add-to-coll coll #\]) (un-next-char ch)))
655 then (setf state state-pcdata3)
656 elseif (xml-name-start-char-p ch)
657 then (setf state state-pcdata4)
659 else (clear-coll coll)
661 (add-to-coll coll ch)
662 (setq ch (get-next-char tokenbuf))
665 (xml-error (concatenate 'string
666 "illegal reference name, starting at: '&"
667 (compute-coll-string coll)
673 then (setf state state-pcdata5)
674 elseif (<= (char-code #\0) (char-code ch) (char-code #\9))
675 then (setf state state-pcdata6)
677 else (clear-coll coll)
679 (add-to-coll coll ch)
680 (setq ch (get-next-char tokenbuf))
683 (xml-error (concatenate 'string
684 "illegal character reference code, starting at: '&#"
685 (compute-coll-string coll)
690 (if* (xml-name-char-p ch)
691 then (add-to-coll entity ch)
693 then (let ((entity-symbol (compute-tag entity)))
695 (if* (not (eq entity-source (first (iostruct-entity-bufs tokenbuf)))) then
698 (string entity-symbol)
699 " reference cannot be constructed from entity reference/character data sequence"))
701 (setf entity-source nil))
702 (if* (string= (symbol-name entity-symbol) "amp") then (add-to-coll coll #\&)
703 elseif (string= (symbol-name entity-symbol) "lt") then (add-to-coll coll #\<)
704 elseif (string= (symbol-name entity-symbol) "gt") then (add-to-coll coll #\>)
705 elseif (string= (symbol-name entity-symbol) "apos") then (add-to-coll coll #\')
706 elseif (string= (symbol-name entity-symbol) "quot") then (add-to-coll coll #\")
709 (if* (and (iostruct-do-entity tokenbuf)
712 (iostruct-general-entities tokenbuf)))) then
713 (setf p-value (rest p-value))
714 (when (member entity-symbol (iostruct-entity-names tokenbuf))
715 (xml-error (concatenate 'string
717 (string entity-symbol)
718 " in recursive reference")))
719 (push entity-symbol (iostruct-entity-names tokenbuf))
720 (if* (stringp p-value) then
721 (add-to-entity-buf entity-symbol p-value)
722 elseif (null external-callback) then
723 (setf (iostruct-do-entity tokenbuf) nil)
725 (let ((entity-stream (apply external-callback p-value)))
726 (if* entity-stream then
727 (let ((entity-buf (get-tokenbuf)))
728 (setf (tokenbuf-stream entity-buf) entity-stream)
729 (unicode-check entity-stream tokenbuf)
731 (iostruct-entity-bufs tokenbuf))
732 ;; check for possible external textdecl
735 (if* (dotimes (i (length string) t)
736 (setf cch (get-next-char tokenbuf))
741 (schar string count)))
746 (when (< count 0) (return))
747 (un-next-char (schar string count))
749 ;; swallow <?xml token
757 (when (< count 0) (return))
758 (un-next-char (schar string count))
762 (xml-error (concatenate 'string
763 "Reference to unparsed entity "
764 (string entity-symbol)))
767 elseif (or (not (iostruct-seen-any-dtd tokenbuf))
768 (iostruct-standalonep tokenbuf)
769 (and (iostruct-seen-any-dtd tokenbuf)
770 (not (iostruct-seen-external-dtd tokenbuf))
771 (not (iostruct-seen-parameter-reference tokenbuf))))
773 (xml-error (concatenate 'string
774 (string entity-symbol)
775 " must have entity declaration before being referenced"))
778 (setq state state-pcdata)
779 else (let ((tmp (compute-coll-string entity)))
782 (add-to-coll coll ch)
783 (setq ch (get-next-char tokenbuf))
786 (xml-error (concatenate 'string
787 "reference not terminated by ';', starting at: '&"
789 (compute-coll-string coll)
794 (let ((code (char-code ch)))
796 then (if* (not (eq entity-source (first (iostruct-entity-bufs tokenbuf)))) then
799 (string (code-char char-code))
800 " reference cannot be constructed from entity reference/character data sequence"))
802 (setf entity-source nil))
803 (when (not (xml-char-p (code-char char-code)))
806 "Character reference: "
807 (format nil "~s" char-code)
808 " (decimal) is not valid XML input character")))
809 (add-to-coll coll (code-char char-code))
811 (setq state state-pcdata)
812 elseif (<= (char-code #\0) code (char-code #\9))
813 then (setf char-code (+ (* char-code 16) (- code (char-code #\0))))
814 elseif (<= (char-code #\A) code (char-code #\F))
815 then (setf char-code (+ 10 (* char-code 16) (- code (char-code #\A))))
816 elseif (<= (char-code #\a) code (char-code #\f))
817 then (setf char-code (+ 10 (* char-code 16) (- code (char-code #\a))))
818 else (clear-coll coll)
820 (add-to-coll coll ch)
821 (setq ch (get-next-char tokenbuf))
824 (xml-error (concatenate 'string
825 "illegal hexidecimal character reference code, starting at: '"
826 (compute-coll-string coll)
827 "', calculated char code: "
828 (format nil "~s" char-code)))
832 (let ((code (char-code ch)))
834 then (if* (not (eq entity-source (first (iostruct-entity-bufs tokenbuf)))) then
837 (string (code-char char-code))
838 " reference cannot be constructed from entity reference/character data sequence"))
840 (setf entity-source nil))
841 (when (not (xml-char-p (code-char char-code)))
844 "Character reference: "
845 (format nil "~s" char-code)
846 " (decimal) is not valid XML input character")))
847 (add-to-coll coll (code-char char-code))
849 (setq state state-pcdata)
850 elseif (<= (char-code #\0) code (char-code #\9))
851 then (setf char-code (+ (* char-code 10) (- code (char-code #\0))))
852 else (clear-coll coll)
854 (add-to-coll coll ch)
855 (setq ch (get-next-char tokenbuf))
858 (xml-error (concatenate 'string
859 "illegal decimal character reference code, starting at: '"
860 (compute-coll-string coll)
861 "', calculated char code: "
862 (format nil "~s" char-code)))
866 (if* (xml-name-start-char-p ch)
867 then (setf state state-readtag-end2)
869 else (clear-coll coll)
871 (add-to-coll coll ch)
872 (setq ch (get-next-char tokenbuf))
875 (xml-error (concatenate 'string
876 "illegal end tag name, starting at: '</"
877 (compute-coll-string coll)
881 (#.state-readtag-end2
882 (if* (xml-name-char-p ch)
883 then (add-to-coll coll ch)
884 elseif (eq #\> ch) then
885 (let ((tag-string (compute-coll-string coll)))
886 (when (and (iostruct-ns-scope tokenbuf)
888 (first (first (iostruct-ns-scope tokenbuf)))))
889 (dolist (item (second (first (iostruct-ns-scope tokenbuf))))
890 (setf (iostruct-ns-to-package tokenbuf)
891 (remove (assoc item (iostruct-ns-to-package tokenbuf))
892 (iostruct-ns-to-package tokenbuf))))
893 (setf (iostruct-ns-scope tokenbuf)
894 (rest (iostruct-ns-scope tokenbuf)))))
895 (setq tag-to-return (compute-tag coll *package*
896 (iostruct-ns-to-package tokenbuf)))
898 elseif (xml-space-p ch) then (setf state state-readtag-end3)
899 (let ((tag-string (compute-coll-string coll)))
900 (when (and (iostruct-ns-scope tokenbuf)
902 (first (first (iostruct-ns-scope tokenbuf)))))
903 (setf (iostruct-ns-scope tokenbuf)
904 (rest (iostruct-ns-scope tokenbuf)))))
905 (setq tag-to-return (compute-tag coll *package*
906 (iostruct-ns-to-package tokenbuf)))
907 else (let ((tmp (compute-coll-string coll)))
910 (add-to-coll coll ch)
911 (setq ch (get-next-char tokenbuf))
914 (xml-error (concatenate 'string
915 "illegal end tag name, starting at: '</"
917 (compute-coll-string coll)
921 (#.state-readtag-end3
922 (if* (xml-space-p ch) then nil
923 elseif (eq #\> ch) then (return)
924 else (let ((tmp (compute-coll-string coll)))
927 (add-to-coll coll ch)
928 (setq ch (get-next-char tokenbuf))
931 (xml-error (concatenate 'string
932 "illegal end tag name, starting at: '"
933 (compute-coll-string coll)
934 "' end tag name: " tmp )))
937 (#.state-readtagfirst
938 ; starting to read a tag name
940 then (setf state state-readtag-end)
942 then (setf state state-readtag-?)
943 (setf empty-delim #\?)
945 then (setf state state-readtag-!)
946 (setf empty-delim nil)
947 elseif (xml-name-start-char-p ch)
948 then (setf state state-readtag)
949 (setf empty-delim #\/)
951 else (clear-coll coll)
953 (add-to-coll coll ch)
954 (setq ch (get-next-char tokenbuf))
957 (xml-error (concatenate 'string
958 "illegal character following '<', starting at '"
959 (compute-coll-string coll)
964 (if* (xml-name-start-char-p ch)
966 (setf state state-readtag-!-name)
970 (setf state state-readtag-!-conditional)
973 (setf state state-readtag-!-comment)
977 (add-to-coll coll ch)
978 (setq ch (get-next-char tokenbuf))
981 (xml-error (concatenate 'string
982 "illegal character following '<!', starting at '<!"
983 (compute-coll-string coll)
987 (#.state-readtag-!-conditional
988 (if* (eq #\C ch) then
989 (setf state state-readtag-!-conditional4)
990 (setf special-tag-count 1)
991 else (clear-coll coll)
993 (add-to-coll coll ch)
994 (setq ch (get-next-char tokenbuf))
997 (xml-error (concatenate 'string
998 "illegal character following '<![', starting at '<!["
999 (compute-coll-string coll)
1003 (#.state-readtag-!-conditional4
1004 (if* (not (eq (elt "CDATA[" special-tag-count) ch))
1005 then (clear-coll coll)
1007 (add-to-coll coll ch)
1008 (setq ch (get-next-char tokenbuf))
1011 (xml-error (concatenate 'string
1012 "illegal token following '<![', starting at '<!["
1013 (subseq "CDATA[" 0 special-tag-count)
1014 (compute-coll-string coll)
1016 elseif (eq #\[ ch) then (setf state state-readtag-!-conditional5)
1017 else (incf special-tag-count)))
1019 (#.state-readtag-!-conditional5
1021 then (setf state state-readtag-!-conditional6)
1022 elseif (not (xml-char-p ch)) then
1023 (xml-error (concatenate 'string
1024 "Illegal character: "
1026 " detected in CDATA input"))
1027 else (add-to-coll coll ch)))
1029 (#.state-readtag-!-conditional6
1031 then (setf state state-readtag-!-conditional7)
1032 else (setf state state-readtag-!-conditional5)
1033 (add-to-coll coll #\])
1034 (add-to-coll coll ch)))
1036 (#.state-readtag-!-conditional7
1039 (if* (not (eq entity-source (first (iostruct-entity-bufs tokenbuf)))) then
1041 "CDATA cannot be constructed from entity reference/character data sequence")
1043 (setf entity-source nil))
1045 elseif (eq #\] ch) then
1046 (add-to-coll coll #\]) ;; come back here to check again
1047 else (setf state state-readtag-!-conditional5)
1048 (add-to-coll coll #\])
1049 (add-to-coll coll #\])
1050 (add-to-coll coll ch)))
1052 (#.state-readtag-!-comment
1054 then (setf state state-readtag-!-readcomment)
1055 (setf tag-to-return :comment)
1056 else (clear-coll coll)
1058 (add-to-coll coll ch)
1059 (setq ch (get-next-char tokenbuf))
1062 (xml-error (concatenate 'string
1063 "illegal token following '<![-', starting at '<!-"
1064 (compute-coll-string coll)
1068 (#.state-readtag-!-readcomment
1070 then (setf state state-readtag-!-readcomment2)
1071 elseif (not (xml-char-p ch)) then
1072 (xml-error (concatenate 'string
1073 "Illegal character: "
1075 " detected in input"))
1076 else (add-to-coll coll ch)))
1078 (#.state-readtag-!-readcomment2
1080 then (setf state state-readtag-end-bracket)
1081 else (setf state state-readtag-!-readcomment)
1082 (add-to-coll coll #\-) (add-to-coll coll ch)))
1084 (#.state-readtag-end-bracket
1086 then (if* (not (eq entity-source (first (iostruct-entity-bufs tokenbuf)))) then
1088 (concatenate 'string
1089 (string tag-to-return)
1090 " tag cannot be constructed from entity reference/character data sequence"))
1092 (setf entity-source nil))
1094 else (clear-coll coll)
1096 (add-to-coll coll ch)
1097 (setq ch (get-next-char tokenbuf))
1100 (xml-error (concatenate 'string
1101 "illegal token following '--' comment terminator, starting at '--"
1102 (compute-coll-string coll)
1107 (if* (xml-name-char-p ch) ;; starting char already passed more restrictive test
1109 (add-to-coll coll ch)
1111 (if* (xml-space-p ch) then
1112 (setf tag-to-return-string (compute-coll-string coll))
1114 (compute-tag coll *package*
1115 (iostruct-ns-to-package tokenbuf)))
1117 (setf state state-readtag2)
1118 elseif (eq #\> ch) then
1120 (compute-tag coll *package*
1121 (iostruct-ns-to-package tokenbuf)))
1124 elseif (eq #\/ ch) then
1126 (compute-tag coll *package*
1127 (iostruct-ns-to-package tokenbuf)))
1129 (setf state state-readtag3)
1130 else (dotimes (i 15)
1131 (add-to-coll coll ch)
1132 (setq ch (get-next-char tokenbuf))
1136 (concatenate 'string
1137 "illegal token name, starting at '"
1138 (compute-coll-string coll)
1143 (if* (xml-space-p ch) then nil
1144 elseif (eq #\> ch) then (return)
1145 elseif (eq #\/ ch) then (setf state state-readtag3)
1146 elseif (xml-name-start-char-p ch) then
1148 (setf state state-readtag4)
1149 else (clear-coll coll)
1151 (add-to-coll coll ch)
1152 (setq ch (get-next-char tokenbuf))
1156 (concatenate 'string
1157 "illegal token, starting at '"
1158 (compute-coll-string coll)
1159 "' following element token start: " (string tag-to-return)))
1163 (if* (xml-name-char-p ch) ;; starting char already passed more restrictive test
1165 (add-to-coll coll ch)
1166 elseif (eq #\= ch) then
1167 (setq attrib-name (compute-tag coll *package*
1168 (iostruct-ns-to-package tokenbuf)))
1170 (let ((name (symbol-name attrib-name)))
1171 (when (and (>= (length name) 5)
1172 (string= name "xmlns" :end1 5))
1173 (if* (= (length name) 5)
1175 (setf ns-token :none)
1176 elseif (eq (schar name 5) #\:)
1178 (setf ns-token (subseq name 6)))))
1179 (setf state state-readtag5)
1180 elseif (xml-space-p ch) then
1181 (setq attrib-name (compute-tag coll *package*
1182 (iostruct-ns-to-package tokenbuf)))
1184 (let ((name (symbol-name attrib-name)))
1185 (when (and (>= (length name) 5)
1186 (string= name "xmlns" :end1 5))
1187 (if* (= (length name) 5)
1189 (setf ns-token :none)
1191 (setf ns-token (subseq name 6)))))
1192 (setf state state-readtag12)
1193 else (let ((tmp (compute-coll-string coll)))
1196 (add-to-coll coll ch)
1197 (setq ch (get-next-char tokenbuf))
1201 (concatenate 'string
1202 "looking for attribute '=', found: '"
1203 (compute-coll-string coll)
1204 "' following attribute name: " tmp)))
1208 (if* (xml-space-p ch) then nil
1209 elseif (eq #\= ch) then (setf state state-readtag5)
1212 (add-to-coll coll ch)
1213 (setq ch (get-next-char tokenbuf))
1217 (concatenate 'string
1218 "looking for attribute '=', found: '"
1219 (compute-coll-string coll)
1220 "' following attribute name: " (string attrib-name)))))
1223 ;; begin to collect attribute value
1224 (if* (or (eq ch #\")
1226 then (setq value-delim ch)
1227 (let* ((tag-defaults (assoc tag-to-return attlist-data))
1228 (this-attrib (assoc attrib-name tag-defaults)))
1229 (when (and (second this-attrib) (not (eq (second this-attrib) :CDATA)))
1232 (setq state state-readtag6)
1233 elseif (xml-space-p ch) then nil
1236 (add-to-coll coll ch)
1237 (setq ch (get-next-char tokenbuf))
1241 (concatenate 'string
1242 "attribute value not delimited by ' or \" : '"
1243 (compute-coll-string coll)
1244 "' following attribute: " (string attrib-name)))
1248 (let ((from-entity (and attrib-value-tokenbuf
1249 (eq attrib-value-tokenbuf
1250 (first (iostruct-entity-bufs tokenbuf))))))
1251 (when (not from-entity) (setf attrib-value-tokenbuf nil))
1252 (if* from-entity then
1253 (if* (eq #\newline ch) then (setf ch #\space)
1254 elseif (eq #\return ch) then (setf ch #\space)
1255 elseif (eq #\tab ch) then (setf ch #\space)
1257 (if* (and (not from-entity) (eq ch value-delim))
1258 then (setq attrib-value (compute-coll-string coll))
1260 (setf attrib-value (normalize-attrib-value attrib-value)))
1262 (push attrib-name attribs-to-return)
1263 (push attrib-value attribs-to-return)
1265 (let ((package (assoc (parse-uri attrib-value)
1266 (iostruct-uri-to-package tokenbuf)
1268 (if* package then (setf package (rest package))
1271 (let ((i 0) new-package)
1273 (let* ((candidate (concatenate 'string
1274 "net.xml.namespace."
1275 (format nil "~s" i)))
1276 (exists (find-package candidate)))
1279 else (setf new-package (make-package candidate))
1280 (setf (iostruct-uri-to-package tokenbuf)
1281 (acons (parse-uri attrib-value) new-package
1282 (iostruct-uri-to-package tokenbuf)))
1283 (return new-package)))))))
1284 (setf (iostruct-ns-to-package tokenbuf)
1285 (acons ns-token package (iostruct-ns-to-package tokenbuf)))
1287 (if* (and (first (iostruct-ns-scope tokenbuf))
1288 (string= (first (first (iostruct-ns-scope tokenbuf)))
1289 tag-to-return-string))
1291 (push ns-token (second (first (iostruct-ns-scope tokenbuf))))
1293 (push (list tag-to-return-string (list ns-token))
1294 (iostruct-ns-scope tokenbuf)))
1295 (setf ns-token nil))
1296 (setq state state-readtag6a)
1297 elseif (eq #\newline ch) then
1298 (when (not (eq #\return last-ch)) (add-to-coll coll #\space))
1299 elseif (or (eq #\tab ch) (eq #\return ch)) then
1300 (add-to-coll coll #\space)
1302 then (setq state state-readtag7)
1303 (setf entity-source (first (iostruct-entity-bufs tokenbuf)))
1304 elseif (and (xml-char-p ch) (not (eq #\< ch)))
1305 then (add-to-coll coll ch)
1308 (add-to-coll coll ch)
1309 (setq ch (get-next-char tokenbuf))
1313 (concatenate 'string
1314 "attribute value cannot contain '<': '"
1315 (compute-coll-string coll)
1316 "' following attribute: " (string attrib-name)))
1321 (if* (xml-space-p ch) then (setf state state-readtag2)
1322 elseif (eq #\> ch) then (setf state state-readtag2)
1324 elseif (eq #\/ ch) then (setf state state-readtag3)
1325 else (clear-coll coll)
1327 (add-to-coll coll ch)
1328 (setq ch (get-next-char tokenbuf))
1332 (concatenate 'string
1333 "illegal token, starting at '"
1334 (compute-coll-string coll)
1335 "' following element token start: " (string tag-to-return)))
1340 then (setf state state-readtag8)
1341 elseif (xml-name-start-char-p ch)
1342 then (setf state state-readtag9)
1344 else (clear-coll coll)
1346 (add-to-coll coll ch)
1347 (setq ch (get-next-char tokenbuf))
1351 (concatenate 'string
1352 "attribute value contains illegal reference name: '&"
1353 (compute-coll-string coll)
1354 "' in attribute value for: " (string attrib-name)))
1359 then (setf state state-readtag10)
1360 elseif (<= (char-code #\0) (char-code ch) (char-code #\9))
1361 then (setf state state-readtag11)
1363 else (clear-coll coll)
1365 (add-to-coll coll ch)
1366 (setq ch (get-next-char tokenbuf))
1370 (concatenate 'string
1371 "attribute value contains illegal character reference code: '"
1372 (compute-coll-string coll)
1373 "' in attribute value for: " (string attrib-name)))
1377 (let ((code (char-code ch)))
1379 then (if* (not (eq entity-source (first (iostruct-entity-bufs tokenbuf)))) then
1381 (concatenate 'string
1382 (string (code-char char-code))
1383 " reference cannot be constructed from entity reference/character data sequence"))
1385 (setf entity-source nil))
1386 (add-to-coll coll (code-char char-code))
1388 (setq state state-readtag6)
1389 elseif (<= (char-code #\0) code (char-code #\9))
1390 then (setf char-code (+ (* char-code 16) (- code (char-code #\0))))
1391 elseif (<= (char-code #\A) code (char-code #\F))
1392 then (setf char-code (+ 10 (* char-code 16) (- code (char-code #\A))))
1393 elseif (<= (char-code #\a) code (char-code #\f))
1394 then (setf char-code (+ 10 (* char-code 16) (- code (char-code #\a))))
1395 else (clear-coll coll)
1397 (add-to-coll coll ch)
1398 (setq ch (get-next-char tokenbuf))
1402 (concatenate 'string
1403 "attribute value contains illegal hexidecimal character reference code: '"
1404 (compute-coll-string coll)
1405 "' in attribute value for: " (string attrib-name)))
1409 (let ((code (char-code ch)))
1411 then (if* (not (eq entity-source (first (iostruct-entity-bufs tokenbuf)))) then
1413 (concatenate 'string
1414 (string (code-char char-code))
1415 " reference cannot be constructed from entity reference/character data sequence"))
1417 (setf entity-source nil))
1418 (add-to-coll coll (code-char char-code))
1420 (setq state state-readtag6)
1421 elseif (<= (char-code #\0) code (char-code #\9))
1422 then (setf char-code (+ (* char-code 10) (- code (char-code #\0))))
1423 else (clear-coll coll)
1425 (add-to-coll coll ch)
1426 (setq ch (get-next-char tokenbuf))
1430 (concatenate 'string
1431 "attribute value contains illegal decimal character reference code: '"
1432 (compute-coll-string coll)
1433 "' in attribute value for: " (string attrib-name)))
1437 (if* (xml-name-char-p ch)
1438 then (add-to-coll entity ch)
1440 then (let ((entity-symbol (compute-tag entity)))
1442 (if* (not (eq entity-source (first (iostruct-entity-bufs tokenbuf)))) then
1444 (concatenate 'string
1445 (string entity-symbol)
1446 " reference cannot be constructed from entity reference/character data sequence"))
1448 (setf entity-source nil))
1449 (if* (string= (symbol-name entity-symbol) "amp") then (add-to-coll coll #\&)
1450 elseif (string= (symbol-name entity-symbol) "lt") then (add-to-coll coll #\<)
1451 elseif (string= (symbol-name entity-symbol) "gt") then (add-to-coll coll #\>)
1452 elseif (string= (symbol-name entity-symbol) "apos") then (add-to-coll coll #\')
1453 elseif (string= (symbol-name entity-symbol) "quot") then (add-to-coll coll #\")
1455 (if* (and (iostruct-do-entity tokenbuf)
1457 (assoc entity-symbol
1458 (iostruct-general-entities tokenbuf)))) then
1459 (setf p-value (rest p-value))
1460 (when (member entity-symbol (iostruct-entity-names tokenbuf))
1461 (xml-error (concatenate 'string
1463 (string entity-symbol)
1464 " in recursive reference")))
1465 (push entity-symbol (iostruct-entity-names tokenbuf))
1466 (if* (stringp p-value) then
1467 (add-to-entity-buf entity-symbol p-value)
1468 (when (not attrib-value-tokenbuf)
1469 (setf attrib-value-tokenbuf
1470 (first (iostruct-entity-bufs tokenbuf))))
1471 elseif (null external-callback) then
1472 (setf (iostruct-do-entity tokenbuf) nil)
1474 (let ((entity-stream (apply external-callback p-value)))
1475 (if* entity-stream then
1476 (let ((entity-buf (get-tokenbuf)))
1477 (setf (tokenbuf-stream entity-buf) entity-stream)
1478 (unicode-check entity-stream tokenbuf)
1480 (iostruct-entity-bufs tokenbuf))
1481 ;; check for possible external textdecl
1484 (if* (dotimes (i (length string) t)
1485 (setf cch (get-next-char tokenbuf))
1490 (schar string count)))
1495 (when (< count 0) (return))
1496 (un-next-char (schar string count))
1498 ;; swallow <?xml token
1506 (when (< count 0) (return))
1507 (un-next-char (schar string count))
1511 (xml-error (concatenate 'string
1512 "Reference to unparsed entity "
1513 (string entity-symbol)))
1516 elseif (or (not (iostruct-seen-any-dtd tokenbuf))
1517 (and (iostruct-seen-any-dtd tokenbuf)
1518 (not (iostruct-seen-external-dtd tokenbuf))
1519 (not (iostruct-seen-parameter-reference tokenbuf))))
1521 (xml-error (concatenate 'string
1522 (string entity-symbol)
1523 " must have entity declaration before being referenced"))
1526 (setq state state-readtag6)
1527 else (dotimes (i 15)
1528 (add-to-coll coll ch)
1529 (setq ch (get-next-char tokenbuf))
1533 (concatenate 'string
1534 "attribute value contains illegal reference name: '&"
1535 (compute-coll-string coll)
1536 "' in attribute value for: " (string attrib-name)))
1540 (if* (eq #\> ch) then (return)
1541 else (clear-coll coll)
1543 (add-to-coll coll ch)
1544 (setq ch (get-next-char tokenbuf))
1548 (concatenate 'string
1549 "expected '>' found '"
1550 (compute-coll-string coll)
1551 "' in element: " (string tag-to-return)))
1554 (#.state-readtag-!-name
1555 (if* (xml-name-char-p ch) ;; starting char already passed more restrictive test
1557 (add-to-coll coll ch)
1559 (when (not (xml-space-p ch))
1560 (xml-error (concatenate 'string
1561 "expecting whitespace following: '<!"
1562 (compute-coll-string coll)
1563 "' ; got: '" (string ch) "'")))
1564 (setq tag-to-return (compute-tag coll))
1566 (setf state state-pre-!-contents)))
1569 (if* (xml-name-char-p ch)
1571 (add-to-coll coll ch)
1573 (when (and (not (xml-space-p ch)) (not (eq #\? ch)))
1574 (xml-error (concatenate 'string
1575 "expecting name following: '<?"
1576 (compute-coll-string coll)
1577 "' ; got: '" (string ch) "'"))
1579 (when (= (collector-next coll) 0)
1580 (xml-error "null <? token"))
1581 (if* (and (= (collector-next coll) 3)
1582 (eq (elt (collector-data coll) 0) #\x)
1583 (eq (elt (collector-data coll) 1) #\m)
1584 (eq (elt (collector-data coll) 2) #\l)
1587 (when (eq #\? ch) (xml-error "null <?xml token"))
1588 (setq tag-to-return :xml)
1589 (setf state state-findattributename)
1590 elseif (and (= (collector-next coll) 3)
1591 (or (eq (elt (collector-data coll) 0) #\x)
1592 (eq (elt (collector-data coll) 0) #\X))
1593 (or (eq (elt (collector-data coll) 1) #\m)
1594 (eq (elt (collector-data coll) 1) #\M))
1595 (or (eq (elt (collector-data coll) 2) #\l)
1596 (eq (elt (collector-data coll) 2) #\L))
1598 (xml-error "<?xml tag must be all lower case")
1600 (setq tag-to-return (compute-tag coll))
1601 (when (eq #\? ch) (un-next-char ch))
1602 (setf state state-prereadpi))
1605 (#.state-pre-!-contents
1606 (if* (xml-space-p ch)
1608 elseif (not (xml-char-p ch))
1609 then (xml-error (concatenate 'string ;; no test for this...
1610 "illegal character '"
1612 " following <!" (string tag-to-return)))
1615 else (un-next-char ch)
1616 (setf state state-!-contents)))
1620 (let ((val (parse-dtd tokenbuf nil external-callback)))
1621 (setf (iostruct-seen-any-dtd tokenbuf) t)
1622 (push (append (list :[) val)
1623 contents-to-return))
1624 (setf state state-!-doctype-ext3))
1627 (if* (xml-name-char-p ch)
1628 then (add-to-coll coll ch)
1630 then (push (compute-coll-string coll) contents-to-return)
1634 then (push (compute-tag coll) contents-to-return)
1636 (setf state state-begin-dtd)
1637 elseif (and (xml-space-p ch) (eq tag-to-return :DOCTYPE))
1638 ;; look at tag-to-return and set state accordingly
1639 then (push (compute-tag coll) contents-to-return)
1641 (setf state state-!-doctype)
1643 (concatenate 'string
1645 (string tag-to-return)
1649 (#.state-!-doctype-ext
1650 (if* (xml-name-char-p ch) ;; starting char already passed more restrictive test
1652 (add-to-coll coll ch)
1654 (when (not (xml-space-p ch))
1656 (add-to-coll coll ch)
1657 (setq ch (get-next-char tokenbuf))
1661 (concatenate 'string
1662 "illegal character in '"
1663 (compute-coll-string coll)
1664 "' in <! tag: " (string tag-to-return) " "
1665 (string (first contents-to-return))
1668 (let ((token (compute-tag coll)))
1669 (push token contents-to-return)
1671 (if* (eq :SYSTEM token) then (setf state state-!-doctype-system)
1672 elseif (eq :PUBLIC token) then (setf state state-!-doctype-public)
1674 (concatenate 'string
1675 "expected 'SYSTEM' or 'PUBLIC' got '"
1676 (string (first contents-to-return))
1677 "' in <! tag: " (string tag-to-return) " "
1678 (string (second contents-to-return))))
1682 (#.state-!-doctype-public
1683 (if* (xml-space-p ch) then nil
1684 elseif (eq #\" ch) then (setf state state-!-doctype-public2)
1685 elseif (eq #\' ch) then (setf state state-!-doctype-public3)
1687 (concatenate 'string
1688 "expected quote or double-quote got: '"
1690 "' in <! tag: " (string tag-to-return) " "
1691 (string (second contents-to-return)) " "
1692 (string (first contents-to-return))
1696 (#.state-!-doctype-system
1697 (if* (xml-space-p ch) then nil
1698 elseif (eq #\" ch) then (setf state state-!-doctype-system2)
1699 elseif (eq #\' ch) then (setf state state-!-doctype-system3)
1701 (concatenate 'string
1702 "expected quote or double-quote got: '"
1704 "' in <! tag: " (string tag-to-return) " "
1705 (string (second contents-to-return)) " "
1706 (string (first contents-to-return))
1710 (#.state-!-doctype-public2
1711 (if* (eq #\" ch) then (push (compute-coll-string coll)
1714 (setf state state-!-doctype-system)
1715 elseif (pub-id-char-p ch) then (add-to-coll coll ch)
1716 else (dotimes (i 15)
1717 (add-to-coll coll ch)
1718 (setq ch (get-next-char tokenbuf))
1722 (concatenate 'string
1723 "illegal character in DOCTYPE PUBLIC string: '"
1724 (compute-coll-string coll) "'"))
1727 (#.state-!-doctype-public3
1728 (if* (eq #\' ch) then (push (compute-coll-string coll)
1731 (setf state state-!-doctype-system)
1732 elseif (pub-id-char-p ch) then (add-to-coll coll ch)
1733 else (dotimes (i 15)
1734 (add-to-coll coll ch)
1735 (setq ch (get-next-char tokenbuf))
1739 (concatenate 'string
1740 "illegal character in DOCTYPE PUBLIC string: '"
1741 (compute-coll-string coll) "'"))
1744 (#.state-!-doctype-system2
1745 (when (not (xml-char-p ch))
1746 (xml-error "XML is not well formed")) ;; not tested
1747 (if* (eq #\" ch) then (push (compute-coll-string coll)
1750 (setf state state-!-doctype-ext2)
1751 else (add-to-coll coll ch)))
1753 (#.state-!-doctype-system3
1754 (when (not (xml-char-p ch))
1755 (xml-error "XML is not well formed")) ;; not tested
1756 (if* (eq #\' ch) then (push (compute-coll-string coll)
1759 (setf state state-!-doctype-ext2)
1760 else (add-to-coll coll ch)))
1762 (#.state-!-doctype-ext2
1763 (if* (xml-space-p ch) then nil
1764 elseif (eq #\> ch) then (return)
1766 then (setf state state-begin-dtd)
1769 (add-to-coll coll ch)
1770 (setq ch (get-next-char tokenbuf))
1774 (concatenate 'string
1775 "illegal char in DOCTYPE token: '"
1776 (compute-coll-string coll) "'"))
1779 (#.state-!-doctype-ext3
1780 (if* (xml-space-p ch) then nil
1781 elseif (eq #\> ch) then (return)
1784 (add-to-coll coll ch)
1785 (setq ch (get-next-char tokenbuf))
1789 (concatenate 'string
1790 "illegal char in DOCTYPE token following dtd: '"
1791 (compute-coll-string coll) "'"))
1795 ;; skip whitespace; possible exits: >, SYSTEM, PUBLIC, [
1796 (if* (xml-space-p ch) then nil
1797 elseif (xml-name-start-char-p ch)
1799 (setf state state-!-doctype-ext)
1801 elseif (eq #\> ch) then (return)
1803 then (setf state state-begin-dtd)
1805 (concatenate 'string
1806 "illegal character: '"
1808 "' in <! tag: " (string tag-to-return) " "
1809 (string (first contents-to-return))))
1813 (if* (xml-space-p ch)
1815 elseif (not (xml-char-p ch))
1816 then (xml-error "XML is not well formed") ;; no test
1817 else (un-next-char ch)
1818 (setf state state-readpi)))
1822 then (setf state state-readpi2)
1823 elseif (not (xml-char-p ch))
1824 then (xml-error "XML is not well formed") ;; no test
1825 else (add-to-coll coll ch)))
1830 elseif (eq #\? ch) then
1831 (add-to-coll coll #\?) ;; come back here to try again
1832 else (setf state state-readpi)
1833 (add-to-coll coll #\?)
1834 (add-to-coll coll ch)))
1836 (#.state-findattributename0
1837 (if* (xml-space-p ch) then (setf state state-findattributename)
1838 elseif (eq ch empty-delim)
1839 then (setf state state-noattributename)
1842 (add-to-coll coll ch)
1843 (setq ch (get-next-char tokenbuf))
1847 (concatenate 'string
1848 "expected space or tag end before: '"
1849 (compute-coll-string coll) "'"))))
1850 (#.state-findattributename
1851 ;; search until we find the start of an attribute name
1852 ;; or the end of the tag
1853 (if* (eq ch empty-delim)
1854 then (setf state state-noattributename)
1855 elseif (xml-space-p ch)
1856 then nil ;; skip whitespace
1857 elseif (xml-name-start-char-p ch)
1860 (setf state state-attribname)
1863 (add-to-coll coll ch)
1864 (setq ch (get-next-char tokenbuf))
1868 (concatenate 'string
1869 "illegal char in <?xml token: '"
1870 (compute-coll-string coll) "'"))
1874 ;; collect attribute name
1875 (if* (xml-name-char-p ch) ;; starting char already passed more restrictive test
1877 (add-to-coll coll ch)
1878 elseif (xml-space-p ch) then
1879 (setq attrib-name (compute-tag coll))
1881 (setq state state-attribname2)
1883 (when (not (eq #\= ch))
1885 (add-to-coll coll ch)
1886 (setq ch (get-next-char tokenbuf))
1890 (concatenate 'string
1891 "illegal char in <?xml attribute token: '"
1892 (compute-coll-string coll) "'"))
1894 (setq attrib-name (compute-tag coll))
1896 (setq state state-attribstartvalue)))
1898 (#.state-attribname2
1899 (if* (eq #\= ch) then (setq state state-attribstartvalue)
1900 elseif (xml-space-p ch) then nil
1904 (add-to-coll coll ch)
1905 (setq ch (get-next-char tokenbuf))
1909 (concatenate 'string
1910 "illegal char in <?xml attribute token: '"
1911 (compute-coll-string coll) "'"))))
1912 (#.state-attribstartvalue
1913 ;; begin to collect value
1914 (if* (or (eq ch #\")
1916 then (setq value-delim ch)
1917 (setq state state-attribvaluedelim)
1918 elseif (xml-space-p ch) then nil
1921 (add-to-coll coll ch)
1922 (setq ch (get-next-char tokenbuf))
1926 (concatenate 'string
1927 "expected ' or \" before <?xml attribute token value: '"
1928 (compute-coll-string coll) "'"))
1931 (#.state-attribvaluedelim
1932 (if* (eq ch value-delim)
1933 then (setq attrib-value (compute-coll-string coll))
1935 (push attrib-name attribs-to-return)
1936 (push attrib-value attribs-to-return)
1937 (setq state state-findattributename0)
1938 elseif (and (xml-char-p ch) (not (eq #\< ch)))
1939 then (add-to-coll coll ch)
1942 (add-to-coll coll ch)
1943 (setq ch (get-next-char tokenbuf))
1947 (concatenate 'string
1948 "illegal character in attribute token value: '"
1949 (compute-coll-string coll) "'"))
1952 (#.state-noattributename
1955 (return) ;; ready to build return token
1958 (concatenate 'string
1959 "expected '>' found: '" (string ch) "' in <?xml token"))
1963 (error "need to support state:~s" state))
1965 (put-back-collector entity)
1967 (#.state-noattributename ;; it's a bug if this state occurs with a non-empty element
1968 (put-back-collector coll)
1969 (if* attribs-to-return
1970 then (values (cons tag-to-return
1971 (nreverse attribs-to-return))
1972 (if (eq tag-to-return :xml) :xml :start-tag) :end-tag)
1974 (values tag-to-return :start-tag :end-tag)
1976 (#.state-readtag-end-bracket
1977 ;; this is a :commant tag
1978 (let ((ret (compute-coll-string coll)))
1979 (put-back-collector coll)
1980 (values (cons tag-to-return (list ret)) :comment :nil)))
1982 (let ((next-char (collector-next coll)))
1983 (put-back-collector coll)
1984 (if* (zerop next-char)
1985 then (values nil :eof nil)
1986 else (values (compute-coll-string coll) :pcdata pcdatap))))
1988 (let ((ret (compute-coll-string coll)))
1989 (put-back-collector coll)
1990 (values (append (list :pi tag-to-return) (list ret)) :pi nil)))
1991 ((#.state-readtag-!-conditional)
1992 (put-back-collector coll)
1993 (values (append (list tag-to-return) contents-to-return) :start-tag
1995 ((#.state-!-contents
1997 #.state-!-doctype-ext2
1998 #.state-!-doctype-ext3)
1999 (put-back-collector coll)
2000 (values (append (list tag-to-return) (nreverse contents-to-return)) :start-tag
2003 (put-back-collector coll)
2004 (values (if* attribs-to-return
2005 then (cons tag-to-return
2006 (nreverse attribs-to-return))
2007 else tag-to-return) :start-tag :end-tag))
2010 (put-back-collector coll)
2011 (values (if* attribs-to-return
2012 then (cons tag-to-return
2013 (nreverse attribs-to-return))
2014 else tag-to-return) :start-tag nil))
2015 ((#.state-readtag-end2
2016 #.state-readtag-end3)
2017 (put-back-collector coll)
2018 (values tag-to-return :end-tag nil))
2019 (#.state-readtag-!-conditional7
2020 (let ((ret (compute-coll-string coll)))
2021 (put-back-collector coll)
2022 (values (append (list :cdata) (list ret)) :cdata nil)))
2024 ;; if ch is null that means we encountered unexpected EOF
2026 (put-back-collector coll)
2027 (xml-error "unexpected end of input"))
2028 (print (list tag-to-return attribs-to-return))
2029 (let ((ret (compute-coll-string coll)))
2030 (put-back-collector coll)
2031 (error "need to support state <post>:~s ~s ~s ~s" state
2037 (defun swallow-xml-token (tokenbuf external-callback)
2038 (declare (ignorable old-coll) (optimize (speed 3) (safety 1)))
2039 (let ((xml (next-token tokenbuf external-callback nil)))
2040 (if* (and (eq (fourth xml) :standalone) (stringp (fifth xml))
2041 (equal (fifth xml) "yes")) then
2042 (xml-error "external XML entity cannot be standalone document")
2043 elseif (and (eq (sixth xml) :standalone) (stringp (seventh xml))
2044 (equal (seventh xml) "yes")) then
2045 (xml-error "external XML entity cannot be standalone document"))))
2047 ;; return the string with entity references replaced by text
2048 ;; normalizing will happen later
2049 ;; we're ok on different types - just ignore IMPLIED & REQUIRED; and possibly skip FIXED
2050 (defun parse-default-value (value-list tokenbuf external-callback)
2051 (declare (optimize (speed 3) (safety 1)))
2053 (if* (stringp (first value-list)) then (setf value-string (first value-list))
2054 elseif (eq (first value-list) :FIXED) then (setf value-string (second value-list)))
2055 (let ((tmp-result (parse-xml
2056 (concatenate 'string
2060 :external-callback external-callback
2062 (iostruct-general-entities tokenbuf))))
2063 (if* (stringp (first value-list)) then
2064 (setf (first value-list)
2065 (third (first (first tmp-result))))
2066 elseif (eq (first value-list) :FIXED) then
2067 (setf (second value-list)
2068 (third (first (first tmp-result)))))))
2071 (defun process-attlist (args attlist-data)
2072 (declare (optimize (speed 3) (safety 1)))
2073 (dolist (arg1 args attlist-data)
2074 ;;(format t "arg1: ~s~%" arg1)
2075 (dolist (item (rest arg1))
2076 ;;(format t "item: ~s~%" item)
2077 (when (eq :ATTLIST (first item))
2078 (let* ((name (second item))
2079 (name-data (assoc name attlist-data))
2080 (new-name-data (rest name-data)))
2081 ;;(format t "name: ~s name-data: ~s new-name-data: ~s~%" name name-data new-name-data)
2082 (dolist (attrib-data (rest (rest item)))
2083 ;;(format t "attrib-data: ~s~%" attrib-data)
2085 (setf (rest (rest attrib-data))
2086 (parse-default-value (rest (rest attrib-data)) tokenbuf external-callback))
2087 (when (not (assoc (first attrib-data) new-name-data))
2088 (setf new-name-data (acons (first attrib-data) (rest attrib-data) new-name-data))))
2090 (rplacd (assoc name attlist-data) (nreverse new-name-data))
2091 else (setf attlist-data (acons name (nreverse new-name-data) attlist-data))))))))