r11859: Canonicalize whitespace
[xmlutils.git] / pxml2.cl
1 ;;
2 ;; copyright (c) 1986-2000 Franz Inc, Berkeley, CA
3 ;;
4 ;; This code is free software; you can redistribute it and/or
5 ;; modify it under the terms of the version 2.1 of
6 ;; the GNU Lesser General Public License as published by
7 ;; the Free Software Foundation, as clarified by the AllegroServe
8 ;; prequel found in license-allegroserve.txt.
9 ;;
10 ;; This code is distributed in the hope that it will be useful,
11 ;; but without any warranty; without even the implied warranty of
12 ;; merchantability or fitness for a particular purpose.  See the GNU
13 ;; Lesser General Public License for more details.
14 ;;
15 ;; Version 2.1 of the GNU Lesser General Public License is in the file
16 ;; license-lgpl.txt that was distributed with this file.
17 ;; If it is not present, you can access it from
18 ;; http://www.gnu.org/copyleft/lesser.txt (until superseded by a newer
19 ;; version) or write to the Free Software Foundation, Inc., 59 Temple Place,
20 ;; Suite 330, Boston, MA  02111-1307  USA
21 ;;
22 ;; Change Log
23 ;;
24 ;; 10/14/00 add namespace support
25
26 (in-package :net.xml.parser)
27
28 (pxml-dribble-bug-hook "$Id$")
29
30 ;; state titles can be better chosen and explained
31
32 (defvar *debug-xml* nil)
33
34 (defmethod parse-xml ((str string) &key external-callback general-entities parameter-entities
35                                         content-only uri-to-package)
36   (declare (optimize (speed 3) (safety 1)))
37   (parse-xml (make-string-input-stream str) :external-callback external-callback
38              :general-entities general-entities
39              :parameter-entities parameter-entities :content-only content-only
40              :uri-to-package uri-to-package))
41
42 (defmethod parse-xml ((p stream) &key external-callback general-entities
43                                       parameter-entities content-only uri-to-package)
44   (declare (optimize (speed 3) (safety 1)))
45   (pxml-internal0 p nil external-callback general-entities parameter-entities content-only
46                   uri-to-package))
47
48 (eval-when (compile load eval)
49   (defconstant state-docstart 0) ;; looking for XMLdecl, Misc, doctypedecl, 1st element
50   (defconstant state-docstart-misc 1) ;; looking for Misc, doctypedecl, 1st element
51   (defconstant state-docstart-misc2 2) ;; looking for Misc, 1st element
52   (defconstant state-element-done 3) ;; looking for Misc
53   (defconstant state-element-contents 4) ;; looking for element content
54   )
55
56 (defun all-xml-whitespace-p (val)
57   (dotimes (i (length val) t)
58     (when (not (xml-space-p (elt val i))) (return nil))))
59
60 (defun pxml-internal0 (p read-sequence-func external-callback
61                       general-entities parameter-entities content-only uri-to-package)
62   (declare (optimize (speed 3) (safety 1)))
63   (let ((tokenbuf (make-iostruct :tokenbuf (get-tokenbuf)
64                                  :do-entity t
65                                  :read-sequence-func read-sequence-func)))
66     ;; set up stream right
67     (setf (tokenbuf-stream (iostruct-tokenbuf tokenbuf)) p)
68     ;; set up user specified entities
69     (setf (iostruct-parameter-entities tokenbuf) parameter-entities)
70     (setf (iostruct-general-entities tokenbuf) general-entities)
71     (setf (iostruct-uri-to-package tokenbuf) uri-to-package)
72     ;; look for Unicode file
73     (unicode-check p tokenbuf)
74     (unwind-protect
75         (values (pxml-internal tokenbuf external-callback content-only)
76                 (iostruct-uri-to-package tokenbuf))
77       (dolist (entity-buf (iostruct-entity-bufs tokenbuf))
78         (when (streamp (tokenbuf-stream entity-buf))
79           (close (tokenbuf-stream entity-buf))
80           (put-back-tokenbuf entity-buf))))
81     ))
82
83 (defun pxml-internal (tokenbuf external-callback content-only)
84   (declare (optimize (speed 3) (safety 1)))
85   (let ((state state-docstart)
86         (guts)
87         (pending)
88         (attlist-data)
89         (public-string)
90         (system-string)
91         (entity-open-tags)
92         )
93
94     (loop
95       (multiple-value-bind (val kind kind2)
96           (next-token tokenbuf external-callback attlist-data)
97         (when *debug-xml*
98           (format t "val: ~s kind: ~s kind2: ~s state: ~s~%" val kind kind2 state))
99         (case state
100           (#.state-docstart
101            (if* (and (listp val) (eq :xml (first val)) (eq kind :xml) (eq kind2 :end-tag))
102               then
103                    (check-xmldecl val tokenbuf)
104                    (when (not content-only) (push val guts))
105                    (setf state state-docstart-misc)
106             elseif (eq kind :comment)
107               then
108                    (when (not content-only) (push val guts))
109                    (setf state state-docstart-misc)
110             elseif (and (listp val) (eq :DOCTYPE (first val)))
111               then
112                    (if* (eq (third val) :SYSTEM) then
113                            (setf system-string (fourth val))
114                            (setf val (remove (third val) val))
115                            (setf val (remove (third val) val))
116                     elseif (eq (third val) :PUBLIC) then
117                            (setf public-string (normalize-public-value (fourth val)))
118                            (setf system-string (fifth val))
119                            (setf val (remove (third val) val))
120                            (setf val (remove (third val) val))
121                            (setf val (remove (third val) val)))
122                    (when system-string
123                      (if* external-callback then
124                              (let ((ext-stream (apply external-callback
125                                                       (list (parse-uri system-string)
126                                                             :DOCTYPE
127                                                             public-string
128                                                             ))))
129                                (when ext-stream
130                                  (let (ext-io (entity-buf (get-tokenbuf)))
131                                    (setf (tokenbuf-stream entity-buf) ext-stream)
132                                    (setf ext-io (make-iostruct :tokenbuf entity-buf
133                                                                :do-entity
134                                                                (iostruct-do-entity tokenbuf)
135                                                                :read-sequence-func
136                                                                (iostruct-read-sequence-func tokenbuf)))
137                                    (unicode-check ext-stream ext-io)
138                                    (setf (iostruct-parameter-entities ext-io)
139                                      (iostruct-parameter-entities tokenbuf))
140                                    (setf (iostruct-general-entities ext-io)
141                                      (iostruct-general-entities tokenbuf))
142                                    (unwind-protect
143                                        (setf val (append val
144                                                          (list (append
145                                                                 (list :external)
146                                                                 (parse-dtd
147                                                                  ext-io
148                                                                  t external-callback)))))
149                                      (setf (iostruct-seen-any-dtd tokenbuf) t)
150                                      (setf (iostruct-seen-external-dtd tokenbuf) t)
151                                      (setf (iostruct-seen-parameter-reference tokenbuf)
152                                        (iostruct-seen-parameter-reference ext-io))
153                                      (setf (iostruct-general-entities tokenbuf)
154                                        (iostruct-general-entities ext-io))
155                                      (setf (iostruct-parameter-entities tokenbuf)
156                                        (iostruct-parameter-entities ext-io))
157                                      (setf (iostruct-do-entity tokenbuf)
158                                        (iostruct-do-entity ext-io))
159                                      (dolist (entity-buf2 (iostruct-entity-bufs ext-io))
160                                        (when (streamp (tokenbuf-stream entity-buf2))
161                                          (close (tokenbuf-stream entity-buf2))
162                                          (put-back-tokenbuf entity-buf2)))
163                                      (close (tokenbuf-stream entity-buf))
164                                      (put-back-tokenbuf entity-buf))
165                                    )))
166                         else
167                              (setf (iostruct-do-entity tokenbuf) nil)))
168                    (setf attlist-data
169                      (process-attlist (rest (rest val)) attlist-data))
170                    (when (not content-only) (push val guts))
171                    (setf state state-docstart-misc2)
172             elseif (eq kind :pi)
173               then
174                    (push val guts)
175                    (setf state state-docstart-misc)
176             elseif (eq kind :pcdata)
177               then
178                    (when (or (not kind2) (not (all-xml-whitespace-p val)))
179                      (if* (not kind2) then
180                              (xml-error "An entity reference occured where only whitespace or the first element may occur")
181                         else
182                              (xml-error (concatenate 'string
183                                           "unrecognized content '"
184                                           (subseq val 0 (min (length val) 40)) "'"))))
185                    (setf state state-docstart-misc)
186             elseif (or (symbolp val)
187                        (and (listp val) (symbolp (first val))))
188               then
189                    (when (eq kind :start-tag)
190                      (setf val (add-default-values val attlist-data)))
191                    (if* (and (eq kind :start-tag) (eq kind2 :end-tag))
192                       then (push (list val) guts)
193                            (setf state state-element-done)
194                     elseif (eq kind :start-tag)
195                       then (push (list val) pending)
196                            ;;(format t "pending: ~s guts: ~s <1>~%" pending guts)
197                            (when (iostruct-entity-bufs tokenbuf)
198                              (push (if (symbolp val) val (first val)) entity-open-tags))
199                            (setf state state-element-contents)
200                       else (xml-error (concatenate 'string
201                                                    "encountered token at illegal syntax position: '"
202                                                    (string kind) "'"
203                                                    (if* (null guts) then
204                                                            " at start of contents"
205                                                       else
206                                                            (concatenate 'string
207                                                              " following: '"
208                                                              (format nil "~s" (first guts))
209                                                              "'")))))
210               else
211                    (print (list val kind kind2))
212                    (break "need to check for other allowable docstarts")))
213           (#.state-docstart-misc2
214            (if* (eq kind :pcdata)
215               then
216                    (when (or (not kind2) (not (all-xml-whitespace-p val)))
217                      (if* (not kind2) then
218                              (xml-error "An entity reference occured where only whitespace or the first element may occur")
219                         else
220                              (xml-error (concatenate 'string
221                                           "unrecognized content '"
222                                           (subseq val 0 (min (length val) 40)) "'"))))
223             elseif (and (listp val) (eq :comment (first val)))
224               then
225                    (when (not content-only) (push val guts))
226             elseif (eq kind :pi)
227               then
228                    (push val guts)
229             elseif (eq kind :eof)
230               then
231                    (xml-error "unexpected end of file encountered")
232             elseif (or (symbolp val)
233                        (and (listp val) (symbolp (first val))))
234               then
235                    (when (eq kind :start-tag)
236                      (setf val (add-default-values val attlist-data)))
237                    (if* (and (eq kind :start-tag) (eq kind2 :end-tag))
238                       then (push (list val) guts)
239                            (setf state state-element-done)
240                     elseif (eq kind :start-tag)
241                       then (push (list val) pending)
242                            ;;(format t "pending: ~s guts: ~s <2>~%" pending guts)
243                            (when (iostruct-entity-bufs tokenbuf)
244                              (push (if (symbolp val) val (first val)) entity-open-tags))
245                            (setf state state-element-contents)
246                       else (xml-error (concatenate 'string
247                                         "encountered token at illegal syntax position: '"
248                                         (string kind) "'"
249                                         (if* (null guts) then
250                                                 " at start of contents"
251                                            else
252                                                 (concatenate 'string
253                                                   " following: '"
254                                                   (format nil "~s" (first guts))
255                                                   "'")))))
256               else
257                    (error "this branch unexpected <1>")))
258           (#.state-docstart-misc
259            (if* (eq kind :pcdata)
260               then
261                    (when (or (not kind2) (not (all-xml-whitespace-p val)))
262                      (if* (not kind2) then
263                              (xml-error "An entity reference occured where only whitespace or the first element may occur")
264                         else
265                              (xml-error (concatenate 'string
266                                           "unrecognized content '"
267                                           (subseq val 0 (min (length val) 40)) "'"))))
268             elseif (and (listp val) (eq :DOCTYPE (first val)))
269               then
270                    (if* (eq (third val) :SYSTEM) then
271                            (setf system-string (fourth val))
272                            (setf val (remove (third val) val))
273                            (setf val (remove (third val) val))
274                     elseif (eq (third val) :PUBLIC) then
275                            (setf public-string (normalize-public-value (fourth val)))
276                            (setf system-string (fifth val))
277                            (setf val (remove (third val) val))
278                            (setf val (remove (third val) val))
279                            (setf val (remove (third val) val)))
280                    (when system-string
281                      (if* external-callback then
282                              (let ((ext-stream (apply external-callback
283                                                       (list (parse-uri system-string)
284                                                             :DOCTYPE
285                                                             public-string
286                                                             ))))
287                                (when ext-stream
288                                  (let (ext-io (entity-buf (get-tokenbuf)))
289                                    (setf (tokenbuf-stream entity-buf) ext-stream)
290                                    (setf ext-io (make-iostruct :tokenbuf entity-buf
291                                                                :do-entity
292                                                                (iostruct-do-entity tokenbuf)
293                                                                :read-sequence-func
294                                                                (iostruct-read-sequence-func tokenbuf)))
295                                    (unicode-check ext-stream ext-io)
296                                    (setf (iostruct-parameter-entities ext-io)
297                                      (iostruct-parameter-entities tokenbuf))
298                                    (setf (iostruct-general-entities ext-io)
299                                      (iostruct-general-entities tokenbuf))
300                                    (unwind-protect
301                                        (setf val (append val
302                                                          (list (append
303                                                                 (list :external)
304                                                                 (parse-dtd
305                                                                  ext-io
306                                                                  t external-callback)))))
307                                      (setf (iostruct-seen-any-dtd tokenbuf) t)
308                                      (setf (iostruct-seen-external-dtd tokenbuf) t)
309                                      (setf (iostruct-seen-parameter-reference tokenbuf)
310                                        (iostruct-seen-parameter-reference ext-io))
311                                      (setf (iostruct-general-entities tokenbuf)
312                                        (iostruct-general-entities ext-io))
313                                      (setf (iostruct-parameter-entities tokenbuf)
314                                        (iostruct-parameter-entities ext-io))
315                                      (setf (iostruct-do-entity tokenbuf)
316                                        (iostruct-do-entity ext-io))
317                                      (dolist (entity-buf2 (iostruct-entity-bufs ext-io))
318                                        (when (streamp (tokenbuf-stream entity-buf2))
319                                          (close (tokenbuf-stream entity-buf2))
320                                          (put-back-tokenbuf entity-buf2)))
321                                      (close (tokenbuf-stream entity-buf))
322                                      (put-back-tokenbuf entity-buf))
323                                    )))
324                         else
325                              (setf (iostruct-do-entity tokenbuf) nil)))
326                    (setf attlist-data
327                      (process-attlist (rest (rest val)) attlist-data))
328                    (when (not content-only) (push val guts))
329                    (setf state state-docstart-misc2)
330             elseif (and (listp val) (eq :comment (first val)))
331               then
332                    (when (not content-only) (push val guts))
333             elseif (eq kind :pi)
334               then
335                    (push val guts)
336             elseif (or (symbolp val)
337                        (and (listp val) (symbolp (first val))))
338               then
339                    (when (eq kind :start-tag)
340                      (setf val (add-default-values val attlist-data)))
341                    (if* (and (eq kind :start-tag) (eq kind2 :end-tag))
342                       then (push (list val) guts)
343                            (setf state state-element-done)
344                     elseif (eq kind :start-tag)
345                       then (push (list val) pending)
346                            ;;(format t "pending: ~s guts: ~s <3>~%" pending guts)
347                            (when (iostruct-entity-bufs tokenbuf)
348                              (push (if (symbolp val) val (first val)) entity-open-tags))
349                            (setf state state-element-contents)
350                       else (xml-error (concatenate 'string
351                                         "encountered token at illegal syntax position: '"
352                                         (string kind) "'"
353                                         (concatenate 'string
354                                           " following: '"
355                                           (format nil "~s" (first guts))
356                                           "'"))))
357               else
358                    (print (list val kind kind2))
359                    (break "check for other docstart-misc states")))
360           (#.state-element-contents
361            (if* (or (symbolp val)
362                     (and (listp val) (symbolp (first val))))
363               then
364                    (when (eq kind :start-tag)
365                      (setf val (add-default-values val attlist-data)))
366                    (if* (eq kind :end-tag)
367                       then (let ((candidate (first (first pending))))
368                              (when (listp candidate) (setf candidate (first candidate)))
369                              (if* (eq candidate val)
370                                 then
371                                      (if* (iostruct-entity-bufs tokenbuf) then
372                                              (when (not (eq (first entity-open-tags) val))
373                                                (xml-error
374                                                 (concatenate 'string
375                                                   (string val)
376                                                   " element closed in entity that did not open it")))
377                                              (setf entity-open-tags (rest entity-open-tags))
378                                         else
379                                              (when (eq (first entity-open-tags) val)
380                                                (xml-error
381                                                 (concatenate 'string
382                                                   (string val)
383                                                   " element closed outside of entity that did not open it")))
384                                              )
385                                      (if* (= (length pending) 1)
386                                         then
387                                              (push (first pending) guts)
388                                              (setf state state-element-done)
389                                         else
390                                              (setf (second pending)
391                                                (append (second pending) (list (first pending)))))
392                                      (setf pending (rest pending))
393                                      ;;(format t "pending: ~s guts: ~s <4>~%" pending guts)
394                                 else (xml-error (format nil
395                                                         "encountered end tag: ~s expected: ~s"
396                                                         val candidate))))
397                     elseif (and (eq kind :start-tag) (eq kind2 :end-tag))
398                       then
399                            (setf (first pending)
400                              (append (first pending) (list (list val))))
401                            ;;(format t "pending: ~s guts: ~s <5>~%" pending guts)
402                     elseif (eq kind :start-tag)
403                       then
404                            (push (list val) pending)
405                            ;;(format t "pending: ~s guts: ~s <6>~%" pending guts)
406                            (when (iostruct-entity-bufs tokenbuf)
407                              (push (if (symbolp val) val (first val)) entity-open-tags))
408                     elseif (eq kind :cdata) then
409                            (setf (first pending)
410                              (append (first pending) (rest val)))
411                            (let ((old (first pending))
412                                  (new))
413                              (dolist (item old)
414                                (if* (and (stringp (first new)) (stringp item)) then
415                                        (setf (first new)
416                                          (concatenate 'string (first new) item))
417                                   else (push item new)))
418                              (setf (first pending) (reverse new)))
419                     elseif (eq kind :comment) then
420                           (when (not content-only) (push val guts))
421                     elseif (eq kind :pi)
422                       then
423                            (setf (first pending)
424                              (append (first pending) (list val)))
425                     elseif (eq kind :eof)
426                       then
427                            (xml-error "unexpected end of file encountered")
428                       else (xml-error (format nil "unexpected token: ~s" val)))
429             elseif (eq kind :pcdata)
430               then
431                    (setf (first pending)
432                      (append (first pending) (list val)))
433                    (let ((old (first pending))
434                          (new))
435                      (dolist (item old)
436                        (if* (and (stringp (first new)) (stringp item)) then
437                                (setf (first new)
438                                  (concatenate 'string (first new) item))
439                           else (push item new)))
440                      (setf (first pending) (reverse new)))
441               else (xml-error (format nil "unexpected token: ~s" val))))
442           (#.state-element-done
443            (if* (eq kind :pcdata)
444               then
445                    (when (or (not kind2) (not (all-xml-whitespace-p val)))
446                      (if* (not kind2) then
447                              (xml-error "An entity reference occured where only whitespace or the first element may occur")
448                         else
449                              (xml-error (concatenate 'string
450                                           "unrecognized content '"
451                                           (subseq val 0 (min (length val) 40)) "'"))))
452             elseif (eq kind :eof) then
453                    (put-back-tokenbuf (iostruct-tokenbuf tokenbuf))
454                    (return (nreverse guts))
455             elseif (eq kind :comment) then
456                    (when (not content-only) (push val guts))
457             elseif (eq kind :pi)
458               then (push val guts)
459               else
460                    (xml-error (concatenate 'string
461                                 "encountered token at illegal syntax position: '"
462                                 (string kind) "'"
463                                 (concatenate 'string
464                                   " following: '"
465                                   (format nil "~s" (first guts))
466                                   "'")))
467                    ))
468           (t
469            (error "need to support state:~s token:~s  kind:~s kind2:~s <parse>" state val kind kind2)))
470         ))))
471
472 (eval-when (compile load eval)
473   (defconstant state-pcdata 0) ;;looking for < (tag start), & (reference); all else is string data
474   (defconstant state-readtagfirst 1) ;; seen < looking for /,?,!,name start
475   (defconstant state-readtag-? 2) ;; seen <? looking for space,char
476   (defconstant state-readtag-! 3) ;; seen <! looking for name,[,-
477   (defconstant state-readtag-end 4) ;; found </ looking for tag name
478   (defconstant state-readtag 5) ;; found < name start looking for more name, /, >
479   (defconstant state-findattributename 6) ;; found <?xml space looking for ?,>,space,name start
480   (defconstant state-readpi 7)
481   (defconstant state-noattributename 8)
482   (defconstant state-attribname 9) ;; found <?xml space name start looking for more name,=
483   (defconstant state-attribstartvalue 10) ;; found <?xml space name= looking for ',"
484   (defconstant state-attribvaluedelim 11)
485   (defconstant state-readtag-!-name 12) ;; seen <!name(start) looking for more name chars or space
486   (defconstant state-readtag-!-conditional 13) ;; found <![ looking for CDATA, INCLUDE, IGNORE
487   (defconstant state-readtag-!-comment 14)
488   (defconstant state-readtag-!-readcomment 15)
489   (defconstant state-readtag-!-readcomment2 16)
490   (defconstant state-readtag-end-bracket 17)
491   (defconstant state-readpi2 18) ;; found <?name space char looking for char,?
492   (defconstant state-prereadpi 19);; found <?name space looking for space,character
493   (defconstant state-pre-!-contents 20) ;; found <!name space looking for > or contents
494   (defconstant state-!-contents 21) ;; found <!name space name start looking for more name,>,[,space
495   (defconstant state-!-doctype 22) ;; found <!DOCTYPE space looking for space,>,[,name
496   (defconstant state-begin-dtd 23)
497   (defconstant state-!-doctype-ext 24) ;; found <!DOCTYPE space name space name start looking for name,space
498   (defconstant state-!-doctype-system 25) ;; found <!DOCTYPE name SYSTEM looking for ',"
499   (defconstant state-!-doctype-public 26) ;; found <!DOCTYPE name PUBLIC looking for ',"
500   (defconstant state-!-doctype-system2 27) ;; found <!DOCTYPE name SYSTEM " looking for chars,"
501   (defconstant state-!-doctype-system3 28) ;; found <!DOCTYPE name SYSTEM ' looking for chars,'
502   (defconstant state-!-doctype-ext2 29) ;; found <!DOCTYPE name SYSTEM/PUBLIC etc. looking for space,>,[
503   (defconstant state-!-doctype-ext3 30) ;; processed DTD looking for space,>
504   (defconstant state-!-doctype-public2 31) ;;  found <!DOCTYPE name PUBLIC " looking for text or "
505   (defconstant state-!-doctype-public3 32) ;;  found <!DOCTYPE name PUBLIC ' looking for text or '
506   (defconstant state-readtag2 33) ;; found <name looking for space,/,>,attrib name
507   (defconstant state-readtag3 34) ;; found <name/ or <name / looking for >
508   (defconstant state-readtag4 35) ;; found <name attrib-name start looking for more name,=
509   (defconstant state-readtag5 36) ;; found attrib= looking for ',"
510   (defconstant state-readtag6 37) ;; found attrib=['"] looking for end delimiter,value,reference
511   (defconstant state-readtag7 38) ;; found & inside attribute value, looking for # or name start
512   (defconstant state-readtag8 39) ;; found &# in attribute value looking for char code
513   (defconstant state-readtag9 40) ;; found &name start looking for more name,;
514   (defconstant state-readtag10 41) ;; found &#x in attribute value looking for hex code
515   (defconstant state-readtag11 42) ;; found &#[0-9] looking for more digits,;
516   (defconstant state-readtag-end2 43) ;; found </ & tag name start looking for more tag, space, >
517   (defconstant state-readtag-end3 44) ;; found </ end tag name space looking for >
518   (defconstant state-pcdata2 45) ;; seen & looking for name start
519   (defconstant state-pcdata3 46) ;; seen &# looking for character reference code
520   (defconstant state-pcdata4 47) ;; working on entity reference name looking for ;
521   (defconstant state-pcdata5 48) ;; working on hex character code reference
522   (defconstant state-pcdata6 49) ;; working on decimal character code reference
523   (defconstant state-findattributename0 50)
524   (defconstant state-readtag6a 51)
525   (defconstant state-readtag-!-conditional4 52)
526   (defconstant state-readtag-!-conditional5 53)
527   (defconstant state-readtag-!-conditional6 54)
528   (defconstant state-readtag-!-conditional7 55)
529   ;;(defconstant state-pcdata-parsed 56)
530   (defconstant state-pcdata7 57)
531   (defconstant state-pcdata8 58)
532   (defconstant state-readtag12 59)
533   (defconstant state-attribname2 60)
534   )
535
536 (defun next-token (tokenbuf external-callback attlist-data)
537   (declare (optimize (speed 3) (safety 1)))
538   ;; return two values:
539   ;;    the next token from the stream.
540   ;;    the kind of token
541   ;;
542   ;; if read-sequence-func is non-nil,
543   ;; read-sequence-func is called to fetch the next character
544   (macrolet ((add-to-entity-buf (entity-symbol p-value)
545                `(progn
546                   (push (make-tokenbuf :cur 0 :max (length p-value) :data p-value)
547                         (iostruct-entity-bufs tokenbuf))))
548
549              (un-next-char (ch)
550                `(push ,ch (iostruct-unget-char tokenbuf)))
551
552              (clear-coll (coll)
553                `(setf (collector-next ,coll) 0))
554
555              (add-to-coll (coll ch)
556                `(let ((.next. (collector-next ,coll)))
557                   (if* (>= .next. (collector-max ,coll))
558                      then (grow-and-add ,coll ,ch)
559                      else (setf (schar (collector-data ,coll) .next.)
560                             ,ch)
561                           (setf (collector-next ,coll) (1+ .next.)))))
562
563              (to-preferred-case (ch)
564                ;; should check the case mode
565                `(char-downcase ,ch))
566
567              )
568
569     (let ((state state-pcdata)
570           (coll  (get-collector))
571           (entity  (get-collector))
572           (tag-to-return)
573           (tag-to-return-string)
574           (attrib-name)
575           (empty-delim)
576           (value-delim)
577           (attrib-value)
578           (attribs-to-return)
579           (contents-to-return)
580           (char-code 0)
581           (special-tag-count 0)
582           (attrib-value-tokenbuf)
583           (last-ch)
584           (cdatap t)
585           (pcdatap t)
586           (entity-source)
587           (ns-token)
588           (ch))
589
590       (loop
591
592         (setq ch (get-next-char tokenbuf))
593         (when *debug-xml* (format t "ch: ~s code: ~x state:~s entity-names:~s~%"
594                                   ch (char-code ch) state (iostruct-entity-names tokenbuf)))
595         (if* (null ch)
596            then (return) ; eof -- exit loop
597                 )
598
599
600         (case state
601           (#.state-pcdata
602           (if* (eq ch #\<)
603              then
604                   (setf entity-source (first (iostruct-entity-bufs tokenbuf)))
605                   (if* (> (collector-next coll) 0)
606                      then               ; have collected something, return this string
607                           (un-next-char ch) ; push back the <
608                           (return)
609                       else ; collect a tag
610                           (setq state state-readtagfirst))
611            elseif (eq #\& ch)
612              then (setf state state-pcdata2)
613                   (setf entity-source (first (iostruct-entity-bufs tokenbuf)))
614                   (setf pcdatap nil)
615            elseif (eq #\] ch) then (setf state state-pcdata7)
616            elseif (not (xml-char-p ch)) then
617                   (xml-error (concatenate 'string
618                                "Illegal character: "
619                                (string ch)
620                                " detected in input"))
621              else
622                   (add-to-coll coll ch)
623                   #+ignore
624                   (if* (not (eq ch #\return))
625                      then (add-to-coll coll ch))))
626
627           (#.state-pcdata7
628            (if* (eq #\] ch) then (setf state state-pcdata8)
629               else (setf state state-pcdata)
630                    (add-to-coll coll #\]) (un-next-char ch)))
631
632           (#.state-pcdata8
633            (if* (eq #\> ch) then
634                    (add-to-coll coll #\])
635                    (add-to-coll coll #\])
636                    (add-to-coll coll #\>)
637                    (dotimes (i 15)
638                      (add-to-coll coll ch)
639                      (setq ch (get-next-char tokenbuf))
640                      (if* (null ch)
641                         then (return)))
642                    (xml-error (concatenate 'string
643                                 "content cannot contain ']]>':'"
644                                 (compute-coll-string coll)
645                                 "'"))
646             elseif (eq #\] ch) then
647                    (add-to-coll coll #\])
648               else (setf state state-pcdata)
649                    (add-to-coll coll #\]) (add-to-coll coll #\]) (un-next-char ch)))
650
651           (#.state-pcdata2
652            (if* (eq #\# ch)
653               then (setf state state-pcdata3)
654             elseif (xml-name-start-char-p ch)
655               then (setf state state-pcdata4)
656                    (un-next-char ch)
657               else (clear-coll coll)
658                    (dotimes (i 15)
659                      (add-to-coll coll ch)
660                      (setq ch (get-next-char tokenbuf))
661                      (if* (null ch)
662                         then (return)))
663                    (xml-error (concatenate 'string
664                                 "illegal reference name, starting at: '&"
665                                 (compute-coll-string coll)
666                                 "'"))
667                    ))
668
669           (#.state-pcdata3
670            (if* (eq #\x ch)
671               then (setf state state-pcdata5)
672             elseif (<= (char-code #\0) (char-code ch) (char-code #\9))
673               then (setf state state-pcdata6)
674                    (un-next-char ch)
675               else (clear-coll coll)
676                    (dotimes (i 15)
677                      (add-to-coll coll ch)
678                      (setq ch (get-next-char tokenbuf))
679                      (if* (null ch)
680                         then (return)))
681                    (xml-error (concatenate 'string
682                                 "illegal character reference code, starting at: '&#"
683                                 (compute-coll-string coll)
684                                 "'"))
685                    ))
686
687           (#.state-pcdata4
688            (if* (xml-name-char-p ch)
689               then (add-to-coll entity ch)
690             elseif (eq #\; ch)
691               then (let ((entity-symbol (compute-tag entity)))
692                      (clear-coll entity)
693                      (if* (not (eq entity-source (first (iostruct-entity-bufs tokenbuf)))) then
694                              (xml-error
695                               (concatenate 'string
696                                 (string entity-symbol)
697                                 " reference cannot be constructed from entity reference/character data sequence"))
698                         else
699                              (setf entity-source nil))
700                      (if* (string= (symbol-name entity-symbol) "amp") then (add-to-coll coll #\&)
701                       elseif (string= (symbol-name entity-symbol) "lt") then (add-to-coll coll #\<)
702                       elseif (string= (symbol-name entity-symbol) "gt") then (add-to-coll coll #\>)
703                       elseif (string= (symbol-name entity-symbol) "apos") then (add-to-coll coll #\')
704                       elseif (string= (symbol-name entity-symbol) "quot") then (add-to-coll coll #\")
705                         else
706                              (let (p-value)
707                                (if* (and (iostruct-do-entity tokenbuf)
708                                          (setf p-value
709                                            (assoc entity-symbol
710                                                   (iostruct-general-entities tokenbuf)))) then
711                                        (setf p-value (rest p-value))
712                                        (when (member entity-symbol (iostruct-entity-names tokenbuf))
713                                          (xml-error (concatenate 'string
714                                                       "entity:"
715                                                       (string entity-symbol)
716                                                       " in recursive reference")))
717                                        (push entity-symbol (iostruct-entity-names tokenbuf))
718                                        (if* (stringp p-value) then
719                                                (add-to-entity-buf entity-symbol p-value)
720                                         elseif (null external-callback) then
721                                                (setf (iostruct-do-entity tokenbuf) nil)
722                                         elseif p-value then
723                                                (let ((entity-stream (apply external-callback p-value)))
724                                                  (if* entity-stream then
725                                                          (let ((entity-buf (get-tokenbuf)))
726                                                            (setf (tokenbuf-stream entity-buf) entity-stream)
727                                                            (unicode-check entity-stream tokenbuf)
728                                                            (push entity-buf
729                                                                  (iostruct-entity-bufs tokenbuf))
730                                                            ;; check for possible external textdecl
731                                                            (let ((count 0) cch
732                                                                  (string "<?xml "))
733                                                              (if* (dotimes (i (length string) t)
734                                                                     (setf cch (get-next-char tokenbuf))
735                                                                     (when (and (= i 5)
736                                                                                (xml-space-p cch))
737                                                                       (setf cch #\space))
738                                                                     (when (not (eq cch
739                                                                                    (schar string count)))
740                                                                       (return nil))
741                                                                     (incf count)) then
742                                                                      (setf count 5)
743                                                                      (loop
744                                                                        (when (< count 0) (return))
745                                                                        (un-next-char (schar string count))
746                                                                        (decf count))
747                                                                      ;; swallow <?xml token
748                                                                      (swallow-xml-token
749                                                                       tokenbuf
750                                                                       external-callback)
751                                                                 else
752                                                                      (un-next-char cch)
753                                                                      (decf count)
754                                                                      (loop
755                                                                        (when (< count 0) (return))
756                                                                        (un-next-char (schar string count))
757                                                                        (decf count))))
758                                                            )
759                                                     else
760                                                          (xml-error (concatenate 'string
761                                                                       "Reference to unparsed entity "
762                                                                       (string entity-symbol)))
763                                                          ))
764                                                )
765                                 elseif (or (not (iostruct-seen-any-dtd tokenbuf))
766                                            (iostruct-standalonep tokenbuf)
767                                            (and (iostruct-seen-any-dtd tokenbuf)
768                                                 (not (iostruct-seen-external-dtd tokenbuf))
769                                                 (not (iostruct-seen-parameter-reference tokenbuf))))
770                                   then
771                                        (xml-error (concatenate 'string
772                                                     (string entity-symbol)
773                                                     " must have entity declaration before being referenced"))
774                                        ))
775                              ))
776                    (setq state state-pcdata)
777               else (let ((tmp (compute-coll-string entity)))
778                      (clear-coll coll)
779                      (dotimes (i 15)
780                        (add-to-coll coll ch)
781                        (setq ch (get-next-char tokenbuf))
782                        (if* (null ch)
783                           then (return)))
784                      (xml-error (concatenate 'string
785                                   "reference not terminated by ';', starting at: '&"
786                                   tmp
787                                   (compute-coll-string coll)
788                                   "'")))
789                    ))
790
791           (#.state-pcdata5
792            (let ((code (char-code ch)))
793              (if* (eq #\; ch)
794                 then (if* (not (eq entity-source (first (iostruct-entity-bufs tokenbuf)))) then
795                              (xml-error
796                               (concatenate 'string
797                                 (string (code-char char-code))
798                                 " reference cannot be constructed from entity reference/character data sequence"))
799                         else
800                              (setf entity-source nil))
801                      (when (not (xml-char-p (code-char char-code)))
802                            (xml-error
803                             (concatenate 'string
804                               "Character reference: "
805                               (format nil "~s" char-code)
806                               " (decimal) is not valid XML input character")))
807                      (add-to-coll coll (code-char char-code))
808                      (setf char-code 0)
809                      (setq state state-pcdata)
810               elseif (<= (char-code #\0) code (char-code #\9))
811                 then (setf char-code (+ (* char-code 16) (- code (char-code #\0))))
812               elseif (<= (char-code #\A) code (char-code #\F))
813                 then (setf char-code (+ 10 (* char-code 16) (- code (char-code #\A))))
814               elseif (<= (char-code #\a) code (char-code #\f))
815                 then (setf char-code (+ 10 (* char-code 16) (- code (char-code #\a))))
816                 else (clear-coll coll)
817                      (dotimes (i 15)
818                        (add-to-coll coll ch)
819                        (setq ch (get-next-char tokenbuf))
820                        (if* (null ch)
821                           then (return)))
822                      (xml-error (concatenate 'string
823                                   "illegal hexidecimal character reference code, starting at: '"
824                                   (compute-coll-string coll)
825                                   "', calculated char code: "
826                                   (format nil "~s" char-code)))
827                      )))
828
829           (#.state-pcdata6
830            (let ((code (char-code ch)))
831              (if* (eq #\; ch)
832                 then (if* (not (eq entity-source (first (iostruct-entity-bufs tokenbuf)))) then
833                              (xml-error
834                               (concatenate 'string
835                                 (string (code-char char-code))
836                                 " reference cannot be constructed from entity reference/character data sequence"))
837                         else
838                              (setf entity-source nil))
839                      (when (not (xml-char-p (code-char char-code)))
840                            (xml-error
841                             (concatenate 'string
842                               "Character reference: "
843                               (format nil "~s" char-code)
844                               " (decimal) is not valid XML input character")))
845                      (add-to-coll coll (code-char char-code))
846                      (setf char-code 0)
847                      (setq state state-pcdata)
848               elseif (<= (char-code #\0) code (char-code #\9))
849                 then (setf char-code (+ (* char-code 10) (- code (char-code #\0))))
850                 else (clear-coll coll)
851                      (dotimes (i 15)
852                        (add-to-coll coll ch)
853                        (setq ch (get-next-char tokenbuf))
854                        (if* (null ch)
855                           then (return)))
856                      (xml-error (concatenate 'string
857                                   "illegal decimal character reference code, starting at: '"
858                                   (compute-coll-string coll)
859                                   "', calculated char code: "
860                                   (format nil "~s" char-code)))
861                      )))
862
863           (#.state-readtag-end
864            (if* (xml-name-start-char-p ch)
865               then (setf state state-readtag-end2)
866                    (un-next-char ch)
867               else (clear-coll coll)
868                    (dotimes (i 15)
869                      (add-to-coll coll ch)
870                      (setq ch (get-next-char tokenbuf))
871                      (if* (null ch)
872                         then (return)))
873                    (xml-error (concatenate 'string
874                                 "illegal end tag name, starting at: '</"
875                                 (compute-coll-string coll)
876                                 "'"))
877                    ))
878
879           (#.state-readtag-end2
880            (if* (xml-name-char-p ch)
881               then (add-to-coll coll ch)
882             elseif (eq #\> ch) then
883                    (let ((tag-string (compute-coll-string coll)))
884                      (when (and (iostruct-ns-scope tokenbuf)
885                                 (string= tag-string
886                                     (first (first (iostruct-ns-scope tokenbuf)))))
887                        (dolist (item (second (first (iostruct-ns-scope tokenbuf))))
888                          (setf (iostruct-ns-to-package tokenbuf)
889                            (remove (assoc item (iostruct-ns-to-package tokenbuf))
890                                    (iostruct-ns-to-package tokenbuf))))
891                        (setf (iostruct-ns-scope tokenbuf)
892                          (rest (iostruct-ns-scope tokenbuf)))))
893                    (setq tag-to-return (compute-tag coll *package*
894                                                     (iostruct-ns-to-package tokenbuf)))
895                    (return)
896             elseif (xml-space-p ch) then (setf state state-readtag-end3)
897                    (let ((tag-string (compute-coll-string coll)))
898                      (when (and (iostruct-ns-scope tokenbuf)
899                                 (string= tag-string
900                                     (first (first (iostruct-ns-scope tokenbuf)))))
901                        (setf (iostruct-ns-scope tokenbuf)
902                          (rest (iostruct-ns-scope tokenbuf)))))
903                    (setq tag-to-return (compute-tag coll *package*
904                                                     (iostruct-ns-to-package tokenbuf)))
905               else (let ((tmp (compute-coll-string coll)))
906                      (clear-coll coll)
907                      (dotimes (i 15)
908                        (add-to-coll coll ch)
909                        (setq ch (get-next-char tokenbuf))
910                        (if* (null ch)
911                           then (return)))
912                      (xml-error (concatenate 'string
913                                   "illegal end tag name, starting at: '</"
914                                   tmp
915                                   (compute-coll-string coll)
916                                   "'")))
917                    ))
918
919           (#.state-readtag-end3
920            (if* (xml-space-p ch) then nil
921             elseif (eq #\> ch) then (return)
922               else (let ((tmp (compute-coll-string coll)))
923                      (clear-coll coll)
924                      (dotimes (i 15)
925                        (add-to-coll coll ch)
926                        (setq ch (get-next-char tokenbuf))
927                        (if* (null ch)
928                           then (return)))
929                      (xml-error (concatenate 'string
930                                   "illegal end tag name, starting at: '"
931                                   (compute-coll-string coll)
932                                   "' end tag name: " tmp )))
933                    ))
934
935           (#.state-readtagfirst
936            ; starting to read a tag name
937            (if* (eq #\/ ch)
938               then (setf state state-readtag-end)
939             elseif (eq #\? ch)
940               then (setf state state-readtag-?)
941                    (setf empty-delim #\?)
942             elseif (eq #\! ch)
943               then (setf state state-readtag-!)
944                    (setf empty-delim nil)
945             elseif (xml-name-start-char-p ch)
946               then (setf state state-readtag)
947                    (setf empty-delim #\/)
948                    (un-next-char ch)
949               else (clear-coll coll)
950                    (dotimes (i 15)
951                      (add-to-coll coll ch)
952                      (setq ch (get-next-char tokenbuf))
953                      (if* (null ch)
954                         then (return)))
955                    (xml-error (concatenate 'string
956                                 "illegal character following '<', starting at '"
957                                 (compute-coll-string coll)
958                                 "'"))
959                    ))
960
961           (#.state-readtag-!
962            (if* (xml-name-start-char-p ch)
963               then
964                    (setf state state-readtag-!-name)
965                    (un-next-char ch)
966             elseif (eq #\[ ch)
967               then
968                    (setf state state-readtag-!-conditional)
969             elseif (eq #\- ch)
970               then
971                    (setf state state-readtag-!-comment)
972               else
973                    (clear-coll coll)
974                    (dotimes (i 15)
975                      (add-to-coll coll ch)
976                      (setq ch (get-next-char tokenbuf))
977                      (if* (null ch)
978                         then (return)))
979                    (xml-error (concatenate 'string
980                                 "illegal character following '<!', starting at '<!"
981                                 (compute-coll-string coll)
982                                 "'"))
983                    ))
984
985           (#.state-readtag-!-conditional
986            (if* (eq #\C ch) then
987                    (setf state state-readtag-!-conditional4)
988                    (setf special-tag-count 1)
989               else (clear-coll coll)
990                    (dotimes (i 15)
991                      (add-to-coll coll ch)
992                      (setq ch (get-next-char tokenbuf))
993                      (if* (null ch)
994                         then (return)))
995                    (xml-error (concatenate 'string
996                                 "illegal character following '<![', starting at '<!["
997                                 (compute-coll-string coll)
998                                 "'"))
999                    ))
1000
1001           (#.state-readtag-!-conditional4
1002            (if* (not (eq (elt "CDATA[" special-tag-count) ch))
1003               then (clear-coll coll)
1004                    (dotimes (i 15)
1005                      (add-to-coll coll ch)
1006                      (setq ch (get-next-char tokenbuf))
1007                      (if* (null ch)
1008                         then (return)))
1009                    (xml-error (concatenate 'string
1010                                 "illegal token following '<![', starting at '<!["
1011                                 (subseq "CDATA[" 0 special-tag-count)
1012                                 (compute-coll-string coll)
1013                                 "'"))
1014             elseif (eq #\[ ch) then (setf state state-readtag-!-conditional5)
1015               else (incf special-tag-count)))
1016
1017           (#.state-readtag-!-conditional5
1018            (if* (eq #\] ch)
1019               then (setf state state-readtag-!-conditional6)
1020             elseif (not (xml-char-p ch)) then
1021                   (xml-error (concatenate 'string
1022                                "Illegal character: "
1023                                (string ch)
1024                                " detected in CDATA input"))
1025               else (add-to-coll coll ch)))
1026
1027           (#.state-readtag-!-conditional6
1028            (if* (eq #\] ch)
1029               then (setf state state-readtag-!-conditional7)
1030               else (setf state state-readtag-!-conditional5)
1031                    (add-to-coll coll #\])
1032                    (add-to-coll coll ch)))
1033
1034           (#.state-readtag-!-conditional7
1035            (if* (eq #\> ch)
1036               then
1037                    (if* (not (eq entity-source (first (iostruct-entity-bufs tokenbuf)))) then
1038                            (xml-error
1039                             "CDATA cannot be constructed from entity reference/character data sequence")
1040                       else
1041                              (setf entity-source nil))
1042                    (return)
1043             elseif (eq #\] ch) then
1044                    (add-to-coll coll #\]) ;; come back here to check again
1045               else (setf state state-readtag-!-conditional5)
1046                    (add-to-coll coll #\])
1047                    (add-to-coll coll #\])
1048                    (add-to-coll coll ch)))
1049
1050           (#.state-readtag-!-comment
1051            (if* (eq #\- ch)
1052               then (setf state state-readtag-!-readcomment)
1053                    (setf tag-to-return :comment)
1054               else (clear-coll coll)
1055                    (dotimes (i 15)
1056                      (add-to-coll coll ch)
1057                      (setq ch (get-next-char tokenbuf))
1058                      (if* (null ch)
1059                         then (return)))
1060                    (xml-error (concatenate 'string
1061                                 "illegal token following '<![-', starting at '<!-"
1062                                 (compute-coll-string coll)
1063                                 "'"))
1064                    ))
1065
1066           (#.state-readtag-!-readcomment
1067            (if* (eq #\- ch)
1068               then (setf state state-readtag-!-readcomment2)
1069             elseif (not (xml-char-p ch)) then
1070                    (xml-error (concatenate 'string
1071                                 "Illegal character: "
1072                                 (string ch)
1073                                 " detected in input"))
1074               else (add-to-coll coll ch)))
1075
1076           (#.state-readtag-!-readcomment2
1077            (if* (eq #\- ch)
1078               then (setf state state-readtag-end-bracket)
1079               else (setf state state-readtag-!-readcomment)
1080                    (add-to-coll coll #\-) (add-to-coll coll ch)))
1081
1082           (#.state-readtag-end-bracket
1083            (if* (eq #\> ch)
1084               then (if* (not (eq entity-source (first (iostruct-entity-bufs tokenbuf)))) then
1085                            (xml-error
1086                             (concatenate 'string
1087                               (string tag-to-return)
1088                             " tag cannot be constructed from entity reference/character data sequence"))
1089                       else
1090                              (setf entity-source nil))
1091                    (return)
1092               else  (clear-coll coll)
1093                    (dotimes (i 15)
1094                      (add-to-coll coll ch)
1095                      (setq ch (get-next-char tokenbuf))
1096                      (if* (null ch)
1097                         then (return)))
1098                    (xml-error (concatenate 'string
1099                                 "illegal token following '--' comment terminator, starting at '--"
1100                                 (compute-coll-string coll)
1101                                 "'"))
1102                    ))
1103
1104           (#.state-readtag
1105            (if* (xml-name-char-p ch) ;; starting char already passed more restrictive test
1106               then
1107                    (add-to-coll coll ch)
1108               else
1109                    (if* (xml-space-p ch) then
1110                            (setf tag-to-return-string (compute-coll-string coll))
1111                            (setq tag-to-return
1112                              (compute-tag coll *package*
1113                                           (iostruct-ns-to-package tokenbuf)))
1114                            (clear-coll coll)
1115                            (setf state state-readtag2)
1116                     elseif (eq #\> ch) then
1117                            (setq tag-to-return
1118                              (compute-tag coll *package*
1119                                           (iostruct-ns-to-package tokenbuf)))
1120                            (clear-coll coll)
1121                            (return)
1122                     elseif (eq #\/ ch) then
1123                            (setq tag-to-return
1124                              (compute-tag coll *package*
1125                                           (iostruct-ns-to-package tokenbuf)))
1126                            (clear-coll coll)
1127                            (setf state state-readtag3)
1128                       else (dotimes (i 15)
1129                              (add-to-coll coll ch)
1130                              (setq ch (get-next-char tokenbuf))
1131                              (if* (null ch)
1132                                 then (return)))
1133                            (xml-error
1134                             (concatenate 'string
1135                               "illegal token name, starting at '"
1136                               (compute-coll-string coll)
1137                               "'"))
1138                            )))
1139
1140           (#.state-readtag2
1141            (if* (xml-space-p ch) then nil
1142             elseif (eq #\> ch) then (return)
1143             elseif (eq #\/ ch) then (setf state state-readtag3)
1144             elseif (xml-name-start-char-p ch) then
1145                    (un-next-char ch)
1146                    (setf state state-readtag4)
1147               else (clear-coll coll)
1148                    (dotimes (i 15)
1149                      (add-to-coll coll ch)
1150                      (setq ch (get-next-char tokenbuf))
1151                      (if* (null ch)
1152                         then (return)))
1153                    (xml-error
1154                     (concatenate 'string
1155                       "illegal token, starting at '"
1156                       (compute-coll-string coll)
1157                       "' following element token start: " (string tag-to-return)))
1158                    ))
1159
1160           (#.state-readtag4
1161            (if* (xml-name-char-p ch) ;; starting char already passed more restrictive test
1162               then
1163                    (add-to-coll coll ch)
1164             elseif (eq #\= ch) then
1165                    (setq attrib-name (compute-tag coll *package*
1166                                                   (iostruct-ns-to-package tokenbuf)))
1167                    (clear-coll coll)
1168                    (let ((name (symbol-name attrib-name)))
1169                      (when (and (>= (length name) 5)
1170                                 (string= name "xmlns" :end1 5))
1171                        (if* (= (length name) 5)
1172                           then
1173                                (setf ns-token :none)
1174                         elseif (eq (schar name 5) #\:)
1175                           then
1176                                (setf ns-token (subseq name 6)))))
1177                    (setf state state-readtag5)
1178             elseif (xml-space-p ch) then
1179                    (setq attrib-name (compute-tag coll *package*
1180                                                   (iostruct-ns-to-package tokenbuf)))
1181                    (clear-coll coll)
1182                    (let ((name (symbol-name attrib-name)))
1183                      (when (and (>= (length name) 5)
1184                                 (string= name "xmlns" :end1 5))
1185                        (if* (= (length name) 5)
1186                           then
1187                                (setf ns-token :none)
1188                           else
1189                                (setf ns-token (subseq name 6)))))
1190                    (setf state state-readtag12)
1191               else (let ((tmp (compute-coll-string coll)))
1192                      (clear-coll coll)
1193                      (dotimes (i 15)
1194                        (add-to-coll coll ch)
1195                        (setq ch (get-next-char tokenbuf))
1196                        (if* (null ch)
1197                           then (return)))
1198                      (xml-error
1199                       (concatenate 'string
1200                         "looking for attribute '=', found: '"
1201                       (compute-coll-string coll)
1202                       "' following attribute name: " tmp)))
1203                    ))
1204
1205           (#.state-readtag12
1206            (if* (xml-space-p ch) then nil
1207             elseif (eq #\= ch) then (setf state state-readtag5)
1208               else
1209                  (dotimes (i 15)
1210                    (add-to-coll coll ch)
1211                    (setq ch (get-next-char tokenbuf))
1212                    (if* (null ch)
1213                       then (return)))
1214                    (xml-error
1215                     (concatenate 'string
1216                       "looking for attribute '=', found: '"
1217                       (compute-coll-string coll)
1218                       "' following attribute name: " (string attrib-name)))))
1219
1220           (#.state-readtag5
1221            ;; begin to collect attribute value
1222            (if* (or (eq ch #\")
1223                     (eq ch #\'))
1224               then (setq value-delim ch)
1225                    (let* ((tag-defaults (assoc tag-to-return attlist-data))
1226                           (this-attrib (assoc attrib-name tag-defaults)))
1227                      (when (and (second this-attrib) (not (eq (second this-attrib) :CDATA)))
1228                        (setf cdatap nil))
1229                      )
1230                    (setq state state-readtag6)
1231             elseif (xml-space-p ch) then nil
1232               else
1233                    (dotimes (i 15)
1234                      (add-to-coll coll ch)
1235                      (setq ch (get-next-char tokenbuf))
1236                      (if* (null ch)
1237                         then (return)))
1238                    (xml-error
1239                     (concatenate 'string
1240                       "attribute value not delimited by ' or \" : '"
1241                       (compute-coll-string coll)
1242                       "' following attribute: " (string attrib-name)))
1243                    ))
1244
1245           (#.state-readtag6
1246            (let ((from-entity (and attrib-value-tokenbuf
1247                                    (eq attrib-value-tokenbuf
1248                                        (first (iostruct-entity-bufs tokenbuf))))))
1249              (when (not from-entity) (setf attrib-value-tokenbuf nil))
1250              (if* from-entity then
1251                      (if* (eq #\newline ch) then (setf ch #\space)
1252                       elseif (eq #\return ch) then (setf ch #\space)
1253                       elseif (eq #\tab ch) then (setf ch #\space)
1254                              ))
1255              (if* (and (not from-entity) (eq ch value-delim))
1256                 then (setq attrib-value (compute-coll-string coll))
1257                      (when (not cdatap)
1258                        (setf attrib-value (normalize-attrib-value attrib-value)))
1259                      (clear-coll coll)
1260                      (push attrib-name attribs-to-return)
1261                      (push attrib-value attribs-to-return)
1262                      (when ns-token
1263                        (let ((package (assoc (parse-uri attrib-value)
1264                                              (iostruct-uri-to-package tokenbuf)
1265                                              :test 'uri=)))
1266                          (if* package then (setf package (rest package))
1267                             else
1268                                  (setf package
1269                                    (let ((i 0) new-package)
1270                                      (loop
1271                                        (let* ((candidate (concatenate 'string
1272                                                            "net.xml.namespace."
1273                                                            (format nil "~s" i)))
1274                                               (exists (find-package candidate)))
1275                                          (if* exists
1276                                             then (incf i)
1277                                             else (setf new-package (make-package candidate))
1278                                                  (setf (iostruct-uri-to-package tokenbuf)
1279                                                    (acons (parse-uri attrib-value) new-package
1280                                                           (iostruct-uri-to-package tokenbuf)))
1281                                                  (return new-package)))))))
1282                          (setf (iostruct-ns-to-package tokenbuf)
1283                            (acons ns-token package (iostruct-ns-to-package tokenbuf)))
1284                          )
1285                        (if* (and (first (iostruct-ns-scope tokenbuf))
1286                                  (string= (first (first (iostruct-ns-scope tokenbuf)))
1287                                      tag-to-return-string))
1288                           then
1289                                (push ns-token (second (first (iostruct-ns-scope tokenbuf))))
1290                           else
1291                                (push (list tag-to-return-string (list ns-token))
1292                                      (iostruct-ns-scope tokenbuf)))
1293                        (setf ns-token nil))
1294                      (setq state state-readtag6a)
1295               elseif (eq #\newline ch) then
1296                      (when (not (eq #\return last-ch)) (add-to-coll coll #\space))
1297               elseif (or (eq #\tab ch) (eq #\return ch)) then
1298                      (add-to-coll coll #\space)
1299               elseif (eq #\& ch)
1300                  then (setq state state-readtag7)
1301                       (setf entity-source (first (iostruct-entity-bufs tokenbuf)))
1302               elseif (and (xml-char-p ch) (not (eq #\< ch)))
1303                 then (add-to-coll coll ch)
1304                 else
1305                      (dotimes (i 15)
1306                        (add-to-coll coll ch)
1307                        (setq ch (get-next-char tokenbuf))
1308                        (if* (null ch)
1309                           then (return)))
1310                      (xml-error
1311                       (concatenate 'string
1312                         "attribute value cannot contain '<': '"
1313                         (compute-coll-string coll)
1314                         "' following attribute: " (string attrib-name)))
1315                      )
1316              (setf last-ch ch)))
1317
1318           (#.state-readtag6a
1319            (if* (xml-space-p ch) then (setf state state-readtag2)
1320             elseif (eq #\> ch) then (setf state state-readtag2)
1321                    (return)
1322             elseif (eq #\/ ch) then (setf state state-readtag3)
1323             else (clear-coll coll)
1324                    (dotimes (i 15)
1325                      (add-to-coll coll ch)
1326                      (setq ch (get-next-char tokenbuf))
1327                      (if* (null ch)
1328                         then (return)))
1329                    (xml-error
1330                     (concatenate 'string
1331                       "illegal token, starting at '"
1332                       (compute-coll-string coll)
1333                       "' following element token start: " (string tag-to-return)))
1334                    ))
1335
1336           (#.state-readtag7
1337            (if* (eq #\# ch)
1338               then (setf state state-readtag8)
1339             elseif (xml-name-start-char-p ch)
1340               then (setf state state-readtag9)
1341                    (un-next-char ch)
1342               else (clear-coll coll)
1343                    (dotimes (i 15)
1344                      (add-to-coll coll ch)
1345                      (setq ch (get-next-char tokenbuf))
1346                      (if* (null ch)
1347                         then (return)))
1348                    (xml-error
1349                     (concatenate 'string
1350                       "attribute value contains illegal reference name: '&"
1351                       (compute-coll-string coll)
1352                       "' in attribute value for: " (string attrib-name)))
1353                    ))
1354
1355           (#.state-readtag8
1356            (if* (eq #\x ch)
1357               then (setf state state-readtag10)
1358             elseif (<= (char-code #\0) (char-code ch) (char-code #\9))
1359               then (setf state state-readtag11)
1360                    (un-next-char ch)
1361               else (clear-coll coll)
1362                    (dotimes (i 15)
1363                      (add-to-coll coll ch)
1364                      (setq ch (get-next-char tokenbuf))
1365                      (if* (null ch)
1366                         then (return)))
1367                    (xml-error
1368                     (concatenate 'string
1369                       "attribute value contains illegal character reference code: '"
1370                       (compute-coll-string coll)
1371                       "' in attribute value for: " (string attrib-name)))
1372                    ))
1373
1374           (#.state-readtag10
1375            (let ((code (char-code ch)))
1376              (if* (eq #\; ch)
1377                 then (if* (not (eq entity-source (first (iostruct-entity-bufs tokenbuf)))) then
1378                              (xml-error
1379                               (concatenate 'string
1380                                 (string (code-char char-code))
1381                                 " reference cannot be constructed from entity reference/character data sequence"))
1382                         else
1383                              (setf entity-source nil))
1384                      (add-to-coll coll (code-char char-code))
1385                      (setf char-code 0)
1386                      (setq state state-readtag6)
1387               elseif (<= (char-code #\0) code (char-code #\9))
1388                 then (setf char-code (+ (* char-code 16) (- code (char-code #\0))))
1389               elseif (<= (char-code #\A) code (char-code #\F))
1390                 then (setf char-code (+ 10 (* char-code 16) (- code (char-code #\A))))
1391               elseif (<= (char-code #\a) code (char-code #\f))
1392                 then (setf char-code (+ 10 (* char-code 16) (- code (char-code #\a))))
1393                 else (clear-coll coll)
1394                      (dotimes (i 15)
1395                        (add-to-coll coll ch)
1396                        (setq ch (get-next-char tokenbuf))
1397                        (if* (null ch)
1398                           then (return)))
1399                      (xml-error
1400                       (concatenate 'string
1401                         "attribute value contains illegal hexidecimal character reference code: '"
1402                         (compute-coll-string coll)
1403                         "' in attribute value for: " (string attrib-name)))
1404                      )))
1405
1406           (#.state-readtag11
1407            (let ((code (char-code ch)))
1408              (if* (eq #\; ch)
1409                 then (if* (not (eq entity-source (first (iostruct-entity-bufs tokenbuf)))) then
1410                              (xml-error
1411                               (concatenate 'string
1412                                 (string (code-char char-code))
1413                                 " reference cannot be constructed from entity reference/character data sequence"))
1414                         else
1415                              (setf entity-source nil))
1416                      (add-to-coll coll (code-char char-code))
1417                      (setf char-code 0)
1418                      (setq state state-readtag6)
1419               elseif (<= (char-code #\0) code (char-code #\9))
1420                 then (setf char-code (+ (* char-code 10) (- code (char-code #\0))))
1421                 else (clear-coll coll)
1422                      (dotimes (i 15)
1423                        (add-to-coll coll ch)
1424                        (setq ch (get-next-char tokenbuf))
1425                        (if* (null ch)
1426                           then (return)))
1427                      (xml-error
1428                       (concatenate 'string
1429                         "attribute value contains illegal decimal character reference code: '"
1430                         (compute-coll-string coll)
1431                         "' in attribute value for: " (string attrib-name)))
1432                      )))
1433
1434           (#.state-readtag9
1435            (if* (xml-name-char-p ch)
1436               then (add-to-coll entity ch)
1437             elseif (eq #\; ch)
1438               then (let ((entity-symbol (compute-tag entity)))
1439                      (clear-coll entity)
1440                      (if* (not (eq entity-source (first (iostruct-entity-bufs tokenbuf)))) then
1441                              (xml-error
1442                               (concatenate 'string
1443                                 (string entity-symbol)
1444                                 " reference cannot be constructed from entity reference/character data sequence"))
1445                         else
1446                              (setf entity-source nil))
1447                      (if* (string= (symbol-name entity-symbol) "amp") then (add-to-coll coll #\&)
1448                       elseif (string= (symbol-name entity-symbol) "lt") then (add-to-coll coll #\<)
1449                       elseif (string= (symbol-name entity-symbol) "gt") then (add-to-coll coll #\>)
1450                       elseif (string= (symbol-name entity-symbol) "apos") then (add-to-coll coll #\')
1451                       elseif (string= (symbol-name entity-symbol) "quot") then (add-to-coll coll #\")
1452                         else (let (p-value)
1453                                (if* (and (iostruct-do-entity tokenbuf)
1454                                          (setf p-value
1455                                            (assoc entity-symbol
1456                                                   (iostruct-general-entities tokenbuf)))) then
1457                                        (setf p-value (rest p-value))
1458                                        (when (member entity-symbol (iostruct-entity-names tokenbuf))
1459                                          (xml-error (concatenate 'string
1460                                                       "entity:"
1461                                                       (string entity-symbol)
1462                                                       " in recursive reference")))
1463                                        (push entity-symbol (iostruct-entity-names tokenbuf))
1464                                        (if* (stringp p-value) then
1465                                                (add-to-entity-buf entity-symbol p-value)
1466                                                (when (not attrib-value-tokenbuf)
1467                                                  (setf attrib-value-tokenbuf
1468                                                    (first (iostruct-entity-bufs tokenbuf))))
1469                                         elseif (null external-callback) then
1470                                                (setf (iostruct-do-entity tokenbuf) nil)
1471                                         elseif p-value then
1472                                                (let ((entity-stream (apply external-callback p-value)))
1473                                                  (if* entity-stream then
1474                                                          (let ((entity-buf (get-tokenbuf)))
1475                                                            (setf (tokenbuf-stream entity-buf) entity-stream)
1476                                                            (unicode-check entity-stream tokenbuf)
1477                                                            (push entity-buf
1478                                                                  (iostruct-entity-bufs tokenbuf))
1479                                                            ;; check for possible external textdecl
1480                                                            (let ((count 0) cch
1481                                                                  (string "<?xml "))
1482                                                              (if* (dotimes (i (length string) t)
1483                                                                     (setf cch (get-next-char tokenbuf))
1484                                                                     (when (and (= i 5)
1485                                                                                (xml-space-p cch))
1486                                                                       (setf cch #\space))
1487                                                                     (when (not (eq cch
1488                                                                                    (schar string count)))
1489                                                                       (return nil))
1490                                                                     (incf count)) then
1491                                                                      (setf count 5)
1492                                                                      (loop
1493                                                                        (when (< count 0) (return))
1494                                                                        (un-next-char (schar string count))
1495                                                                        (decf count))
1496                                                                      ;; swallow <?xml token
1497                                                                      (swallow-xml-token
1498                                                                       tokenbuf
1499                                                                       external-callback)
1500                                                                 else
1501                                                                      (un-next-char cch)
1502                                                                      (decf count)
1503                                                                      (loop
1504                                                                        (when (< count 0) (return))
1505                                                                        (un-next-char (schar string count))
1506                                                                        (decf count))))
1507                                                            )
1508                                                     else
1509                                                          (xml-error (concatenate 'string
1510                                                                       "Reference to unparsed entity "
1511                                                                       (string entity-symbol)))
1512                                                          ))
1513                                                )
1514                                 elseif (or (not (iostruct-seen-any-dtd tokenbuf))
1515                                            (and (iostruct-seen-any-dtd tokenbuf)
1516                                                 (not (iostruct-seen-external-dtd tokenbuf))
1517                                                 (not (iostruct-seen-parameter-reference tokenbuf))))
1518                                   then
1519                                        (xml-error (concatenate 'string
1520                                                     (string entity-symbol)
1521                                                     " must have entity declaration before being referenced"))
1522                                        ))
1523                              ))
1524                    (setq state state-readtag6)
1525               else (dotimes (i 15)
1526                      (add-to-coll coll ch)
1527                      (setq ch (get-next-char tokenbuf))
1528                      (if* (null ch)
1529                         then (return)))
1530                    (xml-error
1531                     (concatenate 'string
1532                       "attribute value contains illegal reference name: '&"
1533                       (compute-coll-string coll)
1534                       "' in attribute value for: " (string attrib-name)))
1535                    ))
1536
1537           (#.state-readtag3
1538            (if* (eq #\> ch) then (return)
1539               else (clear-coll coll)
1540                    (dotimes (i 15)
1541                      (add-to-coll coll ch)
1542                      (setq ch (get-next-char tokenbuf))
1543                      (if* (null ch)
1544                         then (return)))
1545                    (xml-error
1546                     (concatenate 'string
1547                       "expected '>' found '"
1548                       (compute-coll-string coll)
1549                       "' in element: " (string tag-to-return)))
1550                    ))
1551
1552           (#.state-readtag-!-name
1553            (if* (xml-name-char-p ch) ;; starting char already passed more restrictive test
1554               then
1555                    (add-to-coll coll ch)
1556               else
1557                    (when (not (xml-space-p ch))
1558                      (xml-error (concatenate 'string
1559                                   "expecting whitespace following: '<!"
1560                                   (compute-coll-string coll)
1561                                   "' ; got: '" (string ch) "'")))
1562                    (setq tag-to-return (compute-tag coll))
1563                    (clear-coll coll)
1564                    (setf state state-pre-!-contents)))
1565
1566           (#.state-readtag-?
1567            (if* (xml-name-char-p ch)
1568               then
1569                    (add-to-coll coll ch)
1570               else
1571                    (when (and (not (xml-space-p ch)) (not (eq #\? ch)))
1572                      (xml-error (concatenate 'string
1573                                   "expecting name following: '<?"
1574                                   (compute-coll-string coll)
1575                                   "' ; got: '" (string ch) "'"))
1576                      )
1577                    (when (= (collector-next coll) 0)
1578                      (xml-error "null <? token"))
1579                    (if* (and (= (collector-next coll) 3)
1580                              (eq (elt (collector-data coll) 0) #\x)
1581                              (eq (elt (collector-data coll) 1) #\m)
1582                              (eq (elt (collector-data coll) 2) #\l)
1583                              )
1584                       then
1585                            (when (eq #\? ch) (xml-error "null <?xml token"))
1586                            (setq tag-to-return :xml)
1587                            (setf state state-findattributename)
1588                     elseif (and (= (collector-next coll) 3)
1589                                 (or (eq (elt (collector-data coll) 0) #\x)
1590                                     (eq (elt (collector-data coll) 0) #\X))
1591                                 (or (eq (elt (collector-data coll) 1) #\m)
1592                                     (eq (elt (collector-data coll) 1) #\M))
1593                                 (or (eq (elt (collector-data coll) 2) #\l)
1594                                     (eq (elt (collector-data coll) 2) #\L))
1595                                 ) then
1596                            (xml-error "<?xml tag must be all lower case")
1597                       else
1598                            (setq tag-to-return (compute-tag coll))
1599                            (when (eq #\? ch) (un-next-char ch))
1600                            (setf state state-prereadpi))
1601                    (clear-coll coll)))
1602
1603           (#.state-pre-!-contents
1604            (if* (xml-space-p ch)
1605               then nil
1606             elseif (not (xml-char-p ch))
1607               then (xml-error (concatenate 'string   ;; no test for this...
1608                                 "illegal character '"
1609                                 (string ch)
1610                                 " following <!" (string tag-to-return)))
1611             elseif (eq #\> ch)
1612               then (return)
1613               else (un-next-char ch)
1614                    (setf state state-!-contents)))
1615
1616           (#.state-begin-dtd
1617            (un-next-char ch)
1618            (let ((val (parse-dtd tokenbuf nil external-callback)))
1619              (setf (iostruct-seen-any-dtd tokenbuf) t)
1620              (push (append (list :[) val)
1621                    contents-to-return))
1622              (setf state state-!-doctype-ext3))
1623
1624           (#.state-!-contents
1625            (if* (xml-name-char-p ch)
1626               then (add-to-coll coll ch)
1627             elseif (eq #\> ch)
1628               then (push (compute-coll-string coll) contents-to-return)
1629                    (clear-coll coll)
1630                    (return)
1631             elseif (eq #\[ ch)
1632               then (push (compute-tag coll) contents-to-return)
1633                    (clear-coll coll)
1634                    (setf state state-begin-dtd)
1635             elseif (and (xml-space-p ch) (eq tag-to-return :DOCTYPE))
1636                    ;; look at tag-to-return and set state accordingly
1637               then (push (compute-tag coll) contents-to-return)
1638                    (clear-coll coll)
1639                    (setf state state-!-doctype)
1640               else (xml-error
1641                     (concatenate 'string
1642                       "illegal name: '"
1643                       (string tag-to-return)
1644                       "' in <! tag: "))
1645                    ))
1646
1647           (#.state-!-doctype-ext
1648            (if* (xml-name-char-p ch) ;; starting char already passed more restrictive test
1649               then
1650                    (add-to-coll coll ch)
1651               else
1652                    (when (not (xml-space-p ch))
1653                      (dotimes (i 15)
1654                        (add-to-coll coll ch)
1655                        (setq ch (get-next-char tokenbuf))
1656                        (if* (null ch)
1657                           then (return)))
1658                      (xml-error
1659                       (concatenate 'string
1660                         "illegal character in '"
1661                         (compute-coll-string coll)
1662                         "' in <! tag: " (string tag-to-return) " "
1663                         (string (first contents-to-return))
1664                       ))
1665                      )
1666                    (let ((token (compute-tag coll)))
1667                      (push token contents-to-return)
1668                      (clear-coll coll)
1669                      (if* (eq :SYSTEM token) then (setf state state-!-doctype-system)
1670                       elseif (eq :PUBLIC token) then (setf state state-!-doctype-public)
1671                         else (xml-error
1672                               (concatenate 'string
1673                                 "expected 'SYSTEM' or 'PUBLIC' got '"
1674                                 (string (first contents-to-return))
1675                                 "' in <! tag: " (string tag-to-return) " "
1676                                 (string (second contents-to-return))))
1677                              )
1678                      )))
1679
1680           (#.state-!-doctype-public
1681            (if* (xml-space-p ch) then nil
1682             elseif (eq #\" ch) then (setf state state-!-doctype-public2)
1683             elseif (eq #\' ch) then (setf state state-!-doctype-public3)
1684               else (xml-error
1685                     (concatenate 'string
1686                       "expected quote or double-quote got: '"
1687                       (string ch)
1688                       "' in <! tag: " (string tag-to-return) " "
1689                       (string (second contents-to-return)) " "
1690                       (string (first contents-to-return))
1691                       ))
1692                    ))
1693
1694           (#.state-!-doctype-system
1695            (if* (xml-space-p ch) then nil
1696             elseif (eq #\" ch) then (setf state state-!-doctype-system2)
1697             elseif (eq #\' ch) then (setf state state-!-doctype-system3)
1698               else (xml-error
1699                     (concatenate 'string
1700                       "expected quote or double-quote got: '"
1701                       (string ch)
1702                       "' in <! tag: " (string tag-to-return) " "
1703                       (string (second contents-to-return)) " "
1704                       (string (first contents-to-return))
1705                       ))
1706                    ))
1707
1708           (#.state-!-doctype-public2
1709            (if* (eq #\" ch) then (push (compute-coll-string coll)
1710                                        contents-to-return)
1711                    (clear-coll coll)
1712                    (setf state state-!-doctype-system)
1713             elseif (pub-id-char-p ch) then (add-to-coll coll ch)
1714               else (dotimes (i 15)
1715                      (add-to-coll coll ch)
1716                      (setq ch (get-next-char tokenbuf))
1717                      (if* (null ch)
1718                         then (return)))
1719                    (xml-error
1720                     (concatenate 'string
1721                       "illegal character in DOCTYPE PUBLIC string: '"
1722                       (compute-coll-string coll) "'"))
1723                    ))
1724
1725           (#.state-!-doctype-public3
1726            (if* (eq #\' ch) then (push (compute-coll-string coll)
1727                                        contents-to-return)
1728                    (clear-coll coll)
1729                    (setf state state-!-doctype-system)
1730             elseif (pub-id-char-p ch) then (add-to-coll coll ch)
1731               else (dotimes (i 15)
1732                      (add-to-coll coll ch)
1733                      (setq ch (get-next-char tokenbuf))
1734                      (if* (null ch)
1735                         then (return)))
1736                    (xml-error
1737                     (concatenate 'string
1738                       "illegal character in DOCTYPE PUBLIC string: '"
1739                       (compute-coll-string coll) "'"))
1740                    ))
1741
1742           (#.state-!-doctype-system2
1743            (when (not (xml-char-p ch))
1744              (xml-error "XML is not well formed")) ;; not tested
1745            (if* (eq #\" ch) then (push (compute-coll-string coll)
1746                                        contents-to-return)
1747                    (clear-coll coll)
1748                    (setf state state-!-doctype-ext2)
1749               else (add-to-coll coll ch)))
1750
1751           (#.state-!-doctype-system3
1752            (when (not (xml-char-p ch))
1753              (xml-error "XML is not well formed")) ;; not tested
1754            (if* (eq #\' ch) then (push (compute-coll-string coll)
1755                                        contents-to-return)
1756                    (clear-coll coll)
1757                    (setf state state-!-doctype-ext2)
1758               else (add-to-coll coll ch)))
1759
1760           (#.state-!-doctype-ext2
1761            (if* (xml-space-p ch) then nil
1762             elseif (eq #\> ch) then (return)
1763             elseif (eq #\[ ch)
1764               then (setf state state-begin-dtd)
1765               else
1766                    (dotimes (i 15)
1767                      (add-to-coll coll ch)
1768                      (setq ch (get-next-char tokenbuf))
1769                      (if* (null ch)
1770                         then (return)))
1771                    (xml-error
1772                     (concatenate 'string
1773                       "illegal char in DOCTYPE token: '"
1774                       (compute-coll-string coll) "'"))
1775                    ))
1776
1777           (#.state-!-doctype-ext3
1778            (if* (xml-space-p ch) then nil
1779             elseif (eq #\> ch) then (return)
1780               else
1781                    (dotimes (i 15)
1782                      (add-to-coll coll ch)
1783                      (setq ch (get-next-char tokenbuf))
1784                      (if* (null ch)
1785                         then (return)))
1786                    (xml-error
1787                     (concatenate 'string
1788                       "illegal char in DOCTYPE token following dtd: '"
1789                       (compute-coll-string coll) "'"))
1790                    ))
1791
1792           (#.state-!-doctype
1793            ;; skip whitespace; possible exits: >, SYSTEM, PUBLIC, [
1794            (if* (xml-space-p ch) then nil
1795             elseif (xml-name-start-char-p ch)
1796               then
1797                    (setf state state-!-doctype-ext)
1798                    (un-next-char ch)
1799             elseif (eq #\> ch) then (return)
1800             elseif (eq #\[ ch)
1801               then (setf state state-begin-dtd)
1802               else (xml-error
1803                     (concatenate 'string
1804                       "illegal character: '"
1805                       (string ch)
1806                       "' in <! tag: " (string tag-to-return) " "
1807                       (string (first contents-to-return))))
1808                    ))
1809
1810           (#.state-prereadpi
1811            (if* (xml-space-p ch)
1812               then nil
1813             elseif (not (xml-char-p ch))
1814               then (xml-error "XML is not well formed") ;; no test
1815               else (un-next-char ch)
1816                    (setf state state-readpi)))
1817
1818           (#.state-readpi
1819            (if* (eq #\? ch)
1820               then (setf state state-readpi2)
1821             elseif (not (xml-char-p ch))
1822               then (xml-error "XML is not well formed") ;; no test
1823               else (add-to-coll coll ch)))
1824
1825           (#.state-readpi2
1826            (if* (eq #\> ch)
1827               then (return)
1828             elseif (eq #\? ch) then
1829                    (add-to-coll coll #\?) ;; come back here to try again
1830               else (setf state state-readpi)
1831                    (add-to-coll coll #\?)
1832                    (add-to-coll coll ch)))
1833
1834           (#.state-findattributename0
1835            (if* (xml-space-p ch) then (setf state state-findattributename)
1836             elseif (eq ch empty-delim)
1837               then (setf state state-noattributename)
1838               else
1839                    (dotimes (i 15)
1840                      (add-to-coll coll ch)
1841                      (setq ch (get-next-char tokenbuf))
1842                      (if* (null ch)
1843                         then (return)))
1844                    (xml-error
1845                     (concatenate 'string
1846                       "expected space or tag end before: '"
1847                       (compute-coll-string coll) "'"))))
1848           (#.state-findattributename
1849            ;; search until we find the start of an attribute name
1850            ;; or the end of the tag
1851            (if* (eq ch empty-delim)
1852               then (setf state state-noattributename)
1853             elseif (xml-space-p ch)
1854               then nil ;; skip whitespace
1855             elseif (xml-name-start-char-p ch)
1856               then
1857                    (un-next-char ch)
1858                    (setf state state-attribname)
1859               else
1860                    (dotimes (i 15)
1861                      (add-to-coll coll ch)
1862                      (setq ch (get-next-char tokenbuf))
1863                      (if* (null ch)
1864                         then (return)))
1865                    (xml-error
1866                     (concatenate 'string
1867                       "illegal char in <?xml token: '"
1868                       (compute-coll-string coll) "'"))
1869                    ))
1870
1871           (#.state-attribname
1872            ;; collect attribute name
1873            (if* (xml-name-char-p ch) ;; starting char already passed more restrictive test
1874               then
1875                    (add-to-coll coll ch)
1876             elseif (xml-space-p ch) then
1877                    (setq attrib-name (compute-tag coll))
1878                    (clear-coll coll)
1879                    (setq state state-attribname2)
1880               else
1881                    (when (not (eq #\= ch))
1882                      (dotimes (i 15)
1883                        (add-to-coll coll ch)
1884                        (setq ch (get-next-char tokenbuf))
1885                        (if* (null ch)
1886                           then (return)))
1887                      (xml-error
1888                       (concatenate 'string
1889                         "illegal char in <?xml attribute token: '"
1890                         (compute-coll-string coll) "'"))
1891                      )
1892                    (setq attrib-name (compute-tag coll))
1893                    (clear-coll coll)
1894                    (setq state state-attribstartvalue)))
1895
1896           (#.state-attribname2
1897            (if* (eq #\= ch) then (setq state state-attribstartvalue)
1898             elseif (xml-space-p ch) then nil
1899               else
1900                    (un-next-char ch)
1901                    (dotimes (i 15)
1902                        (add-to-coll coll ch)
1903                        (setq ch (get-next-char tokenbuf))
1904                        (if* (null ch)
1905                           then (return)))
1906                      (xml-error
1907                       (concatenate 'string
1908                         "illegal char in <?xml attribute token: '"
1909                         (compute-coll-string coll) "'"))))
1910           (#.state-attribstartvalue
1911            ;; begin to collect value
1912            (if* (or (eq ch #\")
1913                     (eq ch #\'))
1914               then (setq value-delim ch)
1915                    (setq state state-attribvaluedelim)
1916             elseif (xml-space-p ch) then nil
1917               else
1918                    (dotimes (i 15)
1919                        (add-to-coll coll ch)
1920                        (setq ch (get-next-char tokenbuf))
1921                        (if* (null ch)
1922                           then (return)))
1923                      (xml-error
1924                       (concatenate 'string
1925                         "expected ' or \" before  <?xml attribute token value: '"
1926                         (compute-coll-string coll) "'"))
1927                    ))
1928
1929            (#.state-attribvaluedelim
1930             (if* (eq ch value-delim)
1931                then (setq attrib-value (compute-coll-string coll))
1932                     (clear-coll coll)
1933                     (push attrib-name attribs-to-return)
1934                     (push attrib-value attribs-to-return)
1935                     (setq state state-findattributename0)
1936              elseif (and (xml-char-p ch) (not (eq #\< ch)))
1937                then (add-to-coll coll ch)
1938                else
1939                     (dotimes (i 15)
1940                        (add-to-coll coll ch)
1941                        (setq ch (get-next-char tokenbuf))
1942                        (if* (null ch)
1943                           then (return)))
1944                      (xml-error
1945                       (concatenate 'string
1946                         "illegal character in attribute token value: '"
1947                         (compute-coll-string coll) "'"))
1948                     ))
1949
1950            (#.state-noattributename
1951             (if* (eq #\> ch)
1952                then
1953                     (return) ;; ready to build return token
1954                else
1955                     (xml-error
1956                      (concatenate 'string
1957                        "expected '>' found: '" (string ch) "' in <?xml token"))
1958                     ))
1959
1960           (t
1961            (error "need to support state:~s" state))
1962           ))
1963       (put-back-collector entity)
1964       (case state
1965         (#.state-noattributename ;; it's a bug if this state occurs with a non-empty element
1966          (put-back-collector coll)
1967          (if* attribs-to-return
1968                  then (values (cons tag-to-return
1969                                     (nreverse attribs-to-return))
1970                               (if (eq tag-to-return :xml) :xml :start-tag) :end-tag)
1971             else
1972                  (values tag-to-return :start-tag :end-tag)
1973                  ))
1974         (#.state-readtag-end-bracket
1975          ;; this is a :commant tag
1976          (let ((ret (compute-coll-string coll)))
1977            (put-back-collector coll)
1978            (values (cons tag-to-return (list ret)) :comment :nil)))
1979         (#.state-pcdata
1980          (let ((next-char (collector-next coll)))
1981            (put-back-collector coll)
1982            (if* (zerop next-char)
1983               then (values nil :eof nil)
1984               else (values (compute-coll-string coll) :pcdata pcdatap))))
1985         (#.state-readpi2
1986          (let ((ret (compute-coll-string coll)))
1987            (put-back-collector coll)
1988            (values (append (list :pi tag-to-return) (list ret)) :pi nil)))
1989         ((#.state-readtag-!-conditional)
1990          (put-back-collector coll)
1991          (values (append (list tag-to-return) contents-to-return) :start-tag
1992                  :end-tag))
1993         ((#.state-!-contents
1994           #.state-!-doctype
1995           #.state-!-doctype-ext2
1996           #.state-!-doctype-ext3)
1997          (put-back-collector coll)
1998          (values (append (list tag-to-return) (nreverse contents-to-return)) :start-tag
1999                  :end-tag))
2000         (#.state-readtag3
2001          (put-back-collector coll)
2002          (values (if* attribs-to-return
2003                     then (cons tag-to-return
2004                                (nreverse attribs-to-return))
2005                     else tag-to-return) :start-tag :end-tag))
2006         ((#.state-readtag2
2007           #.state-readtag)
2008          (put-back-collector coll)
2009          (values (if* attribs-to-return
2010                     then (cons tag-to-return
2011                                (nreverse attribs-to-return))
2012                     else tag-to-return) :start-tag nil))
2013         ((#.state-readtag-end2
2014           #.state-readtag-end3)
2015          (put-back-collector coll)
2016          (values tag-to-return :end-tag nil))
2017         (#.state-readtag-!-conditional7
2018          (let ((ret (compute-coll-string coll)))
2019            (put-back-collector coll)
2020            (values (append (list :cdata) (list ret)) :cdata nil)))
2021         (t
2022          ;; if ch is null that means we encountered unexpected EOF
2023          (when (null ch)
2024            (put-back-collector coll)
2025            (xml-error "unexpected end of input"))
2026          (print (list tag-to-return attribs-to-return))
2027          (let ((ret (compute-coll-string coll)))
2028            (put-back-collector coll)
2029            (error "need to support state <post>:~s  ~s ~s ~s" state
2030                   tag-to-return
2031                   contents-to-return
2032                   ret))))
2033       )))
2034
2035 (defun swallow-xml-token (tokenbuf external-callback)
2036   (declare (ignorable old-coll) (optimize (speed 3) (safety 1)))
2037   (let ((xml (next-token tokenbuf external-callback nil)))
2038     (if* (and (eq (fourth xml) :standalone) (stringp (fifth xml))
2039               (equal (fifth xml) "yes")) then
2040             (xml-error "external XML entity cannot be standalone document")
2041      elseif (and (eq (sixth xml) :standalone) (stringp (seventh xml))
2042                  (equal (seventh xml) "yes")) then
2043             (xml-error "external XML entity cannot be standalone document"))))
2044
2045 ;; return the string with entity references replaced by text
2046 ;; normalizing will happen later
2047 ;; we're ok on different types - just ignore IMPLIED & REQUIRED; and possibly skip FIXED
2048 (defun parse-default-value (value-list tokenbuf external-callback)
2049   (declare (optimize (speed 3) (safety 1)))
2050   (let (value-string)
2051     (if* (stringp (first value-list)) then (setf value-string (first value-list))
2052      elseif (eq (first value-list) :FIXED) then (setf value-string (second value-list)))
2053     (let ((tmp-result (parse-xml
2054                       (concatenate 'string
2055                         "<item x='"
2056                         value-string
2057                         "'/>")
2058                       :external-callback external-callback
2059                       :general-entities
2060                       (iostruct-general-entities tokenbuf))))
2061       (if* (stringp (first value-list)) then
2062               (setf (first value-list)
2063                 (third (first (first tmp-result))))
2064          elseif (eq (first value-list) :FIXED) then
2065               (setf (second value-list)
2066                 (third (first (first tmp-result)))))))
2067   value-list)
2068
2069 (defun process-attlist (args attlist-data)
2070   (declare (optimize (speed 3) (safety 1)))
2071   (dolist (arg1 args attlist-data)
2072     ;;(format t "arg1: ~s~%" arg1)
2073     (dolist (item (rest arg1))
2074       ;;(format t "item: ~s~%" item)
2075       (when (eq :ATTLIST (first item))
2076         (let* ((name (second item))
2077                (name-data (assoc name attlist-data))
2078                (new-name-data (rest name-data)))
2079           ;;(format t "name: ~s name-data: ~s new-name-data: ~s~%" name name-data new-name-data)
2080           (dolist (attrib-data (rest (rest item)))
2081             ;;(format t "attrib-data: ~s~%" attrib-data)
2082             #+ignore
2083             (setf (rest (rest attrib-data))
2084               (parse-default-value (rest (rest attrib-data)) tokenbuf external-callback))
2085             (when (not (assoc (first attrib-data) new-name-data))
2086               (setf new-name-data (acons (first attrib-data) (rest attrib-data) new-name-data))))
2087           (if* name-data then
2088                   (rplacd (assoc name attlist-data) (nreverse new-name-data))
2089              else (setf attlist-data (acons name (nreverse new-name-data) attlist-data))))))))
2090
2091 (provide :pxml)