r3357: remove load-compile-op from .asd file
[xmlutils.git] / pxml2.cl
1 ;;
2 ;; copyright (c) 1986-2000 Franz Inc, Berkeley, CA
3 ;;
4 ;; This code is free software; you can redistribute it and/or
5 ;; modify it under the terms of the version 2.1 of
6 ;; the GNU Lesser General Public License as published by
7 ;; the Free Software Foundation, as clarified by the AllegroServe
8 ;; prequel found in license-allegroserve.txt.
9 ;;
10 ;; This code is distributed in the hope that it will be useful,
11 ;; but without any warranty; without even the implied warranty of
12 ;; merchantability or fitness for a particular purpose.  See the GNU
13 ;; Lesser General Public License for more details.
14 ;;
15 ;; Version 2.1 of the GNU Lesser General Public License is in the file
16 ;; license-lgpl.txt that was distributed with this file.
17 ;; If it is not present, you can access it from
18 ;; http://www.gnu.org/copyleft/lesser.txt (until superseded by a newer
19 ;; version) or write to the Free Software Foundation, Inc., 59 Temple Place,
20 ;; Suite 330, Boston, MA  02111-1307  USA
21 ;;
22 ;; $Id: pxml2.cl,v 1.1 2002/10/15 12:23:03 kevin Exp $
23
24 ;; Change Log 
25 ;;
26 ;; 10/14/00 add namespace support
27
28 (in-package :net.xml.parser)
29
30 (pxml-dribble-bug-hook "$Id: pxml2.cl,v 1.1 2002/10/15 12:23:03 kevin Exp $")
31
32 ;; state titles can be better chosen and explained
33
34 (defvar *debug-xml* nil)
35
36 (defmethod parse-xml ((str string) &key external-callback general-entities parameter-entities
37                                         content-only uri-to-package)
38   (declare (optimize (speed 3) (safety 1)))
39   (parse-xml (make-string-input-stream str) :external-callback external-callback
40              :general-entities general-entities
41              :parameter-entities parameter-entities :content-only content-only
42              :uri-to-package uri-to-package))
43
44 (defmethod parse-xml ((p stream) &key external-callback general-entities
45                                       parameter-entities content-only uri-to-package)
46   (declare (optimize (speed 3) (safety 1)))
47   (pxml-internal0 p nil external-callback general-entities parameter-entities content-only
48                   uri-to-package))
49
50 (eval-when (compile load eval)
51   (defconstant state-docstart 0) ;; looking for XMLdecl, Misc, doctypedecl, 1st element
52   (defconstant state-docstart-misc 1) ;; looking for Misc, doctypedecl, 1st element
53   (defconstant state-docstart-misc2 2) ;; looking for Misc, 1st element
54   (defconstant state-element-done 3) ;; looking for Misc
55   (defconstant state-element-contents 4) ;; looking for element content
56   )
57
58 (defun all-xml-whitespace-p (val)
59   (dotimes (i (length val) t)
60     (when (not (xml-space-p (elt val i))) (return nil))))
61
62 (defun pxml-internal0 (p read-sequence-func external-callback
63                       general-entities parameter-entities content-only uri-to-package)
64   (declare (optimize (speed 3) (safety 1)))
65   (let ((tokenbuf (make-iostruct :tokenbuf (get-tokenbuf)
66                                  :do-entity t
67                                  :read-sequence-func read-sequence-func)))
68     ;; set up stream right
69     (setf (tokenbuf-stream (iostruct-tokenbuf tokenbuf)) p)
70     ;; set up user specified entities
71     (setf (iostruct-parameter-entities tokenbuf) parameter-entities)
72     (setf (iostruct-general-entities tokenbuf) general-entities)
73     (setf (iostruct-uri-to-package tokenbuf) uri-to-package)
74     ;; look for Unicode file
75     (unicode-check p tokenbuf)
76     (unwind-protect
77         (values (pxml-internal tokenbuf external-callback content-only)
78                 (iostruct-uri-to-package tokenbuf))
79       (dolist (entity-buf (iostruct-entity-bufs tokenbuf))
80         (when (streamp (tokenbuf-stream entity-buf))
81           (close (tokenbuf-stream entity-buf))
82           (put-back-tokenbuf entity-buf))))
83     ))
84
85 (defun pxml-internal (tokenbuf external-callback content-only)
86   (declare (optimize (speed 3) (safety 1)))
87   (let ((state state-docstart)
88         (guts)
89         (pending)
90         (attlist-data)
91         (public-string)
92         (system-string)
93         (entity-open-tags)
94         )
95
96     (loop
97       (multiple-value-bind (val kind kind2)
98           (next-token tokenbuf external-callback attlist-data)
99         (when *debug-xml*
100           (format t "val: ~s kind: ~s kind2: ~s state: ~s~%" val kind kind2 state))
101         (case state
102           (#.state-docstart
103            (if* (and (listp val) (eq :xml (first val)) (eq kind :xml) (eq kind2 :end-tag))
104               then
105                    (check-xmldecl val tokenbuf)
106                    (when (not content-only) (push val guts))
107                    (setf state state-docstart-misc)
108             elseif (eq kind :comment)
109               then
110                    (when (not content-only) (push val guts))
111                    (setf state state-docstart-misc)
112             elseif (and (listp val) (eq :DOCTYPE (first val)))
113               then
114                    (if* (eq (third val) :SYSTEM) then
115                            (setf system-string (fourth val))
116                            (setf val (remove (third val) val))
117                            (setf val (remove (third val) val))
118                     elseif (eq (third val) :PUBLIC) then
119                            (setf public-string (normalize-public-value (fourth val)))
120                            (setf system-string (fifth val))
121                            (setf val (remove (third val) val))
122                            (setf val (remove (third val) val))
123                            (setf val (remove (third val) val)))
124                    (when system-string
125                      (if* external-callback then
126                              (let ((ext-stream (apply external-callback
127                                                       (list (parse-uri system-string)
128                                                             :DOCTYPE
129                                                             public-string
130                                                             ))))
131                                (when ext-stream
132                                  (let (ext-io (entity-buf (get-tokenbuf)))
133                                    (setf (tokenbuf-stream entity-buf) ext-stream)
134                                    (setf ext-io (make-iostruct :tokenbuf entity-buf
135                                                                :do-entity
136                                                                (iostruct-do-entity tokenbuf)
137                                                                :read-sequence-func
138                                                                (iostruct-read-sequence-func tokenbuf)))
139                                    (unicode-check ext-stream ext-io)
140                                    (setf (iostruct-parameter-entities ext-io)
141                                      (iostruct-parameter-entities tokenbuf))
142                                    (setf (iostruct-general-entities ext-io)
143                                      (iostruct-general-entities tokenbuf))
144                                    (unwind-protect
145                                        (setf val (append val
146                                                          (list (append
147                                                                 (list :external)
148                                                                 (parse-dtd
149                                                                  ext-io
150                                                                  t external-callback)))))
151                                      (setf (iostruct-seen-any-dtd tokenbuf) t)
152                                      (setf (iostruct-seen-external-dtd tokenbuf) t)
153                                      (setf (iostruct-seen-parameter-reference tokenbuf)
154                                        (iostruct-seen-parameter-reference ext-io))
155                                      (setf (iostruct-general-entities tokenbuf)
156                                        (iostruct-general-entities ext-io))
157                                      (setf (iostruct-parameter-entities tokenbuf)
158                                        (iostruct-parameter-entities ext-io))
159                                      (setf (iostruct-do-entity tokenbuf)
160                                        (iostruct-do-entity ext-io))
161                                      (dolist (entity-buf2 (iostruct-entity-bufs ext-io))
162                                        (when (streamp (tokenbuf-stream entity-buf2))
163                                          (close (tokenbuf-stream entity-buf2))
164                                          (put-back-tokenbuf entity-buf2)))
165                                      (close (tokenbuf-stream entity-buf))
166                                      (put-back-tokenbuf entity-buf))
167                                    )))
168                         else
169                              (setf (iostruct-do-entity tokenbuf) nil)))
170                    (setf attlist-data
171                      (process-attlist (rest (rest val)) attlist-data))
172                    (when (not content-only) (push val guts))
173                    (setf state state-docstart-misc2)
174             elseif (eq kind :pi)
175               then
176                    (push val guts)
177                    (setf state state-docstart-misc)
178             elseif (eq kind :pcdata)
179               then
180                    (when (or (not kind2) (not (all-xml-whitespace-p val)))
181                      (if* (not kind2) then
182                              (xml-error "An entity reference occured where only whitespace or the first element may occur")
183                         else
184                              (xml-error (concatenate 'string
185                                           "unrecognized content '"
186                                           (subseq val 0 (min (length val) 40)) "'"))))
187                    (setf state state-docstart-misc)
188             elseif (or (symbolp val)
189                        (and (listp val) (symbolp (first val))))
190               then
191                    (when (eq kind :start-tag)
192                      (setf val (add-default-values val attlist-data)))
193                    (if* (and (eq kind :start-tag) (eq kind2 :end-tag))
194                       then (push (list val) guts)
195                            (setf state state-element-done)
196                     elseif (eq kind :start-tag)
197                       then (push (list val) pending)
198                            ;;(format t "pending: ~s guts: ~s <1>~%" pending guts)
199                            (when (iostruct-entity-bufs tokenbuf)
200                              (push (if (symbolp val) val (first val)) entity-open-tags))
201                            (setf state state-element-contents)
202                       else (xml-error (concatenate 'string
203                                                    "encountered token at illegal syntax position: '"
204                                                    (string kind) "'"
205                                                    (if* (null guts) then
206                                                            " at start of contents"
207                                                       else
208                                                            (concatenate 'string
209                                                              " following: '"
210                                                              (format nil "~s" (first guts))
211                                                              "'")))))
212               else
213                    (print (list val kind kind2))
214                    (break "need to check for other allowable docstarts")))
215           (#.state-docstart-misc2
216            (if* (eq kind :pcdata)
217               then
218                    (when (or (not kind2) (not (all-xml-whitespace-p val)))
219                      (if* (not kind2) then
220                              (xml-error "An entity reference occured where only whitespace or the first element may occur")
221                         else
222                              (xml-error (concatenate 'string
223                                           "unrecognized content '"
224                                           (subseq val 0 (min (length val) 40)) "'"))))
225             elseif (and (listp val) (eq :comment (first val)))
226               then
227                    (when (not content-only) (push val guts))
228             elseif (eq kind :pi)
229               then
230                    (push val guts)
231             elseif (eq kind :eof)
232               then
233                    (xml-error "unexpected end of file encountered")
234             elseif (or (symbolp val)
235                        (and (listp val) (symbolp (first val))))
236               then
237                    (when (eq kind :start-tag)
238                      (setf val (add-default-values val attlist-data)))
239                    (if* (and (eq kind :start-tag) (eq kind2 :end-tag))
240                       then (push (list val) guts)
241                            (setf state state-element-done)
242                     elseif (eq kind :start-tag)
243                       then (push (list val) pending)
244                            ;;(format t "pending: ~s guts: ~s <2>~%" pending guts)
245                            (when (iostruct-entity-bufs tokenbuf)
246                              (push (if (symbolp val) val (first val)) entity-open-tags))
247                            (setf state state-element-contents)
248                       else (xml-error (concatenate 'string
249                                         "encountered token at illegal syntax position: '"
250                                         (string kind) "'"
251                                         (if* (null guts) then
252                                                 " at start of contents"
253                                            else
254                                                 (concatenate 'string
255                                                   " following: '"
256                                                   (format nil "~s" (first guts))
257                                                   "'")))))
258               else
259                    (error "this branch unexpected <1>")))
260           (#.state-docstart-misc
261            (if* (eq kind :pcdata)
262               then
263                    (when (or (not kind2) (not (all-xml-whitespace-p val)))
264                      (if* (not kind2) then
265                              (xml-error "An entity reference occured where only whitespace or the first element may occur")
266                         else
267                              (xml-error (concatenate 'string
268                                           "unrecognized content '"
269                                           (subseq val 0 (min (length val) 40)) "'"))))
270             elseif (and (listp val) (eq :DOCTYPE (first val)))
271               then
272                    (if* (eq (third val) :SYSTEM) then
273                            (setf system-string (fourth val))
274                            (setf val (remove (third val) val))
275                            (setf val (remove (third val) val))
276                     elseif (eq (third val) :PUBLIC) then
277                            (setf public-string (normalize-public-value (fourth val)))
278                            (setf system-string (fifth val))
279                            (setf val (remove (third val) val))
280                            (setf val (remove (third val) val))
281                            (setf val (remove (third val) val)))
282                    (when system-string
283                      (if* external-callback then
284                              (let ((ext-stream (apply external-callback
285                                                       (list (parse-uri system-string)
286                                                             :DOCTYPE
287                                                             public-string
288                                                             ))))
289                                (when ext-stream
290                                  (let (ext-io (entity-buf (get-tokenbuf)))
291                                    (setf (tokenbuf-stream entity-buf) ext-stream)
292                                    (setf ext-io (make-iostruct :tokenbuf entity-buf
293                                                                :do-entity
294                                                                (iostruct-do-entity tokenbuf)
295                                                                :read-sequence-func
296                                                                (iostruct-read-sequence-func tokenbuf)))
297                                    (unicode-check ext-stream ext-io)
298                                    (setf (iostruct-parameter-entities ext-io)
299                                      (iostruct-parameter-entities tokenbuf))
300                                    (setf (iostruct-general-entities ext-io)
301                                      (iostruct-general-entities tokenbuf))
302                                    (unwind-protect
303                                        (setf val (append val
304                                                          (list (append
305                                                                 (list :external)
306                                                                 (parse-dtd
307                                                                  ext-io
308                                                                  t external-callback)))))
309                                      (setf (iostruct-seen-any-dtd tokenbuf) t)
310                                      (setf (iostruct-seen-external-dtd tokenbuf) t)
311                                      (setf (iostruct-seen-parameter-reference tokenbuf)
312                                        (iostruct-seen-parameter-reference ext-io))
313                                      (setf (iostruct-general-entities tokenbuf)
314                                        (iostruct-general-entities ext-io))
315                                      (setf (iostruct-parameter-entities tokenbuf)
316                                        (iostruct-parameter-entities ext-io))
317                                      (setf (iostruct-do-entity tokenbuf)
318                                        (iostruct-do-entity ext-io))
319                                      (dolist (entity-buf2 (iostruct-entity-bufs ext-io))
320                                        (when (streamp (tokenbuf-stream entity-buf2))
321                                          (close (tokenbuf-stream entity-buf2))
322                                          (put-back-tokenbuf entity-buf2)))
323                                      (close (tokenbuf-stream entity-buf))
324                                      (put-back-tokenbuf entity-buf))
325                                    )))
326                         else
327                              (setf (iostruct-do-entity tokenbuf) nil)))
328                    (setf attlist-data
329                      (process-attlist (rest (rest val)) attlist-data))
330                    (when (not content-only) (push val guts))
331                    (setf state state-docstart-misc2)
332             elseif (and (listp val) (eq :comment (first val)))
333               then
334                    (when (not content-only) (push val guts))
335             elseif (eq kind :pi)
336               then
337                    (push val guts)
338             elseif (or (symbolp val)
339                        (and (listp val) (symbolp (first val))))
340               then
341                    (when (eq kind :start-tag)
342                      (setf val (add-default-values val attlist-data)))
343                    (if* (and (eq kind :start-tag) (eq kind2 :end-tag))
344                       then (push (list val) guts)
345                            (setf state state-element-done)
346                     elseif (eq kind :start-tag)
347                       then (push (list val) pending)
348                            ;;(format t "pending: ~s guts: ~s <3>~%" pending guts)
349                            (when (iostruct-entity-bufs tokenbuf)
350                              (push (if (symbolp val) val (first val)) entity-open-tags))
351                            (setf state state-element-contents)
352                       else (xml-error (concatenate 'string
353                                         "encountered token at illegal syntax position: '"
354                                         (string kind) "'"
355                                         (concatenate 'string
356                                           " following: '"
357                                           (format nil "~s" (first guts))
358                                           "'"))))
359               else
360                    (print (list val kind kind2))
361                    (break "check for other docstart-misc states")))
362           (#.state-element-contents
363            (if* (or (symbolp val)
364                     (and (listp val) (symbolp (first val))))
365               then
366                    (when (eq kind :start-tag)
367                      (setf val (add-default-values val attlist-data)))
368                    (if* (eq kind :end-tag)
369                       then (let ((candidate (first (first pending))))
370                              (when (listp candidate) (setf candidate (first candidate)))
371                              (if* (eq candidate val)
372                                 then
373                                      (if* (iostruct-entity-bufs tokenbuf) then
374                                              (when (not (eq (first entity-open-tags) val))
375                                                (xml-error
376                                                 (concatenate 'string
377                                                   (string val)
378                                                   " element closed in entity that did not open it")))
379                                              (setf entity-open-tags (rest entity-open-tags))
380                                         else
381                                              (when (eq (first entity-open-tags) val)
382                                                (xml-error
383                                                 (concatenate 'string
384                                                   (string val)
385                                                   " element closed outside of entity that did not open it")))
386                                              )
387                                      (if* (= (length pending) 1)
388                                         then
389                                              (push (first pending) guts)
390                                              (setf state state-element-done)
391                                         else
392                                              (setf (second pending)
393                                                (append (second pending) (list (first pending)))))
394                                      (setf pending (rest pending))
395                                      ;;(format t "pending: ~s guts: ~s <4>~%" pending guts)
396                                 else (xml-error (format nil
397                                                         "encountered end tag: ~s expected: ~s"
398                                                         val candidate))))
399                     elseif (and (eq kind :start-tag) (eq kind2 :end-tag))
400                       then
401                            (setf (first pending)
402                              (append (first pending) (list (list val))))
403                            ;;(format t "pending: ~s guts: ~s <5>~%" pending guts)
404                     elseif (eq kind :start-tag)
405                       then
406                            (push (list val) pending)
407                            ;;(format t "pending: ~s guts: ~s <6>~%" pending guts)
408                            (when (iostruct-entity-bufs tokenbuf)
409                              (push (if (symbolp val) val (first val)) entity-open-tags))
410                     elseif (eq kind :cdata) then
411                            (setf (first pending)
412                              (append (first pending) (rest val)))
413                            (let ((old (first pending))
414                                  (new))
415                              (dolist (item old)
416                                (if* (and (stringp (first new)) (stringp item)) then
417                                        (setf (first new)
418                                          (concatenate 'string (first new) item))
419                                   else (push item new)))
420                              (setf (first pending) (reverse new)))
421                     elseif (eq kind :comment) then
422                           (when (not content-only) (push val guts))
423                     elseif (eq kind :pi)
424                       then
425                            (setf (first pending)
426                              (append (first pending) (list val)))
427                     elseif (eq kind :eof)
428                       then
429                            (xml-error "unexpected end of file encountered")
430                       else (xml-error (format nil "unexpected token: ~s" val)))
431             elseif (eq kind :pcdata)
432               then
433                    (setf (first pending)
434                      (append (first pending) (list val)))
435                    (let ((old (first pending))
436                          (new))
437                      (dolist (item old)
438                        (if* (and (stringp (first new)) (stringp item)) then
439                                (setf (first new)
440                                  (concatenate 'string (first new) item))
441                           else (push item new)))
442                      (setf (first pending) (reverse new)))
443               else (xml-error (format nil "unexpected token: ~s" val))))
444           (#.state-element-done
445            (if* (eq kind :pcdata)
446               then
447                    (when (or (not kind2) (not (all-xml-whitespace-p val)))
448                      (if* (not kind2) then
449                              (xml-error "An entity reference occured where only whitespace or the first element may occur")
450                         else
451                              (xml-error (concatenate 'string
452                                           "unrecognized content '"
453                                           (subseq val 0 (min (length val) 40)) "'"))))
454             elseif (eq kind :eof) then
455                    (put-back-tokenbuf (iostruct-tokenbuf tokenbuf))
456                    (return (nreverse guts))
457             elseif (eq kind :comment) then
458                    (when (not content-only) (push val guts))
459             elseif (eq kind :pi)
460               then (push val guts)
461               else
462                    (xml-error (concatenate 'string
463                                 "encountered token at illegal syntax position: '"
464                                 (string kind) "'"
465                                 (concatenate 'string
466                                   " following: '"
467                                   (format nil "~s" (first guts))
468                                   "'")))
469                    ))
470           (t
471            (error "need to support state:~s token:~s  kind:~s kind2:~s <parse>" state val kind kind2)))
472         ))))
473
474 (eval-when (compile load eval)
475   (defconstant state-pcdata 0) ;;looking for < (tag start), & (reference); all else is string data
476   (defconstant state-readtagfirst 1) ;; seen < looking for /,?,!,name start
477   (defconstant state-readtag-? 2) ;; seen <? looking for space,char
478   (defconstant state-readtag-! 3) ;; seen <! looking for name,[,-
479   (defconstant state-readtag-end 4) ;; found </ looking for tag name
480   (defconstant state-readtag 5) ;; found < name start looking for more name, /, >
481   (defconstant state-findattributename 6) ;; found <?xml space looking for ?,>,space,name start
482   (defconstant state-readpi 7)
483   (defconstant state-noattributename 8)
484   (defconstant state-attribname 9) ;; found <?xml space name start looking for more name,=
485   (defconstant state-attribstartvalue 10) ;; found <?xml space name= looking for ',"
486   (defconstant state-attribvaluedelim 11)
487   (defconstant state-readtag-!-name 12) ;; seen <!name(start) looking for more name chars or space
488   (defconstant state-readtag-!-conditional 13) ;; found <![ looking for CDATA, INCLUDE, IGNORE
489   (defconstant state-readtag-!-comment 14)
490   (defconstant state-readtag-!-readcomment 15)
491   (defconstant state-readtag-!-readcomment2 16)
492   (defconstant state-readtag-end-bracket 17)
493   (defconstant state-readpi2 18) ;; found <?name space char looking for char,?
494   (defconstant state-prereadpi 19);; found <?name space looking for space,character
495   (defconstant state-pre-!-contents 20) ;; found <!name space looking for > or contents
496   (defconstant state-!-contents 21) ;; found <!name space name start looking for more name,>,[,space
497   (defconstant state-!-doctype 22) ;; found <!DOCTYPE space looking for space,>,[,name
498   (defconstant state-begin-dtd 23)
499   (defconstant state-!-doctype-ext 24) ;; found <!DOCTYPE space name space name start looking for name,space
500   (defconstant state-!-doctype-system 25) ;; found <!DOCTYPE name SYSTEM looking for ',"
501   (defconstant state-!-doctype-public 26) ;; found <!DOCTYPE name PUBLIC looking for ',"
502   (defconstant state-!-doctype-system2 27) ;; found <!DOCTYPE name SYSTEM " looking for chars,"
503   (defconstant state-!-doctype-system3 28) ;; found <!DOCTYPE name SYSTEM ' looking for chars,'
504   (defconstant state-!-doctype-ext2 29) ;; found <!DOCTYPE name SYSTEM/PUBLIC etc. looking for space,>,[
505   (defconstant state-!-doctype-ext3 30) ;; processed DTD looking for space,>
506   (defconstant state-!-doctype-public2 31) ;;  found <!DOCTYPE name PUBLIC " looking for text or "
507   (defconstant state-!-doctype-public3 32) ;;  found <!DOCTYPE name PUBLIC ' looking for text or '
508   (defconstant state-readtag2 33) ;; found <name looking for space,/,>,attrib name
509   (defconstant state-readtag3 34) ;; found <name/ or <name / looking for >
510   (defconstant state-readtag4 35) ;; found <name attrib-name start looking for more name,=
511   (defconstant state-readtag5 36) ;; found attrib= looking for ',"
512   (defconstant state-readtag6 37) ;; found attrib=['"] looking for end delimiter,value,reference
513   (defconstant state-readtag7 38) ;; found & inside attribute value, looking for # or name start
514   (defconstant state-readtag8 39) ;; found &# in attribute value looking for char code
515   (defconstant state-readtag9 40) ;; found &name start looking for more name,;
516   (defconstant state-readtag10 41) ;; found &#x in attribute value looking for hex code
517   (defconstant state-readtag11 42) ;; found &#[0-9] looking for more digits,;
518   (defconstant state-readtag-end2 43) ;; found </ & tag name start looking for more tag, space, >
519   (defconstant state-readtag-end3 44) ;; found </ end tag name space looking for >
520   (defconstant state-pcdata2 45) ;; seen & looking for name start
521   (defconstant state-pcdata3 46) ;; seen &# looking for character reference code
522   (defconstant state-pcdata4 47) ;; working on entity reference name looking for ;
523   (defconstant state-pcdata5 48) ;; working on hex character code reference
524   (defconstant state-pcdata6 49) ;; working on decimal character code reference
525   (defconstant state-findattributename0 50)
526   (defconstant state-readtag6a 51)
527   (defconstant state-readtag-!-conditional4 52)
528   (defconstant state-readtag-!-conditional5 53)
529   (defconstant state-readtag-!-conditional6 54)
530   (defconstant state-readtag-!-conditional7 55)
531   ;;(defconstant state-pcdata-parsed 56)
532   (defconstant state-pcdata7 57)
533   (defconstant state-pcdata8 58)
534   (defconstant state-readtag12 59)
535   (defconstant state-attribname2 60)
536   )
537
538 (defun next-token (tokenbuf external-callback attlist-data)
539   (declare (optimize (speed 3) (safety 1)))
540   ;; return two values:
541   ;;    the next token from the stream.
542   ;;    the kind of token
543   ;;
544   ;; if read-sequence-func is non-nil,
545   ;; read-sequence-func is called to fetch the next character
546   (macrolet ((add-to-entity-buf (entity-symbol p-value)
547                `(progn
548                   (push (make-tokenbuf :cur 0 :max (length p-value) :data p-value)
549                         (iostruct-entity-bufs tokenbuf))))
550
551              (un-next-char (ch)
552                `(push ,ch (iostruct-unget-char tokenbuf)))
553
554              (clear-coll (coll)
555                `(setf (collector-next ,coll) 0))
556
557              (add-to-coll (coll ch)
558                `(let ((.next. (collector-next ,coll)))
559                   (if* (>= .next. (collector-max ,coll))
560                      then (grow-and-add ,coll ,ch)
561                      else (setf (schar (collector-data ,coll) .next.)
562                             ,ch)
563                           (setf (collector-next ,coll) (1+ .next.)))))
564
565              (to-preferred-case (ch)
566                ;; should check the case mode
567                `(char-downcase ,ch))
568
569              )
570
571     (let ((state state-pcdata)
572           (coll  (get-collector))
573           (entity  (get-collector))
574           (tag-to-return)
575           (tag-to-return-string)
576           (attrib-name)
577           (empty-delim)
578           (value-delim)
579           (attrib-value)
580           (attribs-to-return)
581           (contents-to-return)
582           (char-code 0)
583           (special-tag-count 0)
584           (attrib-value-tokenbuf)
585           (last-ch)
586           (cdatap t)
587           (pcdatap t)
588           (entity-source)
589           (ns-token)
590           (ch))
591
592       (loop
593
594         (setq ch (get-next-char tokenbuf))
595         (when *debug-xml* (format t "ch: ~s code: ~x state:~s entity-names:~s~%"
596                                   ch (char-code ch) state (iostruct-entity-names tokenbuf)))
597         (if* (null ch)
598            then (return) ; eof -- exit loop
599                 )
600
601
602         (case state
603           (#.state-pcdata
604           (if* (eq ch #\<)
605              then
606                   (setf entity-source (first (iostruct-entity-bufs tokenbuf)))
607                   (if* (> (collector-next coll) 0)
608                      then               ; have collected something, return this string
609                           (un-next-char ch) ; push back the <
610                           (return)
611                       else ; collect a tag
612                           (setq state state-readtagfirst))
613            elseif (eq #\& ch)
614              then (setf state state-pcdata2)
615                   (setf entity-source (first (iostruct-entity-bufs tokenbuf)))
616                   (setf pcdatap nil)
617            elseif (eq #\] ch) then (setf state state-pcdata7)
618            elseif (not (xml-char-p ch)) then
619                   (xml-error (concatenate 'string
620                                "Illegal character: "
621                                (string ch)
622                                " detected in input"))
623              else
624                   (add-to-coll coll ch)
625                   #+ignore
626                   (if* (not (eq ch #\return))
627                      then (add-to-coll coll ch))))
628
629           (#.state-pcdata7
630            (if* (eq #\] ch) then (setf state state-pcdata8)
631               else (setf state state-pcdata)
632                    (add-to-coll coll #\]) (un-next-char ch)))
633
634           (#.state-pcdata8
635            (if* (eq #\> ch) then
636                    (add-to-coll coll #\])
637                    (add-to-coll coll #\])
638                    (add-to-coll coll #\>)
639                    (dotimes (i 15)
640                      (add-to-coll coll ch)
641                      (setq ch (get-next-char tokenbuf))
642                      (if* (null ch)
643                         then (return)))
644                    (xml-error (concatenate 'string
645                                 "content cannot contain ']]>':'"
646                                 (compute-coll-string coll)
647                                 "'"))
648             elseif (eq #\] ch) then
649                    (add-to-coll coll #\])
650               else (setf state state-pcdata)
651                    (add-to-coll coll #\]) (add-to-coll coll #\]) (un-next-char ch)))
652
653           (#.state-pcdata2
654            (if* (eq #\# ch)
655               then (setf state state-pcdata3)
656             elseif (xml-name-start-char-p ch)
657               then (setf state state-pcdata4)
658                    (un-next-char ch)
659               else (clear-coll coll)
660                    (dotimes (i 15)
661                      (add-to-coll coll ch)
662                      (setq ch (get-next-char tokenbuf))
663                      (if* (null ch)
664                         then (return)))
665                    (xml-error (concatenate 'string
666                                 "illegal reference name, starting at: '&"
667                                 (compute-coll-string coll)
668                                 "'"))
669                    ))
670
671           (#.state-pcdata3
672            (if* (eq #\x ch)
673               then (setf state state-pcdata5)
674             elseif (<= (char-code #\0) (char-code ch) (char-code #\9))
675               then (setf state state-pcdata6)
676                    (un-next-char ch)
677               else (clear-coll coll)
678                    (dotimes (i 15)
679                      (add-to-coll coll ch)
680                      (setq ch (get-next-char tokenbuf))
681                      (if* (null ch)
682                         then (return)))
683                    (xml-error (concatenate 'string
684                                 "illegal character reference code, starting at: '&#"
685                                 (compute-coll-string coll)
686                                 "'"))
687                    ))
688
689           (#.state-pcdata4
690            (if* (xml-name-char-p ch)
691               then (add-to-coll entity ch)
692             elseif (eq #\; ch)
693               then (let ((entity-symbol (compute-tag entity)))
694                      (clear-coll entity)
695                      (if* (not (eq entity-source (first (iostruct-entity-bufs tokenbuf)))) then
696                              (xml-error
697                               (concatenate 'string
698                                 (string entity-symbol)
699                                 " reference cannot be constructed from entity reference/character data sequence"))
700                         else
701                              (setf entity-source nil))
702                      (if* (string= (symbol-name entity-symbol) "amp") then (add-to-coll coll #\&)
703                       elseif (string= (symbol-name entity-symbol) "lt") then (add-to-coll coll #\<)
704                       elseif (string= (symbol-name entity-symbol) "gt") then (add-to-coll coll #\>)
705                       elseif (string= (symbol-name entity-symbol) "apos") then (add-to-coll coll #\')
706                       elseif (string= (symbol-name entity-symbol) "quot") then (add-to-coll coll #\")
707                         else
708                              (let (p-value)
709                                (if* (and (iostruct-do-entity tokenbuf)
710                                          (setf p-value
711                                            (assoc entity-symbol
712                                                   (iostruct-general-entities tokenbuf)))) then
713                                        (setf p-value (rest p-value))
714                                        (when (member entity-symbol (iostruct-entity-names tokenbuf))
715                                          (xml-error (concatenate 'string
716                                                       "entity:"
717                                                       (string entity-symbol)
718                                                       " in recursive reference")))
719                                        (push entity-symbol (iostruct-entity-names tokenbuf))
720                                        (if* (stringp p-value) then
721                                                (add-to-entity-buf entity-symbol p-value)
722                                         elseif (null external-callback) then
723                                                (setf (iostruct-do-entity tokenbuf) nil)
724                                         elseif p-value then
725                                                (let ((entity-stream (apply external-callback p-value)))
726                                                  (if* entity-stream then
727                                                          (let ((entity-buf (get-tokenbuf)))
728                                                            (setf (tokenbuf-stream entity-buf) entity-stream)
729                                                            (unicode-check entity-stream tokenbuf)
730                                                            (push entity-buf
731                                                                  (iostruct-entity-bufs tokenbuf))
732                                                            ;; check for possible external textdecl
733                                                            (let ((count 0) cch
734                                                                  (string "<?xml "))
735                                                              (if* (dotimes (i (length string) t)
736                                                                     (setf cch (get-next-char tokenbuf))
737                                                                     (when (and (= i 5)
738                                                                                (xml-space-p cch))
739                                                                       (setf cch #\space))
740                                                                     (when (not (eq cch
741                                                                                    (schar string count)))
742                                                                       (return nil))
743                                                                     (incf count)) then
744                                                                      (setf count 5)
745                                                                      (loop
746                                                                        (when (< count 0) (return))
747                                                                        (un-next-char (schar string count))
748                                                                        (decf count))
749                                                                      ;; swallow <?xml token
750                                                                      (swallow-xml-token
751                                                                       tokenbuf
752                                                                       external-callback)
753                                                                 else
754                                                                      (un-next-char cch)
755                                                                      (decf count)
756                                                                      (loop
757                                                                        (when (< count 0) (return))
758                                                                        (un-next-char (schar string count))
759                                                                        (decf count))))
760                                                            )
761                                                     else
762                                                          (xml-error (concatenate 'string
763                                                                       "Reference to unparsed entity "
764                                                                       (string entity-symbol)))
765                                                          ))
766                                                )
767                                 elseif (or (not (iostruct-seen-any-dtd tokenbuf))
768                                            (iostruct-standalonep tokenbuf)
769                                            (and (iostruct-seen-any-dtd tokenbuf)
770                                                 (not (iostruct-seen-external-dtd tokenbuf))
771                                                 (not (iostruct-seen-parameter-reference tokenbuf))))
772                                   then
773                                        (xml-error (concatenate 'string
774                                                     (string entity-symbol)
775                                                     " must have entity declaration before being referenced"))
776                                        ))
777                              ))
778                    (setq state state-pcdata)
779               else (let ((tmp (compute-coll-string entity)))
780                      (clear-coll coll)
781                      (dotimes (i 15)
782                        (add-to-coll coll ch)
783                        (setq ch (get-next-char tokenbuf))
784                        (if* (null ch)
785                           then (return)))
786                      (xml-error (concatenate 'string
787                                   "reference not terminated by ';', starting at: '&"
788                                   tmp
789                                   (compute-coll-string coll)
790                                   "'")))
791                    ))
792
793           (#.state-pcdata5
794            (let ((code (char-code ch)))
795              (if* (eq #\; ch)
796                 then (if* (not (eq entity-source (first (iostruct-entity-bufs tokenbuf)))) then
797                              (xml-error
798                               (concatenate 'string
799                                 (string (code-char char-code))
800                                 " reference cannot be constructed from entity reference/character data sequence"))
801                         else
802                              (setf entity-source nil))
803                      (when (not (xml-char-p (code-char char-code)))
804                            (xml-error
805                             (concatenate 'string
806                               "Character reference: "
807                               (format nil "~s" char-code)
808                               " (decimal) is not valid XML input character")))
809                      (add-to-coll coll (code-char char-code))
810                      (setf char-code 0)
811                      (setq state state-pcdata)
812               elseif (<= (char-code #\0) code (char-code #\9))
813                 then (setf char-code (+ (* char-code 16) (- code (char-code #\0))))
814               elseif (<= (char-code #\A) code (char-code #\F))
815                 then (setf char-code (+ 10 (* char-code 16) (- code (char-code #\A))))
816               elseif (<= (char-code #\a) code (char-code #\f))
817                 then (setf char-code (+ 10 (* char-code 16) (- code (char-code #\a))))
818                 else (clear-coll coll)
819                      (dotimes (i 15)
820                        (add-to-coll coll ch)
821                        (setq ch (get-next-char tokenbuf))
822                        (if* (null ch)
823                           then (return)))
824                      (xml-error (concatenate 'string
825                                   "illegal hexidecimal character reference code, starting at: '"
826                                   (compute-coll-string coll)
827                                   "', calculated char code: "
828                                   (format nil "~s" char-code)))
829                      )))
830
831           (#.state-pcdata6
832            (let ((code (char-code ch)))
833              (if* (eq #\; ch)
834                 then (if* (not (eq entity-source (first (iostruct-entity-bufs tokenbuf)))) then
835                              (xml-error
836                               (concatenate 'string
837                                 (string (code-char char-code))
838                                 " reference cannot be constructed from entity reference/character data sequence"))
839                         else
840                              (setf entity-source nil))
841                      (when (not (xml-char-p (code-char char-code)))
842                            (xml-error
843                             (concatenate 'string
844                               "Character reference: "
845                               (format nil "~s" char-code)
846                               " (decimal) is not valid XML input character")))
847                      (add-to-coll coll (code-char char-code))
848                      (setf char-code 0)
849                      (setq state state-pcdata)
850               elseif (<= (char-code #\0) code (char-code #\9))
851                 then (setf char-code (+ (* char-code 10) (- code (char-code #\0))))
852                 else (clear-coll coll)
853                      (dotimes (i 15)
854                        (add-to-coll coll ch)
855                        (setq ch (get-next-char tokenbuf))
856                        (if* (null ch)
857                           then (return)))
858                      (xml-error (concatenate 'string
859                                   "illegal decimal character reference code, starting at: '"
860                                   (compute-coll-string coll)
861                                   "', calculated char code: "
862                                   (format nil "~s" char-code)))
863                      )))
864
865           (#.state-readtag-end
866            (if* (xml-name-start-char-p ch)
867               then (setf state state-readtag-end2)
868                    (un-next-char ch)
869               else (clear-coll coll)
870                    (dotimes (i 15)
871                      (add-to-coll coll ch)
872                      (setq ch (get-next-char tokenbuf))
873                      (if* (null ch)
874                         then (return)))
875                    (xml-error (concatenate 'string
876                                 "illegal end tag name, starting at: '</"
877                                 (compute-coll-string coll)
878                                 "'"))
879                    ))
880
881           (#.state-readtag-end2
882            (if* (xml-name-char-p ch)
883               then (add-to-coll coll ch)
884             elseif (eq #\> ch) then
885                    (let ((tag-string (compute-coll-string coll)))
886                      (when (and (iostruct-ns-scope tokenbuf)
887                                 (string= tag-string
888                                     (first (first (iostruct-ns-scope tokenbuf)))))
889                        (dolist (item (second (first (iostruct-ns-scope tokenbuf))))
890                          (setf (iostruct-ns-to-package tokenbuf)
891                            (remove (assoc item (iostruct-ns-to-package tokenbuf))
892                                    (iostruct-ns-to-package tokenbuf))))
893                        (setf (iostruct-ns-scope tokenbuf)
894                          (rest (iostruct-ns-scope tokenbuf)))))
895                    (setq tag-to-return (compute-tag coll *package*
896                                                     (iostruct-ns-to-package tokenbuf)))
897                    (return)
898             elseif (xml-space-p ch) then (setf state state-readtag-end3)
899                    (let ((tag-string (compute-coll-string coll)))
900                      (when (and (iostruct-ns-scope tokenbuf)
901                                 (string= tag-string
902                                     (first (first (iostruct-ns-scope tokenbuf)))))
903                        (setf (iostruct-ns-scope tokenbuf)
904                          (rest (iostruct-ns-scope tokenbuf)))))
905                    (setq tag-to-return (compute-tag coll *package*
906                                                     (iostruct-ns-to-package tokenbuf)))
907               else (let ((tmp (compute-coll-string coll)))
908                      (clear-coll coll)
909                      (dotimes (i 15)
910                        (add-to-coll coll ch)
911                        (setq ch (get-next-char tokenbuf))
912                        (if* (null ch)
913                           then (return)))
914                      (xml-error (concatenate 'string
915                                   "illegal end tag name, starting at: '</"
916                                   tmp
917                                   (compute-coll-string coll)
918                                   "'")))
919                    ))
920
921           (#.state-readtag-end3
922            (if* (xml-space-p ch) then nil
923             elseif (eq #\> ch) then (return)
924               else (let ((tmp (compute-coll-string coll)))
925                      (clear-coll coll)
926                      (dotimes (i 15)
927                        (add-to-coll coll ch)
928                        (setq ch (get-next-char tokenbuf))
929                        (if* (null ch)
930                           then (return)))
931                      (xml-error (concatenate 'string
932                                   "illegal end tag name, starting at: '"
933                                   (compute-coll-string coll)
934                                   "' end tag name: " tmp )))
935                    ))
936
937           (#.state-readtagfirst
938            ; starting to read a tag name
939            (if* (eq #\/ ch)
940               then (setf state state-readtag-end)
941             elseif (eq #\? ch)
942               then (setf state state-readtag-?)
943                    (setf empty-delim #\?)
944             elseif (eq #\! ch)
945               then (setf state state-readtag-!)
946                    (setf empty-delim nil)
947             elseif (xml-name-start-char-p ch)
948               then (setf state state-readtag)
949                    (setf empty-delim #\/)
950                    (un-next-char ch)
951               else (clear-coll coll)
952                    (dotimes (i 15)
953                      (add-to-coll coll ch)
954                      (setq ch (get-next-char tokenbuf))
955                      (if* (null ch)
956                         then (return)))
957                    (xml-error (concatenate 'string
958                                 "illegal character following '<', starting at '"
959                                 (compute-coll-string coll)
960                                 "'"))
961                    ))
962
963           (#.state-readtag-!
964            (if* (xml-name-start-char-p ch)
965               then
966                    (setf state state-readtag-!-name)
967                    (un-next-char ch)
968             elseif (eq #\[ ch)
969               then
970                    (setf state state-readtag-!-conditional)
971             elseif (eq #\- ch)
972               then
973                    (setf state state-readtag-!-comment)
974               else
975                    (clear-coll coll)
976                    (dotimes (i 15)
977                      (add-to-coll coll ch)
978                      (setq ch (get-next-char tokenbuf))
979                      (if* (null ch)
980                         then (return)))
981                    (xml-error (concatenate 'string
982                                 "illegal character following '<!', starting at '<!"
983                                 (compute-coll-string coll)
984                                 "'"))
985                    ))
986
987           (#.state-readtag-!-conditional
988            (if* (eq #\C ch) then
989                    (setf state state-readtag-!-conditional4)
990                    (setf special-tag-count 1)
991               else (clear-coll coll)
992                    (dotimes (i 15)
993                      (add-to-coll coll ch)
994                      (setq ch (get-next-char tokenbuf))
995                      (if* (null ch)
996                         then (return)))
997                    (xml-error (concatenate 'string
998                                 "illegal character following '<![', starting at '<!["
999                                 (compute-coll-string coll)
1000                                 "'"))
1001                    ))
1002
1003           (#.state-readtag-!-conditional4
1004            (if* (not (eq (elt "CDATA[" special-tag-count) ch))
1005               then (clear-coll coll)
1006                    (dotimes (i 15)
1007                      (add-to-coll coll ch)
1008                      (setq ch (get-next-char tokenbuf))
1009                      (if* (null ch)
1010                         then (return)))
1011                    (xml-error (concatenate 'string
1012                                 "illegal token following '<![', starting at '<!["
1013                                 (subseq "CDATA[" 0 special-tag-count)
1014                                 (compute-coll-string coll)
1015                                 "'"))
1016             elseif (eq #\[ ch) then (setf state state-readtag-!-conditional5)
1017               else (incf special-tag-count)))
1018
1019           (#.state-readtag-!-conditional5
1020            (if* (eq #\] ch)
1021               then (setf state state-readtag-!-conditional6)
1022             elseif (not (xml-char-p ch)) then
1023                   (xml-error (concatenate 'string
1024                                "Illegal character: "
1025                                (string ch)
1026                                " detected in CDATA input"))
1027               else (add-to-coll coll ch)))
1028
1029           (#.state-readtag-!-conditional6
1030            (if* (eq #\] ch)
1031               then (setf state state-readtag-!-conditional7)
1032               else (setf state state-readtag-!-conditional5)
1033                    (add-to-coll coll #\])
1034                    (add-to-coll coll ch)))
1035
1036           (#.state-readtag-!-conditional7
1037            (if* (eq #\> ch)
1038               then
1039                    (if* (not (eq entity-source (first (iostruct-entity-bufs tokenbuf)))) then
1040                            (xml-error
1041                             "CDATA cannot be constructed from entity reference/character data sequence")
1042                       else
1043                              (setf entity-source nil))
1044                    (return)
1045             elseif (eq #\] ch) then
1046                    (add-to-coll coll #\]) ;; come back here to check again
1047               else (setf state state-readtag-!-conditional5)
1048                    (add-to-coll coll #\])
1049                    (add-to-coll coll #\])
1050                    (add-to-coll coll ch)))
1051
1052           (#.state-readtag-!-comment
1053            (if* (eq #\- ch)
1054               then (setf state state-readtag-!-readcomment)
1055                    (setf tag-to-return :comment)
1056               else (clear-coll coll)
1057                    (dotimes (i 15)
1058                      (add-to-coll coll ch)
1059                      (setq ch (get-next-char tokenbuf))
1060                      (if* (null ch)
1061                         then (return)))
1062                    (xml-error (concatenate 'string
1063                                 "illegal token following '<![-', starting at '<!-"
1064                                 (compute-coll-string coll)
1065                                 "'"))
1066                    ))
1067
1068           (#.state-readtag-!-readcomment
1069            (if* (eq #\- ch)
1070               then (setf state state-readtag-!-readcomment2)
1071             elseif (not (xml-char-p ch)) then
1072                    (xml-error (concatenate 'string
1073                                 "Illegal character: "
1074                                 (string ch)
1075                                 " detected in input"))
1076               else (add-to-coll coll ch)))
1077
1078           (#.state-readtag-!-readcomment2
1079            (if* (eq #\- ch)
1080               then (setf state state-readtag-end-bracket)
1081               else (setf state state-readtag-!-readcomment)
1082                    (add-to-coll coll #\-) (add-to-coll coll ch)))
1083
1084           (#.state-readtag-end-bracket
1085            (if* (eq #\> ch)
1086               then (if* (not (eq entity-source (first (iostruct-entity-bufs tokenbuf)))) then
1087                            (xml-error
1088                             (concatenate 'string
1089                               (string tag-to-return)
1090                             " tag cannot be constructed from entity reference/character data sequence"))
1091                       else
1092                              (setf entity-source nil))
1093                    (return)
1094               else  (clear-coll coll)
1095                    (dotimes (i 15)
1096                      (add-to-coll coll ch)
1097                      (setq ch (get-next-char tokenbuf))
1098                      (if* (null ch)
1099                         then (return)))
1100                    (xml-error (concatenate 'string
1101                                 "illegal token following '--' comment terminator, starting at '--"
1102                                 (compute-coll-string coll)
1103                                 "'"))
1104                    ))
1105
1106           (#.state-readtag
1107            (if* (xml-name-char-p ch) ;; starting char already passed more restrictive test
1108               then
1109                    (add-to-coll coll ch)
1110               else
1111                    (if* (xml-space-p ch) then
1112                            (setf tag-to-return-string (compute-coll-string coll))
1113                            (setq tag-to-return
1114                              (compute-tag coll *package*
1115                                           (iostruct-ns-to-package tokenbuf)))
1116                            (clear-coll coll)
1117                            (setf state state-readtag2)
1118                     elseif (eq #\> ch) then
1119                            (setq tag-to-return
1120                              (compute-tag coll *package*
1121                                           (iostruct-ns-to-package tokenbuf)))
1122                            (clear-coll coll)
1123                            (return)
1124                     elseif (eq #\/ ch) then
1125                            (setq tag-to-return
1126                              (compute-tag coll *package*
1127                                           (iostruct-ns-to-package tokenbuf)))
1128                            (clear-coll coll)
1129                            (setf state state-readtag3)
1130                       else (dotimes (i 15)
1131                              (add-to-coll coll ch)
1132                              (setq ch (get-next-char tokenbuf))
1133                              (if* (null ch)
1134                                 then (return)))
1135                            (xml-error
1136                             (concatenate 'string
1137                               "illegal token name, starting at '"
1138                               (compute-coll-string coll)
1139                               "'"))
1140                            )))
1141
1142           (#.state-readtag2
1143            (if* (xml-space-p ch) then nil
1144             elseif (eq #\> ch) then (return)
1145             elseif (eq #\/ ch) then (setf state state-readtag3)
1146             elseif (xml-name-start-char-p ch) then
1147                    (un-next-char ch)
1148                    (setf state state-readtag4)
1149               else (clear-coll coll)
1150                    (dotimes (i 15)
1151                      (add-to-coll coll ch)
1152                      (setq ch (get-next-char tokenbuf))
1153                      (if* (null ch)
1154                         then (return)))
1155                    (xml-error
1156                     (concatenate 'string
1157                       "illegal token, starting at '"
1158                       (compute-coll-string coll)
1159                       "' following element token start: " (string tag-to-return)))
1160                    ))
1161
1162           (#.state-readtag4
1163            (if* (xml-name-char-p ch) ;; starting char already passed more restrictive test
1164               then
1165                    (add-to-coll coll ch)
1166             elseif (eq #\= ch) then
1167                    (setq attrib-name (compute-tag coll *package*
1168                                                   (iostruct-ns-to-package tokenbuf)))
1169                    (clear-coll coll)
1170                    (let ((name (symbol-name attrib-name)))
1171                      (when (and (>= (length name) 5)
1172                                 (string= name "xmlns" :end1 5))
1173                        (if* (= (length name) 5)
1174                           then
1175                                (setf ns-token :none)
1176                         elseif (eq (schar name 5) #\:)
1177                           then
1178                                (setf ns-token (subseq name 6)))))
1179                    (setf state state-readtag5)
1180             elseif (xml-space-p ch) then
1181                    (setq attrib-name (compute-tag coll *package*
1182                                                   (iostruct-ns-to-package tokenbuf)))
1183                    (clear-coll coll)
1184                    (let ((name (symbol-name attrib-name)))
1185                      (when (and (>= (length name) 5)
1186                                 (string= name "xmlns" :end1 5))
1187                        (if* (= (length name) 5)
1188                           then
1189                                (setf ns-token :none)
1190                           else
1191                                (setf ns-token (subseq name 6)))))
1192                    (setf state state-readtag12)
1193               else (let ((tmp (compute-coll-string coll)))
1194                      (clear-coll coll)
1195                      (dotimes (i 15)
1196                        (add-to-coll coll ch)
1197                        (setq ch (get-next-char tokenbuf))
1198                        (if* (null ch)
1199                           then (return)))
1200                      (xml-error
1201                       (concatenate 'string
1202                         "looking for attribute '=', found: '"
1203                       (compute-coll-string coll)
1204                       "' following attribute name: " tmp)))
1205                    ))
1206
1207           (#.state-readtag12
1208            (if* (xml-space-p ch) then nil
1209             elseif (eq #\= ch) then (setf state state-readtag5)
1210               else
1211                  (dotimes (i 15)
1212                    (add-to-coll coll ch)
1213                    (setq ch (get-next-char tokenbuf))
1214                    (if* (null ch)
1215                       then (return)))
1216                    (xml-error
1217                     (concatenate 'string
1218                       "looking for attribute '=', found: '"
1219                       (compute-coll-string coll)
1220                       "' following attribute name: " (string attrib-name)))))
1221
1222           (#.state-readtag5
1223            ;; begin to collect attribute value
1224            (if* (or (eq ch #\")
1225                     (eq ch #\'))
1226               then (setq value-delim ch)
1227                    (let* ((tag-defaults (assoc tag-to-return attlist-data))
1228                           (this-attrib (assoc attrib-name tag-defaults)))
1229                      (when (and (second this-attrib) (not (eq (second this-attrib) :CDATA)))
1230                        (setf cdatap nil))
1231                      )
1232                    (setq state state-readtag6)
1233             elseif (xml-space-p ch) then nil
1234               else
1235                    (dotimes (i 15)
1236                      (add-to-coll coll ch)
1237                      (setq ch (get-next-char tokenbuf))
1238                      (if* (null ch)
1239                         then (return)))
1240                    (xml-error
1241                     (concatenate 'string
1242                       "attribute value not delimited by ' or \" : '"
1243                       (compute-coll-string coll)
1244                       "' following attribute: " (string attrib-name)))
1245                    ))
1246
1247           (#.state-readtag6
1248            (let ((from-entity (and attrib-value-tokenbuf
1249                                    (eq attrib-value-tokenbuf
1250                                        (first (iostruct-entity-bufs tokenbuf))))))
1251              (when (not from-entity) (setf attrib-value-tokenbuf nil))
1252              (if* from-entity then
1253                      (if* (eq #\newline ch) then (setf ch #\space)
1254                       elseif (eq #\return ch) then (setf ch #\space)
1255                       elseif (eq #\tab ch) then (setf ch #\space)
1256                              ))
1257              (if* (and (not from-entity) (eq ch value-delim))
1258                 then (setq attrib-value (compute-coll-string coll))
1259                      (when (not cdatap)
1260                        (setf attrib-value (normalize-attrib-value attrib-value)))
1261                      (clear-coll coll)
1262                      (push attrib-name attribs-to-return)
1263                      (push attrib-value attribs-to-return)
1264                      (when ns-token
1265                        (let ((package (assoc (parse-uri attrib-value)
1266                                              (iostruct-uri-to-package tokenbuf)
1267                                              :test 'uri=)))
1268                          (if* package then (setf package (rest package))
1269                             else
1270                                  (setf package
1271                                    (let ((i 0) new-package)
1272                                      (loop
1273                                        (let* ((candidate (concatenate 'string
1274                                                            "net.xml.namespace."
1275                                                            (format nil "~s" i)))
1276                                               (exists (find-package candidate)))
1277                                          (if* exists
1278                                             then (incf i)
1279                                             else (setf new-package (make-package candidate))
1280                                                  (setf (iostruct-uri-to-package tokenbuf)
1281                                                    (acons (parse-uri attrib-value) new-package
1282                                                           (iostruct-uri-to-package tokenbuf)))
1283                                                  (return new-package)))))))
1284                          (setf (iostruct-ns-to-package tokenbuf)
1285                            (acons ns-token package (iostruct-ns-to-package tokenbuf)))
1286                          )
1287                        (if* (and (first (iostruct-ns-scope tokenbuf))
1288                                  (string= (first (first (iostruct-ns-scope tokenbuf)))
1289                                      tag-to-return-string))
1290                           then
1291                                (push ns-token (second (first (iostruct-ns-scope tokenbuf))))
1292                           else
1293                                (push (list tag-to-return-string (list ns-token))
1294                                      (iostruct-ns-scope tokenbuf)))
1295                        (setf ns-token nil))
1296                      (setq state state-readtag6a)
1297               elseif (eq #\newline ch) then
1298                      (when (not (eq #\return last-ch)) (add-to-coll coll #\space))
1299               elseif (or (eq #\tab ch) (eq #\return ch)) then
1300                      (add-to-coll coll #\space)
1301               elseif (eq #\& ch)
1302                  then (setq state state-readtag7)
1303                       (setf entity-source (first (iostruct-entity-bufs tokenbuf)))
1304               elseif (and (xml-char-p ch) (not (eq #\< ch)))
1305                 then (add-to-coll coll ch)
1306                 else
1307                      (dotimes (i 15)
1308                        (add-to-coll coll ch)
1309                        (setq ch (get-next-char tokenbuf))
1310                        (if* (null ch)
1311                           then (return)))
1312                      (xml-error
1313                       (concatenate 'string
1314                         "attribute value cannot contain '<': '"
1315                         (compute-coll-string coll)
1316                         "' following attribute: " (string attrib-name)))
1317                      )
1318              (setf last-ch ch)))
1319
1320           (#.state-readtag6a
1321            (if* (xml-space-p ch) then (setf state state-readtag2)
1322             elseif (eq #\> ch) then (setf state state-readtag2)
1323                    (return)
1324             elseif (eq #\/ ch) then (setf state state-readtag3)
1325             else (clear-coll coll)
1326                    (dotimes (i 15)
1327                      (add-to-coll coll ch)
1328                      (setq ch (get-next-char tokenbuf))
1329                      (if* (null ch)
1330                         then (return)))
1331                    (xml-error
1332                     (concatenate 'string
1333                       "illegal token, starting at '"
1334                       (compute-coll-string coll)
1335                       "' following element token start: " (string tag-to-return)))
1336                    ))
1337
1338           (#.state-readtag7
1339            (if* (eq #\# ch)
1340               then (setf state state-readtag8)
1341             elseif (xml-name-start-char-p ch)
1342               then (setf state state-readtag9)
1343                    (un-next-char ch)
1344               else (clear-coll coll)
1345                    (dotimes (i 15)
1346                      (add-to-coll coll ch)
1347                      (setq ch (get-next-char tokenbuf))
1348                      (if* (null ch)
1349                         then (return)))
1350                    (xml-error
1351                     (concatenate 'string
1352                       "attribute value contains illegal reference name: '&"
1353                       (compute-coll-string coll)
1354                       "' in attribute value for: " (string attrib-name)))
1355                    ))
1356
1357           (#.state-readtag8
1358            (if* (eq #\x ch)
1359               then (setf state state-readtag10)
1360             elseif (<= (char-code #\0) (char-code ch) (char-code #\9))
1361               then (setf state state-readtag11)
1362                    (un-next-char ch)
1363               else (clear-coll coll)
1364                    (dotimes (i 15)
1365                      (add-to-coll coll ch)
1366                      (setq ch (get-next-char tokenbuf))
1367                      (if* (null ch)
1368                         then (return)))
1369                    (xml-error
1370                     (concatenate 'string
1371                       "attribute value contains illegal character reference code: '"
1372                       (compute-coll-string coll)
1373                       "' in attribute value for: " (string attrib-name)))
1374                    ))
1375
1376           (#.state-readtag10
1377            (let ((code (char-code ch)))
1378              (if* (eq #\; ch)
1379                 then (if* (not (eq entity-source (first (iostruct-entity-bufs tokenbuf)))) then
1380                              (xml-error
1381                               (concatenate 'string
1382                                 (string (code-char char-code))
1383                                 " reference cannot be constructed from entity reference/character data sequence"))
1384                         else
1385                              (setf entity-source nil))
1386                      (add-to-coll coll (code-char char-code))
1387                      (setf char-code 0)
1388                      (setq state state-readtag6)
1389               elseif (<= (char-code #\0) code (char-code #\9))
1390                 then (setf char-code (+ (* char-code 16) (- code (char-code #\0))))
1391               elseif (<= (char-code #\A) code (char-code #\F))
1392                 then (setf char-code (+ 10 (* char-code 16) (- code (char-code #\A))))
1393               elseif (<= (char-code #\a) code (char-code #\f))
1394                 then (setf char-code (+ 10 (* char-code 16) (- code (char-code #\a))))
1395                 else (clear-coll coll)
1396                      (dotimes (i 15)
1397                        (add-to-coll coll ch)
1398                        (setq ch (get-next-char tokenbuf))
1399                        (if* (null ch)
1400                           then (return)))
1401                      (xml-error
1402                       (concatenate 'string
1403                         "attribute value contains illegal hexidecimal character reference code: '"
1404                         (compute-coll-string coll)
1405                         "' in attribute value for: " (string attrib-name)))
1406                      )))
1407
1408           (#.state-readtag11
1409            (let ((code (char-code ch)))
1410              (if* (eq #\; ch)
1411                 then (if* (not (eq entity-source (first (iostruct-entity-bufs tokenbuf)))) then
1412                              (xml-error
1413                               (concatenate 'string
1414                                 (string (code-char char-code))
1415                                 " reference cannot be constructed from entity reference/character data sequence"))
1416                         else
1417                              (setf entity-source nil))
1418                      (add-to-coll coll (code-char char-code))
1419                      (setf char-code 0)
1420                      (setq state state-readtag6)
1421               elseif (<= (char-code #\0) code (char-code #\9))
1422                 then (setf char-code (+ (* char-code 10) (- code (char-code #\0))))
1423                 else (clear-coll coll)
1424                      (dotimes (i 15)
1425                        (add-to-coll coll ch)
1426                        (setq ch (get-next-char tokenbuf))
1427                        (if* (null ch)
1428                           then (return)))
1429                      (xml-error
1430                       (concatenate 'string
1431                         "attribute value contains illegal decimal character reference code: '"
1432                         (compute-coll-string coll)
1433                         "' in attribute value for: " (string attrib-name)))
1434                      )))
1435
1436           (#.state-readtag9
1437            (if* (xml-name-char-p ch)
1438               then (add-to-coll entity ch)
1439             elseif (eq #\; ch)
1440               then (let ((entity-symbol (compute-tag entity)))
1441                      (clear-coll entity)
1442                      (if* (not (eq entity-source (first (iostruct-entity-bufs tokenbuf)))) then
1443                              (xml-error
1444                               (concatenate 'string
1445                                 (string entity-symbol)
1446                                 " reference cannot be constructed from entity reference/character data sequence"))
1447                         else
1448                              (setf entity-source nil))
1449                      (if* (string= (symbol-name entity-symbol) "amp") then (add-to-coll coll #\&)
1450                       elseif (string= (symbol-name entity-symbol) "lt") then (add-to-coll coll #\<)
1451                       elseif (string= (symbol-name entity-symbol) "gt") then (add-to-coll coll #\>)
1452                       elseif (string= (symbol-name entity-symbol) "apos") then (add-to-coll coll #\')
1453                       elseif (string= (symbol-name entity-symbol) "quot") then (add-to-coll coll #\")
1454                         else (let (p-value)
1455                                (if* (and (iostruct-do-entity tokenbuf)
1456                                          (setf p-value
1457                                            (assoc entity-symbol
1458                                                   (iostruct-general-entities tokenbuf)))) then
1459                                        (setf p-value (rest p-value))
1460                                        (when (member entity-symbol (iostruct-entity-names tokenbuf))
1461                                          (xml-error (concatenate 'string
1462                                                       "entity:"
1463                                                       (string entity-symbol)
1464                                                       " in recursive reference")))
1465                                        (push entity-symbol (iostruct-entity-names tokenbuf))
1466                                        (if* (stringp p-value) then
1467                                                (add-to-entity-buf entity-symbol p-value)
1468                                                (when (not attrib-value-tokenbuf)
1469                                                  (setf attrib-value-tokenbuf
1470                                                    (first (iostruct-entity-bufs tokenbuf))))
1471                                         elseif (null external-callback) then
1472                                                (setf (iostruct-do-entity tokenbuf) nil)
1473                                         elseif p-value then
1474                                                (let ((entity-stream (apply external-callback p-value)))
1475                                                  (if* entity-stream then
1476                                                          (let ((entity-buf (get-tokenbuf)))
1477                                                            (setf (tokenbuf-stream entity-buf) entity-stream)
1478                                                            (unicode-check entity-stream tokenbuf)
1479                                                            (push entity-buf
1480                                                                  (iostruct-entity-bufs tokenbuf))
1481                                                            ;; check for possible external textdecl
1482                                                            (let ((count 0) cch
1483                                                                  (string "<?xml "))
1484                                                              (if* (dotimes (i (length string) t)
1485                                                                     (setf cch (get-next-char tokenbuf))
1486                                                                     (when (and (= i 5)
1487                                                                                (xml-space-p cch))
1488                                                                       (setf cch #\space))
1489                                                                     (when (not (eq cch
1490                                                                                    (schar string count)))
1491                                                                       (return nil))
1492                                                                     (incf count)) then
1493                                                                      (setf count 5)
1494                                                                      (loop
1495                                                                        (when (< count 0) (return))
1496                                                                        (un-next-char (schar string count))
1497                                                                        (decf count))
1498                                                                      ;; swallow <?xml token
1499                                                                      (swallow-xml-token
1500                                                                       tokenbuf
1501                                                                       external-callback)
1502                                                                 else
1503                                                                      (un-next-char cch)
1504                                                                      (decf count)
1505                                                                      (loop
1506                                                                        (when (< count 0) (return))
1507                                                                        (un-next-char (schar string count))
1508                                                                        (decf count))))
1509                                                            )
1510                                                     else
1511                                                          (xml-error (concatenate 'string
1512                                                                       "Reference to unparsed entity "
1513                                                                       (string entity-symbol)))
1514                                                          ))
1515                                                )
1516                                 elseif (or (not (iostruct-seen-any-dtd tokenbuf))
1517                                            (and (iostruct-seen-any-dtd tokenbuf)
1518                                                 (not (iostruct-seen-external-dtd tokenbuf))
1519                                                 (not (iostruct-seen-parameter-reference tokenbuf))))
1520                                   then
1521                                        (xml-error (concatenate 'string
1522                                                     (string entity-symbol)
1523                                                     " must have entity declaration before being referenced"))
1524                                        ))
1525                              ))
1526                    (setq state state-readtag6)
1527               else (dotimes (i 15)
1528                      (add-to-coll coll ch)
1529                      (setq ch (get-next-char tokenbuf))
1530                      (if* (null ch)
1531                         then (return)))
1532                    (xml-error
1533                     (concatenate 'string
1534                       "attribute value contains illegal reference name: '&"
1535                       (compute-coll-string coll)
1536                       "' in attribute value for: " (string attrib-name)))
1537                    ))
1538
1539           (#.state-readtag3
1540            (if* (eq #\> ch) then (return)
1541               else (clear-coll coll)
1542                    (dotimes (i 15)
1543                      (add-to-coll coll ch)
1544                      (setq ch (get-next-char tokenbuf))
1545                      (if* (null ch)
1546                         then (return)))
1547                    (xml-error
1548                     (concatenate 'string
1549                       "expected '>' found '"
1550                       (compute-coll-string coll)
1551                       "' in element: " (string tag-to-return)))
1552                    ))
1553
1554           (#.state-readtag-!-name
1555            (if* (xml-name-char-p ch) ;; starting char already passed more restrictive test
1556               then
1557                    (add-to-coll coll ch)
1558               else
1559                    (when (not (xml-space-p ch))
1560                      (xml-error (concatenate 'string
1561                                   "expecting whitespace following: '<!"
1562                                   (compute-coll-string coll)
1563                                   "' ; got: '" (string ch) "'")))
1564                    (setq tag-to-return (compute-tag coll))
1565                    (clear-coll coll)
1566                    (setf state state-pre-!-contents)))
1567
1568           (#.state-readtag-?
1569            (if* (xml-name-char-p ch)
1570               then
1571                    (add-to-coll coll ch)
1572               else
1573                    (when (and (not (xml-space-p ch)) (not (eq #\? ch)))
1574                      (xml-error (concatenate 'string
1575                                   "expecting name following: '<?"
1576                                   (compute-coll-string coll)
1577                                   "' ; got: '" (string ch) "'"))
1578                      )
1579                    (when (= (collector-next coll) 0)
1580                      (xml-error "null <? token"))
1581                    (if* (and (= (collector-next coll) 3)
1582                              (eq (elt (collector-data coll) 0) #\x)
1583                              (eq (elt (collector-data coll) 1) #\m)
1584                              (eq (elt (collector-data coll) 2) #\l)
1585                              )
1586                       then
1587                            (when (eq #\? ch) (xml-error "null <?xml token"))
1588                            (setq tag-to-return :xml)
1589                            (setf state state-findattributename)
1590                     elseif (and (= (collector-next coll) 3)
1591                                 (or (eq (elt (collector-data coll) 0) #\x)
1592                                     (eq (elt (collector-data coll) 0) #\X))
1593                                 (or (eq (elt (collector-data coll) 1) #\m)
1594                                     (eq (elt (collector-data coll) 1) #\M))
1595                                 (or (eq (elt (collector-data coll) 2) #\l)
1596                                     (eq (elt (collector-data coll) 2) #\L))
1597                                 ) then
1598                            (xml-error "<?xml tag must be all lower case")
1599                       else
1600                            (setq tag-to-return (compute-tag coll))
1601                            (when (eq #\? ch) (un-next-char ch))
1602                            (setf state state-prereadpi))
1603                    (clear-coll coll)))
1604
1605           (#.state-pre-!-contents
1606            (if* (xml-space-p ch)
1607               then nil
1608             elseif (not (xml-char-p ch))
1609               then (xml-error (concatenate 'string   ;; no test for this...
1610                                 "illegal character '"
1611                                 (string ch)
1612                                 " following <!" (string tag-to-return)))
1613             elseif (eq #\> ch)
1614               then (return)
1615               else (un-next-char ch)
1616                    (setf state state-!-contents)))
1617
1618           (#.state-begin-dtd
1619            (un-next-char ch)
1620            (let ((val (parse-dtd tokenbuf nil external-callback)))
1621              (setf (iostruct-seen-any-dtd tokenbuf) t)
1622              (push (append (list :[) val)
1623                    contents-to-return))
1624              (setf state state-!-doctype-ext3))
1625
1626           (#.state-!-contents
1627            (if* (xml-name-char-p ch)
1628               then (add-to-coll coll ch)
1629             elseif (eq #\> ch)
1630               then (push (compute-coll-string coll) contents-to-return)
1631                    (clear-coll coll)
1632                    (return)
1633             elseif (eq #\[ ch)
1634               then (push (compute-tag coll) contents-to-return)
1635                    (clear-coll coll)
1636                    (setf state state-begin-dtd)
1637             elseif (and (xml-space-p ch) (eq tag-to-return :DOCTYPE))
1638                    ;; look at tag-to-return and set state accordingly
1639               then (push (compute-tag coll) contents-to-return)
1640                    (clear-coll coll)
1641                    (setf state state-!-doctype)
1642               else (xml-error
1643                     (concatenate 'string
1644                       "illegal name: '"
1645                       (string tag-to-return)
1646                       "' in <! tag: "))
1647                    ))
1648
1649           (#.state-!-doctype-ext
1650            (if* (xml-name-char-p ch) ;; starting char already passed more restrictive test
1651               then
1652                    (add-to-coll coll ch)
1653               else
1654                    (when (not (xml-space-p ch))
1655                      (dotimes (i 15)
1656                        (add-to-coll coll ch)
1657                        (setq ch (get-next-char tokenbuf))
1658                        (if* (null ch)
1659                           then (return)))
1660                      (xml-error
1661                       (concatenate 'string
1662                         "illegal character in '"
1663                         (compute-coll-string coll)
1664                         "' in <! tag: " (string tag-to-return) " "
1665                         (string (first contents-to-return))
1666                       ))
1667                      )
1668                    (let ((token (compute-tag coll)))
1669                      (push token contents-to-return)
1670                      (clear-coll coll)
1671                      (if* (eq :SYSTEM token) then (setf state state-!-doctype-system)
1672                       elseif (eq :PUBLIC token) then (setf state state-!-doctype-public)
1673                         else (xml-error
1674                               (concatenate 'string
1675                                 "expected 'SYSTEM' or 'PUBLIC' got '"
1676                                 (string (first contents-to-return))
1677                                 "' in <! tag: " (string tag-to-return) " "
1678                                 (string (second contents-to-return))))
1679                              )
1680                      )))
1681
1682           (#.state-!-doctype-public
1683            (if* (xml-space-p ch) then nil
1684             elseif (eq #\" ch) then (setf state state-!-doctype-public2)
1685             elseif (eq #\' ch) then (setf state state-!-doctype-public3)
1686               else (xml-error
1687                     (concatenate 'string
1688                       "expected quote or double-quote got: '"
1689                       (string ch)
1690                       "' in <! tag: " (string tag-to-return) " "
1691                       (string (second contents-to-return)) " "
1692                       (string (first contents-to-return))
1693                       ))
1694                    ))
1695
1696           (#.state-!-doctype-system
1697            (if* (xml-space-p ch) then nil
1698             elseif (eq #\" ch) then (setf state state-!-doctype-system2)
1699             elseif (eq #\' ch) then (setf state state-!-doctype-system3)
1700               else (xml-error
1701                     (concatenate 'string
1702                       "expected quote or double-quote got: '"
1703                       (string ch)
1704                       "' in <! tag: " (string tag-to-return) " "
1705                       (string (second contents-to-return)) " "
1706                       (string (first contents-to-return))
1707                       ))
1708                    ))
1709
1710           (#.state-!-doctype-public2
1711            (if* (eq #\" ch) then (push (compute-coll-string coll)
1712                                        contents-to-return)
1713                    (clear-coll coll)
1714                    (setf state state-!-doctype-system)
1715             elseif (pub-id-char-p ch) then (add-to-coll coll ch)
1716               else (dotimes (i 15)
1717                      (add-to-coll coll ch)
1718                      (setq ch (get-next-char tokenbuf))
1719                      (if* (null ch)
1720                         then (return)))
1721                    (xml-error
1722                     (concatenate 'string
1723                       "illegal character in DOCTYPE PUBLIC string: '"
1724                       (compute-coll-string coll) "'"))
1725                    ))
1726
1727           (#.state-!-doctype-public3
1728            (if* (eq #\' ch) then (push (compute-coll-string coll)
1729                                        contents-to-return)
1730                    (clear-coll coll)
1731                    (setf state state-!-doctype-system)
1732             elseif (pub-id-char-p ch) then (add-to-coll coll ch)
1733               else (dotimes (i 15)
1734                      (add-to-coll coll ch)
1735                      (setq ch (get-next-char tokenbuf))
1736                      (if* (null ch)
1737                         then (return)))
1738                    (xml-error
1739                     (concatenate 'string
1740                       "illegal character in DOCTYPE PUBLIC string: '"
1741                       (compute-coll-string coll) "'"))
1742                    ))
1743
1744           (#.state-!-doctype-system2
1745            (when (not (xml-char-p ch))
1746              (xml-error "XML is not well formed")) ;; not tested
1747            (if* (eq #\" ch) then (push (compute-coll-string coll)
1748                                        contents-to-return)
1749                    (clear-coll coll)
1750                    (setf state state-!-doctype-ext2)
1751               else (add-to-coll coll ch)))
1752
1753           (#.state-!-doctype-system3
1754            (when (not (xml-char-p ch))
1755              (xml-error "XML is not well formed")) ;; not tested
1756            (if* (eq #\' ch) then (push (compute-coll-string coll)
1757                                        contents-to-return)
1758                    (clear-coll coll)
1759                    (setf state state-!-doctype-ext2)
1760               else (add-to-coll coll ch)))
1761
1762           (#.state-!-doctype-ext2
1763            (if* (xml-space-p ch) then nil
1764             elseif (eq #\> ch) then (return)
1765             elseif (eq #\[ ch)
1766               then (setf state state-begin-dtd)
1767               else
1768                    (dotimes (i 15)
1769                      (add-to-coll coll ch)
1770                      (setq ch (get-next-char tokenbuf))
1771                      (if* (null ch)
1772                         then (return)))
1773                    (xml-error
1774                     (concatenate 'string
1775                       "illegal char in DOCTYPE token: '"
1776                       (compute-coll-string coll) "'"))
1777                    ))
1778
1779           (#.state-!-doctype-ext3
1780            (if* (xml-space-p ch) then nil
1781             elseif (eq #\> ch) then (return)
1782               else
1783                    (dotimes (i 15)
1784                      (add-to-coll coll ch)
1785                      (setq ch (get-next-char tokenbuf))
1786                      (if* (null ch)
1787                         then (return)))
1788                    (xml-error
1789                     (concatenate 'string
1790                       "illegal char in DOCTYPE token following dtd: '"
1791                       (compute-coll-string coll) "'"))
1792                    ))
1793
1794           (#.state-!-doctype
1795            ;; skip whitespace; possible exits: >, SYSTEM, PUBLIC, [
1796            (if* (xml-space-p ch) then nil
1797             elseif (xml-name-start-char-p ch)
1798               then
1799                    (setf state state-!-doctype-ext)
1800                    (un-next-char ch)
1801             elseif (eq #\> ch) then (return)
1802             elseif (eq #\[ ch)
1803               then (setf state state-begin-dtd)
1804               else (xml-error
1805                     (concatenate 'string
1806                       "illegal character: '"
1807                       (string ch)
1808                       "' in <! tag: " (string tag-to-return) " "
1809                       (string (first contents-to-return))))
1810                    ))
1811
1812           (#.state-prereadpi
1813            (if* (xml-space-p ch)
1814               then nil
1815             elseif (not (xml-char-p ch))
1816               then (xml-error "XML is not well formed") ;; no test
1817               else (un-next-char ch)
1818                    (setf state state-readpi)))
1819
1820           (#.state-readpi
1821            (if* (eq #\? ch)
1822               then (setf state state-readpi2)
1823             elseif (not (xml-char-p ch))
1824               then (xml-error "XML is not well formed") ;; no test
1825               else (add-to-coll coll ch)))
1826
1827           (#.state-readpi2
1828            (if* (eq #\> ch)
1829               then (return)
1830             elseif (eq #\? ch) then
1831                    (add-to-coll coll #\?) ;; come back here to try again
1832               else (setf state state-readpi)
1833                    (add-to-coll coll #\?)
1834                    (add-to-coll coll ch)))
1835
1836           (#.state-findattributename0
1837            (if* (xml-space-p ch) then (setf state state-findattributename)
1838             elseif (eq ch empty-delim)
1839               then (setf state state-noattributename)
1840               else
1841                    (dotimes (i 15)
1842                      (add-to-coll coll ch)
1843                      (setq ch (get-next-char tokenbuf))
1844                      (if* (null ch)
1845                         then (return)))
1846                    (xml-error
1847                     (concatenate 'string
1848                       "expected space or tag end before: '"
1849                       (compute-coll-string coll) "'"))))
1850           (#.state-findattributename
1851            ;; search until we find the start of an attribute name
1852            ;; or the end of the tag
1853            (if* (eq ch empty-delim)
1854               then (setf state state-noattributename)
1855             elseif (xml-space-p ch)
1856               then nil ;; skip whitespace
1857             elseif (xml-name-start-char-p ch)
1858               then
1859                    (un-next-char ch)
1860                    (setf state state-attribname)
1861               else
1862                    (dotimes (i 15)
1863                      (add-to-coll coll ch)
1864                      (setq ch (get-next-char tokenbuf))
1865                      (if* (null ch)
1866                         then (return)))
1867                    (xml-error
1868                     (concatenate 'string
1869                       "illegal char in <?xml token: '"
1870                       (compute-coll-string coll) "'"))
1871                    ))
1872
1873           (#.state-attribname
1874            ;; collect attribute name
1875            (if* (xml-name-char-p ch) ;; starting char already passed more restrictive test
1876               then
1877                    (add-to-coll coll ch)
1878             elseif (xml-space-p ch) then
1879                    (setq attrib-name (compute-tag coll))
1880                    (clear-coll coll)
1881                    (setq state state-attribname2)
1882               else
1883                    (when (not (eq #\= ch))
1884                      (dotimes (i 15)
1885                        (add-to-coll coll ch)
1886                        (setq ch (get-next-char tokenbuf))
1887                        (if* (null ch)
1888                           then (return)))
1889                      (xml-error
1890                       (concatenate 'string
1891                         "illegal char in <?xml attribute token: '"
1892                         (compute-coll-string coll) "'"))
1893                      )
1894                    (setq attrib-name (compute-tag coll))
1895                    (clear-coll coll)
1896                    (setq state state-attribstartvalue)))
1897
1898           (#.state-attribname2
1899            (if* (eq #\= ch) then (setq state state-attribstartvalue)
1900             elseif (xml-space-p ch) then nil
1901               else
1902                    (un-next-char ch)
1903                    (dotimes (i 15)
1904                        (add-to-coll coll ch)
1905                        (setq ch (get-next-char tokenbuf))
1906                        (if* (null ch)
1907                           then (return)))
1908                      (xml-error
1909                       (concatenate 'string
1910                         "illegal char in <?xml attribute token: '"
1911                         (compute-coll-string coll) "'"))))
1912           (#.state-attribstartvalue
1913            ;; begin to collect value
1914            (if* (or (eq ch #\")
1915                     (eq ch #\'))
1916               then (setq value-delim ch)
1917                    (setq state state-attribvaluedelim)
1918             elseif (xml-space-p ch) then nil
1919               else
1920                    (dotimes (i 15)
1921                        (add-to-coll coll ch)
1922                        (setq ch (get-next-char tokenbuf))
1923                        (if* (null ch)
1924                           then (return)))
1925                      (xml-error
1926                       (concatenate 'string
1927                         "expected ' or \" before  <?xml attribute token value: '"
1928                         (compute-coll-string coll) "'"))
1929                    ))
1930
1931            (#.state-attribvaluedelim
1932             (if* (eq ch value-delim)
1933                then (setq attrib-value (compute-coll-string coll))
1934                     (clear-coll coll)
1935                     (push attrib-name attribs-to-return)
1936                     (push attrib-value attribs-to-return)
1937                     (setq state state-findattributename0)
1938              elseif (and (xml-char-p ch) (not (eq #\< ch)))
1939                then (add-to-coll coll ch)
1940                else
1941                     (dotimes (i 15)
1942                        (add-to-coll coll ch)
1943                        (setq ch (get-next-char tokenbuf))
1944                        (if* (null ch)
1945                           then (return)))
1946                      (xml-error
1947                       (concatenate 'string
1948                         "illegal character in attribute token value: '"
1949                         (compute-coll-string coll) "'"))
1950                     ))
1951
1952            (#.state-noattributename
1953             (if* (eq #\> ch)
1954                then
1955                     (return) ;; ready to build return token
1956                else
1957                     (xml-error
1958                      (concatenate 'string
1959                        "expected '>' found: '" (string ch) "' in <?xml token"))
1960                     ))
1961
1962           (t
1963            (error "need to support state:~s" state))
1964           ))
1965       (put-back-collector entity)
1966       (case state
1967         (#.state-noattributename ;; it's a bug if this state occurs with a non-empty element
1968          (put-back-collector coll)
1969          (if* attribs-to-return
1970                  then (values (cons tag-to-return
1971                                     (nreverse attribs-to-return))
1972                               (if (eq tag-to-return :xml) :xml :start-tag) :end-tag)
1973             else
1974                  (values tag-to-return :start-tag :end-tag)
1975                  ))
1976         (#.state-readtag-end-bracket
1977          ;; this is a :commant tag
1978          (let ((ret (compute-coll-string coll)))
1979            (put-back-collector coll)
1980            (values (cons tag-to-return (list ret)) :comment :nil)))
1981         (#.state-pcdata
1982          (let ((next-char (collector-next coll)))
1983            (put-back-collector coll)
1984            (if* (zerop next-char)
1985               then (values nil :eof nil)
1986               else (values (compute-coll-string coll) :pcdata pcdatap))))
1987         (#.state-readpi2
1988          (let ((ret (compute-coll-string coll)))
1989            (put-back-collector coll)
1990            (values (append (list :pi tag-to-return) (list ret)) :pi nil)))
1991         ((#.state-readtag-!-conditional)
1992          (put-back-collector coll)
1993          (values (append (list tag-to-return) contents-to-return) :start-tag
1994                  :end-tag))
1995         ((#.state-!-contents
1996           #.state-!-doctype
1997           #.state-!-doctype-ext2
1998           #.state-!-doctype-ext3)
1999          (put-back-collector coll)
2000          (values (append (list tag-to-return) (nreverse contents-to-return)) :start-tag
2001                  :end-tag))
2002         (#.state-readtag3
2003          (put-back-collector coll)
2004          (values (if* attribs-to-return
2005                     then (cons tag-to-return
2006                                (nreverse attribs-to-return))
2007                     else tag-to-return) :start-tag :end-tag))
2008         ((#.state-readtag2
2009           #.state-readtag)
2010          (put-back-collector coll)
2011          (values (if* attribs-to-return
2012                     then (cons tag-to-return
2013                                (nreverse attribs-to-return))
2014                     else tag-to-return) :start-tag nil))
2015         ((#.state-readtag-end2
2016           #.state-readtag-end3)
2017          (put-back-collector coll)
2018          (values tag-to-return :end-tag nil))
2019         (#.state-readtag-!-conditional7
2020          (let ((ret (compute-coll-string coll)))
2021            (put-back-collector coll)
2022            (values (append (list :cdata) (list ret)) :cdata nil)))
2023         (t
2024          ;; if ch is null that means we encountered unexpected EOF
2025          (when (null ch)
2026            (put-back-collector coll)
2027            (xml-error "unexpected end of input"))
2028          (print (list tag-to-return attribs-to-return))
2029          (let ((ret (compute-coll-string coll)))
2030            (put-back-collector coll)
2031            (error "need to support state <post>:~s  ~s ~s ~s" state
2032                   tag-to-return
2033                   contents-to-return
2034                   ret))))
2035       )))
2036
2037 (defun swallow-xml-token (tokenbuf external-callback)
2038   (declare (ignorable old-coll) (optimize (speed 3) (safety 1)))
2039   (let ((xml (next-token tokenbuf external-callback nil)))
2040     (if* (and (eq (fourth xml) :standalone) (stringp (fifth xml))
2041               (equal (fifth xml) "yes")) then
2042             (xml-error "external XML entity cannot be standalone document")
2043      elseif (and (eq (sixth xml) :standalone) (stringp (seventh xml))
2044                  (equal (seventh xml) "yes")) then
2045             (xml-error "external XML entity cannot be standalone document"))))
2046
2047 ;; return the string with entity references replaced by text
2048 ;; normalizing will happen later
2049 ;; we're ok on different types - just ignore IMPLIED & REQUIRED; and possibly skip FIXED
2050 (defun parse-default-value (value-list tokenbuf external-callback)
2051   (declare (optimize (speed 3) (safety 1)))
2052   (let (value-string)
2053     (if* (stringp (first value-list)) then (setf value-string (first value-list))
2054      elseif (eq (first value-list) :FIXED) then (setf value-string (second value-list)))
2055     (let ((tmp-result (parse-xml
2056                       (concatenate 'string
2057                         "<item x='"
2058                         value-string
2059                         "'/>")
2060                       :external-callback external-callback
2061                       :general-entities
2062                       (iostruct-general-entities tokenbuf))))
2063       (if* (stringp (first value-list)) then
2064               (setf (first value-list)
2065                 (third (first (first tmp-result))))
2066          elseif (eq (first value-list) :FIXED) then
2067               (setf (second value-list)
2068                 (third (first (first tmp-result)))))))
2069   value-list)
2070
2071 (defun process-attlist (args attlist-data)
2072   (declare (optimize (speed 3) (safety 1)))
2073   (dolist (arg1 args attlist-data)
2074     ;;(format t "arg1: ~s~%" arg1)
2075     (dolist (item (rest arg1))
2076       ;;(format t "item: ~s~%" item)
2077       (when (eq :ATTLIST (first item))
2078         (let* ((name (second item))
2079                (name-data (assoc name attlist-data))
2080                (new-name-data (rest name-data)))
2081           ;;(format t "name: ~s name-data: ~s new-name-data: ~s~%" name name-data new-name-data)
2082           (dolist (attrib-data (rest (rest item)))
2083             ;;(format t "attrib-data: ~s~%" attrib-data)
2084             #+ignore
2085             (setf (rest (rest attrib-data))
2086               (parse-default-value (rest (rest attrib-data)) tokenbuf external-callback))
2087             (when (not (assoc (first attrib-data) new-name-data))
2088               (setf new-name-data (acons (first attrib-data) (rest attrib-data) new-name-data))))
2089           (if* name-data then
2090                   (rplacd (assoc name attlist-data) (nreverse new-name-data))
2091              else (setf attlist-data (acons name (nreverse new-name-data) attlist-data))))))))
2092
2093 (provide :pxml)