2 ;; copyright (c) 1986-2000 Franz Inc, Berkeley, CA
4 ;; This code is free software; you can redistribute it and/or
5 ;; modify it under the terms of the version 2.1 of
6 ;; the GNU Lesser General Public License as published by
7 ;; the Free Software Foundation, as clarified by the AllegroServe
8 ;; prequel found in license-allegroserve.txt.
10 ;; This code is distributed in the hope that it will be useful,
11 ;; but without any warranty; without even the implied warranty of
12 ;; merchantability or fitness for a particular purpose. See the GNU
13 ;; Lesser General Public License for more details.
15 ;; Version 2.1 of the GNU Lesser General Public License is in the file
16 ;; license-lgpl.txt that was distributed with this file.
17 ;; If it is not present, you can access it from
18 ;; http://www.gnu.org/copyleft/lesser.txt (until superseded by a newer
19 ;; version) or write to the Free Software Foundation, Inc., 59 Temple Place,
20 ;; Suite 330, Boston, MA 02111-1307 USA
24 ;; 10/14/00 add namespace support; xml-error fix
26 (in-package :net.xml.parser)
28 (pxml-dribble-bug-hook "$Id$")
30 (defparameter *collectors* (list nil nil nil nil nil nil nil nil))
32 (defun put-back-collector (col)
33 (declare (optimize (speed 3) (safety 1)))
35 (do ((cols *collectors* (cdr cols)))
39 (if* (null (car cols))
40 then (setf (car cols) col)
43 (defun pub-id-char-p (char)
44 (declare (optimize (speed 3) (safety 1)))
45 (let ((code (char-code char)))
46 (or (= #x20 code) (= #xD code) (= #xA code)
47 (<= (char-code #\a) code (char-code #\z))
48 (<= (char-code #\A) code (char-code #\Z))
49 (<= (char-code #\0) code (char-code #\9))
50 (member char '( #\- #\' #\( #\) #\+ #\, #\. #\/ #\: #\= #\?
51 #\; #\! #\* #\# #\@ #\$ #\_ #\%)))))
53 (defparameter *keyword-package* (find-package :keyword))
55 ;; cache of tokenbuf structs
56 (defparameter *tokenbufs* (list nil nil nil nil))
59 unget-char ;; character pushed back
60 tokenbuf ;; main input tokenbuf
61 read-sequence-func ;; optional alternative to read-sequence
62 entity-bufs ;; active entity tokenbufs
63 entity-names ;; active entity names
66 do-entity ;; still substituting entity text
69 seen-parameter-reference
77 cur ;; next index to use to grab from tokenbuf
78 max ;; index one beyond last character
79 data ;; character array
80 stream ;; for external sources
83 (defun get-tokenbuf ()
84 (declare (optimize (speed 3) (safety 1)))
87 (do* ((bufs *tokenbufs* (cdr bufs))
88 (this (car bufs) (car bufs)))
91 then (setf (car bufs) nil)
95 then (setf (tokenbuf-cur buf) 0)
96 (setf (tokenbuf-max buf) 0)
97 (setf (tokenbuf-stream buf) nil)
102 :data (make-array 1024 :element-type 'character)))))
105 next ; next index to set
106 max ; 1+max index to set
110 (defun compute-tag (coll &optional (package *keyword-package*) ns-to-package)
111 (declare (optimize (speed 3) (safety 1)))
112 ;; compute the symbol named by what's in the collector
113 (if* (not ns-to-package)
114 then (excl::intern* (collector-data coll) (collector-next coll) package)
116 (let (new-package (data (collector-data coll)))
117 (if* (and (eq (schar data 0) #\x)
118 (eq (schar data 1) #\m)
119 (eq (schar data 2) #\l)
120 (eq (schar data 3) #\n)
121 (eq (schar data 4) #\s)
122 (or (eq (schar data 5) #\:)
123 (= (collector-next coll) 5)))
124 then ;; putting xmlns: in :none namespace
125 (setf new-package (assoc :none ns-to-package))
126 (when new-package (setf package (rest new-package)))
127 (excl::intern* (collector-data coll) (collector-next coll) package)
129 (let ((colon-index -1)
130 (data (collector-data coll)))
131 (dotimes (i (collector-next coll))
132 (when (eq (schar data i) #\:)
135 (if* (> colon-index -1) then
136 (let ((string1 (make-string colon-index))
138 (dotimes (i colon-index)
139 (setf (schar string1 i) (schar data i)))
140 (setf new-package (assoc string1 ns-to-package :test 'string=))
143 (setf string2 (make-string (- (collector-next coll)
145 (dotimes (i (- (collector-next coll)
147 (setf (schar string2 i)
148 (schar data (+ colon-index 1 i))))
149 (excl::intern string2 (rest new-package))
151 (excl::intern* (collector-data coll)
152 (collector-next coll) package)))
154 (let ((new-package (assoc :none ns-to-package)))
156 (setf package (rest new-package))))
157 (excl::intern* (collector-data coll)
158 (collector-next coll) package)))
162 (defun compute-coll-string (coll)
163 (declare (optimize (speed 3) (safety 1)))
164 ;; return the string that's in the collection
165 (let ((str (make-string (collector-next coll)))
166 (from (collector-data coll)))
167 (dotimes (i (collector-next coll))
168 (setf (schar str i) (schar from i)))
172 (defun grow-and-add (coll ch)
173 (declare (optimize (speed 3) (safety 1)))
174 ;; increase the size of the data portion of the collector and then
175 ;; add the given char at the end
176 (let* ((odata (collector-data coll))
177 (ndata (make-string (* 2 (length odata)))))
178 (dotimes (i (length odata))
179 (setf (schar ndata i) (schar odata i)))
180 (setf (collector-data coll) ndata)
181 (setf (collector-max coll) (length ndata))
182 (let ((next (collector-next coll)))
183 (setf (schar ndata next) ch)
184 (setf (collector-next coll) (1+ next)))))
186 (defun put-back-tokenbuf (buf)
187 (declare (optimize (speed 3) (safety 1)))
189 (do ((bufs *tokenbufs* (cdr bufs)))
193 (if* (null (car bufs))
194 then (setf (car bufs) buf)
197 (defun get-collector ()
198 (declare (optimize (speed 3) (safety 1)))
201 (do* ((cols *collectors* (cdr cols))
202 (this (car cols) (car cols)))
205 then (setf (car cols) nil)
209 then (setf (collector-next col) 0)
214 :data (make-string 100)))))
216 (defmacro next-char (tokenbuf read-sequence-func)
217 `(let ((cur (tokenbuf-cur ,tokenbuf))
218 (tb (tokenbuf-data ,tokenbuf)))
219 (if* (>= cur (tokenbuf-max ,tokenbuf))
221 (if* (or (not (tokenbuf-stream ,tokenbuf))
222 (zerop (setf (tokenbuf-max ,tokenbuf)
223 (if* ,read-sequence-func
224 then (funcall ,read-sequence-func tb
225 (tokenbuf-stream ,tokenbuf))
226 else (read-sequence tb (tokenbuf-stream ,tokenbuf))))))
227 then (setq cur nil) ;; eof
231 (let ((cc (schar tb cur)))
232 (if (and (tokenbuf-stream ,tokenbuf) (eq #\return cc)) #\newline cc))
233 (setf (tokenbuf-cur ,tokenbuf) (1+ cur))))))
235 (defun get-next-char (iostruct)
236 (declare (optimize (speed 3) (safety 1)))
237 (let* (from-stream (tmp-char
239 (if* (iostruct-unget-char iostruct) then
240 ;; from-stream is used to do input CR/LF normalization
242 (setf char (first (iostruct-unget-char iostruct)))
243 (setf (iostruct-unget-char iostruct) (rest (iostruct-unget-char iostruct)))
245 elseif (iostruct-entity-bufs iostruct) then
248 (setf entity-buf (first (iostruct-entity-bufs iostruct)))
249 (if* (streamp (tokenbuf-stream entity-buf))
250 then (setf from-stream t)
251 else (setf from-stream nil))
252 (setf char (next-char entity-buf (iostruct-read-sequence-func iostruct)))
254 (when (streamp (tokenbuf-stream entity-buf))
255 (close (tokenbuf-stream entity-buf))
256 (put-back-tokenbuf entity-buf))
257 (setf (iostruct-entity-bufs iostruct) (rest (iostruct-entity-bufs iostruct)))
258 (setf (iostruct-entity-names iostruct) (rest (iostruct-entity-names iostruct)))
259 (when (not (iostruct-entity-bufs iostruct)) (return))))
261 else (next-char (iostruct-tokenbuf iostruct)
262 (iostruct-read-sequence-func iostruct)))
263 else (setf from-stream t)
264 (next-char (iostruct-tokenbuf iostruct)
265 (iostruct-read-sequence-func iostruct))))))
266 (if* (and from-stream (eq tmp-char #\return)) then #\newline else tmp-char)))
268 (defun unicode-check (p tokenbuf)
269 #-allegro (return-from unicode-check t)
271 (declare (ignorable tokenbuf) (optimize (speed 3) (safety 1)))
272 ;; need no-OO check because external format support isn't completely done yet
274 (when (not (typep p 'string-input-simple-stream))
276 (let ((format (ignore-errors (excl:sniff-for-unicode p))))
277 (if* (eq format (find-external-format :unicode))
279 (setf (stream-external-format p) format)
281 (setf (stream-external-format p) (find-external-format :utf8))))
283 (let* ((c (read-char p nil)) c2
284 (c-code (if c (char-code c) nil)))
285 (if* (eq #xFF c-code) then
286 (setf c2 (read-char p nil))
287 (setf c-code (if c (char-code c2) nil))
288 (if* (eq #xFE c-code) then
289 (format t "set unicode~%")
290 (setf (stream-external-format p)
291 (find-external-format
293 #-allegro :fat-little))
295 (xml-error "stream has incomplete Unicode marker"))
296 else (setf (stream-external-format p)
297 (find-external-format :utf8))
299 (push c (iostruct-unget-char tokenbuf))
300 #+ignore (unread-char c p) ;; bug when there is single ^M in file
303 (defun add-default-values (val attlist-data)
304 (declare (ignorable old-coll) (optimize (speed 3) (safety 1)))
307 (let* ((tag-defaults (assoc val attlist-data)) defaults)
308 (dolist (def (rest tag-defaults))
309 (if* (stringp (third def)) then
310 (push (first def) defaults)
311 (push (if (eq (second def) :CDATA) (third def)
312 (normalize-attrib-value (third def))) defaults)
313 elseif (and (eq (third def) :FIXED) (stringp (fourth def))) then
314 (push (first def) defaults)
315 (push (if (eq (second def) :CDATA) (fourth def)
316 (normalize-attrib-value (fourth def))) defaults)
319 (setf val (append (list val) (nreverse defaults)))
323 ;; first make sure there are no errors in given list
324 (let ((pairs (rest val)))
326 (when (null pairs) (return))
327 (let ((this-one (first pairs)))
328 (setf pairs (rest (rest pairs)))
329 (when (member this-one pairs)
330 (xml-error (concatenate 'string "Entity: "
334 " attribute values"))))))
335 (let ((tag-defaults (assoc (first val) attlist-data)) defaults)
336 (dolist (def (rest tag-defaults))
337 (let ((old (member (first def) (rest val))))
339 (if* (stringp (third def)) then
340 (push (first def) defaults)
341 (push (third def) defaults)
342 elseif (and (eq (third def) :FIXED) (stringp (fourth def))) then
343 (push (first def) defaults)
344 (push (fourth def) defaults))
346 (push (first old) defaults)
347 (push (second old) defaults))))
349 ;; now look for attributes in original list that weren't in dtd
350 (let ((tmp-val (rest val)) att att-val)
352 (when (null tmp-val) (return))
353 (setf att (first tmp-val))
354 (setf att-val (second tmp-val))
355 (setf tmp-val (rest (rest tmp-val)))
356 (when (not (member att defaults))
358 (push att-val defaults))))
359 (setf val (append (list (first val)) (nreverse defaults)))
363 (defun normalize-public-value (public-value)
364 (setf public-value (string-trim '(#\space) public-value))
365 (let ((count 0) (stop (length public-value)) (last-ch nil) cch)
367 (when (= count stop) (return public-value))
368 (setf cch (schar public-value count))
369 (if* (and (eq cch #\space) (eq last-ch #\space)) then
371 (remove #\space public-value :start count :count 1))
374 (setf last-ch cch)))))
377 (defun normalize-attrib-value (attrib-value &optional first-pass)
378 (declare (optimize (speed 3) (safety 1)))
380 (let ((count 0) (stop (length attrib-value)) (last-ch nil) cch)
382 (when (= count stop) (return))
383 (setf cch (schar attrib-value count))
384 (if* (or (eq cch #\return) (eq cch #\tab)) then (setf (schar attrib-value count) #\space)
385 elseif (and (eq cch #\newline) (not (eq last-ch #\return))) then
386 (setf (schar attrib-value count) #\space)
387 elseif (and (eq cch #\newline) (eq last-ch #\return)) then
389 (remove #\space attrib-value :start count :count 1))
392 (setf last-ch cch))))
393 (setf attrib-value (string-trim '(#\space) attrib-value))
394 (let ((count 0) (stop (length attrib-value)) (last-ch nil) cch)
396 (when (= count stop) (return attrib-value))
397 (setf cch (schar attrib-value count))
398 (if* (and (eq cch #\space) (eq last-ch #\space)) then
400 (remove #\space attrib-value :start count :count 1))
403 (setf last-ch cch)))))
405 (defun check-xmldecl (val tokenbuf)
406 (declare (ignorable old-coll) (optimize (speed 3) (safety 1)))
407 (when (not (and (symbolp (second val)) (string= "version" (symbol-name (second val)))))
408 (xml-error "XML declaration tag does not include correct 'version' attribute"))
409 (when (and (fourth val)
410 (or (not (symbolp (fourth val)))
411 (and (not (string= "standalone" (symbol-name (fourth val))))
412 (not (string= "encoding" (symbol-name (fourth val)))))))
413 (xml-error "XML declaration tag does not include correct 'encoding' or 'standalone' attribute"))
414 (when (and (fourth val) (string= "standalone" (symbol-name (fourth val))))
415 (if* (equal (fifth val) "yes") then
416 (setf (iostruct-standalonep tokenbuf) t)
417 elseif (not (equal (fifth val) "no")) then
418 (xml-error "XML declaration tag does not include correct 'standalone' attribute value")))
419 (dotimes (i (length (third val)))
420 (let ((c (schar (third val) i)))
421 (when (and (not (alpha-char-p c))
422 (not (digit-char-p c))
423 (not (member c '(#\. #\_ #\- #\:)))
425 (xml-error "XML declaration tag does not include correct 'version' attribute value"))))
426 (if* (and (fourth val) (eql :encoding (fourth val)))
427 then (dotimes (i (length (fifth val)))
428 (let ((c (schar (fifth val) i)))
429 (when (and (not (alpha-char-p c))
431 (and (not (digit-char-p c))
432 (not (member c '(#\. #\_ #\-))))
434 (xml-error "XML declaration tag does not include correct 'encoding' attribute value"))))
436 ;; if we have a stream we're reading from set its external-format
438 ;; note - tokenbuf is really an iostruct, not a tokenbuf
440 (if* (tokenbuf-stream (iostruct-tokenbuf tokenbuf))
441 then (setf (stream-external-format
442 (tokenbuf-stream (iostruct-tokenbuf tokenbuf)))
443 (find-external-format (fifth val))))
448 (defun xml-error (text)
449 (declare (optimize (speed 3) (safety 1)))
450 (funcall 'error "~a" (concatenate 'string "XML not well-formed - " text)))