2 ;; copyright (c) 1986-2000 Franz Inc, Berkeley, CA
4 ;; This code is free software; you can redistribute it and/or
5 ;; modify it under the terms of the version 2.1 of
6 ;; the GNU Lesser General Public License as published by
7 ;; the Free Software Foundation, as clarified by the AllegroServe
8 ;; prequel found in license-allegroserve.txt.
10 ;; This code is distributed in the hope that it will be useful,
11 ;; but without any warranty; without even the implied warranty of
12 ;; merchantability or fitness for a particular purpose. See the GNU
13 ;; Lesser General Public License for more details.
15 ;; Version 2.1 of the GNU Lesser General Public License is in the file
16 ;; license-lgpl.txt that was distributed with this file.
17 ;; If it is not present, you can access it from
18 ;; http://www.gnu.org/copyleft/lesser.txt (until superseded by a newer
19 ;; version) or write to the Free Software Foundation, Inc., 59 Temple Place,
20 ;; Suite 330, Boston, MA 02111-1307 USA
24 ;; 10/14/00 add namespace support; xml-error fix
26 (in-package :net.xml.parser)
28 (pxml-dribble-bug-hook "$Id: pxml1.cl,v 1.3 2003/06/20 02:21:23 kevin Exp $")
30 (defparameter *collectors* (list nil nil nil nil nil nil nil nil))
32 (defun put-back-collector (col)
33 (declare (optimize (speed 3) (safety 1)))
34 (mp::without-scheduling
35 (do ((cols *collectors* (cdr cols)))
39 (if* (null (car cols))
40 then (setf (car cols) col)
43 (defun pub-id-char-p (char)
44 (declare (optimize (speed 3) (safety 1)))
45 (let ((code (char-code char)))
46 (or (= #x20 code) (= #xD code) (= #xA code)
47 (<= (char-code #\a) code (char-code #\z))
48 (<= (char-code #\A) code (char-code #\Z))
49 (<= (char-code #\0) code (char-code #\9))
50 (member char '( #\- #\' #\( #\) #\+ #\, #\. #\/ #\: #\= #\?
51 #\; #\! #\* #\# #\@ #\$ #\_ #\%)))))
53 (defparameter *keyword-package* (find-package :keyword))
55 ;; cache of tokenbuf structs
56 (defparameter *tokenbufs* (list nil nil nil nil))
59 unget-char ;; character pushed back
60 tokenbuf ;; main input tokenbuf
61 read-sequence-func ;; optional alternative to read-sequence
62 entity-bufs ;; active entity tokenbufs
63 entity-names ;; active entity names
66 do-entity ;; still substituting entity text
69 seen-parameter-reference
77 cur ;; next index to use to grab from tokenbuf
78 max ;; index one beyond last character
79 data ;; character array
80 stream ;; for external sources
83 (defun get-tokenbuf ()
84 (declare (optimize (speed 3) (safety 1)))
86 (mp::without-scheduling
87 (do* ((bufs *tokenbufs* (cdr bufs))
88 (this (car bufs) (car bufs)))
91 then (setf (car bufs) nil)
95 then (setf (tokenbuf-cur buf) 0)
96 (setf (tokenbuf-max buf) 0)
97 (setf (tokenbuf-stream buf) nil)
102 :data (make-array 1024 :element-type 'character)))))
105 next ; next index to set
106 max ; 1+max index to set
110 (defun compute-tag (coll &optional (package *keyword-package*) ns-to-package)
111 (declare (optimize (speed 3) (safety 1)))
112 ;; compute the symbol named by what's in the collector
113 (if* (not ns-to-package)
114 then (excl::intern* (collector-data coll) (collector-next coll) package)
116 (let (new-package (data (collector-data coll)))
117 (if* (and (eq (schar data 0) #\x)
118 (eq (schar data 1) #\m)
119 (eq (schar data 2) #\l)
120 (eq (schar data 3) #\n)
121 (eq (schar data 4) #\s)
122 (or (eq (schar data 5) #\:)
123 (= (collector-next coll) 5)))
124 then ;; putting xmlns: in :none namespace
125 (setf new-package (assoc :none ns-to-package))
126 (when new-package (setf package (rest new-package)))
127 (excl::intern* (collector-data coll) (collector-next coll) package)
129 (let ((colon-index -1)
130 (data (collector-data coll)))
131 (dotimes (i (collector-next coll))
132 (when (eq (schar data i) #\:)
135 (if* (> colon-index -1) then
136 (let ((string1 (make-string colon-index))
138 (dotimes (i colon-index)
139 (setf (schar string1 i) (schar data i)))
140 (setf new-package (assoc string1 ns-to-package :test 'string=))
143 (setf string2 (make-string (- (collector-next coll)
145 (dotimes (i (- (collector-next coll)
147 (setf (schar string2 i)
148 (schar data (+ colon-index 1 i))))
149 (excl::intern string2 (rest new-package))
151 (excl::intern* (collector-data coll)
152 (collector-next coll) package)))
154 (let ((new-package (assoc :none ns-to-package)))
156 (setf package (rest new-package))))
157 (excl::intern* (collector-data coll)
158 (collector-next coll) package)))
162 (defun compute-coll-string (coll)
163 (declare (optimize (speed 3) (safety 1)))
164 ;; return the string that's in the collection
165 (let ((str (make-string (collector-next coll)))
166 (from (collector-data coll)))
167 (dotimes (i (collector-next coll))
168 (setf (schar str i) (schar from i)))
172 (defun grow-and-add (coll ch)
173 (declare (optimize (speed 3) (safety 1)))
174 ;; increase the size of the data portion of the collector and then
175 ;; add the given char at the end
176 (let* ((odata (collector-data coll))
177 (ndata (make-string (* 2 (length odata)))))
178 (dotimes (i (length odata))
179 (setf (schar ndata i) (schar odata i)))
180 (setf (collector-data coll) ndata)
181 (setf (collector-max coll) (length ndata))
182 (let ((next (collector-next coll)))
183 (setf (schar ndata next) ch)
184 (setf (collector-next coll) (1+ next)))))
186 (defun put-back-tokenbuf (buf)
187 (declare (optimize (speed 3) (safety 1)))
188 (mp::without-scheduling
189 (do ((bufs *tokenbufs* (cdr bufs)))
193 (if* (null (car bufs))
194 then (setf (car bufs) buf)
197 (defun get-collector ()
198 (declare (optimize (speed 3) (safety 1)))
200 (mp::without-scheduling
201 (do* ((cols *collectors* (cdr cols))
202 (this (car cols) (car cols)))
205 then (setf (car cols) nil)
209 then (setf (collector-next col) 0)
214 :data (make-string 100)))))
216 (defmacro next-char (tokenbuf read-sequence-func)
217 `(let ((cur (tokenbuf-cur ,tokenbuf))
218 (tb (tokenbuf-data ,tokenbuf)))
219 (if* (>= cur (tokenbuf-max ,tokenbuf))
221 (if* (or (not (tokenbuf-stream ,tokenbuf))
222 (zerop (setf (tokenbuf-max ,tokenbuf)
223 (if* ,read-sequence-func
224 then (funcall ,read-sequence-func tb
225 (tokenbuf-stream ,tokenbuf))
226 else (read-sequence tb (tokenbuf-stream ,tokenbuf))))))
227 then (setq cur nil) ;; eof
231 (let ((cc (schar tb cur)))
232 (if (and (tokenbuf-stream ,tokenbuf) (eq #\return cc)) #\newline cc))
233 (setf (tokenbuf-cur ,tokenbuf) (1+ cur))))))
235 (defun get-next-char (iostruct)
236 (declare (optimize (speed 3) (safety 1)))
237 (let* (from-stream (tmp-char
239 (if* (iostruct-unget-char iostruct) then
240 ;; from-stream is used to do input CR/LF normalization
242 (setf char (first (iostruct-unget-char iostruct)))
243 (setf (iostruct-unget-char iostruct) (rest (iostruct-unget-char iostruct)))
245 elseif (iostruct-entity-bufs iostruct) then
248 (setf entity-buf (first (iostruct-entity-bufs iostruct)))
249 (if* (streamp (tokenbuf-stream entity-buf))
250 then (setf from-stream t)
251 else (setf from-stream nil))
252 (setf char (next-char entity-buf (iostruct-read-sequence-func iostruct)))
254 (when (streamp (tokenbuf-stream entity-buf))
255 (close (tokenbuf-stream entity-buf))
256 (put-back-tokenbuf entity-buf))
257 (setf (iostruct-entity-bufs iostruct) (rest (iostruct-entity-bufs iostruct)))
258 (setf (iostruct-entity-names iostruct) (rest (iostruct-entity-names iostruct)))
259 (when (not (iostruct-entity-bufs iostruct)) (return))))
261 else (next-char (iostruct-tokenbuf iostruct)
262 (iostruct-read-sequence-func iostruct)))
263 else (setf from-stream t)
264 (next-char (iostruct-tokenbuf iostruct)
265 (iostruct-read-sequence-func iostruct))))))
266 (if* (and from-stream (eq tmp-char #\return)) then #\newline else tmp-char)))
268 (defun unicode-check (p tokenbuf)
269 (declare (ignorable tokenbuf) (optimize (speed 3) (safety 1)))
270 ;; need no-OO check because external format support isn't completely done yet
271 (when (not (typep p 'string-input-simple-stream))
272 #+(version>= 6 0 pre-final 1)
273 (let ((format (ignore-errors (excl:sniff-for-unicode p))))
274 (if* (eq format (find-external-format :unicode))
276 (setf (stream-external-format p) format)
278 (setf (stream-external-format p) (find-external-format :utf8))))
279 #-(version>= 6 0 pre-final 1)
280 (let* ((c (read-char p nil)) c2
281 (c-code (if c (char-code c) nil)))
282 (if* (eq #xFF c-code) then
283 (setf c2 (read-char p nil))
284 (setf c-code (if c (char-code c2) nil))
285 (if* (eq #xFE c-code) then
286 (format t "set unicode~%")
287 (setf (stream-external-format p)
288 (find-external-format #+(version>= 6 0 pre-final 1) :unicode
289 #-(version>= 6 0 pre-final 1) :fat-little))
291 (xml-error "stream has incomplete Unicode marker"))
292 else (setf (stream-external-format p)
293 (find-external-format :utf8))
295 (push c (iostruct-unget-char tokenbuf))
296 #+ignore (unread-char c p) ;; bug when there is single ^M in file
299 (defun add-default-values (val attlist-data)
300 (declare (ignorable old-coll) (optimize (speed 3) (safety 1)))
303 (let* ((tag-defaults (assoc val attlist-data)) defaults)
304 (dolist (def (rest tag-defaults))
305 (if* (stringp (third def)) then
306 (push (first def) defaults)
307 (push (if (eq (second def) :CDATA) (third def)
308 (normalize-attrib-value (third def))) defaults)
309 elseif (and (eq (third def) :FIXED) (stringp (fourth def))) then
310 (push (first def) defaults)
311 (push (if (eq (second def) :CDATA) (fourth def)
312 (normalize-attrib-value (fourth def))) defaults)
315 (setf val (append (list val) (nreverse defaults)))
319 ;; first make sure there are no errors in given list
320 (let ((pairs (rest val)))
322 (when (null pairs) (return))
323 (let ((this-one (first pairs)))
324 (setf pairs (rest (rest pairs)))
325 (when (member this-one pairs)
326 (xml-error (concatenate 'string "Entity: "
330 " attribute values"))))))
331 (let ((tag-defaults (assoc (first val) attlist-data)) defaults)
332 (dolist (def (rest tag-defaults))
333 (let ((old (member (first def) (rest val))))
335 (if* (stringp (third def)) then
336 (push (first def) defaults)
337 (push (third def) defaults)
338 elseif (and (eq (third def) :FIXED) (stringp (fourth def))) then
339 (push (first def) defaults)
340 (push (fourth def) defaults))
342 (push (first old) defaults)
343 (push (second old) defaults))))
345 ;; now look for attributes in original list that weren't in dtd
346 (let ((tmp-val (rest val)) att att-val)
348 (when (null tmp-val) (return))
349 (setf att (first tmp-val))
350 (setf att-val (second tmp-val))
351 (setf tmp-val (rest (rest tmp-val)))
352 (when (not (member att defaults))
354 (push att-val defaults))))
355 (setf val (append (list (first val)) (nreverse defaults)))
359 (defun normalize-public-value (public-value)
360 (setf public-value (string-trim '(#\space) public-value))
361 (let ((count 0) (stop (length public-value)) (last-ch nil) cch)
363 (when (= count stop) (return public-value))
364 (setf cch (schar public-value count))
365 (if* (and (eq cch #\space) (eq last-ch #\space)) then
367 (remove #\space public-value :start count :count 1))
370 (setf last-ch cch)))))
373 (defun normalize-attrib-value (attrib-value &optional first-pass)
374 (declare (optimize (speed 3) (safety 1)))
376 (let ((count 0) (stop (length attrib-value)) (last-ch nil) cch)
378 (when (= count stop) (return))
379 (setf cch (schar attrib-value count))
380 (if* (or (eq cch #\return) (eq cch #\tab)) then (setf (schar attrib-value count) #\space)
381 elseif (and (eq cch #\newline) (not (eq last-ch #\return))) then
382 (setf (schar attrib-value count) #\space)
383 elseif (and (eq cch #\newline) (eq last-ch #\return)) then
385 (remove #\space attrib-value :start count :count 1))
388 (setf last-ch cch))))
389 (setf attrib-value (string-trim '(#\space) attrib-value))
390 (let ((count 0) (stop (length attrib-value)) (last-ch nil) cch)
392 (when (= count stop) (return attrib-value))
393 (setf cch (schar attrib-value count))
394 (if* (and (eq cch #\space) (eq last-ch #\space)) then
396 (remove #\space attrib-value :start count :count 1))
399 (setf last-ch cch)))))
401 (defun check-xmldecl (val tokenbuf)
402 (declare (ignorable old-coll) (optimize (speed 3) (safety 1)))
403 (when (not (and (symbolp (second val)) (string= "version" (symbol-name (second val)))))
404 (xml-error "XML declaration tag does not include correct 'version' attribute"))
405 (when (and (fourth val)
406 (or (not (symbolp (fourth val)))
407 (and (not (string= "standalone" (symbol-name (fourth val))))
408 (not (string= "encoding" (symbol-name (fourth val)))))))
409 (xml-error "XML declaration tag does not include correct 'encoding' or 'standalone' attribute"))
410 (when (and (fourth val) (string= "standalone" (symbol-name (fourth val))))
411 (if* (equal (fifth val) "yes") then
412 (setf (iostruct-standalonep tokenbuf) t)
413 elseif (not (equal (fifth val) "no")) then
414 (xml-error "XML declaration tag does not include correct 'standalone' attribute value")))
415 (dotimes (i (length (third val)))
416 (let ((c (schar (third val) i)))
417 (when (and (not (alpha-char-p c))
418 (not (digit-char-p c))
419 (not (member c '(#\. #\_ #\- #\:)))
421 (xml-error "XML declaration tag does not include correct 'version' attribute value"))))
422 (if* (and (fourth val) (eql :encoding (fourth val)))
423 then (dotimes (i (length (fifth val)))
424 (let ((c (schar (fifth val) i)))
425 (when (and (not (alpha-char-p c))
427 (and (not (digit-char-p c))
428 (not (member c '(#\. #\_ #\-))))
430 (xml-error "XML declaration tag does not include correct 'encoding' attribute value"))))
432 ;; if we have a stream we're reading from set its external-format
434 ;; note - tokenbuf is really an iostruct, not a tokenbuf
435 (if* (tokenbuf-stream (iostruct-tokenbuf tokenbuf))
436 then (setf (stream-external-format
437 (tokenbuf-stream (iostruct-tokenbuf tokenbuf)))
438 (find-external-format (fifth val))))
443 (defun xml-error (text)
444 (declare (optimize (speed 3) (safety 1)))
445 (funcall 'error "~a" (concatenate 'string "XML not well-formed - " text)))