2 ;; copyright (c) 1986-2000 Franz Inc, Berkeley, CA
4 ;; This code is free software; you can redistribute it and/or
5 ;; modify it under the terms of the version 2.1 of
6 ;; the GNU Lesser General Public License as published by
7 ;; the Free Software Foundation, as clarified by the AllegroServe
8 ;; prequel found in license-allegroserve.txt.
10 ;; This code is distributed in the hope that it will be useful,
11 ;; but without any warranty; without even the implied warranty of
12 ;; merchantability or fitness for a particular purpose. See the GNU
13 ;; Lesser General Public License for more details.
15 ;; Version 2.1 of the GNU Lesser General Public License is in the file
16 ;; license-lgpl.txt that was distributed with this file.
17 ;; If it is not present, you can access it from
18 ;; http://www.gnu.org/copyleft/lesser.txt (until superseded by a newer
19 ;; version) or write to the Free Software Foundation, Inc., 59 Temple Place,
20 ;; Suite 330, Boston, MA 02111-1307 USA
22 ;; $Id: pxml1.cl,v 1.1 2002/10/15 12:23:03 kevin Exp $
26 ;; 10/14/00 add namespace support; xml-error fix
28 (in-package :net.xml.parser)
30 (pxml-dribble-bug-hook "$Id: pxml1.cl,v 1.1 2002/10/15 12:23:03 kevin Exp $")
32 (defparameter *collectors* (list nil nil nil nil nil nil nil nil))
34 (defun put-back-collector (col)
35 (declare (optimize (speed 3) (safety 1)))
36 (mp::without-scheduling
37 (do ((cols *collectors* (cdr cols)))
41 (if* (null (car cols))
42 then (setf (car cols) col)
45 (defun pub-id-char-p (char)
46 (declare (optimize (speed 3) (safety 1)))
47 (let ((code (char-code char)))
48 (or (= #x20 code) (= #xD code) (= #xA code)
49 (<= (char-code #\a) code (char-code #\z))
50 (<= (char-code #\A) code (char-code #\Z))
51 (<= (char-code #\0) code (char-code #\9))
52 (member char '( #\- #\' #\( #\) #\+ #\, #\. #\/ #\: #\= #\?
53 #\; #\! #\* #\# #\@ #\$ #\_ #\%)))))
55 (defparameter *keyword-package* (find-package :keyword))
57 ;; cache of tokenbuf structs
58 (defparameter *tokenbufs* (list nil nil nil nil))
61 unget-char ;; character pushed back
62 tokenbuf ;; main input tokenbuf
63 read-sequence-func ;; optional alternative to read-sequence
64 entity-bufs ;; active entity tokenbufs
65 entity-names ;; active entity names
68 do-entity ;; still substituting entity text
71 seen-parameter-reference
79 cur ;; next index to use to grab from tokenbuf
80 max ;; index one beyond last character
81 data ;; character array
82 stream ;; for external sources
85 (defun get-tokenbuf ()
86 (declare (optimize (speed 3) (safety 1)))
88 (mp::without-scheduling
89 (do* ((bufs *tokenbufs* (cdr bufs))
90 (this (car bufs) (car bufs)))
93 then (setf (car bufs) nil)
97 then (setf (tokenbuf-cur buf) 0)
98 (setf (tokenbuf-max buf) 0)
99 (setf (tokenbuf-stream buf) nil)
104 :data (make-array 1024 :element-type 'character)))))
107 next ; next index to set
108 max ; 1+max index to set
112 (defun compute-tag (coll &optional (package *keyword-package*) ns-to-package)
113 (declare (optimize (speed 3) (safety 1)))
114 ;; compute the symbol named by what's in the collector
115 (if* (not ns-to-package)
116 then (excl::intern* (collector-data coll) (collector-next coll) package)
118 (let (new-package (data (collector-data coll)))
119 (if* (and (eq (schar data 0) #\x)
120 (eq (schar data 1) #\m)
121 (eq (schar data 2) #\l)
122 (eq (schar data 3) #\n)
123 (eq (schar data 4) #\s)
124 (or (eq (schar data 5) #\:)
125 (= (collector-next coll) 5)))
126 then ;; putting xmlns: in :none namespace
127 (setf new-package (assoc :none ns-to-package))
128 (when new-package (setf package (rest new-package)))
129 (excl::intern* (collector-data coll) (collector-next coll) package)
131 (let ((colon-index -1)
132 (data (collector-data coll)))
133 (dotimes (i (collector-next coll))
134 (when (eq (schar data i) #\:)
137 (if* (> colon-index -1) then
138 (let ((string1 (make-string colon-index))
140 (dotimes (i colon-index)
141 (setf (schar string1 i) (schar data i)))
142 (setf new-package (assoc string1 ns-to-package :test 'string=))
145 (setf string2 (make-string (- (collector-next coll)
147 (dotimes (i (- (collector-next coll)
149 (setf (schar string2 i)
150 (schar data (+ colon-index 1 i))))
151 (excl::intern string2 (rest new-package))
153 (excl::intern* (collector-data coll)
154 (collector-next coll) package)))
156 (let ((new-package (assoc :none ns-to-package)))
158 (setf package (rest new-package))))
159 (excl::intern* (collector-data coll)
160 (collector-next coll) package)))
164 (defun compute-coll-string (coll)
165 (declare (optimize (speed 3) (safety 1)))
166 ;; return the string that's in the collection
167 (let ((str (make-string (collector-next coll)))
168 (from (collector-data coll)))
169 (dotimes (i (collector-next coll))
170 (setf (schar str i) (schar from i)))
174 (defun grow-and-add (coll ch)
175 (declare (optimize (speed 3) (safety 1)))
176 ;; increase the size of the data portion of the collector and then
177 ;; add the given char at the end
178 (let* ((odata (collector-data coll))
179 (ndata (make-string (* 2 (length odata)))))
180 (dotimes (i (length odata))
181 (setf (schar ndata i) (schar odata i)))
182 (setf (collector-data coll) ndata)
183 (setf (collector-max coll) (length ndata))
184 (let ((next (collector-next coll)))
185 (setf (schar ndata next) ch)
186 (setf (collector-next coll) (1+ next)))))
188 (defun put-back-tokenbuf (buf)
189 (declare (optimize (speed 3) (safety 1)))
190 (mp::without-scheduling
191 (do ((bufs *tokenbufs* (cdr bufs)))
195 (if* (null (car bufs))
196 then (setf (car bufs) buf)
199 (defun get-collector ()
200 (declare (optimize (speed 3) (safety 1)))
202 (mp::without-scheduling
203 (do* ((cols *collectors* (cdr cols))
204 (this (car cols) (car cols)))
207 then (setf (car cols) nil)
211 then (setf (collector-next col) 0)
216 :data (make-string 100)))))
218 (defmacro next-char (tokenbuf read-sequence-func)
219 `(let ((cur (tokenbuf-cur ,tokenbuf))
220 (tb (tokenbuf-data ,tokenbuf)))
221 (if* (>= cur (tokenbuf-max ,tokenbuf))
223 (if* (or (not (tokenbuf-stream ,tokenbuf))
224 (zerop (setf (tokenbuf-max ,tokenbuf)
225 (if* ,read-sequence-func
226 then (funcall ,read-sequence-func tb
227 (tokenbuf-stream ,tokenbuf))
228 else (read-sequence tb (tokenbuf-stream ,tokenbuf))))))
229 then (setq cur nil) ;; eof
233 (let ((cc (schar tb cur)))
234 (if (and (tokenbuf-stream ,tokenbuf) (eq #\return cc)) #\newline cc))
235 (setf (tokenbuf-cur ,tokenbuf) (1+ cur))))))
237 (defun get-next-char (iostruct)
238 (declare (optimize (speed 3) (safety 1)))
239 (let* (from-stream (tmp-char
241 (if* (iostruct-unget-char iostruct) then
242 ;; from-stream is used to do input CR/LF normalization
244 (setf char (first (iostruct-unget-char iostruct)))
245 (setf (iostruct-unget-char iostruct) (rest (iostruct-unget-char iostruct)))
247 elseif (iostruct-entity-bufs iostruct) then
250 (setf entity-buf (first (iostruct-entity-bufs iostruct)))
251 (if* (streamp (tokenbuf-stream entity-buf))
252 then (setf from-stream t)
253 else (setf from-stream nil))
254 (setf char (next-char entity-buf (iostruct-read-sequence-func iostruct)))
256 (when (streamp (tokenbuf-stream entity-buf))
257 (close (tokenbuf-stream entity-buf))
258 (put-back-tokenbuf entity-buf))
259 (setf (iostruct-entity-bufs iostruct) (rest (iostruct-entity-bufs iostruct)))
260 (setf (iostruct-entity-names iostruct) (rest (iostruct-entity-names iostruct)))
261 (when (not (iostruct-entity-bufs iostruct)) (return))))
263 else (next-char (iostruct-tokenbuf iostruct)
264 (iostruct-read-sequence-func iostruct)))
265 else (setf from-stream t)
266 (next-char (iostruct-tokenbuf iostruct)
267 (iostruct-read-sequence-func iostruct))))))
268 (if* (and from-stream (eq tmp-char #\return)) then #\newline else tmp-char)))
270 (defun unicode-check (p tokenbuf)
271 (declare (ignorable tokenbuf) (optimize (speed 3) (safety 1)))
272 ;; need no-OO check because external format support isn't completely done yet
273 (when (not (typep p 'string-input-simple-stream))
274 #+(version>= 6 0 pre-final 1)
275 (let ((format (ignore-errors (excl:sniff-for-unicode p))))
276 (if* (eq format (find-external-format :unicode))
278 (setf (stream-external-format p) format)
280 (setf (stream-external-format p) (find-external-format :utf8))))
281 #-(version>= 6 0 pre-final 1)
282 (let* ((c (read-char p nil)) c2
283 (c-code (if c (char-code c) nil)))
284 (if* (eq #xFF c-code) then
285 (setf c2 (read-char p nil))
286 (setf c-code (if c (char-code c2) nil))
287 (if* (eq #xFE c-code) then
288 (format t "set unicode~%")
289 (setf (stream-external-format p)
290 (find-external-format #+(version>= 6 0 pre-final 1) :unicode
291 #-(version>= 6 0 pre-final 1) :fat-little))
293 (xml-error "stream has incomplete Unicode marker"))
294 else (setf (stream-external-format p)
295 (find-external-format :utf8))
297 (push c (iostruct-unget-char tokenbuf))
298 #+ignore (unread-char c p) ;; bug when there is single ^M in file
301 (defun add-default-values (val attlist-data)
302 (declare (ignorable old-coll) (optimize (speed 3) (safety 1)))
305 (let* ((tag-defaults (assoc val attlist-data)) defaults)
306 (dolist (def (rest tag-defaults))
307 (if* (stringp (third def)) then
308 (push (first def) defaults)
309 (push (if (eq (second def) :CDATA) (third def)
310 (normalize-attrib-value (third def))) defaults)
311 elseif (and (eq (third def) :FIXED) (stringp (fourth def))) then
312 (push (first def) defaults)
313 (push (if (eq (second def) :CDATA) (fourth def)
314 (normalize-attrib-value (fourth def))) defaults)
317 (setf val (append (list val) (nreverse defaults)))
321 ;; first make sure there are no errors in given list
322 (let ((pairs (rest val)))
324 (when (null pairs) (return))
325 (let ((this-one (first pairs)))
326 (setf pairs (rest (rest pairs)))
327 (when (member this-one pairs)
328 (xml-error (concatenate 'string "Entity: "
332 " attribute values"))))))
333 (let ((tag-defaults (assoc (first val) attlist-data)) defaults)
334 (dolist (def (rest tag-defaults))
335 (let ((old (member (first def) (rest val))))
337 (if* (stringp (third def)) then
338 (push (first def) defaults)
339 (push (third def) defaults)
340 elseif (and (eq (third def) :FIXED) (stringp (fourth def))) then
341 (push (first def) defaults)
342 (push (fourth def) defaults))
344 (push (first old) defaults)
345 (push (second old) defaults))))
347 ;; now look for attributes in original list that weren't in dtd
348 (let ((tmp-val (rest val)) att att-val)
350 (when (null tmp-val) (return))
351 (setf att (first tmp-val))
352 (setf att-val (second tmp-val))
353 (setf tmp-val (rest (rest tmp-val)))
354 (when (not (member att defaults))
356 (push att-val defaults))))
357 (setf val (append (list (first val)) (nreverse defaults)))
361 (defun normalize-public-value (public-value)
362 (setf public-value (string-trim '(#\space) public-value))
363 (let ((count 0) (stop (length public-value)) (last-ch nil) cch)
365 (when (= count stop) (return public-value))
366 (setf cch (schar public-value count))
367 (if* (and (eq cch #\space) (eq last-ch #\space)) then
369 (remove #\space public-value :start count :count 1))
372 (setf last-ch cch)))))
375 (defun normalize-attrib-value (attrib-value &optional first-pass)
376 (declare (optimize (speed 3) (safety 1)))
378 (let ((count 0) (stop (length attrib-value)) (last-ch nil) cch)
380 (when (= count stop) (return))
381 (setf cch (schar attrib-value count))
382 (if* (or (eq cch #\return) (eq cch #\tab)) then (setf (schar attrib-value count) #\space)
383 elseif (and (eq cch #\newline) (not (eq last-ch #\return))) then
384 (setf (schar attrib-value count) #\space)
385 elseif (and (eq cch #\newline) (eq last-ch #\return)) then
387 (remove #\space attrib-value :start count :count 1))
390 (setf last-ch cch))))
391 (setf attrib-value (string-trim '(#\space) attrib-value))
392 (let ((count 0) (stop (length attrib-value)) (last-ch nil) cch)
394 (when (= count stop) (return attrib-value))
395 (setf cch (schar attrib-value count))
396 (if* (and (eq cch #\space) (eq last-ch #\space)) then
398 (remove #\space attrib-value :start count :count 1))
401 (setf last-ch cch)))))
403 (defun check-xmldecl (val tokenbuf)
404 (declare (ignorable old-coll) (optimize (speed 3) (safety 1)))
405 (when (not (and (symbolp (second val)) (string= "version" (symbol-name (second val)))))
406 (xml-error "XML declaration tag does not include correct 'version' attribute"))
407 (when (and (fourth val)
408 (or (not (symbolp (fourth val)))
409 (and (not (string= "standalone" (symbol-name (fourth val))))
410 (not (string= "encoding" (symbol-name (fourth val)))))))
411 (xml-error "XML declaration tag does not include correct 'encoding' or 'standalone' attribute"))
412 (when (and (fourth val) (string= "standalone" (symbol-name (fourth val))))
413 (if* (equal (fifth val) "yes") then
414 (setf (iostruct-standalonep tokenbuf) t)
415 elseif (not (equal (fifth val) "no")) then
416 (xml-error "XML declaration tag does not include correct 'standalone' attribute value")))
417 (dotimes (i (length (third val)))
418 (let ((c (schar (third val) i)))
419 (when (and (not (alpha-char-p c))
420 (not (digit-char-p c))
421 (not (member c '(#\. #\_ #\- #\:)))
423 (xml-error "XML declaration tag does not include correct 'version' attribute value"))))
424 (when (and (fourth val) (eql :encoding (fourth val)))
425 (dotimes (i (length (fifth val)))
426 (let ((c (schar (fifth val) i)))
427 (when (and (not (alpha-char-p c))
429 (and (not (digit-char-p c))
430 (not (member c '(#\. #\_ #\-))))
432 (xml-error "XML declaration tag does not include correct 'encoding' attribute value")))))
435 (defun xml-error (text)
436 (declare (optimize (speed 3) (safety 1)))
437 (funcall 'error "~a" (concatenate 'string "XML not well-formed - " text)))