1 ;; -*- mode: common-lisp; package: puri -*-
2 ;; Support for URIs in Allegro.
3 ;; For general URI information see RFC2396.
5 ;; copyright (c) 1999-2001 Franz Inc, Berkeley, CA - All rights reserved.
6 ;; copyright (c) 2003 Kevin Rosenberg (porting changes)
8 ;; The software, data and information contained herein are proprietary
9 ;; to, and comprise valuable trade secrets of, Franz, Inc. They are
10 ;; given in confidence by Franz, Inc. pursuant to a written license
11 ;; agreement, and may be stored and used only in accordance with the terms
14 ;; Restricted Rights Legend
15 ;; ------------------------
16 ;; Use, duplication, and disclosure of the software, data and information
17 ;; contained herein by any agency, department or entity of the U.S.
18 ;; Government are subject to restrictions of Restricted Rights for
19 ;; Commercial Software developed at private expense as specified in
20 ;; DOD FAR Supplement 52.227-7013 (c) (1) (ii), as applicable.
22 ;; Original version from ACL 6.1:
23 ;; uri.cl,v 2.3.6.4.2.1 2001/08/09 17:42:39 layer
25 ;; $Id: src.lisp,v 1.2 2003/07/18 20:51:37 kevin Exp $
30 #:uri ; the type and a function
34 #:uri-scheme ; and slots
40 #:uri-authority ; pseudo-slot accessor
43 #:urn-nid ; pseudo-slot accessor
44 #:urn-nss ; pseudo-slot accessor
53 #:make-uri-space ; interning...
62 (eval-when (compile) (declaim (optimize (speed 3))))
64 (eval-when (:compile-toplevel :load-toplevel :execute)
65 (defvar if*-keyword-list '("then" "thenret" "else" "elseif")))
68 (define-condition parse-error (error)
73 (defun .parse-error (fmt &rest args)
74 #+allegro (apply #'excl::.parse-error fmt args)
76 (make-condition 'parse-error :format-control fmt
77 :format-arguments args)))
79 (defun internal-reader-error (stream fmt &rest args)
81 (apply #'excl::internal-reader-error stream fmt args)
83 (apply #'format stream
84 "#u takes a string or list argument: ~s" args))
86 #-allegro (defvar *current-case-mode* :case-insensitive-upper)
88 ;; From Larry Hunter with modifications
89 (defun position-char (char string start max)
90 (declare (optimize (speed 3) (safety 0) (space 0))
91 (fixnum start max) (simple-string string))
92 (do* ((i start (1+ i)))
95 (when (char= char (schar string i)) (return i))))
98 (defun delimited-string-to-list (string &optional (separator #\space))
99 (excl:delimited-string-to-list string))
101 (defun delimited-string-to-list (string &optional (separator #\space)
103 (declare (optimize (speed 3) (safety 0) (space 0)
104 (compilation-speed 0))
106 (type character separator))
107 (do* ((len (length string))
110 (end (position-char separator string pos len)
111 (position-char separator string pos len)))
114 (push (subseq string pos) output)
115 (when (or (not skip-terminal) (zerop len))
118 (declare (type fixnum pos len)
119 (type (or null fixnum) end))
120 (push (subseq string pos end) output)
121 (setq pos (1+ end))))
123 (defmacro if* (&rest args)
124 (do ((xx (reverse args) (cdr xx))
131 (cond ((eq state :compl)
133 (t (error "if*: illegal form ~s" args))))
134 (cond ((and (symbolp (car xx))
135 (member (symbol-name (car xx))
137 :test #'string-equal))
138 (setq lookat (symbol-name (car xx)))))
140 (cond ((eq state :init)
141 (cond (lookat (cond ((string-equal lookat "thenret")
145 "if*: bad keyword ~a" lookat))))
148 (push (car xx) col))))
151 (cond ((string-equal lookat "else")
154 "if*: multiples elses")))
157 (push `(t ,@col) totalcol))
158 ((string-equal lookat "then")
160 (t (error "if*: bad keyword ~s"
162 (t (push (car xx) col))))
166 "if*: keyword ~s at the wrong place " (car xx)))
167 (t (setq state :compl)
168 (push `(,(car xx) ,@col) totalcol))))
170 (cond ((not (string-equal lookat "elseif"))
171 (error "if*: missing elseif clause ")))
172 (setq state :init)))))
178 (scheme :initarg :scheme :initform nil :accessor uri-scheme)
179 (host :initarg :host :initform nil :accessor uri-host)
180 (port :initarg :port :initform nil :accessor uri-port)
181 (path :initarg :path :initform nil :accessor uri-path)
182 (query :initarg :query :initform nil :accessor uri-query)
183 (fragment :initarg :fragment :initform nil :accessor uri-fragment)
184 (plist :initarg :plist :initform nil :accessor uri-plist)
188 ;; used to prevent unnessary work, looking for chars to escape and
190 :initarg :escaped :initform nil :accessor uri-escaped)
192 ;; the cached printable representation of the URI. It *might* be
193 ;; different than the original string, though, because the user might
194 ;; have escaped non-reserved chars--they won't be escaped when the URI
196 :initarg :string :initform nil :accessor uri-string)
198 ;; the cached parsed representation of the URI path.
199 :initarg :parsed-path
201 :accessor .uri-parsed-path)
203 ;; cached sxhash, so we don't have to compute it more than once.
204 :initarg :hashcode :initform nil :accessor uri-hashcode)))
207 ((nid :initarg :nid :initform nil :accessor urn-nid)
208 (nss :initarg :nss :initform nil :accessor urn-nss)))
210 (eval-when (compile eval)
211 (defmacro clear-caching-on-slot-change (name)
212 `(defmethod (setf ,name) :around (new-value (self uri))
213 (declare (ignore new-value))
214 (prog1 (call-next-method)
215 (setf (uri-string self) nil)
216 ,@(when (eq name 'uri-path) `((setf (.uri-parsed-path self) nil)))
217 (setf (uri-hashcode self) nil))))
220 (clear-caching-on-slot-change uri-scheme)
221 (clear-caching-on-slot-change uri-host)
222 (clear-caching-on-slot-change uri-port)
223 (clear-caching-on-slot-change uri-path)
224 (clear-caching-on-slot-change uri-query)
225 (clear-caching-on-slot-change uri-fragment)
228 (defmethod make-load-form ((self uri) &optional env)
229 (declare (ignore env))
230 `(make-instance ',(class-name (class-of self))
231 :scheme ,(uri-scheme self)
232 :host ,(uri-host self)
233 :port ,(uri-port self)
234 :path ',(uri-path self)
235 :query ,(uri-query self)
236 :fragment ,(uri-fragment self)
237 :plist ',(uri-plist self)
238 :string ,(uri-string self)
239 :parsed-path ',(.uri-parsed-path self)))
241 (defmethod uri-p ((thing uri)) t)
242 (defmethod uri-p ((thing t)) nil)
246 (scheme (when uri (uri-scheme uri)))
247 (host (when uri (uri-host uri)))
248 (port (when uri (uri-port uri)))
249 (path (when uri (uri-path uri)))
251 (when uri (copy-list (.uri-parsed-path uri))))
252 (query (when uri (uri-query uri)))
253 (fragment (when uri (uri-fragment uri)))
254 (plist (when uri (copy-list (uri-plist uri))))
255 (class (when uri (class-of uri)))
256 &aux (escaped (when uri (uri-escaped uri))))
258 then (setf (uri-scheme place) scheme)
259 (setf (uri-host place) host)
260 (setf (uri-port place) port)
261 (setf (uri-path place) path)
262 (setf (.uri-parsed-path place) parsed-path)
263 (setf (uri-query place) query)
264 (setf (uri-fragment place) fragment)
265 (setf (uri-plist place) plist)
266 (setf (uri-escaped place) escaped)
267 (setf (uri-string place) nil)
268 (setf (uri-hashcode place) nil)
270 elseif (eq 'uri class)
271 then ;; allow the compiler to optimize the call to make-instance:
273 :scheme scheme :host host :port port :path path
274 :parsed-path parsed-path
275 :query query :fragment fragment :plist plist
276 :escaped escaped :string nil :hashcode nil)
277 else (make-instance class
278 :scheme scheme :host host :port port :path path
279 :parsed-path parsed-path
280 :query query :fragment fragment :plist plist
281 :escaped escaped :string nil :hashcode nil)))
283 (defmethod uri-parsed-path ((uri uri))
285 (when (null (.uri-parsed-path uri))
286 (setf (.uri-parsed-path uri)
287 (parse-path (uri-path uri) (uri-escaped uri))))
288 (.uri-parsed-path uri)))
290 (defmethod (setf uri-parsed-path) (path-list (uri uri))
291 (assert (and (consp path-list)
292 (or (member (car path-list) '(:absolute :relative)
294 (setf (uri-path uri) (render-parsed-path path-list t))
295 (setf (.uri-parsed-path uri) path-list)
298 (defun uri-authority (uri)
300 (let ((*print-pretty* nil))
301 (format nil "~a~@[:~a~]" (uri-host uri) (uri-port uri)))))
304 (if* (equalp "urn" (uri-scheme uri))
306 else (error "URI is not a URN: ~s." uri)))
309 (if* (equalp "urn" (uri-scheme uri))
311 else (error "URI is not a URN: ~s." uri)))
313 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
316 (defparameter *excluded-characters*
317 '(;; `delims' (except #\%, because it's handled specially):
318 #\< #\> #\" #\space #\#
320 #\{ #\} #\| #\\ #\^ #\[ #\] #\`))
322 (defun reserved-char-vector (chars &key except)
323 (do* ((a (make-array 127 :element-type 'bit :initial-element 0))
324 (chars chars (cdr chars))
325 (c (car chars) (car chars)))
327 (if* (and except (member c except :test #'char=))
329 else (setf (sbit a (char-int c)) 1))))
331 (defparameter *reserved-characters*
332 (reserved-char-vector
333 (append *excluded-characters*
334 '(#\; #\/ #\? #\: #\@ #\& #\= #\+ #\$ #\, #\%))))
335 (defparameter *reserved-authority-characters*
336 (reserved-char-vector
337 (append *excluded-characters* '(#\; #\/ #\? #\: #\@))))
338 (defparameter *reserved-path-characters*
339 (reserved-char-vector
340 (append *excluded-characters*
342 ;;;;The rfc says this should be here, but it doesn't make sense.
345 (defparameter *reserved-path-characters2*
346 ;; These are the same characters that are in
347 ;; *reserved-path-characters*, minus #\/. Why? Because the parsed
348 ;; representation of the path can contain the %2f converted into a /.
349 ;; That's the whole point of having the parsed representation, so that
350 ;; lisp programs can deal with the path element data in the most
352 (reserved-char-vector
353 (append *excluded-characters*
355 ;;;;The rfc says this should be here, but it doesn't make sense.
358 (defparameter *reserved-fragment-characters*
359 (reserved-char-vector (remove #\# *excluded-characters*)))
361 (eval-when (compile eval)
362 (defun gen-char-range-list (start end)
364 (endcode (1+ (char-int end)))
365 (chcode (char-int start)
369 ;; - has to be first, otherwise it signifies a range!
371 then (setq res (nreverse res))
374 else (nreverse res)))
375 (if* (= #.(char-int #\-) chcode)
377 else (push (code-char chcode) res))))
380 (defparameter *valid-nid-characters*
381 (reserved-char-vector
382 '#.(nconc (gen-char-range-list #\a #\z)
383 (gen-char-range-list #\A #\Z)
384 (gen-char-range-list #\0 #\9)
386 (defparameter *reserved-nss-characters*
387 (reserved-char-vector
388 (append *excluded-characters* '(#\& #\~ #\/ #\?))))
390 (defparameter *illegal-characters*
391 (reserved-char-vector (remove #\# *excluded-characters*)))
392 (defparameter *strict-illegal-query-characters*
393 (reserved-char-vector (append '(#\?) (remove #\# *excluded-characters*))))
394 (defparameter *illegal-query-characters*
395 (reserved-char-vector
396 *excluded-characters* :except '(#\^ #\| #\#)))
399 (defun parse-uri (thing &key (class 'uri) &aux escape)
400 (when (uri-p thing) (return-from parse-uri thing))
402 (setq escape (escape-p thing))
403 (multiple-value-bind (scheme host port path query fragment)
404 (parse-uri-string thing)
408 (case *current-case-mode*
409 ((:case-insensitive-upper :case-sensitive-upper)
411 ((:case-insensitive-lower :case-sensitive-lower)
413 (decode-escaped-encoding scheme escape))
414 (find-package :keyword))))
416 (when (and scheme (eq :urn scheme))
417 (return-from parse-uri
418 (make-instance 'urn :scheme scheme :nid host :nss path)))
420 (when host (setq host (decode-escaped-encoding host escape)))
422 (setq port (read-from-string port))
423 (when (not (numberp port)) (error "port is not a number: ~s." port))
424 (when (not (plusp port))
425 (error "port is not a positive integer: ~d." port))
426 (when (eql port (case scheme
432 (when (or (string= "" path)
433 (and ;; we canonicalize away a reference to just /:
435 (member scheme '(:http :https :ftp) :test #'eq)
440 (decode-escaped-encoding path escape *reserved-path-characters*)))
441 (when query (setq query (decode-escaped-encoding query escape)))
444 (decode-escaped-encoding fragment escape
445 *reserved-fragment-characters*)))
447 then ;; allow the compiler to optimize the make-instance call:
456 else ;; do it the slow way:
466 (defmethod uri ((thing uri))
469 (defmethod uri ((thing string))
472 (defmethod uri ((thing t))
473 (error "Cannot coerce ~s to a uri." thing))
475 (defvar *strict-parse* t)
477 (defun parse-uri-string (string &aux (illegal-chars *illegal-characters*))
478 (declare (optimize (speed 3)))
479 ;; Speed is important, so use a specialized state machine instead of
480 ;; regular expressions for parsing the URI string. The regexp we are
489 (end (length string))
494 (path-components '())
497 ;; namespace identifier, for urn parsing only:
499 (declare (fixnum state start end))
500 (flet ((read-token (kind &optional legal-chars)
504 else (let ((sindex start)
507 (declare (fixnum sindex))
510 (when (>= start end) (return nil))
511 (setq c (schar string start))
512 (let ((ci (char-int c)))
514 then (if* (and (eq :colon kind) (eq c #\:))
516 elseif (= 0 (sbit legal-chars ci))
519 URI ~s contains illegal character ~s at position ~d."
521 elseif (and (< ci 128)
523 (= 1 (sbit illegal-chars ci)))
524 then (.parse-error "~
525 URI ~s contains illegal character ~s at position ~d."
529 (#\? (return :question))
530 (#\# (return :hash))))
531 (:query (case c (#\# (return :hash))))
534 (#\: (return :colon))
535 (#\? (return :question))
537 (#\/ (return :slash)))))
539 (if* (> start sindex)
540 then ;; we found some chars
541 ;; before we stopped the parse
542 (setq tokval (subseq string sindex start))
544 else ;; immediately stopped at a special char
547 (failure (&optional why)
548 (.parse-error "illegal URI: ~s [~d]~@[: ~a~]"
551 (.parse-error "impossible state: ~d [~s]" state string)))
554 (0 ;; starting to parse
555 (ecase (read-token t)
557 (:question (setq state 7))
558 (:hash (setq state 8))
559 (:slash (setq state 3))
560 (:string (setq state 1))
561 (:end (setq state 9))))
562 (1 ;; seen <token><special char>
563 (let ((token tokval))
564 (ecase (read-token t)
565 (:colon (setq scheme token)
566 (if* (equalp "urn" scheme)
568 else (setq state 2)))
569 (:question (push token path-components)
571 (:hash (push token path-components)
573 (:slash (push token path-components)
574 (push "/" path-components)
577 (:end (push token path-components)
580 (ecase (read-token t)
582 (:question (setq state 7))
583 (:hash (setq state 8))
584 (:slash (setq state 3))
585 (:string (setq state 10))
586 (:end (setq state 9))))
587 (10 ;; seen <scheme>:<token>
588 (let ((token tokval))
589 (ecase (read-token t)
591 (:question (push token path-components)
593 (:hash (push token path-components)
595 (:slash (push token path-components)
598 (:end (push token path-components)
600 (3 ;; seen / or <scheme>:/
601 (ecase (read-token t)
603 (:question (push "/" path-components)
605 (:hash (push "/" path-components)
607 (:slash (setq state 4))
608 (:string (push "/" path-components)
609 (push tokval path-components)
611 (:end (push "/" path-components)
613 (4 ;; seen [<scheme>:]//
614 (ecase (read-token t)
616 (:question (failure))
619 (:string (setq host tokval)
622 (11 ;; seen [<scheme>:]//<host>
623 (ecase (read-token t)
624 (:colon (setq state 5))
625 (:question (setq state 7))
626 (:hash (setq state 8))
627 (:slash (push "/" path-components)
629 (:string (impossible))
630 (:end (setq state 9))))
631 (5 ;; seen [<scheme>:]//<host>:
632 (ecase (read-token t)
634 (:question (failure))
636 (:slash (push "/" path-components)
638 (:string (setq port tokval)
641 (12 ;; seen [<scheme>:]//<host>:[<port>]
642 (ecase (read-token t)
644 (:question (setq state 7))
645 (:hash (setq state 8))
646 (:slash (push "/" path-components)
648 (:string (impossible))
649 (:end (setq state 9))))
651 (ecase (read-token :path)
652 (:question (setq state 7))
653 (:hash (setq state 8))
654 (:string (push tokval path-components)
656 (:end (setq state 9))))
658 (ecase (read-token :path)
659 (:question (setq state 7))
660 (:hash (setq state 8))
661 (:string (impossible))
662 (:end (setq state 9))))
666 then *strict-illegal-query-characters*
667 else *illegal-query-characters*))
668 (ecase (prog1 (read-token :query)
669 (setq illegal-chars *illegal-characters*))
670 (:hash (setq state 8))
671 (:string (setq query tokval)
673 (:end (setq state 9))))
675 (ecase (read-token :query)
676 (:hash (setq state 8))
677 (:string (impossible))
678 (:end (setq state 9))))
680 (ecase (read-token :rest)
681 (:string (setq fragment tokval)
683 (:end (setq state 9))))
688 (apply #'concatenate 'simple-string (nreverse path-components))
691 (15 ;; seen urn:, read nid now
692 (case (read-token :colon *valid-nid-characters*)
693 (:string (setq nid tokval)
695 (t (failure "missing namespace identifier"))))
696 (16 ;; seen urn:<nid>
698 (:colon (setq state 17))
699 (t (failure "missing namespace specific string"))))
700 (17 ;; seen urn:<nid>:, rest is nss
701 (return (values scheme
705 (setq illegal-chars *reserved-nss-characters*)
709 "internal error in parse engine, wrong state: ~s." state)))))))
711 (defun escape-p (string)
712 (declare (optimize (speed 3)))
714 (max (the fixnum (length string))))
716 (declare (fixnum i max))
717 (when (char= #\% (schar string i))
720 (defun parse-path (path-string escape)
721 (do* ((xpath-list (delimited-string-to-list path-string #\/))
724 (if* (string= "" (car xpath-list))
725 then (setf (car xpath-list) :absolute)
726 else (push :relative xpath-list))
728 (pl (cdr path-list) (cdr pl))
730 ((null pl) path-list)
731 (if* (cdr (setq segments (delimited-string-to-list (car pl) #\;)))
732 then ;; there is a param
733 ;;; (setf (car pl) segments)
735 (mapcar #'(lambda (s)
736 (decode-escaped-encoding
737 s escape *reserved-path-characters2*))
740 ;;; (setf (car pl) (car segments))
742 (decode-escaped-encoding
743 (car segments) escape *reserved-path-characters2*)))))
745 (defun decode-escaped-encoding (string escape
746 &optional (reserved-chars
747 *reserved-characters*))
748 ;; Return a string with the real characters.
749 (when (null escape) (return-from decode-escaped-encoding string))
751 (max (length string))
752 (new-string (copy-seq string))
757 (excl::.primcall 'sys::shrink-svector new-string new-i)
759 (sb-kernel:shrink-vector new-string new-i)
761 (subseq new-string 0 new-i)
763 (if* (char= #\% (setq ch (schar string i)))
764 then (when (> (+ i 3) max)
766 "Unsyntactic escaped encoding in ~s." string))
767 (setq ch (schar string (incf i)))
768 (setq ch2 (schar string (incf i)))
769 (when (not (and (setq chc (digit-char-p ch 16))
770 (setq chc2 (digit-char-p ch2 16))))
772 "Non-hexidecimal digits after %: %c%c." ch ch2))
773 (let ((ci (+ (* 16 chc) chc2)))
774 (if* (or (null reserved-chars)
775 (= 0 (sbit reserved-chars ci)))
777 (setf (schar new-string new-i)
779 else (setf (schar new-string new-i) #\%)
780 (setf (schar new-string (incf new-i)) ch)
781 (setf (schar new-string (incf new-i)) ch2)))
782 else (setf (schar new-string new-i) ch))))
784 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
787 (defun render-uri (uri stream
788 &aux (escape (uri-escaped uri))
789 (*print-pretty* nil))
790 (when (null (uri-string uri))
791 (setf (uri-string uri)
792 (let ((scheme (uri-scheme uri))
793 (host (uri-host uri))
794 (port (uri-port uri))
795 (path (uri-path uri))
796 (query (uri-query uri))
797 (fragment (uri-fragment uri)))
798 (concatenate 'simple-string
800 (encode-escaped-encoding
801 (string-downcase ;; for upper case lisps
802 (symbol-name scheme))
803 *reserved-characters* escape))
807 (encode-escaped-encoding
808 host *reserved-authority-characters* escape))
811 ;;;; too slow until ACL 6.0:
812 ;;; (format nil "~d" port)
813 ;;; (princ-to-string port)
814 #-allegro (princ-to-string port)
816 (with-output-to-string (s)
817 (excl::maybe-print-fast s port))
820 (encode-escaped-encoding path
822 ;;*reserved-path-characters*
825 (when query (encode-escaped-encoding query nil escape))
827 (when fragment (encode-escaped-encoding fragment nil escape))))))
829 then (format stream "~a" (uri-string uri))
830 else (uri-string uri)))
832 (defun render-parsed-path (path-list escape)
834 (first (car path-list))
835 (pl (cdr path-list) (cdr pl))
836 (pe (car pl) (car pl)))
838 (when res (apply #'concatenate 'simple-string (nreverse res))))
839 (when (or (null first)
840 (prog1 (eq :absolute first)
845 (encode-escaped-encoding pe *reserved-path-characters* escape)
847 else ;; contains params
848 (push (encode-escaped-encoding
849 (car pe) *reserved-path-characters* escape)
851 (dolist (item (cdr pe))
853 (push (encode-escaped-encoding
854 item *reserved-path-characters* escape)
857 (defun render-urn (urn stream
858 &aux (*print-pretty* nil))
859 (when (null (uri-string urn))
860 (setf (uri-string urn)
861 (let ((nid (urn-nid urn))
863 (concatenate 'simple-string "urn:" nid ":" nss))))
865 then (format stream "~a" (uri-string urn))
866 else (uri-string urn)))
868 (defparameter *escaped-encoding*
869 (vector #\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9 #\a #\b #\c #\d #\e #\f))
871 (defun encode-escaped-encoding (string reserved-chars escape)
872 (when (null escape) (return-from encode-escaped-encoding string))
873 ;; Make a string as big as it possibly needs to be (3 times the original
874 ;; size), and truncate it at the end.
875 (do* ((max (length string))
876 (new-max (* 3 max)) ;; worst case new size
877 (new-string (make-string new-max))
883 (excl::.primcall 'sys::shrink-svector new-string (incf new-i))
885 (sb-kernel:shrink-vector new-string (incf new-i))
887 (subseq new-string 0 (incf new-i))
889 (setq ci (char-int (setq c (schar string i))))
890 (if* (or (null reserved-chars)
892 (= 0 (sbit reserved-chars ci)))
895 (setf (schar new-string new-i) c)
896 else ;; need to escape it
897 (multiple-value-bind (q r) (truncate ci 16)
898 (setf (schar new-string (incf new-i)) #\%)
899 (setf (schar new-string (incf new-i)) (elt *escaped-encoding* q))
900 (setf (schar new-string (incf new-i))
901 (elt *escaped-encoding* r))))))
903 (defmethod print-object ((uri uri) stream)
905 then (format stream "#<~a ~a>" 'uri (render-uri uri nil))
906 else (render-uri uri stream)))
908 (defmethod print-object ((urn urn) stream)
910 then (format stream "#<~a ~a>" 'uri (render-urn urn nil))
911 else (render-urn urn stream)))
913 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
914 ;; merging and unmerging
916 (defmethod merge-uris ((uri string) (base string) &optional place)
917 (merge-uris (parse-uri uri) (parse-uri base) place))
919 (defmethod merge-uris ((uri uri) (base string) &optional place)
920 (merge-uris uri (parse-uri base) place))
922 (defmethod merge-uris ((uri string) (base uri) &optional place)
923 (merge-uris (parse-uri uri) base place))
925 (defmethod merge-uris ((uri uri) (base uri) &optional place)
926 ;; The following is from
927 ;; http://info.internet.isi.edu/in-notes/rfc/files/rfc2396.txt
928 ;; and is algorithm we use to merge URIs.
930 ;; For more information, see section 5.2 of the RFC.
934 (when (and (null (uri-parsed-path uri))
935 (null (uri-scheme uri))
936 (null (uri-host uri))
937 (null (uri-port uri))
938 (null (uri-query uri)))
939 (return-from merge-uris
940 (let ((new (copy-uri base :place place)))
941 (when (uri-query uri)
942 (setf (uri-query new) (uri-query uri)))
943 (when (uri-fragment uri)
944 (setf (uri-fragment new) (uri-fragment uri)))
947 (setq uri (copy-uri uri :place place))
950 (when (uri-scheme uri)
951 (return-from merge-uris uri))
952 (setf (uri-scheme uri) (uri-scheme base))
955 (when (uri-host uri) (go :done))
956 (setf (uri-host uri) (uri-host base))
957 (setf (uri-port uri) (uri-port base))
960 (let ((p (uri-parsed-path uri)))
961 (when (and p (eq :absolute (car p)))
962 (when (equal '(:absolute "") p)
963 ;; Canonicalize the way parsing does:
964 (setf (uri-path uri) nil))
969 (or (uri-parsed-path base)
970 ;; needed because we canonicalize away a path of just `/':
972 (path (uri-parsed-path uri))
974 (when (not (eq :absolute (car base-path)))
975 (error "Cannot merge ~a and ~a, since latter is not absolute."
980 (append (butlast base-path)
981 (if* path then (cdr path) else '(""))))
984 (let ((last (last new-path-list)))
985 (if* (atom (car last))
986 then (when (string= "." (car last))
987 (setf (car last) ""))
988 else (when (string= "." (caar last))
989 (setf (caar last) ""))))
991 (delete "." new-path-list :test #'(lambda (a b)
997 (let ((npl (cdr new-path-list))
1000 (string= ".." (let ((l (car (last npl))))
1007 :test #'(lambda (a b)
1012 (when (null index) (return))
1014 ;; The RFC says, in 6g, "that the implementation may handle
1015 ;; this error by retaining these components in the resolved
1016 ;; path, by removing them from the resolved path, or by
1017 ;; avoiding traversal of the reference." The examples in C.2
1018 ;; imply that we should do the first thing (retain them), so
1019 ;; that's what we'll do.
1022 then (setq npl (cddr npl))
1024 (dotimes (x (- index 2)) (setq tmp (cdr tmp)))
1025 (setf (cdr tmp) (cdddr tmp))))
1026 (setf (cdr new-path-list) npl)
1027 (when fix-tail (setq new-path-list (nconc new-path-list '("")))))
1030 ;; don't complain if new-path-list starts with `..'. See comment
1031 ;; above about this step.
1034 (when (or (equal '(:absolute "") new-path-list)
1035 (equal '(:absolute) new-path-list))
1036 (setq new-path-list nil))
1037 (setf (uri-path uri)
1038 (render-parsed-path new-path-list
1039 ;; don't know, so have to assume:
1044 (return-from merge-uris uri)))
1046 (defmethod enough-uri ((uri string) (base string) &optional place)
1047 (enough-uri (parse-uri uri) (parse-uri base) place))
1049 (defmethod enough-uri ((uri uri) (base string) &optional place)
1050 (enough-uri uri (parse-uri base) place))
1052 (defmethod enough-uri ((uri string) (base uri) &optional place)
1053 (enough-uri (parse-uri uri) base place))
1055 (defmethod enough-uri ((uri uri) (base uri) &optional place)
1056 (let ((new-scheme nil)
1059 (new-parsed-path nil))
1061 (when (or (and (uri-scheme uri)
1062 (not (equalp (uri-scheme uri) (uri-scheme base))))
1064 (not (equalp (uri-host uri) (uri-host base))))
1065 (not (equalp (uri-port uri) (uri-port base))))
1066 (return-from enough-uri uri))
1068 (when (null (uri-host uri))
1069 (setq new-host (uri-host base)))
1070 (when (null (uri-port uri))
1071 (setq new-port (uri-port base)))
1073 (when (null (uri-scheme uri))
1074 (setq new-scheme (uri-scheme base)))
1076 ;; Now, for the hard one, path.
1077 ;; We essentially do here what enough-namestring does.
1078 (do* ((base-path (uri-parsed-path base))
1079 (path (uri-parsed-path uri))
1080 (bp base-path (cdr bp))
1082 ((or (null bp) (null p))
1083 ;; If p is nil, that means we have something like
1084 ;; (enough-uri "/foo/bar" "/foo/bar/baz.htm"), so
1085 ;; new-parsed-path will be nil.
1087 (setq new-parsed-path (copy-list p))
1088 (when (not (symbolp (car new-parsed-path)))
1089 (push :relative new-parsed-path))))
1090 (if* (equal (car bp) (car p))
1092 else (setq new-parsed-path (copy-list p))
1093 (when (not (symbolp (car new-parsed-path)))
1094 (push :relative new-parsed-path))
1098 (when new-parsed-path
1099 (render-parsed-path new-parsed-path
1100 ;; don't know, so have to assume:
1102 (new-query (uri-query uri))
1103 (new-fragment (uri-fragment uri))
1104 (new-plist (copy-list (uri-plist uri))))
1105 (if* (and (null new-scheme)
1109 (null new-parsed-path)
1111 (null new-fragment))
1112 then ;; can't have a completely empty uri!
1114 :class (class-of uri)
1119 :class (class-of uri)
1125 :parsed-path new-parsed-path
1127 :fragment new-fragment
1128 :plist new-plist)))))
1130 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1131 ;; support for interning URIs
1133 (defun make-uri-space (&rest keys &key (size 777) &allow-other-keys)
1135 (apply #'make-hash-table :size size
1136 :hash-function 'uri-hash
1137 :test 'uri= :values nil keys)
1139 (apply #'make-hash-table :size size keys))
1141 (defun gethash-uri (uri table)
1142 #+allegro (gethash uri table)
1144 (let* ((hash (uri-hash uri))
1145 (existing (gethash hash table)))
1146 (dolist (u existing)
1148 (return-from gethash-uri (values u t))))
1151 (defun puthash-uri (uri table)
1152 #+allegro (excl:puthash-key uri table)
1154 (let ((existing (gethash (uri-hash uri) table)))
1155 (dolist (u existing)
1157 (return-from puthash-uri u)))
1158 (setf (gethash (uri-hash uri) table)
1159 (cons uri existing))
1163 (defun uri-hash (uri)
1164 (if* (uri-hashcode uri)
1166 else (setf (uri-hashcode uri)
1169 (render-uri uri nil)
1172 (render-uri uri nil))))))
1174 (defvar *uris* (make-uri-space))
1176 (defun uri-space () *uris*)
1178 (defun (setf uri-space) (new-val)
1179 (setq *uris* new-val))
1181 ;; bootstrapping (uri= changed from function to method):
1182 (when (fboundp 'uri=) (fmakunbound 'uri=))
1184 (defmethod uri= ((uri1 uri) (uri2 uri))
1185 (when (not (eq (uri-scheme uri1) (uri-scheme uri2)))
1186 (return-from uri= nil))
1187 ;; RFC2396 says: a URL with an explicit ":port", where the port is
1188 ;; the default for the scheme, is the equivalent to one where the
1189 ;; port is elided. Hmmmm. This means that this function has to be
1190 ;; scheme dependent. Grrrr.
1191 (let ((default-port (case (uri-scheme uri1)
1196 (and (equalp (uri-host uri1) (uri-host uri2))
1197 (eql (or (uri-port uri1) default-port)
1198 (or (uri-port uri2) default-port))
1199 (string= (uri-path uri1) (uri-path uri2))
1200 (string= (uri-query uri1) (uri-query uri2))
1201 (string= (uri-fragment uri1) (uri-fragment uri2)))))
1203 (defmethod uri= ((urn1 urn) (urn2 urn))
1204 (when (not (eq (uri-scheme urn1) (uri-scheme urn2)))
1205 (return-from uri= nil))
1206 (and (equalp (urn-nid urn1) (urn-nid urn2))
1207 (urn-nss-equal (urn-nss urn1) (urn-nss urn2))))
1209 (defun urn-nss-equal (nss1 nss2 &aux len)
1210 ;; Return t iff the nss values are the same.
1211 ;; %2c and %2C are equivalent.
1212 (when (or (null nss1) (null nss2)
1213 (not (= (setq len (length nss1))
1215 (return-from urn-nss-equal nil))
1220 (setq c1 (schar nss1 i))
1221 (setq c2 (schar nss2 i))
1224 (if* (and (char= #\% c1) (char= #\% c2))
1225 then (setq state :percent+1)
1226 elseif (char/= c1 c2)
1229 (when (char-not-equal c1 c2) (return nil))
1230 (setq state :percent+2))
1232 (when (char-not-equal c1 c2) (return nil))
1233 (setq state :char)))))
1235 (defmethod intern-uri ((xuri uri) &optional (uri-space *uris*))
1236 (let ((uri (gethash-uri xuri uri-space)))
1239 else (puthash-uri xuri uri-space))))
1241 (defmethod intern-uri ((uri string) &optional (uri-space *uris*))
1242 (intern-uri (parse-uri uri) uri-space))
1244 (defun unintern-uri (uri &optional (uri-space *uris*))
1246 then (clrhash uri-space)
1248 then (remhash uri uri-space)
1249 else (error "bad uri: ~s." uri)))
1251 (defmacro do-all-uris ((var &optional uri-space result-form)
1254 "do-all-uris (var [[uri-space] result-form])
1255 {declaration}* {tag | statement}*
1256 Executes the forms once for each uri with var bound to the current uri"
1259 (g-uri-space (gensym))
1260 (body #+allegro (third (excl::parse-body forms env))
1262 `(let ((,g-uri-space (or ,uri-space *uris*)))
1264 (flet ((,f (,var &optional ,g-ignore)
1265 (declare (ignore-if-unused ,var ,g-ignore))
1267 (maphash #',f ,g-uri-space))
1268 (return ,result-form)))))
1270 (defun sharp-u (stream chr arg)
1271 (declare (ignore chr arg))
1272 (let ((arg (read stream nil nil t)))
1276 then (parse-uri arg)
1279 (internal-reader-error
1281 "#u takes a string or list argument: ~s" arg)))))
1286 (locally (declare (special std-lisp-readtable))
1287 (let ((*readtable* std-lisp-readtable))
1288 (set-dispatch-macro-character #\# #\u #'puri::sharp-u)))
1290 (set-dispatch-macro-character #\# #\u #'puri::sharp-u)
1292 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1296 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1298 ;; (don't run under emacs with M-x fi:common-lisp)
1301 (defun time-uri-module ()
1302 (declare (optimize (speed 3) (safety 0) (debug 0)))
1303 (let ((uri "http://www.franz.com/a/b;x;y;z/c/foo?bar=baz&xxx#foo")
1304 (uri2 "http://www.franz.com/a/b;x;y;z/c/%2ffoo?bar=baz&xxx#foo"))
1305 (gc t) (gc :tenure) (gc :tenure) (gc :tenure)
1306 (format t "~&;;; starting timing testing 1...~%")
1307 (time (dotimes (i 100000) (parse-uri uri)))
1309 (gc t) (gc :tenure) (gc :tenure) (gc :tenure)
1310 (format t "~&;;; starting timing testing 2...~%")
1311 (let ((uri (parse-uri uri)))
1312 (time (dotimes (i 100000)
1313 ;; forces no caching of the printed representation:
1314 (setf (uri-string uri) nil)
1315 (format nil "~a" uri))))
1317 (gc t) (gc :tenure) (gc :tenure) (gc :tenure)
1318 (format t "~&;;; starting timing testing 3...~%")
1321 (dotimes (i 100000) (parse-uri uri2))
1322 (let ((uri (parse-uri uri)))
1324 ;; forces no caching of the printed representation:
1325 (setf (uri-string uri) nil)
1326 (format nil "~a" uri)))))))
1328 ;;******** reference output (ultra, modified 5.0.1):
1329 ;;; starting timing testing 1...
1330 ; cpu time (non-gc) 13,710 msec user, 0 msec system
1331 ; cpu time (gc) 600 msec user, 10 msec system
1332 ; cpu time (total) 14,310 msec user, 10 msec system
1333 ; real time 14,465 msec
1335 ; 1,804,261 cons cells, 7 symbols, 41,628,832 other bytes, 0 static bytes
1336 ;;; starting timing testing 2...
1337 ; cpu time (non-gc) 27,500 msec user, 0 msec system
1338 ; cpu time (gc) 280 msec user, 20 msec system
1339 ; cpu time (total) 27,780 msec user, 20 msec system
1340 ; real time 27,897 msec
1342 ; 1,900,463 cons cells, 0 symbols, 17,693,712 other bytes, 0 static bytes
1343 ;;; starting timing testing 3...
1344 ; cpu time (non-gc) 52,290 msec user, 10 msec system
1345 ; cpu time (gc) 1,290 msec user, 30 msec system
1346 ; cpu time (total) 53,580 msec user, 40 msec system
1347 ; real time 54,062 msec
1349 ; 7,800,205 cons cells, 0 symbols, 81,697,496 other bytes, 0 static bytes
1351 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1352 ;;; after improving decode-escaped-encoding/encode-escaped-encoding:
1354 ;;; starting timing testing 1...
1355 ; cpu time (non-gc) 14,520 msec user, 0 msec system
1356 ; cpu time (gc) 400 msec user, 0 msec system
1357 ; cpu time (total) 14,920 msec user, 0 msec system
1358 ; real time 15,082 msec
1360 ; 1,800,270 cons cells, 0 symbols, 41,600,160 other bytes, 0 static bytes
1361 ;;; starting timing testing 2...
1362 ; cpu time (non-gc) 27,490 msec user, 10 msec system
1363 ; cpu time (gc) 300 msec user, 0 msec system
1364 ; cpu time (total) 27,790 msec user, 10 msec system
1365 ; real time 28,025 msec
1367 ; 1,900,436 cons cells, 0 symbols, 17,693,712 other bytes, 0 static bytes
1368 ;;; starting timing testing 3...
1369 ; cpu time (non-gc) 47,900 msec user, 20 msec system
1370 ; cpu time (gc) 920 msec user, 10 msec system
1371 ; cpu time (total) 48,820 msec user, 30 msec system
1372 ; real time 49,188 msec
1374 ; 3,700,215 cons cells, 0 symbols, 81,707,144 other bytes, 0 static bytes