1 ;; -*- mode: common-lisp; package: puri -*-
2 ;; Support for URIs in Allegro.
3 ;; For general URI information see RFC2396.
5 ;; copyright (c) 1999-2001 Franz Inc, Berkeley, CA - All rights reserved.
6 ;; copyright (c) 2003 Kevin Rosenberg (porting changes)
8 ;; The software, data and information contained herein are proprietary
9 ;; to, and comprise valuable trade secrets of, Franz, Inc. They are
10 ;; given in confidence by Franz, Inc. pursuant to a written license
11 ;; agreement, and may be stored and used only in accordance with the terms
14 ;; Restricted Rights Legend
15 ;; ------------------------
16 ;; Use, duplication, and disclosure of the software, data and information
17 ;; contained herein by any agency, department or entity of the U.S.
18 ;; Government are subject to restrictions of Restricted Rights for
19 ;; Commercial Software developed at private expense as specified in
20 ;; DOD FAR Supplement 52.227-7013 (c) (1) (ii), as applicable.
22 ;; Original version from ACL 6.1:
23 ;; uri.cl,v 2.3.6.4.2.1 2001/08/09 17:42:39 layer
25 ;; $Id: src.lisp,v 1.5 2003/07/19 13:34:12 kevin Exp $
30 #:uri ; the type and a function
34 #:uri-scheme ; and slots
40 #:uri-authority ; pseudo-slot accessor
43 #:urn-nid ; pseudo-slot accessor
44 #:urn-nss ; pseudo-slot accessor
53 #:make-uri-space ; interning...
62 (eval-when (compile) (declaim (optimize (speed 3))))
66 #-(or allegro lispworks)
67 (define-condition parse-error (error) ())
69 (defun shrink-vector (str size)
71 (excl::.primcall 'sys::shrink-svector str size)
73 (sb-kernel:shrink-vector str size)
75 (lisp::shrink-vector str size)
77 (system::shrink-vector$vector str size)
78 #+(or allegro cmu sbcl lispworks)
80 #-(or allegro cmu sbcl lispworks)
81 (subseq new-string 0 (incf new-i)))
84 (defun .parse-error (fmt &rest args)
85 #+allegro (apply #'excl::.parse-error fmt args)
87 (make-condition 'parse-error :format-control fmt
88 :format-arguments args)))
90 (defun internal-reader-error (stream fmt &rest args)
92 (apply #'excl::internal-reader-error stream fmt args)
94 (apply #'format stream
95 "#u takes a string or list argument: ~s" args))
97 #-allegro (defvar *current-case-mode* :case-insensitive-upper)
98 #+allegro (eval-when (compile load eval)
99 (import '(excl:*current-case-mode*
100 excl:delimited-string-to-list
104 (defun position-char (char string start max)
105 (declare (optimize (speed 3) (safety 0) (space 0))
106 (fixnum start max) (simple-string string))
107 (do* ((i start (1+ i)))
110 (when (char= char (schar string i)) (return i))))
113 (defun delimited-string-to-list (string &optional (separator #\space)
115 (declare (optimize (speed 3) (safety 0) (space 0)
116 (compilation-speed 0))
118 (type character separator))
119 (do* ((len (length string))
122 (end (position-char separator string pos len)
123 (position-char separator string pos len)))
126 (push (subseq string pos) output)
127 (when (or (not skip-terminal) (zerop len))
130 (declare (type fixnum pos len)
131 (type (or null fixnum) end))
132 (push (subseq string pos end) output)
133 (setq pos (1+ end))))
136 (eval-when (:compile-toplevel :load-toplevel :execute)
137 (defvar if*-keyword-list '("then" "thenret" "else" "elseif"))
139 (defmacro if* (&rest args)
140 (do ((xx (reverse args) (cdr xx))
147 (cond ((eq state :compl)
149 (t (error "if*: illegal form ~s" args))))
150 (cond ((and (symbolp (car xx))
151 (member (symbol-name (car xx))
153 :test #'string-equal))
154 (setq lookat (symbol-name (car xx)))))
156 (cond ((eq state :init)
157 (cond (lookat (cond ((string-equal lookat "thenret")
161 "if*: bad keyword ~a" lookat))))
164 (push (car xx) col))))
167 (cond ((string-equal lookat "else")
170 "if*: multiples elses")))
173 (push `(t ,@col) totalcol))
174 ((string-equal lookat "then")
176 (t (error "if*: bad keyword ~s"
178 (t (push (car xx) col))))
182 "if*: keyword ~s at the wrong place " (car xx)))
183 (t (setq state :compl)
184 (push `(,(car xx) ,@col) totalcol))))
186 (cond ((not (string-equal lookat "elseif"))
187 (error "if*: missing elseif clause ")))
188 (setq state :init))))))
194 (scheme :initarg :scheme :initform nil :accessor uri-scheme)
195 (host :initarg :host :initform nil :accessor uri-host)
196 (port :initarg :port :initform nil :accessor uri-port)
197 (path :initarg :path :initform nil :accessor uri-path)
198 (query :initarg :query :initform nil :accessor uri-query)
199 (fragment :initarg :fragment :initform nil :accessor uri-fragment)
200 (plist :initarg :plist :initform nil :accessor uri-plist)
204 ;; used to prevent unnessary work, looking for chars to escape and
206 :initarg :escaped :initform nil :accessor uri-escaped)
208 ;; the cached printable representation of the URI. It *might* be
209 ;; different than the original string, though, because the user might
210 ;; have escaped non-reserved chars--they won't be escaped when the URI
212 :initarg :string :initform nil :accessor uri-string)
214 ;; the cached parsed representation of the URI path.
215 :initarg :parsed-path
217 :accessor .uri-parsed-path)
219 ;; cached sxhash, so we don't have to compute it more than once.
220 :initarg :hashcode :initform nil :accessor uri-hashcode)))
223 ((nid :initarg :nid :initform nil :accessor urn-nid)
224 (nss :initarg :nss :initform nil :accessor urn-nss)))
226 (eval-when (compile eval)
227 (defmacro clear-caching-on-slot-change (name)
228 `(defmethod (setf ,name) :around (new-value (self uri))
229 (declare (ignore new-value))
230 (prog1 (call-next-method)
231 (setf (uri-string self) nil)
232 ,@(when (eq name 'uri-path) `((setf (.uri-parsed-path self) nil)))
233 (setf (uri-hashcode self) nil))))
236 (clear-caching-on-slot-change uri-scheme)
237 (clear-caching-on-slot-change uri-host)
238 (clear-caching-on-slot-change uri-port)
239 (clear-caching-on-slot-change uri-path)
240 (clear-caching-on-slot-change uri-query)
241 (clear-caching-on-slot-change uri-fragment)
244 (defmethod make-load-form ((self uri) &optional env)
245 (declare (ignore env))
246 `(make-instance ',(class-name (class-of self))
247 :scheme ,(uri-scheme self)
248 :host ,(uri-host self)
249 :port ,(uri-port self)
250 :path ',(uri-path self)
251 :query ,(uri-query self)
252 :fragment ,(uri-fragment self)
253 :plist ',(uri-plist self)
254 :string ,(uri-string self)
255 :parsed-path ',(.uri-parsed-path self)))
257 (defmethod uri-p ((thing uri)) t)
258 (defmethod uri-p ((thing t)) nil)
262 (scheme (when uri (uri-scheme uri)))
263 (host (when uri (uri-host uri)))
264 (port (when uri (uri-port uri)))
265 (path (when uri (uri-path uri)))
267 (when uri (copy-list (.uri-parsed-path uri))))
268 (query (when uri (uri-query uri)))
269 (fragment (when uri (uri-fragment uri)))
270 (plist (when uri (copy-list (uri-plist uri))))
271 (class (when uri (class-of uri)))
272 &aux (escaped (when uri (uri-escaped uri))))
274 then (setf (uri-scheme place) scheme)
275 (setf (uri-host place) host)
276 (setf (uri-port place) port)
277 (setf (uri-path place) path)
278 (setf (.uri-parsed-path place) parsed-path)
279 (setf (uri-query place) query)
280 (setf (uri-fragment place) fragment)
281 (setf (uri-plist place) plist)
282 (setf (uri-escaped place) escaped)
283 (setf (uri-string place) nil)
284 (setf (uri-hashcode place) nil)
286 elseif (eq 'uri class)
287 then ;; allow the compiler to optimize the call to make-instance:
289 :scheme scheme :host host :port port :path path
290 :parsed-path parsed-path
291 :query query :fragment fragment :plist plist
292 :escaped escaped :string nil :hashcode nil)
293 else (make-instance class
294 :scheme scheme :host host :port port :path path
295 :parsed-path parsed-path
296 :query query :fragment fragment :plist plist
297 :escaped escaped :string nil :hashcode nil)))
299 (defmethod uri-parsed-path ((uri uri))
301 (when (null (.uri-parsed-path uri))
302 (setf (.uri-parsed-path uri)
303 (parse-path (uri-path uri) (uri-escaped uri))))
304 (.uri-parsed-path uri)))
306 (defmethod (setf uri-parsed-path) (path-list (uri uri))
307 (assert (and (consp path-list)
308 (or (member (car path-list) '(:absolute :relative)
310 (setf (uri-path uri) (render-parsed-path path-list t))
311 (setf (.uri-parsed-path uri) path-list)
314 (defun uri-authority (uri)
316 (let ((*print-pretty* nil))
317 (format nil "~a~@[:~a~]" (uri-host uri) (uri-port uri)))))
320 (if* (equalp "urn" (uri-scheme uri))
322 else (error "URI is not a URN: ~s." uri)))
325 (if* (equalp "urn" (uri-scheme uri))
327 else (error "URI is not a URN: ~s." uri)))
329 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
332 (defparameter *excluded-characters*
333 '(;; `delims' (except #\%, because it's handled specially):
334 #\< #\> #\" #\space #\#
336 #\{ #\} #\| #\\ #\^ #\[ #\] #\`))
338 (defun reserved-char-vector (chars &key except)
339 (do* ((a (make-array 127 :element-type 'bit :initial-element 0))
340 (chars chars (cdr chars))
341 (c (car chars) (car chars)))
343 (if* (and except (member c except :test #'char=))
345 else (setf (sbit a (char-int c)) 1))))
347 (defparameter *reserved-characters*
348 (reserved-char-vector
349 (append *excluded-characters*
350 '(#\; #\/ #\? #\: #\@ #\& #\= #\+ #\$ #\, #\%))))
351 (defparameter *reserved-authority-characters*
352 (reserved-char-vector
353 (append *excluded-characters* '(#\; #\/ #\? #\: #\@))))
354 (defparameter *reserved-path-characters*
355 (reserved-char-vector
356 (append *excluded-characters*
358 ;;;;The rfc says this should be here, but it doesn't make sense.
361 (defparameter *reserved-path-characters2*
362 ;; These are the same characters that are in
363 ;; *reserved-path-characters*, minus #\/. Why? Because the parsed
364 ;; representation of the path can contain the %2f converted into a /.
365 ;; That's the whole point of having the parsed representation, so that
366 ;; lisp programs can deal with the path element data in the most
368 (reserved-char-vector
369 (append *excluded-characters*
371 ;;;;The rfc says this should be here, but it doesn't make sense.
374 (defparameter *reserved-fragment-characters*
375 (reserved-char-vector (remove #\# *excluded-characters*)))
377 (eval-when (compile eval)
378 (defun gen-char-range-list (start end)
380 (endcode (1+ (char-int end)))
381 (chcode (char-int start)
385 ;; - has to be first, otherwise it signifies a range!
387 then (setq res (nreverse res))
390 else (nreverse res)))
391 (if* (= #.(char-int #\-) chcode)
393 else (push (code-char chcode) res))))
396 (defparameter *valid-nid-characters*
397 (reserved-char-vector
398 '#.(nconc (gen-char-range-list #\a #\z)
399 (gen-char-range-list #\A #\Z)
400 (gen-char-range-list #\0 #\9)
402 (defparameter *reserved-nss-characters*
403 (reserved-char-vector
404 (append *excluded-characters* '(#\& #\~ #\/ #\?))))
406 (defparameter *illegal-characters*
407 (reserved-char-vector (remove #\# *excluded-characters*)))
408 (defparameter *strict-illegal-query-characters*
409 (reserved-char-vector (append '(#\?) (remove #\# *excluded-characters*))))
410 (defparameter *illegal-query-characters*
411 (reserved-char-vector
412 *excluded-characters* :except '(#\^ #\| #\#)))
415 (defun parse-uri (thing &key (class 'uri) &aux escape)
416 (when (uri-p thing) (return-from parse-uri thing))
418 (setq escape (escape-p thing))
419 (multiple-value-bind (scheme host port path query fragment)
420 (parse-uri-string thing)
424 (case *current-case-mode*
425 ((:case-insensitive-upper :case-sensitive-upper)
427 ((:case-insensitive-lower :case-sensitive-lower)
429 (decode-escaped-encoding scheme escape))
430 (find-package :keyword))))
432 (when (and scheme (eq :urn scheme))
433 (return-from parse-uri
434 (make-instance 'urn :scheme scheme :nid host :nss path)))
436 (when host (setq host (decode-escaped-encoding host escape)))
438 (setq port (read-from-string port))
439 (when (not (numberp port)) (error "port is not a number: ~s." port))
440 (when (not (plusp port))
441 (error "port is not a positive integer: ~d." port))
442 (when (eql port (case scheme
448 (when (or (string= "" path)
449 (and ;; we canonicalize away a reference to just /:
451 (member scheme '(:http :https :ftp) :test #'eq)
456 (decode-escaped-encoding path escape *reserved-path-characters*)))
457 (when query (setq query (decode-escaped-encoding query escape)))
460 (decode-escaped-encoding fragment escape
461 *reserved-fragment-characters*)))
463 then ;; allow the compiler to optimize the make-instance call:
472 else ;; do it the slow way:
482 (defmethod uri ((thing uri))
485 (defmethod uri ((thing string))
488 (defmethod uri ((thing t))
489 (error "Cannot coerce ~s to a uri." thing))
491 (defvar *strict-parse* t)
493 (defun parse-uri-string (string &aux (illegal-chars *illegal-characters*))
494 (declare (optimize (speed 3)))
495 ;; Speed is important, so use a specialized state machine instead of
496 ;; regular expressions for parsing the URI string. The regexp we are
505 (end (length string))
510 (path-components '())
513 ;; namespace identifier, for urn parsing only:
515 (declare (fixnum state start end))
516 (flet ((read-token (kind &optional legal-chars)
520 else (let ((sindex start)
523 (declare (fixnum sindex))
526 (when (>= start end) (return nil))
527 (setq c (schar string start))
528 (let ((ci (char-int c)))
530 then (if* (and (eq :colon kind) (eq c #\:))
532 elseif (= 0 (sbit legal-chars ci))
535 URI ~s contains illegal character ~s at position ~d."
537 elseif (and (< ci 128)
539 (= 1 (sbit illegal-chars ci)))
540 then (.parse-error "~
541 URI ~s contains illegal character ~s at position ~d."
545 (#\? (return :question))
546 (#\# (return :hash))))
547 (:query (case c (#\# (return :hash))))
550 (#\: (return :colon))
551 (#\? (return :question))
553 (#\/ (return :slash)))))
555 (if* (> start sindex)
556 then ;; we found some chars
557 ;; before we stopped the parse
558 (setq tokval (subseq string sindex start))
560 else ;; immediately stopped at a special char
563 (failure (&optional why)
564 (.parse-error "illegal URI: ~s [~d]~@[: ~a~]"
567 (.parse-error "impossible state: ~d [~s]" state string)))
570 (0 ;; starting to parse
571 (ecase (read-token t)
573 (:question (setq state 7))
574 (:hash (setq state 8))
575 (:slash (setq state 3))
576 (:string (setq state 1))
577 (:end (setq state 9))))
578 (1 ;; seen <token><special char>
579 (let ((token tokval))
580 (ecase (read-token t)
581 (:colon (setq scheme token)
582 (if* (equalp "urn" scheme)
584 else (setq state 2)))
585 (:question (push token path-components)
587 (:hash (push token path-components)
589 (:slash (push token path-components)
590 (push "/" path-components)
593 (:end (push token path-components)
596 (ecase (read-token t)
598 (:question (setq state 7))
599 (:hash (setq state 8))
600 (:slash (setq state 3))
601 (:string (setq state 10))
602 (:end (setq state 9))))
603 (10 ;; seen <scheme>:<token>
604 (let ((token tokval))
605 (ecase (read-token t)
607 (:question (push token path-components)
609 (:hash (push token path-components)
611 (:slash (push token path-components)
614 (:end (push token path-components)
616 (3 ;; seen / or <scheme>:/
617 (ecase (read-token t)
619 (:question (push "/" path-components)
621 (:hash (push "/" path-components)
623 (:slash (setq state 4))
624 (:string (push "/" path-components)
625 (push tokval path-components)
627 (:end (push "/" path-components)
629 (4 ;; seen [<scheme>:]//
630 (ecase (read-token t)
632 (:question (failure))
635 (:string (setq host tokval)
638 (11 ;; seen [<scheme>:]//<host>
639 (ecase (read-token t)
640 (:colon (setq state 5))
641 (:question (setq state 7))
642 (:hash (setq state 8))
643 (:slash (push "/" path-components)
645 (:string (impossible))
646 (:end (setq state 9))))
647 (5 ;; seen [<scheme>:]//<host>:
648 (ecase (read-token t)
650 (:question (failure))
652 (:slash (push "/" path-components)
654 (:string (setq port tokval)
657 (12 ;; seen [<scheme>:]//<host>:[<port>]
658 (ecase (read-token t)
660 (:question (setq state 7))
661 (:hash (setq state 8))
662 (:slash (push "/" path-components)
664 (:string (impossible))
665 (:end (setq state 9))))
667 (ecase (read-token :path)
668 (:question (setq state 7))
669 (:hash (setq state 8))
670 (:string (push tokval path-components)
672 (:end (setq state 9))))
674 (ecase (read-token :path)
675 (:question (setq state 7))
676 (:hash (setq state 8))
677 (:string (impossible))
678 (:end (setq state 9))))
682 then *strict-illegal-query-characters*
683 else *illegal-query-characters*))
684 (ecase (prog1 (read-token :query)
685 (setq illegal-chars *illegal-characters*))
686 (:hash (setq state 8))
687 (:string (setq query tokval)
689 (:end (setq state 9))))
691 (ecase (read-token :query)
692 (:hash (setq state 8))
693 (:string (impossible))
694 (:end (setq state 9))))
696 (ecase (read-token :rest)
697 (:string (setq fragment tokval)
699 (:end (setq state 9))))
704 (apply #'concatenate 'simple-string (nreverse path-components))
707 (15 ;; seen urn:, read nid now
708 (case (read-token :colon *valid-nid-characters*)
709 (:string (setq nid tokval)
711 (t (failure "missing namespace identifier"))))
712 (16 ;; seen urn:<nid>
714 (:colon (setq state 17))
715 (t (failure "missing namespace specific string"))))
716 (17 ;; seen urn:<nid>:, rest is nss
717 (return (values scheme
721 (setq illegal-chars *reserved-nss-characters*)
725 "internal error in parse engine, wrong state: ~s." state)))))))
727 (defun escape-p (string)
728 (declare (optimize (speed 3)))
730 (max (the fixnum (length string))))
732 (declare (fixnum i max))
733 (when (char= #\% (schar string i))
736 (defun parse-path (path-string escape)
737 (do* ((xpath-list (delimited-string-to-list path-string #\/))
740 (if* (string= "" (car xpath-list))
741 then (setf (car xpath-list) :absolute)
742 else (push :relative xpath-list))
744 (pl (cdr path-list) (cdr pl))
746 ((null pl) path-list)
747 (if* (cdr (setq segments (delimited-string-to-list (car pl) #\;)))
748 then ;; there is a param
749 ;;; (setf (car pl) segments)
751 (mapcar #'(lambda (s)
752 (decode-escaped-encoding
753 s escape *reserved-path-characters2*))
756 ;;; (setf (car pl) (car segments))
758 (decode-escaped-encoding
759 (car segments) escape *reserved-path-characters2*)))))
761 (defun decode-escaped-encoding (string escape
762 &optional (reserved-chars
763 *reserved-characters*))
764 ;; Return a string with the real characters.
765 (when (null escape) (return-from decode-escaped-encoding string))
767 (max (length string))
768 (new-string (copy-seq string))
772 (shrink-vector new-string new-i))
773 (if* (char= #\% (setq ch (schar string i)))
774 then (when (> (+ i 3) max)
776 "Unsyntactic escaped encoding in ~s." string))
777 (setq ch (schar string (incf i)))
778 (setq ch2 (schar string (incf i)))
779 (when (not (and (setq chc (digit-char-p ch 16))
780 (setq chc2 (digit-char-p ch2 16))))
782 "Non-hexidecimal digits after %: %c%c." ch ch2))
783 (let ((ci (+ (* 16 chc) chc2)))
784 (if* (or (null reserved-chars)
785 (= 0 (sbit reserved-chars ci)))
787 (setf (schar new-string new-i)
789 else (setf (schar new-string new-i) #\%)
790 (setf (schar new-string (incf new-i)) ch)
791 (setf (schar new-string (incf new-i)) ch2)))
792 else (setf (schar new-string new-i) ch))))
794 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
797 (defun render-uri (uri stream
798 &aux (escape (uri-escaped uri))
799 (*print-pretty* nil))
800 (when (null (uri-string uri))
801 (setf (uri-string uri)
802 (let ((scheme (uri-scheme uri))
803 (host (uri-host uri))
804 (port (uri-port uri))
805 (path (uri-path uri))
806 (query (uri-query uri))
807 (fragment (uri-fragment uri)))
808 (concatenate 'simple-string
810 (encode-escaped-encoding
811 (string-downcase ;; for upper case lisps
812 (symbol-name scheme))
813 *reserved-characters* escape))
817 (encode-escaped-encoding
818 host *reserved-authority-characters* escape))
821 ;;;; too slow until ACL 6.0:
822 ;;; (format nil "~d" port)
823 ;;; (princ-to-string port)
824 #-allegro (princ-to-string port)
826 (with-output-to-string (s)
827 (excl::maybe-print-fast s port))
830 (encode-escaped-encoding path
832 ;;*reserved-path-characters*
835 (when query (encode-escaped-encoding query nil escape))
837 (when fragment (encode-escaped-encoding fragment nil escape))))))
839 then (format stream "~a" (uri-string uri))
840 else (uri-string uri)))
842 (defun render-parsed-path (path-list escape)
844 (first (car path-list))
845 (pl (cdr path-list) (cdr pl))
846 (pe (car pl) (car pl)))
848 (when res (apply #'concatenate 'simple-string (nreverse res))))
849 (when (or (null first)
850 (prog1 (eq :absolute first)
855 (encode-escaped-encoding pe *reserved-path-characters* escape)
857 else ;; contains params
858 (push (encode-escaped-encoding
859 (car pe) *reserved-path-characters* escape)
861 (dolist (item (cdr pe))
863 (push (encode-escaped-encoding
864 item *reserved-path-characters* escape)
867 (defun render-urn (urn stream
868 &aux (*print-pretty* nil))
869 (when (null (uri-string urn))
870 (setf (uri-string urn)
871 (let ((nid (urn-nid urn))
873 (concatenate 'simple-string "urn:" nid ":" nss))))
875 then (format stream "~a" (uri-string urn))
876 else (uri-string urn)))
878 (defparameter *escaped-encoding*
879 (vector #\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9 #\a #\b #\c #\d #\e #\f))
881 (defun encode-escaped-encoding (string reserved-chars escape)
882 (when (null escape) (return-from encode-escaped-encoding string))
883 ;; Make a string as big as it possibly needs to be (3 times the original
884 ;; size), and truncate it at the end.
885 (do* ((max (length string))
886 (new-max (* 3 max)) ;; worst case new size
887 (new-string (make-string new-max))
892 (shrink-vector new-string (incf new-i)))
893 (setq ci (char-int (setq c (schar string i))))
894 (if* (or (null reserved-chars)
896 (= 0 (sbit reserved-chars ci)))
899 (setf (schar new-string new-i) c)
900 else ;; need to escape it
901 (multiple-value-bind (q r) (truncate ci 16)
902 (setf (schar new-string (incf new-i)) #\%)
903 (setf (schar new-string (incf new-i)) (elt *escaped-encoding* q))
904 (setf (schar new-string (incf new-i))
905 (elt *escaped-encoding* r))))))
907 (defmethod print-object ((uri uri) stream)
909 then (format stream "#<~a ~a>" 'uri (render-uri uri nil))
910 else (render-uri uri stream)))
912 (defmethod print-object ((urn urn) stream)
914 then (format stream "#<~a ~a>" 'uri (render-urn urn nil))
915 else (render-urn urn stream)))
917 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
918 ;; merging and unmerging
920 (defmethod merge-uris ((uri string) (base string) &optional place)
921 (merge-uris (parse-uri uri) (parse-uri base) place))
923 (defmethod merge-uris ((uri uri) (base string) &optional place)
924 (merge-uris uri (parse-uri base) place))
926 (defmethod merge-uris ((uri string) (base uri) &optional place)
927 (merge-uris (parse-uri uri) base place))
929 (defmethod merge-uris ((uri uri) (base uri) &optional place)
930 ;; The following is from
931 ;; http://info.internet.isi.edu/in-notes/rfc/files/rfc2396.txt
932 ;; and is algorithm we use to merge URIs.
934 ;; For more information, see section 5.2 of the RFC.
938 (when (and (null (uri-parsed-path uri))
939 (null (uri-scheme uri))
940 (null (uri-host uri))
941 (null (uri-port uri))
942 (null (uri-query uri)))
943 (return-from merge-uris
944 (let ((new (copy-uri base :place place)))
945 (when (uri-query uri)
946 (setf (uri-query new) (uri-query uri)))
947 (when (uri-fragment uri)
948 (setf (uri-fragment new) (uri-fragment uri)))
951 (setq uri (copy-uri uri :place place))
954 (when (uri-scheme uri)
955 (return-from merge-uris uri))
956 (setf (uri-scheme uri) (uri-scheme base))
959 (when (uri-host uri) (go :done))
960 (setf (uri-host uri) (uri-host base))
961 (setf (uri-port uri) (uri-port base))
964 (let ((p (uri-parsed-path uri)))
965 (when (and p (eq :absolute (car p)))
966 (when (equal '(:absolute "") p)
967 ;; Canonicalize the way parsing does:
968 (setf (uri-path uri) nil))
973 (or (uri-parsed-path base)
974 ;; needed because we canonicalize away a path of just `/':
976 (path (uri-parsed-path uri))
978 (when (not (eq :absolute (car base-path)))
979 (error "Cannot merge ~a and ~a, since latter is not absolute."
984 (append (butlast base-path)
985 (if* path then (cdr path) else '(""))))
988 (let ((last (last new-path-list)))
989 (if* (atom (car last))
990 then (when (string= "." (car last))
991 (setf (car last) ""))
992 else (when (string= "." (caar last))
993 (setf (caar last) ""))))
995 (delete "." new-path-list :test #'(lambda (a b)
1001 (let ((npl (cdr new-path-list))
1004 (string= ".." (let ((l (car (last npl))))
1011 :test #'(lambda (a b)
1016 (when (null index) (return))
1018 ;; The RFC says, in 6g, "that the implementation may handle
1019 ;; this error by retaining these components in the resolved
1020 ;; path, by removing them from the resolved path, or by
1021 ;; avoiding traversal of the reference." The examples in C.2
1022 ;; imply that we should do the first thing (retain them), so
1023 ;; that's what we'll do.
1026 then (setq npl (cddr npl))
1028 (dotimes (x (- index 2)) (setq tmp (cdr tmp)))
1029 (setf (cdr tmp) (cdddr tmp))))
1030 (setf (cdr new-path-list) npl)
1031 (when fix-tail (setq new-path-list (nconc new-path-list '("")))))
1034 ;; don't complain if new-path-list starts with `..'. See comment
1035 ;; above about this step.
1038 (when (or (equal '(:absolute "") new-path-list)
1039 (equal '(:absolute) new-path-list))
1040 (setq new-path-list nil))
1041 (setf (uri-path uri)
1042 (render-parsed-path new-path-list
1043 ;; don't know, so have to assume:
1048 (return-from merge-uris uri)))
1050 (defmethod enough-uri ((uri string) (base string) &optional place)
1051 (enough-uri (parse-uri uri) (parse-uri base) place))
1053 (defmethod enough-uri ((uri uri) (base string) &optional place)
1054 (enough-uri uri (parse-uri base) place))
1056 (defmethod enough-uri ((uri string) (base uri) &optional place)
1057 (enough-uri (parse-uri uri) base place))
1059 (defmethod enough-uri ((uri uri) (base uri) &optional place)
1060 (let ((new-scheme nil)
1063 (new-parsed-path nil))
1065 (when (or (and (uri-scheme uri)
1066 (not (equalp (uri-scheme uri) (uri-scheme base))))
1068 (not (equalp (uri-host uri) (uri-host base))))
1069 (not (equalp (uri-port uri) (uri-port base))))
1070 (return-from enough-uri uri))
1072 (when (null (uri-host uri))
1073 (setq new-host (uri-host base)))
1074 (when (null (uri-port uri))
1075 (setq new-port (uri-port base)))
1077 (when (null (uri-scheme uri))
1078 (setq new-scheme (uri-scheme base)))
1080 ;; Now, for the hard one, path.
1081 ;; We essentially do here what enough-namestring does.
1082 (do* ((base-path (uri-parsed-path base))
1083 (path (uri-parsed-path uri))
1084 (bp base-path (cdr bp))
1086 ((or (null bp) (null p))
1087 ;; If p is nil, that means we have something like
1088 ;; (enough-uri "/foo/bar" "/foo/bar/baz.htm"), so
1089 ;; new-parsed-path will be nil.
1091 (setq new-parsed-path (copy-list p))
1092 (when (not (symbolp (car new-parsed-path)))
1093 (push :relative new-parsed-path))))
1094 (if* (equal (car bp) (car p))
1096 else (setq new-parsed-path (copy-list p))
1097 (when (not (symbolp (car new-parsed-path)))
1098 (push :relative new-parsed-path))
1102 (when new-parsed-path
1103 (render-parsed-path new-parsed-path
1104 ;; don't know, so have to assume:
1106 (new-query (uri-query uri))
1107 (new-fragment (uri-fragment uri))
1108 (new-plist (copy-list (uri-plist uri))))
1109 (if* (and (null new-scheme)
1113 (null new-parsed-path)
1115 (null new-fragment))
1116 then ;; can't have a completely empty uri!
1118 :class (class-of uri)
1123 :class (class-of uri)
1129 :parsed-path new-parsed-path
1131 :fragment new-fragment
1132 :plist new-plist)))))
1134 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1135 ;; support for interning URIs
1137 (defun make-uri-space (&rest keys &key (size 777) &allow-other-keys)
1139 (apply #'make-hash-table :size size
1140 :hash-function 'uri-hash
1141 :test 'uri= :values nil keys)
1143 (apply #'make-hash-table :size size keys))
1145 (defun gethash-uri (uri table)
1146 #+allegro (gethash uri table)
1148 (let* ((hash (uri-hash uri))
1149 (existing (gethash hash table)))
1150 (dolist (u existing)
1152 (return-from gethash-uri (values u t))))
1155 (defun puthash-uri (uri table)
1156 #+allegro (excl:puthash-key uri table)
1158 (let ((existing (gethash (uri-hash uri) table)))
1159 (dolist (u existing)
1161 (return-from puthash-uri u)))
1162 (setf (gethash (uri-hash uri) table)
1163 (cons uri existing))
1167 (defun uri-hash (uri)
1168 (if* (uri-hashcode uri)
1170 else (setf (uri-hashcode uri)
1173 (render-uri uri nil)
1176 (render-uri uri nil))))))
1178 (defvar *uris* (make-uri-space))
1180 (defun uri-space () *uris*)
1182 (defun (setf uri-space) (new-val)
1183 (setq *uris* new-val))
1185 ;; bootstrapping (uri= changed from function to method):
1186 (when (fboundp 'uri=) (fmakunbound 'uri=))
1188 (defmethod uri= ((uri1 uri) (uri2 uri))
1189 (when (not (eq (uri-scheme uri1) (uri-scheme uri2)))
1190 (return-from uri= nil))
1191 ;; RFC2396 says: a URL with an explicit ":port", where the port is
1192 ;; the default for the scheme, is the equivalent to one where the
1193 ;; port is elided. Hmmmm. This means that this function has to be
1194 ;; scheme dependent. Grrrr.
1195 (let ((default-port (case (uri-scheme uri1)
1200 (and (equalp (uri-host uri1) (uri-host uri2))
1201 (eql (or (uri-port uri1) default-port)
1202 (or (uri-port uri2) default-port))
1203 (string= (uri-path uri1) (uri-path uri2))
1204 (string= (uri-query uri1) (uri-query uri2))
1205 (string= (uri-fragment uri1) (uri-fragment uri2)))))
1207 (defmethod uri= ((urn1 urn) (urn2 urn))
1208 (when (not (eq (uri-scheme urn1) (uri-scheme urn2)))
1209 (return-from uri= nil))
1210 (and (equalp (urn-nid urn1) (urn-nid urn2))
1211 (urn-nss-equal (urn-nss urn1) (urn-nss urn2))))
1213 (defun urn-nss-equal (nss1 nss2 &aux len)
1214 ;; Return t iff the nss values are the same.
1215 ;; %2c and %2C are equivalent.
1216 (when (or (null nss1) (null nss2)
1217 (not (= (setq len (length nss1))
1219 (return-from urn-nss-equal nil))
1224 (setq c1 (schar nss1 i))
1225 (setq c2 (schar nss2 i))
1228 (if* (and (char= #\% c1) (char= #\% c2))
1229 then (setq state :percent+1)
1230 elseif (char/= c1 c2)
1233 (when (char-not-equal c1 c2) (return nil))
1234 (setq state :percent+2))
1236 (when (char-not-equal c1 c2) (return nil))
1237 (setq state :char)))))
1239 (defmethod intern-uri ((xuri uri) &optional (uri-space *uris*))
1240 (let ((uri (gethash-uri xuri uri-space)))
1243 else (puthash-uri xuri uri-space))))
1245 (defmethod intern-uri ((uri string) &optional (uri-space *uris*))
1246 (intern-uri (parse-uri uri) uri-space))
1248 (defun unintern-uri (uri &optional (uri-space *uris*))
1250 then (clrhash uri-space)
1252 then (remhash uri uri-space)
1253 else (error "bad uri: ~s." uri)))
1255 (defmacro do-all-uris ((var &optional uri-space result-form)
1258 "do-all-uris (var [[uri-space] result-form])
1259 {declaration}* {tag | statement}*
1260 Executes the forms once for each uri with var bound to the current uri"
1263 (g-uri-space (gensym))
1264 (body #+allegro (third (excl::parse-body forms env))
1266 `(let ((,g-uri-space (or ,uri-space *uris*)))
1268 (flet ((,f (,var &optional ,g-ignore)
1269 (declare (ignore-if-unused ,var ,g-ignore))
1271 (maphash #',f ,g-uri-space))
1272 (return ,result-form)))))
1274 (defun sharp-u (stream chr arg)
1275 (declare (ignore chr arg))
1276 (let ((arg (read stream nil nil t)))
1280 then (parse-uri arg)
1283 (internal-reader-error
1285 "#u takes a string or list argument: ~s" arg)))))
1290 (locally (declare (special std-lisp-readtable))
1291 (let ((*readtable* std-lisp-readtable))
1292 (set-dispatch-macro-character #\# #\u #'puri::sharp-u)))
1294 (set-dispatch-macro-character #\# #\u #'puri::sharp-u)
1296 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1300 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1302 ;; (don't run under emacs with M-x fi:common-lisp)
1305 (defun time-uri-module ()
1306 (declare (optimize (speed 3) (safety 0) (debug 0)))
1307 (let ((uri "http://www.franz.com/a/b;x;y;z/c/foo?bar=baz&xxx#foo")
1308 (uri2 "http://www.franz.com/a/b;x;y;z/c/%2ffoo?bar=baz&xxx#foo"))
1309 (gc t) (gc :tenure) (gc :tenure) (gc :tenure)
1310 (format t "~&;;; starting timing testing 1...~%")
1311 (time (dotimes (i 100000) (parse-uri uri)))
1313 (gc t) (gc :tenure) (gc :tenure) (gc :tenure)
1314 (format t "~&;;; starting timing testing 2...~%")
1315 (let ((uri (parse-uri uri)))
1316 (time (dotimes (i 100000)
1317 ;; forces no caching of the printed representation:
1318 (setf (uri-string uri) nil)
1319 (format nil "~a" uri))))
1321 (gc t) (gc :tenure) (gc :tenure) (gc :tenure)
1322 (format t "~&;;; starting timing testing 3...~%")
1325 (dotimes (i 100000) (parse-uri uri2))
1326 (let ((uri (parse-uri uri)))
1328 ;; forces no caching of the printed representation:
1329 (setf (uri-string uri) nil)
1330 (format nil "~a" uri)))))))
1332 ;;******** reference output (ultra, modified 5.0.1):
1333 ;;; starting timing testing 1...
1334 ; cpu time (non-gc) 13,710 msec user, 0 msec system
1335 ; cpu time (gc) 600 msec user, 10 msec system
1336 ; cpu time (total) 14,310 msec user, 10 msec system
1337 ; real time 14,465 msec
1339 ; 1,804,261 cons cells, 7 symbols, 41,628,832 other bytes, 0 static bytes
1340 ;;; starting timing testing 2...
1341 ; cpu time (non-gc) 27,500 msec user, 0 msec system
1342 ; cpu time (gc) 280 msec user, 20 msec system
1343 ; cpu time (total) 27,780 msec user, 20 msec system
1344 ; real time 27,897 msec
1346 ; 1,900,463 cons cells, 0 symbols, 17,693,712 other bytes, 0 static bytes
1347 ;;; starting timing testing 3...
1348 ; cpu time (non-gc) 52,290 msec user, 10 msec system
1349 ; cpu time (gc) 1,290 msec user, 30 msec system
1350 ; cpu time (total) 53,580 msec user, 40 msec system
1351 ; real time 54,062 msec
1353 ; 7,800,205 cons cells, 0 symbols, 81,697,496 other bytes, 0 static bytes
1355 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1356 ;;; after improving decode-escaped-encoding/encode-escaped-encoding:
1358 ;;; starting timing testing 1...
1359 ; cpu time (non-gc) 14,520 msec user, 0 msec system
1360 ; cpu time (gc) 400 msec user, 0 msec system
1361 ; cpu time (total) 14,920 msec user, 0 msec system
1362 ; real time 15,082 msec
1364 ; 1,800,270 cons cells, 0 symbols, 41,600,160 other bytes, 0 static bytes
1365 ;;; starting timing testing 2...
1366 ; cpu time (non-gc) 27,490 msec user, 10 msec system
1367 ; cpu time (gc) 300 msec user, 0 msec system
1368 ; cpu time (total) 27,790 msec user, 10 msec system
1369 ; real time 28,025 msec
1371 ; 1,900,436 cons cells, 0 symbols, 17,693,712 other bytes, 0 static bytes
1372 ;;; starting timing testing 3...
1373 ; cpu time (non-gc) 47,900 msec user, 20 msec system
1374 ; cpu time (gc) 920 msec user, 10 msec system
1375 ; cpu time (total) 48,820 msec user, 30 msec system
1376 ; real time 49,188 msec
1378 ; 3,700,215 cons cells, 0 symbols, 81,707,144 other bytes, 0 static bytes