1 ;; -*- mode: common-lisp; package: puri -*-
3 ;; For general URI information see RFC2396.
5 ;; copyright (c) 1999-2002 Franz Inc, Berkeley, CA - All rights reserved.
6 ;; copyright (c) 2002-2005 Franz Inc, Oakland, CA - All rights reserved.
7 ;; copyright (c) 2003-2010 Kevin Rosenberg
9 ;; This code is free software; you can redistribute it and/or
10 ;; modify it under the terms of the version 2.1 of
11 ;; the GNU Lesser General Public License as published by
12 ;; the Free Software Foundation, as clarified by the
13 ;; preamble found here:
14 ;; http://opensource.franz.com/preamble.html
16 ;; Versions ported from Franz's opensource release
17 ;; uri.cl,v 2.3.6.4.2.1 2001/08/09 17:42:39 layer
18 ;; uri.cl,v 2.9.84.1 2005/08/11 18:38:52 layer
20 ;; This code is distributed in the hope that it will be useful,
21 ;; but without any warranty; without even the implied warranty of
22 ;; merchantability or fitness for a particular purpose. See the GNU
23 ;; Lesser General Public License for more details.
27 #-(or allegro zacl) (:nicknames #:net.uri)
29 #:uri ; the type and a function
33 #:uri-scheme ; and slots
40 #:uri-authority ; pseudo-slot accessor
43 #:urn-nid ; pseudo-slot accessor
44 #:urn-nss ; pseudo-slot accessor
53 #:make-uri-space ; interning...
60 #:uri-parse-error ;; Added by KMR
65 (eval-when (:compile-toplevel) (declaim (optimize (speed 3))))
69 (defun parse-body (forms &optional env)
70 "Parses a body, returns (VALUES docstring declarations forms)"
71 (declare (ignore env))
72 ;; fixme -- need to add parsing of multiple declarations
73 (let (docstring declarations)
74 (when (stringp (car forms))
75 (setq docstring (car forms))
76 (setq forms (cdr forms)))
77 (when (and (listp (car forms))
78 (symbolp (caar forms))
79 (string-equal (symbol-name '#:declare)
80 (symbol-name (caar forms))))
81 (setq declarations (car forms))
82 (setq forms (cdr forms)))
83 (values docstring declarations forms)))
86 (defun shrink-vector (str size)
88 (excl::.primcall 'sys::shrink-svector str size)
90 (setq str (sb-kernel:shrink-vector str size))
92 (lisp::shrink-vector str size)
94 (system::shrink-vector$vector str size)
96 (common-lisp::shrink-vector str size)
97 #-(or allegro cmu lispworks sbcl scl)
98 (setq str (subseq str 0 size))
102 ;; KMR: Added new condition to handle cross-implementation variances
103 ;; in the parse-error condition many implementations define
105 (define-condition uri-parse-error (parse-error)
106 ((fmt-control :initarg :fmt-control :accessor fmt-control)
107 (fmt-arguments :initarg :fmt-arguments :accessor fmt-arguments ))
108 (:report (lambda (c stream)
109 (format stream "Parse error:")
110 (apply #'format stream (fmt-control c) (fmt-arguments c)))))
112 (defun .parse-error (fmt &rest args)
113 (error 'uri-parse-error :fmt-control fmt :fmt-arguments args))
116 (defun internal-reader-error (stream fmt &rest args)
117 (apply #'format stream fmt args))
119 #-allegro (defvar *current-case-mode* :case-insensitive-upper)
120 #+allegro (eval-when (:compile-toplevel :load-toplevel :execute)
121 (import '(excl:*current-case-mode*
122 excl:delimited-string-to-list
124 excl::internal-reader-error
128 (defmethod position-char (char (string string) start max)
129 (declare (optimize (speed 3) (safety 0) (space 0))
130 (fixnum start max) (string string))
131 (do* ((i start (1+ i)))
134 (when (char= char (char string i)) (return i))))
137 (defun delimited-string-to-list (string &optional (separator #\space)
139 (declare (optimize (speed 3) (safety 0) (space 0)
140 (compilation-speed 0))
142 (type character separator))
143 (do* ((len (length string))
146 (end (position-char separator string pos len)
147 (position-char separator string pos len)))
150 (push (subseq string pos) output)
151 (when (and (plusp len) (not skip-terminal))
154 (declare (type fixnum pos len)
155 (type (or null fixnum) end))
156 (push (subseq string pos end) output)
157 (setq pos (1+ end))))
160 (eval-when (:compile-toplevel :load-toplevel :execute)
161 (defvar if*-keyword-list '("then" "thenret" "else" "elseif"))
163 (defmacro if* (&rest args)
164 (do ((xx (reverse args) (cdr xx))
171 (cond ((eq state :compl)
173 (t (error "if*: illegal form ~s" args))))
174 (cond ((and (symbolp (car xx))
175 (member (symbol-name (car xx))
177 :test #'string-equal))
178 (setq lookat (symbol-name (car xx)))))
180 (cond ((eq state :init)
181 (cond (lookat (cond ((string-equal lookat "thenret")
185 "if*: bad keyword ~a" lookat))))
188 (push (car xx) col))))
191 (cond ((string-equal lookat "else")
194 "if*: multiples elses")))
197 (push `(t ,@col) totalcol))
198 ((string-equal lookat "then")
200 (t (error "if*: bad keyword ~s"
202 (t (push (car xx) col))))
206 "if*: keyword ~s at the wrong place " (car xx)))
207 (t (setq state :compl)
208 (push `(,(car xx) ,@col) totalcol))))
210 (cond ((not (string-equal lookat "elseif"))
211 (error "if*: missing elseif clause ")))
212 (setq state :init))))))
218 (scheme :initarg :scheme :initform nil :accessor uri-scheme)
219 (host :initarg :host :initform nil :accessor uri-host)
220 (port :initarg :port :initform nil :accessor uri-port)
221 (path :initarg :path :initform nil :accessor uri-path)
222 (query :initarg :query :initform nil :accessor uri-query)
223 (fragment :initarg :fragment :initform nil :accessor uri-fragment)
224 (plist :initarg :plist :initform nil :accessor uri-plist)
228 ;; used to prevent unnessary work, looking for chars to escape and
230 :initarg :escaped :initform nil :accessor uri-escaped)
232 ;; the cached printable representation of the URI. It *might* be
233 ;; different than the original string, though, because the user might
234 ;; have escaped non-reserved chars--they won't be escaped when the URI
236 :initarg :string :initform nil :accessor uri-string)
238 ;; the cached parsed representation of the URI path.
239 :initarg :parsed-path
241 :accessor .uri-parsed-path)
245 :accessor uri-is-ip6)
247 ;; cached sxhash, so we don't have to compute it more than once.
248 :initarg :hashcode :initform nil :accessor uri-hashcode)))
251 ((nid :initarg :nid :initform nil :accessor urn-nid)
252 (nss :initarg :nss :initform nil :accessor urn-nss)))
254 (eval-when (:compile-toplevel :execute)
255 (defmacro clear-caching-on-slot-change (name)
256 `(defmethod (setf ,name) :around (new-value (self uri))
257 (declare (ignore new-value))
258 (prog1 (call-next-method)
259 (setf (uri-string self) nil)
260 ,@(when (eq name 'uri-path) `((setf (.uri-parsed-path self) nil)))
261 (setf (uri-hashcode self) nil))))
264 (clear-caching-on-slot-change uri-scheme)
265 (clear-caching-on-slot-change uri-host)
266 (clear-caching-on-slot-change uri-port)
267 (clear-caching-on-slot-change uri-path)
268 (clear-caching-on-slot-change uri-query)
269 (clear-caching-on-slot-change uri-fragment)
272 (defmethod make-load-form ((self uri) &optional env)
273 (declare (ignore env))
274 `(make-instance ',(class-name (class-of self))
275 :scheme ,(uri-scheme self)
276 :host ,(uri-host self)
277 :port ,(uri-port self)
278 :path ',(uri-path self)
279 :query ,(uri-query self)
280 :fragment ,(uri-fragment self)
281 :plist ',(uri-plist self)
282 :string ,(uri-string self)
283 :parsed-path ',(.uri-parsed-path self)))
285 (defmethod uri-p ((thing uri)) t)
286 (defmethod uri-p ((thing t)) nil)
290 (scheme (when uri (uri-scheme uri)))
291 (host (when uri (uri-host uri)))
292 (port (when uri (uri-port uri)))
293 (path (when uri (uri-path uri)))
295 (when uri (copy-list (.uri-parsed-path uri))))
296 (query (when uri (uri-query uri)))
297 (fragment (when uri (uri-fragment uri)))
298 (plist (when uri (copy-list (uri-plist uri))))
299 (class (when uri (class-of uri)))
300 &aux (escaped (when uri (uri-escaped uri))))
302 then (setf (uri-scheme place) scheme)
303 (setf (uri-host place) host)
304 (setf (uri-port place) port)
305 (setf (uri-path place) path)
306 (setf (.uri-parsed-path place) parsed-path)
307 (setf (uri-query place) query)
308 (setf (uri-fragment place) fragment)
309 (setf (uri-plist place) plist)
310 (setf (uri-escaped place) escaped)
311 (setf (uri-string place) nil)
312 (setf (uri-hashcode place) nil)
314 elseif (eq 'uri class)
315 then ;; allow the compiler to optimize the call to make-instance:
317 :scheme scheme :host host :port port :path path
318 :parsed-path parsed-path
319 :query query :fragment fragment :plist plist
320 :escaped escaped :string nil :hashcode nil)
321 else (make-instance class
322 :scheme scheme :host host :port port :path path
323 :parsed-path parsed-path
324 :query query :fragment fragment :plist plist
325 :escaped escaped :string nil :hashcode nil)))
327 (defmethod uri-parsed-path ((uri uri))
329 (when (null (.uri-parsed-path uri))
330 (setf (.uri-parsed-path uri)
331 (parse-path (uri-path uri) (uri-escaped uri))))
332 (.uri-parsed-path uri)))
334 (defmethod (setf uri-parsed-path) (path-list (uri uri))
335 (assert (and (consp path-list)
336 (or (member (car path-list) '(:absolute :relative)
338 (setf (uri-path uri) (render-parsed-path path-list t))
339 (setf (.uri-parsed-path uri) path-list)
342 (defun uri-authority (uri)
344 (let ((*print-pretty* nil))
345 (format nil "~a~@[:~a~]" (uri-host uri) (uri-port uri)))))
348 (if* (equalp "urn" (uri-scheme uri))
350 else (error "URI is not a URN: ~s." uri)))
353 (if* (equalp "urn" (uri-scheme uri))
355 else (error "URI is not a URN: ~s." uri)))
357 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
360 (defparameter *excluded-characters*
362 ;; exclude control characters
363 (loop for i from 0 to #x1f
364 collect (code-char i))
365 '(;; `delims' (except #\%, because it's handled specially):
366 #\< #\> #\" #\space #\#
367 #\Rubout ;; (code-char #x7f)
369 #\{ #\} #\| #\\ #\^ #\[ #\] #\`))
370 "Excluded charcters from RFC2396 (http://www.ietf.org/rfc/rfc2396.txt 2.4.3)")
372 (defun reserved-char-vector (chars &key except)
373 (do* ((a (make-array 128 :element-type 'bit :initial-element 0))
374 (chars chars (cdr chars))
375 (c (car chars) (car chars)))
377 (if* (and except (member c except :test #'char=))
379 else (setf (sbit a (char-int c)) 1))))
381 (defparameter *reserved-characters*
382 (reserved-char-vector
383 (append *excluded-characters*
384 '(#\; #\/ #\? #\: #\@ #\& #\= #\+ #\$ #\, #\%))))
385 (defparameter *reserved-authority-characters*
386 (reserved-char-vector
387 (append *excluded-characters* '(#\; #\/ #\? #\: #\@))))
388 (defparameter *reserved-path-characters*
389 (reserved-char-vector
390 (append *excluded-characters*
392 ;;;;The rfc says this should be here, but it doesn't make sense.
396 (defparameter *reserved-fragment-characters*
397 (reserved-char-vector (remove #\# *excluded-characters*)))
399 (eval-when (:compile-toplevel :execute)
400 (defun gen-char-range-list (start end)
402 (endcode (1+ (char-int end)))
403 (chcode (char-int start)
407 ;; - has to be first, otherwise it signifies a range!
409 then (setq res (nreverse res))
412 else (nreverse res)))
413 (if* (= #.(char-int #\-) chcode)
415 else (push (code-char chcode) res))))
418 (defparameter *valid-nid-characters*
419 (reserved-char-vector
420 '#.(nconc (gen-char-range-list #\a #\z)
421 (gen-char-range-list #\A #\Z)
422 (gen-char-range-list #\0 #\9)
424 (defparameter *reserved-nss-characters*
425 (reserved-char-vector
426 (append *excluded-characters* '(#\& #\~ #\/ #\?))))
428 (defparameter *illegal-characters*
429 (reserved-char-vector (set-difference *excluded-characters*
431 (defparameter *strict-illegal-query-characters*
432 (reserved-char-vector (append '(#\?) (remove #\# *excluded-characters*))))
433 (defparameter *illegal-query-characters*
434 (reserved-char-vector
435 *excluded-characters* :except '(#\^ #\| #\#)))
437 (defparameter *valid-ip6-characters*
438 (reserved-char-vector
439 '#.(nconc (gen-char-range-list #\a #\f)
440 (gen-char-range-list #\A #\F)
441 (gen-char-range-list #\0 #\9)
445 (defun parse-uri (thing &key (class 'uri) &aux escape)
446 (when (uri-p thing) (return-from parse-uri thing))
448 (setq escape (escape-p thing))
449 (multiple-value-bind (scheme host port path query fragment is-ip6)
450 (parse-uri-string thing)
454 (case *current-case-mode*
455 ((:case-insensitive-upper :case-sensitive-upper)
457 ((:case-insensitive-lower :case-sensitive-lower)
459 (decode-escaped-encoding scheme escape))
460 (find-package :keyword))))
462 (when (and scheme (eq :urn scheme))
463 (return-from parse-uri
464 (make-instance 'urn :scheme scheme :nid host :nss path)))
466 (when host (setq host (decode-escaped-encoding host escape)))
468 (setq port (read-from-string port))
469 (when (not (numberp port)) (error "port is not a number: ~s." port))
470 (when (not (plusp port))
471 (error "port is not a positive integer: ~d." port))
472 (when (eql port (case scheme
478 (when (or (string= "" path)
479 (and ;; we canonicalize away a reference to just /:
481 (member scheme '(:http :https :ftp) :test #'eq)
486 (decode-escaped-encoding path escape *reserved-path-characters*)))
487 (when query (setq query (decode-escaped-encoding query escape)))
490 (decode-escaped-encoding fragment escape
491 *reserved-fragment-characters*)))
493 then ;; allow the compiler to optimize the make-instance call:
503 else ;; do it the slow way:
514 (defmethod uri ((thing uri))
517 (defmethod uri ((thing string))
520 (defmethod uri ((thing t))
521 (error "Cannot coerce ~s to a uri." thing))
523 (defvar *strict-parse* t)
525 (defun parse-uri-string (string &aux (illegal-chars *illegal-characters*))
526 (declare (optimize (speed 3)))
527 ;; Speed is important, so use a specialized state machine instead of
528 ;; regular expressions for parsing the URI string. The regexp we are
532 ;; May include a []-pair for ipv6
538 (end (length string))
544 (path-components '())
547 ;; namespace identifier, for urn parsing only:
549 (declare (fixnum state start end))
550 (flet ((read-token (kind &optional legal-chars)
554 else (let ((sindex start)
557 (declare (fixnum sindex))
560 (when (>= start end) (return nil))
561 (setq c (char string start))
562 (let ((ci (char-int c)))
564 then (if* (and (eq :colon kind) (eq c #\:))
566 elseif (= 0 (sbit legal-chars ci))
569 URI ~s contains illegal character ~s at position ~d."
571 elseif (and (< ci 128)
573 (= 1 (sbit illegal-chars ci)))
574 then (.parse-error "~
575 URI ~s contains illegal character ~s at position ~d."
579 (#\? (return :question))
580 (#\# (return :hash))))
581 (:query (case c (#\# (return :hash))))
583 (#\] (return :close-bracket))))
586 (#\: (return :colon))
587 (#\? (return :question))
588 (#\[ (return :open-bracket))
589 (#\] (return :close-bracket))
591 (#\/ (return :slash)))))
593 (if* (> start sindex)
594 then ;; we found some chars
595 ;; before we stopped the parse
596 (setq tokval (subseq string sindex start))
598 else ;; immediately stopped at a special char
601 (failure (&optional why)
602 (.parse-error "illegal URI: ~s [~d]~@[: ~a~]"
605 (.parse-error "impossible state: ~d [~s]" state string)))
608 (0 ;; starting to parse
609 (ecase (read-token t)
611 (:question (setq state 7))
612 (:hash (setq state 8))
613 (:slash (setq state 3))
614 (:string (setq state 1))
615 (:end (setq state 9))))
616 (1 ;; seen <token><special char>
617 (let ((token tokval))
618 (ecase (read-token t)
619 (:colon (setq scheme token)
620 (if* (equalp "urn" scheme)
622 else (setq state 2)))
623 (:question (push token path-components)
625 (:hash (push token path-components)
627 (:slash (push token path-components)
628 (push "/" path-components)
631 (:end (push token path-components)
634 (ecase (read-token t)
636 (:question (setq state 7))
637 (:hash (setq state 8))
638 (:slash (setq state 3))
639 (:string (setq state 10))
640 (:end (setq state 9))))
641 (10 ;; seen <scheme>:<token>
642 (let ((token tokval))
643 (ecase (read-token t)
645 (:question (push token path-components)
647 (:hash (push token path-components)
649 (:slash (push token path-components)
652 (:end (push token path-components)
654 (3 ;; seen / or <scheme>:/
655 (ecase (read-token t)
657 (:question (push "/" path-components)
659 (:hash (push "/" path-components)
661 (:slash (setq state 4))
662 (:string (push "/" path-components)
663 (push tokval path-components)
665 (:end (push "/" path-components)
667 (66 ;; seen [<scheme>:]//[
668 (ecase (read-token :ip6 *valid-ip6-characters*)
669 (:string (setq host tokval)
672 (67 ;; seen [<scheme>:]//[ip6]
673 (ecase (read-token t)
674 (:close-bracket (setq state 11))))
675 (4 ;; seen [<scheme>:]//
676 (ecase (read-token t)
678 (:question (failure))
680 (:open-bracket (setq state 66))
682 (if* (and (equalp "file" scheme)
685 (push "/" path-components)
688 (:string (setq host tokval)
691 (11 ;; seen [<scheme>:]//<host>
692 (ecase (read-token t)
693 (:colon (setq state 5))
694 (:question (setq state 7))
695 (:hash (setq state 8))
696 (:slash (push "/" path-components)
698 (:string (impossible))
699 (:end (setq state 9))))
700 (5 ;; seen [<scheme>:]//<host>:
701 (ecase (read-token t)
703 (:question (failure))
705 (:slash (push "/" path-components)
707 (:string (setq port tokval)
710 (12 ;; seen [<scheme>:]//<host>:[<port>]
711 (ecase (read-token t)
713 (:question (setq state 7))
714 (:hash (setq state 8))
715 (:slash (push "/" path-components)
717 (:string (impossible))
718 (:end (setq state 9))))
720 (ecase (read-token :path)
721 (:question (setq state 7))
722 (:hash (setq state 8))
723 (:string (push tokval path-components)
725 (:end (setq state 9))))
727 (ecase (read-token :path)
728 (:question (setq state 7))
729 (:hash (setq state 8))
730 (:string (impossible))
731 (:end (setq state 9))))
735 then *strict-illegal-query-characters*
736 else *illegal-query-characters*))
737 (ecase (prog1 (read-token :query)
738 (setq illegal-chars *illegal-characters*))
739 (:hash (setq state 8))
740 (:string (setq query tokval)
742 (:end (setq state 9))))
744 (ecase (read-token :query)
745 (:hash (setq state 8))
746 (:string (impossible))
747 (:end (setq state 9))))
749 (ecase (read-token :rest)
750 (:string (setq fragment tokval)
752 (:end (setq state 9))))
757 (apply #'concatenate 'string (nreverse path-components))
758 query fragment is-ip6)))
760 (15 ;; seen urn:, read nid now
761 (case (read-token :colon *valid-nid-characters*)
762 (:string (setq nid tokval)
764 (t (failure "missing namespace identifier"))))
765 (16 ;; seen urn:<nid>
767 (:colon (setq state 17))
768 (t (failure "missing namespace specific string"))))
769 (17 ;; seen urn:<nid>:, rest is nss
770 (return (values scheme
774 (setq illegal-chars *reserved-nss-characters*)
778 "internal error in parse engine, wrong state: ~s." state)))))))
780 (defun escape-p (string)
781 (declare (optimize (speed 3)))
783 (max (the fixnum (length string))))
785 (declare (fixnum i max))
786 (when (char= #\% (char string i))
789 (defun parse-path (path-string escape)
790 (do* ((xpath-list (delimited-string-to-list path-string #\/))
793 (if* (string= "" (car xpath-list))
794 then (setf (car xpath-list) :absolute)
795 else (push :relative xpath-list))
797 (pl (cdr path-list) (cdr pl))
799 ((null pl) path-list)
801 (if* (cdr (setq segments
802 (if* (string= "" (car pl))
804 else (delimited-string-to-list (car pl) #\;))))
805 then ;; there is a param
807 (mapcar #'(lambda (s)
808 (decode-escaped-encoding s escape
814 (decode-escaped-encoding (car segments) escape
818 (defun decode-escaped-encoding (string escape
819 &optional (reserved-chars
820 *reserved-characters*))
821 ;; Return a string with the real characters.
822 (when (null escape) (return-from decode-escaped-encoding string))
824 (max (length string))
825 (new-string (copy-seq string))
829 (shrink-vector new-string new-i))
830 (if* (char= #\% (setq ch (char string i)))
831 then (when (> (+ i 3) max)
833 "Unsyntactic escaped encoding in ~s." string))
834 (setq ch (char string (incf i)))
835 (setq ch2 (char string (incf i)))
836 (when (not (and (setq chc (digit-char-p ch 16))
837 (setq chc2 (digit-char-p ch2 16))))
839 "Non-hexidecimal digits after %: %c%c." ch ch2))
840 (let ((ci (+ (* 16 chc) chc2)))
841 (if* (or (null reserved-chars)
842 (> ci 127) ; bug11527
843 (= 0 (sbit reserved-chars ci)))
845 (setf (char new-string new-i)
847 else (setf (char new-string new-i) #\%)
848 (setf (char new-string (incf new-i)) ch)
849 (setf (char new-string (incf new-i)) ch2)))
850 else (setf (char new-string new-i) ch))))
852 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
855 (defun render-uri (uri stream
856 &aux (escape (uri-escaped uri))
857 (*print-pretty* nil))
858 (when (null (uri-string uri))
859 (setf (uri-string uri)
860 (let ((scheme (uri-scheme uri))
861 (host (uri-host uri))
862 (is-ip6 (uri-is-ip6 uri))
863 (port (uri-port uri))
864 (path (uri-path uri))
865 (query (uri-query uri))
866 (fragment (uri-fragment uri)))
869 (encode-escaped-encoding
870 (string-downcase ;; for upper case lisps
871 (symbol-name scheme))
872 *reserved-characters* escape))
874 (when (or host (eq :file scheme)) "//")
877 (encode-escaped-encoding
878 host *reserved-authority-characters* escape))
882 #-allegro (format nil "~D" port)
883 #+allegro (with-output-to-string (s)
884 (excl::maybe-print-fast s port))
886 (encode-escaped-encoding (or path "/")
888 ;;*reserved-path-characters*
891 (when query (encode-escaped-encoding query nil escape))
893 (when fragment (encode-escaped-encoding fragment nil escape))))))
895 then (format stream "~a" (uri-string uri))
896 else (uri-string uri)))
898 (defun render-parsed-path (path-list escape)
900 (first (car path-list))
901 (pl (cdr path-list) (cdr pl))
902 (pe (car pl) (car pl)))
904 (when res (apply #'concatenate 'string (nreverse res))))
905 (when (or (null first)
906 (prog1 (eq :absolute first)
911 (encode-escaped-encoding pe *reserved-path-characters* escape)
913 else ;; contains params
914 (push (encode-escaped-encoding
915 (car pe) *reserved-path-characters* escape)
917 (dolist (item (cdr pe))
919 (push (encode-escaped-encoding
920 item *reserved-path-characters* escape)
923 (defun render-urn (urn stream
924 &aux (*print-pretty* nil))
925 (when (null (uri-string urn))
926 (setf (uri-string urn)
927 (let ((nid (urn-nid urn))
929 (concatenate 'string "urn:" nid ":" nss))))
931 then (format stream "~a" (uri-string urn))
932 else (uri-string urn)))
934 (defparameter *escaped-encoding*
935 (vector #\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9 #\a #\b #\c #\d #\e #\f))
937 (defun encode-escaped-encoding (string reserved-chars escape)
938 (when (null escape) (return-from encode-escaped-encoding string))
939 ;; Make a string as big as it possibly needs to be (3 times the original
940 ;; size), and truncate it at the end.
941 (do* ((max (length string))
942 (new-max (* 3 max)) ;; worst case new size
943 (new-string (make-string new-max))
948 (shrink-vector new-string (incf new-i)))
949 (setq ci (char-int (setq c (char string i))))
950 (if* (or (null reserved-chars)
952 (= 0 (sbit reserved-chars ci)))
955 (setf (char new-string new-i) c)
956 else ;; need to escape it
957 (multiple-value-bind (q r) (truncate ci 16)
958 (setf (char new-string (incf new-i)) #\%)
959 (setf (char new-string (incf new-i)) (elt *escaped-encoding* q))
960 (setf (char new-string (incf new-i))
961 (elt *escaped-encoding* r))))))
963 (defmethod print-object ((uri uri) stream)
965 then (print-unreadable-object (uri stream :type t) (render-uri uri stream))
966 else (render-uri uri stream)))
968 (defmethod print-object ((urn urn) stream)
970 then (print-unreadable-object (urn stream :type t) (render-urn urn stream))
971 else (render-urn urn stream)))
973 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
974 ;; merging and unmerging
976 (defmethod merge-uris ((uri string) (base string) &optional place)
977 (merge-uris (parse-uri uri) (parse-uri base) place))
979 (defmethod merge-uris ((uri uri) (base string) &optional place)
980 (merge-uris uri (parse-uri base) place))
982 (defmethod merge-uris ((uri string) (base uri) &optional place)
983 (merge-uris (parse-uri uri) base place))
986 (defmethod merge-uris ((uri uri) (base uri) &optional place)
987 ;; See ../doc/rfc2396.txt for info on the algorithm we use to merge
992 (when (and (null (uri-parsed-path uri))
993 (null (uri-scheme uri))
994 (null (uri-host uri))
995 (null (uri-port uri))
996 (null (uri-query uri)))
997 (return-from merge-uris
998 (let ((new (copy-uri base :place place)))
999 (when (uri-query uri)
1000 (setf (uri-query new) (uri-query uri)))
1001 (when (uri-fragment uri)
1002 (setf (uri-fragment new) (uri-fragment uri)))
1005 (setq uri (copy-uri uri :place place))
1008 (when (uri-scheme uri)
1009 (return-from merge-uris uri))
1010 (setf (uri-scheme uri) (uri-scheme base))
1013 (when (uri-host uri) (go :done))
1014 (setf (uri-host uri) (uri-host base))
1015 (setf (uri-port uri) (uri-port base))
1018 (let ((p (uri-parsed-path uri)))
1021 ;; The following form causes our implementation to be at odds with
1022 ;; RFC 2396, however this is apparently what was intended by the
1023 ;; authors of the RFC. Specifically, (merge-uris "?y" "/foo")
1024 ;; should return #<uri /foo?y> instead of #<uri ?y>, according to
1026 ;;; http://www.apache.org/~fielding/uri/rev-2002/issues.html#003-relative-query
1028 (setf (uri-path uri) (uri-path base))
1031 (when (and p (eq :absolute (car p)))
1032 (when (equal '(:absolute "") p)
1033 ;; Canonicalize the way parsing does:
1034 (setf (uri-path uri) nil))
1039 (or (uri-parsed-path base)
1040 ;; needed because we canonicalize away a path of just `/':
1042 (path (uri-parsed-path uri))
1044 (when (not (eq :absolute (car base-path)))
1045 (error "Cannot merge ~a and ~a, since latter is not absolute."
1050 (append (butlast base-path)
1051 (if* path then (cdr path) else '(""))))
1054 (let ((last (last new-path-list)))
1055 (if* (atom (car last))
1056 then (when (string= "." (car last))
1057 (setf (car last) ""))
1058 else (when (string= "." (caar last))
1059 (setf (caar last) ""))))
1061 (delete "." new-path-list :test #'(lambda (a b)
1067 (let ((npl (cdr new-path-list))
1070 (string= ".." (let ((l (car (last npl))))
1077 :test #'(lambda (a b)
1082 (when (null index) (return))
1084 ;; The RFC says, in 6g, "that the implementation may handle
1085 ;; this error by retaining these components in the resolved
1086 ;; path, by removing them from the resolved path, or by
1087 ;; avoiding traversal of the reference." The examples in C.2
1088 ;; imply that we should do the first thing (retain them), so
1089 ;; that's what we'll do.
1092 then (setq npl (cddr npl))
1094 (dotimes (x (- index 2)) (setq tmp (cdr tmp)))
1095 (setf (cdr tmp) (cdddr tmp))))
1096 (setf (cdr new-path-list) npl)
1097 (when fix-tail (setq new-path-list (nconc new-path-list '("")))))
1100 ;; don't complain if new-path-list starts with `..'. See comment
1101 ;; above about this step.
1104 (when (or (equal '(:absolute "") new-path-list)
1105 (equal '(:absolute) new-path-list))
1106 (setq new-path-list nil))
1107 (setf (uri-path uri)
1108 (render-parsed-path new-path-list
1109 ;; don't know, so have to assume:
1114 (return-from merge-uris uri)))
1116 (defmethod enough-uri ((uri string) (base string) &optional place)
1117 (enough-uri (parse-uri uri) (parse-uri base) place))
1119 (defmethod enough-uri ((uri uri) (base string) &optional place)
1120 (enough-uri uri (parse-uri base) place))
1122 (defmethod enough-uri ((uri string) (base uri) &optional place)
1123 (enough-uri (parse-uri uri) base place))
1125 (defmethod enough-uri ((uri uri) (base uri) &optional place)
1126 (let ((new-scheme nil)
1129 (new-parsed-path nil))
1131 (when (or (and (uri-scheme uri)
1132 (not (equalp (uri-scheme uri) (uri-scheme base))))
1134 (not (equalp (uri-host uri) (uri-host base))))
1135 (not (equalp (uri-port uri) (uri-port base))))
1136 (return-from enough-uri uri))
1138 (when (null (uri-host uri))
1139 (setq new-host (uri-host base)))
1140 (when (null (uri-port uri))
1141 (setq new-port (uri-port base)))
1143 (when (null (uri-scheme uri))
1144 (setq new-scheme (uri-scheme base)))
1146 ;; Now, for the hard one, path.
1147 ;; We essentially do here what enough-namestring does.
1148 (do* ((base-path (uri-parsed-path base))
1149 (path (uri-parsed-path uri))
1150 (bp base-path (cdr bp))
1152 ((or (null bp) (null p))
1153 ;; If p is nil, that means we have something like
1154 ;; (enough-uri "/foo/bar" "/foo/bar/baz.htm"), so
1155 ;; new-parsed-path will be nil.
1157 (setq new-parsed-path (copy-list p))
1158 (when (not (symbolp (car new-parsed-path)))
1159 (push :relative new-parsed-path))))
1160 (if* (equal (car bp) (car p))
1162 else (setq new-parsed-path (copy-list p))
1163 (when (not (symbolp (car new-parsed-path)))
1164 (push :relative new-parsed-path))
1168 (when new-parsed-path
1169 (render-parsed-path new-parsed-path
1170 ;; don't know, so have to assume:
1172 (new-query (uri-query uri))
1173 (new-fragment (uri-fragment uri))
1174 (new-plist (copy-list (uri-plist uri))))
1175 (if* (and (null new-scheme)
1179 (null new-parsed-path)
1181 (null new-fragment))
1182 then ;; can't have a completely empty uri!
1184 :class (class-of uri)
1189 :class (class-of uri)
1195 :parsed-path new-parsed-path
1197 :fragment new-fragment
1198 :plist new-plist)))))
1200 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1201 ;; support for interning URIs
1203 (defun make-uri-space (&rest keys &key (size 777) &allow-other-keys)
1205 (apply #'make-hash-table :size size
1206 :hash-function 'uri-hash
1207 :test 'uri= :values nil keys)
1209 (apply #'make-hash-table :size size keys))
1211 (defun gethash-uri (uri table)
1212 #+allegro (gethash uri table)
1214 (let* ((hash (uri-hash uri))
1215 (existing (gethash hash table)))
1216 (dolist (u existing)
1218 (return-from gethash-uri (values u t))))
1221 (defun puthash-uri (uri table)
1222 #+allegro (excl:puthash-key uri table)
1224 (let ((existing (gethash (uri-hash uri) table)))
1225 (dolist (u existing)
1227 (return-from puthash-uri u)))
1228 (setf (gethash (uri-hash uri) table)
1229 (cons uri existing))
1233 (defun uri-hash (uri)
1234 (if* (uri-hashcode uri)
1236 else (setf (uri-hashcode uri)
1239 (render-uri uri nil)
1242 (render-uri uri nil))))))
1244 (defvar *uris* (make-uri-space))
1246 (defun uri-space () *uris*)
1248 (defun (setf uri-space) (new-val)
1249 (setq *uris* new-val))
1251 ;; bootstrapping (uri= changed from function to method):
1252 (when (fboundp 'uri=) (fmakunbound 'uri=))
1254 (defgeneric uri= (uri1 uri2))
1255 (defmethod uri= ((uri1 uri) (uri2 uri))
1256 (when (not (eq (uri-scheme uri1) (uri-scheme uri2)))
1257 (return-from uri= nil))
1258 ;; RFC2396 says: a URL with an explicit ":port", where the port is
1259 ;; the default for the scheme, is the equivalent to one where the
1260 ;; port is elided. Hmmmm. This means that this function has to be
1261 ;; scheme dependent. Grrrr.
1262 (let ((default-port (case (uri-scheme uri1)
1267 (and (equalp (uri-host uri1) (uri-host uri2))
1268 (eql (or (uri-port uri1) default-port)
1269 (or (uri-port uri2) default-port))
1270 (string= (uri-path uri1) (uri-path uri2))
1271 (string= (uri-query uri1) (uri-query uri2))
1272 (string= (uri-fragment uri1) (uri-fragment uri2)))))
1274 (defmethod uri= ((urn1 urn) (urn2 urn))
1275 (when (not (eq (uri-scheme urn1) (uri-scheme urn2)))
1276 (return-from uri= nil))
1277 (and (equalp (urn-nid urn1) (urn-nid urn2))
1278 (urn-nss-equal (urn-nss urn1) (urn-nss urn2))))
1280 (defun urn-nss-equal (nss1 nss2 &aux len)
1281 ;; Return t iff the nss values are the same.
1282 ;; %2c and %2C are equivalent.
1283 (when (or (null nss1) (null nss2)
1284 (not (= (setq len (length nss1))
1286 (return-from urn-nss-equal nil))
1291 (setq c1 (char nss1 i))
1292 (setq c2 (char nss2 i))
1295 (if* (and (char= #\% c1) (char= #\% c2))
1296 then (setq state :percent+1)
1297 elseif (char/= c1 c2)
1300 (when (char-not-equal c1 c2) (return nil))
1301 (setq state :percent+2))
1303 (when (char-not-equal c1 c2) (return nil))
1304 (setq state :char)))))
1306 (defmethod intern-uri ((xuri uri) &optional (uri-space *uris*))
1307 (let ((uri (gethash-uri xuri uri-space)))
1310 else (puthash-uri xuri uri-space))))
1312 (defmethod intern-uri ((uri string) &optional (uri-space *uris*))
1313 (intern-uri (parse-uri uri) uri-space))
1315 (defun unintern-uri (uri &optional (uri-space *uris*))
1317 then (clrhash uri-space)
1319 then (remhash uri uri-space)
1320 else (error "bad uri: ~s." uri)))
1322 (defmacro do-all-uris ((var &optional uri-space result-form)
1325 "do-all-uris (var [[uri-space] result-form])
1326 {declaration}* {tag | statement}*
1327 Executes the forms once for each uri with var bound to the current uri"
1330 (g-uri-space (gensym))
1331 (body (third (parse-body forms env))))
1332 `(let ((,g-uri-space (or ,uri-space *uris*)))
1334 (flet ((,f (,var &optional ,g-ignore)
1335 (declare (ignore-if-unused ,var ,g-ignore))
1337 (maphash #',f ,g-uri-space))
1338 (return ,result-form)))))
1340 (defun sharp-u (stream chr arg)
1341 (declare (ignore chr arg))
1342 (let ((arg (read stream nil nil t)))
1346 then (parse-uri arg)
1349 (internal-reader-error
1351 "#u takes a string or list argument: ~s" arg)))))
1354 (set-dispatch-macro-character #\# #\u #'puri::sharp-u)
1356 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1360 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1362 ;; (don't run under emacs with M-x fi:common-lisp)
1365 (eval-when (:compile-toplevel :load-toplevel :execute)
1369 (defun gc (&rest options)
1370 (declare (ignore options))
1375 (defun time-uri-module ()
1376 (declare (optimize (speed 3) (safety 0) (debug 0)))
1377 (let ((uri "http://www.franz.com/a/b;x;y;z/c/foo?bar=baz&xxx#foo")
1378 (uri2 "http://www.franz.com/a/b;x;y;z/c/%2ffoo?bar=baz&xxx#foo"))
1379 (gc t) (gc :tenure) (gc :tenure) (gc :tenure)
1380 (format t "~&;;; starting timing testing 1...~%")
1381 (time (dotimes (i 100000) (parse-uri uri)))
1383 (gc t) (gc :tenure) (gc :tenure) (gc :tenure)
1384 (format t "~&;;; starting timing testing 2...~%")
1385 (let ((uri (parse-uri uri)))
1386 (time (dotimes (i 100000)
1387 ;; forces no caching of the printed representation:
1388 (setf (uri-string uri) nil)
1389 (format nil "~a" uri))))
1391 (gc t) (gc :tenure) (gc :tenure) (gc :tenure)
1392 (format t "~&;;; starting timing testing 3...~%")
1395 (dotimes (i 100000) (parse-uri uri2))
1396 (let ((uri (parse-uri uri)))
1398 ;; forces no caching of the printed representation:
1399 (setf (uri-string uri) nil)
1400 (format nil "~a" uri)))))))
1402 ;;******** reference output (ultra, modified 5.0.1):
1403 ;;; starting timing testing 1...
1404 ; cpu time (non-gc) 13,710 msec user, 0 msec system
1405 ; cpu time (gc) 600 msec user, 10 msec system
1406 ; cpu time (total) 14,310 msec user, 10 msec system
1407 ; real time 14,465 msec
1409 ; 1,804,261 cons cells, 7 symbols, 41,628,832 other bytes, 0 static bytes
1410 ;;; starting timing testing 2...
1411 ; cpu time (non-gc) 27,500 msec user, 0 msec system
1412 ; cpu time (gc) 280 msec user, 20 msec system
1413 ; cpu time (total) 27,780 msec user, 20 msec system
1414 ; real time 27,897 msec
1416 ; 1,900,463 cons cells, 0 symbols, 17,693,712 other bytes, 0 static bytes
1417 ;;; starting timing testing 3...
1418 ; cpu time (non-gc) 52,290 msec user, 10 msec system
1419 ; cpu time (gc) 1,290 msec user, 30 msec system
1420 ; cpu time (total) 53,580 msec user, 40 msec system
1421 ; real time 54,062 msec
1423 ; 7,800,205 cons cells, 0 symbols, 81,697,496 other bytes, 0 static bytes
1425 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1426 ;;; after improving decode-escaped-encoding/encode-escaped-encoding:
1428 ;;; starting timing testing 1...
1429 ; cpu time (non-gc) 14,520 msec user, 0 msec system
1430 ; cpu time (gc) 400 msec user, 0 msec system
1431 ; cpu time (total) 14,920 msec user, 0 msec system
1432 ; real time 15,082 msec
1434 ; 1,800,270 cons cells, 0 symbols, 41,600,160 other bytes, 0 static bytes
1435 ;;; starting timing testing 2...
1436 ; cpu time (non-gc) 27,490 msec user, 10 msec system
1437 ; cpu time (gc) 300 msec user, 0 msec system
1438 ; cpu time (total) 27,790 msec user, 10 msec system
1439 ; real time 28,025 msec
1441 ; 1,900,436 cons cells, 0 symbols, 17,693,712 other bytes, 0 static bytes
1442 ;;; starting timing testing 3...
1443 ; cpu time (non-gc) 47,900 msec user, 20 msec system
1444 ; cpu time (gc) 920 msec user, 10 msec system
1445 ; cpu time (total) 48,820 msec user, 30 msec system
1446 ; real time 49,188 msec
1448 ; 3,700,215 cons cells, 0 symbols, 81,707,144 other bytes, 0 static bytes