1 ;; -*- mode: common-lisp; package: puri -*-
2 ;; Support for URIs in Allegro.
3 ;; For general URI information see RFC2396.
5 ;; copyright (c) 1999-2001 Franz Inc, Berkeley, CA - All rights reserved.
6 ;; copyright (c) 2003 Kevin Rosenberg (porting changes)
8 ;; The software, data and information contained herein are proprietary
9 ;; to, and comprise valuable trade secrets of, Franz, Inc. They are
10 ;; given in confidence by Franz, Inc. pursuant to a written license
11 ;; agreement, and may be stored and used only in accordance with the terms
14 ;; Restricted Rights Legend
15 ;; ------------------------
16 ;; Use, duplication, and disclosure of the software, data and information
17 ;; contained herein by any agency, department or entity of the U.S.
18 ;; Government are subject to restrictions of Restricted Rights for
19 ;; Commercial Software developed at private expense as specified in
20 ;; DOD FAR Supplement 52.227-7013 (c) (1) (ii), as applicable.
22 ;; Original version from ACL 6.1:
23 ;; uri.cl,v 2.3.6.4.2.1 2001/08/09 17:42:39 layer
25 ;; $Id: src.lisp,v 1.6 2003/07/19 18:21:43 kevin Exp $
30 #:uri ; the type and a function
34 #:uri-scheme ; and slots
40 #:uri-authority ; pseudo-slot accessor
43 #:urn-nid ; pseudo-slot accessor
44 #:urn-nss ; pseudo-slot accessor
53 #:make-uri-space ; interning...
62 (eval-when (compile) (declaim (optimize (speed 3))))
66 #-(or allegro lispworks)
67 (define-condition parse-error (error) ())
69 (defun shrink-vector (str size)
71 (excl::.primcall 'sys::shrink-svector str size)
73 (sb-kernel:shrink-vector str size)
75 (lisp::shrink-vector str size)
77 (system::shrink-vector$vector str size)
78 #+(or allegro cmu sbcl lispworks)
80 #-(or allegro cmu sbcl lispworks)
85 (defun .parse-error (fmt &rest args)
86 (error (make-condition 'parse-error :format-control fmt
87 :format-arguments args)))
90 (defun internal-reader-error (stream fmt &rest args)
91 (apply #'format stream fmt args))
93 #-allegro (defvar *current-case-mode* :case-insensitive-upper)
94 #+allegro (eval-when (compile load eval)
95 (import '(excl:*current-case-mode*
96 excl:delimited-string-to-list
98 excl::internal-reader-error
102 (defun position-char (char string start max)
103 (declare (optimize (speed 3) (safety 0) (space 0))
104 (fixnum start max) (simple-string string))
105 (do* ((i start (1+ i)))
108 (when (char= char (schar string i)) (return i))))
111 (defun delimited-string-to-list (string &optional (separator #\space)
113 (declare (optimize (speed 3) (safety 0) (space 0)
114 (compilation-speed 0))
116 (type character separator))
117 (do* ((len (length string))
120 (end (position-char separator string pos len)
121 (position-char separator string pos len)))
124 (push (subseq string pos) output)
125 (when (or (not skip-terminal) (zerop len))
128 (declare (type fixnum pos len)
129 (type (or null fixnum) end))
130 (push (subseq string pos end) output)
131 (setq pos (1+ end))))
134 (eval-when (:compile-toplevel :load-toplevel :execute)
135 (defvar if*-keyword-list '("then" "thenret" "else" "elseif"))
137 (defmacro if* (&rest args)
138 (do ((xx (reverse args) (cdr xx))
145 (cond ((eq state :compl)
147 (t (error "if*: illegal form ~s" args))))
148 (cond ((and (symbolp (car xx))
149 (member (symbol-name (car xx))
151 :test #'string-equal))
152 (setq lookat (symbol-name (car xx)))))
154 (cond ((eq state :init)
155 (cond (lookat (cond ((string-equal lookat "thenret")
159 "if*: bad keyword ~a" lookat))))
162 (push (car xx) col))))
165 (cond ((string-equal lookat "else")
168 "if*: multiples elses")))
171 (push `(t ,@col) totalcol))
172 ((string-equal lookat "then")
174 (t (error "if*: bad keyword ~s"
176 (t (push (car xx) col))))
180 "if*: keyword ~s at the wrong place " (car xx)))
181 (t (setq state :compl)
182 (push `(,(car xx) ,@col) totalcol))))
184 (cond ((not (string-equal lookat "elseif"))
185 (error "if*: missing elseif clause ")))
186 (setq state :init))))))
192 (scheme :initarg :scheme :initform nil :accessor uri-scheme)
193 (host :initarg :host :initform nil :accessor uri-host)
194 (port :initarg :port :initform nil :accessor uri-port)
195 (path :initarg :path :initform nil :accessor uri-path)
196 (query :initarg :query :initform nil :accessor uri-query)
197 (fragment :initarg :fragment :initform nil :accessor uri-fragment)
198 (plist :initarg :plist :initform nil :accessor uri-plist)
202 ;; used to prevent unnessary work, looking for chars to escape and
204 :initarg :escaped :initform nil :accessor uri-escaped)
206 ;; the cached printable representation of the URI. It *might* be
207 ;; different than the original string, though, because the user might
208 ;; have escaped non-reserved chars--they won't be escaped when the URI
210 :initarg :string :initform nil :accessor uri-string)
212 ;; the cached parsed representation of the URI path.
213 :initarg :parsed-path
215 :accessor .uri-parsed-path)
217 ;; cached sxhash, so we don't have to compute it more than once.
218 :initarg :hashcode :initform nil :accessor uri-hashcode)))
221 ((nid :initarg :nid :initform nil :accessor urn-nid)
222 (nss :initarg :nss :initform nil :accessor urn-nss)))
224 (eval-when (compile eval)
225 (defmacro clear-caching-on-slot-change (name)
226 `(defmethod (setf ,name) :around (new-value (self uri))
227 (declare (ignore new-value))
228 (prog1 (call-next-method)
229 (setf (uri-string self) nil)
230 ,@(when (eq name 'uri-path) `((setf (.uri-parsed-path self) nil)))
231 (setf (uri-hashcode self) nil))))
234 (clear-caching-on-slot-change uri-scheme)
235 (clear-caching-on-slot-change uri-host)
236 (clear-caching-on-slot-change uri-port)
237 (clear-caching-on-slot-change uri-path)
238 (clear-caching-on-slot-change uri-query)
239 (clear-caching-on-slot-change uri-fragment)
242 (defmethod make-load-form ((self uri) &optional env)
243 (declare (ignore env))
244 `(make-instance ',(class-name (class-of self))
245 :scheme ,(uri-scheme self)
246 :host ,(uri-host self)
247 :port ,(uri-port self)
248 :path ',(uri-path self)
249 :query ,(uri-query self)
250 :fragment ,(uri-fragment self)
251 :plist ',(uri-plist self)
252 :string ,(uri-string self)
253 :parsed-path ',(.uri-parsed-path self)))
255 (defmethod uri-p ((thing uri)) t)
256 (defmethod uri-p ((thing t)) nil)
260 (scheme (when uri (uri-scheme uri)))
261 (host (when uri (uri-host uri)))
262 (port (when uri (uri-port uri)))
263 (path (when uri (uri-path uri)))
265 (when uri (copy-list (.uri-parsed-path uri))))
266 (query (when uri (uri-query uri)))
267 (fragment (when uri (uri-fragment uri)))
268 (plist (when uri (copy-list (uri-plist uri))))
269 (class (when uri (class-of uri)))
270 &aux (escaped (when uri (uri-escaped uri))))
272 then (setf (uri-scheme place) scheme)
273 (setf (uri-host place) host)
274 (setf (uri-port place) port)
275 (setf (uri-path place) path)
276 (setf (.uri-parsed-path place) parsed-path)
277 (setf (uri-query place) query)
278 (setf (uri-fragment place) fragment)
279 (setf (uri-plist place) plist)
280 (setf (uri-escaped place) escaped)
281 (setf (uri-string place) nil)
282 (setf (uri-hashcode place) nil)
284 elseif (eq 'uri class)
285 then ;; allow the compiler to optimize the call to make-instance:
287 :scheme scheme :host host :port port :path path
288 :parsed-path parsed-path
289 :query query :fragment fragment :plist plist
290 :escaped escaped :string nil :hashcode nil)
291 else (make-instance class
292 :scheme scheme :host host :port port :path path
293 :parsed-path parsed-path
294 :query query :fragment fragment :plist plist
295 :escaped escaped :string nil :hashcode nil)))
297 (defmethod uri-parsed-path ((uri uri))
299 (when (null (.uri-parsed-path uri))
300 (setf (.uri-parsed-path uri)
301 (parse-path (uri-path uri) (uri-escaped uri))))
302 (.uri-parsed-path uri)))
304 (defmethod (setf uri-parsed-path) (path-list (uri uri))
305 (assert (and (consp path-list)
306 (or (member (car path-list) '(:absolute :relative)
308 (setf (uri-path uri) (render-parsed-path path-list t))
309 (setf (.uri-parsed-path uri) path-list)
312 (defun uri-authority (uri)
314 (let ((*print-pretty* nil))
315 (format nil "~a~@[:~a~]" (uri-host uri) (uri-port uri)))))
318 (if* (equalp "urn" (uri-scheme uri))
320 else (error "URI is not a URN: ~s." uri)))
323 (if* (equalp "urn" (uri-scheme uri))
325 else (error "URI is not a URN: ~s." uri)))
327 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
330 (defparameter *excluded-characters*
331 '(;; `delims' (except #\%, because it's handled specially):
332 #\< #\> #\" #\space #\#
334 #\{ #\} #\| #\\ #\^ #\[ #\] #\`))
336 (defun reserved-char-vector (chars &key except)
337 (do* ((a (make-array 127 :element-type 'bit :initial-element 0))
338 (chars chars (cdr chars))
339 (c (car chars) (car chars)))
341 (if* (and except (member c except :test #'char=))
343 else (setf (sbit a (char-int c)) 1))))
345 (defparameter *reserved-characters*
346 (reserved-char-vector
347 (append *excluded-characters*
348 '(#\; #\/ #\? #\: #\@ #\& #\= #\+ #\$ #\, #\%))))
349 (defparameter *reserved-authority-characters*
350 (reserved-char-vector
351 (append *excluded-characters* '(#\; #\/ #\? #\: #\@))))
352 (defparameter *reserved-path-characters*
353 (reserved-char-vector
354 (append *excluded-characters*
356 ;;;;The rfc says this should be here, but it doesn't make sense.
359 (defparameter *reserved-path-characters2*
360 ;; These are the same characters that are in
361 ;; *reserved-path-characters*, minus #\/. Why? Because the parsed
362 ;; representation of the path can contain the %2f converted into a /.
363 ;; That's the whole point of having the parsed representation, so that
364 ;; lisp programs can deal with the path element data in the most
366 (reserved-char-vector
367 (append *excluded-characters*
369 ;;;;The rfc says this should be here, but it doesn't make sense.
372 (defparameter *reserved-fragment-characters*
373 (reserved-char-vector (remove #\# *excluded-characters*)))
375 (eval-when (compile eval)
376 (defun gen-char-range-list (start end)
378 (endcode (1+ (char-int end)))
379 (chcode (char-int start)
383 ;; - has to be first, otherwise it signifies a range!
385 then (setq res (nreverse res))
388 else (nreverse res)))
389 (if* (= #.(char-int #\-) chcode)
391 else (push (code-char chcode) res))))
394 (defparameter *valid-nid-characters*
395 (reserved-char-vector
396 '#.(nconc (gen-char-range-list #\a #\z)
397 (gen-char-range-list #\A #\Z)
398 (gen-char-range-list #\0 #\9)
400 (defparameter *reserved-nss-characters*
401 (reserved-char-vector
402 (append *excluded-characters* '(#\& #\~ #\/ #\?))))
404 (defparameter *illegal-characters*
405 (reserved-char-vector (remove #\# *excluded-characters*)))
406 (defparameter *strict-illegal-query-characters*
407 (reserved-char-vector (append '(#\?) (remove #\# *excluded-characters*))))
408 (defparameter *illegal-query-characters*
409 (reserved-char-vector
410 *excluded-characters* :except '(#\^ #\| #\#)))
413 (defun parse-uri (thing &key (class 'uri) &aux escape)
414 (when (uri-p thing) (return-from parse-uri thing))
416 (setq escape (escape-p thing))
417 (multiple-value-bind (scheme host port path query fragment)
418 (parse-uri-string thing)
422 (case *current-case-mode*
423 ((:case-insensitive-upper :case-sensitive-upper)
425 ((:case-insensitive-lower :case-sensitive-lower)
427 (decode-escaped-encoding scheme escape))
428 (find-package :keyword))))
430 (when (and scheme (eq :urn scheme))
431 (return-from parse-uri
432 (make-instance 'urn :scheme scheme :nid host :nss path)))
434 (when host (setq host (decode-escaped-encoding host escape)))
436 (setq port (read-from-string port))
437 (when (not (numberp port)) (error "port is not a number: ~s." port))
438 (when (not (plusp port))
439 (error "port is not a positive integer: ~d." port))
440 (when (eql port (case scheme
446 (when (or (string= "" path)
447 (and ;; we canonicalize away a reference to just /:
449 (member scheme '(:http :https :ftp) :test #'eq)
454 (decode-escaped-encoding path escape *reserved-path-characters*)))
455 (when query (setq query (decode-escaped-encoding query escape)))
458 (decode-escaped-encoding fragment escape
459 *reserved-fragment-characters*)))
461 then ;; allow the compiler to optimize the make-instance call:
470 else ;; do it the slow way:
480 (defmethod uri ((thing uri))
483 (defmethod uri ((thing string))
486 (defmethod uri ((thing t))
487 (error "Cannot coerce ~s to a uri." thing))
489 (defvar *strict-parse* t)
491 (defun parse-uri-string (string &aux (illegal-chars *illegal-characters*))
492 (declare (optimize (speed 3)))
493 ;; Speed is important, so use a specialized state machine instead of
494 ;; regular expressions for parsing the URI string. The regexp we are
503 (end (length string))
508 (path-components '())
511 ;; namespace identifier, for urn parsing only:
513 (declare (fixnum state start end))
514 (flet ((read-token (kind &optional legal-chars)
518 else (let ((sindex start)
521 (declare (fixnum sindex))
524 (when (>= start end) (return nil))
525 (setq c (schar string start))
526 (let ((ci (char-int c)))
528 then (if* (and (eq :colon kind) (eq c #\:))
530 elseif (= 0 (sbit legal-chars ci))
533 URI ~s contains illegal character ~s at position ~d."
535 elseif (and (< ci 128)
537 (= 1 (sbit illegal-chars ci)))
538 then (.parse-error "~
539 URI ~s contains illegal character ~s at position ~d."
543 (#\? (return :question))
544 (#\# (return :hash))))
545 (:query (case c (#\# (return :hash))))
548 (#\: (return :colon))
549 (#\? (return :question))
551 (#\/ (return :slash)))))
553 (if* (> start sindex)
554 then ;; we found some chars
555 ;; before we stopped the parse
556 (setq tokval (subseq string sindex start))
558 else ;; immediately stopped at a special char
561 (failure (&optional why)
562 (.parse-error "illegal URI: ~s [~d]~@[: ~a~]"
565 (.parse-error "impossible state: ~d [~s]" state string)))
568 (0 ;; starting to parse
569 (ecase (read-token t)
571 (:question (setq state 7))
572 (:hash (setq state 8))
573 (:slash (setq state 3))
574 (:string (setq state 1))
575 (:end (setq state 9))))
576 (1 ;; seen <token><special char>
577 (let ((token tokval))
578 (ecase (read-token t)
579 (:colon (setq scheme token)
580 (if* (equalp "urn" scheme)
582 else (setq state 2)))
583 (:question (push token path-components)
585 (:hash (push token path-components)
587 (:slash (push token path-components)
588 (push "/" path-components)
591 (:end (push token path-components)
594 (ecase (read-token t)
596 (:question (setq state 7))
597 (:hash (setq state 8))
598 (:slash (setq state 3))
599 (:string (setq state 10))
600 (:end (setq state 9))))
601 (10 ;; seen <scheme>:<token>
602 (let ((token tokval))
603 (ecase (read-token t)
605 (:question (push token path-components)
607 (:hash (push token path-components)
609 (:slash (push token path-components)
612 (:end (push token path-components)
614 (3 ;; seen / or <scheme>:/
615 (ecase (read-token t)
617 (:question (push "/" path-components)
619 (:hash (push "/" path-components)
621 (:slash (setq state 4))
622 (:string (push "/" path-components)
623 (push tokval path-components)
625 (:end (push "/" path-components)
627 (4 ;; seen [<scheme>:]//
628 (ecase (read-token t)
630 (:question (failure))
633 (:string (setq host tokval)
636 (11 ;; seen [<scheme>:]//<host>
637 (ecase (read-token t)
638 (:colon (setq state 5))
639 (:question (setq state 7))
640 (:hash (setq state 8))
641 (:slash (push "/" path-components)
643 (:string (impossible))
644 (:end (setq state 9))))
645 (5 ;; seen [<scheme>:]//<host>:
646 (ecase (read-token t)
648 (:question (failure))
650 (:slash (push "/" path-components)
652 (:string (setq port tokval)
655 (12 ;; seen [<scheme>:]//<host>:[<port>]
656 (ecase (read-token t)
658 (:question (setq state 7))
659 (:hash (setq state 8))
660 (:slash (push "/" path-components)
662 (:string (impossible))
663 (:end (setq state 9))))
665 (ecase (read-token :path)
666 (:question (setq state 7))
667 (:hash (setq state 8))
668 (:string (push tokval path-components)
670 (:end (setq state 9))))
672 (ecase (read-token :path)
673 (:question (setq state 7))
674 (:hash (setq state 8))
675 (:string (impossible))
676 (:end (setq state 9))))
680 then *strict-illegal-query-characters*
681 else *illegal-query-characters*))
682 (ecase (prog1 (read-token :query)
683 (setq illegal-chars *illegal-characters*))
684 (:hash (setq state 8))
685 (:string (setq query tokval)
687 (:end (setq state 9))))
689 (ecase (read-token :query)
690 (:hash (setq state 8))
691 (:string (impossible))
692 (:end (setq state 9))))
694 (ecase (read-token :rest)
695 (:string (setq fragment tokval)
697 (:end (setq state 9))))
702 (apply #'concatenate 'simple-string (nreverse path-components))
705 (15 ;; seen urn:, read nid now
706 (case (read-token :colon *valid-nid-characters*)
707 (:string (setq nid tokval)
709 (t (failure "missing namespace identifier"))))
710 (16 ;; seen urn:<nid>
712 (:colon (setq state 17))
713 (t (failure "missing namespace specific string"))))
714 (17 ;; seen urn:<nid>:, rest is nss
715 (return (values scheme
719 (setq illegal-chars *reserved-nss-characters*)
723 "internal error in parse engine, wrong state: ~s." state)))))))
725 (defun escape-p (string)
726 (declare (optimize (speed 3)))
728 (max (the fixnum (length string))))
730 (declare (fixnum i max))
731 (when (char= #\% (schar string i))
734 (defun parse-path (path-string escape)
735 (do* ((xpath-list (delimited-string-to-list path-string #\/))
738 (if* (string= "" (car xpath-list))
739 then (setf (car xpath-list) :absolute)
740 else (push :relative xpath-list))
742 (pl (cdr path-list) (cdr pl))
744 ((null pl) path-list)
745 (if* (cdr (setq segments (delimited-string-to-list (car pl) #\;)))
746 then ;; there is a param
747 ;;; (setf (car pl) segments)
749 (mapcar #'(lambda (s)
750 (decode-escaped-encoding
751 s escape *reserved-path-characters2*))
754 ;;; (setf (car pl) (car segments))
756 (decode-escaped-encoding
757 (car segments) escape *reserved-path-characters2*)))))
759 (defun decode-escaped-encoding (string escape
760 &optional (reserved-chars
761 *reserved-characters*))
762 ;; Return a string with the real characters.
763 (when (null escape) (return-from decode-escaped-encoding string))
765 (max (length string))
766 (new-string (copy-seq string))
770 (shrink-vector new-string new-i))
771 (if* (char= #\% (setq ch (schar string i)))
772 then (when (> (+ i 3) max)
774 "Unsyntactic escaped encoding in ~s." string))
775 (setq ch (schar string (incf i)))
776 (setq ch2 (schar string (incf i)))
777 (when (not (and (setq chc (digit-char-p ch 16))
778 (setq chc2 (digit-char-p ch2 16))))
780 "Non-hexidecimal digits after %: %c%c." ch ch2))
781 (let ((ci (+ (* 16 chc) chc2)))
782 (if* (or (null reserved-chars)
783 (= 0 (sbit reserved-chars ci)))
785 (setf (schar new-string new-i)
787 else (setf (schar new-string new-i) #\%)
788 (setf (schar new-string (incf new-i)) ch)
789 (setf (schar new-string (incf new-i)) ch2)))
790 else (setf (schar new-string new-i) ch))))
792 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
795 (defun render-uri (uri stream
796 &aux (escape (uri-escaped uri))
797 (*print-pretty* nil))
798 (when (null (uri-string uri))
799 (setf (uri-string uri)
800 (let ((scheme (uri-scheme uri))
801 (host (uri-host uri))
802 (port (uri-port uri))
803 (path (uri-path uri))
804 (query (uri-query uri))
805 (fragment (uri-fragment uri)))
806 (concatenate 'simple-string
808 (encode-escaped-encoding
809 (string-downcase ;; for upper case lisps
810 (symbol-name scheme))
811 *reserved-characters* escape))
815 (encode-escaped-encoding
816 host *reserved-authority-characters* escape))
819 ;;;; too slow until ACL 6.0:
820 ;;; (format nil "~d" port)
821 ;;; (princ-to-string port)
822 #-allegro (princ-to-string port)
824 (with-output-to-string (s)
825 (excl::maybe-print-fast s port))
828 (encode-escaped-encoding path
830 ;;*reserved-path-characters*
833 (when query (encode-escaped-encoding query nil escape))
835 (when fragment (encode-escaped-encoding fragment nil escape))))))
837 then (format stream "~a" (uri-string uri))
838 else (uri-string uri)))
840 (defun render-parsed-path (path-list escape)
842 (first (car path-list))
843 (pl (cdr path-list) (cdr pl))
844 (pe (car pl) (car pl)))
846 (when res (apply #'concatenate 'simple-string (nreverse res))))
847 (when (or (null first)
848 (prog1 (eq :absolute first)
853 (encode-escaped-encoding pe *reserved-path-characters* escape)
855 else ;; contains params
856 (push (encode-escaped-encoding
857 (car pe) *reserved-path-characters* escape)
859 (dolist (item (cdr pe))
861 (push (encode-escaped-encoding
862 item *reserved-path-characters* escape)
865 (defun render-urn (urn stream
866 &aux (*print-pretty* nil))
867 (when (null (uri-string urn))
868 (setf (uri-string urn)
869 (let ((nid (urn-nid urn))
871 (concatenate 'simple-string "urn:" nid ":" nss))))
873 then (format stream "~a" (uri-string urn))
874 else (uri-string urn)))
876 (defparameter *escaped-encoding*
877 (vector #\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9 #\a #\b #\c #\d #\e #\f))
879 (defun encode-escaped-encoding (string reserved-chars escape)
880 (when (null escape) (return-from encode-escaped-encoding string))
881 ;; Make a string as big as it possibly needs to be (3 times the original
882 ;; size), and truncate it at the end.
883 (do* ((max (length string))
884 (new-max (* 3 max)) ;; worst case new size
885 (new-string (make-string new-max))
890 (shrink-vector new-string (incf new-i)))
891 (setq ci (char-int (setq c (schar string i))))
892 (if* (or (null reserved-chars)
894 (= 0 (sbit reserved-chars ci)))
897 (setf (schar new-string new-i) c)
898 else ;; need to escape it
899 (multiple-value-bind (q r) (truncate ci 16)
900 (setf (schar new-string (incf new-i)) #\%)
901 (setf (schar new-string (incf new-i)) (elt *escaped-encoding* q))
902 (setf (schar new-string (incf new-i))
903 (elt *escaped-encoding* r))))))
905 (defmethod print-object ((uri uri) stream)
907 then (format stream "#<~a ~a>" 'uri (render-uri uri nil))
908 else (render-uri uri stream)))
910 (defmethod print-object ((urn urn) stream)
912 then (format stream "#<~a ~a>" 'uri (render-urn urn nil))
913 else (render-urn urn stream)))
915 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
916 ;; merging and unmerging
918 (defmethod merge-uris ((uri string) (base string) &optional place)
919 (merge-uris (parse-uri uri) (parse-uri base) place))
921 (defmethod merge-uris ((uri uri) (base string) &optional place)
922 (merge-uris uri (parse-uri base) place))
924 (defmethod merge-uris ((uri string) (base uri) &optional place)
925 (merge-uris (parse-uri uri) base place))
927 (defmethod merge-uris ((uri uri) (base uri) &optional place)
928 ;; The following is from
929 ;; http://info.internet.isi.edu/in-notes/rfc/files/rfc2396.txt
930 ;; and is algorithm we use to merge URIs.
932 ;; For more information, see section 5.2 of the RFC.
936 (when (and (null (uri-parsed-path uri))
937 (null (uri-scheme uri))
938 (null (uri-host uri))
939 (null (uri-port uri))
940 (null (uri-query uri)))
941 (return-from merge-uris
942 (let ((new (copy-uri base :place place)))
943 (when (uri-query uri)
944 (setf (uri-query new) (uri-query uri)))
945 (when (uri-fragment uri)
946 (setf (uri-fragment new) (uri-fragment uri)))
949 (setq uri (copy-uri uri :place place))
952 (when (uri-scheme uri)
953 (return-from merge-uris uri))
954 (setf (uri-scheme uri) (uri-scheme base))
957 (when (uri-host uri) (go :done))
958 (setf (uri-host uri) (uri-host base))
959 (setf (uri-port uri) (uri-port base))
962 (let ((p (uri-parsed-path uri)))
963 (when (and p (eq :absolute (car p)))
964 (when (equal '(:absolute "") p)
965 ;; Canonicalize the way parsing does:
966 (setf (uri-path uri) nil))
971 (or (uri-parsed-path base)
972 ;; needed because we canonicalize away a path of just `/':
974 (path (uri-parsed-path uri))
976 (when (not (eq :absolute (car base-path)))
977 (error "Cannot merge ~a and ~a, since latter is not absolute."
982 (append (butlast base-path)
983 (if* path then (cdr path) else '(""))))
986 (let ((last (last new-path-list)))
987 (if* (atom (car last))
988 then (when (string= "." (car last))
989 (setf (car last) ""))
990 else (when (string= "." (caar last))
991 (setf (caar last) ""))))
993 (delete "." new-path-list :test #'(lambda (a b)
999 (let ((npl (cdr new-path-list))
1002 (string= ".." (let ((l (car (last npl))))
1009 :test #'(lambda (a b)
1014 (when (null index) (return))
1016 ;; The RFC says, in 6g, "that the implementation may handle
1017 ;; this error by retaining these components in the resolved
1018 ;; path, by removing them from the resolved path, or by
1019 ;; avoiding traversal of the reference." The examples in C.2
1020 ;; imply that we should do the first thing (retain them), so
1021 ;; that's what we'll do.
1024 then (setq npl (cddr npl))
1026 (dotimes (x (- index 2)) (setq tmp (cdr tmp)))
1027 (setf (cdr tmp) (cdddr tmp))))
1028 (setf (cdr new-path-list) npl)
1029 (when fix-tail (setq new-path-list (nconc new-path-list '("")))))
1032 ;; don't complain if new-path-list starts with `..'. See comment
1033 ;; above about this step.
1036 (when (or (equal '(:absolute "") new-path-list)
1037 (equal '(:absolute) new-path-list))
1038 (setq new-path-list nil))
1039 (setf (uri-path uri)
1040 (render-parsed-path new-path-list
1041 ;; don't know, so have to assume:
1046 (return-from merge-uris uri)))
1048 (defmethod enough-uri ((uri string) (base string) &optional place)
1049 (enough-uri (parse-uri uri) (parse-uri base) place))
1051 (defmethod enough-uri ((uri uri) (base string) &optional place)
1052 (enough-uri uri (parse-uri base) place))
1054 (defmethod enough-uri ((uri string) (base uri) &optional place)
1055 (enough-uri (parse-uri uri) base place))
1057 (defmethod enough-uri ((uri uri) (base uri) &optional place)
1058 (let ((new-scheme nil)
1061 (new-parsed-path nil))
1063 (when (or (and (uri-scheme uri)
1064 (not (equalp (uri-scheme uri) (uri-scheme base))))
1066 (not (equalp (uri-host uri) (uri-host base))))
1067 (not (equalp (uri-port uri) (uri-port base))))
1068 (return-from enough-uri uri))
1070 (when (null (uri-host uri))
1071 (setq new-host (uri-host base)))
1072 (when (null (uri-port uri))
1073 (setq new-port (uri-port base)))
1075 (when (null (uri-scheme uri))
1076 (setq new-scheme (uri-scheme base)))
1078 ;; Now, for the hard one, path.
1079 ;; We essentially do here what enough-namestring does.
1080 (do* ((base-path (uri-parsed-path base))
1081 (path (uri-parsed-path uri))
1082 (bp base-path (cdr bp))
1084 ((or (null bp) (null p))
1085 ;; If p is nil, that means we have something like
1086 ;; (enough-uri "/foo/bar" "/foo/bar/baz.htm"), so
1087 ;; new-parsed-path will be nil.
1089 (setq new-parsed-path (copy-list p))
1090 (when (not (symbolp (car new-parsed-path)))
1091 (push :relative new-parsed-path))))
1092 (if* (equal (car bp) (car p))
1094 else (setq new-parsed-path (copy-list p))
1095 (when (not (symbolp (car new-parsed-path)))
1096 (push :relative new-parsed-path))
1100 (when new-parsed-path
1101 (render-parsed-path new-parsed-path
1102 ;; don't know, so have to assume:
1104 (new-query (uri-query uri))
1105 (new-fragment (uri-fragment uri))
1106 (new-plist (copy-list (uri-plist uri))))
1107 (if* (and (null new-scheme)
1111 (null new-parsed-path)
1113 (null new-fragment))
1114 then ;; can't have a completely empty uri!
1116 :class (class-of uri)
1121 :class (class-of uri)
1127 :parsed-path new-parsed-path
1129 :fragment new-fragment
1130 :plist new-plist)))))
1132 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1133 ;; support for interning URIs
1135 (defun make-uri-space (&rest keys &key (size 777) &allow-other-keys)
1137 (apply #'make-hash-table :size size
1138 :hash-function 'uri-hash
1139 :test 'uri= :values nil keys)
1141 (apply #'make-hash-table :size size keys))
1143 (defun gethash-uri (uri table)
1144 #+allegro (gethash uri table)
1146 (let* ((hash (uri-hash uri))
1147 (existing (gethash hash table)))
1148 (dolist (u existing)
1150 (return-from gethash-uri (values u t))))
1153 (defun puthash-uri (uri table)
1154 #+allegro (excl:puthash-key uri table)
1156 (let ((existing (gethash (uri-hash uri) table)))
1157 (dolist (u existing)
1159 (return-from puthash-uri u)))
1160 (setf (gethash (uri-hash uri) table)
1161 (cons uri existing))
1165 (defun uri-hash (uri)
1166 (if* (uri-hashcode uri)
1168 else (setf (uri-hashcode uri)
1171 (render-uri uri nil)
1174 (render-uri uri nil))))))
1176 (defvar *uris* (make-uri-space))
1178 (defun uri-space () *uris*)
1180 (defun (setf uri-space) (new-val)
1181 (setq *uris* new-val))
1183 ;; bootstrapping (uri= changed from function to method):
1184 (when (fboundp 'uri=) (fmakunbound 'uri=))
1186 (defmethod uri= ((uri1 uri) (uri2 uri))
1187 (when (not (eq (uri-scheme uri1) (uri-scheme uri2)))
1188 (return-from uri= nil))
1189 ;; RFC2396 says: a URL with an explicit ":port", where the port is
1190 ;; the default for the scheme, is the equivalent to one where the
1191 ;; port is elided. Hmmmm. This means that this function has to be
1192 ;; scheme dependent. Grrrr.
1193 (let ((default-port (case (uri-scheme uri1)
1198 (and (equalp (uri-host uri1) (uri-host uri2))
1199 (eql (or (uri-port uri1) default-port)
1200 (or (uri-port uri2) default-port))
1201 (string= (uri-path uri1) (uri-path uri2))
1202 (string= (uri-query uri1) (uri-query uri2))
1203 (string= (uri-fragment uri1) (uri-fragment uri2)))))
1205 (defmethod uri= ((urn1 urn) (urn2 urn))
1206 (when (not (eq (uri-scheme urn1) (uri-scheme urn2)))
1207 (return-from uri= nil))
1208 (and (equalp (urn-nid urn1) (urn-nid urn2))
1209 (urn-nss-equal (urn-nss urn1) (urn-nss urn2))))
1211 (defun urn-nss-equal (nss1 nss2 &aux len)
1212 ;; Return t iff the nss values are the same.
1213 ;; %2c and %2C are equivalent.
1214 (when (or (null nss1) (null nss2)
1215 (not (= (setq len (length nss1))
1217 (return-from urn-nss-equal nil))
1222 (setq c1 (schar nss1 i))
1223 (setq c2 (schar nss2 i))
1226 (if* (and (char= #\% c1) (char= #\% c2))
1227 then (setq state :percent+1)
1228 elseif (char/= c1 c2)
1231 (when (char-not-equal c1 c2) (return nil))
1232 (setq state :percent+2))
1234 (when (char-not-equal c1 c2) (return nil))
1235 (setq state :char)))))
1237 (defmethod intern-uri ((xuri uri) &optional (uri-space *uris*))
1238 (let ((uri (gethash-uri xuri uri-space)))
1241 else (puthash-uri xuri uri-space))))
1243 (defmethod intern-uri ((uri string) &optional (uri-space *uris*))
1244 (intern-uri (parse-uri uri) uri-space))
1246 (defun unintern-uri (uri &optional (uri-space *uris*))
1248 then (clrhash uri-space)
1250 then (remhash uri uri-space)
1251 else (error "bad uri: ~s." uri)))
1253 (defmacro do-all-uris ((var &optional uri-space result-form)
1256 "do-all-uris (var [[uri-space] result-form])
1257 {declaration}* {tag | statement}*
1258 Executes the forms once for each uri with var bound to the current uri"
1261 (g-uri-space (gensym))
1262 (body #+allegro (third (excl::parse-body forms env))
1264 `(let ((,g-uri-space (or ,uri-space *uris*)))
1266 (flet ((,f (,var &optional ,g-ignore)
1267 (declare (ignore-if-unused ,var ,g-ignore))
1269 (maphash #',f ,g-uri-space))
1270 (return ,result-form)))))
1272 (defun sharp-u (stream chr arg)
1273 (declare (ignore chr arg))
1274 (let ((arg (read stream nil nil t)))
1278 then (parse-uri arg)
1281 (internal-reader-error
1283 "#u takes a string or list argument: ~s" arg)))))
1289 (locally (declare (special std-lisp-readtable))
1290 (let ((*readtable* std-lisp-readtable))
1291 (set-dispatch-macro-character #\# #\u #'puri::sharp-u)))
1293 (set-dispatch-macro-character #\# #\u #'puri::sharp-u)
1295 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1299 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1301 ;; (don't run under emacs with M-x fi:common-lisp)
1304 (defun time-uri-module ()
1305 (declare (optimize (speed 3) (safety 0) (debug 0)))
1306 (let ((uri "http://www.franz.com/a/b;x;y;z/c/foo?bar=baz&xxx#foo")
1307 (uri2 "http://www.franz.com/a/b;x;y;z/c/%2ffoo?bar=baz&xxx#foo"))
1308 (gc t) (gc :tenure) (gc :tenure) (gc :tenure)
1309 (format t "~&;;; starting timing testing 1...~%")
1310 (time (dotimes (i 100000) (parse-uri uri)))
1312 (gc t) (gc :tenure) (gc :tenure) (gc :tenure)
1313 (format t "~&;;; starting timing testing 2...~%")
1314 (let ((uri (parse-uri uri)))
1315 (time (dotimes (i 100000)
1316 ;; forces no caching of the printed representation:
1317 (setf (uri-string uri) nil)
1318 (format nil "~a" uri))))
1320 (gc t) (gc :tenure) (gc :tenure) (gc :tenure)
1321 (format t "~&;;; starting timing testing 3...~%")
1324 (dotimes (i 100000) (parse-uri uri2))
1325 (let ((uri (parse-uri uri)))
1327 ;; forces no caching of the printed representation:
1328 (setf (uri-string uri) nil)
1329 (format nil "~a" uri)))))))
1331 ;;******** reference output (ultra, modified 5.0.1):
1332 ;;; starting timing testing 1...
1333 ; cpu time (non-gc) 13,710 msec user, 0 msec system
1334 ; cpu time (gc) 600 msec user, 10 msec system
1335 ; cpu time (total) 14,310 msec user, 10 msec system
1336 ; real time 14,465 msec
1338 ; 1,804,261 cons cells, 7 symbols, 41,628,832 other bytes, 0 static bytes
1339 ;;; starting timing testing 2...
1340 ; cpu time (non-gc) 27,500 msec user, 0 msec system
1341 ; cpu time (gc) 280 msec user, 20 msec system
1342 ; cpu time (total) 27,780 msec user, 20 msec system
1343 ; real time 27,897 msec
1345 ; 1,900,463 cons cells, 0 symbols, 17,693,712 other bytes, 0 static bytes
1346 ;;; starting timing testing 3...
1347 ; cpu time (non-gc) 52,290 msec user, 10 msec system
1348 ; cpu time (gc) 1,290 msec user, 30 msec system
1349 ; cpu time (total) 53,580 msec user, 40 msec system
1350 ; real time 54,062 msec
1352 ; 7,800,205 cons cells, 0 symbols, 81,697,496 other bytes, 0 static bytes
1354 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1355 ;;; after improving decode-escaped-encoding/encode-escaped-encoding:
1357 ;;; starting timing testing 1...
1358 ; cpu time (non-gc) 14,520 msec user, 0 msec system
1359 ; cpu time (gc) 400 msec user, 0 msec system
1360 ; cpu time (total) 14,920 msec user, 0 msec system
1361 ; real time 15,082 msec
1363 ; 1,800,270 cons cells, 0 symbols, 41,600,160 other bytes, 0 static bytes
1364 ;;; starting timing testing 2...
1365 ; cpu time (non-gc) 27,490 msec user, 10 msec system
1366 ; cpu time (gc) 300 msec user, 0 msec system
1367 ; cpu time (total) 27,790 msec user, 10 msec system
1368 ; real time 28,025 msec
1370 ; 1,900,436 cons cells, 0 symbols, 17,693,712 other bytes, 0 static bytes
1371 ;;; starting timing testing 3...
1372 ; cpu time (non-gc) 47,900 msec user, 20 msec system
1373 ; cpu time (gc) 920 msec user, 10 msec system
1374 ; cpu time (total) 48,820 msec user, 30 msec system
1375 ; real time 49,188 msec
1377 ; 3,700,215 cons cells, 0 symbols, 81,707,144 other bytes, 0 static bytes