X-Git-Url: http://git.kpe.io/?a=blobdiff_plain;f=src.lisp;h=e6c1eda9e2b0d02686046eb064d8c40c161aa87f;hb=2b87997148945852b9d2b25e71a127b152e5132e;hp=22e34f1bee2dbd68c6c333a0f703a679db97f53a;hpb=a3cc7bca5ac50360113b3a51aa45926a28f4d4d3;p=puri.git diff --git a/src.lisp b/src.lisp index 22e34f1..e6c1eda 100644 --- a/src.lisp +++ b/src.lisp @@ -1,31 +1,32 @@ ;; -*- mode: common-lisp; package: puri -*- -;; Support for URIs in Allegro. +;; Support for URIs ;; For general URI information see RFC2396. ;; -;; copyright (c) 1999-2001 Franz Inc, Berkeley, CA - All rights reserved. -;; copyright (c) 2003 Kevin Rosenberg (porting changes) +;; copyright (c) 1999-2002 Franz Inc, Berkeley, CA - All rights reserved. +;; copyright (c) 2002-2005 Franz Inc, Oakland, CA - All rights reserved. +;; copyright (c) 2003-2006 Kevin Rosenberg (porting changes) ;; -;; The software, data and information contained herein are proprietary -;; to, and comprise valuable trade secrets of, Franz, Inc. They are -;; given in confidence by Franz, Inc. pursuant to a written license -;; agreement, and may be stored and used only in accordance with the terms -;; of such license. +;; This code is free software; you can redistribute it and/or +;; modify it under the terms of the version 2.1 of +;; the GNU Lesser General Public License as published by +;; the Free Software Foundation, as clarified by the +;; preamble found here: +;; http://opensource.franz.com/preamble.html ;; -;; Restricted Rights Legend -;; ------------------------ -;; Use, duplication, and disclosure of the software, data and information -;; contained herein by any agency, department or entity of the U.S. -;; Government are subject to restrictions of Restricted Rights for -;; Commercial Software developed at private expense as specified in -;; DOD FAR Supplement 52.227-7013 (c) (1) (ii), as applicable. -;; -;; Original version from ACL 6.1: +;; Versions ported from Franz's opensource release ;; uri.cl,v 2.3.6.4.2.1 2001/08/09 17:42:39 layer +;; uri.cl,v 2.9.84.1 2005/08/11 18:38:52 layer + +;; This code is distributed in the hope that it will be useful, +;; but without any warranty; without even the implied warranty of +;; merchantability or fitness for a particular purpose. See the GNU +;; Lesser General Public License for more details. ;; -;; $Id: src.lisp,v 1.2 2003/07/18 20:51:37 kevin Exp $ +;; $Id$ (defpackage #:puri (:use #:cl) + #-allegro (:nicknames #:net.uri) (:export #:uri ; the type and a function #:uri-p @@ -55,51 +56,87 @@ #:uri= #:intern-uri #:unintern-uri - #:do-all-uris)) + #:do-all-uris + + #:uri-parse-error ;; Added by KMR + )) (in-package #:puri) -(eval-when (compile) (declaim (optimize (speed 3)))) +(eval-when (:compile-toplevel) (declaim (optimize (speed 3)))) -(eval-when (:compile-toplevel :load-toplevel :execute) - (defvar if*-keyword-list '("then" "thenret" "else" "elseif"))) #-allegro -(define-condition parse-error (error) - () - ) +(defun parse-body (forms &optional env) + "Parses a body, returns (VALUES docstring declarations forms)" + (declare (ignore env)) + ;; fixme -- need to add parsing of multiple declarations + (let (docstring declarations) + (when (stringp (car forms)) + (setq docstring (car forms)) + (setq forms (cdr forms))) + (when (and (listp (car forms)) + (symbolp (caar forms)) + (string-equal (symbol-name '#:declare) + (symbol-name (caar forms)))) + (setq declarations (car forms)) + (setq forms (cdr forms))) + (values docstring declarations forms))) + +(defun shrink-vector (str size) + #+allegro + (excl::.primcall 'sys::shrink-svector str size) + #+sbcl + (setq str (sb-kernel:shrink-vector str size)) + #+cmu + (lisp::shrink-vector str size) + #+lispworks + (system::shrink-vector$vector str size) + #+scl + (common-lisp::shrink-vector str size) + #-(or allegro cmu lispworks sbcl scl) + (setq str (subseq str 0 size)) + str) + + +;; KMR: Added new condition to handle cross-implementation variances +;; in the parse-error condition many implementations define + +(define-condition uri-parse-error (parse-error) + ((fmt-control :initarg :fmt-control :accessor fmt-control) + (fmt-arguments :initarg :fmt-arguments :accessor fmt-arguments )) + (:report (lambda (c stream) + (format stream "Parse error:") + (apply #'format stream (fmt-control c) (fmt-arguments c))))) (defun .parse-error (fmt &rest args) - #+allegro (apply #'excl::.parse-error fmt args) - #-allegro (error - (make-condition 'parse-error :format-control fmt - :format-arguments args))) + (error 'uri-parse-error :fmt-control fmt :fmt-arguments args)) +#-allegro (defun internal-reader-error (stream fmt &rest args) - #+allegro - (apply #'excl::internal-reader-error stream fmt args) - #-allegro - (apply #'format stream - "#u takes a string or list argument: ~s" args)) + (apply #'format stream fmt args)) #-allegro (defvar *current-case-mode* :case-insensitive-upper) +#+allegro (eval-when (:compile-toplevel :load-toplevel :execute) + (import '(excl:*current-case-mode* + excl:delimited-string-to-list + excl::parse-body + excl::internal-reader-error + excl:if*))) -;; From Larry Hunter with modifications -(defun position-char (char string start max) +#-allegro +(defmethod position-char (char (string string) start max) (declare (optimize (speed 3) (safety 0) (space 0)) - (fixnum start max) (simple-string string)) + (fixnum start max) (string string)) (do* ((i start (1+ i))) ((= i max) nil) (declare (fixnum i)) - (when (char= char (schar string i)) (return i)))) - -#+allegro -(defun delimited-string-to-list (string &optional (separator #\space)) - (excl:delimited-string-to-list string)) + (when (char= char (char string i)) (return i)))) +#-allegro (defun delimited-string-to-list (string &optional (separator #\space) - skip-terminal) + skip-terminal) (declare (optimize (speed 3) (safety 0) (space 0) (compilation-speed 0)) (type string string) @@ -112,30 +149,34 @@ ((null end) (if (< pos len) (push (subseq string pos) output) - (when (or (not skip-terminal) (zerop len)) - (push "" output))) - (nreverse output)) + (when (and (plusp len) (not skip-terminal)) + (push "" output))) + (nreverse output)) (declare (type fixnum pos len) (type (or null fixnum) end)) (push (subseq string pos end) output) (setq pos (1+ end)))) - -(defmacro if* (&rest args) - (do ((xx (reverse args) (cdr xx)) - (state :init) - (elseseen nil) - (totalcol nil) + +#-allegro +(eval-when (:compile-toplevel :load-toplevel :execute) + (defvar if*-keyword-list '("then" "thenret" "else" "elseif")) + + (defmacro if* (&rest args) + (do ((xx (reverse args) (cdr xx)) + (state :init) + (elseseen nil) + (totalcol nil) (lookat nil nil) - (col nil)) - ((null xx) - (cond ((eq state :compl) - `(cond ,@totalcol)) - (t (error "if*: illegal form ~s" args)))) - (cond ((and (symbolp (car xx)) - (member (symbol-name (car xx)) - if*-keyword-list - :test #'string-equal)) - (setq lookat (symbol-name (car xx))))) + (col nil)) + ((null xx) + (cond ((eq state :compl) + `(cond ,@totalcol)) + (t (error "if*: illegal form ~s" args)))) + (cond ((and (symbolp (car xx)) + (member (symbol-name (car xx)) + if*-keyword-list + :test #'string-equal)) + (setq lookat (symbol-name (car xx))))) (cond ((eq state :init) (cond (lookat (cond ((string-equal lookat "thenret") @@ -169,7 +210,7 @@ ((eq state :compl) (cond ((not (string-equal lookat "elseif")) (error "if*: missing elseif clause "))) - (setq state :init))))) + (setq state :init)))))) (defclass uri () @@ -207,7 +248,7 @@ ((nid :initarg :nid :initform nil :accessor urn-nid) (nss :initarg :nss :initform nil :accessor urn-nss))) -(eval-when (compile eval) +(eval-when (:compile-toplevel :execute) (defmacro clear-caching-on-slot-change (name) `(defmethod (setf ,name) :around (new-value (self uri)) (declare (ignore new-value)) @@ -342,23 +383,11 @@ ;;;;The rfc says this should be here, but it doesn't make sense. ;; #\= #\/ #\?)))) -(defparameter *reserved-path-characters2* - ;; These are the same characters that are in - ;; *reserved-path-characters*, minus #\/. Why? Because the parsed - ;; representation of the path can contain the %2f converted into a /. - ;; That's the whole point of having the parsed representation, so that - ;; lisp programs can deal with the path element data in the most - ;; convenient form. - (reserved-char-vector - (append *excluded-characters* - '(#\; -;;;;The rfc says this should be here, but it doesn't make sense. - ;; #\= - #\?)))) + (defparameter *reserved-fragment-characters* (reserved-char-vector (remove #\# *excluded-characters*))) -(eval-when (compile eval) +(eval-when (:compile-toplevel :execute) (defun gen-char-range-list (start end) (do* ((res '()) (endcode (1+ (char-int end))) @@ -508,7 +537,7 @@ (setq res (loop (when (>= start end) (return nil)) - (setq c (schar string start)) + (setq c (char string start)) (let ((ci (char-int c))) (if* legal-chars then (if* (and (eq :colon kind) (eq c #\:)) @@ -615,7 +644,13 @@ URI ~s contains illegal character ~s at position ~d." (:colon (failure)) (:question (failure)) (:hash (failure)) - (:slash (failure)) + (:slash + (if* (and (equalp "file" scheme) + (null host)) + then ;; file:///... + (push "/" path-components) + (setq state 6) + else (failure))) (:string (setq host tokval) (setq state 11)) (:end (failure)))) @@ -685,7 +720,7 @@ URI ~s contains illegal character ~s at position ~d." (return (values scheme host port - (apply #'concatenate 'simple-string (nreverse path-components)) + (apply #'concatenate 'string (nreverse path-components)) query fragment))) ;; URN parsing: (15 ;; seen urn:, read nid now @@ -714,7 +749,7 @@ URI ~s contains illegal character ~s at position ~d." (max (the fixnum (length string)))) ((= i max) nil) (declare (fixnum i max)) - (when (char= #\% (schar string i)) + (when (char= #\% (char string i)) (return t)))) (defun parse-path (path-string escape) @@ -728,19 +763,23 @@ URI ~s contains illegal character ~s at position ~d." (pl (cdr path-list) (cdr pl)) segments) ((null pl) path-list) - (if* (cdr (setq segments (delimited-string-to-list (car pl) #\;))) + + (if* (cdr (setq segments + (if* (string= "" (car pl)) + then '("") + else (delimited-string-to-list (car pl) #\;)))) then ;; there is a param -;;; (setf (car pl) segments) (setf (car pl) (mapcar #'(lambda (s) - (decode-escaped-encoding - s escape *reserved-path-characters2*)) - segments)) + (decode-escaped-encoding s escape + ;; decode all %xx: + nil)) + segments)) else ;; no param -;;; (setf (car pl) (car segments)) (setf (car pl) - (decode-escaped-encoding - (car segments) escape *reserved-path-characters2*))))) + (decode-escaped-encoding (car segments) escape + ;; decode all %xx: + nil))))) (defun decode-escaped-encoding (string escape &optional (reserved-chars @@ -753,33 +792,28 @@ URI ~s contains illegal character ~s at position ~d." (new-i 0 (1+ new-i)) ch ch2 chc chc2) ((= i max) - #+allegro - (excl::.primcall 'sys::shrink-svector new-string new-i) - #+sbcl - (sb-kernel:shrink-vector new-string new-i) - #-(or allegro sbcl) - (subseq new-string 0 new-i) - new-string) - (if* (char= #\% (setq ch (schar string i))) + (shrink-vector new-string new-i)) + (if* (char= #\% (setq ch (char string i))) then (when (> (+ i 3) max) (.parse-error "Unsyntactic escaped encoding in ~s." string)) - (setq ch (schar string (incf i))) - (setq ch2 (schar string (incf i))) + (setq ch (char string (incf i))) + (setq ch2 (char string (incf i))) (when (not (and (setq chc (digit-char-p ch 16)) (setq chc2 (digit-char-p ch2 16)))) (.parse-error "Non-hexidecimal digits after %: %c%c." ch ch2)) (let ((ci (+ (* 16 chc) chc2))) (if* (or (null reserved-chars) + (> ci 127) ; bug11527 (= 0 (sbit reserved-chars ci))) then ;; ok as is - (setf (schar new-string new-i) + (setf (char new-string new-i) (code-char ci)) - else (setf (schar new-string new-i) #\%) - (setf (schar new-string (incf new-i)) ch) - (setf (schar new-string (incf new-i)) ch2))) - else (setf (schar new-string new-i) ch)))) + else (setf (char new-string new-i) #\%) + (setf (char new-string (incf new-i)) ch) + (setf (char new-string (incf new-i)) ch2))) + else (setf (char new-string new-i) ch)))) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;; Printing @@ -795,26 +829,22 @@ URI ~s contains illegal character ~s at position ~d." (path (uri-path uri)) (query (uri-query uri)) (fragment (uri-fragment uri))) - (concatenate 'simple-string + (concatenate 'string (when scheme (encode-escaped-encoding (string-downcase ;; for upper case lisps (symbol-name scheme)) *reserved-characters* escape)) (when scheme ":") - (when host "//") + (when (or host (eq :file scheme)) "//") (when host (encode-escaped-encoding host *reserved-authority-characters* escape)) (when port ":") (when port -;;;; too slow until ACL 6.0: -;;; (format nil "~d" port) -;;; (princ-to-string port) - #-allegro (princ-to-string port) - #+allegro - (with-output-to-string (s) - (excl::maybe-print-fast s port)) + #-allegro (format nil "~D" port) + #+allegro (with-output-to-string (s) + (excl::maybe-print-fast s port)) ) (when path (encode-escaped-encoding path @@ -835,7 +865,7 @@ URI ~s contains illegal character ~s at position ~d." (pl (cdr path-list) (cdr pl)) (pe (car pl) (car pl))) ((null pl) - (when res (apply #'concatenate 'simple-string (nreverse res)))) + (when res (apply #'concatenate 'string (nreverse res)))) (when (or (null first) (prog1 (eq :absolute first) (setq first nil))) @@ -860,7 +890,7 @@ URI ~s contains illegal character ~s at position ~d." (setf (uri-string urn) (let ((nid (urn-nid urn)) (nss (urn-nss urn))) - (concatenate 'simple-string "urn:" nid ":" nss)))) + (concatenate 'string "urn:" nid ":" nss)))) (if* stream then (format stream "~a" (uri-string urn)) else (uri-string urn))) @@ -879,25 +909,19 @@ URI ~s contains illegal character ~s at position ~d." (new-i -1) c ci) ((= i max) - #+allegro - (excl::.primcall 'sys::shrink-svector new-string (incf new-i)) - #+sbcl - (sb-kernel:shrink-vector new-string (incf new-i)) - #-(or allegro sbcl) - (subseq new-string 0 (incf new-i)) - new-string) - (setq ci (char-int (setq c (schar string i)))) + (shrink-vector new-string (incf new-i))) + (setq ci (char-int (setq c (char string i)))) (if* (or (null reserved-chars) (> ci 127) (= 0 (sbit reserved-chars ci))) then ;; ok as is (incf new-i) - (setf (schar new-string new-i) c) + (setf (char new-string new-i) c) else ;; need to escape it (multiple-value-bind (q r) (truncate ci 16) - (setf (schar new-string (incf new-i)) #\%) - (setf (schar new-string (incf new-i)) (elt *escaped-encoding* q)) - (setf (schar new-string (incf new-i)) + (setf (char new-string (incf new-i)) #\%) + (setf (char new-string (incf new-i)) (elt *escaped-encoding* q)) + (setf (char new-string (incf new-i)) (elt *escaped-encoding* r)))))) (defmethod print-object ((uri uri) stream) @@ -922,12 +946,10 @@ URI ~s contains illegal character ~s at position ~d." (defmethod merge-uris ((uri string) (base uri) &optional place) (merge-uris (parse-uri uri) base place)) + (defmethod merge-uris ((uri uri) (base uri) &optional place) - ;; The following is from - ;; http://info.internet.isi.edu/in-notes/rfc/files/rfc2396.txt - ;; and is algorithm we use to merge URIs. - ;; - ;; For more information, see section 5.2 of the RFC. + ;; See ../doc/rfc2396.txt for info on the algorithm we use to merge + ;; URIs. ;; (tagbody ;;;; step 2 @@ -943,7 +965,7 @@ URI ~s contains illegal character ~s at position ~d." (when (uri-fragment uri) (setf (uri-fragment new) (uri-fragment uri))) new))) - + (setq uri (copy-uri uri :place place)) ;;;; step 3 @@ -958,6 +980,18 @@ URI ~s contains illegal character ~s at position ~d." ;;;; step 5 (let ((p (uri-parsed-path uri))) + + ;; bug13133: + ;; The following form causes our implementation to be at odds with + ;; RFC 2396, however this is apparently what was intended by the + ;; authors of the RFC. Specifically, (merge-uris "?y" "/foo") + ;; should return # instead of #, according to + ;; this: +;;; http://www.apache.org/~fielding/uri/rev-2002/issues.html#003-relative-query + (when (null p) + (setf (uri-path uri) (uri-path base)) + (go :done)) + (when (and p (eq :absolute (car p))) (when (equal '(:absolute "") p) ;; Canonicalize the way parsing does: @@ -1181,6 +1215,7 @@ URI ~s contains illegal character ~s at position ~d." ;; bootstrapping (uri= changed from function to method): (when (fboundp 'uri=) (fmakunbound 'uri=)) +(defgeneric uri= (uri1 uri2)) (defmethod uri= ((uri1 uri) (uri2 uri)) (when (not (eq (uri-scheme uri1) (uri-scheme uri2))) (return-from uri= nil)) @@ -1217,8 +1252,8 @@ URI ~s contains illegal character ~s at position ~d." (state :char) c1 c2) ((= i len) t) - (setq c1 (schar nss1 i)) - (setq c2 (schar nss2 i)) + (setq c1 (char nss1 i)) + (setq c2 (char nss2 i)) (ecase state (:char (if* (and (char= #\% c1) (char= #\% c2)) @@ -1257,8 +1292,7 @@ Executes the forms once for each uri with var bound to the current uri" (let ((f (gensym)) (g-ignore (gensym)) (g-uri-space (gensym)) - (body #+allegro (third (excl::parse-body forms env)) - #-allegro forms)) + (body (third (parse-body forms env)))) `(let ((,g-uri-space (or ,uri-space *uris*))) (prog nil (flet ((,f (,var &optional ,g-ignore) @@ -1280,6 +1314,7 @@ Executes the forms once for each uri with var bound to the current uri" stream "#u takes a string or list argument: ~s" arg))))) + #+allegro excl:: #+allegro @@ -1297,7 +1332,17 @@ excl:: ;; timings ;; (don't run under emacs with M-x fi:common-lisp) -#+ignore +#+allegro +(eval-when (:compile-toplevel :load-toplevel :execute) + (import 'excl::gc)) + +#-allegro +(defun gc (&rest options) + (declare (ignore options)) + #+sbcl (sb-ext::gc) + #+cmu (ext::gc) + ) + (defun time-uri-module () (declare (optimize (speed 3) (safety 0) (debug 0))) (let ((uri "http://www.franz.com/a/b;x;y;z/c/foo?bar=baz&xxx#foo")