;;;; Programmer: Kevin M. Rosenberg
;;;; Date Started: Apr 2000
;;;;
-;;;; $Id: strings.lisp,v 1.37 2003/06/12 02:38:39 kevin Exp $
+;;;; $Id: strings.lisp,v 1.50 2003/07/21 00:52:56 kevin Exp $
;;;;
;;;; This file, part of KMRCL, is Copyright (c) 2002 by Kevin M. Rosenberg
;;;;
(null (find-if #'not-whitespace? str))))
(defun replaced-string-length (str repl-alist)
- (declare (simple-string str))
- (let* ((orig-len (length str))
- (new-len orig-len))
- (declare (fixnum orig-len new-len))
- (dotimes (i orig-len)
- (declare (fixnum i))
+ (declare (simple-string str)
+ (optimize (speed 3) (safety 0) (space 0)))
+ (do* ((i 0 (1+ i))
+ (orig-len (length str))
+ (new-len orig-len))
+ ((= i orig-len) new-len)
+ (declare (fixnum i orig-len new-len))
(let* ((c (char str i))
(match (assoc c repl-alist :test #'char=)))
(declare (character c))
(when match
- (incf new-len (1- (length (cdr match)))))))
- new-len))
+ (incf new-len (1- (length
+ (the simple-string (cdr match)))))))))
(defun substitute-chars-strings (str repl-alist)
"Replace all instances of a chars with a string. repl-alist is an assoc
list of characters and replacement strings."
- (declare (simple-string str))
+ (declare (simple-string str)
+ (optimize (speed 3) (safety 0) (space 0)))
(do* ((orig-len (length str))
(new-string (make-string (replaced-string-length str repl-alist)))
(spos 0 (1+ spos))
(do* ((zero-code (char-code #\0))
(result (make-string len :initial-element #\0))
(minus? (minusp num))
- (val (if minus? (- 0 num) num) (floor (/ val 10)))
+ (val (if minus? (- num) num)
+ (nth-value 0 (floor val 10)))
(pos (1- len) (1- pos))
(mod (mod val 10) (mod val 10)))
((or (zerop val) (minusp pos))
"Outputs a string of LEN digit with an optional initial character PCHAR.
Leading zeros are present."
(declare (optimize (speed 3) (safety 0) (space 0))
- (type fixnum len) (type integer num))
+ (type fixnum len)
+ (type integer num))
(do* ((zero-code (char-code #\0))
(result (make-string len :initial-element #\0))
(minus? (minusp num))
- (val (if minus? (- 0 num) num) (floor (/ val 10)))
+ (val (if minus? (- 0 num) num)
+ (nth-value 0 (floor val 10)))
(pos (1- len) (1- pos))
(mod (mod val 10) (mod val 10)))
((or (zerop val) (minusp pos))
(unless (char= (schar str (+ i pos)) (schar substr i))
(return nil)))))
+(defun string-delimited-string-to-list (str substr)
+ "splits a string delimited by substr into a list of strings"
+ (declare (simple-string str substr)
+ (optimize (speed 3) (safety 0) (space 0) (compilation-speed 0)
+ (debug 0)))
+ (do* ((substr-len (length substr))
+ (strlen (length str))
+ (output '())
+ (pos 0)
+ (end (fast-string-search substr str substr-len pos strlen)
+ (fast-string-search substr str substr-len pos strlen)))
+ ((null end)
+ (when (< pos strlen)
+ (push (subseq str pos) output))
+ (nreverse output))
+ (declare (fixnum strlen substr-len pos)
+ (type (or fixnum null) end))
+ (push (subseq str pos end) output)
+ (setq pos (+ end substr-len))))
+
(defun string-to-list-skip-delimiter (str &optional (delim #\space))
"Return a list of strings, delimited by spaces, skipping spaces."
(declare (simple-string str)
(when (and i (< i end))
(push (subseq str i end) results))
(nreverse results))
- (declare (fixnum i j end))
+ (declare (fixnum end)
+ (type (or fixnum null) i j))
(push (subseq str i j) results)))
(defun string-starts-with (start str)
(and (>= (length str) (length start))
(string-equal start str :end2 (length start))))
+
+(defun count-string-char (s c)
+ "Return a count of the number of times a character appears in a string"
+ (declare (simple-string s)
+ (character c)
+ (optimize (speed 3) (safety 0)))
+ (do ((len (length s))
+ (i 0 (1+ i))
+ (count 0))
+ ((= i len) count)
+ (declare (fixnum i len count))
+ (when (char= (schar s i) c)
+ (incf count))))
+
+(defun count-string-char-if (pred s)
+ "Return a count of the number of times a predicate is true
+for characters in a string"
+ (declare (simple-string s)
+ (type (or function symbol) pred)
+ (optimize (speed 3) (safety 0) (space 0)))
+ (do ((len (length s))
+ (i 0 (1+ i))
+ (count 0))
+ ((= i len) count)
+ (declare (fixnum i len count))
+ (when (funcall pred (schar s i))
+ (incf count))))
+
+
+;;; URL Encoding
+
+(defun non-alphanumericp (ch)
+ (not (alphanumericp ch)))
+
+(defvar +hex-chars+ "0123456789ABCDEF")
+(declaim (type simple-string +hex-chars+))
+
+(defun hexchar (n)
+ (declare (type (integer 0 15) n))
+ (schar +hex-chars+ n))
+
+(defconstant +char-code-lower-a+ (char-code #\a))
+(defconstant +char-code-upper-a+ (char-code #\A))
+(defconstant +char-code-0+ (char-code #\0))
+(declaim (type fixnum +char-code-0+ +char-code-upper-a+
+ +char-code-0))
+
+(defun charhex (ch)
+ "convert hex character to decimal"
+ (let ((code (char-code (char-upcase ch))))
+ (declare (fixnum ch))
+ (if (>= code +char-code-upper-a+)
+ (+ 10 (- code +char-code-upper-a+))
+ (- code +char-code-0+))))
+
+(defun escape-uri-field (query)
+ "Escape non-alphanumeric characters for URI fields"
+ (declare (simple-string query)
+ (optimize (speed 3) (safety 0) (space 0)))
+ (do* ((count (count-string-char-if #'non-alphanumericp query))
+ (len (length query))
+ (new-len (+ len (* 2 count)))
+ (str (make-string new-len))
+ (spos 0 (1+ spos))
+ (dpos 0 (1+ dpos)))
+ ((= spos len) str)
+ (declare (fixnum count len new-len spos dpos)
+ (simple-string str))
+ (let ((ch (schar query spos)))
+ (if (non-alphanumericp ch)
+ (let ((c (char-code ch)))
+ (setf (schar str dpos) #\%)
+ (incf dpos)
+ (setf (schar str dpos) (hexchar (logand (ash c -4) 15)))
+ (incf dpos)
+ (setf (schar str dpos) (hexchar (logand c 15))))
+ (setf (schar str dpos) ch)))))
+
+(defun unescape-uri-field (query)
+ "Unescape non-alphanumeric characters for URI fields"
+ (declare (simple-string query)
+ (optimize (speed 3) (safety 0) (space 0)))
+ (do* ((count (count-string-char query #\%))
+ (len (length query))
+ (new-len (- len (* 2 count)))
+ (str (make-string new-len))
+ (spos 0 (1+ spos))
+ (dpos 0 (1+ dpos)))
+ ((= spos len) str)
+ (declare (fixnum count len new-len spos dpos)
+ (simple-string str))
+ (let ((ch (schar query spos)))
+ (if (char= #\% ch)
+ (let ((c1 (charhex (schar query (1+ spos))))
+ (c2 (charhex (schar query (+ spos 2)))))
+ (declare (fixnum c1 c2))
+ (setf (schar str dpos)
+ (code-char (logior c2 (ash c1 4))))
+ (incf spos 2))
+ (setf (schar str dpos) ch)))))
+
+
+
+(eval-when (:compile-toplevel :load-toplevel :execute)
+ (defvar +unambigous-charset+
+ "abcdefghjkmnpqrstuvwxyz123456789ABCDEFGHJKLMNPQSRTUVWXYZ")
+ (defconstant +unambigous-length+ (length +unambigous-charset+)))
+
+(defun random-char (&optional (set :lower-alpha))
+ (ecase set
+ (:lower-alpha
+ (code-char (+ +char-code-lower-a+ (random 26))))
+ (:lower-alphanumeric
+ (let ((n (random 36)))
+ (if (>= n 26)
+ (code-char (+ +char-code-0+ (- n 26)))
+ (code-char (+ +char-code-lower-a+ n)))))
+ (:upper-alpha
+ (code-char (+ +char-code-upper-a+ (random 26))))
+ (:unambigous
+ (schar +unambigous-charset+ (random +unambigous-length+)))
+ (:upper-lower-alpha
+ (let ((n (random 52)))
+ (if (>= n 26)
+ (code-char (+ +char-code-upper-a+ (- n 26)))
+ (code-char (+ +char-code-lower-a+ n)))))))
+
+
+(defun random-string (&key (length 10) (set :lower-alpha))
+ "Returns a random lower-case string."
+ (declare (optimize (speed 3)))
+ (let ((s (make-string length)))
+ (declare (simple-string s))
+ (dotimes (i length s)
+ (setf (schar s i) (random-char set)))))
+
+
+(defun first-char (s)
+ (declare (simple-string s))
+ (when (and (stringp s) (plusp (length s)))
+ (schar s 0)))
+
+(defun last-char (s)
+ (declare (simple-string s))
+ (when (stringp s)
+ (let ((len (length s)))
+ (when (plusp len))
+ (schar s (1- len)))))
+
+(defun ensure-string (v)
+ (typecase v
+ (string v)
+ (character (string v))
+ (symbol (symbol-name v))
+ (otherwise (write-to-string v))))
+
+(defun string-right-trim-one-char (char str)
+ (declare (simple-string str))
+ (let* ((len (length str))
+ (last (1- len)))
+ (declare (fixnum len last))
+ (if (char= char (schar str last))
+ (subseq str 0 last)
+ str)))
+
+
+(defun string-strip-ending (str endings)
+ (if (stringp endings)
+ (setq endings (list endings)))
+ (let ((len (length str)))
+ (dolist (ending endings str)
+ (when (and (>= len (length ending))
+ (string-equal ending
+ (subseq str (- len
+ (length ending)))))
+ (return-from string-strip-ending
+ (subseq str 0 (- len (length ending))))))))
+
+
+(defun string-maybe-shorten (str maxlen)
+ (let ((len (length str)))
+ (if (<= len maxlen)
+ str
+ (concatenate 'string (subseq str 0 (- maxlen 3)) "..."))))
+
+
+(defun shrink-vector (str size)
+ #+allegro
+ (excl::.primcall 'sys::shrink-svector str size)
+ #+cmu
+ (lisp::shrink-vector str size)
+ #+lispworks
+ (system::shrink-vector$vector str size)
+ #+sbcl
+ (sb-kernel:shrink-vector str size)
+ #+scl
+ (common-lisp::shrink-vector str size)
+ #-(or allegro cmu lispworks sbcl scl)
+ (setq str (subseq str 0 size))
+ str)
+
+(defun lex-string (string &key (whitespace '(#\space #\newline)))
+ "Separates a string at whitespace and returns a list of strings"
+ (flet ((whitespace? (char) (member char whitespace :test #'char=)))
+ (let ((tokens nil))
+ (do* ((token-start
+ (position-if-not #'whitespace? string)
+ (when token-end
+ (position-if-not #'whitespace? string :start (1+ token-end))))
+ (token-end
+ (when token-start
+ (position-if #'whitespace? string :start token-start))
+ (when token-start
+ (position-if #'whitespace? string :start token-start))))
+ ((null token-start) (nreverse tokens))
+ (push (subseq string token-start token-end) tokens)))))
+
+(defun split-alphanumeric-string (string)
+ "Separates a string at any non-alphanumeric chararacter"
+ (flet ((whitespace? (char) (non-alphanumericp char)))
+ (let ((tokens nil))
+ (do* ((token-start
+ (position-if-not #'whitespace? string)
+ (when token-end
+ (position-if-not #'whitespace? string :start (1+ token-end))))
+ (token-end
+ (when token-start
+ (position-if #'whitespace? string :start token-start))
+ (when token-start
+ (position-if #'whitespace? string :start token-start))))
+ ((null token-start) (nreverse tokens))
+ (push (subseq string token-start token-end) tokens)))))
+
+