X-Git-Url: http://git.kpe.io/?p=kmrcl.git;a=blobdiff_plain;f=strings.lisp;h=fbb130caad55b5d4442653f43d1d57290354bd4a;hp=cadd1acc29c6891b8a36e269201b0957f947322d;hb=12e4f3260043dbc89dc8efd78cb9f116e0173054;hpb=bf3d36016fc385a78d51d588dbb918599cfbc99a diff --git a/strings.lisp b/strings.lisp index cadd1ac..fbb130c 100644 --- a/strings.lisp +++ b/strings.lisp @@ -7,7 +7,7 @@ ;;;; Programmer: Kevin M. Rosenberg ;;;; Date Started: Apr 2000 ;;;; -;;;; $Id: strings.lisp,v 1.47 2003/07/09 19:19:19 kevin Exp $ +;;;; $Id$ ;;;; ;;;; This file, part of KMRCL, is Copyright (c) 2002 by Kevin M. Rosenberg ;;;; @@ -150,22 +150,32 @@ (setq hash (+ hash (char-code (char str i))))) (logand hash bitmask))) -(defun string-not-null? (str) - (and str (not (zerop (length str))))) - -(defun whitespace? (c) - (declare (character c)) - (locally (declare (optimize (speed 3) (safety 0))) - (or (char= c #\Space) (char= c #\Tab) (char= c #\Return) - (char= c #\Linefeed)))) +(defun is-string-empty (str) + (zerop (length str))) + +(defvar *whitespace-chars* '(#\space #\tab #\return #\linefeed + #+allegro #\%space + #+lispworks #\No-Break-Space)) -(defun not-whitespace? (c) - (not (whitespace? c))) +(defun is-char-whitespace (c) + (declare (character c) (optimize (speed 3) (safety 0))) + (or (char= c #\Space) (char= c #\Tab) (char= c #\Return) + (char= c #\Linefeed) + #+allegro (char= c #\%space) + #+lispworks (char= c #\No-Break-Space))) -(defun string-ws? (str) +(defun is-string-whitespace (str) "Return t if string is all whitespace" - (when (stringp str) - (null (find-if #'not-whitespace? str)))) + (every #'is-char-whitespace str)) + +(defun string-right-trim-whitespace (str) + (string-right-trim *whitespace-chars* str)) + +(defun string-left-trim-whitespace (str) + (string-left-trim *whitespace-chars* str)) + +(defun string-trim-whitespace (str) + (string-trim *whitespace-chars* str)) (defun replaced-string-length (str repl-alist) (declare (simple-string str) @@ -217,17 +227,20 @@ list of characters and replacement strings." (defun make-usb8-array (len) (make-array len :element-type '(unsigned-byte 8))) -(defun usb8-array-to-string (vec) - (declare (type (simple-array (unsigned-byte 8) (*)) vec)) - (let* ((len (length vec)) +(defun usb8-array-to-string (vec &key (start 0) end) + (declare (type (simple-array (unsigned-byte 8) (*)) vec) + (fixnum start)) + (unless end + (setq end (length vec))) + (let* ((len (- end start)) (str (make-string len))) (declare (fixnum len) (simple-string str) - (optimize (speed 3))) + (optimize (speed 3) (safety 0))) (do ((i 0 (1+ i))) ((= i len) str) (declare (fixnum i)) - (setf (schar str i) (code-char (aref vec i)))))) + (setf (schar str i) (code-char (aref vec (the fixnum (+ i start)))))))) (defun string-to-usb8-array (str) (declare (simple-string str)) @@ -403,9 +416,11 @@ for characters in a string" (declare (type (integer 0 15) n)) (schar +hex-chars+ n)) -(defconstant +char-code-0+ (char-code #\0)) -(defconstant +char-code-upper-a+ (char-code #\A)) -(declaim (type fixnum +char-code-0+ +char-code-upper-a+)) +(defconstant* +char-code-lower-a+ (char-code #\a)) +(defconstant* +char-code-upper-a+ (char-code #\A)) +(defconstant* +char-code-0+ (char-code #\0)) +(declaim (type fixnum +char-code-0+ +char-code-upper-a+ + +char-code-0)) (defun charhex (ch) "convert hex character to decimal" @@ -415,7 +430,7 @@ for characters in a string" (+ 10 (- code +char-code-upper-a+)) (- code +char-code-0+)))) -(defun escape-uri-field (query) +(defun encode-uri-string (query) "Escape non-alphanumeric characters for URI fields" (declare (simple-string query) (optimize (speed 3) (safety 0) (space 0))) @@ -438,7 +453,7 @@ for characters in a string" (setf (schar str dpos) (hexchar (logand c 15)))) (setf (schar str dpos) ch))))) -(defun unescape-uri-field (query) +(defun decode-uri-string (query) "Unescape non-alphanumeric characters for URI fields" (declare (simple-string query) (optimize (speed 3) (safety 0) (space 0))) @@ -462,16 +477,39 @@ for characters in a string" (setf (schar str dpos) ch))))) -(defconstant +char-code-a+ (char-code #\a)) -(defun random-string (&optional (len 10)) +(eval-when (:compile-toplevel :load-toplevel :execute) + (defvar +unambiguous-charset+ + "abcdefghjkmnpqrstuvwxyz123456789ABCDEFGHJKLMNPQSRTUVWXYZ") + (defconstant* +unambiguous-length+ (length +unambiguous-charset+))) + +(defun random-char (&optional (set :lower-alpha)) + (ecase set + (:lower-alpha + (code-char (+ +char-code-lower-a+ (random 26)))) + (:lower-alphanumeric + (let ((n (random 36))) + (if (>= n 26) + (code-char (+ +char-code-0+ (- n 26))) + (code-char (+ +char-code-lower-a+ n))))) + (:upper-alpha + (code-char (+ +char-code-upper-a+ (random 26)))) + (:unambiguous + (schar +unambiguous-charset+ (random +unambiguous-length+))) + (:upper-lower-alpha + (let ((n (random 52))) + (if (>= n 26) + (code-char (+ +char-code-upper-a+ (- n 26))) + (code-char (+ +char-code-lower-a+ n))))))) + + +(defun random-string (&key (length 10) (set :lower-alpha)) "Returns a random lower-case string." (declare (optimize (speed 3))) - (let ((s (make-string len))) + (let ((s (make-string length))) (declare (simple-string s)) - (dotimes (i len s) - (setf (schar s i) - (code-char (+ +char-code-a+ (random 26))))))) + (dotimes (i length s) + (setf (schar s i) (random-char set))))) (defun first-char (s) @@ -503,3 +541,140 @@ for characters in a string" str))) +(defun string-strip-ending (str endings) + (if (stringp endings) + (setq endings (list endings))) + (let ((len (length str))) + (dolist (ending endings str) + (when (and (>= len (length ending)) + (string-equal ending + (subseq str (- len + (length ending))))) + (return-from string-strip-ending + (subseq str 0 (- len (length ending)))))))) + + +(defun string-maybe-shorten (str maxlen) + (string-elide str maxlen :end)) + +(defun string-elide (str maxlen position) + (declare (fixnum maxlen)) + (let ((len (length str))) + (declare (fixnum len)) + (cond + ((<= len maxlen) + str) + ((<= maxlen 3) + "...") + ((eq position :middle) + (multiple-value-bind (mid remain) (truncate maxlen 2) + (let ((end1 (- mid 1)) + (start2 (- len (- mid 2) remain))) + (concatenate 'string (subseq str 0 end1) "..." (subseq str start2))))) + ((or (eq position :end) t) + (concatenate 'string (subseq str 0 (- maxlen 3)) "..."))))) + +(defun shrink-vector (str size) + #+allegro + (excl::.primcall 'sys::shrink-svector str size) + #+cmu + (lisp::shrink-vector str size) + #+lispworks + (system::shrink-vector$vector str size) + #+sbcl + (sb-kernel:shrink-vector str size) + #+scl + (common-lisp::shrink-vector str size) + #-(or allegro cmu lispworks sbcl scl) + (setq str (subseq str 0 size)) + str) + +(defun lex-string (string &key (whitespace '(#\space #\newline))) + "Separates a string at whitespace and returns a list of strings" + (flet ((is-sep (char) (member char whitespace :test #'char=))) + (let ((tokens nil)) + (do* ((token-start + (position-if-not #'is-sep string) + (when token-end + (position-if-not #'is-sep string :start (1+ token-end)))) + (token-end + (when token-start + (position-if #'is-sep string :start token-start)) + (when token-start + (position-if #'is-sep string :start token-start)))) + ((null token-start) (nreverse tokens)) + (push (subseq string token-start token-end) tokens))))) + +(defun split-alphanumeric-string (string) + "Separates a string at any non-alphanumeric chararacter" + (declare (simple-string string) + (optimize (speed 3) (safety 0))) + (flet ((is-sep (char) + (declare (character char)) + (and (non-alphanumericp char) + (not (char= #\_ char))))) + (let ((tokens nil)) + (do* ((token-start + (position-if-not #'is-sep string) + (when token-end + (position-if-not #'is-sep string :start (1+ token-end)))) + (token-end + (when token-start + (position-if #'is-sep string :start token-start)) + (when token-start + (position-if #'is-sep string :start token-start)))) + ((null token-start) (nreverse tokens)) + (push (subseq string token-start token-end) tokens))))) + + +(defun trim-non-alphanumeric (word) + "Strip non-alphanumeric characters from beginning and end of a word." + (declare (simple-string word) + (optimize (speed 3) (safety 0) (space 0))) + (let* ((start 0) + (len (length word)) + (end len)) + (declare (fixnum start end len)) + (do ((done nil)) + ((or done (= start end))) + (if (alphanumericp (schar word start)) + (setq done t) + (incf start))) + (when (> end start) + (do ((done nil)) + ((or done (= start end))) + (if (alphanumericp (schar word (1- end))) + (setq done t) + (decf end)))) + (if (or (plusp start) (/= len end)) + (subseq word start end) + word))) + + +(defun collapse-whitespace (s) + "Convert multiple whitespace characters to a single space character." + (declare (simple-string s) + (optimize (speed 3) (safety 0))) + (with-output-to-string (stream) + (do ((pos 0 (1+ pos)) + (in-white nil) + (len (length s))) + ((= pos len)) + (declare (fixnum pos len)) + (let ((c (schar s pos))) + (declare (character c)) + (cond + ((kl:is-char-whitespace c) + (unless in-white + (write-char #\space stream)) + (setq in-white t)) + (t + (setq in-white nil) + (write-char c stream))))))) + +(defun string->list (string) + (let ((eof (list nil))) + (with-input-from-string (stream string) + (do ((x (read stream nil eof) (read stream nil eof)) + (l nil (cons x l))) + ((eq x eof) (nreverse l))))))