X-Git-Url: http://git.kpe.io/?p=kmrcl.git;a=blobdiff_plain;f=strings.lisp;h=166c5fa28bec6e30ff68dd0083f8ab583f6bc8ec;hp=fd2f37ca071e8dc6a7f8f7b1a849cbf849d19738;hb=c806ada1a12795e377d2ad588e56e051c7a92598;hpb=7367c68a5daa2ef45c7adf1f4097596f84f5e4dd diff --git a/strings.lisp b/strings.lisp index fd2f37c..166c5fa 100644 --- a/strings.lisp +++ b/strings.lisp @@ -9,7 +9,7 @@ ;;;; ;;;; $Id$ ;;;; -;;;; This file, part of KMRCL, is Copyright (c) 2002 by Kevin M. Rosenberg +;;;; This file, part of KMRCL, is Copyright (c) 2002-2006 by Kevin M. Rosenberg ;;;; ;;;; KMRCL users are granted the rights to distribute and use this software ;;;; as governed by the terms of the Lisp Lesser GNU Public License @@ -61,7 +61,7 @@ (declare (fixnum i)) (when (char/= char (schar string i)) (return i)))) -(defun delimited-string-to-list (string &optional (separator #\space) +(defun delimited-string-to-list (string &optional (separator #\space) skip-terminal) "split a string with delimiter" (declare (optimize (speed 3) (safety 0) (space 0) (compilation-speed 0)) @@ -106,7 +106,7 @@ (defun escape-backslashes (s) (substitute-string-for-char s #\\ "\\\\")) -(defun substitute-string-for-char (procstr match-char subst-str) +(defun substitute-string-for-char (procstr match-char subst-str) "Substitutes a string for a single matching character of a string" (substitute-chars-strings procstr (list (cons match-char subst-str)))) @@ -153,12 +153,16 @@ (defun is-string-empty (str) (zerop (length str))) -(defvar *whitespace-chars* '(#\space #\tab #\return #\linefeed)) +(defvar *whitespace-chars* '(#\space #\tab #\return #\linefeed + #+allegro #\%space + #+lispworks #\No-Break-Space)) -(defun is-char-whitespace (c) +(defun is-char-whitespace (c) (declare (character c) (optimize (speed 3) (safety 0))) (or (char= c #\Space) (char= c #\Tab) (char= c #\Return) - (char= c #\Linefeed))) + (char= c #\Linefeed) + #+allegro (char= c #\%space) + #+lispworks (char= c #\No-Break-Space))) (defun is-string-whitespace (str) "Return t if string is all whitespace" @@ -223,17 +227,20 @@ list of characters and replacement strings." (defun make-usb8-array (len) (make-array len :element-type '(unsigned-byte 8))) -(defun usb8-array-to-string (vec) - (declare (type (simple-array (unsigned-byte 8) (*)) vec)) - (let* ((len (length vec)) +(defun usb8-array-to-string (vec &key (start 0) end) + (declare (type (simple-array (unsigned-byte 8) (*)) vec) + (fixnum start)) + (unless end + (setq end (length vec))) + (let* ((len (- end start)) (str (make-string len))) (declare (fixnum len) (simple-string str) - (optimize (speed 3))) + (optimize (speed 3) (safety 0))) (do ((i 0 (1+ i))) ((= i len) str) (declare (fixnum i)) - (setf (schar str i) (code-char (aref vec i)))))) + (setf (schar str i) (code-char (aref vec (the fixnum (+ i start)))))))) (defun string-to-usb8-array (str) (declare (simple-string str)) @@ -270,27 +277,40 @@ list of characters and replacement strings." (unless (and last-elem last-list) (write-string separator strm))))) -(defun prefixed-fixnum-string (num pchar len) - "Outputs a string of LEN digit with an optional initial character PCHAR. -Leading zeros are present." - (declare (optimize (speed 3) (safety 0) (space 0)) - (type fixnum num len)) - (when pchar - (incf len)) - (do* ((zero-code (char-code #\0)) - (result (make-string len :initial-element #\0)) - (minus? (minusp num)) - (val (if minus? (- num) num) - (nth-value 0 (floor val 10))) - (pos (1- len) (1- pos)) - (mod (mod val 10) (mod val 10))) - ((or (zerop val) (minusp pos)) +(eval-when (:compile-toplevel :load-toplevel :execute) + (defmacro def-prefixed-number-string (fn-name type &optional doc) + `(defun ,fn-name (num pchar len) + ,@(when (stringp doc) (list doc)) + (declare (optimize (speed 3) (safety 0) (space 0)) + (fixnum len) + (,type num)) (when pchar - (setf (schar result 0) pchar)) - (when minus? (setf (schar result (if pchar 1 0)) #\-)) - result) - (declare (fixnum val mod zero-code pos) (simple-string result)) - (setf (schar result pos) (code-char (+ zero-code mod))))) + (incf len)) + (do* ((zero-code (char-code #\0)) + (result (make-string len :initial-element #\0)) + (minus? (minusp num)) + (val (if minus? (- num) num) + (nth-value 0 (floor val 10))) + (pos (1- len) (1- pos)) + (mod (mod val 10) (mod val 10))) + ((or (zerop val) (minusp pos)) + (when pchar + (setf (schar result 0) pchar)) + (when minus? (setf (schar result (if pchar 1 0)) #\-)) + result) + (declare (,type val) + (fixnum mod zero-code pos) + (boolean minus?) + (simple-string result)) + (setf (schar result pos) (code-char (the fixnum (+ zero-code mod)))))))) + +(def-prefixed-number-string prefixed-fixnum-string fixnum + "Outputs a string of LEN digit with an optional initial character PCHAR. +Leading zeros are present. LEN must be a fixnum.") + +(def-prefixed-number-string prefixed-integer-string integer + "Outputs a string of LEN digit with an optional initial character PCHAR. +Leading zeros are present. LEN must be an integer.") (defun integer-string (num len) "Outputs a string of LEN digit with an optional initial character PCHAR. @@ -346,7 +366,7 @@ Leading zeros are present." (type (or fixnum null) end)) (push (subseq str pos end) output) (setq pos (+ end substr-len)))) - + (defun string-to-list-skip-delimiter (str &optional (delim #\space)) "Return a list of strings, delimited by spaces, skipping spaces." (declare (simple-string str) @@ -409,21 +429,27 @@ for characters in a string" (declare (type (integer 0 15) n)) (schar +hex-chars+ n)) -(defconstant +char-code-lower-a+ (char-code #\a)) -(defconstant +char-code-upper-a+ (char-code #\A)) -(defconstant +char-code-0+ (char-code #\0)) +(defconstant* +char-code-lower-a+ (char-code #\a)) +(defconstant* +char-code-upper-a+ (char-code #\A)) +(defconstant* +char-code-0+ (char-code #\0)) (declaim (type fixnum +char-code-0+ +char-code-upper-a+ +char-code-0)) (defun charhex (ch) "convert hex character to decimal" (let ((code (char-code (char-upcase ch)))) - (declare (fixnum ch)) + (declare (fixnum ch)) (if (>= code +char-code-upper-a+) (+ 10 (- code +char-code-upper-a+)) (- code +char-code-0+)))) -(defun uriencode-string (query) +(defun binary-sequence-to-hex-string (seq) + (let ((list (etypecase seq + (list seq) + (sequence (map 'list #'identity seq))))) + (string-downcase (format nil "~{~2,'0X~}" list)))) + +(defun encode-uri-string (query) "Escape non-alphanumeric characters for URI fields" (declare (simple-string query) (optimize (speed 3) (safety 0) (space 0))) @@ -446,7 +472,7 @@ for characters in a string" (setf (schar str dpos) (hexchar (logand c 15)))) (setf (schar str dpos) ch))))) -(defun uridecode-string (query) +(defun decode-uri-string (query) "Unescape non-alphanumeric characters for URI fields" (declare (simple-string query) (optimize (speed 3) (safety 0) (space 0))) @@ -472,9 +498,9 @@ for characters in a string" (eval-when (:compile-toplevel :load-toplevel :execute) - (defvar +unambigous-charset+ + (defvar +unambiguous-charset+ "abcdefghjkmnpqrstuvwxyz123456789ABCDEFGHJKLMNPQSRTUVWXYZ") - (defconstant +unambigous-length+ (length +unambigous-charset+))) + (defconstant* +unambiguous-length+ (length +unambiguous-charset+))) (defun random-char (&optional (set :lower-alpha)) (ecase set @@ -487,14 +513,14 @@ for characters in a string" (code-char (+ +char-code-lower-a+ n))))) (:upper-alpha (code-char (+ +char-code-upper-a+ (random 26)))) - (:unambigous - (schar +unambigous-charset+ (random +unambigous-length+))) + (:unambiguous + (schar +unambiguous-charset+ (random +unambiguous-length+))) (:upper-lower-alpha (let ((n (random 52))) (if (>= n 26) (code-char (+ +char-code-upper-a+ (- n 26))) (code-char (+ +char-code-lower-a+ n))))))) - + (defun random-string (&key (length 10) (set :lower-alpha)) "Returns a random lower-case string." @@ -545,14 +571,27 @@ for characters in a string" (length ending))))) (return-from string-strip-ending (subseq str 0 (- len (length ending)))))))) - + (defun string-maybe-shorten (str maxlen) - (let ((len (length str))) - (if (<= len maxlen) - str - (concatenate 'string (subseq str 0 (- maxlen 3)) "...")))) + (string-elide str maxlen :end)) +(defun string-elide (str maxlen position) + (declare (fixnum maxlen)) + (let ((len (length str))) + (declare (fixnum len)) + (cond + ((<= len maxlen) + str) + ((<= maxlen 3) + "...") + ((eq position :middle) + (multiple-value-bind (mid remain) (truncate maxlen 2) + (let ((end1 (- mid 1)) + (start2 (- len (- mid 2) remain))) + (concatenate 'string (subseq str 0 end1) "..." (subseq str start2))))) + ((or (eq position :end) t) + (concatenate 'string (subseq str 0 (- maxlen 3)) "..."))))) (defun shrink-vector (str size) #+allegro @@ -574,7 +613,7 @@ for characters in a string" (flet ((is-sep (char) (member char whitespace :test #'char=))) (let ((tokens nil)) (do* ((token-start - (position-if-not #'is-sep string) + (position-if-not #'is-sep string) (when token-end (position-if-not #'is-sep string :start (1+ token-end)))) (token-end @@ -587,10 +626,15 @@ for characters in a string" (defun split-alphanumeric-string (string) "Separates a string at any non-alphanumeric chararacter" - (flet ((is-sep (char) (non-alphanumericp char))) + (declare (simple-string string) + (optimize (speed 3) (safety 0))) + (flet ((is-sep (char) + (declare (character char)) + (and (non-alphanumericp char) + (not (char= #\_ char))))) (let ((tokens nil)) (do* ((token-start - (position-if-not #'is-sep string) + (position-if-not #'is-sep string) (when token-end (position-if-not #'is-sep string :start (1+ token-end)))) (token-end @@ -602,4 +646,54 @@ for characters in a string" (push (subseq string token-start token-end) tokens))))) - +(defun trim-non-alphanumeric (word) + "Strip non-alphanumeric characters from beginning and end of a word." + (declare (simple-string word) + (optimize (speed 3) (safety 0) (space 0))) + (let* ((start 0) + (len (length word)) + (end len)) + (declare (fixnum start end len)) + (do ((done nil)) + ((or done (= start end))) + (if (alphanumericp (schar word start)) + (setq done t) + (incf start))) + (when (> end start) + (do ((done nil)) + ((or done (= start end))) + (if (alphanumericp (schar word (1- end))) + (setq done t) + (decf end)))) + (if (or (plusp start) (/= len end)) + (subseq word start end) + word))) + + +(defun collapse-whitespace (s) + "Convert multiple whitespace characters to a single space character." + (declare (simple-string s) + (optimize (speed 3) (safety 0))) + (with-output-to-string (stream) + (do ((pos 0 (1+ pos)) + (in-white nil) + (len (length s))) + ((= pos len)) + (declare (fixnum pos len)) + (let ((c (schar s pos))) + (declare (character c)) + (cond + ((kl:is-char-whitespace c) + (unless in-white + (write-char #\space stream)) + (setq in-white t)) + (t + (setq in-white nil) + (write-char c stream))))))) + +(defun string->list (string) + (let ((eof (list nil))) + (with-input-from-string (stream string) + (do ((x (read stream nil eof) (read stream nil eof)) + (l nil (cons x l))) + ((eq x eof) (nreverse l))))))