X-Git-Url: http://git.kpe.io/?p=kmrcl.git;a=blobdiff_plain;f=strings.lisp;h=6b6d9df01474248c55f72790edd1b905becc4a0d;hp=da39d48edd98a1b56cbd1153944cd0d7630a2d83;hb=5738e60dc3724dc7d022d0fd2d5f2dbe337be470;hpb=ba354a40a203103a4cf16cd3d21f89f707ba5205 diff --git a/strings.lisp b/strings.lisp index da39d48..6b6d9df 100644 --- a/strings.lisp +++ b/strings.lisp @@ -1,4 +1,4 @@ -<;;;; -*- Mode: Lisp; Syntax: ANSI-Common-Lisp; Base: 10 -*- +;;;; -*- Mode: Lisp; Syntax: ANSI-Common-Lisp; Base: 10 -*- ;;;; ************************************************************************* ;;;; FILE IDENTIFICATION ;;;; @@ -7,7 +7,7 @@ ;;;; Programmer: Kevin M. Rosenberg ;;;; Date Started: Apr 2000 ;;;; -;;;; $Id: strings.lisp,v 1.32 2003/05/16 12:51:11 kevin Exp $ +;;;; $Id: strings.lisp,v 1.50 2003/07/21 00:52:56 kevin Exp $ ;;;; ;;;; This file, part of KMRCL, is Copyright (c) 2002 by Kevin M. Rosenberg ;;;; @@ -17,7 +17,7 @@ ;;;; ************************************************************************* -(in-package :kmrcl) +(in-package #:kmrcl) ;;; Strings @@ -53,6 +53,14 @@ (declare (fixnum i)) (when (char= char (schar string i)) (return i)))) +(defun position-not-char (char string start max) + (declare (optimize (speed 3) (safety 0) (space 0)) + (fixnum start max) (simple-string string)) + (do* ((i start (1+ i))) + ((= i max) nil) + (declare (fixnum i)) + (when (char/= char (schar string i)) (return i)))) + (defun delimited-string-to-list (string &optional (separator #\space) skip-terminal) "split a string with delimiter" @@ -76,8 +84,8 @@ (setq pos (1+ end)))) -(defun list-to-delimited-string (list &optional (separator #\space)) - (format nil (format nil "~~{~~A~~^~A~~}" separator) list)) +(defun list-to-delimited-string (list &optional (separator " ")) + (format nil (concatenate 'string "~{~A~^" (string separator) "~}") list)) (defun string-invert (str) "Invert case of a string" @@ -160,23 +168,25 @@ (null (find-if #'not-whitespace? str)))) (defun replaced-string-length (str repl-alist) - (declare (string str)) - (let* ((orig-len (length str)) - (new-len orig-len)) - (declare (fixnum orig-len new-len)) - (dotimes (i orig-len) - (declare (fixnum i)) + (declare (simple-string str) + (optimize (speed 3) (safety 0) (space 0))) + (do* ((i 0 (1+ i)) + (orig-len (length str)) + (new-len orig-len)) + ((= i orig-len) new-len) + (declare (fixnum i orig-len new-len)) (let* ((c (char str i)) (match (assoc c repl-alist :test #'char=))) (declare (character c)) (when match - (incf new-len (1- (length (cdr match))))))) - new-len)) + (incf new-len (1- (length + (the simple-string (cdr match))))))))) (defun substitute-chars-strings (str repl-alist) "Replace all instances of a chars with a string. repl-alist is an assoc list of characters and replacement strings." - (declare (simple-string str)) + (declare (simple-string str) + (optimize (speed 3) (safety 0) (space 0))) (do* ((orig-len (length str)) (new-string (make-string (replaced-string-length str repl-alist))) (spos 0 (1+ spos)) @@ -190,7 +200,8 @@ list of characters and replacement strings." (if match (let* ((subst (cdr match)) (len (length subst))) - (declare (fixnum len)) + (declare (fixnum len) + (simple-string subst)) (dotimes (j len) (declare (fixnum j)) (setf (char new-string dpos) (char subst j)) @@ -201,39 +212,38 @@ list of characters and replacement strings." (defun escape-xml-string (string) "Escape invalid XML characters" - (substitute-chars-strings - string '((#\& . "&") (#\> . ">") (#\< . "<") (#\" . """)))) - + (substitute-chars-strings string '((#\& . "&") (#\< . "<")))) (defun make-usb8-array (len) - (make-array len :adjustable nil - :fill-pointer nil - :element-type '(unsigned-byte 8))) + (make-array len :element-type '(unsigned-byte 8))) (defun usb8-array-to-string (vec) + (declare (type (simple-array (unsigned-byte 8) (*)) vec)) (let* ((len (length vec)) (str (make-string len))) (declare (fixnum len) (simple-string str) (optimize (speed 3))) - (dotimes (i len) + (do ((i 0 (1+ i))) + ((= i len) str) (declare (fixnum i)) - (setf (schar str i) (code-char (aref vec i)))) - str)) + (setf (schar str i) (code-char (aref vec i)))))) (defun string-to-usb8-array (str) + (declare (simple-string str)) (let* ((len (length str)) (vec (make-usb8-array len))) (declare (fixnum len) - (type (array fixnum (*)) vec) + (type (simple-array (unsigned-byte 8) (*)) vec) (optimize (speed 3))) - (dotimes (i len) + (do ((i 0 (1+ i))) + ((= i len) vec) (declare (fixnum i)) - (setf (aref vec i) (char-code (schar str i)))) - vec)) + (setf (aref vec i) (char-code (schar str i)))))) (defun concat-separated-strings (separator &rest lists) - (format nil (format nil "~~{~~A~~^~A~~}" separator) (append-sublists lists))) + (format nil (concatenate 'string "~{~A~^" (string separator) "~}") + (append-sublists lists))) (defun only-null-list-elements-p (lst) (or (null lst) (every #'null lst))) @@ -264,7 +274,8 @@ Leading zeros are present." (do* ((zero-code (char-code #\0)) (result (make-string len :initial-element #\0)) (minus? (minusp num)) - (val (if minus? (- 0 num) num) (floor (/ val 10))) + (val (if minus? (- num) num) + (nth-value 0 (floor val 10))) (pos (1- len) (1- pos)) (mod (mod val 10) (mod val 10))) ((or (zerop val) (minusp pos)) @@ -279,15 +290,309 @@ Leading zeros are present." "Outputs a string of LEN digit with an optional initial character PCHAR. Leading zeros are present." (declare (optimize (speed 3) (safety 0) (space 0)) - (type fixnum len) (type integer num)) + (type fixnum len) + (type integer num)) (do* ((zero-code (char-code #\0)) (result (make-string len :initial-element #\0)) (minus? (minusp num)) - (val (if minus? (- 0 num) num) (floor (/ val 10))) + (val (if minus? (- 0 num) num) + (nth-value 0 (floor val 10))) (pos (1- len) (1- pos)) (mod (mod val 10) (mod val 10))) ((or (zerop val) (minusp pos)) - (when minus? (setf (schar result (if pchar 1 0)) #\-)) + (when minus? (setf (schar result 0) #\-)) result) (declare (fixnum mod zero-code pos) (simple-string result) (integer val)) (setf (schar result pos) (code-char (+ zero-code mod))))) + +(defun fast-string-search (substr str substr-length startpos endpos) + "Optimized search for a substring in a simple-string" + (declare (simple-string substr str) + (fixnum substr-length startpos endpos) + (optimize (speed 3) (space 0) (safety 0))) + (do* ((pos startpos (1+ pos)) + (lastpos (- endpos substr-length))) + ((> pos lastpos) nil) + (declare (fixnum pos lastpos)) + (do ((i 0 (1+ i))) + ((= i substr-length) + (return-from fast-string-search pos)) + (declare (fixnum i)) + (unless (char= (schar str (+ i pos)) (schar substr i)) + (return nil))))) + +(defun string-delimited-string-to-list (str substr) + "splits a string delimited by substr into a list of strings" + (declare (simple-string str substr) + (optimize (speed 3) (safety 0) (space 0) (compilation-speed 0) + (debug 0))) + (do* ((substr-len (length substr)) + (strlen (length str)) + (output '()) + (pos 0) + (end (fast-string-search substr str substr-len pos strlen) + (fast-string-search substr str substr-len pos strlen))) + ((null end) + (when (< pos strlen) + (push (subseq str pos) output)) + (nreverse output)) + (declare (fixnum strlen substr-len pos) + (type (or fixnum null) end)) + (push (subseq str pos end) output) + (setq pos (+ end substr-len)))) + +(defun string-to-list-skip-delimiter (str &optional (delim #\space)) + "Return a list of strings, delimited by spaces, skipping spaces." + (declare (simple-string str) + (optimize (speed 0) (space 0) (safety 0))) + (do* ((results '()) + (end (length str)) + (i (position-not-char delim str 0 end) + (position-not-char delim str j end)) + (j (when i (position-char delim str i end)) + (when i (position-char delim str i end)))) + ((or (null i) (null j)) + (when (and i (< i end)) + (push (subseq str i end) results)) + (nreverse results)) + (declare (fixnum end) + (type (or fixnum null) i j)) + (push (subseq str i j) results))) + +(defun string-starts-with (start str) + (and (>= (length str) (length start)) + (string-equal start str :end2 (length start)))) + +(defun count-string-char (s c) + "Return a count of the number of times a character appears in a string" + (declare (simple-string s) + (character c) + (optimize (speed 3) (safety 0))) + (do ((len (length s)) + (i 0 (1+ i)) + (count 0)) + ((= i len) count) + (declare (fixnum i len count)) + (when (char= (schar s i) c) + (incf count)))) + +(defun count-string-char-if (pred s) + "Return a count of the number of times a predicate is true +for characters in a string" + (declare (simple-string s) + (type (or function symbol) pred) + (optimize (speed 3) (safety 0) (space 0))) + (do ((len (length s)) + (i 0 (1+ i)) + (count 0)) + ((= i len) count) + (declare (fixnum i len count)) + (when (funcall pred (schar s i)) + (incf count)))) + + +;;; URL Encoding + +(defun non-alphanumericp (ch) + (not (alphanumericp ch))) + +(defvar +hex-chars+ "0123456789ABCDEF") +(declaim (type simple-string +hex-chars+)) + +(defun hexchar (n) + (declare (type (integer 0 15) n)) + (schar +hex-chars+ n)) + +(defconstant +char-code-lower-a+ (char-code #\a)) +(defconstant +char-code-upper-a+ (char-code #\A)) +(defconstant +char-code-0+ (char-code #\0)) +(declaim (type fixnum +char-code-0+ +char-code-upper-a+ + +char-code-0)) + +(defun charhex (ch) + "convert hex character to decimal" + (let ((code (char-code (char-upcase ch)))) + (declare (fixnum ch)) + (if (>= code +char-code-upper-a+) + (+ 10 (- code +char-code-upper-a+)) + (- code +char-code-0+)))) + +(defun escape-uri-field (query) + "Escape non-alphanumeric characters for URI fields" + (declare (simple-string query) + (optimize (speed 3) (safety 0) (space 0))) + (do* ((count (count-string-char-if #'non-alphanumericp query)) + (len (length query)) + (new-len (+ len (* 2 count))) + (str (make-string new-len)) + (spos 0 (1+ spos)) + (dpos 0 (1+ dpos))) + ((= spos len) str) + (declare (fixnum count len new-len spos dpos) + (simple-string str)) + (let ((ch (schar query spos))) + (if (non-alphanumericp ch) + (let ((c (char-code ch))) + (setf (schar str dpos) #\%) + (incf dpos) + (setf (schar str dpos) (hexchar (logand (ash c -4) 15))) + (incf dpos) + (setf (schar str dpos) (hexchar (logand c 15)))) + (setf (schar str dpos) ch))))) + +(defun unescape-uri-field (query) + "Unescape non-alphanumeric characters for URI fields" + (declare (simple-string query) + (optimize (speed 3) (safety 0) (space 0))) + (do* ((count (count-string-char query #\%)) + (len (length query)) + (new-len (- len (* 2 count))) + (str (make-string new-len)) + (spos 0 (1+ spos)) + (dpos 0 (1+ dpos))) + ((= spos len) str) + (declare (fixnum count len new-len spos dpos) + (simple-string str)) + (let ((ch (schar query spos))) + (if (char= #\% ch) + (let ((c1 (charhex (schar query (1+ spos)))) + (c2 (charhex (schar query (+ spos 2))))) + (declare (fixnum c1 c2)) + (setf (schar str dpos) + (code-char (logior c2 (ash c1 4)))) + (incf spos 2)) + (setf (schar str dpos) ch))))) + + + +(eval-when (:compile-toplevel :load-toplevel :execute) + (defvar +unambigous-charset+ + "abcdefghjkmnpqrstuvwxyz123456789ABCDEFGHJKLMNPQSRTUVWXYZ") + (defconstant +unambigous-length+ (length +unambigous-charset+))) + +(defun random-char (&optional (set :lower-alpha)) + (ecase set + (:lower-alpha + (code-char (+ +char-code-lower-a+ (random 26)))) + (:lower-alphanumeric + (let ((n (random 36))) + (if (>= n 26) + (code-char (+ +char-code-0+ (- n 26))) + (code-char (+ +char-code-lower-a+ n))))) + (:upper-alpha + (code-char (+ +char-code-upper-a+ (random 26)))) + (:unambigous + (schar +unambigous-charset+ (random +unambigous-length+))) + (:upper-lower-alpha + (let ((n (random 52))) + (if (>= n 26) + (code-char (+ +char-code-upper-a+ (- n 26))) + (code-char (+ +char-code-lower-a+ n))))))) + + +(defun random-string (&key (length 10) (set :lower-alpha)) + "Returns a random lower-case string." + (declare (optimize (speed 3))) + (let ((s (make-string length))) + (declare (simple-string s)) + (dotimes (i length s) + (setf (schar s i) (random-char set))))) + + +(defun first-char (s) + (declare (simple-string s)) + (when (and (stringp s) (plusp (length s))) + (schar s 0))) + +(defun last-char (s) + (declare (simple-string s)) + (when (stringp s) + (let ((len (length s))) + (when (plusp len)) + (schar s (1- len))))) + +(defun ensure-string (v) + (typecase v + (string v) + (character (string v)) + (symbol (symbol-name v)) + (otherwise (write-to-string v)))) + +(defun string-right-trim-one-char (char str) + (declare (simple-string str)) + (let* ((len (length str)) + (last (1- len))) + (declare (fixnum len last)) + (if (char= char (schar str last)) + (subseq str 0 last) + str))) + + +(defun string-strip-ending (str endings) + (if (stringp endings) + (setq endings (list endings))) + (let ((len (length str))) + (dolist (ending endings str) + (when (and (>= len (length ending)) + (string-equal ending + (subseq str (- len + (length ending))))) + (return-from string-strip-ending + (subseq str 0 (- len (length ending)))))))) + + +(defun string-maybe-shorten (str maxlen) + (let ((len (length str))) + (if (<= len maxlen) + str + (concatenate 'string (subseq str 0 (- maxlen 3)) "...")))) + + +(defun shrink-vector (str size) + #+allegro + (excl::.primcall 'sys::shrink-svector str size) + #+cmu + (lisp::shrink-vector str size) + #+lispworks + (system::shrink-vector$vector str size) + #+sbcl + (sb-kernel:shrink-vector str size) + #+scl + (common-lisp::shrink-vector str size) + #-(or allegro cmu lispworks sbcl scl) + (setq str (subseq str 0 size)) + str) + +(defun lex-string (string &key (whitespace '(#\space #\newline))) + "Separates a string at whitespace and returns a list of strings" + (flet ((whitespace? (char) (member char whitespace :test #'char=))) + (let ((tokens nil)) + (do* ((token-start + (position-if-not #'whitespace? string) + (when token-end + (position-if-not #'whitespace? string :start (1+ token-end)))) + (token-end + (when token-start + (position-if #'whitespace? string :start token-start)) + (when token-start + (position-if #'whitespace? string :start token-start)))) + ((null token-start) (nreverse tokens)) + (push (subseq string token-start token-end) tokens))))) + +(defun split-alphanumeric-string (string) + "Separates a string at any non-alphanumeric chararacter" + (flet ((whitespace? (char) (non-alphanumericp char))) + (let ((tokens nil)) + (do* ((token-start + (position-if-not #'whitespace? string) + (when token-end + (position-if-not #'whitespace? string :start (1+ token-end)))) + (token-end + (when token-start + (position-if #'whitespace? string :start token-start)) + (when token-start + (position-if #'whitespace? string :start token-start)))) + ((null token-start) (nreverse tokens)) + (push (subseq string token-start token-end) tokens))))) + +