1 ;;;; -*- Mode: Lisp; Syntax: ANSI-Common-Lisp; Base: 10 -*-
2 ;;;; *************************************************************************
3 ;;;; FILE IDENTIFICATION
5 ;;;; Name: strings.lisp
6 ;;;; Purpose: Strings utility functions for KMRCL package
7 ;;;; Programmer: Kevin M. Rosenberg
8 ;;;; Date Started: Apr 2000
12 ;;;; This file, part of KMRCL, is Copyright (c) 2002-2006 by Kevin M. Rosenberg
14 ;;;; KMRCL users are granted the rights to distribute and use this software
15 ;;;; as governed by the terms of the Lisp Lesser GNU Public License
16 ;;;; (http://opensource.franz.com/preamble.html), also known as the LLGPL.
17 ;;;; *************************************************************************
24 (defmacro string-append (outputstr &rest args)
25 `(setq ,outputstr (concatenate 'string ,outputstr ,@args)))
27 (defun list-to-string (lst)
28 "Converts a list to a string, doesn't include any delimiters between elements"
29 (format nil "~{~A~}" lst))
31 (defun count-string-words (str)
32 (declare (simple-string str)
33 (optimize (speed 3) (safety 0) (space 0)))
36 (declare (fixnum n-words))
37 (do* ((len (length str))
41 (if (alphanumericp (schar str i))
45 (setq in-word nil)))))
47 ;; From Larry Hunter with modifications
48 (defun position-char (char string start max)
49 (declare (optimize (speed 3) (safety 0) (space 0))
50 (fixnum start max) (simple-string string))
51 (do* ((i start (1+ i)))
54 (when (char= char (schar string i)) (return i))))
56 (defun position-not-char (char string start max)
57 (declare (optimize (speed 3) (safety 0) (space 0))
58 (fixnum start max) (simple-string string))
59 (do* ((i start (1+ i)))
62 (when (char/= char (schar string i)) (return i))))
64 (defun delimited-string-to-list (string &optional (separator #\space)
66 "split a string with delimiter"
67 (declare (optimize (speed 3) (safety 0) (space 0) (compilation-speed 0))
69 (type character separator))
70 (do* ((len (length string))
73 (end (position-char separator string pos len)
74 (position-char separator string pos len)))
77 (push (subseq string pos) output)
78 (when (or (not skip-terminal) (zerop len))
81 (declare (type fixnum pos len)
82 (type (or null fixnum) end))
83 (push (subseq string pos end) output)
87 (defun list-to-delimited-string (list &optional (separator " "))
88 (format nil (concatenate 'string "~{~A~^" (string separator) "~}") list))
90 (defun string-invert (str)
91 "Invert case of a string"
92 (declare (optimize (speed 3) (compilation-speed 0) (debug 0) (safety 0))
94 (let ((up nil) (down nil))
96 (loop for char of-type character across str do
97 (cond ((upper-case-p char)
98 (if down (return-from skip str) (setf up t)))
100 (if up (return-from skip str) (setf down t)))))
101 (if up (string-downcase str) (string-upcase str)))))
103 (defun add-sql-quotes (s)
104 (substitute-string-for-char s #\' "''"))
106 (defun escape-backslashes (s)
107 (substitute-string-for-char s #\\ "\\\\"))
109 (defun substitute-string-for-char (procstr match-char subst-str)
110 "Substitutes a string for a single matching character of a string"
111 (substitute-chars-strings procstr (list (cons match-char subst-str))))
113 (defun string-substitute (string substring replacement-string)
114 "String substitute by Larry Hunter. Obtained from Google"
115 (let ((substring-length (length substring))
119 (search substring string)
120 (search substring string :start2 last-end)))
122 (concatenate 'string new-string (subseq string last-end)))
126 (subseq string last-end next-start)
128 (setq last-end (+ next-start substring-length)))))
130 (defun string-trim-last-character (s)
131 "Return the string less the last character"
132 (let ((len (length s)))
134 (subseq s 0 (1- len))
137 (defun nstring-trim-last-character (s)
138 "Return the string less the last character"
139 (let ((len (length s)))
141 (nsubseq s 0 (1- len))
144 (defun string-hash (str &optional (bitmask 65535))
146 (declare (fixnum hash)
148 (dotimes (i (length str))
150 (setq hash (+ hash (char-code (char str i)))))
151 (logand hash bitmask)))
153 (defun is-string-empty (str)
154 (zerop (length str)))
156 (defvar *whitespace-chars* '(#\space #\tab #\return #\linefeed
158 #+lispworks #\No-Break-Space))
160 (defun is-char-whitespace (c)
161 (declare (character c) (optimize (speed 3) (safety 0)))
162 (or (char= c #\Space) (char= c #\Tab) (char= c #\Return)
164 #+allegro (char= c #\%space)
165 #+lispworks (char= c #\No-Break-Space)))
167 (defun is-string-whitespace (str)
168 "Return t if string is all whitespace"
169 (every #'is-char-whitespace str))
171 (defun string-right-trim-whitespace (str)
172 (string-right-trim *whitespace-chars* str))
174 (defun string-left-trim-whitespace (str)
175 (string-left-trim *whitespace-chars* str))
177 (defun string-trim-whitespace (str)
178 (string-trim *whitespace-chars* str))
180 (defun replaced-string-length (str repl-alist)
181 (declare (simple-string str)
182 (optimize (speed 3) (safety 0) (space 0)))
184 (orig-len (length str))
186 ((= i orig-len) new-len)
187 (declare (fixnum i orig-len new-len))
188 (let* ((c (char str i))
189 (match (assoc c repl-alist :test #'char=)))
190 (declare (character c))
192 (incf new-len (1- (length
193 (the simple-string (cdr match)))))))))
195 (defun substitute-chars-strings (str repl-alist)
196 "Replace all instances of a chars with a string. repl-alist is an assoc
197 list of characters and replacement strings."
198 (declare (simple-string str)
199 (optimize (speed 3) (safety 0) (space 0)))
200 (do* ((orig-len (length str))
201 (new-string (make-string (replaced-string-length str repl-alist)))
206 (declare (fixnum spos dpos) (simple-string new-string))
207 (let* ((c (char str spos))
208 (match (assoc c repl-alist :test #'char=)))
209 (declare (character c))
211 (let* ((subst (cdr match))
212 (len (length subst)))
213 (declare (fixnum len)
214 (simple-string subst))
217 (setf (char new-string dpos) (char subst j))
220 (setf (char new-string dpos) c)
223 (defun escape-xml-string (string)
224 "Escape invalid XML characters"
225 (substitute-chars-strings string '((#\& . "&") (#\< . "<"))))
227 (defun make-usb8-array (len)
228 (make-array len :element-type '(unsigned-byte 8)))
230 (defun usb8-array-to-string (vec &key (start 0) end)
231 (declare (type (simple-array (unsigned-byte 8) (*)) vec)
234 (setq end (length vec)))
235 (let* ((len (- end start))
236 (str (make-string len)))
237 (declare (fixnum len)
239 (optimize (speed 3) (safety 0)))
243 (setf (schar str i) (code-char (aref vec (the fixnum (+ i start))))))))
245 (defun string-to-usb8-array (str)
246 (declare (simple-string str))
247 (let* ((len (length str))
248 (vec (make-usb8-array len)))
249 (declare (fixnum len)
250 (type (simple-array (unsigned-byte 8) (*)) vec)
251 (optimize (speed 3)))
255 (setf (aref vec i) (char-code (schar str i))))))
257 (defun concat-separated-strings (separator &rest lists)
258 (format nil (concatenate 'string "~{~A~^" (string separator) "~}")
259 (append-sublists lists)))
261 (defun only-null-list-elements-p (lst)
262 (or (null lst) (every #'null lst)))
264 (defun print-separated-strings (strm separator &rest lists)
265 (declare (optimize (speed 3) (safety 0) (space 0) (debug 0)
266 (compilation-speed 0)))
267 (do* ((rest-lists lists (cdr rest-lists))
268 (list (car rest-lists) (car rest-lists))
269 (last-list (only-null-list-elements-p (cdr rest-lists))
270 (only-null-list-elements-p (cdr rest-lists))))
271 ((null rest-lists) strm)
272 (do* ((lst list (cdr lst))
273 (elem (car lst) (car lst))
274 (last-elem (null (cdr lst)) (null (cdr lst))))
276 (write-string elem strm)
277 (unless (and last-elem last-list)
278 (write-string separator strm)))))
280 (eval-when (:compile-toplevel :load-toplevel :execute)
281 (defmacro def-prefixed-number-string (fn-name type &optional doc)
282 `(defun ,fn-name (num pchar len)
283 ,@(when (stringp doc) (list doc))
284 (declare (optimize (speed 3) (safety 0) (space 0))
289 (do* ((zero-code (char-code #\0))
290 (result (make-string len :initial-element #\0))
291 (minus? (minusp num))
292 (val (if minus? (- num) num)
293 (nth-value 0 (floor val 10)))
294 (pos (1- len) (1- pos))
295 (mod (mod val 10) (mod val 10)))
296 ((or (zerop val) (minusp pos))
298 (setf (schar result 0) pchar))
299 (when minus? (setf (schar result (if pchar 1 0)) #\-))
302 (fixnum mod zero-code pos)
304 (simple-string result))
305 (setf (schar result pos) (code-char (the fixnum (+ zero-code mod))))))))
307 (def-prefixed-number-string prefixed-fixnum-string fixnum
308 "Outputs a string of LEN digit with an optional initial character PCHAR.
309 Leading zeros are present. LEN must be a fixnum.")
311 (def-prefixed-number-string prefixed-integer-string integer
312 "Outputs a string of LEN digit with an optional initial character PCHAR.
313 Leading zeros are present. LEN must be an integer.")
315 (defun integer-string (num len)
316 "Outputs a string of LEN digit with an optional initial character PCHAR.
317 Leading zeros are present."
318 (declare (optimize (speed 3) (safety 0) (space 0))
321 (do* ((zero-code (char-code #\0))
322 (result (make-string len :initial-element #\0))
323 (minus? (minusp num))
324 (val (if minus? (- 0 num) num)
325 (nth-value 0 (floor val 10)))
326 (pos (1- len) (1- pos))
327 (mod (mod val 10) (mod val 10)))
328 ((or (zerop val) (minusp pos))
329 (when minus? (setf (schar result 0) #\-))
331 (declare (fixnum mod zero-code pos) (simple-string result) (integer val))
332 (setf (schar result pos) (code-char (+ zero-code mod)))))
334 (defun fast-string-search (substr str substr-length startpos endpos)
335 "Optimized search for a substring in a simple-string"
336 (declare (simple-string substr str)
337 (fixnum substr-length startpos endpos)
338 (optimize (speed 3) (space 0) (safety 0)))
339 (do* ((pos startpos (1+ pos))
340 (lastpos (- endpos substr-length)))
341 ((> pos lastpos) nil)
342 (declare (fixnum pos lastpos))
345 (return-from fast-string-search pos))
347 (unless (char= (schar str (+ i pos)) (schar substr i))
350 (defun string-delimited-string-to-list (str substr)
351 "splits a string delimited by substr into a list of strings"
352 (declare (simple-string str substr)
353 (optimize (speed 3) (safety 0) (space 0) (compilation-speed 0)
355 (do* ((substr-len (length substr))
356 (strlen (length str))
359 (end (fast-string-search substr str substr-len pos strlen)
360 (fast-string-search substr str substr-len pos strlen)))
363 (push (subseq str pos) output))
365 (declare (fixnum strlen substr-len pos)
366 (type (or fixnum null) end))
367 (push (subseq str pos end) output)
368 (setq pos (+ end substr-len))))
370 (defun string-to-list-skip-delimiter (str &optional (delim #\space))
371 "Return a list of strings, delimited by spaces, skipping spaces."
372 (declare (simple-string str)
373 (optimize (speed 0) (space 0) (safety 0)))
376 (i (position-not-char delim str 0 end)
377 (position-not-char delim str j end))
378 (j (when i (position-char delim str i end))
379 (when i (position-char delim str i end))))
380 ((or (null i) (null j))
381 (when (and i (< i end))
382 (push (subseq str i end) results))
384 (declare (fixnum end)
385 (type (or fixnum null) i j))
386 (push (subseq str i j) results)))
388 (defun string-starts-with (start str)
389 (and (>= (length str) (length start))
390 (string-equal start str :end2 (length start))))
392 (defun count-string-char (s c)
393 "Return a count of the number of times a character appears in a string"
394 (declare (simple-string s)
396 (optimize (speed 3) (safety 0)))
397 (do ((len (length s))
401 (declare (fixnum i len count))
402 (when (char= (schar s i) c)
405 (defun count-string-char-if (pred s)
406 "Return a count of the number of times a predicate is true
407 for characters in a string"
408 (declare (simple-string s)
409 (type (or function symbol) pred)
410 (optimize (speed 3) (safety 0) (space 0)))
411 (do ((len (length s))
415 (declare (fixnum i len count))
416 (when (funcall pred (schar s i))
422 (defun non-alphanumericp (ch)
423 (not (alphanumericp ch)))
425 (defvar +hex-chars+ "0123456789ABCDEF")
426 (declaim (type simple-string +hex-chars+))
429 (declare (type (integer 0 15) n))
430 (schar +hex-chars+ n))
432 (defconstant* +char-code-lower-a+ (char-code #\a))
433 (defconstant* +char-code-upper-a+ (char-code #\A))
434 (defconstant* +char-code-0+ (char-code #\0))
435 (declaim (type fixnum +char-code-0+ +char-code-upper-a+
439 "convert hex character to decimal"
440 (let ((code (char-code (char-upcase ch))))
441 (declare (fixnum ch))
442 (if (>= code +char-code-upper-a+)
443 (+ 10 (- code +char-code-upper-a+))
444 (- code +char-code-0+))))
446 (defun binary-sequence-to-hex-string (seq)
447 (let ((list (etypecase seq
449 (sequence (map 'list #'identity seq)))))
450 (string-downcase (format nil "~{~2,'0X~}" list))))
452 (defun encode-uri-string (query)
453 "Escape non-alphanumeric characters for URI fields"
454 (declare (simple-string query)
455 (optimize (speed 3) (safety 0) (space 0)))
456 (do* ((count (count-string-char-if #'non-alphanumericp query))
458 (new-len (+ len (* 2 count)))
459 (str (make-string new-len))
463 (declare (fixnum count len new-len spos dpos)
465 (let ((ch (schar query spos)))
466 (if (non-alphanumericp ch)
467 (let ((c (char-code ch)))
468 (setf (schar str dpos) #\%)
470 (setf (schar str dpos) (hexchar (logand (ash c -4) 15)))
472 (setf (schar str dpos) (hexchar (logand c 15))))
473 (setf (schar str dpos) ch)))))
475 (defun decode-uri-string (query)
476 "Unescape non-alphanumeric characters for URI fields"
477 (declare (simple-string query)
478 (optimize (speed 3) (safety 0) (space 0)))
479 (do* ((count (count-string-char query #\%))
481 (new-len (- len (* 2 count)))
482 (str (make-string new-len))
486 (declare (fixnum count len new-len spos dpos)
488 (let ((ch (schar query spos)))
490 (let ((c1 (charhex (schar query (1+ spos))))
491 (c2 (charhex (schar query (+ spos 2)))))
492 (declare (fixnum c1 c2))
493 (setf (schar str dpos)
494 (code-char (logior c2 (ash c1 4))))
496 (setf (schar str dpos) ch)))))
500 (eval-when (:compile-toplevel :load-toplevel :execute)
501 (defvar +unambiguous-charset+
502 "abcdefghjkmnpqrstuvwxyz123456789ABCDEFGHJKLMNPQSRTUVWXYZ")
503 (defconstant* +unambiguous-length+ (length +unambiguous-charset+)))
505 (defun random-char (&optional (set :lower-alpha))
508 (code-char (+ +char-code-lower-a+ (random 26))))
510 (let ((n (random 36)))
512 (code-char (+ +char-code-0+ (- n 26)))
513 (code-char (+ +char-code-lower-a+ n)))))
515 (code-char (+ +char-code-upper-a+ (random 26))))
517 (schar +unambiguous-charset+ (random +unambiguous-length+)))
519 (let ((n (random 52)))
521 (code-char (+ +char-code-upper-a+ (- n 26)))
522 (code-char (+ +char-code-lower-a+ n)))))))
525 (defun random-string (&key (length 10) (set :lower-alpha))
526 "Returns a random lower-case string."
527 (declare (optimize (speed 3)))
528 (let ((s (make-string length)))
529 (declare (simple-string s))
530 (dotimes (i length s)
531 (setf (schar s i) (random-char set)))))
534 (defun first-char (s)
535 (declare (simple-string s))
536 (when (and (stringp s) (plusp (length s)))
540 (declare (simple-string s))
542 (let ((len (length s)))
544 (schar s (1- len)))))
546 (defun ensure-string (v)
549 (character (string v))
550 (symbol (symbol-name v))
551 (otherwise (write-to-string v))))
553 (defun string-right-trim-one-char (char str)
554 (declare (simple-string str))
555 (let* ((len (length str))
557 (declare (fixnum len last))
558 (if (char= char (schar str last))
563 (defun string-strip-ending (str endings)
564 (if (stringp endings)
565 (setq endings (list endings)))
566 (let ((len (length str)))
567 (dolist (ending endings str)
568 (when (and (>= len (length ending))
572 (return-from string-strip-ending
573 (subseq str 0 (- len (length ending))))))))
576 (defun string-maybe-shorten (str maxlen)
577 (string-elide str maxlen :end))
579 (defun string-elide (str maxlen position)
580 (declare (fixnum maxlen))
581 (let ((len (length str)))
582 (declare (fixnum len))
588 ((eq position :middle)
589 (multiple-value-bind (mid remain) (truncate maxlen 2)
590 (let ((end1 (- mid 1))
591 (start2 (- len (- mid 2) remain)))
592 (concatenate 'string (subseq str 0 end1) "..." (subseq str start2)))))
593 ((or (eq position :end) t)
594 (concatenate 'string (subseq str 0 (- maxlen 3)) "...")))))
596 (defun shrink-vector (str size)
598 (excl::.primcall 'sys::shrink-svector str size)
600 (lisp::shrink-vector str size)
602 (system::shrink-vector$vector str size)
604 (sb-kernel:shrink-vector str size)
606 (common-lisp::shrink-vector str size)
607 #-(or allegro cmu lispworks sbcl scl)
608 (setq str (subseq str 0 size))
611 (defun lex-string (string &key (whitespace '(#\space #\newline)))
612 "Separates a string at whitespace and returns a list of strings"
613 (flet ((is-sep (char) (member char whitespace :test #'char=)))
616 (position-if-not #'is-sep string)
618 (position-if-not #'is-sep string :start (1+ token-end))))
621 (position-if #'is-sep string :start token-start))
623 (position-if #'is-sep string :start token-start))))
624 ((null token-start) (nreverse tokens))
625 (push (subseq string token-start token-end) tokens)))))
627 (defun split-alphanumeric-string (string)
628 "Separates a string at any non-alphanumeric chararacter"
629 (declare (simple-string string)
630 (optimize (speed 3) (safety 0)))
631 (flet ((is-sep (char)
632 (declare (character char))
633 (and (non-alphanumericp char)
634 (not (char= #\_ char)))))
637 (position-if-not #'is-sep string)
639 (position-if-not #'is-sep string :start (1+ token-end))))
642 (position-if #'is-sep string :start token-start))
644 (position-if #'is-sep string :start token-start))))
645 ((null token-start) (nreverse tokens))
646 (push (subseq string token-start token-end) tokens)))))
649 (defun trim-non-alphanumeric (word)
650 "Strip non-alphanumeric characters from beginning and end of a word."
651 (declare (simple-string word)
652 (optimize (speed 3) (safety 0) (space 0)))
656 (declare (fixnum start end len))
658 ((or done (= start end)))
659 (if (alphanumericp (schar word start))
664 ((or done (= start end)))
665 (if (alphanumericp (schar word (1- end)))
668 (if (or (plusp start) (/= len end))
669 (subseq word start end)
673 (defun collapse-whitespace (s)
674 "Convert multiple whitespace characters to a single space character."
675 (declare (simple-string s)
676 (optimize (speed 3) (safety 0)))
677 (with-output-to-string (stream)
678 (do ((pos 0 (1+ pos))
682 (declare (fixnum pos len))
683 (let ((c (schar s pos)))
684 (declare (character c))
686 ((kl:is-char-whitespace c)
688 (write-char #\space stream))
692 (write-char c stream)))))))
694 (defun string->list (string)
695 (let ((eof (list nil)))
696 (with-input-from-string (stream string)
697 (do ((x (read stream nil eof) (read stream nil eof))
699 ((eq x eof) (nreverse l))))))