1 ;;;; -*- Mode: Lisp; Syntax: ANSI-Common-Lisp; Base: 10 -*-
2 ;;;; *************************************************************************
3 ;;;; FILE IDENTIFICATION
5 ;;;; Name: strings.lisp
6 ;;;; Purpose: Strings utility functions for KMRCL package
7 ;;;; Programmer: Kevin M. Rosenberg
8 ;;;; Date Started: Apr 2000
10 ;;;; This file, part of KMRCL, is Copyright (c) 2002-2006 by Kevin M. Rosenberg
12 ;;;; KMRCL users are granted the rights to distribute and use this software
13 ;;;; as governed by the terms of the Lisp Lesser GNU Public License
14 ;;;; (http://opensource.franz.com/preamble.html), also known as the LLGPL.
15 ;;;; *************************************************************************
22 (defmacro string-append (outputstr &rest args)
23 `(setq ,outputstr (concatenate 'string ,outputstr ,@args)))
25 (defun list-to-string (lst)
26 "Converts a list to a string, doesn't include any delimiters between elements"
27 (format nil "~{~A~}" lst))
29 (defun count-string-words (str)
30 (declare (simple-string str)
31 (optimize (speed 3) (safety 0) (space 0)))
34 (declare (fixnum n-words))
35 (do* ((len (length str))
39 (if (alphanumericp (schar str i))
43 (setq in-word nil)))))
45 (defun position-char (char string start max)
46 (declare (optimize (speed 3) (safety 0) (space 0))
47 (fixnum start max) (simple-string string))
48 (do* ((i start (1+ i)))
51 (when (char= char (schar string i)) (return i))))
53 (defun position-not-char (char string start max)
54 (declare (optimize (speed 3) (safety 0) (space 0))
55 (fixnum start max) (simple-string string))
56 (do* ((i start (1+ i)))
59 (when (char/= char (schar string i)) (return i))))
61 (defun delimited-string-to-list (string &optional (separator #\space)
63 "split a string with delimiter"
64 (declare (optimize (speed 3) (safety 0) (space 0) (compilation-speed 0))
66 (type character separator))
67 (do* ((len (length string))
70 (end (position-char separator string pos len)
71 (position-char separator string pos len)))
74 (push (subseq string pos) output)
75 (when (or (not skip-terminal) (zerop len))
78 (declare (type fixnum pos len)
79 (type (or null fixnum) end))
80 (push (subseq string pos end) output)
84 (defun list-to-delimited-string (list &optional (separator " "))
85 (format nil (concatenate 'string "~{~A~^" (string separator) "~}") list))
87 (defun string-invert (str)
88 "Invert case of a string"
89 (declare (optimize (speed 3) (compilation-speed 0) (debug 0) (safety 0))
91 (let ((up nil) (down nil))
93 (loop for char of-type character across str do
94 (cond ((upper-case-p char)
95 (if down (return-from skip str) (setf up t)))
97 (if up (return-from skip str) (setf down t)))))
98 (if up (string-downcase str) (string-upcase str)))))
100 (defun add-sql-quotes (s)
101 (substitute-string-for-char s #\' "''"))
103 (defun escape-backslashes (s)
104 (substitute-string-for-char s #\\ "\\\\"))
106 (defun substitute-string-for-char (procstr match-char subst-str)
107 "Substitutes a string for a single matching character of a string"
108 (substitute-chars-strings procstr (list (cons match-char subst-str))))
110 (defun string-substitute (string substring replacement-string)
111 "String substitute by Larry Hunter. Obtained from Google"
112 (let ((substring-length (length substring))
116 (search substring string)
117 (search substring string :start2 last-end)))
119 (concatenate 'string new-string (subseq string last-end)))
123 (subseq string last-end next-start)
125 (setq last-end (+ next-start substring-length)))))
127 (defun string-trim-last-character (s)
128 "Return the string less the last character"
129 (let ((len (length s)))
131 (subseq s 0 (1- len))
134 (defun nstring-trim-last-character (s)
135 "Return the string less the last character"
136 (let ((len (length s)))
138 (nsubseq s 0 (1- len))
141 (defun string-hash (str &optional (bitmask 65535))
143 (declare (fixnum hash)
145 (dotimes (i (length str))
147 (setq hash (+ hash (char-code (char str i)))))
148 (logand hash bitmask)))
150 (defun is-string-empty (str)
151 (zerop (length str)))
153 (defvar *whitespace-chars* '(#\space #\tab #\return #\linefeed
155 #+lispworks #\No-Break-Space))
157 (defun is-char-whitespace (c)
158 (declare (character c) (optimize (speed 3) (safety 0)))
159 (or (char= c #\Space) (char= c #\Tab) (char= c #\Return)
161 #+allegro (char= c #\%space)
162 #+lispworks (char= c #\No-Break-Space)))
164 (defun is-string-whitespace (str)
165 "Return t if string is all whitespace"
166 (every #'is-char-whitespace str))
168 (defun string-right-trim-whitespace (str)
169 (string-right-trim *whitespace-chars* str))
171 (defun string-left-trim-whitespace (str)
172 (string-left-trim *whitespace-chars* str))
174 (defun string-trim-whitespace (str)
175 (string-trim *whitespace-chars* str))
177 (defun replaced-string-length (str repl-alist)
178 (declare (simple-string str)
179 (optimize (speed 3) (safety 0) (space 0)))
181 (orig-len (length str))
183 ((= i orig-len) new-len)
184 (declare (fixnum i orig-len new-len))
185 (let* ((c (char str i))
186 (match (assoc c repl-alist :test #'char=)))
187 (declare (character c))
189 (incf new-len (1- (length
190 (the simple-string (cdr match)))))))))
192 (defun substitute-chars-strings (str repl-alist)
193 "Replace all instances of a chars with a string. repl-alist is an assoc
194 list of characters and replacement strings."
195 (declare (simple-string str)
196 (optimize (speed 3) (safety 0) (space 0)))
197 (do* ((orig-len (length str))
198 (new-string (make-string (replaced-string-length str repl-alist)))
203 (declare (fixnum spos dpos) (simple-string new-string))
204 (let* ((c (char str spos))
205 (match (assoc c repl-alist :test #'char=)))
206 (declare (character c))
208 (let* ((subst (cdr match))
209 (len (length subst)))
210 (declare (fixnum len)
211 (simple-string subst))
214 (setf (char new-string dpos) (char subst j))
217 (setf (char new-string dpos) c)
220 (defun escape-xml-string (string)
221 "Escape invalid XML characters"
222 (substitute-chars-strings string '((#\& . "&") (#\< . "<"))))
224 (defun make-usb8-array (len)
225 (make-array len :element-type '(unsigned-byte 8)))
227 (defun usb8-array-to-string (vec &key (start 0) end)
228 (declare (type (simple-array (unsigned-byte 8) (*)) vec)
231 (setq end (length vec)))
232 (let* ((len (- end start))
233 (str (make-string len)))
234 (declare (fixnum len)
236 (optimize (speed 3) (safety 0)))
240 (setf (schar str i) (code-char (aref vec (the fixnum (+ i start))))))))
242 (defun string-to-usb8-array (str)
243 (declare (simple-string str))
244 (let* ((len (length str))
245 (vec (make-usb8-array len)))
246 (declare (fixnum len)
247 (type (simple-array (unsigned-byte 8) (*)) vec)
248 (optimize (speed 3)))
252 (setf (aref vec i) (char-code (schar str i))))))
254 (defun concat-separated-strings (separator &rest lists)
255 (format nil (concatenate 'string "~{~A~^" (string separator) "~}")
256 (append-sublists lists)))
258 (defun only-null-list-elements-p (lst)
259 (or (null lst) (every #'null lst)))
261 (defun print-separated-strings (strm separator &rest lists)
262 (declare (optimize (speed 3) (safety 0) (space 0) (debug 0)
263 (compilation-speed 0)))
264 (do* ((rest-lists lists (cdr rest-lists))
265 (list (car rest-lists) (car rest-lists))
266 (last-list (only-null-list-elements-p (cdr rest-lists))
267 (only-null-list-elements-p (cdr rest-lists))))
268 ((null rest-lists) strm)
269 (do* ((lst list (cdr lst))
270 (elem (car lst) (car lst))
271 (last-elem (null (cdr lst)) (null (cdr lst))))
273 (write-string elem strm)
274 (unless (and last-elem last-list)
275 (write-string separator strm)))))
277 (eval-when (:compile-toplevel :load-toplevel :execute)
278 (defmacro def-prefixed-number-string (fn-name type &optional doc)
279 `(defun ,fn-name (num pchar len)
280 ,@(when (stringp doc) (list doc))
281 (declare (optimize (speed 3) (safety 0) (space 0))
286 (do* ((zero-code (char-code #\0))
287 (result (make-string len :initial-element #\0))
288 (minus? (minusp num))
289 (val (if minus? (- num) num)
290 (nth-value 0 (floor val 10)))
291 (pos (1- len) (1- pos))
292 (mod (mod val 10) (mod val 10)))
293 ((or (zerop val) (minusp pos))
295 (setf (schar result 0) pchar))
296 (when minus? (setf (schar result (if pchar 1 0)) #\-))
299 (fixnum mod zero-code pos)
301 (simple-string result))
302 (setf (schar result pos) (code-char (the fixnum (+ zero-code mod))))))))
304 (def-prefixed-number-string prefixed-fixnum-string fixnum
305 "Outputs a string of LEN digit with an optional initial character PCHAR.
306 Leading zeros are present. LEN must be a fixnum.")
308 (def-prefixed-number-string prefixed-integer-string integer
309 "Outputs a string of LEN digit with an optional initial character PCHAR.
310 Leading zeros are present. LEN must be an integer.")
312 (defun integer-string (num len)
313 "Outputs a string of LEN digit with an optional initial character PCHAR.
314 Leading zeros are present."
315 (declare (optimize (speed 3) (safety 0) (space 0))
318 (do* ((zero-code (char-code #\0))
319 (result (make-string len :initial-element #\0))
320 (minus? (minusp num))
321 (val (if minus? (- 0 num) num)
322 (nth-value 0 (floor val 10)))
323 (pos (1- len) (1- pos))
324 (mod (mod val 10) (mod val 10)))
325 ((or (zerop val) (minusp pos))
326 (when minus? (setf (schar result 0) #\-))
328 (declare (fixnum mod zero-code pos) (simple-string result) (integer val))
329 (setf (schar result pos) (code-char (+ zero-code mod)))))
331 (defun fast-string-search (substr str substr-length startpos endpos)
332 "Optimized search for a substring in a simple-string"
333 (declare (simple-string substr str)
334 (fixnum substr-length startpos endpos)
335 (optimize (speed 3) (space 0) (safety 0)))
336 (do* ((pos startpos (1+ pos))
337 (lastpos (- endpos substr-length)))
338 ((> pos lastpos) nil)
339 (declare (fixnum pos lastpos))
342 (return-from fast-string-search pos))
344 (unless (char= (schar str (+ i pos)) (schar substr i))
347 (defun string-delimited-string-to-list (str substr)
348 "splits a string delimited by substr into a list of strings"
349 (declare (simple-string str substr)
350 (optimize (speed 3) (safety 0) (space 0) (compilation-speed 0)
352 (do* ((substr-len (length substr))
353 (strlen (length str))
356 (end (fast-string-search substr str substr-len pos strlen)
357 (fast-string-search substr str substr-len pos strlen)))
360 (push (subseq str pos) output))
362 (declare (fixnum strlen substr-len pos)
363 (type (or fixnum null) end))
364 (push (subseq str pos end) output)
365 (setq pos (+ end substr-len))))
367 (defun string-to-list-skip-delimiter (str &optional (delim #\space))
368 "Return a list of strings, delimited by spaces, skipping spaces."
369 (declare (simple-string str)
370 (optimize (speed 0) (space 0) (safety 0)))
373 (i (position-not-char delim str 0 end)
374 (position-not-char delim str j end))
375 (j (when i (position-char delim str i end))
376 (when i (position-char delim str i end))))
377 ((or (null i) (null j))
378 (when (and i (< i end))
379 (push (subseq str i end) results))
381 (declare (fixnum end)
382 (type (or fixnum null) i j))
383 (push (subseq str i j) results)))
385 (defun string-starts-with (start str)
386 (and (>= (length str) (length start))
387 (string-equal start str :end2 (length start))))
389 (defun count-string-char (s c)
390 "Return a count of the number of times a character appears in a string"
391 (declare (simple-string s)
393 (optimize (speed 3) (safety 0)))
394 (do ((len (length s))
398 (declare (fixnum i len count))
399 (when (char= (schar s i) c)
402 (defun count-string-char-if (pred s)
403 "Return a count of the number of times a predicate is true
404 for characters in a string"
405 (declare (simple-string s)
406 (type (or function symbol) pred)
407 (optimize (speed 3) (safety 0) (space 0)))
408 (do ((len (length s))
412 (declare (fixnum i len count))
413 (when (funcall pred (schar s i))
419 (defun non-alphanumericp (ch)
420 (not (alphanumericp ch)))
422 (defvar +hex-chars+ "0123456789ABCDEF")
423 (declaim (type simple-string +hex-chars+))
426 (declare (type (integer 0 15) n))
427 (schar +hex-chars+ n))
429 (defconstant* +char-code-lower-a+ (char-code #\a))
430 (defconstant* +char-code-upper-a+ (char-code #\A))
431 (defconstant* +char-code-0+ (char-code #\0))
432 (declaim (type fixnum +char-code-0+ +char-code-upper-a+
436 "convert hex character to decimal"
437 (let ((code (char-code (char-upcase ch))))
438 (declare (fixnum ch))
439 (if (>= code +char-code-upper-a+)
440 (+ 10 (- code +char-code-upper-a+))
441 (- code +char-code-0+))))
443 (defun binary-sequence-to-hex-string (seq)
444 (let ((list (etypecase seq
446 (sequence (map 'list #'identity seq)))))
447 (string-downcase (format nil "~{~2,'0X~}" list))))
449 (defun encode-uri-string (query)
450 "Escape non-alphanumeric characters for URI fields"
451 (declare (simple-string query)
452 (optimize (speed 3) (safety 0) (space 0)))
453 (do* ((count (count-string-char-if #'non-alphanumericp query))
455 (new-len (+ len (* 2 count)))
456 (str (make-string new-len))
460 (declare (fixnum count len new-len spos dpos)
462 (let ((ch (schar query spos)))
463 (if (non-alphanumericp ch)
464 (let ((c (char-code ch)))
465 (setf (schar str dpos) #\%)
467 (setf (schar str dpos) (hexchar (logand (ash c -4) 15)))
469 (setf (schar str dpos) (hexchar (logand c 15))))
470 (setf (schar str dpos) ch)))))
472 (defun decode-uri-string (query)
473 "Unescape non-alphanumeric characters for URI fields"
474 (declare (simple-string query)
475 (optimize (speed 3) (safety 0) (space 0)))
476 (do* ((count (count-string-char query #\%))
478 (new-len (- len (* 2 count)))
479 (str (make-string new-len))
483 (declare (fixnum count len new-len spos dpos)
485 (let ((ch (schar query spos)))
487 (let ((c1 (charhex (schar query (1+ spos))))
488 (c2 (charhex (schar query (+ spos 2)))))
489 (declare (fixnum c1 c2))
490 (setf (schar str dpos)
491 (code-char (logior c2 (ash c1 4))))
493 (setf (schar str dpos) ch)))))
496 (defun uri-query-to-alist (query)
497 "Converts non-decoded URI query to an alist of settings"
498 (mapcar (lambda (set)
499 (let ((lst (kmrcl:delimited-string-to-list set #\=)))
500 (cons (first lst) (second lst))))
501 (kmrcl:delimited-string-to-list
502 (kmrcl:decode-uri-string query) #\&)))
504 (eval-when (:compile-toplevel :load-toplevel :execute)
505 (defvar +unambiguous-charset+
506 "abcdefghjkmnpqrstuvwxyz123456789ABCDEFGHJKLMNPQSRTUVWXYZ")
507 (defconstant* +unambiguous-length+ (length +unambiguous-charset+)))
509 (defun random-char (&optional (set :lower-alpha))
512 (code-char (+ +char-code-lower-a+ (random 26))))
514 (let ((n (random 36)))
516 (code-char (+ +char-code-0+ (- n 26)))
517 (code-char (+ +char-code-lower-a+ n)))))
519 (code-char (+ +char-code-upper-a+ (random 26))))
521 (schar +unambiguous-charset+ (random +unambiguous-length+)))
523 (let ((n (random 52)))
525 (code-char (+ +char-code-upper-a+ (- n 26)))
526 (code-char (+ +char-code-lower-a+ n)))))))
529 (defun random-string (&key (length 10) (set :lower-alpha))
530 "Returns a random lower-case string."
531 (declare (optimize (speed 3)))
532 (let ((s (make-string length)))
533 (declare (simple-string s))
534 (dotimes (i length s)
535 (setf (schar s i) (random-char set)))))
538 (defun first-char (s)
539 (declare (simple-string s))
540 (when (and (stringp s) (plusp (length s)))
544 (declare (simple-string s))
546 (let ((len (length s)))
548 (schar s (1- len)))))
550 (defun ensure-string (v)
553 (character (string v))
554 (symbol (symbol-name v))
555 (otherwise (write-to-string v))))
557 (defun string-right-trim-one-char (char str)
558 (declare (simple-string str))
559 (let* ((len (length str))
561 (declare (fixnum len last))
562 (if (char= char (schar str last))
567 (defun remove-char-string (char str)
568 (declare (character char)
570 (do* ((len (length str))
571 (out (make-string len))
574 ((= pos len) (subseq out 0 opos))
575 (declare (fixnum pos opos len)
577 (let ((c (char str pos)))
578 (declare (character c))
579 (when (char/= c char)
580 (setf (schar out opos) c)
584 (defun string-strip-ending (str endings)
585 (if (stringp endings)
586 (setq endings (list endings)))
587 (let ((len (length str)))
588 (dolist (ending endings str)
589 (when (and (>= len (length ending))
593 (return-from string-strip-ending
594 (subseq str 0 (- len (length ending))))))))
597 (defun string-maybe-shorten (str maxlen)
598 (string-elide str maxlen :end))
600 (defun string-elide (str maxlen position)
601 (declare (fixnum maxlen))
602 (let ((len (length str)))
603 (declare (fixnum len))
609 ((eq position :middle)
610 (multiple-value-bind (mid remain) (truncate maxlen 2)
611 (let ((end1 (- mid 1))
612 (start2 (- len (- mid 2) remain)))
613 (concatenate 'string (subseq str 0 end1) "..." (subseq str start2)))))
614 ((or (eq position :end) t)
615 (concatenate 'string (subseq str 0 (- maxlen 3)) "...")))))
617 (defun shrink-vector (str size)
619 (excl::.primcall 'sys::shrink-svector str size)
621 (lisp::shrink-vector str size)
623 (system::shrink-vector$vector str size)
625 (sb-kernel:shrink-vector str size)
627 (common-lisp::shrink-vector str size)
628 #-(or allegro cmu lispworks sbcl scl)
629 (setq str (subseq str 0 size))
632 (defun lex-string (string &key (whitespace '(#\space #\newline)))
633 "Separates a string at whitespace and returns a list of strings"
634 (flet ((is-sep (char) (member char whitespace :test #'char=)))
637 (position-if-not #'is-sep string)
639 (position-if-not #'is-sep string :start (1+ token-end))))
642 (position-if #'is-sep string :start token-start))
644 (position-if #'is-sep string :start token-start))))
645 ((null token-start) (nreverse tokens))
646 (push (subseq string token-start token-end) tokens)))))
648 (defun split-alphanumeric-string (string)
649 "Separates a string at any non-alphanumeric chararacter"
650 (declare (simple-string string)
651 (optimize (speed 3) (safety 0)))
652 (flet ((is-sep (char)
653 (declare (character char))
654 (and (non-alphanumericp char)
655 (not (char= #\_ char)))))
658 (position-if-not #'is-sep string)
660 (position-if-not #'is-sep string :start (1+ token-end))))
663 (position-if #'is-sep string :start token-start))
665 (position-if #'is-sep string :start token-start))))
666 ((null token-start) (nreverse tokens))
667 (push (subseq string token-start token-end) tokens)))))
670 (defun trim-non-alphanumeric (word)
671 "Strip non-alphanumeric characters from beginning and end of a word."
672 (declare (simple-string word)
673 (optimize (speed 3) (safety 0) (space 0)))
677 (declare (fixnum start end len))
679 ((or done (= start end)))
680 (if (alphanumericp (schar word start))
685 ((or done (= start end)))
686 (if (alphanumericp (schar word (1- end)))
689 (if (or (plusp start) (/= len end))
690 (subseq word start end)
694 (defun collapse-whitespace (s)
695 "Convert multiple whitespace characters to a single space character."
696 (declare (simple-string s)
697 (optimize (speed 3) (safety 0)))
698 (with-output-to-string (stream)
699 (do ((pos 0 (1+ pos))
703 (declare (fixnum pos len))
704 (let ((c (schar s pos)))
705 (declare (character c))
707 ((kl:is-char-whitespace c)
709 (write-char #\space stream))
713 (write-char c stream)))))))
715 (defun string->list (string)
716 (let ((eof (list nil)))
717 (with-input-from-string (stream string)
718 (do ((x (read stream nil eof) (read stream nil eof))
720 ((eq x eof) (nreverse l))))))