1 ;;;; -*- Mode: Lisp; Syntax: ANSI-Common-Lisp; Base: 10 -*-
2 ;;;; *************************************************************************
3 ;;;; FILE IDENTIFICATION
5 ;;;; Name: strings.lisp
6 ;;;; Purpose: Strings utility functions for KMRCL package
7 ;;;; Programmer: Kevin M. Rosenberg
8 ;;;; Date Started: Apr 2000
12 ;;;; This file, part of KMRCL, is Copyright (c) 2002-2006 by Kevin M. Rosenberg
14 ;;;; KMRCL users are granted the rights to distribute and use this software
15 ;;;; as governed by the terms of the Lisp Lesser GNU Public License
16 ;;;; (http://opensource.franz.com/preamble.html), also known as the LLGPL.
17 ;;;; *************************************************************************
24 (defmacro string-append (outputstr &rest args)
25 `(setq ,outputstr (concatenate 'string ,outputstr ,@args)))
27 (defun list-to-string (lst)
28 "Converts a list to a string, doesn't include any delimiters between elements"
29 (format nil "~{~A~}" lst))
31 (defun count-string-words (str)
32 (declare (simple-string str)
33 (optimize (speed 3) (safety 0) (space 0)))
36 (declare (fixnum n-words))
37 (do* ((len (length str))
41 (if (alphanumericp (schar str i))
45 (setq in-word nil)))))
47 ;; From Larry Hunter with modifications
48 (defun position-char (char string start max)
49 (declare (optimize (speed 3) (safety 0) (space 0))
50 (fixnum start max) (simple-string string))
51 (do* ((i start (1+ i)))
54 (when (char= char (schar string i)) (return i))))
56 (defun position-not-char (char string start max)
57 (declare (optimize (speed 3) (safety 0) (space 0))
58 (fixnum start max) (simple-string string))
59 (do* ((i start (1+ i)))
62 (when (char/= char (schar string i)) (return i))))
64 (defun delimited-string-to-list (string &optional (separator #\space)
66 "split a string with delimiter"
67 (declare (optimize (speed 3) (safety 0) (space 0) (compilation-speed 0))
69 (type character separator))
70 (do* ((len (length string))
73 (end (position-char separator string pos len)
74 (position-char separator string pos len)))
77 (push (subseq string pos) output)
78 (when (or (not skip-terminal) (zerop len))
81 (declare (type fixnum pos len)
82 (type (or null fixnum) end))
83 (push (subseq string pos end) output)
87 (defun list-to-delimited-string (list &optional (separator " "))
88 (format nil (concatenate 'string "~{~A~^" (string separator) "~}") list))
90 (defun string-invert (str)
91 "Invert case of a string"
92 (declare (optimize (speed 3) (compilation-speed 0) (debug 0) (safety 0))
94 (let ((up nil) (down nil))
96 (loop for char of-type character across str do
97 (cond ((upper-case-p char)
98 (if down (return-from skip str) (setf up t)))
100 (if up (return-from skip str) (setf down t)))))
101 (if up (string-downcase str) (string-upcase str)))))
103 (defun add-sql-quotes (s)
104 (substitute-string-for-char s #\' "''"))
106 (defun escape-backslashes (s)
107 (substitute-string-for-char s #\\ "\\\\"))
109 (defun substitute-string-for-char (procstr match-char subst-str)
110 "Substitutes a string for a single matching character of a string"
111 (substitute-chars-strings procstr (list (cons match-char subst-str))))
113 (defun string-substitute (string substring replacement-string)
114 "String substitute by Larry Hunter. Obtained from Google"
115 (let ((substring-length (length substring))
119 (search substring string)
120 (search substring string :start2 last-end)))
122 (concatenate 'string new-string (subseq string last-end)))
126 (subseq string last-end next-start)
128 (setq last-end (+ next-start substring-length)))))
130 (defun string-trim-last-character (s)
131 "Return the string less the last character"
132 (let ((len (length s)))
134 (subseq s 0 (1- len))
137 (defun nstring-trim-last-character (s)
138 "Return the string less the last character"
139 (let ((len (length s)))
141 (nsubseq s 0 (1- len))
144 (defun string-hash (str &optional (bitmask 65535))
146 (declare (fixnum hash)
148 (dotimes (i (length str))
150 (setq hash (+ hash (char-code (char str i)))))
151 (logand hash bitmask)))
153 (defun is-string-empty (str)
154 (zerop (length str)))
156 (defvar *whitespace-chars* '(#\space #\tab #\return #\linefeed
158 #+lispworks #\No-Break-Space))
160 (defun is-char-whitespace (c)
161 (declare (character c) (optimize (speed 3) (safety 0)))
162 (or (char= c #\Space) (char= c #\Tab) (char= c #\Return)
164 #+allegro (char= c #\%space)
165 #+lispworks (char= c #\No-Break-Space)))
167 (defun is-string-whitespace (str)
168 "Return t if string is all whitespace"
169 (every #'is-char-whitespace str))
171 (defun string-right-trim-whitespace (str)
172 (string-right-trim *whitespace-chars* str))
174 (defun string-left-trim-whitespace (str)
175 (string-left-trim *whitespace-chars* str))
177 (defun string-trim-whitespace (str)
178 (string-trim *whitespace-chars* str))
180 (defun replaced-string-length (str repl-alist)
181 (declare (simple-string str)
182 (optimize (speed 3) (safety 0) (space 0)))
184 (orig-len (length str))
186 ((= i orig-len) new-len)
187 (declare (fixnum i orig-len new-len))
188 (let* ((c (char str i))
189 (match (assoc c repl-alist :test #'char=)))
190 (declare (character c))
192 (incf new-len (1- (length
193 (the simple-string (cdr match)))))))))
195 (defun substitute-chars-strings (str repl-alist)
196 "Replace all instances of a chars with a string. repl-alist is an assoc
197 list of characters and replacement strings."
198 (declare (simple-string str)
199 (optimize (speed 3) (safety 0) (space 0)))
200 (do* ((orig-len (length str))
201 (new-string (make-string (replaced-string-length str repl-alist)))
206 (declare (fixnum spos dpos) (simple-string new-string))
207 (let* ((c (char str spos))
208 (match (assoc c repl-alist :test #'char=)))
209 (declare (character c))
211 (let* ((subst (cdr match))
212 (len (length subst)))
213 (declare (fixnum len)
214 (simple-string subst))
217 (setf (char new-string dpos) (char subst j))
220 (setf (char new-string dpos) c)
223 (defun escape-xml-string (string)
224 "Escape invalid XML characters"
225 (substitute-chars-strings string '((#\& . "&") (#\< . "<"))))
227 (defun make-usb8-array (len)
228 (make-array len :element-type '(unsigned-byte 8)))
230 (defun usb8-array-to-string (vec &key (start 0) end)
231 (declare (type (simple-array (unsigned-byte 8) (*)) vec)
234 (setq end (length vec)))
235 (let* ((len (- end start))
236 (str (make-string len)))
237 (declare (fixnum len)
239 (optimize (speed 3) (safety 0)))
243 (setf (schar str i) (code-char (aref vec (the fixnum (+ i start))))))))
245 (defun string-to-usb8-array (str)
246 (declare (simple-string str))
247 (let* ((len (length str))
248 (vec (make-usb8-array len)))
249 (declare (fixnum len)
250 (type (simple-array (unsigned-byte 8) (*)) vec)
251 (optimize (speed 3)))
255 (setf (aref vec i) (char-code (schar str i))))))
257 (defun concat-separated-strings (separator &rest lists)
258 (format nil (concatenate 'string "~{~A~^" (string separator) "~}")
259 (append-sublists lists)))
261 (defun only-null-list-elements-p (lst)
262 (or (null lst) (every #'null lst)))
264 (defun print-separated-strings (strm separator &rest lists)
265 (declare (optimize (speed 3) (safety 0) (space 0) (debug 0)
266 (compilation-speed 0)))
267 (do* ((rest-lists lists (cdr rest-lists))
268 (list (car rest-lists) (car rest-lists))
269 (last-list (only-null-list-elements-p (cdr rest-lists))
270 (only-null-list-elements-p (cdr rest-lists))))
271 ((null rest-lists) strm)
272 (do* ((lst list (cdr lst))
273 (elem (car lst) (car lst))
274 (last-elem (null (cdr lst)) (null (cdr lst))))
276 (write-string elem strm)
277 (unless (and last-elem last-list)
278 (write-string separator strm)))))
280 (eval-when (:compile-toplevel :load-toplevel :execute)
281 (defmacro def-prefixed-number-string (fn-name type &optional doc)
282 `(defun ,fn-name (num pchar len)
283 ,@(when (stringp doc) (list doc))
284 (declare (optimize (speed 3) (safety 0) (space 0))
289 (do* ((zero-code (char-code #\0))
290 (result (make-string len :initial-element #\0))
291 (minus? (minusp num))
292 (val (if minus? (- num) num)
293 (nth-value 0 (floor val 10)))
294 (pos (1- len) (1- pos))
295 (mod (mod val 10) (mod val 10)))
296 ((or (zerop val) (minusp pos))
298 (setf (schar result 0) pchar))
299 (when minus? (setf (schar result (if pchar 1 0)) #\-))
302 (fixnum mod zero-code pos)
304 (simple-string result))
305 (setf (schar result pos) (code-char (the fixnum (+ zero-code mod))))))))
307 (def-prefixed-number-string prefixed-fixnum-string fixnum
308 "Outputs a string of LEN digit with an optional initial character PCHAR.
309 Leading zeros are present. LEN must be a fixnum.")
311 (def-prefixed-number-string prefixed-integer-string integer
312 "Outputs a string of LEN digit with an optional initial character PCHAR.
313 Leading zeros are present. LEN must be an integer.")
315 (defun integer-string (num len)
316 "Outputs a string of LEN digit with an optional initial character PCHAR.
317 Leading zeros are present."
318 (declare (optimize (speed 3) (safety 0) (space 0))
321 (do* ((zero-code (char-code #\0))
322 (result (make-string len :initial-element #\0))
323 (minus? (minusp num))
324 (val (if minus? (- 0 num) num)
325 (nth-value 0 (floor val 10)))
326 (pos (1- len) (1- pos))
327 (mod (mod val 10) (mod val 10)))
328 ((or (zerop val) (minusp pos))
329 (when minus? (setf (schar result 0) #\-))
331 (declare (fixnum mod zero-code pos) (simple-string result) (integer val))
332 (setf (schar result pos) (code-char (+ zero-code mod)))))
334 (defun fast-string-search (substr str substr-length startpos endpos)
335 "Optimized search for a substring in a simple-string"
336 (declare (simple-string substr str)
337 (fixnum substr-length startpos endpos)
338 (optimize (speed 3) (space 0) (safety 0)))
339 (do* ((pos startpos (1+ pos))
340 (lastpos (- endpos substr-length)))
341 ((> pos lastpos) nil)
342 (declare (fixnum pos lastpos))
345 (return-from fast-string-search pos))
347 (unless (char= (schar str (+ i pos)) (schar substr i))
350 (defun string-delimited-string-to-list (str substr)
351 "splits a string delimited by substr into a list of strings"
352 (declare (simple-string str substr)
353 (optimize (speed 3) (safety 0) (space 0) (compilation-speed 0)
355 (do* ((substr-len (length substr))
356 (strlen (length str))
359 (end (fast-string-search substr str substr-len pos strlen)
360 (fast-string-search substr str substr-len pos strlen)))
363 (push (subseq str pos) output))
365 (declare (fixnum strlen substr-len pos)
366 (type (or fixnum null) end))
367 (push (subseq str pos end) output)
368 (setq pos (+ end substr-len))))
370 (defun string-to-list-skip-delimiter (str &optional (delim #\space))
371 "Return a list of strings, delimited by spaces, skipping spaces."
372 (declare (simple-string str)
373 (optimize (speed 0) (space 0) (safety 0)))
376 (i (position-not-char delim str 0 end)
377 (position-not-char delim str j end))
378 (j (when i (position-char delim str i end))
379 (when i (position-char delim str i end))))
380 ((or (null i) (null j))
381 (when (and i (< i end))
382 (push (subseq str i end) results))
384 (declare (fixnum end)
385 (type (or fixnum null) i j))
386 (push (subseq str i j) results)))
388 (defun string-starts-with (start str)
389 (and (>= (length str) (length start))
390 (string-equal start str :end2 (length start))))
392 (defun count-string-char (s c)
393 "Return a count of the number of times a character appears in a string"
394 (declare (simple-string s)
396 (optimize (speed 3) (safety 0)))
397 (do ((len (length s))
401 (declare (fixnum i len count))
402 (when (char= (schar s i) c)
405 (defun count-string-char-if (pred s)
406 "Return a count of the number of times a predicate is true
407 for characters in a string"
408 (declare (simple-string s)
409 (type (or function symbol) pred)
410 (optimize (speed 3) (safety 0) (space 0)))
411 (do ((len (length s))
415 (declare (fixnum i len count))
416 (when (funcall pred (schar s i))
422 (defun non-alphanumericp (ch)
423 (not (alphanumericp ch)))
425 (defvar +hex-chars+ "0123456789ABCDEF")
426 (declaim (type simple-string +hex-chars+))
429 (declare (type (integer 0 15) n))
430 (schar +hex-chars+ n))
432 (defconstant* +char-code-lower-a+ (char-code #\a))
433 (defconstant* +char-code-upper-a+ (char-code #\A))
434 (defconstant* +char-code-0+ (char-code #\0))
435 (declaim (type fixnum +char-code-0+ +char-code-upper-a+
439 "convert hex character to decimal"
440 (let ((code (char-code (char-upcase ch))))
441 (declare (fixnum ch))
442 (if (>= code +char-code-upper-a+)
443 (+ 10 (- code +char-code-upper-a+))
444 (- code +char-code-0+))))
446 (defun binary-sequence-to-hex-string (seq)
447 (let ((list (etypecase seq
449 (sequence (map 'list #'identity seq)))))
450 (string-downcase (format nil "~{~2,'0X~}" list))))
452 (defun encode-uri-string (query)
453 "Escape non-alphanumeric characters for URI fields"
454 (declare (simple-string query)
455 (optimize (speed 3) (safety 0) (space 0)))
456 (do* ((count (count-string-char-if #'non-alphanumericp query))
458 (new-len (+ len (* 2 count)))
459 (str (make-string new-len))
463 (declare (fixnum count len new-len spos dpos)
465 (let ((ch (schar query spos)))
466 (if (non-alphanumericp ch)
467 (let ((c (char-code ch)))
468 (setf (schar str dpos) #\%)
470 (setf (schar str dpos) (hexchar (logand (ash c -4) 15)))
472 (setf (schar str dpos) (hexchar (logand c 15))))
473 (setf (schar str dpos) ch)))))
475 (defun decode-uri-string (query)
476 "Unescape non-alphanumeric characters for URI fields"
477 (declare (simple-string query)
478 (optimize (speed 3) (safety 0) (space 0)))
479 (do* ((count (count-string-char query #\%))
481 (new-len (- len (* 2 count)))
482 (str (make-string new-len))
486 (declare (fixnum count len new-len spos dpos)
488 (let ((ch (schar query spos)))
490 (let ((c1 (charhex (schar query (1+ spos))))
491 (c2 (charhex (schar query (+ spos 2)))))
492 (declare (fixnum c1 c2))
493 (setf (schar str dpos)
494 (code-char (logior c2 (ash c1 4))))
496 (setf (schar str dpos) ch)))))
499 (defun uri-query-to-alist (query)
500 "Converts non-decoded URI query to an alist of settings"
501 (mapcar (lambda (set)
502 (let ((lst (kmrcl:delimited-string-to-list set #\=)))
503 (cons (first lst) (second lst))))
504 (kmrcl:delimited-string-to-list
505 (kmrcl:decode-uri-string query) #\&)))
507 (eval-when (:compile-toplevel :load-toplevel :execute)
508 (defvar +unambiguous-charset+
509 "abcdefghjkmnpqrstuvwxyz123456789ABCDEFGHJKLMNPQSRTUVWXYZ")
510 (defconstant* +unambiguous-length+ (length +unambiguous-charset+)))
512 (defun random-char (&optional (set :lower-alpha))
515 (code-char (+ +char-code-lower-a+ (random 26))))
517 (let ((n (random 36)))
519 (code-char (+ +char-code-0+ (- n 26)))
520 (code-char (+ +char-code-lower-a+ n)))))
522 (code-char (+ +char-code-upper-a+ (random 26))))
524 (schar +unambiguous-charset+ (random +unambiguous-length+)))
526 (let ((n (random 52)))
528 (code-char (+ +char-code-upper-a+ (- n 26)))
529 (code-char (+ +char-code-lower-a+ n)))))))
532 (defun random-string (&key (length 10) (set :lower-alpha))
533 "Returns a random lower-case string."
534 (declare (optimize (speed 3)))
535 (let ((s (make-string length)))
536 (declare (simple-string s))
537 (dotimes (i length s)
538 (setf (schar s i) (random-char set)))))
541 (defun first-char (s)
542 (declare (simple-string s))
543 (when (and (stringp s) (plusp (length s)))
547 (declare (simple-string s))
549 (let ((len (length s)))
551 (schar s (1- len)))))
553 (defun ensure-string (v)
556 (character (string v))
557 (symbol (symbol-name v))
558 (otherwise (write-to-string v))))
560 (defun string-right-trim-one-char (char str)
561 (declare (simple-string str))
562 (let* ((len (length str))
564 (declare (fixnum len last))
565 (if (char= char (schar str last))
570 (defun string-strip-ending (str endings)
571 (if (stringp endings)
572 (setq endings (list endings)))
573 (let ((len (length str)))
574 (dolist (ending endings str)
575 (when (and (>= len (length ending))
579 (return-from string-strip-ending
580 (subseq str 0 (- len (length ending))))))))
583 (defun string-maybe-shorten (str maxlen)
584 (string-elide str maxlen :end))
586 (defun string-elide (str maxlen position)
587 (declare (fixnum maxlen))
588 (let ((len (length str)))
589 (declare (fixnum len))
595 ((eq position :middle)
596 (multiple-value-bind (mid remain) (truncate maxlen 2)
597 (let ((end1 (- mid 1))
598 (start2 (- len (- mid 2) remain)))
599 (concatenate 'string (subseq str 0 end1) "..." (subseq str start2)))))
600 ((or (eq position :end) t)
601 (concatenate 'string (subseq str 0 (- maxlen 3)) "...")))))
603 (defun shrink-vector (str size)
605 (excl::.primcall 'sys::shrink-svector str size)
607 (lisp::shrink-vector str size)
609 (system::shrink-vector$vector str size)
611 (sb-kernel:shrink-vector str size)
613 (common-lisp::shrink-vector str size)
614 #-(or allegro cmu lispworks sbcl scl)
615 (setq str (subseq str 0 size))
618 (defun lex-string (string &key (whitespace '(#\space #\newline)))
619 "Separates a string at whitespace and returns a list of strings"
620 (flet ((is-sep (char) (member char whitespace :test #'char=)))
623 (position-if-not #'is-sep string)
625 (position-if-not #'is-sep string :start (1+ token-end))))
628 (position-if #'is-sep string :start token-start))
630 (position-if #'is-sep string :start token-start))))
631 ((null token-start) (nreverse tokens))
632 (push (subseq string token-start token-end) tokens)))))
634 (defun split-alphanumeric-string (string)
635 "Separates a string at any non-alphanumeric chararacter"
636 (declare (simple-string string)
637 (optimize (speed 3) (safety 0)))
638 (flet ((is-sep (char)
639 (declare (character char))
640 (and (non-alphanumericp char)
641 (not (char= #\_ char)))))
644 (position-if-not #'is-sep string)
646 (position-if-not #'is-sep string :start (1+ token-end))))
649 (position-if #'is-sep string :start token-start))
651 (position-if #'is-sep string :start token-start))))
652 ((null token-start) (nreverse tokens))
653 (push (subseq string token-start token-end) tokens)))))
656 (defun trim-non-alphanumeric (word)
657 "Strip non-alphanumeric characters from beginning and end of a word."
658 (declare (simple-string word)
659 (optimize (speed 3) (safety 0) (space 0)))
663 (declare (fixnum start end len))
665 ((or done (= start end)))
666 (if (alphanumericp (schar word start))
671 ((or done (= start end)))
672 (if (alphanumericp (schar word (1- end)))
675 (if (or (plusp start) (/= len end))
676 (subseq word start end)
680 (defun collapse-whitespace (s)
681 "Convert multiple whitespace characters to a single space character."
682 (declare (simple-string s)
683 (optimize (speed 3) (safety 0)))
684 (with-output-to-string (stream)
685 (do ((pos 0 (1+ pos))
689 (declare (fixnum pos len))
690 (let ((c (schar s pos)))
691 (declare (character c))
693 ((kl:is-char-whitespace c)
695 (write-char #\space stream))
699 (write-char c stream)))))))
701 (defun string->list (string)
702 (let ((eof (list nil)))
703 (with-input-from-string (stream string)
704 (do ((x (read stream nil eof) (read stream nil eof))
706 ((eq x eof) (nreverse l))))))