1 ;;;; -*- Mode: Lisp; Syntax: ANSI-Common-Lisp; Base: 10 -*-
2 ;;;; *************************************************************************
3 ;;;; FILE IDENTIFICATION
5 ;;;; Name: strings.lisp
6 ;;;; Purpose: Strings utility functions for KMRCL package
7 ;;;; Programmer: Kevin M. Rosenberg
8 ;;;; Date Started: Apr 2000
12 ;;;; This file, part of KMRCL, is Copyright (c) 2002-2006 by Kevin M. Rosenberg
14 ;;;; KMRCL users are granted the rights to distribute and use this software
15 ;;;; as governed by the terms of the Lisp Lesser GNU Public License
16 ;;;; (http://opensource.franz.com/preamble.html), also known as the LLGPL.
17 ;;;; *************************************************************************
24 (defmacro string-append (outputstr &rest args)
25 `(setq ,outputstr (concatenate 'string ,outputstr ,@args)))
27 (defun list-to-string (lst)
28 "Converts a list to a string, doesn't include any delimiters between elements"
29 (format nil "~{~A~}" lst))
31 (defun count-string-words (str)
32 (declare (simple-string str)
33 (optimize (speed 3) (safety 0) (space 0)))
36 (declare (fixnum n-words))
37 (do* ((len (length str))
41 (if (alphanumericp (schar str i))
45 (setq in-word nil)))))
47 (defun position-char (char string start max)
48 (declare (optimize (speed 3) (safety 0) (space 0))
49 (fixnum start max) (simple-string string))
50 (do* ((i start (1+ i)))
53 (when (char= char (schar string i)) (return i))))
55 (defun position-not-char (char string start max)
56 (declare (optimize (speed 3) (safety 0) (space 0))
57 (fixnum start max) (simple-string string))
58 (do* ((i start (1+ i)))
61 (when (char/= char (schar string i)) (return i))))
63 (defun delimited-string-to-list (string &optional (separator #\space)
65 "split a string with delimiter"
66 (declare (optimize (speed 3) (safety 0) (space 0) (compilation-speed 0))
68 (type character separator))
69 (do* ((len (length string))
72 (end (position-char separator string pos len)
73 (position-char separator string pos len)))
76 (push (subseq string pos) output)
77 (when (or (not skip-terminal) (zerop len))
80 (declare (type fixnum pos len)
81 (type (or null fixnum) end))
82 (push (subseq string pos end) output)
86 (defun list-to-delimited-string (list &optional (separator " "))
87 (format nil (concatenate 'string "~{~A~^" (string separator) "~}") list))
89 (defun string-invert (str)
90 "Invert case of a string"
91 (declare (optimize (speed 3) (compilation-speed 0) (debug 0) (safety 0))
93 (let ((up nil) (down nil))
95 (loop for char of-type character across str do
96 (cond ((upper-case-p char)
97 (if down (return-from skip str) (setf up t)))
99 (if up (return-from skip str) (setf down t)))))
100 (if up (string-downcase str) (string-upcase str)))))
102 (defun add-sql-quotes (s)
103 (substitute-string-for-char s #\' "''"))
105 (defun escape-backslashes (s)
106 (substitute-string-for-char s #\\ "\\\\"))
108 (defun substitute-string-for-char (procstr match-char subst-str)
109 "Substitutes a string for a single matching character of a string"
110 (substitute-chars-strings procstr (list (cons match-char subst-str))))
112 (defun string-substitute (string substring replacement-string)
113 "String substitute by Larry Hunter. Obtained from Google"
114 (let ((substring-length (length substring))
118 (search substring string)
119 (search substring string :start2 last-end)))
121 (concatenate 'string new-string (subseq string last-end)))
125 (subseq string last-end next-start)
127 (setq last-end (+ next-start substring-length)))))
129 (defun string-trim-last-character (s)
130 "Return the string less the last character"
131 (let ((len (length s)))
133 (subseq s 0 (1- len))
136 (defun nstring-trim-last-character (s)
137 "Return the string less the last character"
138 (let ((len (length s)))
140 (nsubseq s 0 (1- len))
143 (defun string-hash (str &optional (bitmask 65535))
145 (declare (fixnum hash)
147 (dotimes (i (length str))
149 (setq hash (+ hash (char-code (char str i)))))
150 (logand hash bitmask)))
152 (defun is-string-empty (str)
153 (zerop (length str)))
155 (defvar *whitespace-chars* '(#\space #\tab #\return #\linefeed
157 #+lispworks #\No-Break-Space))
159 (defun is-char-whitespace (c)
160 (declare (character c) (optimize (speed 3) (safety 0)))
161 (or (char= c #\Space) (char= c #\Tab) (char= c #\Return)
163 #+allegro (char= c #\%space)
164 #+lispworks (char= c #\No-Break-Space)))
166 (defun is-string-whitespace (str)
167 "Return t if string is all whitespace"
168 (every #'is-char-whitespace str))
170 (defun string-right-trim-whitespace (str)
171 (string-right-trim *whitespace-chars* str))
173 (defun string-left-trim-whitespace (str)
174 (string-left-trim *whitespace-chars* str))
176 (defun string-trim-whitespace (str)
177 (string-trim *whitespace-chars* str))
179 (defun replaced-string-length (str repl-alist)
180 (declare (simple-string str)
181 (optimize (speed 3) (safety 0) (space 0)))
183 (orig-len (length str))
185 ((= i orig-len) new-len)
186 (declare (fixnum i orig-len new-len))
187 (let* ((c (char str i))
188 (match (assoc c repl-alist :test #'char=)))
189 (declare (character c))
191 (incf new-len (1- (length
192 (the simple-string (cdr match)))))))))
194 (defun substitute-chars-strings (str repl-alist)
195 "Replace all instances of a chars with a string. repl-alist is an assoc
196 list of characters and replacement strings."
197 (declare (simple-string str)
198 (optimize (speed 3) (safety 0) (space 0)))
199 (do* ((orig-len (length str))
200 (new-string (make-string (replaced-string-length str repl-alist)))
205 (declare (fixnum spos dpos) (simple-string new-string))
206 (let* ((c (char str spos))
207 (match (assoc c repl-alist :test #'char=)))
208 (declare (character c))
210 (let* ((subst (cdr match))
211 (len (length subst)))
212 (declare (fixnum len)
213 (simple-string subst))
216 (setf (char new-string dpos) (char subst j))
219 (setf (char new-string dpos) c)
222 (defun escape-xml-string (string)
223 "Escape invalid XML characters"
224 (substitute-chars-strings string '((#\& . "&") (#\< . "<"))))
226 (defun make-usb8-array (len)
227 (make-array len :element-type '(unsigned-byte 8)))
229 (defun usb8-array-to-string (vec &key (start 0) end)
230 (declare (type (simple-array (unsigned-byte 8) (*)) vec)
233 (setq end (length vec)))
234 (let* ((len (- end start))
235 (str (make-string len)))
236 (declare (fixnum len)
238 (optimize (speed 3) (safety 0)))
242 (setf (schar str i) (code-char (aref vec (the fixnum (+ i start))))))))
244 (defun string-to-usb8-array (str)
245 (declare (simple-string str))
246 (let* ((len (length str))
247 (vec (make-usb8-array len)))
248 (declare (fixnum len)
249 (type (simple-array (unsigned-byte 8) (*)) vec)
250 (optimize (speed 3)))
254 (setf (aref vec i) (char-code (schar str i))))))
256 (defun concat-separated-strings (separator &rest lists)
257 (format nil (concatenate 'string "~{~A~^" (string separator) "~}")
258 (append-sublists lists)))
260 (defun only-null-list-elements-p (lst)
261 (or (null lst) (every #'null lst)))
263 (defun print-separated-strings (strm separator &rest lists)
264 (declare (optimize (speed 3) (safety 0) (space 0) (debug 0)
265 (compilation-speed 0)))
266 (do* ((rest-lists lists (cdr rest-lists))
267 (list (car rest-lists) (car rest-lists))
268 (last-list (only-null-list-elements-p (cdr rest-lists))
269 (only-null-list-elements-p (cdr rest-lists))))
270 ((null rest-lists) strm)
271 (do* ((lst list (cdr lst))
272 (elem (car lst) (car lst))
273 (last-elem (null (cdr lst)) (null (cdr lst))))
275 (write-string elem strm)
276 (unless (and last-elem last-list)
277 (write-string separator strm)))))
279 (eval-when (:compile-toplevel :load-toplevel :execute)
280 (defmacro def-prefixed-number-string (fn-name type &optional doc)
281 `(defun ,fn-name (num pchar len)
282 ,@(when (stringp doc) (list doc))
283 (declare (optimize (speed 3) (safety 0) (space 0))
288 (do* ((zero-code (char-code #\0))
289 (result (make-string len :initial-element #\0))
290 (minus? (minusp num))
291 (val (if minus? (- num) num)
292 (nth-value 0 (floor val 10)))
293 (pos (1- len) (1- pos))
294 (mod (mod val 10) (mod val 10)))
295 ((or (zerop val) (minusp pos))
297 (setf (schar result 0) pchar))
298 (when minus? (setf (schar result (if pchar 1 0)) #\-))
301 (fixnum mod zero-code pos)
303 (simple-string result))
304 (setf (schar result pos) (code-char (the fixnum (+ zero-code mod))))))))
306 (def-prefixed-number-string prefixed-fixnum-string fixnum
307 "Outputs a string of LEN digit with an optional initial character PCHAR.
308 Leading zeros are present. LEN must be a fixnum.")
310 (def-prefixed-number-string prefixed-integer-string integer
311 "Outputs a string of LEN digit with an optional initial character PCHAR.
312 Leading zeros are present. LEN must be an integer.")
314 (defun integer-string (num len)
315 "Outputs a string of LEN digit with an optional initial character PCHAR.
316 Leading zeros are present."
317 (declare (optimize (speed 3) (safety 0) (space 0))
320 (do* ((zero-code (char-code #\0))
321 (result (make-string len :initial-element #\0))
322 (minus? (minusp num))
323 (val (if minus? (- 0 num) num)
324 (nth-value 0 (floor val 10)))
325 (pos (1- len) (1- pos))
326 (mod (mod val 10) (mod val 10)))
327 ((or (zerop val) (minusp pos))
328 (when minus? (setf (schar result 0) #\-))
330 (declare (fixnum mod zero-code pos) (simple-string result) (integer val))
331 (setf (schar result pos) (code-char (+ zero-code mod)))))
333 (defun fast-string-search (substr str substr-length startpos endpos)
334 "Optimized search for a substring in a simple-string"
335 (declare (simple-string substr str)
336 (fixnum substr-length startpos endpos)
337 (optimize (speed 3) (space 0) (safety 0)))
338 (do* ((pos startpos (1+ pos))
339 (lastpos (- endpos substr-length)))
340 ((> pos lastpos) nil)
341 (declare (fixnum pos lastpos))
344 (return-from fast-string-search pos))
346 (unless (char= (schar str (+ i pos)) (schar substr i))
349 (defun string-delimited-string-to-list (str substr)
350 "splits a string delimited by substr into a list of strings"
351 (declare (simple-string str substr)
352 (optimize (speed 3) (safety 0) (space 0) (compilation-speed 0)
354 (do* ((substr-len (length substr))
355 (strlen (length str))
358 (end (fast-string-search substr str substr-len pos strlen)
359 (fast-string-search substr str substr-len pos strlen)))
362 (push (subseq str pos) output))
364 (declare (fixnum strlen substr-len pos)
365 (type (or fixnum null) end))
366 (push (subseq str pos end) output)
367 (setq pos (+ end substr-len))))
369 (defun string-to-list-skip-delimiter (str &optional (delim #\space))
370 "Return a list of strings, delimited by spaces, skipping spaces."
371 (declare (simple-string str)
372 (optimize (speed 0) (space 0) (safety 0)))
375 (i (position-not-char delim str 0 end)
376 (position-not-char delim str j end))
377 (j (when i (position-char delim str i end))
378 (when i (position-char delim str i end))))
379 ((or (null i) (null j))
380 (when (and i (< i end))
381 (push (subseq str i end) results))
383 (declare (fixnum end)
384 (type (or fixnum null) i j))
385 (push (subseq str i j) results)))
387 (defun string-starts-with (start str)
388 (and (>= (length str) (length start))
389 (string-equal start str :end2 (length start))))
391 (defun count-string-char (s c)
392 "Return a count of the number of times a character appears in a string"
393 (declare (simple-string s)
395 (optimize (speed 3) (safety 0)))
396 (do ((len (length s))
400 (declare (fixnum i len count))
401 (when (char= (schar s i) c)
404 (defun count-string-char-if (pred s)
405 "Return a count of the number of times a predicate is true
406 for characters in a string"
407 (declare (simple-string s)
408 (type (or function symbol) pred)
409 (optimize (speed 3) (safety 0) (space 0)))
410 (do ((len (length s))
414 (declare (fixnum i len count))
415 (when (funcall pred (schar s i))
421 (defun non-alphanumericp (ch)
422 (not (alphanumericp ch)))
424 (defvar +hex-chars+ "0123456789ABCDEF")
425 (declaim (type simple-string +hex-chars+))
428 (declare (type (integer 0 15) n))
429 (schar +hex-chars+ n))
431 (defconstant* +char-code-lower-a+ (char-code #\a))
432 (defconstant* +char-code-upper-a+ (char-code #\A))
433 (defconstant* +char-code-0+ (char-code #\0))
434 (declaim (type fixnum +char-code-0+ +char-code-upper-a+
438 "convert hex character to decimal"
439 (let ((code (char-code (char-upcase ch))))
440 (declare (fixnum ch))
441 (if (>= code +char-code-upper-a+)
442 (+ 10 (- code +char-code-upper-a+))
443 (- code +char-code-0+))))
445 (defun binary-sequence-to-hex-string (seq)
446 (let ((list (etypecase seq
448 (sequence (map 'list #'identity seq)))))
449 (string-downcase (format nil "~{~2,'0X~}" list))))
451 (defun encode-uri-string (query)
452 "Escape non-alphanumeric characters for URI fields"
453 (declare (simple-string query)
454 (optimize (speed 3) (safety 0) (space 0)))
455 (do* ((count (count-string-char-if #'non-alphanumericp query))
457 (new-len (+ len (* 2 count)))
458 (str (make-string new-len))
462 (declare (fixnum count len new-len spos dpos)
464 (let ((ch (schar query spos)))
465 (if (non-alphanumericp ch)
466 (let ((c (char-code ch)))
467 (setf (schar str dpos) #\%)
469 (setf (schar str dpos) (hexchar (logand (ash c -4) 15)))
471 (setf (schar str dpos) (hexchar (logand c 15))))
472 (setf (schar str dpos) ch)))))
474 (defun decode-uri-string (query)
475 "Unescape non-alphanumeric characters for URI fields"
476 (declare (simple-string query)
477 (optimize (speed 3) (safety 0) (space 0)))
478 (do* ((count (count-string-char query #\%))
480 (new-len (- len (* 2 count)))
481 (str (make-string new-len))
485 (declare (fixnum count len new-len spos dpos)
487 (let ((ch (schar query spos)))
489 (let ((c1 (charhex (schar query (1+ spos))))
490 (c2 (charhex (schar query (+ spos 2)))))
491 (declare (fixnum c1 c2))
492 (setf (schar str dpos)
493 (code-char (logior c2 (ash c1 4))))
495 (setf (schar str dpos) ch)))))
498 (defun uri-query-to-alist (query)
499 "Converts non-decoded URI query to an alist of settings"
500 (mapcar (lambda (set)
501 (let ((lst (kmrcl:delimited-string-to-list set #\=)))
502 (cons (first lst) (second lst))))
503 (kmrcl:delimited-string-to-list
504 (kmrcl:decode-uri-string query) #\&)))
506 (eval-when (:compile-toplevel :load-toplevel :execute)
507 (defvar +unambiguous-charset+
508 "abcdefghjkmnpqrstuvwxyz123456789ABCDEFGHJKLMNPQSRTUVWXYZ")
509 (defconstant* +unambiguous-length+ (length +unambiguous-charset+)))
511 (defun random-char (&optional (set :lower-alpha))
514 (code-char (+ +char-code-lower-a+ (random 26))))
516 (let ((n (random 36)))
518 (code-char (+ +char-code-0+ (- n 26)))
519 (code-char (+ +char-code-lower-a+ n)))))
521 (code-char (+ +char-code-upper-a+ (random 26))))
523 (schar +unambiguous-charset+ (random +unambiguous-length+)))
525 (let ((n (random 52)))
527 (code-char (+ +char-code-upper-a+ (- n 26)))
528 (code-char (+ +char-code-lower-a+ n)))))))
531 (defun random-string (&key (length 10) (set :lower-alpha))
532 "Returns a random lower-case string."
533 (declare (optimize (speed 3)))
534 (let ((s (make-string length)))
535 (declare (simple-string s))
536 (dotimes (i length s)
537 (setf (schar s i) (random-char set)))))
540 (defun first-char (s)
541 (declare (simple-string s))
542 (when (and (stringp s) (plusp (length s)))
546 (declare (simple-string s))
548 (let ((len (length s)))
550 (schar s (1- len)))))
552 (defun ensure-string (v)
555 (character (string v))
556 (symbol (symbol-name v))
557 (otherwise (write-to-string v))))
559 (defun string-right-trim-one-char (char str)
560 (declare (simple-string str))
561 (let* ((len (length str))
563 (declare (fixnum len last))
564 (if (char= char (schar str last))
569 (defun remove-char-string (char str)
570 (declare (character char)
572 (do* ((len (length str))
573 (out (make-string len))
576 ((= pos len) (subseq out 0 opos))
577 (declare (fixnum pos opos len)
579 (let ((c (char str pos)))
580 (declare (character c))
581 (when (char/= c char)
582 (setf (schar out opos) c)
586 (defun string-strip-ending (str endings)
587 (if (stringp endings)
588 (setq endings (list endings)))
589 (let ((len (length str)))
590 (dolist (ending endings str)
591 (when (and (>= len (length ending))
595 (return-from string-strip-ending
596 (subseq str 0 (- len (length ending))))))))
599 (defun string-maybe-shorten (str maxlen)
600 (string-elide str maxlen :end))
602 (defun string-elide (str maxlen position)
603 (declare (fixnum maxlen))
604 (let ((len (length str)))
605 (declare (fixnum len))
611 ((eq position :middle)
612 (multiple-value-bind (mid remain) (truncate maxlen 2)
613 (let ((end1 (- mid 1))
614 (start2 (- len (- mid 2) remain)))
615 (concatenate 'string (subseq str 0 end1) "..." (subseq str start2)))))
616 ((or (eq position :end) t)
617 (concatenate 'string (subseq str 0 (- maxlen 3)) "...")))))
619 (defun shrink-vector (str size)
621 (excl::.primcall 'sys::shrink-svector str size)
623 (lisp::shrink-vector str size)
625 (system::shrink-vector$vector str size)
627 (sb-kernel:shrink-vector str size)
629 (common-lisp::shrink-vector str size)
630 #-(or allegro cmu lispworks sbcl scl)
631 (setq str (subseq str 0 size))
634 (defun lex-string (string &key (whitespace '(#\space #\newline)))
635 "Separates a string at whitespace and returns a list of strings"
636 (flet ((is-sep (char) (member char whitespace :test #'char=)))
639 (position-if-not #'is-sep string)
641 (position-if-not #'is-sep string :start (1+ token-end))))
644 (position-if #'is-sep string :start token-start))
646 (position-if #'is-sep string :start token-start))))
647 ((null token-start) (nreverse tokens))
648 (push (subseq string token-start token-end) tokens)))))
650 (defun split-alphanumeric-string (string)
651 "Separates a string at any non-alphanumeric chararacter"
652 (declare (simple-string string)
653 (optimize (speed 3) (safety 0)))
654 (flet ((is-sep (char)
655 (declare (character char))
656 (and (non-alphanumericp char)
657 (not (char= #\_ char)))))
660 (position-if-not #'is-sep string)
662 (position-if-not #'is-sep string :start (1+ token-end))))
665 (position-if #'is-sep string :start token-start))
667 (position-if #'is-sep string :start token-start))))
668 ((null token-start) (nreverse tokens))
669 (push (subseq string token-start token-end) tokens)))))
672 (defun trim-non-alphanumeric (word)
673 "Strip non-alphanumeric characters from beginning and end of a word."
674 (declare (simple-string word)
675 (optimize (speed 3) (safety 0) (space 0)))
679 (declare (fixnum start end len))
681 ((or done (= start end)))
682 (if (alphanumericp (schar word start))
687 ((or done (= start end)))
688 (if (alphanumericp (schar word (1- end)))
691 (if (or (plusp start) (/= len end))
692 (subseq word start end)
696 (defun collapse-whitespace (s)
697 "Convert multiple whitespace characters to a single space character."
698 (declare (simple-string s)
699 (optimize (speed 3) (safety 0)))
700 (with-output-to-string (stream)
701 (do ((pos 0 (1+ pos))
705 (declare (fixnum pos len))
706 (let ((c (schar s pos)))
707 (declare (character c))
709 ((kl:is-char-whitespace c)
711 (write-char #\space stream))
715 (write-char c stream)))))))
717 (defun string->list (string)
718 (let ((eof (list nil)))
719 (with-input-from-string (stream string)
720 (do ((x (read stream nil eof) (read stream nil eof))
722 ((eq x eof) (nreverse l))))))