;;;; Programmer: Kevin M. Rosenberg
;;;; Date Started: Apr 2000
;;;;
-;;;; $Id: strings.lisp,v 1.35 2003/06/06 21:59:30 kevin Exp $
+;;;; $Id: strings.lisp,v 1.36 2003/06/07 05:45:14 kevin Exp $
;;;;
;;;; This file, part of KMRCL, is Copyright (c) 2002 by Kevin M. Rosenberg
;;;;
(declare (fixnum i))
(when (char= char (schar string i)) (return i))))
+(defun position-not-char (char string start max)
+ (declare (optimize (speed 3) (safety 0) (space 0))
+ (fixnum start max) (simple-string string))
+ (do* ((i start (1+ i)))
+ ((= i max) nil)
+ (declare (fixnum i))
+ (when (char/= char (schar string i)) (return i))))
+
(defun delimited-string-to-list (string &optional (separator #\space)
skip-terminal)
"split a string with delimiter"
(pos (1- len) (1- pos))
(mod (mod val 10) (mod val 10)))
((or (zerop val) (minusp pos))
- (when minus? (setf (schar result (if pchar 1 0)) #\-))
+ (when minus? (setf (schar result 0) #\-))
result)
(declare (fixnum mod zero-code pos) (simple-string result) (integer val))
(setf (schar result pos) (code-char (+ zero-code mod)))))
+
+(defun fast-string-search (substr str substr-length startpos endpos)
+ "Optimized search for a substring in a simple-string"
+ (declare (simple-string substr str)
+ (fixnum substr-length startpos endpos)
+ (optimize (speed 3) (space 0) (safety 0)))
+ (do* ((pos startpos (1+ pos))
+ (lastpos (- endpos substr-length)))
+ ((> pos lastpos) nil)
+ (declare (fixnum pos lastpos))
+ (do ((i 0 (1+ i)))
+ ((= i substr-length)
+ (return-from fast-string-search pos))
+ (declare (fixnum i))
+ (unless (char= (schar str (+ i pos)) (schar substr i))
+ (return nil)))))
+
+(defun string-to-list-skip-delimiter (str &optional (delim #\space))
+ "Return a list of strings, delimited by spaces, skipping spaces."
+ (declare (simple-string str)
+ (optimize (speed 0) (space 0) (safety 0)))
+ (do* ((results '())
+ (end (length str))
+ (i (position-not-char delim str 0 end)
+ (position-not-char delim str j end))
+ (j (when i (position-char delim str i end))
+ (when i (position-char delim str i end))))
+ ((or (null i) (null j))
+ (when (and i (< i end))
+ (push (subseq str i end) results))
+ (nreverse results))
+ (declare (fixnum i j end))
+ (push (subseq str i j) results)))
;;;; Author: Kevin M. Rosenberg
;;;; Date Started: Apr 2003
;;;;
-;;;; $Id: tests.lisp,v 1.16 2003/06/07 03:51:42 kevin Exp $
+;;;; $Id: tests.lisp,v 1.17 2003/06/07 05:45:14 kevin Exp $
;;;;
;;;; This file is Copyright (c) 2000-2002 by Kevin M. Rosenberg
;;;;
(deftest pxml.1
(xml-tag-contents "tag1" "<tag>Test</tag>")
- nil nil)
+ nil nil nil)
(deftest pxml.1o
(kmrcl::xml-tag-contents-old "tag1" "<tag>Test</tag>")
nil nil)
-(deftest fss.1
- (kmrcl::fast-string-search "" "" 0 0 0) 0)
-
-(deftest fss.2
- (kmrcl::fast-string-search "" "abc" 0 0 2) 0)
-
-(deftest fss.3
- (kmrcl::fast-string-search "abc" "" 3 0 0) nil)
-
-(deftest fss.4
- (kmrcl::fast-string-search "abc" "abcde" 3 0 4) 0)
-
-(deftest fss.5
- (kmrcl::fast-string-search "abc" "012abcde" 3 0 7) 3)
-
-(deftest fss.6
- (kmrcl::fast-string-search "abc" "012abcde" 3 0 7) 3)
-
-(deftest fss.7
- (kmrcl::fast-string-search "abc" "012abcde" 3 3 7) 3)
-
-(deftest fss.8
- (kmrcl::fast-string-search "abc" "012abcde" 3 4 7) nil)
-
-(deftest fss.9
- (kmrcl::fast-string-search "abcde" "012abcde" 5 3 8) 3)
-
-(deftest fss.10
- (kmrcl::fast-string-search "abcde" "012abcde" 5 3 7) nil)
+(deftest pxml.2
+ (xml-tag-contents "tag" "<tag>Test</tag>")
+ "Test" 15 nil)
+
+(deftest pxml.3
+ (xml-tag-contents "tag" "<tag >Test</tag>")
+ "Test" 17 nil)
+
+(deftest pxml.4
+ (xml-tag-contents "tag" "<tag a=\"b\"></tag>")
+ "" 17 ("a=\"b\""))
+
+(deftest pxml.5
+ (xml-tag-contents "tag" "<tag a=\"b\" >Test</tag>")
+ "Test" 22 ("a=\"b\""))
+
+(deftest pxml.6
+ (xml-tag-contents "tag" "<tag a=\"b\" c=\"ab\">Test</tag>")
+ "Test" 29 ("a=\"b\"" "c=\"ab\""))
+
+(deftest pxml.7
+ (xml-tag-contents "tag" "<taga a=\"b\" c=\"ab\">Test</taga>")
+ nil nil nil)
+
+(deftest pxml.8
+ (xml-tag-contents "tag" "<taga a=\"b\" c=\"ab\">Test<tag>ab</tag></taga>")
+ "ab" 37 nil)
+
+(deftest pxml.9
+ (xml-tag-contents "tag" "<taga a=\"b\" c=\"ab\">Test<tag>ab</ag></taga>")
+ nil nil nil)
+
+(deftest fss.1 (fast-string-search "" "" 0 0 0) 0)
+(deftest fss.2 (fast-string-search "" "abc" 0 0 2) 0)
+(deftest fss.3 (fast-string-search "abc" "" 3 0 0) nil)
+(deftest fss.4 (fast-string-search "abc" "abcde" 3 0 4) 0)
+(deftest fss.5 (fast-string-search "abc" "012abcde" 3 0 7) 3)
+(deftest fss.6 (fast-string-search "abc" "012abcde" 3 0 7) 3)
+(deftest fss.7 (fast-string-search "abc" "012abcde" 3 3 7) 3)
+(deftest fss.8 (fast-string-search "abc" "012abcde" 3 4 7) nil)
+(deftest fss.9 (fast-string-search "abcde" "012abcde" 5 3 8) 3)
+(deftest fss.9b (cl:search "abcde" "012abcde" :start2 3 :end2 8) 3)
+(deftest fss.10 (fast-string-search "abcde" "012abcde" 5 3 7) nil)
+(deftest fss.10b (cl:search "abcde" "012abcde" :start2 3 :end2 7) nil)
+
+(deftest stlsd.1 (string-to-list-skip-delimiter "") ())
+(deftest stlsd.2 (string-to-list-skip-delimiter "abc") ("abc"))
+(deftest stlsd.3 (string-to-list-skip-delimiter "ab c") ("ab" "c"))
+(deftest stlsd.4 (string-to-list-skip-delimiter "ab c") ("ab" "c"))
+(deftest stlsd.5 (string-to-list-skip-delimiter "ab c") ("ab" "c"))
+(deftest stlsd.6 (string-to-list-skip-delimiter "ab c ") ("ab" "c"))
+(deftest stlsd.7 (string-to-list-skip-delimiter " ab c ") ("ab" "c"))
+(deftest stlsd.8 (string-to-list-skip-delimiter "ab,,c" #\,) ("ab" "c"))
+(deftest stlsd.9 (string-to-list-skip-delimiter "ab,,c,," #\,) ("ab" "c"))
+(deftest stlsd.10 (string-to-list-skip-delimiter " ab") ("ab"))
;;; MOP Testing
;;;; Programmer: Kevin M. Rosenberg
;;;; Date Started: Apr 2000
;;;;
-;;;; $Id: xml-utils.lisp,v 1.9 2003/06/07 03:51:42 kevin Exp $
+;;;; $Id: xml-utils.lisp,v 1.10 2003/06/07 05:45:14 kevin Exp $
;;;;
;;;; This file, part of KMRCL, is Copyright (c) 2002 by Kevin M. Rosenberg
;;;;
(remove-tree-if #'string-ws? (parse-xml str)))
|#
-(defun positions-xml-tag-contents-old (tag xmlstr &optional (start-xmlstr 0) (end-xmlstr nil))
- "Returns three values: the start and end positions of contents between
- the xml tags and the position following the close of the end tag."
- (let ((done nil)
- (pos start-xmlstr)
- (taglen (length tag))
- (startpos nil)
- (endpos nil)
- (nextpos nil))
- (unless end-xmlstr
- (setq end-xmlstr (length xmlstr)))
- (while (not done)
- (let ((bracketpos (position #\< xmlstr :start pos :end end-xmlstr)))
- (if bracketpos
- (let* ((starttag (1+ bracketpos))
- (endtag (+ starttag taglen)))
- (if (and (< endtag end-xmlstr)
- (string= tag xmlstr :start2 starttag :end2 endtag))
- (let* ((char-after-tag (char xmlstr endtag)))
- (declare (character char-after-tag))
- (if (or (char= #\> char-after-tag) (char= #\space char-after-tag))
- (progn
- (if (char= #\> char-after-tag)
- (setq startpos (1+ endtag))
- (setq startpos (1+ (position #\> xmlstr :start (1+ endtag)))))
- (setq endpos (search (format nil "</~a>" tag) xmlstr
- :start2 startpos :end2 end-xmlstr))
- (setq done t)
- (if (and startpos endpos)
- (progn
- (setq nextpos (+ endpos taglen 3))
- (setq pos nextpos))
- (setf startpos nil
- endpos nil)))
- (setq pos (1+ endtag))))
- (setq pos (1+ starttag)))
- (when (> pos end-xmlstr)
- (setq done t)))
- (setq done t))))
- (values startpos endpos nextpos)))
-
-(defun fast-string-search (substr str substr-length startpos endpos)
- (declare (simple-string substr str)
- (fixnum substr-length startpos endpos)
- (optimize (speed 3) (space 0) (safety 0)))
- (do* ((pos startpos (1+ pos))
- (lastpos (- endpos substr-length)))
- ((> pos lastpos) nil)
- (declare (fixnum pos lastpos))
- (do ((i 0 (1+ i)))
- ((= i substr-length)
- (return-from fast-string-search pos))
- (declare (fixnum i))
- (unless (char= (schar str (+ i pos)) (schar substr i))
- (return nil)))))
-(defun find-start-tag (tag taglen xmlstr start-pos end-xmlstr)
- (let ((bracketpos (seaposition-char #\< xmlstr start-pos end-xmlstr)))
- (when bracketpos
- (let* ((starttag (1+ bracketpos))
- (endtag (+ starttag taglen)))
- (if (and (< endtag end-xmlstr)
- (string= tag xmlstr :start2 starttag :end2 endtag))
- (let* ((char-after-tag (char xmlstr endtag)))
- (declare (character char-after-tag))
- (if (or (char= #\> char-after-tag)
- (char= #\space char-after-tag))
- (progn
- (if (char= #\> char-after-tag)
- (setq startpos (1+ endtag))
- (setq startpos (1+ (position-char #\> xmlstr (1+ endtag) end-xmlstr))))
- ))))))))
+(defun find-start-tag (tag taglen xmlstr start-pos end-xmlstr)
+ "Searches for the start of a tag in an xmlstring. Returns (VALUES STARTPOS ATTRIBUTE-LIST)"
+ (declare (simple-string tag xmlstr)
+ (fixnum taglen start-pos end-xmlstr))
+ (do* ((search-str (concatenate 'string "<" tag))
+ (bracketpos (fast-string-search search-str xmlstr
+ (1+ taglen) start-pos end-xmlstr)
+ (fast-string-search search-str xmlstr
+ (1+ taglen) start-pos end-xmlstr)))
+ ((null bracketpos) nil)
+ (let* ((endtag (+ bracketpos 1 taglen))
+ (char-after-tag (schar xmlstr endtag)))
+ (when (or (char= #\> char-after-tag)
+ (char= #\space char-after-tag))
+ (if (char= #\> char-after-tag)
+ (return-from find-start-tag (values (1+ endtag) nil))
+ (let ((endbrack (position-char #\> xmlstr (1+ endtag) end-xmlstr)))
+ (if endbrack
+ (return-from find-start-tag
+ (values (1+ endbrack)
+ (string-to-list-skip-delimiter
+ (subseq xmlstr endtag endbrack))))
+ (values nil nil)))))
+ (setq start-pos endtag))))
+
+
+(defun find-end-tag (tag taglen xmlstr start-pos end-xmlstr)
+ (fast-string-search
+ (concatenate 'string "</" tag ">") xmlstr
+ (+ taglen 3) start-pos end-xmlstr))
+
(defun positions-xml-tag-contents (tag xmlstr &optional (start-xmlstr 0)
(end-xmlstr (length xmlstr)))
"Returns three values: the start and end positions of contents between
the xml tags and the position following the close of the end tag."
- (let ((done nil)
- (pos start-xmlstr)
- (taglen (length tag))
- (startpos nil)
- (endpos nil)
- (nextpos nil))
- (while (not done)
- (let ((bracketpos (position-char #\< xmlstr pos end-xmlstr)))
- (unless bracketpos
- (return-from positions-xml-tag-contents
- (values nil nil nil)))
- (let* ((starttag (1+ bracketpos))
- (endtag (+ starttag taglen)))
- (if (and (< endtag end-xmlstr)
- (string= tag xmlstr :start2 starttag :end2 endtag))
- (let* ((char-after-tag (char xmlstr endtag)))
- (declare (character char-after-tag))
- (if (or (char= #\> char-after-tag)
- (char= #\space char-after-tag))
- (progn
- (if (char= #\> char-after-tag)
- (setq startpos (1+ endtag))
- (setq startpos (1+ (position-char #\> xmlstr (1+ endtag) end-xmlstr))))
- (setq endpos (search (format nil "</~a>" tag) xmlstr
- :start2 startpos :end2 end-xmlstr))
- (if (and startpos endpos)
- (progn
- (setq nextpos (+ endpos taglen 3))
- (setq pos nextpos))
- (setf startpos nil
- endpos nil))
- (setq done t))
- (setq pos (1+ endtag))))
- (setq pos (1+ starttag)))
- (when (> pos end-xmlstr)
- (setq done t))))))
- (values startpos endpos nextpos)))
-
-
-(defun xml-tag-contents-old (tag xmlstr &optional (start-xmlstr 0) (end-xmlstr nil))
- "Returns two values: the string between XML start and end tag
-and position of character following end tag."
- (multiple-value-bind
- (startpos endpos nextpos)
- (positions-xml-tag-contents-old tag xmlstr start-xmlstr end-xmlstr)
- (if (and startpos endpos)
- (values (subseq xmlstr startpos endpos) nextpos)
- (values nil nil))))
-
-(defun xml-tag-contents (tag xmlstr &optional (start-xmlstr 0) (end-xmlstr nil))
+ (let* ((taglen (length tag)))
+ (multiple-value-bind (start attributes)
+ (find-start-tag tag taglen xmlstr start-xmlstr end-xmlstr)
+ (unless start
+ (return-from positions-xml-tag-contents (values nil nil nil nil)))
+ (let ((end (find-end-tag tag taglen xmlstr start end-xmlstr)))
+ (unless end
+ (return-from positions-xml-tag-contents (values nil nil nil nil)))
+ (values start end (+ end taglen 3) attributes)))))
+
+
+(defun xml-tag-contents (tag xmlstr &optional (start-xmlstr 0)
+ (end-xmlstr (length xmlstr)))
"Returns two values: the string between XML start and end tag
and position of character following end tag."
(multiple-value-bind
- (startpos endpos nextpos)
+ (startpos endpos nextpos attributes)
(positions-xml-tag-contents tag xmlstr start-xmlstr end-xmlstr)
(if (and startpos endpos)
- (values (subseq xmlstr startpos endpos) nextpos)
- (values nil nil))))
+ (values (subseq xmlstr startpos endpos) nextpos attributes)
+ (values nil nil nil))))
(defun xml-cdata (str)
(concatenate 'string "<![CDATA[" str "]]>"))