From 5dbd1fda3cf8f68c070cf3036dc6b1b536bc9f5a Mon Sep 17 00:00:00 2001 From: "Kevin M. Rosenberg" Date: Sat, 7 Jun 2003 05:45:14 +0000 Subject: [PATCH] r5066: *** empty log message *** --- package.lisp | 5 +- strings.lisp | 45 ++++++++++++- tests.lisp | 88 ++++++++++++++++--------- xml-utils.lisp | 173 ++++++++++++++----------------------------------- 4 files changed, 152 insertions(+), 159 deletions(-) diff --git a/package.lisp b/package.lisp index c6f2ad9..79306a8 100644 --- a/package.lisp +++ b/package.lisp @@ -7,7 +7,7 @@ ;;;; Programmer: Kevin M. Rosenberg ;;;; Date Started: Apr 2000 ;;;; -;;;; $Id: package.lisp,v 1.34 2003/06/06 21:59:30 kevin Exp $ +;;;; $Id: package.lisp,v 1.35 2003/06/07 05:45:14 kevin Exp $ ;;;; ;;;; This file, part of KMRCL, is Copyright (c) 2002 by Kevin M. Rosenberg ;;;; @@ -40,10 +40,13 @@ #:mapcar-append-string #:mapcar2-append-string #:position-char + #:position-not-char #:delimited-string-to-list #:list-to-delimited-string #:prefixed-fixnum-string #:integer-string + #:fast-string-search + #:string-to-list-skip-delimiter #:flatten diff --git a/strings.lisp b/strings.lisp index f933fe8..a070731 100644 --- a/strings.lisp +++ b/strings.lisp @@ -7,7 +7,7 @@ ;;;; Programmer: Kevin M. Rosenberg ;;;; Date Started: Apr 2000 ;;;; -;;;; $Id: strings.lisp,v 1.35 2003/06/06 21:59:30 kevin Exp $ +;;;; $Id: strings.lisp,v 1.36 2003/06/07 05:45:14 kevin Exp $ ;;;; ;;;; This file, part of KMRCL, is Copyright (c) 2002 by Kevin M. Rosenberg ;;;; @@ -53,6 +53,14 @@ (declare (fixnum i)) (when (char= char (schar string i)) (return i)))) +(defun position-not-char (char string start max) + (declare (optimize (speed 3) (safety 0) (space 0)) + (fixnum start max) (simple-string string)) + (do* ((i start (1+ i))) + ((= i max) nil) + (declare (fixnum i)) + (when (char/= char (schar string i)) (return i)))) + (defun delimited-string-to-list (string &optional (separator #\space) skip-terminal) "split a string with delimiter" @@ -286,7 +294,40 @@ Leading zeros are present." (pos (1- len) (1- pos)) (mod (mod val 10) (mod val 10))) ((or (zerop val) (minusp pos)) - (when minus? (setf (schar result (if pchar 1 0)) #\-)) + (when minus? (setf (schar result 0) #\-)) result) (declare (fixnum mod zero-code pos) (simple-string result) (integer val)) (setf (schar result pos) (code-char (+ zero-code mod))))) + +(defun fast-string-search (substr str substr-length startpos endpos) + "Optimized search for a substring in a simple-string" + (declare (simple-string substr str) + (fixnum substr-length startpos endpos) + (optimize (speed 3) (space 0) (safety 0))) + (do* ((pos startpos (1+ pos)) + (lastpos (- endpos substr-length))) + ((> pos lastpos) nil) + (declare (fixnum pos lastpos)) + (do ((i 0 (1+ i))) + ((= i substr-length) + (return-from fast-string-search pos)) + (declare (fixnum i)) + (unless (char= (schar str (+ i pos)) (schar substr i)) + (return nil))))) + +(defun string-to-list-skip-delimiter (str &optional (delim #\space)) + "Return a list of strings, delimited by spaces, skipping spaces." + (declare (simple-string str) + (optimize (speed 0) (space 0) (safety 0))) + (do* ((results '()) + (end (length str)) + (i (position-not-char delim str 0 end) + (position-not-char delim str j end)) + (j (when i (position-char delim str i end)) + (when i (position-char delim str i end)))) + ((or (null i) (null j)) + (when (and i (< i end)) + (push (subseq str i end) results)) + (nreverse results)) + (declare (fixnum i j end)) + (push (subseq str i j) results))) diff --git a/tests.lisp b/tests.lisp index d0ca889..d61d439 100644 --- a/tests.lisp +++ b/tests.lisp @@ -7,7 +7,7 @@ ;;;; Author: Kevin M. Rosenberg ;;;; Date Started: Apr 2003 ;;;; -;;;; $Id: tests.lisp,v 1.16 2003/06/07 03:51:42 kevin Exp $ +;;;; $Id: tests.lisp,v 1.17 2003/06/07 05:45:14 kevin Exp $ ;;;; ;;;; This file is Copyright (c) 2000-2002 by Kevin M. Rosenberg ;;;; @@ -100,41 +100,67 @@ (deftest pxml.1 (xml-tag-contents "tag1" "Test") - nil nil) + nil nil nil) (deftest pxml.1o (kmrcl::xml-tag-contents-old "tag1" "Test") nil nil) -(deftest fss.1 - (kmrcl::fast-string-search "" "" 0 0 0) 0) - -(deftest fss.2 - (kmrcl::fast-string-search "" "abc" 0 0 2) 0) - -(deftest fss.3 - (kmrcl::fast-string-search "abc" "" 3 0 0) nil) - -(deftest fss.4 - (kmrcl::fast-string-search "abc" "abcde" 3 0 4) 0) - -(deftest fss.5 - (kmrcl::fast-string-search "abc" "012abcde" 3 0 7) 3) - -(deftest fss.6 - (kmrcl::fast-string-search "abc" "012abcde" 3 0 7) 3) - -(deftest fss.7 - (kmrcl::fast-string-search "abc" "012abcde" 3 3 7) 3) - -(deftest fss.8 - (kmrcl::fast-string-search "abc" "012abcde" 3 4 7) nil) - -(deftest fss.9 - (kmrcl::fast-string-search "abcde" "012abcde" 5 3 8) 3) - -(deftest fss.10 - (kmrcl::fast-string-search "abcde" "012abcde" 5 3 7) nil) +(deftest pxml.2 + (xml-tag-contents "tag" "Test") + "Test" 15 nil) + +(deftest pxml.3 + (xml-tag-contents "tag" "Test") + "Test" 17 nil) + +(deftest pxml.4 + (xml-tag-contents "tag" "") + "" 17 ("a=\"b\"")) + +(deftest pxml.5 + (xml-tag-contents "tag" "Test") + "Test" 22 ("a=\"b\"")) + +(deftest pxml.6 + (xml-tag-contents "tag" "Test") + "Test" 29 ("a=\"b\"" "c=\"ab\"")) + +(deftest pxml.7 + (xml-tag-contents "tag" "Test") + nil nil nil) + +(deftest pxml.8 + (xml-tag-contents "tag" "Testab") + "ab" 37 nil) + +(deftest pxml.9 + (xml-tag-contents "tag" "Testab") + nil nil nil) + +(deftest fss.1 (fast-string-search "" "" 0 0 0) 0) +(deftest fss.2 (fast-string-search "" "abc" 0 0 2) 0) +(deftest fss.3 (fast-string-search "abc" "" 3 0 0) nil) +(deftest fss.4 (fast-string-search "abc" "abcde" 3 0 4) 0) +(deftest fss.5 (fast-string-search "abc" "012abcde" 3 0 7) 3) +(deftest fss.6 (fast-string-search "abc" "012abcde" 3 0 7) 3) +(deftest fss.7 (fast-string-search "abc" "012abcde" 3 3 7) 3) +(deftest fss.8 (fast-string-search "abc" "012abcde" 3 4 7) nil) +(deftest fss.9 (fast-string-search "abcde" "012abcde" 5 3 8) 3) +(deftest fss.9b (cl:search "abcde" "012abcde" :start2 3 :end2 8) 3) +(deftest fss.10 (fast-string-search "abcde" "012abcde" 5 3 7) nil) +(deftest fss.10b (cl:search "abcde" "012abcde" :start2 3 :end2 7) nil) + +(deftest stlsd.1 (string-to-list-skip-delimiter "") ()) +(deftest stlsd.2 (string-to-list-skip-delimiter "abc") ("abc")) +(deftest stlsd.3 (string-to-list-skip-delimiter "ab c") ("ab" "c")) +(deftest stlsd.4 (string-to-list-skip-delimiter "ab c") ("ab" "c")) +(deftest stlsd.5 (string-to-list-skip-delimiter "ab c") ("ab" "c")) +(deftest stlsd.6 (string-to-list-skip-delimiter "ab c ") ("ab" "c")) +(deftest stlsd.7 (string-to-list-skip-delimiter " ab c ") ("ab" "c")) +(deftest stlsd.8 (string-to-list-skip-delimiter "ab,,c" #\,) ("ab" "c")) +(deftest stlsd.9 (string-to-list-skip-delimiter "ab,,c,," #\,) ("ab" "c")) +(deftest stlsd.10 (string-to-list-skip-delimiter " ab") ("ab")) ;;; MOP Testing diff --git a/xml-utils.lisp b/xml-utils.lisp index 4dc37fc..2c2db39 100644 --- a/xml-utils.lisp +++ b/xml-utils.lisp @@ -7,7 +7,7 @@ ;;;; Programmer: Kevin M. Rosenberg ;;;; Date Started: Apr 2000 ;;;; -;;;; $Id: xml-utils.lisp,v 1.9 2003/06/07 03:51:42 kevin Exp $ +;;;; $Id: xml-utils.lisp,v 1.10 2003/06/07 05:45:14 kevin Exp $ ;;;; ;;;; This file, part of KMRCL, is Copyright (c) 2002 by Kevin M. Rosenberg ;;;; @@ -35,141 +35,64 @@ (remove-tree-if #'string-ws? (parse-xml str))) |# -(defun positions-xml-tag-contents-old (tag xmlstr &optional (start-xmlstr 0) (end-xmlstr nil)) - "Returns three values: the start and end positions of contents between - the xml tags and the position following the close of the end tag." - (let ((done nil) - (pos start-xmlstr) - (taglen (length tag)) - (startpos nil) - (endpos nil) - (nextpos nil)) - (unless end-xmlstr - (setq end-xmlstr (length xmlstr))) - (while (not done) - (let ((bracketpos (position #\< xmlstr :start pos :end end-xmlstr))) - (if bracketpos - (let* ((starttag (1+ bracketpos)) - (endtag (+ starttag taglen))) - (if (and (< endtag end-xmlstr) - (string= tag xmlstr :start2 starttag :end2 endtag)) - (let* ((char-after-tag (char xmlstr endtag))) - (declare (character char-after-tag)) - (if (or (char= #\> char-after-tag) (char= #\space char-after-tag)) - (progn - (if (char= #\> char-after-tag) - (setq startpos (1+ endtag)) - (setq startpos (1+ (position #\> xmlstr :start (1+ endtag))))) - (setq endpos (search (format nil "" tag) xmlstr - :start2 startpos :end2 end-xmlstr)) - (setq done t) - (if (and startpos endpos) - (progn - (setq nextpos (+ endpos taglen 3)) - (setq pos nextpos)) - (setf startpos nil - endpos nil))) - (setq pos (1+ endtag)))) - (setq pos (1+ starttag))) - (when (> pos end-xmlstr) - (setq done t))) - (setq done t)))) - (values startpos endpos nextpos))) - -(defun fast-string-search (substr str substr-length startpos endpos) - (declare (simple-string substr str) - (fixnum substr-length startpos endpos) - (optimize (speed 3) (space 0) (safety 0))) - (do* ((pos startpos (1+ pos)) - (lastpos (- endpos substr-length))) - ((> pos lastpos) nil) - (declare (fixnum pos lastpos)) - (do ((i 0 (1+ i))) - ((= i substr-length) - (return-from fast-string-search pos)) - (declare (fixnum i)) - (unless (char= (schar str (+ i pos)) (schar substr i)) - (return nil))))) -(defun find-start-tag (tag taglen xmlstr start-pos end-xmlstr) - (let ((bracketpos (seaposition-char #\< xmlstr start-pos end-xmlstr))) - (when bracketpos - (let* ((starttag (1+ bracketpos)) - (endtag (+ starttag taglen))) - (if (and (< endtag end-xmlstr) - (string= tag xmlstr :start2 starttag :end2 endtag)) - (let* ((char-after-tag (char xmlstr endtag))) - (declare (character char-after-tag)) - (if (or (char= #\> char-after-tag) - (char= #\space char-after-tag)) - (progn - (if (char= #\> char-after-tag) - (setq startpos (1+ endtag)) - (setq startpos (1+ (position-char #\> xmlstr (1+ endtag) end-xmlstr)))) - )))))))) +(defun find-start-tag (tag taglen xmlstr start-pos end-xmlstr) + "Searches for the start of a tag in an xmlstring. Returns (VALUES STARTPOS ATTRIBUTE-LIST)" + (declare (simple-string tag xmlstr) + (fixnum taglen start-pos end-xmlstr)) + (do* ((search-str (concatenate 'string "<" tag)) + (bracketpos (fast-string-search search-str xmlstr + (1+ taglen) start-pos end-xmlstr) + (fast-string-search search-str xmlstr + (1+ taglen) start-pos end-xmlstr))) + ((null bracketpos) nil) + (let* ((endtag (+ bracketpos 1 taglen)) + (char-after-tag (schar xmlstr endtag))) + (when (or (char= #\> char-after-tag) + (char= #\space char-after-tag)) + (if (char= #\> char-after-tag) + (return-from find-start-tag (values (1+ endtag) nil)) + (let ((endbrack (position-char #\> xmlstr (1+ endtag) end-xmlstr))) + (if endbrack + (return-from find-start-tag + (values (1+ endbrack) + (string-to-list-skip-delimiter + (subseq xmlstr endtag endbrack)))) + (values nil nil))))) + (setq start-pos endtag)))) + + +(defun find-end-tag (tag taglen xmlstr start-pos end-xmlstr) + (fast-string-search + (concatenate 'string "") xmlstr + (+ taglen 3) start-pos end-xmlstr)) + (defun positions-xml-tag-contents (tag xmlstr &optional (start-xmlstr 0) (end-xmlstr (length xmlstr))) "Returns three values: the start and end positions of contents between the xml tags and the position following the close of the end tag." - (let ((done nil) - (pos start-xmlstr) - (taglen (length tag)) - (startpos nil) - (endpos nil) - (nextpos nil)) - (while (not done) - (let ((bracketpos (position-char #\< xmlstr pos end-xmlstr))) - (unless bracketpos - (return-from positions-xml-tag-contents - (values nil nil nil))) - (let* ((starttag (1+ bracketpos)) - (endtag (+ starttag taglen))) - (if (and (< endtag end-xmlstr) - (string= tag xmlstr :start2 starttag :end2 endtag)) - (let* ((char-after-tag (char xmlstr endtag))) - (declare (character char-after-tag)) - (if (or (char= #\> char-after-tag) - (char= #\space char-after-tag)) - (progn - (if (char= #\> char-after-tag) - (setq startpos (1+ endtag)) - (setq startpos (1+ (position-char #\> xmlstr (1+ endtag) end-xmlstr)))) - (setq endpos (search (format nil "" tag) xmlstr - :start2 startpos :end2 end-xmlstr)) - (if (and startpos endpos) - (progn - (setq nextpos (+ endpos taglen 3)) - (setq pos nextpos)) - (setf startpos nil - endpos nil)) - (setq done t)) - (setq pos (1+ endtag)))) - (setq pos (1+ starttag))) - (when (> pos end-xmlstr) - (setq done t)))))) - (values startpos endpos nextpos))) - - -(defun xml-tag-contents-old (tag xmlstr &optional (start-xmlstr 0) (end-xmlstr nil)) - "Returns two values: the string between XML start and end tag -and position of character following end tag." - (multiple-value-bind - (startpos endpos nextpos) - (positions-xml-tag-contents-old tag xmlstr start-xmlstr end-xmlstr) - (if (and startpos endpos) - (values (subseq xmlstr startpos endpos) nextpos) - (values nil nil)))) - -(defun xml-tag-contents (tag xmlstr &optional (start-xmlstr 0) (end-xmlstr nil)) + (let* ((taglen (length tag))) + (multiple-value-bind (start attributes) + (find-start-tag tag taglen xmlstr start-xmlstr end-xmlstr) + (unless start + (return-from positions-xml-tag-contents (values nil nil nil nil))) + (let ((end (find-end-tag tag taglen xmlstr start end-xmlstr))) + (unless end + (return-from positions-xml-tag-contents (values nil nil nil nil))) + (values start end (+ end taglen 3) attributes))))) + + +(defun xml-tag-contents (tag xmlstr &optional (start-xmlstr 0) + (end-xmlstr (length xmlstr))) "Returns two values: the string between XML start and end tag and position of character following end tag." (multiple-value-bind - (startpos endpos nextpos) + (startpos endpos nextpos attributes) (positions-xml-tag-contents tag xmlstr start-xmlstr end-xmlstr) (if (and startpos endpos) - (values (subseq xmlstr startpos endpos) nextpos) - (values nil nil)))) + (values (subseq xmlstr startpos endpos) nextpos attributes) + (values nil nil nil)))) (defun xml-cdata (str) (concatenate 'string "")) -- 2.34.1