;;;; Programmer: Kevin M. Rosenberg
;;;; Date Started: Apr 2000
;;;;
-;;;; $Id: xml-utils.lisp,v 1.6 2002/12/04 16:49:23 kevin Exp $
+;;;; $Id: xml-utils.lisp,v 1.11 2003/06/07 22:34:16 kevin Exp $
;;;;
;;;; This file, part of KMRCL, is Copyright (c) 2002 by Kevin M. Rosenberg
;;;;
;;;; (http://opensource.franz.com/preamble.html), also known as the LLGPL.
;;;; *************************************************************************
-(in-package :kmrcl)
-(declaim (optimize (speed 3) (safety 2) (compilation-speed 0) (debug 3)))
+(in-package #:kmrcl)
(defun wrap-with-xml (str entity)
;;; XML Extraction Functions
-#|
-#+allegro (require :pxml)
-#+allegro
-(defun parse-xml-no-ws (str)
- "Return list structure of XML string with removing whitespace strings"
- (remove-tree-if #'string-ws? (parse-xml str)))
-|#
+(defun find-start-tag (tag taglen xmlstr start end)
+  "Search XMLSTR between START and END for the opening tag named TAG.
+TAGLEN is the length of TAG.  A candidate is the text \"<TAG\" followed
+immediately by either a > or a space character.
+Returns two values: the position just past the > that closes the start
+tag, and the attribute list produced by STRING-TO-LIST-SKIP-DELIMITER from
+the text between the tag name and that > (NIL for a bare <TAG>).
+Returns NIL when no complete start tag is found."
+  (declare (simple-string tag xmlstr)
+           (fixnum taglen start end)
+           (optimize (speed 3) (safety 0) (space 0)))
+  (do* ((search-str (concatenate 'string "<" tag))
+        (search-len (1+ taglen))
+        (bracketpos (fast-string-search search-str xmlstr search-len start end)
+                    (fast-string-search search-str xmlstr search-len start end)))
+       ((null bracketpos) nil)
+    (let ((endtag (+ bracketpos 1 taglen)))
+      (declare (fixnum endtag))
+      ;; The character after the tag name must lie inside the searched
+      ;; region.  With (safety 0) an unchecked SCHAR here could read past
+      ;; END (or past the end of the string).  Any later match would end
+      ;; even further right, so the search is over.
+      (unless (< endtag end)
+        (return-from find-start-tag nil))
+      (let ((char-after-tag (schar xmlstr endtag)))
+        (cond ((char= #\> char-after-tag)
+               ;; Bare <TAG>: contents start right after the bracket.
+               (return-from find-start-tag (values (1+ endtag) nil)))
+              ((char= #\space char-after-tag)
+               ;; <TAG attr...>: find the closing bracket.  If there is
+               ;; none before END, no later candidate can be closed
+               ;; either, so report failure instead of rescanning.
+               (let ((endbrack (position-char #\> xmlstr (1+ endtag) end)))
+                 (return-from find-start-tag
+                   (if endbrack
+                       (values (1+ endbrack)
+                               (string-to-list-skip-delimiter
+                                (subseq xmlstr endtag endbrack)))
+                       (values nil nil)))))))
+      ;; TAG was only a prefix of a longer name (e.g. <TAGX); keep looking
+      ;; from just past this candidate.
+      (setq start endtag))))
-(defun positions-xml-tag-contents (tag xmlstr &optional (start-xmlstr 0) (end-xmlstr nil))
+
+(defun find-end-tag (tag taglen xmlstr start end)
+  "Search XMLSTR between START and END for the closing tag \"</TAG>\" and
+return whatever FAST-STRING-SEARCH reports for it (callers treat NIL as
+not found).  TAGLEN is the length of TAG."
+  (let ((closing-tag (concatenate 'string "</" tag ">"))
+        ;; The \"</\" prefix and \">\" suffix add three characters to the name.
+        (closing-len (+ taglen 3)))
+    (fast-string-search closing-tag xmlstr closing-len start end)))
+
+(defun positions-xml-tag-contents (tag xmlstr &optional (start-xmlstr 0)
+ (end-xmlstr (length xmlstr)))
- "Returns three values: the start and end positions of contents between
- the xml tags and the position following the close of the end tag."
+  "Locate the first TAG element in XMLSTR between START-XMLSTR and
+END-XMLSTR.  Returns four values: the start and end positions of the
+contents between the xml tags, the position following the close of the
+end tag, and the attribute list reported by FIND-START-TAG.  All four
+values are NIL when either the start tag or the end tag is not found."
- (let ((done nil)
- (pos start-xmlstr)
- (taglen (length tag))
- (startpos nil)
- (endpos nil)
- (nextpos nil))
- (unless end-xmlstr
- (setq end-xmlstr (length xmlstr)))
- (while (not done)
- (let ((bracketpos (position #\< xmlstr :start pos :end end-xmlstr)))
- (if bracketpos
- (let* ((starttag (1+ bracketpos))
- (endtag (+ starttag taglen)))
- (if (and (< endtag end-xmlstr)
- (string= tag xmlstr :start2 starttag :end2 endtag))
- (let* ((char-after-tag (char xmlstr endtag)))
- (declare (character char-after-tag))
- (if (or (char= #\> char-after-tag) (char= #\space char-after-tag))
- (progn
- (if (char= #\> char-after-tag)
- (setq startpos (1+ endtag))
- (setq startpos (1+ (position #\> xmlstr :start (1+ endtag)))))
- (setq endpos (search (format nil "</~a>" tag) xmlstr
- :start2 startpos :end2 end-xmlstr))
- (setq done t)
- (if (and startpos endpos)
- (progn
- (setq nextpos (+ endpos taglen 3))
- (setq pos nextpos))
- (setf startpos nil
- endpos nil)))
- (setq pos (1+ endtag))))
- (setq pos (1+ starttag)))
- (when (> pos end-xmlstr)
- (setq done t)))
- (setq done t))))
- (values startpos endpos nextpos)))
+  (let ((name-len (length tag)))
+    (multiple-value-bind (content-start attributes)
+        (find-start-tag tag name-len xmlstr start-xmlstr end-xmlstr)
+      ;; Only look for the end tag once a start tag has been found.
+      (let ((content-end (and content-start
+                              (find-end-tag tag name-len xmlstr
+                                            content-start end-xmlstr))))
+        (if content-end
+            (values content-start
+                    content-end
+                    ;; Skip over the "</" + TAG + ">" closing tag.
+                    (+ content-end name-len 3)
+                    attributes)
+            (values nil nil nil nil))))))
-(defun xml-tag-contents (tag xmlstr &optional (start-xmlstr 0) (end-xmlstr nil))
+(defun xml-tag-contents (tag xmlstr &optional (start-xmlstr 0)
+ (end-xmlstr (length xmlstr)))
- "Returns two values: the string between XML start and end tag
- and position of character following end tag."
+  "Returns three values: the string between the XML start and end tags
+for TAG in XMLSTR, the position of the character following the end tag,
+and the attribute list reported by POSITIONS-XML-TAG-CONTENTS.  All three
+values are NIL when the element is not found."
- (multiple-value-bind
- (startpos endpos nextpos)
- (positions-xml-tag-contents tag xmlstr start-xmlstr end-xmlstr)
- (if (and startpos endpos)
- (values (subseq xmlstr startpos endpos) nextpos)
- (values nil nil))))
+  (multiple-value-bind (content-start content-end next-pos attributes)
+      (positions-xml-tag-contents tag xmlstr start-xmlstr end-xmlstr)
+    (cond ((and content-start content-end)
+           (values (subseq xmlstr content-start content-end)
+                   next-pos
+                   attributes))
+          (t
+           (values nil nil nil)))))
-(defun xml-cdata (str)
- (concatenate 'string "<![CDATA[" str "]]>"))
+(defun xml-cdata (str)
+  "Return a new string holding STR wrapped in an XML CDATA section.
+A CDATA section cannot contain the terminator \"]]>\", so each occurrence
+in STR is split across two adjacent sections; an XML parser reading the
+result recovers STR unchanged.  For input without \"]]>\" the result is
+simply \"<![CDATA[\" followed by STR and \"]]>\"."
+  ;; CONCATENATE accepted any sequence of characters; keep accepting one.
+  (let ((text (if (stringp str) str (coerce str 'string)))
+        (start 0))
+    (with-output-to-string (out)
+      (write-string "<![CDATA[" out)
+      (loop
+        (let ((pos (search "]]>" text :start2 start)))
+          (unless pos
+            (write-string text out :start start)
+            (return))
+          ;; Emit everything up to and including the "]]", close the
+          ;; section, and reopen it so the ">" starts the next section.
+          (write-string text out :start start :end (+ pos 2))
+          (write-string "]]><![CDATA[" out)
+          (setq start (+ pos 2))))
+      (write-string "]]>" out))))
+(defun write-xml-cdata (str s)
+  "Write STR to stream S as XML character data: each < is written as the
+entity &lt; and each & as the entity &amp;; every other character is
+copied unchanged.  Returns STR."
+  (declare (simple-string str) (optimize (speed 3) (safety 0) (space 0)))
+  (do ((len (length str))
+       (i 0 (1+ i)))
+      ((= i len) str)
+    (declare (fixnum i len))
+    (let ((c (schar str i)))
+      (case c
+        ;; These two characters are markup in XML text and must be
+        ;; replaced by their entity references, not echoed verbatim.
+        (#\< (write-string "&lt;" s))
+        (#\& (write-string "&amp;" s))
+        (t (write-char c s))))))
+