;;;; Programmer: Kevin M. Rosenberg
;;;; Date Started: Apr 2000
;;;;
-;;;; $Id: xml-utils.lisp,v 1.4 2002/10/10 16:23:48 kevin Exp $
+;;;; $Id: xml-utils.lisp,v 1.9 2003/06/07 03:51:42 kevin Exp $
;;;;
;;;; This file, part of KMRCL, is Copyright (c) 2002 by Kevin M. Rosenberg
;;;;
;;;; (http://opensource.franz.com/preamble.html), also known as the LLGPL.
;;;; *************************************************************************
-(in-package :kmrcl)
-(declaim (optimize (speed 3) (safety 1) (compilation-speed 0) (debug 3)))
+(in-package #:kmrcl)
(defun wrap-with-xml (str entity)
;;; XML Extraction Functions
+#|
+#+allegro (require :pxml)
#+allegro
(defun parse-xml-no-ws (str)
"Return list structure of XML string with removing whitespace strings"
(remove-tree-if #'string-ws? (parse-xml str)))
+|#
-(defun positions-xml-tag-contents (tag xmlstr &optional (start-xmlstr 0) (end-xmlstr nil))
+(defun positions-xml-tag-contents-old (tag xmlstr &optional (start-xmlstr 0) (end-xmlstr nil))
"Returns three values: the start and end positions of contents between
the xml tags and the position following the close of the end tag."
(let ((done nil)
(if bracketpos
(let* ((starttag (1+ bracketpos))
(endtag (+ starttag taglen)))
- (if (string= tag xmlstr :start2 starttag :end2 endtag)
+ (if (and (< endtag end-xmlstr)
+ (string= tag xmlstr :start2 starttag :end2 endtag))
(let* ((char-after-tag (char xmlstr endtag)))
(declare (character char-after-tag))
(if (or (char= #\> char-after-tag) (char= #\space char-after-tag))
(setq done t))))
(values startpos endpos nextpos)))
+(defun fast-string-search (substr str substr-length startpos endpos)
+ (declare (simple-string substr str)
+ (fixnum substr-length startpos endpos)
+ (optimize (speed 3) (space 0) (safety 0)))
+ (do* ((pos startpos (1+ pos))
+ (lastpos (- endpos substr-length)))
+ ((> pos lastpos) nil)
+ (declare (fixnum pos lastpos))
+ (do ((i 0 (1+ i)))
+ ((= i substr-length)
+ (return-from fast-string-search pos))
+ (declare (fixnum i))
+ (unless (char= (schar str (+ i pos)) (schar substr i))
+ (return nil)))))
+
+(defun find-start-tag (tag taglen xmlstr start-pos end-xmlstr)
+ (let ((bracketpos (seaposition-char #\< xmlstr start-pos end-xmlstr)))
+ (when bracketpos
+ (let* ((starttag (1+ bracketpos))
+ (endtag (+ starttag taglen)))
+ (if (and (< endtag end-xmlstr)
+ (string= tag xmlstr :start2 starttag :end2 endtag))
+ (let* ((char-after-tag (char xmlstr endtag)))
+ (declare (character char-after-tag))
+ (if (or (char= #\> char-after-tag)
+ (char= #\space char-after-tag))
+ (progn
+ (if (char= #\> char-after-tag)
+ (setq startpos (1+ endtag))
+ (setq startpos (1+ (position-char #\> xmlstr (1+ endtag) end-xmlstr))))
+ ))))))))
+
+(defun positions-xml-tag-contents (tag xmlstr &optional (start-xmlstr 0)
+ (end-xmlstr (length xmlstr)))
+ "Returns three values: the start and end positions of contents between
+ the xml tags and the position following the close of the end tag."
+ (let ((done nil)
+ (pos start-xmlstr)
+ (taglen (length tag))
+ (startpos nil)
+ (endpos nil)
+ (nextpos nil))
+ (while (not done)
+ (let ((bracketpos (position-char #\< xmlstr pos end-xmlstr)))
+ (unless bracketpos
+ (return-from positions-xml-tag-contents
+ (values nil nil nil)))
+ (let* ((starttag (1+ bracketpos))
+ (endtag (+ starttag taglen)))
+ (if (and (< endtag end-xmlstr)
+ (string= tag xmlstr :start2 starttag :end2 endtag))
+ (let* ((char-after-tag (char xmlstr endtag)))
+ (declare (character char-after-tag))
+ (if (or (char= #\> char-after-tag)
+ (char= #\space char-after-tag))
+ (progn
+ (if (char= #\> char-after-tag)
+ (setq startpos (1+ endtag))
+ (setq startpos (1+ (position-char #\> xmlstr (1+ endtag) end-xmlstr))))
+ (setq endpos (search (format nil "</~a>" tag) xmlstr
+ :start2 startpos :end2 end-xmlstr))
+ (if (and startpos endpos)
+ (progn
+ (setq nextpos (+ endpos taglen 3))
+ (setq pos nextpos))
+ (setf startpos nil
+ endpos nil))
+ (setq done t))
+ (setq pos (1+ endtag))))
+ (setq pos (1+ starttag)))
+ (when (> pos end-xmlstr)
+ (setq done t))))))
+ (values startpos endpos nextpos)))
+
+
+(defun xml-tag-contents-old (tag xmlstr &optional (start-xmlstr 0) (end-xmlstr nil))
+ "Returns two values: the string between XML start and end tag
+and position of character following end tag."
+ (multiple-value-bind
+ (startpos endpos nextpos)
+ (positions-xml-tag-contents-old tag xmlstr start-xmlstr end-xmlstr)
+ (if (and startpos endpos)
+ (values (subseq xmlstr startpos endpos) nextpos)
+ (values nil nil))))
(defun xml-tag-contents (tag xmlstr &optional (start-xmlstr 0) (end-xmlstr nil))
"Returns two values: the string between XML start and end tag
(defun xml-cdata (str)
(concatenate 'string "<![CDATA[" str "]]>"))
+(defun write-xml-cdata (str s)
+ (declare (simple-string str) (optimize (speed 3) (safety 0) (space 0)))
+ (do ((len (length str))
+ (i 0 (1+ i)))
+ ((= i len) str)
+ (declare (fixnum i len))
+ (let ((c (schar str i)))
+ (case c
+ (#\< (write-string "<" s))
+ (#\& (write-string "&" s))
+ (t (write-char c s))))))
+