X-Git-Url: http://git.kpe.io/?p=kmrcl.git;a=blobdiff_plain;f=xml-utils.lisp;h=a3305caa097ce68ed1d2b45df4e9f2acd565e8fe;hp=4dc37fc53140f68c91f1b0a2925628f362e8398d;hb=54cd6cb1b9550ac2310e2c6dffc9cdecd2bdccd3;hpb=8bae678dafa346432c868f240dd172f347519305 diff --git a/xml-utils.lisp b/xml-utils.lisp index 4dc37fc..a3305ca 100644 --- a/xml-utils.lisp +++ b/xml-utils.lisp @@ -7,8 +7,6 @@ ;;;; Programmer: Kevin M. Rosenberg ;;;; Date Started: Apr 2000 ;;;; -;;;; $Id: xml-utils.lisp,v 1.9 2003/06/07 03:51:42 kevin Exp $ -;;;; ;;;; This file, part of KMRCL, is Copyright (c) 2002 by Kevin M. Rosenberg ;;;; ;;;; KMRCL users are granted the rights to distribute and use this software @@ -19,162 +17,69 @@ (in-package #:kmrcl) -(defun wrap-with-xml (str entity) - "Returns string of xml header along with entity tag start/end with str contents" - (format nil "~%~%<~a>~%~a~%~%" - str entity entity)) - - ;;; XML Extraction Functions -#| -#+allegro (require :pxml) -#+allegro -(defun parse-xml-no-ws (str) - "Return list structure of XML string with removing whitespace strings" - (remove-tree-if #'string-ws? (parse-xml str))) -|# - -(defun positions-xml-tag-contents-old (tag xmlstr &optional (start-xmlstr 0) (end-xmlstr nil)) - "Returns three values: the start and end positions of contents between - the xml tags and the position following the close of the end tag." - (let ((done nil) - (pos start-xmlstr) - (taglen (length tag)) - (startpos nil) - (endpos nil) - (nextpos nil)) - (unless end-xmlstr - (setq end-xmlstr (length xmlstr))) - (while (not done) - (let ((bracketpos (position #\< xmlstr :start pos :end end-xmlstr))) - (if bracketpos - (let* ((starttag (1+ bracketpos)) - (endtag (+ starttag taglen))) - (if (and (< endtag end-xmlstr) - (string= tag xmlstr :start2 starttag :end2 endtag)) - (let* ((char-after-tag (char xmlstr endtag))) - (declare (character char-after-tag)) - (if (or (char= #\> char-after-tag) (char= #\space char-after-tag)) - (progn - (if (char= #\> char-after-tag) - (setq startpos (1+ endtag)) - (setq startpos (1+ (position #\> xmlstr :start (1+ endtag))))) - (setq endpos (search (format nil "" tag) xmlstr - :start2 startpos :end2 end-xmlstr)) - (setq done t) - (if (and startpos endpos) - (progn - (setq nextpos (+ endpos taglen 3)) - (setq pos nextpos)) - (setf startpos nil - endpos nil))) - (setq pos (1+ endtag)))) - (setq pos (1+ starttag))) - (when (> pos end-xmlstr) - (setq done t))) - (setq done t)))) - (values startpos endpos nextpos))) - -(defun fast-string-search (substr str substr-length startpos endpos) - (declare (simple-string substr str) - (fixnum substr-length startpos endpos) - (optimize (speed 3) (space 0) (safety 0))) - (do* ((pos startpos (1+ pos)) - (lastpos (- endpos substr-length))) - ((> pos lastpos) nil) - (declare (fixnum pos lastpos)) - (do ((i 0 (1+ i))) - ((= i substr-length) - (return-from fast-string-search pos)) - (declare (fixnum i)) - (unless (char= (schar str (+ i pos)) (schar substr i)) - (return nil))))) - -(defun find-start-tag (tag taglen xmlstr start-pos end-xmlstr) - (let ((bracketpos (seaposition-char #\< xmlstr start-pos end-xmlstr))) - (when bracketpos - (let* ((starttag (1+ bracketpos)) - (endtag (+ starttag taglen))) - (if (and (< endtag end-xmlstr) - (string= tag xmlstr :start2 starttag :end2 endtag)) - (let* ((char-after-tag (char xmlstr endtag))) - (declare (character char-after-tag)) - (if (or (char= #\> char-after-tag) - (char= #\space char-after-tag)) - (progn - (if (char= #\> char-after-tag) - (setq startpos (1+ endtag)) - (setq startpos (1+ (position-char #\> xmlstr (1+ endtag) end-xmlstr)))) - )))))))) +(defun find-start-tag (tag taglen xmlstr start end) + "Searches for the start of a tag in an xmlstring. Returns STARTPOS ATTRIBUTE-LIST)" + (declare (simple-string tag xmlstr) + (fixnum taglen start end) + (optimize (speed 3) (safety 0) (space 0))) + (do* ((search-str (concatenate 'string "<" tag)) + (search-len (1+ taglen)) + (bracketpos (fast-string-search search-str xmlstr search-len start end) + (fast-string-search search-str xmlstr search-len start end))) + ((null bracketpos) nil) + (let* ((endtag (+ bracketpos 1 taglen)) + (char-after-tag (schar xmlstr endtag))) + (when (or (char= #\> char-after-tag) + (char= #\space char-after-tag)) + (if (char= #\> char-after-tag) + (return-from find-start-tag (values (1+ endtag) nil)) + (let ((endbrack (position-char #\> xmlstr (1+ endtag) end))) + (if endbrack + (return-from find-start-tag + (values (1+ endbrack) + (string-to-list-skip-delimiter + (subseq xmlstr endtag endbrack)))) + (values nil nil))))) + (setq start endtag)))) + + +(defun find-end-tag (tag taglen xmlstr start end) + (fast-string-search + (concatenate 'string "") xmlstr + (+ taglen 3) start end)) (defun positions-xml-tag-contents (tag xmlstr &optional (start-xmlstr 0) - (end-xmlstr (length xmlstr))) + (end-xmlstr (length xmlstr))) "Returns three values: the start and end positions of contents between the xml tags and the position following the close of the end tag." - (let ((done nil) - (pos start-xmlstr) - (taglen (length tag)) - (startpos nil) - (endpos nil) - (nextpos nil)) - (while (not done) - (let ((bracketpos (position-char #\< xmlstr pos end-xmlstr))) - (unless bracketpos - (return-from positions-xml-tag-contents - (values nil nil nil))) - (let* ((starttag (1+ bracketpos)) - (endtag (+ starttag taglen))) - (if (and (< endtag end-xmlstr) - (string= tag xmlstr :start2 starttag :end2 endtag)) - (let* ((char-after-tag (char xmlstr endtag))) - (declare (character char-after-tag)) - (if (or (char= #\> char-after-tag) - (char= #\space char-after-tag)) - (progn - (if (char= #\> char-after-tag) - (setq startpos (1+ endtag)) - (setq startpos (1+ (position-char #\> xmlstr (1+ endtag) end-xmlstr)))) - (setq endpos (search (format nil "" tag) xmlstr - :start2 startpos :end2 end-xmlstr)) - (if (and startpos endpos) - (progn - (setq nextpos (+ endpos taglen 3)) - (setq pos nextpos)) - (setf startpos nil - endpos nil)) - (setq done t)) - (setq pos (1+ endtag)))) - (setq pos (1+ starttag))) - (when (> pos end-xmlstr) - (setq done t)))))) - (values startpos endpos nextpos))) - - -(defun xml-tag-contents-old (tag xmlstr &optional (start-xmlstr 0) (end-xmlstr nil)) - "Returns two values: the string between XML start and end tag -and position of character following end tag." - (multiple-value-bind - (startpos endpos nextpos) - (positions-xml-tag-contents-old tag xmlstr start-xmlstr end-xmlstr) - (if (and startpos endpos) - (values (subseq xmlstr startpos endpos) nextpos) - (values nil nil)))) - -(defun xml-tag-contents (tag xmlstr &optional (start-xmlstr 0) (end-xmlstr nil)) - "Returns two values: the string between XML start and end tag + (let* ((taglen (length tag))) + (multiple-value-bind (start attributes) + (find-start-tag tag taglen xmlstr start-xmlstr end-xmlstr) + (unless start + (return-from positions-xml-tag-contents (values nil nil nil nil))) + (let ((end (find-end-tag tag taglen xmlstr start end-xmlstr))) + (unless end + (return-from positions-xml-tag-contents (values nil nil nil nil))) + (values start end (+ end taglen 3) attributes))))) + + +(defun xml-tag-contents (tag xmlstr &optional (start-xmlstr 0) + (end-xmlstr (length xmlstr))) + "Returns two values: the string between XML start and end tag and position of character following end tag." - (multiple-value-bind - (startpos endpos nextpos) + (multiple-value-bind + (startpos endpos nextpos attributes) (positions-xml-tag-contents tag xmlstr start-xmlstr end-xmlstr) (if (and startpos endpos) - (values (subseq xmlstr startpos endpos) nextpos) - (values nil nil)))) + (values (subseq xmlstr startpos endpos) nextpos attributes) + (values nil nil nil)))) -(defun xml-cdata (str) +(defun cdata-string (str) (concatenate 'string "")) -(defun write-xml-cdata (str s) +(defun write-cdata (str s) (declare (simple-string str) (optimize (speed 3) (safety 0) (space 0))) (do ((len (length str)) (i 0 (1+ i))) @@ -182,7 +87,88 @@ and position of character following end tag." (declare (fixnum i len)) (let ((c (schar str i))) (case c - (#\< (write-string "<" s)) - (#\& (write-string "&" s)) - (t (write-char c s)))))) + (#\< (write-string "<" s)) + (#\& (write-string "&" s)) + (t (write-char c s)))))) + +(defun xml-declaration-stream (stream &key (version "1.0") standalone encoding) + (format stream "~%" + version + (if encoding + (format nil " encoding=\"~A\"" encoding) + "" + ) + (if standalone + (format nil " standalone=\"~A\"" standalone) + ""))) + +(defun doctype-stream (stream top-element availability registered organization type + label language url entities) + (format stream " stream) + (write-char #\newline stream)) + +(defun doctype-format (stream format &key top-element (availability "PUBLIC") + (registered nil) organization (type "DTD") label + (language "EN") url entities) + (case format + ((:xhtml11 :xhtml) + (doctype-stream stream "html" availability registered "W3C" type "XHTML 1.1" language + (if url url "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd") + entities)) + (:xhtml10-strict + (doctype-stream stream "html" availability registered "W3C" type "XHTML 1.0 Strict" language + (if url url "http://www.w3.org/TR/xhtml10/DTD/xhtml10-strict.dtd") + entities)) + (:xhtml10-transitional + (doctype-stream stream "html" availability registered "W3C" type "XHTML 1.0 Transitional" language + (if url url "http://www.w3.org/TR/xhtml10/DTD/xhtml10-transitional.dtd") + entities)) + (:xhtml-frameset + (doctype-stream stream "html" availability registered "W3C" type "XHTML 1.0 Frameset" language + (if url url "http://www.w3.org/TR/xhtml10/DTD/xhtml10-frameset.dtd") + entities)) + (:html2 + (doctype-stream stream "HTML" availability registered "IETF" type "HTML" language url entities)) + (:html3 + (doctype-stream stream "HTML" availability registered "IETF" type "HTML 3.0" language url entities)) + (:html3.2 + (doctype-stream stream "HTML" availability registered "W3C" type "HTML 3.2 Final" language url entities)) + ((:html :html4) + (doctype-stream stream "HTML" availability registered "W3C" type "HTML 4.01 Final" language url entities)) + ((:docbook :docbook42) + (doctype-stream stream (if top-element top-element "book") + availability registered "OASIS" type "Docbook XML 4.2" language + (if url url "http://www.oasis-open.org/docbook/xml/4.2/docbookx.dtd") + entities)) + (t + (unless top-element (warn "Missing top-element in doctype-format")) + (unless organization (warn "Missing organization in doctype-format")) + (unless label (warn "Missing label in doctype-format")) + (doctype-stream stream top-element availability registered organization type label language url + entities)))) + + +(defun sgml-header-stream (format stream &key entities (encoding "iso-8859-1") standalone (version "1.0") + top-element (availability "PUBLIC") registered organization (type "DTD") + label (language "EN") url) + (when (in format :xhtml :xhtml11 :xhtml10-strict :xhtml10-transitional :xhtml10-frameset :xml :docbook) + (xml-declaration-stream stream :version version :encoding encoding :standalone standalone)) + (unless (eq :xml format) + (doctype-format stream format :top-element top-element + :availability availability :registered registered + :organization organization :type type :label label :language language + :url url :entities entities)) + stream)