1 ;;;; -*- Mode: Lisp; Syntax: ANSI-Common-Lisp; Base: 10 -*-
2 ;;;; *************************************************************************
3 ;;;; FILE IDENTIFICATION
5 ;;;; Name: xml-utils.lisp
6 ;;;; Purpose: XML utilities
7 ;;;; Programmer: Kevin M. Rosenberg
8 ;;;; Date Started: Apr 2000
12 ;;;; This file, part of KMRCL, is Copyright (c) 2002 by Kevin M. Rosenberg
14 ;;;; KMRCL users are granted the rights to distribute and use this software
15 ;;;; as governed by the terms of the Lisp Lesser GNU Public License
16 ;;;; (http://opensource.franz.com/preamble.html), also known as the LLGPL.
17 ;;;; *************************************************************************
22 ;;; XML Extraction Functions
(defun find-start-tag (tag taglen xmlstr start end)
  "Searches XMLSTR between START and END for a start tag named TAG.
TAGLEN must be the length of TAG.

Returns two values:
  1. the position just past the `>' that closes the start tag, and
  2. the result of calling STRING-TO-LIST-SKIP-DELIMITER on the text between
     the tag name and that `>' (NIL when the tag has no attributes).
Returns NIL when no complete start tag is found."
  (declare (simple-string tag xmlstr)
           (fixnum taglen start end)
           (optimize (speed 3) (safety 0) (space 0)))
  (do* ((search-str (concatenate 'string "<" tag))
        (search-len (1+ taglen))
        ;; Re-run the search from the updated START on every iteration.
        (bracketpos (fast-string-search search-str xmlstr search-len start end)
                    (fast-string-search search-str xmlstr search-len start end)))
       ((null bracketpos) nil)
    (let ((endtag (+ bracketpos 1 taglen)))
      ;; "<tag" ends exactly at the region boundary: there is no character
      ;; left to inspect, and reading it under (safety 0) would be an
      ;; unchecked out-of-bounds access.
      (when (>= endtag end)
        (return-from find-start-tag nil))
      (let ((char-after-tag (schar xmlstr endtag)))
        (cond
          ;; "<tag>": no attributes.
          ((char= #\> char-after-tag)
           (return-from find-start-tag (values (1+ endtag) nil)))
          ;; "<tag ...": attributes run up to the next `>'.
          ((char= #\space char-after-tag)
           ;; NOTE(review): assumes POSITION-CHAR returns NIL when the
           ;; character is absent -- confirm against its definition.
           (let ((endbrack (position-char #\> xmlstr (1+ endtag) end)))
             (if endbrack
                 (return-from find-start-tag
                   (values (1+ endbrack)
                           (string-to-list-skip-delimiter
                            (subseq xmlstr endtag endbrack))))
                 ;; No `>' remains in the region, so no later candidate
                 ;; can be a complete start tag either.
                 (return-from find-start-tag (values nil nil)))))))
      ;; Any other following character means we matched a prefix of a
      ;; longer tag name (e.g. "<tagfoo"); resume searching after it.
      (setq start endtag))))
(defun find-end-tag (tag taglen xmlstr start end)
  "Searches XMLSTR between START and END for the closing tag of TAG.
TAGLEN must be the length of TAG.  Returns whatever FAST-STRING-SEARCH
returns for the pattern formed from `</', TAG and `>'; callers in this
file treat that as the position of the closing tag's `<', or NIL when it
is not found."
  (fast-string-search
   (concatenate 'string "</" tag ">") xmlstr
   ;; Pattern length: two characters for `</', the tag, one for `>'.
   (+ taglen 3) start end))
(defun positions-xml-tag-contents (tag xmlstr &optional (start-xmlstr 0)
                                                        (end-xmlstr (length xmlstr)))
  "Locates the contents of the first TAG element in XMLSTR, searching
between START-XMLSTR and END-XMLSTR.

Returns four values:
  1. the start position of the contents (just past the start tag),
  2. the end position of the contents (where the end tag begins),
  3. the position following the close of the end tag, and
  4. the attribute value returned by FIND-START-TAG.
All four values are NIL when either the start tag or the end tag is not
found."
  (let ((taglen (length tag)))
    (multiple-value-bind (start attributes)
        (find-start-tag tag taglen xmlstr start-xmlstr end-xmlstr)
      ;; Only look for the end tag once a start tag has been found.
      (let ((end (and start
                      (find-end-tag tag taglen xmlstr start end-xmlstr))))
        (if end
            ;; The end tag is `</' + TAG + `>', i.e. TAGLEN + 3 characters.
            (values start end (+ end taglen 3) attributes)
            (values nil nil nil nil))))))
(defun xml-tag-contents (tag xmlstr &optional (start-xmlstr 0)
                                              (end-xmlstr (length xmlstr)))
  "Extracts the text of the first TAG element in XMLSTR, searching between
START-XMLSTR and END-XMLSTR.

Returns three values:
  1. a fresh string holding the text between the start and end tags,
  2. the position of the character following the end tag, and
  3. the attribute value reported by POSITIONS-XML-TAG-CONTENTS.
All three values are NIL when the element is not found."
  (multiple-value-bind (startpos endpos nextpos attributes)
      (positions-xml-tag-contents tag xmlstr start-xmlstr end-xmlstr)
    (if (and startpos endpos)
        (values (subseq xmlstr startpos endpos) nextpos attributes)
        (values nil nil nil))))
(defun cdata-string (str)
  "Returns a new string with STR wrapped in an XML CDATA section.

A CDATA section cannot contain its own terminator, so every occurrence of
the three-character terminator inside STR is split across two adjacent
CDATA sections; an XML parser reading the result recovers STR unchanged."
  ;; COERCE keeps accepting any sequence of characters (including NIL),
  ;; as the previous CONCATENATE-based implementation did.
  (let ((text (coerce str 'string)))
    (with-output-to-string (out)
      (write-string "<![CDATA[" out)
      (loop with start = 0
            for hit = (search "]]>" text :start2 start)
            while hit
            ;; Emit up to and including the two brackets, close the current
            ;; section, and reopen a new one that begins with the `>'.
            do (write-string text out :start start :end (+ hit 2))
               (write-string "]]><![CDATA[" out)
               (setq start (+ hit 2))
            finally (write-string text out :start start))
      (write-string "]]>" out))))
(defun write-cdata (str s)
  "Writes the characters of STR to stream S as XML character data:
`<' is written as the entity &lt;, `&' as &amp;, and every other character
is written unchanged.  Returns STR."
  (declare (simple-string str) (optimize (speed 3) (safety 0) (space 0)))
  (do ((len (length str))
       (i 0 (1+ i)))
      ((= i len) str)
    (declare (fixnum i len))
    (let ((c (schar str i)))
      (case c
        ;; Only the two characters that can start markup are escaped.
        (#\< (write-string "&lt;" s))
        (#\& (write-string "&amp;" s))
        (t (write-char c s))))))
(defun xml-declaration-stream (stream &key (version "1.0") standalone encoding)
  "Writes an XML declaration followed by a newline to STREAM.
VERSION is always emitted (default 1.0).  The encoding and standalone
pseudo-attributes are emitted, in that order, only when ENCODING and
STANDALONE are non-NIL."
  (format stream "<?xml version=\"~A\"~A~A ?>~%"
          version
          (if encoding
              (format nil " encoding=\"~A\"" encoding)
              "")
          (if standalone
              (format nil " standalone=\"~A\"" standalone)
              "")))
(defun dtd-stream (stream format &key name public-id system-id entities)
  "Writes a <!DOCTYPE ...> declaration followed by a newline to STREAM.

FORMAT is a keyword selecting defaults for the root element NAME and for
the PUBLIC-ID and SYSTEM-ID identifiers; explicitly supplied keyword
arguments take precedence over those defaults (except that the :HTML
format always drops SYSTEM-ID).  When ENTITIES is non-NIL it is written
as the internal subset between square brackets."
  ;; Default root element name.  Both spellings of the frameset keyword
  ;; are accepted so that :XHTML10-FRAMESET also receives a default name.
  (unless name
    (setq name
          (case format
            ((:xhtml11 :xhtml :xhtml10-strict :xhtml10-transitional
              :xhtml10-frameset :xhtml-frameset)
             ;; NOTE(review): "html" is the XHTML root element; confirm
             ;; this matches the project's original default.
             "html")
            ((:docbook :docbook42)
             "book"))))
  ;; Default identifiers.  Each default is applied independently, so
  ;; supplying PUBLIC-ID does not suppress the SYSTEM-ID default.
  (flet ((default-ids (pub sys)
           (unless public-id (setq public-id pub))
           (unless system-id (setq system-id sys))))
    (case format
      ;; NOTE(review): the :HTML key is inferred from the IETF HTML
      ;; public identifier -- confirm against callers.
      (:html
       (unless public-id
         (setq public-id "-//IETF//DTD HTML//EN"))
       (setq system-id nil))
      ((:xhtml :xhtml11)
       (default-ids "-//W3C//DTD XHTML 1.1//EN"
                    "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd"))
      ;; The XHTML 1.0 DTDs are published under .../TR/xhtml1/DTD/.
      (:xhtml10-strict
       (default-ids "-//W3C//DTD XHTML 1.0 Strict//EN"
                    "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"))
      (:xhtml10-transitional
       (default-ids "-//W3C//DTD XHTML 1.0 Transitional//EN"
                    "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"))
      ((:xhtml10-frameset :xhtml-frameset)
       (default-ids "-//W3C//DTD XHTML 1.0 Frameset//EN"
                    "http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd"))
      ((:docbook :docbook42)
       (default-ids "-//OASIS//DTD DocBook XML V4.2//EN"
                    "http://www.oasis-open.org/docbook/xml/4.2/docbookx.dtd"))))
  (format stream "<!DOCTYPE ~A PUBLIC \"~A\"~A"
          name
          public-id
          (if system-id
              (format nil " \"~A\"" system-id)
              ""))
  (when entities
    (format stream " [~%~A~%]" entities))
  (write-char #\> stream)
  (write-char #\newline stream))
160 (defun sgml-header-stream (stream format &key entities (encoding "iso-8859-1") standalone (version "1.0")
161 name public-id system-id)
162 (when (in format :xhtml :xhtml11 :xhtml10-strict :xhtml10-transitional :xhtml10-frameset :xml :docbook)
163 (xml-declaration-stream stream :version version :encoding encoding :standalone standalone))
164 (unless (eq :xml format)
165 (dtd-stream stream format :name name :public-id public-id :system-id system-id