1 ;;;; -*- Mode: Lisp; Syntax: ANSI-Common-Lisp; Base: 10 -*-
2 ;;;; *************************************************************************
3 ;;;; FILE IDENTIFICATION
5 ;;;; Name: xml-utils.lisp
6 ;;;; Purpose: XML utilities
7 ;;;; Programmer: Kevin M. Rosenberg
8 ;;;; Date Started: Apr 2000
10 ;;;; This file, part of KMRCL, is Copyright (c) 2002 by Kevin M. Rosenberg
12 ;;;; KMRCL users are granted the rights to distribute and use this software
13 ;;;; as governed by the terms of the Lisp Lesser GNU Public License
14 ;;;; (http://opensource.franz.com/preamble.html), also known as the LLGPL.
15 ;;;; *************************************************************************
20 ;;; XML Extraction Functions
22 (defun find-start-tag (tag taglen xmlstr start end)
23 "Searches for the start of a tag in an xmlstring. Returns STARTPOS ATTRIBUTE-LIST)"
24 (declare (simple-string tag xmlstr)
25 (fixnum taglen start end)
26 (optimize (speed 3) (safety 0) (space 0)))
27 (do* ((search-str (concatenate 'string "<" tag))
28 (search-len (1+ taglen))
29 (bracketpos (fast-string-search search-str xmlstr search-len start end)
30 (fast-string-search search-str xmlstr search-len start end)))
31 ((null bracketpos) nil)
32 (let* ((endtag (+ bracketpos 1 taglen))
33 (char-after-tag (schar xmlstr endtag)))
34 (when (or (char= #\> char-after-tag)
35 (char= #\space char-after-tag))
36 (if (char= #\> char-after-tag)
37 (return-from find-start-tag (values (1+ endtag) nil))
38 (let ((endbrack (position-char #\> xmlstr (1+ endtag) end)))
40 (return-from find-start-tag
42 (string-to-list-skip-delimiter
43 (subseq xmlstr endtag endbrack))))
45 (setq start endtag))))
48 (defun find-end-tag (tag taglen xmlstr start end)
50 (concatenate 'string "</" tag ">") xmlstr
51 (+ taglen 3) start end))
53 (defun positions-xml-tag-contents (tag xmlstr &optional (start-xmlstr 0)
54 (end-xmlstr (length xmlstr)))
55 "Returns three values: the start and end positions of contents between
56 the xml tags and the position following the close of the end tag."
57 (let* ((taglen (length tag)))
58 (multiple-value-bind (start attributes)
59 (find-start-tag tag taglen xmlstr start-xmlstr end-xmlstr)
61 (return-from positions-xml-tag-contents (values nil nil nil nil)))
62 (let ((end (find-end-tag tag taglen xmlstr start end-xmlstr)))
64 (return-from positions-xml-tag-contents (values nil nil nil nil)))
65 (values start end (+ end taglen 3) attributes)))))
68 (defun xml-tag-contents (tag xmlstr &optional (start-xmlstr 0)
69 (end-xmlstr (length xmlstr)))
70 "Returns two values: the string between XML start and end tag
71 and position of character following end tag."
73 (startpos endpos nextpos attributes)
74 (positions-xml-tag-contents tag xmlstr start-xmlstr end-xmlstr)
75 (if (and startpos endpos)
76 (values (subseq xmlstr startpos endpos) nextpos attributes)
77 (values nil nil nil))))
79 (defun cdata-string (str)
80 (concatenate 'string "<![CDATA[" str "]]>"))
82 (defun write-cdata (str s)
83 (declare (simple-string str) (optimize (speed 3) (safety 0) (space 0)))
84 (do ((len (length str))
87 (declare (fixnum i len))
88 (let ((c (schar str i)))
90 (#\< (write-string "<" s))
91 (#\& (write-string "&" s))
92 (t (write-char c s))))))
94 (defun xml-declaration-stream (stream &key (version "1.0") standalone encoding)
95 (format stream "<?xml version=\"~A\"~A~A ?>~%"
98 (format nil " encoding=\"~A\"" encoding)
102 (format nil " standalone=\"~A\"" standalone)
105 (defun doctype-stream (stream top-element availability registered organization type
106 label language url entities)
107 (format stream "<!DOCTYPE ~A ~A \"~A//~A//~A ~A//~A\"" top-element
108 availability (if registered "+" "-") organization type label language)
111 (write-char #\space stream)
112 (write-char #\" stream)
113 (write-string url stream)
114 (write-char #\" stream))
117 (format stream " [~%~A~%]" entities))
119 (write-char #\> stream)
120 (write-char #\newline stream))
122 (defun doctype-format (stream format &key top-element (availability "PUBLIC")
123 (registered nil) organization (type "DTD") label
124 (language "EN") url entities)
127 (doctype-stream stream "html" availability registered "W3C" type "XHTML 1.1" language
128 (if url url "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd")
131 (doctype-stream stream "html" availability registered "W3C" type "XHTML 1.0 Strict" language
132 (if url url "http://www.w3.org/TR/xhtml10/DTD/xhtml10-strict.dtd")
134 (:xhtml10-transitional
135 (doctype-stream stream "html" availability registered "W3C" type "XHTML 1.0 Transitional" language
136 (if url url "http://www.w3.org/TR/xhtml10/DTD/xhtml10-transitional.dtd")
139 (doctype-stream stream "html" availability registered "W3C" type "XHTML 1.0 Frameset" language
140 (if url url "http://www.w3.org/TR/xhtml10/DTD/xhtml10-frameset.dtd")
143 (doctype-stream stream "HTML" availability registered "IETF" type "HTML" language url entities))
145 (doctype-stream stream "HTML" availability registered "IETF" type "HTML 3.0" language url entities))
147 (doctype-stream stream "HTML" availability registered "W3C" type "HTML 3.2 Final" language url entities))
149 (doctype-stream stream "HTML" availability registered "W3C" type "HTML 4.01 Final" language url entities))
150 ((:docbook :docbook42)
151 (doctype-stream stream (if top-element top-element "book")
152 availability registered "OASIS" type "Docbook XML 4.2" language
153 (if url url "http://www.oasis-open.org/docbook/xml/4.2/docbookx.dtd")
156 (unless top-element (warn "Missing top-element in doctype-format"))
157 (unless organization (warn "Missing organization in doctype-format"))
158 (unless label (warn "Missing label in doctype-format"))
159 (doctype-stream stream top-element availability registered organization type label language url
163 (defun sgml-header-stream (format stream &key entities (encoding "iso-8859-1") standalone (version "1.0")
164 top-element (availability "PUBLIC") registered organization (type "DTD")
165 label (language "EN") url)
166 (when (in format :xhtml :xhtml11 :xhtml10-strict :xhtml10-transitional :xhtml10-frameset :xml :docbook)
167 (xml-declaration-stream stream :version version :encoding encoding :standalone standalone))
168 (unless (eq :xml format)
169 (doctype-format stream format :top-element top-element
170 :availability availability :registered registered
171 :organization organization :type type :label label :language language
172 :url url :entities entities))