From 1e8aa1df433841c85c5a0b44fbd92964672e18b5 Mon Sep 17 00:00:00 2001 From: "Kevin M. Rosenberg" Date: Fri, 20 Jun 2003 02:21:41 +0000 Subject: [PATCH] r5162: *** empty log message *** --- ChangeLog | 27 +- build.cl | 2 +- debian/rules | 2 +- phtml-test.cl | 2 +- phtml.cl | 374 ++++++++++++++++++++++++--- phtml.htm | 257 ++++++++++++++++++ pxml.htm | 387 ++++++++++++++++++++++++++++ pxml0.cl | 12 +- pxml1.cl | 54 ++-- pxml2.cl | 6 +- pxml3.cl | 3 +- debian/xmlutils.asd => xmlutils.asd | 13 +- 12 files changed, 1050 insertions(+), 89 deletions(-) create mode 100644 phtml.htm create mode 100644 pxml.htm rename debian/xmlutils.asd => xmlutils.asd (87%) diff --git a/ChangeLog b/ChangeLog index 2c39fab..52aed9e 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,18 +1,25 @@ -******************************************************************************* -merge from trunk to acl6 branch (for 6.1.beta) -command: ../../join.sh trunk trunk_to_acl6_merge2 trunk_to_acl6_merge3 xmlutils -******************************************************************************* +2003-02-13 Kevin Layer -******************************************************************************* -merge from trunk to acl6 branch -command: ../../join.sh trunk trunk_to_acl6_merge1 trunk_to_acl6_merge2 xmlutils -******************************************************************************* + from jkf: + * phtml.cl: bug13050: parse-html close tag closes consecutive + identical open tags ******************************************************************************* -merge from trunk to acl6 branch -command: ../../join.sh trunk acl6 trunk_to_acl6_merge1 xmlutils +Mon Jun 17 11:29:36 PDT 2002 +merge from trunk to acl62 branch (for 6.2) +command: ../../join.sh trunk acl62 trunk_to_acl62_merge1 xmlutils ******************************************************************************* +2002-05-14 John Foderaro + + * phtml.cl: - add :parse-entities arg to parse-html. If true then + entities are converted to the character they represent. + +2002-04-29 John Foderaro + + * pxml1.cl (check-xmldecl): - if the xml tag specifies an + encoding then set lisp's external format to that encoding. + 2001-06-08 Steve Haflich * pxml.htm: Added mention that it is necessary to load or require diff --git a/build.cl b/build.cl index 0467461..e48e291 100644 --- a/build.cl +++ b/build.cl @@ -1,4 +1,4 @@ -;; $Id: build.cl,v 1.1 2002/10/15 12:23:03 kevin Exp $ +;; $Id: build.cl,v 1.2 2003/06/20 02:21:23 kevin Exp $ (in-package :user) diff --git a/debian/rules b/debian/rules index 1d48b6b..0f7454f 100755 --- a/debian/rules +++ b/debian/rules @@ -42,7 +42,7 @@ install: build dh_clean -k # Add here commands to install the package into debian/xmlutils. dh_installdirs $(clc-systems) $(clc-xmlutils) $(doc-dir) - dh_install debian/xmlutils.asd phtml.cl $(wildcard pxml[0-3].cl) build.cl $(clc-xmlutils) + dh_install xmlutils.asd phtml.cl $(wildcard pxml[0-3].cl) build.cl $(clc-xmlutils) dh_install $(shell echo *.html) $(doc-dir) dh_link $(clc-xmlutils)/xmlutils.asd $(clc-systems)/xmlutils.asd diff --git a/phtml-test.cl b/phtml-test.cl index f852c62..639a0a6 100644 --- a/phtml-test.cl +++ b/phtml-test.cl @@ -19,7 +19,7 @@ ;; Suite 330, Boston, MA 02111-1307 USA ;; -;; $Id: phtml-test.cl,v 1.1 2002/10/15 12:23:03 kevin Exp $ +;; $Id: phtml-test.cl,v 1.2 2003/06/20 02:21:23 kevin Exp $ (eval-when (compile load eval) (require :tester)) diff --git a/phtml.cl b/phtml.cl index f763ac4..14cbb3a 100644 --- a/phtml.cl +++ b/phtml.cl @@ -1,3 +1,8 @@ +(sys:defpatch "phtml" 1 + "parse-html close tag closes consecutive identical open tags." + :type :system + :post-loadable t) + ;; copyright (c) 1986-2000 Franz Inc, Berkeley, CA ;; ;; This code is free software; you can redistribute it and/or @@ -19,11 +24,13 @@ ;; Suite 330, Boston, MA 02111-1307 USA ;; -;; $Id: phtml.cl,v 1.1 2002/10/15 12:23:03 kevin Exp $ +;; $Id: phtml.cl,v 1.2 2003/06/20 02:21:23 kevin Exp $ ;; phtml.cl - parse html ;; Change Log +;; 05/14/02 - add :parse-entities arg to parse-html. If true then +;; entities are converted to the character they represent. ;; ;; 02/05/01 symbols mapped to preferred case at runtime (as opposed to ;; a compile time macro determining the case mapping) @@ -253,6 +260,266 @@ (not (zerop (logand (svref *characteristics* code) bit)))))) +(defvar *html-entity-to-code* + (let ((table (make-hash-table :test #'equal))) + (dolist (ent '(("nbsp" . 160) + ("iexcl" . 161) + ("cent" . 162) + ("pound" . 163) + ("curren" . 164) + ("yen" . 165) + ("brvbar" . 166) + ("sect" . 167) + ("uml" . 168) + ("copy" . 169) + ("ordf" . 170) + ("laquo" . 171) + ("not" . 172) + ("shy" . 173) + ("reg" . 174) + ("macr" . 175) + ("deg" . 176) + ("plusmn" . 177) + ("sup2" . 178) + ("sup3" . 179) + ("acute" . 180) + ("micro" . 181) + ("para" . 182) + ("middot" . 183) + ("cedil" . 184) + ("sup1" . 185) + ("ordm" . 186) + ("raquo" . 187) + ("frac14" . 188) + ("frac12" . 189) + ("frac34" . 190) + ("iquest" . 191) + ("Agrave" . 192) + ("Aacute" . 193) + ("Acirc" . 194) + ("Atilde" . 195) + ("Auml" . 196) + ("Aring" . 197) + ("AElig" . 198) + ("Ccedil" . 199) + ("Egrave" . 200) + ("Eacute" . 201) + ("Ecirc" . 202) + ("Euml" . 203) + ("Igrave" . 204) + ("Iacute" . 205) + ("Icirc" . 206) + ("Iuml" . 207) + ("ETH" . 208) + ("Ntilde" . 209) + ("Ograve" . 210) + ("Oacute" . 211) + ("Ocirc" . 212) + ("Otilde" . 213) + ("Ouml" . 214) + ("times" . 215) + ("Oslash" . 216) + ("Ugrave" . 217) + ("Uacute" . 218) + ("Ucirc" . 219) + ("Uuml" . 220) + ("Yacute" . 221) + ("THORN" . 222) + ("szlig" . 223) + ("agrave" . 224) + ("aacute" . 225) + ("acirc" . 226) + ("atilde" . 227) + ("auml" . 228) + ("aring" . 229) + ("aelig" . 230) + ("ccedil" . 231) + ("egrave" . 232) + ("eacute" . 233) + ("ecirc" . 234) + ("euml" . 235) + ("igrave" . 236) + ("iacute" . 237) + ("icirc" . 238) + ("iuml" . 239) + ("eth" . 240) + ("ntilde" . 241) + ("ograve" . 242) + ("oacute" . 243) + ("ocirc" . 244) + ("otilde" . 245) + ("ouml" . 246) + ("divide" . 247) + ("oslash" . 248) + ("ugrave" . 249) + ("uacute" . 250) + ("ucirc" . 251) + ("uuml" . 252) + ("yacute" . 253) + ("thorn" . 254) + ("yuml" . 255) + ("fnof" . 402) + ("Alpha" . 913) + ("Beta" . 914) + ("Gamma" . 915) + ("Delta" . 916) + ("Epsilon" . 917) + ("Zeta" . 918) + ("Eta" . 919) + ("Theta" . 920) + ("Iota" . 921) + ("Kappa" . 922) + ("Lambda" . 923) + ("Mu" . 924) + ("Nu" . 925) + ("Xi" . 926) + ("Omicron" . 927) + ("Pi" . 928) + ("Rho" . 929) + ("Sigma" . 931) + ("Tau" . 932) + ("Upsilon" . 933) + ("Phi" . 934) + ("Chi" . 935) + ("Psi" . 936) + ("Omega" . 937) + ("alpha" . 945) + ("beta" . 946) + ("gamma" . 947) + ("delta" . 948) + ("epsilon" . 949) + ("zeta" . 950) + ("eta" . 951) + ("theta" . 952) + ("iota" . 953) + ("kappa" . 954) + ("lambda" . 955) + ("mu" . 956) + ("nu" . 957) + ("xi" . 958) + ("omicron" . 959) + ("pi" . 960) + ("rho" . 961) + ("sigmaf" . 962) + ("sigma" . 963) + ("tau" . 964) + ("upsilon" . 965) + ("phi" . 966) + ("chi" . 967) + ("psi" . 968) + ("omega" . 969) + ("thetasym" . 977) + ("upsih" . 978) + ("piv" . 982) + ("bull" . 8226) + ("hellip" . 8230) + ("prime" . 8242) + ("Prime" . 8243) + ("oline" . 8254) + ("frasl" . 8260) + ("weierp" . 8472) + ("image" . 8465) + ("real" . 8476) + ("trade" . 8482) + ("alefsym" . 8501) + ("larr" . 8592) + ("uarr" . 8593) + ("rarr" . 8594) + ("darr" . 8595) + ("harr" . 8596) + ("crarr" . 8629) + ("lArr" . 8656) + ("uArr" . 8657) + ("rArr" . 8658) + ("dArr" . 8659) + ("hArr" . 8660) + ("forall" . 8704) + ("part" . 8706) + ("exist" . 8707) + ("empty" . 8709) + ("nabla" . 8711) + ("isin" . 8712) + ("notin" . 8713) + ("ni" . 8715) + ("prod" . 8719) + ("sum" . 8721) + ("minus" . 8722) + ("lowast" . 8727) + ("radic" . 8730) + ("prop" . 8733) + ("infin" . 8734) + ("ang" . 8736) + ("and" . 8743) + ("or" . 8744) + ("cap" . 8745) + ("cup" . 8746) + ("int" . 8747) + ("there4" . 8756) + ("sim" . 8764) + ("cong" . 8773) + ("asymp" . 8776) + ("ne" . 8800) + ("equiv" . 8801) + ("le" . 8804) + ("ge" . 8805) + ("sub" . 8834) + ("sup" . 8835) + ("nsub" . 8836) + ("sube" . 8838) + ("supe" . 8839) + ("oplus" . 8853) + ("otimes" . 8855) + ("perp" . 8869) + ("sdot" . 8901) + ("lceil" . 8968) + ("rceil" . 8969) + ("lfloor" . 8970) + ("rfloor" . 8971) + ("lang" . 9001) + ("rang" . 9002) + ("loz" . 9674) + ("spades" . 9824) + ("clubs" . 9827) + ("hearts" . 9829) + ("diams" . 9830) + ("quot" . 34) + ("amp" . 38) + ("lt" . 60) + ("gt" . 62) + ("OElig" . 338) + ("oelig" . 339) + ("Scaron" . 352) + ("scaron" . 353) + ("Yuml" . 376) + ("circ" . 710) + ("tilde" . 732) + ("ensp" . 8194) + ("emsp" . 8195) + ("thinsp" . 8201) + ("zwnj" . 8204) + ("zwj" . 8205) + ("lrm" . 8206) + ("rlm" . 8207) + ("ndash" . 8211) + ("mdash" . 8212) + ("lsquo" . 8216) + ("rsquo" . 8217) + ("sbquo" . 8218) + ("ldquo" . 8220) + ("rdquo" . 8221) + ("bdquo" . 8222) + ("dagger" . 8224) + ("Dagger" . 8225) + ("permil" . 8240) + ("lsaquo" . 8249) + ("rsaquo" . 8250) + ("euro" . 8364) + )) + (setf (gethash (car ent) table) (cdr ent))) + table)) + + + (defstruct tokenbuf cur ;; next index to use to grab from tokenbuf max ;; index one beyond last character @@ -301,7 +568,7 @@ (defun next-token (stream ignore-strings raw-mode-delimiter - read-sequence-func tokenbuf) + read-sequence-func tokenbuf parse-entities) (declare (optimize (speed 3) (safety 1))) ;; return two values: ;; the next token from the stream. @@ -382,6 +649,41 @@ (return) else ; collect a tag (setq state state-readtagfirst)) + elseif (and parse-entities (eq ch #\&)) + then ; reading an entity. entity ends at semicolon + (let (res (max 10)) + (loop (let ((ch (next-char stream))) + (if* (null ch) + then (error "End of file after & entity marker") + elseif (eq ch #\;) + then (return) + elseif (zerop (decf max)) + then (error "No semicolon found after entity starting: &~{~a~}" (nreverse res)) + else (push ch res)))) + (setq res (nreverse res)) + (if* (eq (car res) #\#) + then ; decimal entity + (let ((count 0)) + (dolist (ch (cdr res)) + (let ((code (char-code ch))) + (if* (<= #.(char-code #\0) + code + #.(char-code #\9)) + then (setq count + (+ (* 10 count) + (- code + #.(char-code #\0)))) + else (error "non decimal digit after &# - ~s" ch) + ))) + (add-to-coll coll (code-char count))) + else (let ((name (make-array (length res) + :element-type 'character + :initial-contents res))) + (let ((ch (gethash name *html-entity-to-code*))) + (if* ch + then (add-to-coll coll (code-char ch)) + else (error "No such entity as ~s" name)))))) + else ; we will check for & here eventually (if* (not (eq ch #\return)) then (add-to-coll coll ch)))) @@ -588,7 +890,7 @@ (#.state-readtag (when (null tag-to-return) - (error "unexpected end of input encountered")) + (error "unexpected end of input encountered")) ;; we've read a tag with no attributes (put-back-collector coll) (values tag-to-return @@ -743,17 +1045,19 @@ (defmethod parse-html ((p stream) &key callback-only callbacks collect-rogue-tags - no-body-tags) + no-body-tags + parse-entities) (declare (optimize (speed 3) (safety 1))) (phtml-internal p nil callback-only callbacks collect-rogue-tags - no-body-tags)) + no-body-tags parse-entities)) (defmacro tag-callback (tag) `(rest (assoc ,tag callbacks))) -(defun phtml-internal (p read-sequence-func callback-only callbacks collect-rogue-tags - - no-body-tags) +(defun phtml-internal (p read-sequence-func callback-only + callbacks collect-rogue-tags + no-body-tags + parse-entities) (declare (optimize (speed 3) (safety 1))) (let ((raw-mode-delimiter nil) (pending nil) @@ -767,9 +1071,11 @@ (guts) (rogue-tags) ) - (labels ((close-off-tags (name stop-at collect-rogues) + (labels ((close-off-tags (name stop-at collect-rogues once-only) ;; close off an open 'name' tag, but search no further ;; than a 'stop-at' tag. + #+ignore (format t "close off name ~s, stop at ~s, ct ~s~%" + name stop-at current-tag) (if* (member (tag-name current-tag) name :test #'eq) then ;; close current tag(s) (loop @@ -778,11 +1084,12 @@ *known-tags*))) (push (tag-name current-tag) rogue-tags)) (close-current-tag) - (when (or (member (tag-name current-tag) - *ch-format*) - (not (member - (tag-name current-tag) name :test #'eq))) - (return))) + (if* (or once-only + (member (tag-name current-tag) + *ch-format*) + (not (member + (tag-name current-tag) name :test #'eq))) + then (return))) elseif (member (tag-name current-tag) stop-at :test #'eq) then nil else ; search if there is a tag to close @@ -825,9 +1132,11 @@ (push element guts)))) (save-state () - ;; push the current tag state since we're starting + ;; push the current tag state since we're starting: ;; a new open tag - (push (cons current-tag guts) pending)) + (push (cons current-tag guts) pending) + #+ignore (format t "state saved, pending ~s~%" pending) + ) (strip-rev-pcdata (stuff) @@ -867,15 +1176,15 @@ (if* (eq kind :start-tag) then (push val new-opens) elseif (member val new-opens :test #'eq) then (setf new-opens (remove val new-opens :count 1)) - else (close-off-tags (list val) nil nil) + else (close-off-tags (list val) nil nil nil) ))))) (get-next-token (force) (if* (or force (null (tokenbuf-first-pass tokenbuf))) then (multiple-value-bind (val kind) (next-token p nil raw-mode-delimiter read-sequence-func - tokenbuf) - (values val kind)) + tokenbuf parse-entities) + (values val kind)) else (let ((val (first (tokenbuf-first-pass tokenbuf))) (kind (second (tokenbuf-first-pass tokenbuf)))) @@ -886,7 +1195,8 @@ (loop (multiple-value-bind (val kind) (get-next-token nil) - ;;(format t "val: ~s kind: ~s~%" val kind) + #+ignore (format t "val: ~s kind: ~s last-tag ~s pending ~s~%" val kind + last-tag pending) (case kind (:pcdata (when (or (and callback-only current-callback-tags) @@ -903,7 +1213,7 @@ (when (and (= (length raw-mode-delimiter) 1) ;; xml tag... (or (and callback-only current-callback-tags) (not callback-only))) - (close-off-tags (list last-tag) nil nil)) + (close-off-tags (list last-tag) nil nil t)) (setf raw-mode-delimiter nil) ) @@ -928,7 +1238,7 @@ then "" else "")) elseif (or (eq last-tag :script) - (and (listp last-tag) (eq (first last-tag) :script))) + (and (listp last-tag) (eq (first last-tag) :script))) then (setf raw-mode-delimiter (if* (eq excl:*current-case-mode* :CASE-INSENSITIVE-UPPER) @@ -945,7 +1255,7 @@ (not callback-only)) (if* auto-close then (setq auto-close-stop (tag-auto-close-stop name)) - (close-off-tags auto-close auto-close-stop nil)) + (close-off-tags auto-close auto-close-stop nil nil)) (when (and pending-ch-format (not no-end)) (if* (member name *ch-format* :test #'eq) then nil elseif (member name *in-line* :test #'eq) then @@ -953,7 +1263,7 @@ (check-in-line name) else ;; close ALL pending char tags and then reopen (dolist (this-tag (reverse pending-ch-format)) - (close-off-tags (list (if (listp this-tag) (first this-tag) this-tag)) nil nil)) + (close-off-tags (list (if (listp this-tag) (first this-tag) this-tag)) nil nil nil)) )) (if* no-end then ; this is a singleton tag @@ -995,7 +1305,7 @@ (setf raw-mode-delimiter nil) (when (or (and callback-only current-callback-tags) (not callback-only)) - (close-off-tags (list val) nil nil) + (close-off-tags (list val) nil nil t) (when (member val *ch-format* :test #'eq) (setf pending-ch-format (remove val pending-ch-format :count 1 @@ -1022,7 +1332,7 @@ ;; close off all tags (when (or (and callback-only current-callback-tags) (not callback-only)) - (close-off-tags '(:start-parse) nil collect-rogue-tags)) + (close-off-tags '(:start-parse) nil collect-rogue-tags nil)) (put-back-tokenbuf tokenbuf) (if collect-rogue-tags (return (values (cdar guts) rogue-tags)) @@ -1031,21 +1341,25 @@ (defmethod parse-html (file &key callback-only callbacks collect-rogue-tags - no-body-tags) + no-body-tags parse-entities) (declare (optimize (speed 3) (safety 1))) (with-open-file (p file :direction :input) (parse-html p :callback-only callback-only :callbacks callbacks :collect-rogue-tags collect-rogue-tags - :no-body-tags no-body-tags))) + :no-body-tags no-body-tags + :parse-entities parse-entities + ))) (defmethod parse-html ((str string) &key callback-only callbacks collect-rogue-tags - no-body-tags) + no-body-tags parse-entities) (declare (optimize (speed 3) (safety 1))) (parse-html (make-string-input-stream str) :callback-only callback-only :callbacks callbacks :collect-rogue-tags collect-rogue-tags - :no-body-tags no-body-tags)) + :no-body-tags no-body-tags + :parse-entities parse-entities + )) diff --git a/phtml.htm b/phtml.htm new file mode 100644 index 0000000..4a16083 --- /dev/null +++ b/phtml.htm @@ -0,0 +1,257 @@ + + + +A Lisp Based HTML Parser + + + + + +

A Lisp Based HTML Parser

+ +

Introduction/Simple Example
+LHTML  parse output format
+Case mode notes
+Parsing HTML comments
+Parsing <SCRIPT> and <STYLE> tags
+Parsing SGML <! tags
+Parsing Illegal and Deprecated Tags
+Default Attribute Values
+Parsing Interleaved Character Formatting Tags
+parse-html reference
+   methods
+   phtml-internal

+ +

The parse-html generic function processes HTML +input, returning a list of HTML tags, attributes, and text. Here is a simple example:
+
+(parse-html "<HTML>
+                    +<HEAD>
+                    +<TITLE>Example HTML input</TITLE>
+                    +<BODY>
+                    +<P>Here is some text with a <B>bold</B> word<br>and a <A +HREF=\"help.html\">link</P>
+                    +</HTML>")

+ +

generates:
+
+((:html (:head (:title "Example HTML input"))
+  (:body (:p "Here is some text with a " (:b "bold") " +word" :br "and a "
+                  +((:a :href "help.html") "link")))))
+

+ +

The output format is known as LHTML format; it is the same format that the
+aserve htmlgen macro accepts.
+
+LHTML format
+
+LHTML is a list representation of HTML tags and content.
+
+Each list member may be: + +

    +
  1. a string containing text content, such as "Here is some text with a "
    +
  2. +
  3. a keyword package symbol representing a HTML tag with no associated attributes
    + or content, such as :br.
    +
  4. +
  5. a list representing an HTML tag with associated attributes and/or content,
    + such as (:b "bold") or ((:a :href "help.html") "link"). If + the HTML tag
    + does not have associated attributes, then the first list member will be a
    + keyword package symbol representing the HTML tag, and the other elements will
    + represent the content, which can be a string (text content), a keyword package symbol + (HTML
    + tag with no attributes or content), or list (nested HTML tag with
    + associated attributes and/or content). If there are associated attributes,
    + then the first list member will be a list containing a keyword package symbol
    + followed by two list members for each associated attribute; the first member is a keyword
    + package symbol representing the attribute, and the next member is a string corresponding
    + to the attribute value.
    +
  6. +
+ +

Case Mode and LHTML

+ +

If excl:*current-case-mode* is :CASE-INSENSITIVE-UPPER, keyword package symbols will be
+in upper case; otherwise, they will be in lower case.

+ +

HTML Comments

+ +

HTML comments are represented use a :comment symbol. For example,
+
+(parse-html "<!-- this is a comment-->")
+
+--> ((:comment " this is a comment"))

+ +

HTML <SCRIPT> and <STYLE> tags

+ +

All <SCRIPT> and <STYLE> content is not parsed; it is returned as text +content.
+
+For example,
+
+(parse-html "<SCRIPT>this <B>will not</B> be +parsed</SCRIPT>")
+
+--> ((:script "this <B>will not</B> be parsed"))

+ +

XML and SGML <! tags

+ +

Since, some HTML pages contain special XML/SGML tags, non-comment tags
+starting with '<!' are treated specially:
+
+(parse-html "<!doctype this is some text>")
+
+--> ((:!doctype " this is some text"))

+ +

Illegal and Deprecated HTML

+ +

There is plenty of illegal and deprecated HTML on the web that popular browsers
+nonetheless successfully display. The parse-html parser is generous - it will not
+raise an error condition upon encountering most input. In particular, it does not
+maintain a list of legal HTML tags and will successfully parse nonsense input.
+
+For example,
+
+(parse-html "<this> <is> <some> <nonsense> +<input>")
+
+--> ((:this (:is (:some (:nonsense :input)))))
+
+In some situations, you may prefer a two-pass parse that results in a parse where
+deep nesting related to unrecognized tags is minimized:
+
+(let ((string "<this> <is> <some> <nonsense> </some> +<input>"))
+        (multiple-value-bind (res rogues)
+          (parse-html string +:collect-rogue-tags t)
+            (declare (ignorable +res))
+            (parse-html string +:no-body-tags rogues)))
+
+--> (:this :is (:some (:nonsense)) :input)
+
+See the :collect-rogue-tags and :no-body-tags argument +descriptions in the reference
+section below for more information.

+ +

Default Attribute values

+ +

As per the HTML 4.0 specification, attributes without specified values are given a +lower case
+string value that matches the attribute name.
+
+For example,
+
+(parse-html "<P here ARE some attributes>")
+
+--> (((:p :here "here" :are "are" :some "some" +:attributes "attributes")))

+ +

Interleaved Character Formatting Tags

+ +

Existing HTML pages often have character format tags that are interleaved among
+other tags. Such interleaving is removed in a manner consistent with the HTML 4.0
+specification.
+
+For example,
+
+(parse-html "<P>Here is <B>bold text<P>that spans</B>two +paragraphs")
+
+--> ((:p "Here is " (:b "bold text")) (:p (:b "that +spans") "two paragraphs"))

+ +
+ +

parse-html Reference
+
+parse-html [Generic function]
+
+Arguments: input-source &key callbacks callback-only
+            collect-rogue-tags +no-body-tags parse-entities
+
+Returns LHTML output, as described above.
+
+The callbacks argument, if non-nil, should be an association list. Each list member's
+car (first) element specifies a keyword package symbol, and each list member's cdr (rest)
+element specifies a function object or a symbol naming a function. The function should
+expect one argument. The function will be invoked once for each time the HTML tag
+corresponding to the specified keyword package symbol is encountered in the HTML input; +the
+argument will be an LHTML list containing the tag, along with associated attributes and
+content. The default callbacks argument value is nil.
+
+The callback-only argument, if non-nil, directs parse-html to not generate a complete +LHTML
+output. Instead, LHTML lists will only be generated when necessary as arguments for +functions
+specified in the callbacks association list. This results in faster parser execution. The +default
+callback-only argument value is nil.
+
+The collect-rogue-tags argument, if non-nil, directs parse-html to return an additional +value,
+a list containing any unrecognized tags closed by the end of input.
+
+The no-body-tags argument, if non-nil, should be a list containing unknown tags that, if
+encountered, will be treated as a tag with no body or content, and thus, no associated end
+tag. Typically, the argument is a list or modified list resulting from an earlier +parse-html
+execution with the :collect-rogue-tags argument specified as non-nil.

+ +

If the parse-entities argument is true then entities are converted to the character +they name.  Thus for example the &lt; entity is converted to the less than sign.
+
+parse-html Methods
+
+parse-html (p stream) &key callbacks callback-only
+            collect-rogue-tags +no-body-tags parse-entities
+
+parse-html (str string) &key callbacks callback-only
+            collect-rogue-tags +no-body-tags parse-entities
+
+parse-html (file t) &key callbacks callback-only
+            collect-rogue-tags +no-body-tags parse-entities
+
+The t method assumes the argument is a pathname suitable
+for use with the with-open-file macro.
+
+
+phtml-internal [Function]
+
+Arguments: stream read-sequence-func callback-only callbacks
+collect-rogue-tags no-body-tags parse-entities
+
+This function may be used when more control is needed for supplying
+the HTML input. The read-sequence-func argument, if non-nil, should be a function
+object or a symbol naming a function. When phtml-internal requires another buffer
+of HTML input, it will invoke the read-sequence-func function with two arguments -
+the first argument is an internal buffer character array and the second argument is
+the phtml-internal stream argument. If read-sequence-fun is nil, phtml-internal
+will invoke read-sequence to fill the buffer. The read-sequence-func function must
+return the number of character array elements successfully stored in the buffer.
+
+
+
+
+
+
+
+

+ + diff --git a/pxml.htm b/pxml.htm new file mode 100644 index 0000000..2cf26d5 --- /dev/null +++ b/pxml.htm @@ -0,0 +1,387 @@ + + + +A Lisp Based XML Parser + + + + + +

A Lisp Based XML Parser

+ +

Introduction/Simple Example
+LXML parse output format
+parse-xml non-validating parser properties
+case and international character support issues
+parse-xml and packages
+parse-xml, the XML Namespace specification, and packages
+ACL does not support Unicode 4 byte scalar values
+only little-endian Unicode tested in ACL 6.0 beta
+debugging aids
+XML Conformance test results
+Compiling and Loading the parser
+parse-xml reference

+ +

The parse-xml generic function processes XML +input, returning a list of XML tags,
+attributes, and text. Here is a simple example:
+
+(parse-xml "<item1><item2 att1='one'/>this is some +text</item1>")
+
+-->
+
+((item1 ((item2 att1 "one")) "this is some text"))
+
+The output format is known as LXML format.
+
+LXML Format
+
+LXML is a list representation of XML tags and content.
+
+Each list member may be:
+
+a. a string containing text content, such as "Here is some text with a "
+
+b. a list representing a XML tag with associated attributes and/or content, +such as ('item1 "text") or (('item1 :att1 "help.html") +"link"). If the XML tag +does not have associated attributes, then the first list member will be a +symbol representing the XML tag, and the other elements will +represent the content, which can be a string (text content), a symbol (XML +tag with no attributes or content), or list (nested XML tag with +associated attributes and/or content). If there are associated attributes, +then the first list member will be a list containing a symbol +followed by two list members for each associated attribute; the first member is a +symbol representing the attribute, and the next member is a string corresponding +to the attribute value.
+
+c. XML comments and or processing instructions - see the more detailed example below for +further information.

+ +

Non Validating Parser Properties

+ +

Parse-xml is a non-validating XML parser. It will detect non-well-formed XML input. +When
+processing valid XML input, parse-xml will optionally produce the same output as a +validating
+parser would, including the processing of an external DTD subset and external entity +declarations.
+
+By default, parse-xml outputs a DTD parse along with the parsed XML contents. The DTD +parse may
+be optionally suppressed. The following example shows DTD parsed output components:

+ +

(defvar *xml-example-external-url*
+   "<!ENTITY ext1 'this is some external entity %param1;'>")
+
+(defun example-callback (var-name token &optional public)
+  (declare (ignorable token public))
+  (setf var-name (uri-path var-name))
+  (if* (equal var-name "null") then nil
+    else
+      (let ((string (eval (intern var-name (find-package +:user)))))
+      (make-string-input-stream string))))
+
+(defvar *xml-example-string*
+"<?xml version='1.0' encoding='utf-8'?>
+<!-- the following XML input is well-formed but its validity has not been checked ... +-->
+<?piexample this is an example processing instruction tag ?>
+<!DOCTYPE example SYSTEM '*xml-example-external-url*' [
+   <!ELEMENT item1 (item2* | (item3+ , item4))>
+   <!ELEMENT item2 ANY>
+   <!ELEMENT item3 (#PCDATA)>
+   <!ELEMENT item4 (#PCDATA)>
+   <!ATTLIST item1
+      att1 CDATA #FIXED 'att1-default'
+      att2 ID #REQUIRED
+      att3 ( one | two | three ) 'one'
+      att4 NOTATION ( four | five ) 'four' >
+   <!ENTITY % param1 'text'>
+   <!ENTITY nentity SYSTEM 'null' NDATA somedata>
+   <!NOTATION notation SYSTEM 'notation-processor'>
+   ]>
+<item1 att2='1'><item3>&ext1;</item3></item1>")
+
+(pprint (parse-xml *xml-example-string* :external-callback 'example-callback))
+
+-->
+
+((:xml :version "1.0" :encoding "utf-8")
+  (:comment " the following XML input is well-formed but may or may not be valid +")
+  (:pi :piexample "this is an example processing instruction tag ")
+  (:DOCTYPE :example
+    (:[ (:ELEMENT :item1 (:choice (:* :item2) (:seq (:+ :item3) :item4)))
+        (:ELEMENT :item2 :ANY)
+        (:ELEMENT :item3 :PCDATA) (:ELEMENT :item4 +:PCDATA)
+        (:ATTLIST item1 (att1 :CDATA :FIXED +"att1-default") (att2 :ID :REQUIRED)
+             (att3 +(:enumeration :one :two :three) "one")
+             (att4 (:NOTATION +:four :five) "four"))
+        (:ENTITY :param1 :param "text")
+        (:ENTITY :nentity :SYSTEM "null" +:NDATA :somedata)
+        (:NOTATION :notation :SYSTEM +"notation-processor"))
+    (:external (:ENTITY :ext1 "this is some external entity +text")))
+   ((item1 att1 "att1-default" att2 "1" att3 "one" +att4 "four")
+       (item3 "this is some external entity +text")))
+
+
+Usage Notes
+
+

    +
  1. The parse-xml function has been primarily compiled and tested in a +modern ACL. However, in an ANSI Lisp with wide character support, it DOES pass the valid +component of the conformance suite in the same manner as it does in a Modern Lisp. The +parser's successful operation in all potential situations depends on wide character support. +

    +
  2. +
  3. The parser uses the keyword package for DTD tokens and other +special XML tokens. Since element and attribute token symbols are usually interned +in the current package, it is not recommended to execute parse-xml +when the current package is the keyword package. +

    +
  4. +
  5. The XML parser supports the XML Namespaces specification. The +parser recognizes a "xmlns" attribute and attribute names starting with +"xmlns:". +As per the specification, the parser expects that the associated value +is an URI string. The parser then associates XML Namespace prefixes with a +Lisp package provided via the parse-xml :uri-to-package option or, if +necessary, a package created on the fly. The following example demonstrates +this behavior:
    + +

    (setf *xml-example-string4*
    +   "<bibliography
    +      xmlns:bib='http://www.bibliography.org/XML/bib.ns'
    +      xmlns='urn:com:books-r-us'>
    +   <bib:book owner='Smith'>
    +      <bib:title>A Tale of Two Cities</bib:title>
    +      <bib:bibliography
    +         xmlns:bib='http://www.franz.com/XML/bib.ns'
    +         xmlns='urn:com:books-r-us'>
    +      <bib:library branch='Main'>UK +Library</bib:library>
    +      <bib:date calendar='Julian'>1999</bib:date>
    +      </bib:bibliography>
    +   <bib:date calendar='Julian'>1999</bib:date>
    +   </bib:book>
    +</bibliography>")
    +
    +(setf *uri-to-package* nil)
    +(setf *uri-to-package*
    +   (acons (parse-uri "http://www.bibliography.org/XML/bib.ns")
    +      (make-package "bib") *uri-to-package*))
    +(setf *uri-to-package*
    +   (acons (parse-uri "urn:com:books-r-us")
    +      (make-package "royal") *uri-to-package*))
    +(setf *uri-to-package*
    +   (acons (parse-uri "http://www.franz.com/XML/bib.ns")
    +      (make-package "franz-ns") *uri-to-package*))
    +(pprint (multiple-value-list
    +             (parse-xml +*xml-example-string4*
    +                  :uri-to-package +*uri-to-package*)))
    +
    +-->
    +((((bibliography |xmlns:bib| "http://www.bibliography.org/XML/bib.ns"
    +     xmlns "urn:com:books-r-us")
    +    "
    +    "
    +   ((bib::book royal::owner "Smith") "
    +        " (bib::title "A Tale of Two +Cities") "
    +        "
    +    ((bib::bibliography royal::|xmlns:bib|
    +      "http://www.franz.com/XML/bib.ns" royal::xmlns
    +      "urn:com:books-r-us")
    +     "
    +         " ((franz-ns::library royal::branch +"Main") "UK Library") "
    +         " ((franz-ns::date royal::calendar +"Julian") "1999") "
    +         ")
    +     "
    +         " ((bib::date royal::calendar +"Julian") "1999") "
    +         ")
    +     "
    +         "))
    +((#<uri http://www.franz.com/XML/bib.ns> . #<The franz-ns package>)
    +  (#<uri urn:com:books-r-us> . #<The royal package>)
    +  (#<uri http://www.bibliography.org/XML/bib.ns> . #<The bib package>)))
    +
    +

  6. +
  7. In the absence of XML Namespace attributes, element and attribute symbols are interned +in the current package. Note that this implies that attributes and elements referenced +in DTD content will be interned in the current package. +
  8. +
  9. The parse-xml function has been tested using the OASIS conformance test suite (see +details below). The test suite has wide coverage across possible XML and DTD syntax, +but there may be some syntax paths that have not yet been tested or completely +supported. Here is a list of currently known syntax parsing issues: +
      +
    • ACL does not support 4 byte Unicode scalar values, so +input containing such data +will not be processed correctly. (Note, however, that parse-xml does correctly detect +and process wide Unicode input.) +
    • +
    • The OASIS tests that contain wide Unicode all use a +little-endian encoded Unicode. +Changes to the unicode-check function are required to also support big-endian encoded +Unicode. (Note also that this issue may be resolved by an ACL 6.0 final release change.) +
    • +
    • An initial <?xml declaration in external entity files is skipped without a check +being made to see if the <?xml declaration is itself incorrect. +
    • +
    +
  10. +
  11. When investigating possible parser errors or examining more closely +where the parser +determined that the input was non-well-formed, the net.xml.parser internal symbols +*debug-xml* and *debug-dtd* are useful. When not bound to nil, these variables cause +lexical analysis and intermediate parsing results to be output to *standard-output*. +
  12. +
  13. It is necessary to load the pxml module before using it. +Typically this can be done by evaluating (require :pxml). +
  14. +
+XML Conformance Test Suite
+
+Using the OASIS test suite (http://www.oasis-open.org), +here are the current parse-xml results:
+
+xmltest/invalid:    Not tested, since parse-xml is a non-validating parser
+
+not-wf/
+
+    ext.sa: 3 tests; all pass
+    not-sa: 8 tests; all pass
+    sa: 186 tests; the following fail:
+
+        170.xml: fails because ACL does not support 4 +byte Unicode scalar values
+
+valid/
+
+    ext-sa: 14 tests; all pass
+    not-sa: 31 tests; all pass
+    sa: 119 tests: the following fail:
+
+        052.xml, 064.xml, 089.xml: fails because ACL +does not support 4 byte
+                    +Unicode scalar values
+
+Compiling and Loading
+
+Load build.cl into a modern ACL session will result in a pxml.fasl file that can +subsequently be
+loaded in a modern ACL to provide XML parsing functionality.
+
+-------------------------------------------------------------------------------------------
+
+parse-xml reference
+
+parse-xml            [Generic +function]
+
+Arguments: input-source &key external-callback content-only
+            general-entities +parameter-entities
+            uri-to-package
+
+Returns multiple values:
+
    +
  1. LXML and parsed DTD output, as described above.
  2. +
  3. An association list containing the uri-to-package argument conses (if any) +and conses associated with any XML Namespace packages created during the +parse (see uri-to-package argument description, below).
  4. +
+The external-callback argument, if specified, is a function object or symbol +that parse-xml will execute when encountering an external DTD subset +or external entity DTD declaration. Here is an example which shows that +arguments the function should expect, and the value it should return: +
+(defun file-callback (uri-object token &optional public)
+  ;; The uri-object is an ACL URI object created from
+  ;; the XML input. In this example, this function
+  ;; assumes that all uri's will be file specifications.
+  ;;
+  ;; The token argument identifies what token is associated
+  ;; with the external parse (for example :DOCTYPE for external
+  ;; DTD subset
+  ;;
+  ;; The public argument contains the associated PUBLIC string,
+  ;; when present
+  ;;
+  (declare (ignorable token public))
+  ;; An open stream is returned on success,
+  ;; a nil return value indicates that the external
+  ;; parse should not occur.
+  ;; Note that parse-xml will close the open stream before exiting.
+  (ignore-errors (open (uri-path uri-object))))
+
+

+The general-entities argument is an association list containing general entity symbol +and replacement text pairs. The entity symbols should be in the keyword package. +Note that this option may be useful in generating desirable parse results in +situations where you do not wish to parse external entities or the external DTD subset. +

+The parameter-entities argument is an association list containing parameter entity symbol +and replacement text pairs. The entity symbols should be in the keyword package. +Note that this option may be useful in generating desirable parse results in +situations where you do not wish to parse external entities or the external DTD subset. +

+The uri-to-package argument is an association list containing uri objects and package +objects. Typically, the uri objects correspond to XML Namespace attribute values, and +the package objects correspond to the desired package for interning symbols associated +with the uri namespace. If the parser encounters an uri object not contained in this list, +it will generate a new package. The first generated package will be named +net.xml.namespace.0, +the second will be named net.xml.namespace.1, and so on. +

parse-xml methods

+
+(parse-xml (p stream) &key
+                      external-callback content-only
+                      general-entities
+                      parameter-entities
+                      uri-to-package)
+
+(parse-xml (str string) &key
+                        external-callback content-only
+                        general-entities
+                        parameter-entities
+                        uri-to-package)
+
+An easy way to parse a file containing XML input: +
+(with-open-file (p "example.xml")
+  (parse-xml p :content-only p))
+
+

net.xml.parser unexported special variables:

+

+*debug-xml*
+
+When true, parse-xml generates XML lexical state and intermediary +parse result debugging output. +

+*debug-dtd*
+
+When true, parse-xml generates DTD lexical state and intermediary +parse result debugging output. + + diff --git a/pxml0.cl b/pxml0.cl index a09f33a..93ba248 100644 --- a/pxml0.cl +++ b/pxml0.cl @@ -19,8 +19,6 @@ ;; version) or write to the Free Software Foundation, Inc., 59 Temple Place, ;; Suite 330, Boston, MA 02111-1307 USA ;; -;; $Id: pxml0.cl,v 1.3 2002/10/16 03:45:52 kevin Exp $ - ;; pxml.cl - parse xml ;; ;; Change Log @@ -38,12 +36,6 @@ (in-package :net.xml.parser) - -#-allegro -(eval-when (:compile-toplevel :load-toplevel :execute) - (defvar excl::*dribble-bug-hooks* nil) -#+ignore (export '*dribble-bug-hooks* 'excl)) - (unless (fboundp 'pxml-dribble-bug-hook) (let ((pxml-version-strings nil)) (defun pxml-dribble-bug-hook (stream-or-string) @@ -53,9 +45,9 @@ do (write-string string stream-or-string) (terpri stream-or-string)))) - (push 'pxml-dribble-bug-hook excl::*dribble-bug-hooks*))) + (push 'pxml-dribble-bug-hook excl:*dribble-bug-hooks*))) -(funcall 'pxml-dribble-bug-hook "$Id: pxml0.cl,v 1.3 2002/10/16 03:45:52 kevin Exp $") +(funcall 'pxml-dribble-bug-hook "$Id: pxml0.cl,v 1.4 2003/06/20 02:21:23 kevin Exp $") (defun xml-char-p (char) (declare (optimize (speed 3) (safety 1))) diff --git a/pxml1.cl b/pxml1.cl index d7c9252..cc6df9d 100644 --- a/pxml1.cl +++ b/pxml1.cl @@ -19,15 +19,13 @@ ;; version) or write to the Free Software Foundation, Inc., 59 Temple Place, ;; Suite 330, Boston, MA 02111-1307 USA ;; -;; $Id: pxml1.cl,v 1.2 2002/10/16 03:45:52 kevin Exp $ - -;; Change Log +;; Change Log ;; ;; 10/14/00 add namespace support; xml-error fix (in-package :net.xml.parser) -(pxml-dribble-bug-hook "$Id: pxml1.cl,v 1.2 2002/10/16 03:45:52 kevin Exp $") +(pxml-dribble-bug-hook "$Id: pxml1.cl,v 1.3 2003/06/20 02:21:23 kevin Exp $") (defparameter *collectors* (list nil nil nil nil nil nil nil nil)) @@ -271,14 +269,14 @@ (declare (ignorable tokenbuf) (optimize (speed 3) (safety 1))) ;; need no-OO check because external format support isn't completely done yet (when (not (typep p 'string-input-simple-stream)) - #+(and allegro (version>= 6 0 pre-final 1)) + #+(version>= 6 0 pre-final 1) (let ((format (ignore-errors (excl:sniff-for-unicode p)))) (if* (eq format (find-external-format :unicode)) then - #+allegro (setf (stream-external-format p) format) + (setf (stream-external-format p) format) else - #+allegro (setf (stream-external-format p) (find-external-format :utf8)))) - #-(and allegro (version>= 6 0 pre-final 1)) + (setf (stream-external-format p) (find-external-format :utf8)))) + #-(version>= 6 0 pre-final 1) (let* ((c (read-char p nil)) c2 (c-code (if c (char-code c) nil))) (if* (eq #xFF c-code) then @@ -286,12 +284,12 @@ (setf c-code (if c (char-code c2) nil)) (if* (eq #xFE c-code) then (format t "set unicode~%") - #+allegro (setf (stream-external-format p) - (find-external-format #+(and allegro (version>= 6 0 pre-final 1)) :unicode - #-(and allegro (version>= 6 0 pre-final 1)) :fat-little)) + (setf (stream-external-format p) + (find-external-format #+(version>= 6 0 pre-final 1) :unicode + #-(version>= 6 0 pre-final 1) :fat-little)) else (xml-error "stream has incomplete Unicode marker")) - else #+allegro (setf (stream-external-format p) + else (setf (stream-external-format p) (find-external-format :utf8)) (when c (push c (iostruct-unget-char tokenbuf)) @@ -411,7 +409,7 @@ (xml-error "XML declaration tag does not include correct 'encoding' or 'standalone' attribute")) (when (and (fourth val) (string= "standalone" (symbol-name (fourth val)))) (if* (equal (fifth val) "yes") then - (setf (iostruct-standalonep tokenbuf) t) + (setf (iostruct-standalonep tokenbuf) t) elseif (not (equal (fifth val) "no")) then (xml-error "XML declaration tag does not include correct 'standalone' attribute value"))) (dotimes (i (length (third val))) @@ -421,16 +419,26 @@ (not (member c '(#\. #\_ #\- #\:))) ) (xml-error "XML declaration tag does not include correct 'version' attribute value")))) - (when (and (fourth val) (eql :encoding (fourth val))) - (dotimes (i (length (fifth val))) - (let ((c (schar (fifth val) i))) - (when (and (not (alpha-char-p c)) - (if* (> i 0) then - (and (not (digit-char-p c)) - (not (member c '(#\. #\_ #\-)))) - else t)) - (xml-error "XML declaration tag does not include correct 'encoding' attribute value"))))) - ) + (if* (and (fourth val) (eql :encoding (fourth val))) + then (dotimes (i (length (fifth val))) + (let ((c (schar (fifth val) i))) + (when (and (not (alpha-char-p c)) + (if* (> i 0) then + (and (not (digit-char-p c)) + (not (member c '(#\. #\_ #\-)))) + else t)) + (xml-error "XML declaration tag does not include correct 'encoding' attribute value")))) + ;; jkf 3/26/02 + ;; if we have a stream we're reading from set its external-format + ;; to the encoding + ;; note - tokenbuf is really an iostruct, not a tokenbuf + (if* (tokenbuf-stream (iostruct-tokenbuf tokenbuf)) + then (setf (stream-external-format + (tokenbuf-stream (iostruct-tokenbuf tokenbuf))) + (find-external-format (fifth val)))) + + + )) (defun xml-error (text) (declare (optimize (speed 3) (safety 1))) diff --git a/pxml2.cl b/pxml2.cl index 27e2bf7..08483bf 100644 --- a/pxml2.cl +++ b/pxml2.cl @@ -19,15 +19,13 @@ ;; version) or write to the Free Software Foundation, Inc., 59 Temple Place, ;; Suite 330, Boston, MA 02111-1307 USA ;; -;; $Id: pxml2.cl,v 1.1 2002/10/15 12:23:03 kevin Exp $ - -;; Change Log +;; Change Log ;; ;; 10/14/00 add namespace support (in-package :net.xml.parser) -(pxml-dribble-bug-hook "$Id: pxml2.cl,v 1.1 2002/10/15 12:23:03 kevin Exp $") +(pxml-dribble-bug-hook "$Id: pxml2.cl,v 1.2 2003/06/20 02:21:23 kevin Exp $") ;; state titles can be better chosen and explained diff --git a/pxml3.cl b/pxml3.cl index aefd3d3..bce6582 100644 --- a/pxml3.cl +++ b/pxml3.cl @@ -19,11 +19,10 @@ ;; version) or write to the Free Software Foundation, Inc., 59 Temple Place, ;; Suite 330, Boston, MA 02111-1307 USA ;; -;; $Id: pxml3.cl,v 1.1 2002/10/15 12:23:03 kevin Exp $ (in-package :net.xml.parser) -(pxml-dribble-bug-hook "$Id: pxml3.cl,v 1.1 2002/10/15 12:23:03 kevin Exp $") +(pxml-dribble-bug-hook "$Id: pxml3.cl,v 1.2 2003/06/20 02:21:23 kevin Exp $") (defvar *debug-dtd* nil) diff --git a/debian/xmlutils.asd b/xmlutils.asd similarity index 87% rename from debian/xmlutils.asd rename to xmlutils.asd index b09bdd3..f57bc25 100644 --- a/debian/xmlutils.asd +++ b/xmlutils.asd @@ -7,7 +7,7 @@ ;;;; Programmer: Kevin M. Rosenberg ;;;; Date Started: Sep 2002 ;;;; -;;;; $Id: xmlutils.asd,v 1.4 2002/11/08 16:51:40 kevin Exp $ +;;;; $Id: xmlutils.asd,v 1.1 2003/06/20 02:21:23 kevin Exp $ ;;;; ;;;; This file, part of cl-xmlutils, is Copyright (c) 2002 by Kevin M. Rosenberg ;;;; @@ -16,7 +16,9 @@ ;;;; (http://www.gnu.org/licenses/lgpl.html) ;;;; ************************************************************************* -(in-package :asdf) +(in-package #:cl-user) +(defpackage #:xmlutils-system (:use #:asdf #:cl)) +(in-package #:xmlutils-system) #-allegro (require :acl-compat) @@ -25,7 +27,7 @@ (defvar system::*stack-overflow-behavior* :warn) (setq system::*stack-overflow-behavior* :warn)) -(defsystem :xmlutils +(defsystem xmlutils :name "cl-xmlutils" :author "Franz, Inc" :maintainer "Kevin M. Rosenberg " @@ -33,9 +35,6 @@ :description "Franz's Test Harness Package" :long-description "Xmlutils provides a library for parsing HTML and XML documents." - :perform (load-op :after (op xmlutils) - (pushnew :xmlutils cl:*features*)) - :components ((:file "phtml") (:file "pxml0") @@ -44,6 +43,6 @@ (:file "pxml3" :depends-on ("pxml2")) )) -(defmethod source-file-type ((c cl-source-file) (s (eql (find-system :xmlutils)))) +(defmethod source-file-type ((c cl-source-file) (s (eql (find-system 'xmlutils)))) "cl") -- 2.34.1