From: Kevin M. Rosenberg Date: Tue, 4 Sep 2012 23:09:48 +0000 (-0600) Subject: Release 0.9.0 X-Git-Tag: debian-0.9.0-1~1 X-Git-Url: http://git.kpe.io/?p=cl-rss.git;a=commitdiff_plain;h=dc76abe52ed6323608c8d01616a64b0204498c5e Release 0.9.0 --- diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..be303db --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +*.fasl diff --git a/ChangeLog b/ChangeLog new file mode 100644 index 0000000..592ad3b --- /dev/null +++ b/ChangeLog @@ -0,0 +1,8 @@ +2012-09-04 Kevin Rosenberg + * Version 0.9.0 + * Commit patch from Rupert Swarbrick: Parse RSS version 2 and + export slot readers. This change allows CL-RSS to parse RSS + version 2 feeds as well as 0.9x feeds. Channels gain four slots: + language, image, pub-date and last-build-date. Items gain a + description and pub-date. The readers for these slot values are + now exported from the package. diff --git a/debian/changelog b/debian/changelog index e4e9199..bd9fdfd 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,9 @@ +cl-rss (0.9.0-1) unstable; urgency=low + + * New upstream: supports RSS 2 + + -- Kevin M. Rosenberg Tue, 04 Sep 2012 16:09:40 -0600 + cl-rss (0.1.1-6) unstable; urgency=low * Convert to dh_lisp diff --git a/main.lisp b/main.lisp index 57e6537..44fd788 100644 --- a/main.lisp +++ b/main.lisp @@ -12,15 +12,30 @@ (in-package #:rss) -(defclass rss-0.9x-channel () - ((title :accessor title :initform nil) - (link :accessor link :initform nil) - (description :accessor description) +(defclass rss-channel () + ((title :reader title) + (link :reader link) + (description :reader description :initform nil) + (language :reader language :initform nil) + (image :reader image :initform nil) + (pub-date :reader pub-date :initform nil) + (last-build-date :reader last-build-date :initform nil) + (items :accessor items :initform nil))) -(defclass rss-0.9x-item () - ((title :accessor title :initform nil ) - (link :accessor link :initform nil))) +(defclass rss-item () + ((title :reader title) + (link :reader link) + (description :reader description :initform nil) + (pub-date :reader pub-date :initform nil))) + +(defclass rss-image () + ((url :reader url) + (title :reader title) + (link :reader link) + (width :reader width :initform nil) + (height :reader height :initform nil) + (description :reader description :initform nil))) (defvar *sites* '("http://www.cliki.net/recent-changes.rdf")) @@ -41,61 +56,166 @@ (declare (ignore true-uri headers)) (when (eql 200 response) (with-input-from-string (strm body) - (parse-rss-0.9x-stream strm))))) + (parse-rss-stream strm))))) -(defun parse-rss-0.9x-file (file) +(defun parse-rss-file (file) (with-open-file (stream file :direction :input) - (parse-rss-0.9x-stream stream))) - -(defun is-rss-version-supported (attributes) - (awhen (position "version" attributes :key #'car :test #'string=) - (let ((version (second (nth it attributes)))) - (= 4 (length version)) - (string= "0.9" (subseq version 0 3))))) - -(defun parse-rss-0.9x-stream (stream) - (let* ((*package* (find-package 'kmrcl)) - (tree (remove-from-tree-if - (lambda (x) (and (stringp x) (is-string-whitespace x))) - (xmls:parse stream :compress-whitespace t)))) - (unless (and (string= "rss" (first tree)) - (is-rss-version-supported (second tree))) - (return-from parse-rss-0.9x-stream nil)) - (let* ((content (third tree)) - (pos 0) - (len (length content)) - (rss (make-instance 'rss-0.9x-channel))) - (when (string= "channel" (nth pos content)) - (incf pos) - (while (and (< pos len) - (or (string= "title" (car (nth pos content))) - (string= "link" (car (nth pos content))) - (string= "description" (car (nth pos content))))) - (let ((slot (nth pos content))) - (cond - ((string= "title" (car slot)) - (setf (title rss) (second slot))) - ((string= "link" (car slot)) - (setf (link rss) (second slot))) - ((string= "description" (car slot)) - (setf (description rss) (second slot))))) - (incf pos))) - (while (< pos len) - (when (string= "item" (car (nth pos content))) - (let ((item (make-instance 'rss-0.9x-item))) - (dolist (pair (cdr (nth pos content))) - (cond - ((string= "title" (car pair)) - (setf (title item) (second pair))) - ((string= "link" (car pair)) - (setf (link item) (second pair))))) - (push item (items rss)))) - (incf pos)) - (setf (items rss) (nreverse (items rss))) - rss))) - - - - - - + (parse-rss-stream stream))) + +(defun is-rss-version-supported (version-string) + (and (member version-string '("0.91" "0.92" "2.0") :test #'string=) t)) + +(define-condition rss-parse-error (error) + ((msg :initarg :msg :reader msg)) + (:documentation "Thrown when PARSE-RSS-STREAM encounters invalid RSS data.") + (:report + (lambda (condition stream) + (format stream "Parse error reading RSS~@[. ~A~]" (msg condition))))) + +(define-condition rss-version-unsupported (rss-parse-error) + ((version :initarg :version :reader version)) + (:documentation + "Thrown when PARSE-RSS-STREAM encounters RSS of a version it doesn't +recognise.") + (:report + (lambda (condition stream) + (format stream "Unexpected RSS version: ~S" (version condition))))) + +(defmacro string=-case (keyform (&rest cases) &optional &body otherwise) + "A version of CASE that tests using string=." + (let ((key (gensym)) (expected)) + `(let ((,key ,keyform)) + (cond + ,@(mapcar + (lambda (form) + (destructuring-bind (string &body body) form + (unless (stringp string) + (error "Can only deal with strings as keys.")) + (push string expected) + `((string= ,string ,key) ,@body))) + cases) + (t + ,@otherwise))))) + +(defun setf-unique-slot (object name value) + "Set the slot with the given NAME in OBJECT to VALUE, throwing an error if it +was already set." + (when (and (slot-boundp object name) (slot-value object name)) + (error 'rss-parse-error + :msg (format nil "<~A> should only be specified once in the node." + name))) + (setf (slot-value object name) value)) + +(defun setf-unique-string (object name node) + "Set the slot with the given NAME in OBJECT to the string contents of NODE, +throwing an error if it was already set, or if they aren't a string. Used for +elements like and <link>, which shouldn't crop up twice." + (let ((string (car (xmls:xmlrep-children node)))) + (unless (stringp string) + (error 'rss-parse-error + :msg (format nil "Got ~A when expecting string for contents of <~A>" + string name))) + (setf-unique-slot object name string))) + +(defun ensure-string-slots-filled (object required-slots strict?) + "For each slot in REQUIRED-SLOTS, if it is unbound in OBJECT, set it to the +empty string if STRICT? is NIL, or throw an error if true." + (dolist (slot required-slots) + (unless (and (slot-boundp object slot) (slot-value object slot)) + (if strict? + (error 'rss-parse-error + :msg (format nil "Required field ~A not set for object." slot)) + (setf (slot-value object slot) ""))) + (when (and strict? (not (stringp (slot-value object slot)))) + (error 'rss-parse-error + :msg (format nil "Slot ~A is set to ~A, which is not a string." + slot (slot-value object slot))))) + (values)) + +(defun parse-type (class child-parser node strict? required-string-slots) + (let ((object (make-instance class))) + (map nil + (lambda (subnode) (funcall child-parser subnode object strict?)) + (xmls:xmlrep-children node)) + (ensure-string-slots-filled object required-string-slots strict?) + object)) + +(eval-when (:compile-toplevel :load-toplevel :execute) + (defun symbol-to-name (sym) + "Convert symbols in the form SOME-SYM to RSS-style camelCase (a lopsided +camel, it seems)." + (let ((parts (split-alphanumeric-string (symbol-name sym)))) + (format nil "~A~{~A~}" + (string-downcase (first parts)) + (mapcar #'string-capitalize (cdr parts)))))) + +(defmacro def-child-parser (name (&rest unique-strings) + &rest complicated-forms) + "Define a parser that sets UNIQUE-STRINGS in the obvious +way. COMPLICATED-FORMS should be lists (KEY &body BODY) where KEY is a string +and BODY is performed with ITEM set to the item we're modifying and NODE set to +the XML node we just got." + `(defun ,name (node object strict?) + (declare (ignorable strict?)) + (string=-case (xmls:xmlrep-tag node) + (,@(mapcar + (lambda (sym) `(,(symbol-to-name sym) + (setf-unique-string object ',sym node))) + unique-strings) + ,@complicated-forms)))) + +(def-child-parser parse-item-child (title link description pub-date)) +(defun parse-item (node strict?) + (parse-type 'rss-item 'parse-item-child node strict? '(title link))) + +(def-child-parser parse-image-child (url title link width height description)) +(defun parse-image (node strict?) + (parse-type 'rss-image 'parse-image-child node strict? '(url title link))) + +(def-child-parser parse-channel-child + (title link description language pub-date last-build-date) + ("item" (push (parse-item node strict?) (items object))) + ("image" (setf-unique-slot object 'image (parse-image node strict?)))) + +(defun parse-rss-stream (str &key err strict?) + "Parse RSS data from STR, which can be either a stream or a string. If ERR, +then throw an error when something goes wrong, otherwise just return NIL. If +STRICT?, check more carefully for whether the document fails to follow the RSS +2.0 specification (which incorporates 0.9x)" + (handler-case + (let* ((*package* (find-package 'kmrcl)) + (tree (xmls:parse str :compress-whitespace t)) + (children (xmls:xmlrep-children tree)) + (version (xmls:xmlrep-attrib-value "version" tree nil))) + + (unless (string= "rss" (xmls:xmlrep-tag tree)) + (error 'rss-parse-error :msg "Data doesn't claim to be RSS.")) + (unless (is-rss-version-supported version) + (error 'rss-version-unsupported :version version)) + (unless (and (= 1 (length children)) + (string= "channel" (xmls:xmlrep-tag (first children)))) + (error 'rss-parse-error + :msg "<rss> should have one child, a <channel>.")) + + (let ((channel (first children)) + (rss (make-instance 'rss-channel))) + + (map nil (lambda (child) (parse-channel-child child rss strict?)) + (xmls:xmlrep-children channel)) + + (ensure-string-slots-filled + rss + `(title link description + ,@(when (string= version "0.91") '(language))) + strict?) + + (unless (or (not strict?) (string= version "2.0") (image rss)) + (error 'rss-parse-error + :msg "<rss> should have <image> specified for version <2.0")) + + (setf (items rss) (nreverse (items rss))) + rss)) + + (rss-parse-error (e) + (when err (error e)) + nil))) diff --git a/package.lisp b/package.lisp index 197e565..d7d875e 100644 --- a/package.lisp +++ b/package.lisp @@ -14,6 +14,12 @@ (defpackage rss (:use #:cl #:kmrcl) (:export - #:show-sites - )) + #:rss-channel #:rss-item #:rss-image + #:show-sites #:rss-site + #:parse-rss-file #:parse-rss-stream + + ;; Accessors to class slots + #:title #:link #:description #:language #:pub-date #:image #:items + #:url #:width #:height #:last-build-date)) + diff --git a/rss.asd b/rss.asd index e34fcbb..f6a02c4 100644 --- a/rss.asd +++ b/rss.asd @@ -7,7 +7,6 @@ ;;;; Programmer: Kevin M. Rosenberg ;;;; Date Started: Aug 2003 ;;;; -;;;; $Id$ ;;;; ************************************************************************* (defpackage #:rss-system (:use #:asdf #:cl)) @@ -21,6 +20,7 @@ :maintainer "Kevin M. Rosenberg <kmr@debian.org>" :licence "BSD" :description "Remote Site Summary" + :version "0.9.0" :properties ((#:author-email . "kevin@rosenberg.net") ((#:albert #:output-dir) . "albert-docs/") @@ -28,7 +28,7 @@ ((#:albert #:docbook #:template) . "book") ((#:albert #:docbook #:bgcolor) . "white") ((#:albert #:docbook #:textcolor) . "black")) - + :serial t :depends-on (kmrcl xmls #-allegro aserve) :components