X-Git-Url: http://git.kpe.io/?p=cl-rss.git;a=blobdiff_plain;f=main.lisp;h=3545f936393e1cdfdde049dbf71888188e50e976;hp=57e6537104190390212ea6a0e0ddc5ea8389597d;hb=HEAD;hpb=c1737f8d0d38390d3308737e0978517d20c143bd diff --git a/main.lisp b/main.lisp index 57e6537..3545f93 100644 --- a/main.lisp +++ b/main.lisp @@ -12,15 +12,30 @@ (in-package #:rss) -(defclass rss-0.9x-channel () - ((title :accessor title :initform nil) - (link :accessor link :initform nil) - (description :accessor description) +(defclass rss-channel () + ((title :reader title) + (link :reader link) + (description :reader description :initform nil) + (language :reader language :initform nil) + (image :reader image :initform nil) + (pub-date :reader pub-date :initform nil) + (last-build-date :reader last-build-date :initform nil) + (items :accessor items :initform nil))) -(defclass rss-0.9x-item () - ((title :accessor title :initform nil ) - (link :accessor link :initform nil))) +(defclass rss-item () + ((title :reader title) + (link :reader link) + (description :reader description :initform nil) + (pub-date :reader pub-date :initform nil))) + +(defclass rss-image () + ((url :reader url) + (title :reader title) + (link :reader link) + (width :reader width :initform nil) + (height :reader height :initform nil) + (description :reader description :initform nil))) (defvar *sites* '("http://www.cliki.net/recent-changes.rdf")) @@ -41,61 +56,172 @@ (declare (ignore true-uri headers)) (when (eql 200 response) (with-input-from-string (strm body) - (parse-rss-0.9x-stream strm))))) + (parse-rss-stream strm))))) -(defun parse-rss-0.9x-file (file) +(defun parse-rss-file (file) (with-open-file (stream file :direction :input) - (parse-rss-0.9x-stream stream))) - -(defun is-rss-version-supported (attributes) - (awhen (position "version" attributes :key #'car :test #'string=) - (let ((version (second (nth it attributes)))) - (= 4 (length version)) - (string= "0.9" (subseq version 0 3))))) - -(defun parse-rss-0.9x-stream (stream) - (let* ((*package* (find-package 'kmrcl)) - (tree (remove-from-tree-if - (lambda (x) (and (stringp x) (is-string-whitespace x))) - (xmls:parse stream :compress-whitespace t)))) - (unless (and (string= "rss" (first tree)) - (is-rss-version-supported (second tree))) - (return-from parse-rss-0.9x-stream nil)) - (let* ((content (third tree)) - (pos 0) - (len (length content)) - (rss (make-instance 'rss-0.9x-channel))) - (when (string= "channel" (nth pos content)) - (incf pos) - (while (and (< pos len) - (or (string= "title" (car (nth pos content))) - (string= "link" (car (nth pos content))) - (string= "description" (car (nth pos content))))) - (let ((slot (nth pos content))) - (cond - ((string= "title" (car slot)) - (setf (title rss) (second slot))) - ((string= "link" (car slot)) - (setf (link rss) (second slot))) - ((string= "description" (car slot)) - (setf (description rss) (second slot))))) - (incf pos))) - (while (< pos len) - (when (string= "item" (car (nth pos content))) - (let ((item (make-instance 'rss-0.9x-item))) - (dolist (pair (cdr (nth pos content))) - (cond - ((string= "title" (car pair)) - (setf (title item) (second pair))) - ((string= "link" (car pair)) - (setf (link item) (second pair))))) - (push item (items rss)))) - (incf pos)) - (setf (items rss) (nreverse (items rss))) - rss))) - - - - - - + (parse-rss-stream stream))) + +(defun is-rss-version-supported (version-string) + (and (member version-string '("0.91" "0.92" "2.0") :test #'string=) t)) + +(define-condition rss-parse-error (error) + ((msg :initarg :msg :reader msg)) + (:documentation "Thrown when PARSE-RSS-STREAM encounters invalid RSS data.") + (:report + (lambda (condition stream) + (format stream "Parse error reading RSS~@[. ~A~]" (msg condition))))) + +(define-condition rss-version-unsupported (rss-parse-error) + ((version :initarg :version :reader version)) + (:documentation + "Thrown when PARSE-RSS-STREAM encounters RSS of a version it doesn't +recognise.") + (:report + (lambda (condition stream) + (format stream "Unexpected RSS version: ~S" (version condition))))) + +(defmacro string=-case (keyform (&rest cases) &optional &body otherwise) + "A version of CASE that tests using string=." + (let ((key (gensym)) (expected)) + `(let ((,key ,keyform)) + (cond + ,@(mapcar + (lambda (form) + (destructuring-bind (string &body body) form + (unless (stringp string) + (error "Can only deal with strings as keys.")) + (push string expected) + `((string= ,string ,key) ,@body))) + cases) + (t + ,@otherwise))))) + +(defun setf-unique-slot (object name value) + "Set the slot with the given NAME in OBJECT to VALUE, throwing an error if it +was already set." + (when (and (slot-boundp object name) (slot-value object name)) + (error 'rss-parse-error + :msg (format nil "<~A> should only be specified once in the node." + name))) + (setf (slot-value object name) value)) + +(defun setf-unique-string (object name node) + "Set the slot with the given NAME in OBJECT to the string contents of NODE, +throwing an error if it was already set, or if they aren't a string. Used for +elements like and <link>, which shouldn't crop up twice." + (let ((string (car (xmls:xmlrep-children node)))) + ;; skip empty nodes + (when string + (progn + (unless (stringp string) + (error 'rss-parse-error + :msg (format nil "Got ~A when expecting string for contents of <~A>" + string name))) + (setf-unique-slot object name string))))) + +(defun ensure-string-slots-filled (object required-slots strict?) + "For each slot in REQUIRED-SLOTS, if it is unbound in OBJECT, set it to the +empty string if STRICT? is NIL, or throw an error if true." + (dolist (slot required-slots) + (unless (and (slot-boundp object slot) (slot-value object slot)) + (if strict? + (error 'rss-parse-error + :msg (format nil "Required field ~A not set for object." slot)) + (setf (slot-value object slot) ""))) + (when (and strict? (not (stringp (slot-value object slot)))) + (error 'rss-parse-error + :msg (format nil "Slot ~A is set to ~A, which is not a string." + slot (slot-value object slot))))) + (values)) + +(defun parse-type (class child-parser node strict? required-string-slots) + (let ((object (make-instance class))) + (map nil + (lambda (subnode) (funcall child-parser subnode object strict?)) + (xmls:xmlrep-children node)) + (ensure-string-slots-filled object required-string-slots strict?) + object)) + +(eval-when (:compile-toplevel :load-toplevel :execute) + (defun symbol-to-name (sym) + "Convert symbols in the form SOME-SYM to RSS-style camelCase (a lopsided +camel, it seems)." + (let ((parts (split-alphanumeric-string (symbol-name sym)))) + (format nil "~A~{~A~}" + (string-downcase (first parts)) + (mapcar #'string-capitalize (cdr parts)))))) + +(defmacro def-child-parser (name (&rest unique-strings) + &rest complicated-forms) + "Define a parser that sets UNIQUE-STRINGS in the obvious +way. COMPLICATED-FORMS should be lists (KEY &body BODY) where KEY is a string +and BODY is performed with ITEM set to the item we're modifying and NODE set to +the XML node we just got." + `(defun ,name (node object strict?) + (declare (ignorable strict?)) + ;; skip atom-related tags + (when (equalp "http://www.w3.org/2005/Atom" (xmls:node-ns node)) + (return-from ,name nil)) + (string=-case (xmls:xmlrep-tag node) + (,@(mapcar + (lambda (sym) `(,(symbol-to-name sym) + (setf-unique-string object ',sym node))) + unique-strings) + ,@complicated-forms)))) + +(def-child-parser parse-item-child (title link description pub-date)) +(defun parse-item (node strict?) + (parse-type 'rss-item 'parse-item-child node strict? '(title link))) + +(def-child-parser parse-image-child (url title link width height description)) +(defun parse-image (node strict?) + (parse-type 'rss-image 'parse-image-child node strict? '(url title link))) + +(def-child-parser parse-channel-child + (title link description language pub-date last-build-date) + ("item" (push (parse-item node strict?) (items object))) + ("image" (setf-unique-slot object 'image (parse-image node strict?)))) + +(defun parse-rss-stream (str &key err strict?) + "Parse RSS data from STR, which can be either a stream or a string. If ERR, +then throw an error when something goes wrong, otherwise just return NIL. If +STRICT?, check more carefully for whether the document fails to follow the RSS +2.0 specification (which incorporates 0.9x)" + (handler-case + (let* ((*package* (find-package 'kmrcl)) + (tree (xmls:parse str :compress-whitespace t)) + (children (xmls:xmlrep-children tree)) + (version (xmls:xmlrep-attrib-value "version" tree nil))) + + (unless (string= "rss" (xmls:xmlrep-tag tree)) + (error 'rss-parse-error :msg "Data doesn't claim to be RSS.")) + (unless (is-rss-version-supported version) + (error 'rss-version-unsupported :version version)) + (unless (and (= 1 (length children)) + (string= "channel" (xmls:xmlrep-tag (first children)))) + (error 'rss-parse-error + :msg "<rss> should have one child, a <channel>.")) + + (let ((channel (first children)) + (rss (make-instance 'rss-channel))) + + (map nil (lambda (child) (parse-channel-child child rss strict?)) + (xmls:xmlrep-children channel)) + + (ensure-string-slots-filled + rss + `(title link description + ,@(when (string= version "0.91") '(language))) + strict?) + + (unless (or (not strict?) (string= version "2.0") (image rss)) + (error 'rss-parse-error + :msg "<rss> should have <image> specified for version <2.0")) + + (setf (items rss) (nreverse (items rss))) + rss)) + + (rss-parse-error (e) + (when err (error e)) + nil)))