;;;; -*- Mode: Lisp; Syntax: ANSI-Common-Lisp; Base: 10 -*-
;;;; *************************************************************************
;;;; FILE IDENTIFICATION
;;;;
;;;; Name:          entrez.lisp
;;;; Purpose:       Library to access NCBI Entrez web application
;;;; Programmer:    Kevin M. Rosenberg
;;;; Date Started:  April 2013
;;;;
;;;; This file, part of cl-entrez, is Copyright (c) 2002 by Kevin M. Rosenberg
;;;;
;;;; cl-entrez users are granted the rights to distribute and use this software
;;;; as governed by the terms of the GNU Lesser General Public License
;;;; (http://www.gnu.org/licenses/lgpl.html)
;;;; *************************************************************************
;;;;
;;;; Recovered from a gitweb blobdiff of entrez.lisp (blob 89db4c7) at
;;;; http://git.kpe.io/?p=pubmed.git

(in-package #:entrez)


;;; Service location

(defparameter +entrez-host+ "eutils.ncbi.nlm.nih.gov"
  "Host name used to build every request URL.")
(defparameter +entrez-search-url+ "/entrez/eutils/esearch.fcgi"
  "Path of the search endpoint on +ENTREZ-HOST+.")
(defparameter +entrez-fetch-url+ "/entrez/eutils/efetch.fcgi"
  "Path of the fetch endpoint on +ENTREZ-HOST+.")
(defparameter +entrez-summary-url+ "/entrez/eutils/esummary.fcgi"
  "Path of the summary endpoint on +ENTREZ-HOST+ (not used in this file yet).")
(defparameter *proxy-host* nil
  "Value handed to DO-HTTP-REQUEST as its :PROXY argument; NIL by default.")


;;; Conditions

(define-condition entrez-condition ()
  ((response :initarg :response
             :initform nil
             :reader entrez-condition-response))
  (:documentation
   "Base class of the conditions signalled by this library.
RESPONSE holds the raw server response that triggered the condition, or NIL."))

(define-condition entrez-server-error (error entrez-condition)
  ()
  (:report (lambda (c stream)
             (format stream "An Entrez server error occurred.")
             (awhen (entrez-condition-response c)
               (format stream " The server response was:~&~S" it))))
  (:documentation
   "Signalled when a response looks like a server error page."))

(define-condition entrez-query-error (error entrez-condition)
  ()
  (:report (lambda (c stream)
             ;; This report used to be a verbatim copy of the server-error
             ;; report, which made the two conditions indistinguishable.
             (format stream "An Entrez query error occurred.")
             (awhen (entrez-condition-response c)
               (format stream " The server response was:~&~S" it))))
  (:documentation
   "Signalled when a search response carries an error element."))


;;; Article-Set and Article Classes

;; Slot types below are (OR NULL ...) wherever the default initargs or the
;; XML extraction code in this file can store NIL in the slot.

(defclass pm-article-set ()
  ((query :type (or null string) :initarg :query :accessor articles-query)
   (webenv :type (or null string) :initarg :webenv :accessor articles-webenv)
   (qkey :type (or null string) :initarg :qkey :accessor articles-qkey)
   (articles :type list :initarg :articles :accessor articles)
   (total :type fixnum :initarg :total :accessor articles-total)
   (count :type fixnum :initarg :count :accessor articles-count)
   (start :type fixnum :initarg :start :accessor articles-start))
  (:documentation "Entrez Article Set Class")
  (:default-initargs :total 0 :start 0 :count 0 :query nil
                     :articles nil :qkey nil :webenv nil))

(defclass equery ()
  ((query :type (or null string) :initarg :query :accessor query)
   (wenv :type (or null string) :initarg :webenv :accessor wenv)
   (qkey :type (or null string) :initarg :qkey :accessor qkey)
   (qcount :type fixnum :initarg :qcount :accessor qcount))
  (:documentation "Entrez EQuery Results Class")
  (:default-initargs :qcount 0 :query nil :qkey nil :webenv nil))

;; Every slot starts out as NIL so that a partially filled article (for
;; example one being inspected after EXTRACT-ARTICLE signalled an error) can
;; still be printed instead of signalling an unbound-slot error.
(defclass pm-article ()
  ((pmid :type (or null integer) :initform nil :accessor article-pmid)
   (title :type (or null string) :initform nil :accessor article-title)
   (authors :type list :initform nil :accessor article-authors)
   (affiliation :type (or null string) :initform nil
                :accessor article-affiliation)
   (journal :type (or null string) :initform nil :accessor article-journal)
   (date :type (or null string) :initform nil :accessor article-date)
   (volume :type (or null string) :initform nil :accessor article-volume)
   (issue :type (or null string) :initform nil :accessor article-issue)
   (pages :type (or null string) :initform nil :accessor article-pages)
   (abstract :type (or null string) :initform nil :accessor article-abstract)
   (mesh-headings :type list :initform nil :accessor article-mesh-headings))
  (:documentation "Entrez Article Class"))

(defmethod print-object ((obj pm-article-set) (s stream))
  (print-unreadable-object (obj s :type t :identity t)
    (format s "~d total articles, ~d articles starting at #~d"
            (articles-total obj)
            (articles-count obj)
            (articles-start obj))))

(defmethod print-object ((obj equery) (s stream))
  (print-unreadable-object (obj s :type t :identity t)
    (format s "Found ~d with qkey ~a & wenv ~a"
            (qcount obj)
            (qkey obj)
            (wenv obj))))

(defmethod print-object ((obj pm-article) (s stream))
  (print-unreadable-object (obj s :type t :identity t)
    (format s "pmid:~d, title:~S" (article-pmid obj)
            (article-title obj))))

(defun article-equal-p (a b)
  "Return true when the PM-ARTICLE objects A and B have the same pmid.
Signals a correctable TYPE-ERROR (via CHECK-TYPE) if either is not a
PM-ARTICLE."
  (check-type a pm-article)
  (check-type b pm-article)
  (eql (article-pmid a) (article-pmid b)))

(defun article-ref (art)
  "Return a string of publication data for an article.
The result is assembled as \"DATE; VOLUME(ISSUE):PAGES\"; each part is left
out when the corresponding slot of ART is NIL, so the result may be \"\"."
  (with-output-to-string (ref)
    (awhen (article-date art)
      (format ref "~a; " it))
    (awhen (article-volume art)
      (format ref "~a" it))
    (awhen (article-issue art)
      (format ref "(~a)" it))
    (awhen (article-pages art)
      (format ref ":~a" it))))

(defmethod print-article-set ((artset pm-article-set)
                              &key (os *standard-output*) (format :text)
                                (complete nil) (print-link nil))
  "Display an article set to specified stream in specified format.
Prints the first (ARTICLES-COUNT ARTSET) entries of the article list with
PRINT-ARTICLE, passing OS, FORMAT, COMPLETE and PRINT-LINK through.  A
missing or NIL entry is printed as \"NULL Article\".  Returns ARTSET."
  ;; Walk the list with a cursor rather than calling NTH on every iteration.
  (let ((remaining (articles artset)))
    (dotimes (i (articles-count artset) artset)
      (let ((art (pop remaining)))
        (if art
            (print-article art :os os :format format
                               :complete complete :print-link print-link)
            (princ "NULL Article" os))))))

(defmethod print-article ((art pm-article) &key (os *standard-output*)
                                             (format :text) (complete nil)
                                             (print-link nil))
  "Display an article on the stream OS and return ART.

FORMAT is :TEXT or :HTML; any other value signals an error (ECASE).
COMPLETE, when true, includes the abstract text and the mesh headings.
PRINT-LINK, when non-NIL, is called with the article's pmid and its result
is used as the link target around the title (:HTML only, and only when the
article has an abstract or mesh headings)."
  (ecase format
    (:text
     (format os "~a~%~a~%~a~a ~a~%~a~%"
             (article-title art)
             (list-to-delimited-string (article-authors art) ", ")
             (aif (article-affiliation art)
                  (format nil "~a~%" it)
                  "")
             (article-journal art)
             (article-ref art)
             (aif (article-abstract art)
                  (if complete
                      it
                      "Abstract available")
                  "No abstract available"))
     ;; This form used to sit inside the FORMAT call above as a seventh
     ;; argument.  Arguments are evaluated before FORMAT produces output, so
     ;; the mesh headings were written ahead of the article they belong to.
     (when (and complete (article-mesh-headings art))
       (format os "~a~%" (article-mesh-headings art))))
    (:html
     ;; NOTE(review): in the copy of this file that was recovered, the HTML
     ;; tags had been stripped out of every format string in this branch (the
     ;; link string was empty although it is passed the URL).  The markup
     ;; below is a minimal reconstruction without class attributes; confirm
     ;; the exact tags against the upstream repository.
     (let ((has-link (or (article-abstract art)
                         (article-mesh-headings art))))
       (when (and print-link has-link)
         (format os "<a href=\"~a\">"
                 (funcall print-link (article-pmid art))))
       (format os "<div>~a</div>~%"
               (article-title art))
       (when (and print-link has-link)
         (format os "</a>"))
       (format os "<div>~a</div>~%"
               (list-to-delimited-string (article-authors art) ", "))
       (format os "<div>~a ~a</div>~%"
               (article-journal art) (article-ref art))
       (when (and complete (article-abstract art))
         (format os "<div>~a</div>~%"
                 (article-abstract art)))
       (when (and complete (article-mesh-headings art))
         (format os "<div>Mesh Headings:</div>")
         (dolist (mh (article-mesh-headings art))
           (format os "<div>~a</div>~%" mh)))
       (format os "<br/>~%"))))
  art)


;;; Entrez Query Functions

(defun equery (searchstr &key (db "pubmed") (usehistory t) start max)
  "Perform an Entrez search for SEARCHSTR and return an EQUERY instance.

DB, USEHISTORY, START and MAX are passed through to DO-EQUERY.  The result
carries the search string, the integer parsed from the \"Count\" element and
the contents of the \"WebEnv\" and \"QueryKey\" elements (NIL when absent).
Returns NIL when the response has no \"Count\" element.  Conditions signalled
by DO-EQUERY propagate to the caller."
  ;; Only the XML string (primary value of DO-EQUERY) is needed here.
  (let* ((xmlres (do-equery searchstr :db db :usehistory usehistory
                                      :start start :max max))
         (count-string (xml-tag-contents "Count" xmlres)))
    (when count-string
      (make-instance 'equery
                     :query searchstr
                     :qcount (parse-integer count-string)
                     :webenv (xml-tag-contents "WebEnv" xmlres)
                     :qkey (xml-tag-contents "QueryKey" xmlres)))))

(defun esummary (as &key maximum start)
  "Placeholder: returns AS unchanged.  MAXIMUM and START are accepted but
not used yet."
  (declare (ignore maximum start))
  ;; (articles as) (extract-article-set results))
  as)

(defun pm-fetch-ids (pmids)
  "Fetch the articles for PMIDS and return a PM-ARTICLE-SET.

PMIDS is a single Entrez ID or a list of them.  Returns NIL when the fetched
XML contains an \"Error\" element.  In the returned set, total and count are
the number of IDs requested, start is 0, query is the comma-separated ID
list and articles is the list built by EXTRACT-ARTICLE-SET."
  (let* ((ids (mklist pmids))
         (results (entrez-fetch-pmids-xml ids)))
    (unless (xml-tag-contents "Error" results)
      (make-instance 'pm-article-set
                     :total (length ids)
                     :query (list-to-delimited-string ids #\,)
                     :start 0
                     :count (length ids)
                     :articles (extract-article-set results)))))

;; Disabled: refers to ENTREZ-SEARCH-XML and PARSE-XML-NO-WS, which are not
;; defined in this file.
#+ignore
(defun entrez-search-tree (searchstr &key maximum start)
  "Performs a entrez search and returns two values:
tree of Entrez search results and tree of Entrez search status"
  (multiple-value-bind
        (xml-search-results xml-search-status)
      (entrez-search-xml searchstr :maximum maximum :start start)
    (if xml-search-results
        (values (parse-xml-no-ws xml-search-results)
                (parse-xml-no-ws xml-search-status))
        (values nil (parse-xml-no-ws xml-search-status)))))

(defun do-equery (term &key (db "pubmed") start max (usehistory t))
  "Perform an Entrez search for TERM and return two values:
the XML string returned by HTTP-EQUERY and the ID list that
EXTRACT-PMID-LIST extracts from it.  Conditions signalled by either
function propagate to the caller."
  (let ((res (http-equery term :start start :max max
                               :usehistory usehistory :db db)))
    (values res (extract-pmid-list res))))

(defun entrez-http-get (path query)
  "Issue a GET request for PATH on +ENTREZ-HOST+ with the alist QUERY.

Returns the values of DO-HTTP-REQUEST that this file binds: response body,
HTTP status code and response headers.  Signals an ERROR when the status
code is not 200."
  ;; NOTE(review): the URL uses plain http.  NCBI documents an https base URL
  ;; for E-utilities; confirm whether the HTTP client in use supports https
  ;; before switching the scheme.
  (let ((url (format nil "http://~a~a" +entrez-host+ path)))
    (multiple-value-bind (body http-code headers)
        (net.aserve.client:do-http-request
         url
         :method :get
         :query query
         :proxy *proxy-host*)
      (unless (eql http-code 200)
        (error "HTTP code performing '~A' was ~D." url http-code))
      (values body http-code headers))))

(defun http-equery (term &key start max (usehistory t)
                           (db "pubmed"))
  "Perform an Entrez search for TERM and return the response body.

The query always carries \"db\" and \"term\"; \"usehistory\"=\"y\",
\"retstart\" and \"retmax\" are added when USEHISTORY, START and MAX are
non-NIL.  Signals an ERROR when the HTTP status code is not 200."
  (let ((query `(("db" . ,db) ("term" . ,term))))
    (when usehistory
      (push (cons "usehistory" "y") query))
    (when start
      (push (cons "retstart" start) query))
    (when max
      (push (cons "retmax" max) query))
    ;; VALUES keeps the single-value return this function has always had.
    (values (entrez-http-get +entrez-search-url+ query))))

(defun entrez-fetch-pmids-xml (pmids &key (db "pubmed"))
  "Fetch articles for a list of Entrez ID's and return the XML string.

PMIDS is a single ID or a list of them; returns NIL without contacting the
server when it is empty.  Otherwise returns the values of ENTREZ-HTTP-GET
(body, HTTP status code, headers) and, like HTTP-EQUERY, signals an ERROR
when the status code is not 200."
  (let ((ids (mklist pmids)))           ; Ensure list
    (when ids
      (entrez-http-get
       +entrez-fetch-url+
       `(("db" . ,db) ("report" . "xml") ("mode" . "text")
         ("id" . ,(list-to-delimited-string ids #\,)))))))


;;; XML Extraction Routines

(defun collect-xml-elements (tag xmlstr extractor start &optional end)
  "Return a list with one item per TAG element found in XMLSTR.

Scanning begins at position START and, when END is given, is limited to
END.  EXTRACTOR is called as (EXTRACTOR XMLSTR ELEMENT-START ELEMENT-END)
for each element, in document order, and its results are collected.
POSITIONS-XML-TAG-CONTENTS is used the way the rest of this file uses it: as
returning the start of the element's contents (NIL when there is no further
element), their end, and the position at which to resume scanning."
  (let ((items '())
        (pos start))
    (loop
      (multiple-value-bind (item-start item-end item-next)
          (if end
              (positions-xml-tag-contents tag xmlstr pos end)
              (positions-xml-tag-contents tag xmlstr pos))
        (unless item-start
          (return (nreverse items)))
        (push (funcall extractor xmlstr item-start item-end) items)
        (setq pos item-next)))))

(defun extract-article-set (results)
  "Extract the articles from an Entrez XML string.
Returns a list of PM-ARTICLE objects, one per \"PubmedArticle\" element
inside \"PubmedArticleSet\", or NIL when RESULTS has no such set."
  (let ((set-start (positions-xml-tag-contents "PubmedArticleSet" results)))
    (when set-start
      (collect-xml-elements "PubmedArticle" results
                            #'extract-article set-start))))

(defun extract-article (xmlstr a-start a-end)
  "Extract one article from XMLSTR and return it as a PM-ARTICLE.

A-START and A-END bound the article's XML within XMLSTR.  Elements that are
not present leave the corresponding slot NIL, except \"PMID\": it is passed
to PARSE-INTEGER, which signals an error when the element is missing or not
numeric.  The date is the \"MedlineDate\" element when present, otherwise
\"Year\" followed by a space and \"Month\" when there is a month."
  (flet ((field (tag &optional (start a-start) (end a-end))
           (xml-tag-contents tag xmlstr start end)))
    (let ((article (make-instance 'pm-article)))
      (setf (article-pmid article) (parse-integer (field "PMID"))
            (article-title article) (field "ArticleTitle")
            (article-journal article) (field "MedlineTA")
            (article-pages article) (field "MedlinePgn")
            (article-affiliation article) (field "Affiliation")
            (article-abstract article) (field "AbstractText"))

      (multiple-value-bind (ji-start ji-end)
          (positions-xml-tag-contents "JournalIssue" xmlstr a-start a-end)
        ;; Without a JournalIssue element there are no bounds to search
        ;; within; volume, issue and date then keep their NIL defaults.
        (when ji-start
          (setf (article-volume article) (field "Volume" ji-start ji-end)
                (article-issue article) (field "Issue" ji-start ji-end)
                (article-date article)
                (or (field "MedlineDate" ji-start ji-end)
                    (concatenate 'string
                                 (field "Year" ji-start ji-end)
                                 (aif (field "Month" ji-start ji-end)
                                      (format nil " ~a" it)
                                      ""))))))

      (multiple-value-bind (al-start al-end)
          (positions-xml-tag-contents "AuthorList" xmlstr a-start a-end)
        (setf (article-authors article)
              (when al-start
                (collect-xml-elements "Author" xmlstr
                                      #'extract-author al-start al-end))))

      (multiple-value-bind (mhl-start mhl-end)
          (positions-xml-tag-contents "MeshHeadingList" xmlstr a-start a-end)
        (setf (article-mesh-headings article)
              (when mhl-start
                (collect-xml-elements "MeshHeading" xmlstr
                                      #'extract-mesh-heading
                                      mhl-start mhl-end))))

      article)))

(defun extract-author (xmlstr start end)
  "Extract author name from XML string.
Returns the \"LastName\" and \"Initials\" elements found between START and
END, joined by a single space."
  (let ((last-name (xml-tag-contents "LastName" xmlstr start end))
        (initials (xml-tag-contents "Initials" xmlstr start end)))
    (concatenate 'string last-name " " initials)))

(defun extract-mesh-heading (xmlstr start end)
  "Extract and format a mesh heading from XML string.
Returns \"DESCRIPTOR(SUBHEADING)\" when a \"SubHeading\" element is found
between START and END, otherwise just the \"DescriptorName\" contents."
  (let ((desc (xml-tag-contents "DescriptorName" xmlstr start end))
        (sh (xml-tag-contents "SubHeading" xmlstr start end)))
    (if sh
        (format nil "~a(~a)" desc sh)
        desc)))

(defun extract-pmid-list (results)
  "Return the list of Entrez ID's found in the XML result string RESULTS.

Signals ENTREZ-QUERY-ERROR when RESULTS contains an <ERROR> element and
ENTREZ-SERVER-ERROR when it contains the text \"Server Error\"; in both
cases RESULTS is attached to the condition as its response.  Otherwise
returns the contents of the \"Id\" element split on spaces, or NIL when
there is no such element."
  (cond
    ;; The recovered copy searched for the empty string here, which matches
    ;; every response and therefore made every query signal an error.
    ((search "<ERROR>" results)
     (error 'entrez-query-error :response results))
    ;; NOTE(review): the markup that surrounded this text was lost in the
    ;; recovered copy, so only the text itself is matched; confirm the exact
    ;; literal against the upstream repository.
    ((search "Server Error" results)
     (error 'entrez-server-error :response results))
    (t
     ;; NOTE(review): looks like only a single "Id" element is read here;
     ;; verify against XML-TAG-CONTENTS that responses listing several IDs
     ;; are handled as intended.
     (awhen (xml-tag-contents "Id" results)
       (delimited-string-to-list it #\space)))))