From: Kevin M. Rosenberg Date: Tue, 5 Sep 2006 20:57:32 +0000 (+0000) Subject: r11099: add error checking X-Git-Tag: v2006ac.2~69 X-Git-Url: http://git.kpe.io/?p=umlisp.git;a=commitdiff_plain;h=8e895602ced5ab847ecc36c1eaa7be1c9a872a22 r11099: add error checking --- diff --git a/class-support.lisp b/class-support.lisp index eb600eb..a9fae98 100644 --- a/class-support.lisp +++ b/class-support.lisp @@ -24,8 +24,12 @@ (defmethod fmt-cui ((c ucon)) (fmt-cui (cui c))) -(defmethod fmt-cui ((c fixnum)) - (prefixed-fixnum-string c #\C 7)) +(when *has-fixnum-class* + (defmethod fmt-cui ((c fixnum)) + (prefixed-fixnum-string c #\C 7))) + +(defmethod fmt-cui ((c integer)) + (prefixed-integer-string c #\C 7)) (defmethod fmt-cui ((c string)) (if (eql (aref c 0) #\C) @@ -39,8 +43,12 @@ (defmethod fmt-lui ((l uterm)) (fmt-lui (lui l))) -(defmethod fmt-lui ((l fixnum)) - (prefixed-fixnum-string l #\L 7)) +(when *has-fixnum-class* + (defmethod fmt-lui ((l fixnum)) + (prefixed-fixnum-string l #\L 7))) + +(defmethod fmt-lui ((l integer)) + (prefixed-integer-string l #\L 7)) (defmethod fmt-lui ((l string)) (if (eql (aref l 0) #\L) @@ -51,8 +59,12 @@ (defmethod fmt-sui ((s ustr)) (fmt-sui (sui s))) -(defmethod fmt-sui ((s fixnum)) - (prefixed-fixnum-string s #\S 7)) +(when *has-fixnum-class* + (defmethod fmt-sui ((s fixnum)) + (prefixed-fixnum-string s #\S 7))) + +(defmethod fmt-sui ((s integer)) + (prefixed-integer-string s #\S 7)) (defmethod fmt-sui ((s string)) (if (eql (aref s 0) #\S) @@ -60,8 +72,12 @@ (fmt-sui (parse-integer s)))) (defgeneric fmt-tui (tui)) -(defmethod fmt-tui ((tui fixnum)) - (prefixed-fixnum-string tui #\T 3)) +(when *has-fixnum-class* + (defmethod fmt-tui ((tui fixnum)) + (prefixed-fixnum-string tui #\T 3))) + +(defmethod fmt-tui ((tui integer)) + (prefixed-integer-string tui #\T 3)) (defmethod fmt-tui ((tui string)) (if (eql (aref tui 0) #\T) @@ -69,8 +85,12 @@ (fmt-tui (parse-integer tui)))) (defgeneric fmt-aui (aui)) -(defmethod fmt-aui ((aui fixnum)) - (prefixed-fixnum-string aui #\A 7)) +(when *has-fixnum-class* + (defmethod fmt-aui ((aui fixnum)) + (prefixed-fixnum-string aui #\A 7))) + +(defmethod fmt-aui ((aui integer)) + (prefixed-integer-string aui #\A 7)) (defmethod fmt-aui ((aui string)) (if (eql (aref aui 0) #\A) @@ -78,8 +98,12 @@ (fmt-aui (parse-integer aui)))) (defgeneric fmt-eui (e)) -(defmethod fmt-eui ((e fixnum)) - (prefixed-fixnum-string e #\E 7)) +(when *has-fixnum-class* + (defmethod fmt-eui ((e fixnum)) + (prefixed-fixnum-string e #\E 7))) + +(defmethod fmt-eui ((e integer)) + (prefixed-integer-string e #\E 7)) (defmethod fmt-eui ((e string)) (if (eql (aref e 0) #\E) diff --git a/data-structures.lisp b/data-structures.lisp index 3f51b3e..36b874c 100644 --- a/data-structures.lisp +++ b/data-structures.lisp @@ -21,21 +21,21 @@ ;;; Paths for files (defparameter *umls-path* - (make-pathname :directory '(:absolute "home" "kevin" "2006AC")) + (make-pathname :directory '(:absolute "srv" "umls" "2006AC")) "Path for base of UMLS data files") -(defvar *meta-path* - (merge-pathnames +(defparameter *meta-path* + (merge-pathnames (make-pathname :directory '(:relative "META")) *umls-path*)) -(defvar *lex-path* - (merge-pathnames +(defparameter *lex-path* + (merge-pathnames (make-pathname :directory '(:relative "LEX")) *umls-path*)) -(defvar *net-path* - (merge-pathnames +(defparameter *net-path* + (merge-pathnames (make-pathname :directory '(:relative "NET")) *umls-path*)) @@ -44,13 +44,20 @@ ;;; Structures for parsing UMLS text files - -(defparameter *umls-files* nil + +(defparameter *umls-files* nil "List of umls file structures. Used when parsing text files.") -(defparameter *umls-cols* nil +(defparameter *umls-cols* nil "List of meta column structures. Used when parsing text files.") +;; Special variables + +(defvar *has-fixnum-class* (when (ignore-errors (find-class 'fixnum)) t)) + +(defvar *octet-sql-storage* t + "Used to deciding field lengths. Use nil if using UTF-8 database encoding. But, UTF-8 will cause MySQL to double the bytes used for fixed field sizes.") + ;; Preliminary objects to replace structures (defclass ufile () @@ -98,4 +105,3 @@ (format s "~A" (col obj)))) - diff --git a/debian/changelog b/debian/changelog index 673d50f..57ff927 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,9 @@ +cl-umlisp (4.2.0-1) unstable; urgency=low + + * Add support for UMLS 2006AC + + -- Kevin M. Rosenberg Sun, 3 Sep 2006 23:38:27 -0600 + cl-umlisp (4.1.0-2) unstable; urgency=low * New upstream URI diff --git a/parse-common.lisp b/parse-common.lisp index ebd88a8..ba7aef6 100644 --- a/parse-common.lisp +++ b/parse-common.lisp @@ -20,12 +20,17 @@ (defun ensure-ucols+ufiles (&optional (alwaysclear nil)) "Initialize all UMLS file and column structures if not already initialized" - (when (or alwaysclear (null *umls-files*)) - (setq *umls-cols* nil) - (setq *umls-files* nil) - (gen-ucols) - (gen-ufiles) - (ensure-field-lengths))) + (handler-case + (when (or alwaysclear (null *umls-files*)) + (setf *umls-cols* nil *umls-files* nil) + (gen-ucols) + (gen-ufiles) + (ensure-field-lengths)) + (error (e) + (setf *umls-cols* nil *umls-files* nil) + (error e))) + t) + (defun add-ucols (ucols) "Adds a ucol or list of ucols to *umls-cols*. Returns input value." @@ -40,9 +45,9 @@ (defun ufile-pathname (ufile &optional (extension "")) "Return pathname for a umls filename with an optional extension" (assert (typep ufile 'ufile)) - (let ((dirs (append (list (dir ufile)) + (let ((dirs (append (list (dir ufile)) (awhen (subdir ufile) (list it))))) - (merge-pathnames + (merge-pathnames (make-pathname :name (concatenate 'string (fil ufile) extension) :directory (cons :relative dirs)) *umls-path*))) @@ -51,8 +56,8 @@ "Return pathname for a umls filename with an optional extension" (etypecase filename (string - (merge-pathnames - (make-pathname :name (concatenate 'string filename extension)) + (merge-pathnames + (make-pathname :name (concatenate 'string filename extension)) (case (schar filename 0) ((#\M #\m) *meta-path*) @@ -78,7 +83,7 @@ Currently, these are the LEX and NET files." (error "Can't find ~A filename in ufiles" filename)) (unless (= (length fields-max) (length (fields file))) (error - "Number of file fields ~A not equal to field count in ufile ~S" + "Number of file fields ~A not equal to field count in ufile ~S" fields-max file)) (dotimes (i (length (fields file))) (declare (fixnum i)) @@ -89,15 +94,15 @@ Currently, these are the LEX and NET files." (setf (cmax col) (aref fields-max i)) (setf (av col) (aref fields-av i)) (ensure-ucol-datatype col (datatype-for-colname (col col))))))))) - + (defun ufiles-to-measure () "Returns a list of ufiles to measure" (loop for ufile in *umls-files* unless (or (char= #\M (schar (fil ufile) 0)) (char= #\m (schar (fil ufile) 0))) collect ufile)) - - + + (defun ufiles-field-lengths (ufiles) "Returns a list of lists of containing (FILE MAX AV)" (loop for ufile in ufiles collect (file-field-lengths ufile))) @@ -109,13 +114,18 @@ Currently, these are the LEX and NET files." (with-umls-ufile (line ufile) (unless num-fields (setq num-fields (length line)) - (setq fields-max (make-array num-fields :element-type 'fixnum + (setq fields-max (make-array num-fields :element-type 'fixnum :initial-element 0)) (setq fields-av (make-array num-fields :element-type 'number :initial-element 0))) (dotimes (i num-fields) (declare (fixnum i)) - (let ((len (length (nth i line)))) + (let* ((str (nth i line)) + (len (length #-(and clisp unicode) str + #+(and clisp unicode) + (if *octet-sql-storage* + (ext:convert-string-to-bytes str charset:utf-8) + str)))) (incf (aref fields-av i) len) (when (> len (aref fields-max i)) (setf (aref fields-max i) len)))) @@ -180,11 +190,11 @@ Currently, these are the LEX and NET files." (quote-str "'") (custom-value-fun)) (let ((ucol (make-instance 'ucol - :col col :des des :ref ref :min min :av av + :col col :des des :ref ref :min min :av av :max (if (eql max 0) 1 max) ;; ensure at least one char wide :fil fil :dty dty - :sqltype sqltype + :sqltype sqltype :quote-str quote-str :parse-fun (ensure-compiled-fun parse-fun) :custom-value-fun (ensure-compiled-fun custom-value-fun)))) @@ -200,13 +210,13 @@ Currently, these are the LEX and NET files." (ensure-col-in-columns colname filename *umls-cols*)) (defun find-ufile (filename) - "Returns umls-file structure for a filename" + "Returns umls-file structure for a filename" (find-if #'(lambda (f) (string-equal filename (fil f))) *umls-files*)) (defun find-ucols-for-ufile (ufile) "Returns list of umls-cols for a file structure" (loop for colname in (fields ufile) - collect (find-ucol colname + collect (find-ucol colname (if (subdir ufile) (concatenate 'string (subdir ufile) "/" (fil ufile)) (fil ufile))))) @@ -250,7 +260,7 @@ append a unique number (starting at 2) onto a column name that is repeated in th ufile))) (defun datatype-for-colname (colname) -"Return datatype for column name" +"Return datatype for column name" (second (find colname +col-datatypes+ :key #'car :test #'string-equal))) (defun canonicalize-column-type (type) @@ -293,7 +303,7 @@ append a unique number (starting at 2) onto a column name that is repeated in th (t "NUMERIC"))) (t type))) - + (defun ensure-ucol-datatype (col datatype) "Add data type information to column" (setf (datatype col) datatype) @@ -314,7 +324,7 @@ append a unique number (starting at 2) onto a column name that is repeated in th (parse-fun col) #'read-from-string (quote-str col) "")) (t ; Default column type, optimized text storage - (setf (parse-fun col) #'add-sql-quotes + (setf (parse-fun col) #'add-sql-quotes (quote-str col) "'") (when (and (cmax col) (av col)) (if (> (cmax col) 255) diff --git a/parse-macros.lisp b/parse-macros.lisp index 6e006d8..a2af75e 100644 --- a/parse-macros.lisp +++ b/parse-macros.lisp @@ -26,7 +26,7 @@ (delimited-string-to-list line #\| t)))) (defun source-files (path) - (if (probe-file path) + (if (probe-file path) (list path) (sort (directory (make-pathname :defaults path @@ -47,7 +47,9 @@ `(let ((,eof (gensym "EOFSYM-")) (,buffer (make-fields-buffer)) (,files (source-files ,path))) - (with-open-file (,ustream (first ,files) :direction :input) + (with-open-file (,ustream (first ,files) :direction :input + #+(and clisp unicode) :external-format + #+(and clisp unicode) charset:utf-8) (do ((,line (read-buffered-fields ,buffer ,ustream #\| ,eof) (read-buffered-fields ,buffer ,ustream #\| ,eof))) ((eq ,line ,eof) t) @@ -62,11 +64,15 @@ (files (gensym "FILES-"))) `(let ((,eof (gensym "EOFSYM-")) (,files (source-files ,path))) - (with-open-file (,ustream (first ,files) :direction :input) - (do ((,line (read-umls-line ,ustream ,eof) - (read-umls-line ,ustream ,eof))) - ((eq ,line ,eof) t) - ,@body))))) + (unless ,files + (error "Can't find file files for ~A~%" ,path)) + (with-open-file (,ustream (first ,files) :direction :input + #+(and clisp unicode) :external-format + #+(and clisp unicode) charset:utf-8) + (do ((,line (read-umls-line ,ustream ,eof) + (read-umls-line ,ustream ,eof))) + ((eq ,line ,eof) t) + ,@body))))) (defmacro with-umls-ufile ((line ufile) &body body) "Opens a UMLS and processes each parsed line with (body) argument" diff --git a/parse-rrf.lisp b/parse-rrf.lisp index 4f54fb2..0842d97 100644 --- a/parse-rrf.lisp +++ b/parse-rrf.lisp @@ -33,12 +33,12 @@ (defun make-preparse-hash-table () (if sui-lrl-hash (progn - (clrhash pfstr-hash) - (clrhash cui-lrl-hash) - (clrhash lui-lrl-hash) - (clrhash sui-lrl-hash) - (clrhash cuisui-lrl-hash) - (clrhash sab-srl-hash)) + (clrhash pfstr-hash) + (clrhash cui-lrl-hash) + (clrhash lui-lrl-hash) + (clrhash sui-lrl-hash) + (clrhash cuisui-lrl-hash) + (clrhash sab-srl-hash)) (setf pfstr-hash (make-hash-table :size 800000) cui-lrl-hash (make-hash-table :size 800000) @@ -49,8 +49,7 @@ (defun ensure-preparse (&optional (force-read nil)) (when (or force-read (not *preparse-hash-init?*)) - (make-preparse-hash-table) - (setq *preparse-hash-init?* t)) + (make-preparse-hash-table)) (with-umls-file (line "MRCONSO.RRF") (let ((cui (parse-ui (nth 0 line))) (lui (parse-ui (nth 3 line))) @@ -69,7 +68,8 @@ (multiple-value-bind (val found) (gethash sab sab-srl-hash) (declare (ignore val)) (unless found - (setf (gethash sab sab-srl-hash) srl)))))) + (setf (gethash sab sab-srl-hash) srl))))) + (setq *preparse-hash-init?* t)) (defun pfstr-hash (cui) (gethash cui pfstr-hash)) (defun cui-lrl (cui) (gethash cui cui-lrl-hash)) diff --git a/sql.lisp b/sql.lisp index 1c4b057..6b8ad01 100644 --- a/sql.lisp +++ b/sql.lisp @@ -65,7 +65,7 @@ (defun sql-connect () "Connect to UMLS database, automatically used pooled connections" (clsql:connect (list *umls-sql-host* (lookup-db-name *umls-sql-db*) - *umls-sql-user* *umls-sql-passwd*) + *umls-sql-user* *umls-sql-passwd*) :database-type *umls-sql-type* :pool t)) (defun sql-disconnect (conn)