From 8894876c0e1bc1ce0a3e1680b804e4df7efce2b8 Mon Sep 17 00:00:00 2001 From: "Kevin M. Rosenberg" Date: Fri, 5 Apr 2013 10:50:22 -0600 Subject: [PATCH] Handle empty RRF files, add MTS2012AB --- parse-common.lisp | 50 ++++++++++++++++++++++++++--------------------- sql.lisp | 5 +++-- 2 files changed, 31 insertions(+), 24 deletions(-) diff --git a/parse-common.lisp b/parse-common.lisp index ae7af53..b139566 100644 --- a/parse-common.lisp +++ b/parse-common.lisp @@ -25,10 +25,10 @@ (gen-ucols) (set-ucols-for-ufiles *umls-files*) (ensure-field-lengths)) - (error (e) - (warn "Error reading ucols+ufiles: ~A." e) - (setf *umls-cols* nil *umls-files* nil) - nil)) + (error (e) + (warn "Error reading ucols+ufiles: ~A." e) + (setf *umls-cols* nil *umls-files* nil) + nil)) t) @@ -89,23 +89,26 @@ "Initial colstruct field lengths for files that don't have a measurement. Currently, these are the LEX and NET files." (dolist (length-list (ufiles-field-lengths (ufiles-to-measure))) - (destructuring-bind (filename fields-max fields-av) length-list + (destructuring-bind (filename fields-max fields-av count-lines) length-list (let ((file (find-ufile filename))) (unless file (error "Can't find ~A filename in ufiles" filename)) - (unless (= (length fields-max) (length (fields file))) - (error - "Number of file fields ~A not equal to field count in ufile ~S" - fields-max file)) - (dotimes (i (length (fields file))) - (declare (fixnum i)) - (let* ((field (nth i (fields file))) - (col (find-ucol field filename))) - (unless col - (error "can't find column ~A" field)) - (setf (cmax col) (aref fields-max i)) - (setf (av col) (aref fields-av i)) - (ensure-ucol-datatype col (datatype-for-colname (col col))))))))) + (if (zerop count-lines) + (warn "File ~A is empty." filename) + (progn + (unless (= (length fields-max) (length (fields file))) + (error + "Number of file fields ~A not equal to field count in ufile ~S" + fields-max file)) + (dotimes (i (length (fields file))) + (declare (fixnum i)) + (let* ((field (nth i (fields file))) + (col (find-ucol field filename))) + (unless col + (error "can't find column ~A" field)) + (setf (cmax col) (aref fields-max i)) + (setf (av col) (aref fields-av i)) + (ensure-ucol-datatype col (datatype-for-colname (col col))))))))))) (defun ufiles-to-measure () "Returns a list of ufiles to measure" @@ -131,7 +134,7 @@ Currently, these are the LEX and NET files." :initial-element 0)) (setq fields-av (make-array num-fields :element-type '(or integer float) :initial-element 0))) - (dotimes (i num-fields) + (dotimes (i (or num-fields 0)) (declare (fixnum i)) (let* ((str (nth i line)) (len (length #-(and clisp unicode) str @@ -145,9 +148,12 @@ Currently, these are the LEX and NET files." (when (> len (aref fields-max i)) (setf (aref fields-max i) len)))) (incf count-lines)) - (dotimes (i num-fields) - (setf (aref fields-av i) (float (/ (aref fields-av i) count-lines)))) - (list (fil ufile) fields-max fields-av))) + (dotimes (i (or num-fields 0)) + (setf (aref fields-av i) + (if (plusp count-lines) + (float (/ (aref fields-av i) count-lines)) + 0))) + (list (fil ufile) fields-max fields-av count-lines))) ;;; UMLS column/file functions diff --git a/sql.lisp b/sql.lisp index e1414d9..dab38b8 100644 --- a/sql.lisp +++ b/sql.lisp @@ -21,8 +21,9 @@ (:2006ac . "KUMLS2006AC") (:2006ad . "MTS2006AD") (:2009ab . "MTS2009AB") - (:2010aa . "MTS2010AA"))) -(defvar +default-umls-db+ "MTS2010AA") + (:2010aa . "MTS2010AA") + (:2012ab . "MTS2012AB"))) +(defvar +default-umls-db+ "MTS2012AB") (defun lookup-db-name (db) (cdr (assoc (ensure-keyword db) +umls-sql-map+))) -- 2.34.1