X-Git-Url: http://git.kpe.io/?a=blobdiff_plain;f=parse-2002.lisp;h=726764fd63b112f4d5280b5115eab0ff0b3a16fe;hb=3199369942d2e5ab4f5b060c2c6b655caf505944;hp=07f9b9cd3f992bfd73786511391c9631f0ae4712;hpb=bfdd5c9d3d66970759fcdbee5a51da2ca93ddf06;p=umlisp.git diff --git a/parse-2002.lisp b/parse-2002.lisp index 07f9b9c..726764f 100644 --- a/parse-2002.lisp +++ b/parse-2002.lisp @@ -5,10 +5,10 @@ ;;;; Name: parse-2002.lisp ;;;; Purpose: Parsing and SQL insertion routines for UMLisp which may ;;;; change from year to year -;;;; Programmer: Kevin M. Rosenberg +;;;; Author: Kevin M. Rosenberg ;;;; Date Started: Apr 2000 ;;;; -;;;; $Id: parse-2002.lisp,v 1.3 2002/10/09 23:03:41 kevin Exp $ +;;;; $Id: parse-2002.lisp,v 1.10 2003/05/07 21:57:06 kevin Exp $ ;;;; ;;;; This file, part of UMLisp, is ;;;; Copyright (c) 2000-2002 by Kevin M. Rosenberg, M.D. @@ -17,8 +17,10 @@ ;;;; as governed by the terms of the GNU General Public License. ;;;; ************************************************************************* -(in-package :umlisp) -(declaim (optimize (speed 3) (safety 1) (compilation-speed 0) (debug 3))) +(in-package #:umlisp) + +(eval-when (:compile-toplevel) + (declaim (optimize (speed 3) (safety 1) (compilation-speed 0) (debug 3)))) ;;; Pre-read data for custom fields into hash tables (defvar *parse-hash-init?* nil) @@ -51,9 +53,9 @@ (setq *parse-hash-init?* t)) (with-buffered-umls-file (line "MRCON") (let ((cui (parse-ui (aref line 0))) - (lui (parse-ui (nth 3 line))) - (sui (parse-ui (nth 5 line))) - (lrl (parse-integer (nth 7 line)))) + (lui (parse-ui (aref line 3))) + (sui (parse-ui (aref line 5))) + (lrl (parse-integer (aref line 7)))) (unless (gethash cui pfstr-hash) ;; if haven't stored pfstr for cui (if (and (string-equal (aref line 1) "ENG") ; LAT (string-equal (aref line 2) "P") ; ts @@ -63,9 +65,9 @@ (set-lrl-hash lui lrl lui-lrl-hash) (set-lrl-hash (make-cuisui cui sui) lrl cuisui-lrl-hash))) (with-buffered-umls-file (line "MRSO") - (let ((sab (aref 3 line))) + (let ((sab (aref line 3))) (unless (gethash sab sab-srl-hash) ;; if haven't stored - (setf (gethash sab sab-srl-hash) (aref 6 line)))))) + (setf (gethash sab sab-srl-hash) (aref line 6)))))) (defun init-hash-table (&optional (force-read nil)) (when (or force-read (not *parse-hash-init?*)) @@ -114,17 +116,6 @@ (setf (gethash key hash) lrl)))) ;; UMLS file and column structures - -(defstruct (umls-file) - "Record for each UMLS File" - fil table des fmt cls rws bts fields colstructs) - -(defstruct (umls-col) - "Record for each UMLS Column in each file" - col des ref min av max fil sqltype - dty ;; new in 2002 umls: suggested SQL datatype - parsefunc quotechar datatype custom-value-func) - ;;; SQL datatypes symbols ;;; sql-u - Unique identifier ;;; sql-s - Small integer (16-bit) @@ -132,7 +123,7 @@ ;;; sql-l - Big integer (64-bit) ;;; sql-f - Floating point -(defconstant +col-datatypes+ +(defparameter +col-datatypes+ '(("AV" sql-f) ("BTS" sql-i) ("CLS" sql-i) ("COF" sql-i) ("CUI1" sql-u) ("CUI2" sql-u) ("CUI" sql-u) ("CXN" sql-s) ("FR" sql-i) ("LRL" sql-s) ("LUI" sql-u) ("MAX" sql-s) ("MIN" sql-s) ("RANK" sql-s) ("REF" sql-s) @@ -143,64 +134,67 @@ ;;; LEX columns ("EUI" sql-u) ("EUI2" sql-u) ;;; Semantic net columns - ("UI" sql-u) ("UI2" sql-u) ("UI3" sql-u)) + ("UI" sql-u) ("UI2" sql-u) ("UI3" sql-u) + ;; New fields for 2002AD + ("RCUI" sql-u) ("VCUI" sql-u) ("CFR" sql-i) ("TFR" sql-i) + ) "SQL data types for each non-string column") -(defconstant +custom-tables+ +(defparameter +custom-tables+ nil #+ignore '(("MRCONSO" "SELECT m.CUI, m.LAT, m.TS, m.LUI, m.STT, m.SUI, m.STR, m.LRL, s.SAB, s.TTY, s.SCD, s.SRL FROM MRCON m, MRSO s WHERE m.CUI=s.CUI AND m.LUI=s.LUI AND m.SUI=s.SUI") ("MRCONFULL" "SELECT m.CUI, m.LAT, m.TS, m.LUI, m.STT, m.SUI, m.STR, m.LRL, s.SAB, s.TTY, s.SCD, s.SRL, t.TUI FROM MRCON m, MRSO s, MRSTY t WHERE m.CUI=s.CUI AND m.LUI=s.LUI AND m.SUI=s.SUI AND m.CUI=t.CUI AND s.CUI=t.CUI")) "Custom tables to create") -(defconstant +custom-cols+ +(defparameter +custom-cols+ '(("MRCON" "KPFSTR" "TEXT" 1024 (lambda (x) (pfstr-hash (parse-ui (nth 0 x))))) ("MRCON" "KCUISUI" "BIGINT" 0 - (lambda (x) (format nil "~d" (make-cuisui (parse-ui (nth 0 x)) (parse-ui (nth 5 x)))))) + (lambda (x) (write-to-string (make-cuisui (parse-ui (nth 0 x)) (parse-ui (nth 5 x)))))) ("MRCON" "KCUILUI" "BIGINT" 0 - (lambda (x) (format nil "~d" (make-cuilui (parse-ui (nth 0 x)) (parse-ui (nth 3 x)))))) + (lambda (x) (write-to-string (make-cuilui (parse-ui (nth 0 x)) (parse-ui (nth 3 x)))))) ("MRCON" "KCUILRL" "INTEGER" 0 - (lambda (x) (format nil "~d" (cui-lrl (parse-ui (nth 0 x)))))) + (lambda (x) (write-to-string (cui-lrl (parse-ui (nth 0 x)))))) ("MRCON" "KLUILRL" "INTEGER" 0 - (lambda (x) (format nil "~d" (lui-lrl (parse-ui (nth 3 x)))))) + (lambda (x) (write-to-string (lui-lrl (parse-ui (nth 3 x)))))) ("MRLO" "KLRL" "INTEGER" 0 - (lambda (x) (format nil "~d" + (lambda (x) (write-to-string (if (zerop (length (nth 4 x))) (cui-lrl (parse-ui (nth 0 x))) (cuisui-lrl (make-cuisui (parse-ui (nth 0 x)) (parse-ui (nth 4 x)))))))) ("MRSTY" "KLRL" "INTEGER" 0 - (lambda (x) (format nil "~d" (cui-lrl (parse-ui (nth 0 x)))))) + (lambda (x) (write-to-string (cui-lrl (parse-ui (nth 0 x)))))) ("MRCOC" "KLRL" "INTEGER" 0 - (lambda (x) (format nil "~d" + (lambda (x) (write-to-string (max (cui-lrl (parse-ui (nth 0 x))) (kmrcl:aif (cui-lrl (parse-ui (nth 1 x))) kmrcl::it 0))))) ("MRSAT" "KSRL" "INTEGER" 0 - (lambda (x) (format nil "~d" (sab-srl (nth 5 x))))) + (lambda (x) (write-to-string (sab-srl (nth 5 x))))) ("MRREL" "KSRL" "INTEGER" 0 - (lambda (x) (format nil "~d" (sab-srl (nth 4 x))))) + (lambda (x) (write-to-string (sab-srl (nth 4 x))))) ("MRRANK" "KSRL" "INTEGER" 0 - (lambda (x) (format nil "~d" (sab-srl (nth 1 x))))) + (lambda (x) (write-to-string (sab-srl (nth 1 x))))) ("MRDEF" "KSRL" "INTEGER" 0 - (lambda (x) (format nil "~d" (sab-srl (nth 1 x))))) + (lambda (x) (write-to-string (sab-srl (nth 1 x))))) ("MRCXT" "KSRL" "INTEGER" 0 - (lambda (x) (format nil "~d" (sab-srl (nth 2 x))))) + (lambda (x) (write-to-string (sab-srl (nth 2 x))))) ("MRATX" "KSRL" "INTEGER" 0 - (lambda (x) (format nil "~d" (sab-srl (nth 1 x))))) + (lambda (x) (write-to-string (sab-srl (nth 1 x))))) ("MRXW.ENG" "KLRL" "INTEGER" 0 - (lambda (x) (format nil "~d" (cuisui-lrl (make-cuisui + (lambda (x) (write-to-string (cuisui-lrl (make-cuisui (parse-ui (nth 2 x)) (parse-ui (nth 4 x))))))) ("MRXW.NONENG" "KLRL" "INTEGER" 0 - (lambda (x) (format nil "~d" (cuisui-lrl (make-cuisui + (lambda (x) (write-to-string (cuisui-lrl (make-cuisui (parse-ui (nth 2 x)) (parse-ui (nth 4 x))))))) ("MRXNW.ENG" "KLRL" "INTEGER" 0 - (lambda (x) (format nil "~d" (cuisui-lrl (make-cuisui + (lambda (x) (write-to-string (cuisui-lrl (make-cuisui (parse-ui (nth 2 x)) (parse-ui (nth 4 x))))))) ("MRXNS.ENG" "KLRL" "INTEGER" 0 - (lambda (x) (format nil "~d" (cuisui-lrl (make-cuisui + (lambda (x) (write-to-string (cuisui-lrl (make-cuisui (parse-ui (nth 2 x)) (parse-ui (nth 4 x))))))) ("MRREL" "KPFSTR2" "TEXT" 1024 @@ -208,29 +202,29 @@ ("MRCOC" "KPFSTR2" "TEXT" 1024 (lambda (x) (pfstr-hash (parse-ui (nth 1 x))))) ("MRCXT" "KCUISUI" "BIGINT" 0 - (lambda (x) (format nil "~d" (make-cuisui (parse-ui (nth 0 x)) (parse-ui (nth 1 x)))))) + (lambda (x) (write-to-string (make-cuisui (parse-ui (nth 0 x)) (parse-ui (nth 1 x)))))) ("MRSAT" "KCUILUI" "BIGINT" 0 - (lambda (x) (format nil "~d" (make-cuilui (parse-ui (nth 0 x)) (parse-ui (nth 1 x)))))) + (lambda (x) (write-to-string (make-cuilui (parse-ui (nth 0 x)) (parse-ui (nth 1 x)))))) ("MRSAT" "KCUISUI" "BIGINT" 0 - (lambda (x) (format nil "~d" (make-cuisui (parse-ui (nth 0 x)) (parse-ui (nth 2 x)))))) + (lambda (x) (write-to-string (make-cuisui (parse-ui (nth 0 x)) (parse-ui (nth 2 x)))))) ("MRSO" "KCUISUI" "BIGINT" 0 - (lambda (x) (format nil "~d" (make-cuisui (parse-ui (nth 0 x)) (parse-ui (nth 2 x)))))) + (lambda (x) (write-to-string (make-cuisui (parse-ui (nth 0 x)) (parse-ui (nth 2 x)))))) ("MRXW.ENG" "KCUISUI" "BIGINT" 0 - (lambda (x) (format nil "~d" (make-cuisui (parse-ui (nth 2 x)) (parse-ui (nth 4 x)))))) + (lambda (x) (write-to-string (make-cuisui (parse-ui (nth 2 x)) (parse-ui (nth 4 x)))))) ("MRXNW.ENG" "KCUISUI" "BIGINT" 0 - (lambda (x) (format nil "~d" (make-cuisui (parse-ui (nth 2 x)) (parse-ui (nth 4 x)))))) + (lambda (x) (write-to-string (make-cuisui (parse-ui (nth 2 x)) (parse-ui (nth 4 x)))))) ("MRXNS.ENG" "KCUISUI" "BIGINT" 0 - (lambda (x) (format nil "~d" (make-cuisui (parse-ui (nth 2 x)) (parse-ui (nth 4 x)))))) + (lambda (x) (write-to-string (make-cuisui (parse-ui (nth 2 x)) (parse-ui (nth 4 x)))))) ("MRXW.NONENG" "LAT" "CHAR" 3 (lambda (x) (nth 0 x))) ("MRXW.NONENG" "WD" "CHAR" 200 (lambda (x) (nth 1 x))) - ("MRXW.NONENG" "CUI" "INTEGER" 0 (lambda (x) (nth 2 x))) - ("MRXW.NONENG" "LUI" "INTEGER" 0 (lambda (x) (nth 3 x))) - ("MRXW.NONENG" "SUI" "INTEGER" 0 (lambda (x) (nth 4 x))) + ("MRXW.NONENG" "CUI" "INTEGER" 0 (lambda (x) (write-to-string (parse-ui (nth 2 x))))) + ("MRXW.NONENG" "LUI" "INTEGER" 0 (lambda (x) (write-to-string (parse-ui (nth 3 x))))) + ("MRXW.NONENG" "SUI" "INTEGER" 0 (lambda (x) (write-to-string (parse-ui (nth 4 x))))) ("MRXW.NONENG" "KCUISUI" "BIGINT" 0 - (lambda (x) (format nil "~d" (make-cuisui (parse-ui (nth 2 x)) (parse-ui (nth 4 x))))))) + (lambda (x) (write-to-string (make-cuisui (parse-ui (nth 2 x)) (parse-ui (nth 4 x))))))) "Custom columns to create.(filename, col, sqltype, value-func).") -(defconstant +index-cols+ +(defparameter +index-cols+ '(("CUI" "MRATX") ("CUI1" "MRCOC") ("CUI" "MRCON") ("LUI" "MRCON") ("LRL" "MRCON") ("SUI" "MRCON") ("CUI" "MRCXT") ("CUI" "MRDEF") ("CUI" "MRLO") @@ -255,11 +249,12 @@ ;; Semantic NET indices ("UI" "SRSTRE1") ("UI2" "SRSTRE1") ("UI3" "SRSTRE1") ("STY_RL" "SRDEF") ("RT" "SRDEF") ("STY_RL" "SRSTR") ("STY_RL2" "SRSTR") - ("RL" "SRSTR")) + ("RL" "SRSTR") + ("SRL" "MRSAB") ("RSAB" "MRSAB") ("VSAB" "MRSAB") ("RCUI" "MRSAB") + ("VCUI" "MRSAB") ("LAT" "MRSAB")) "Columns in files to index") - -(defconstant +custom-index-cols+ +(defparameter +custom-index-cols+ nil #+ignore '(("CUI" "MRCONFULL") ("SAB" "MRCONFULL") ("TUI" "MRCONFULL")) @@ -267,180 +262,66 @@ ;; File & Column functions -(defun init-umls (&optional (alwaysclear nil)) -"Initialize all UMLS file and column structures if not already initialized" - (when (or alwaysclear (null *umls-files*)) - (init-umls-cols) - (init-umls-files) - (init-field-lengths))) +(defun gen-ucols () + (add-ucols (gen-ucols-meta)) + (add-ucols (gen-ucols-custom)) + (add-ucols (gen-ucols-generic "LRFLD")) + (add-ucols (gen-ucols-generic "SRFLD"))) -(defun init-umls-cols () - (setq *umls-cols* (append - (init-meta-cols) - (init-custom-cols) - (init-generic-cols "LRFLD") - (init-generic-cols "SRFLD")))) - -(defun init-meta-cols () +(defun gen-ucols-meta () "Initialize all umls columns" (let ((cols '())) (with-umls-file (line "MRCOLS") (destructuring-bind (col des ref min av max fil dty) line - (let ((c (make-umls-col - :col col - :des des - :ref ref - :min (parse-integer min) - :av (read-from-string av) - :max (parse-integer max) - :fil fil - :dty dty ;; new in 2002 UMLS - :sqltype "VARCHAR" ; default data type - :parsefunc #'add-sql-quotes - :custom-value-func nil - :quotechar "'"))) - (add-datatype-to-col c (datatype-for-col col)) - (push c cols)))) + (push (make-ucol col des ref (parse-integer min) (read-from-string av) + (parse-integer max) fil dty) + cols))) (nreverse cols))) -(defun init-custom-cols () +(defun gen-ucols-custom () "Initialize umls columns for custom columns" - (let ((cols '())) - (dolist (customcol +custom-cols+) - (let ((c (make-umls-col :col (nth 1 customcol) - :des "" - :ref 0 - :min 0 - :max (nth 3 customcol) - :av 0 - :dty nil - :fil (nth 0 customcol) - :sqltype (nth 2 customcol) - :parsefunc #'add-sql-quotes - :custom-value-func (nth 4 customcol) - :quotechar "'"))) - (add-datatype-to-col c (datatype-for-col (nth 1 customcol))) - (push c cols))) - (nreverse cols))) - -(defun escape-column-name (name) - (substitute #\_ #\/ name)) + (loop for customcol in +custom-cols+ + collect + (make-ucol (nth 1 customcol) "" 0 0 0 (nth 3 customcol) + (nth 0 customcol) nil :sqltype (nth 2 customcol)))) -(defun init-generic-cols (col-filename) +(defun gen-ucols-generic (col-filename) "Initialize for generic (LEX/NET) columns" (let ((cols '())) (with-umls-file (line col-filename) (destructuring-bind (nam des ref fil) line (setq nam (escape-column-name nam)) (dolist (file (delimited-string-to-list fil #\,)) - (let ((c (make-umls-col - :col nam - :des des - :ref ref - :min nil - :av nil - :max nil - :fil file - :dty nil - :sqltype "VARCHAR" ; default data type - :parsefunc #'add-sql-quotes - :custom-value-func nil - :quotechar "'"))) - (add-datatype-to-col c (datatype-for-col nam)) - (push c cols))))) + (push + (make-ucol nam des ref nil nil nil file nil) + cols)))) (nreverse cols))) -(defun init-umls-files () - (setq *umls-files* (append - (init-generic-files "MRFILES") - (init-generic-files "LRFIL") - (init-generic-files "SRFIL"))) - ;; need to separate this since init-custom-files depends on *umls-files* - (setq *umls-files* (append *umls-files* (init-custom-files)))) +(defun gen-ufiles () + (add-ufiles (gen-ufiles-generic "MRFILES")) + (add-ufiles (gen-ufiles-generic "LRFIL")) + (add-ufiles (gen-ufiles-generic "SRFIL")) + ;; needs to come last + (add-ufiles (gen-ufiles-custom))) -(defun umls-field-string-to-list (fmt) - "Converts a comma delimited list of fields into a list of field names. Will -append a unique number (starting at 2) onto a column name that is repeated in the list" - (let ((field-list (delimited-string-to-list (escape-column-name fmt) #\,)) - (col-count (make-hash-table :test 'equal))) - (dotimes (i (length field-list)) - (declare (fixnum i)) - (let ((col (nth i field-list))) - (multiple-value-bind (key found) (gethash col col-count) - (if found - (let ((next-id (1+ key))) - (setf (nth i field-list) (concatenate 'string - col - (format nil "~D" next-id))) - (setf (gethash col col-count) next-id)) - (setf (gethash col col-count) 1))))) - field-list)) - -(defun init-generic-files (files-filename) + +(defun gen-ufiles-generic (files-filename) "Initialize all LEX file structures" (let ((files '())) - (with-umls-file (line files-filename) - (destructuring-bind (fil des fmt cls rws bts) line - (let ((f (make-umls-file - :fil fil - :table (substitute #\_ #\. fil) - :des des - :fmt (escape-column-name fmt) - :cls (parse-integer cls) - :rws (parse-integer rws) - :bts (parse-integer bts) - :fields (concatenate 'list - (umls-field-string-to-list fmt) - (custom-colnames-for-filename fil))))) - (setf (umls-file-colstructs f) (umls-cols-for-umls-file f)) - (push f files)))) - (nreverse files))) - -(defun init-custom-files () - (let ((ffile (make-umls-file :fil "MRXW.NONENG" - :des "Custom NonEnglish Index" - :table "MRXW_NONENG" - :cls 5 - :rws 0 - :bts 0 - :fields (umls-file-fields (find-umls-file "MRXW.ENG"))))) - (setf (umls-file-colstructs ffile) - (umls-cols-for-umls-file ffile)) - (list ffile))) - -(defun datatype-for-col (colname) -"Return datatype for column name" - (car (cdr (find colname +col-datatypes+ :key #'car :test #'string-equal)))) + (with-umls-file (line files-filename) + (destructuring-bind (fil des fmt cls rws bts) line + (push (make-ufile + fil des (substitute #\_ #\. fil) (parse-integer cls) + (parse-integer rws) (parse-integer bts) + (concatenate 'list (umls-field-string-to-list fmt) + (custom-colnames-for-filename fil))) + files))) + (nreverse files))) -(defun add-datatype-to-col (col datatype) -"Add data type information to column" - (setf (umls-col-datatype col) datatype) - (case datatype - (sql-u (setf (umls-col-sqltype col) "INTEGER" - (umls-col-parsefunc col) #'parse-ui - (umls-col-quotechar col) "")) - (sql-s (setf (umls-col-sqltype col) "SMALLINT" - (umls-col-parsefunc col) #'parse-integer - (umls-col-quotechar col) "")) - (sql-l (setf (umls-col-sqltype col) "BIGINT" - (umls-col-parsefunc col) #'parse-integer - (umls-col-quotechar col) "")) - (sql-i (setf (umls-col-sqltype col) "INTEGER" - (umls-col-parsefunc col) #'parse-integer - (umls-col-quotechar col) "")) - (sql-f (setf (umls-col-sqltype col) "NUMERIC" - (umls-col-parsefunc col) #'read-from-string - (umls-col-quotechar col) "")) - (t ; Default column type, optimized text storage - (setf (umls-col-parsefunc col) #'add-sql-quotes - (umls-col-quotechar col) "'") - (when (and (umls-col-max col) (umls-col-av col)) - (if (> (umls-col-max col) 255) - (setf (umls-col-sqltype col) "TEXT") - (if (< (- (umls-col-max col) (umls-col-av col)) 4) - (setf (umls-col-sqltype col) "CHAR") ; if average bytes wasted < 4 - (setf (umls-col-sqltype col) "VARCHAR"))))))) +(defun gen-ufiles-custom () + (make-ufile "MRXW.NONENG" "Custom NonEnglish Index" "MRXW_NONENG" + 5 0 0 (fields (find-ufile "MRXW.ENG"))))