X-Git-Url: http://git.kpe.io/?a=blobdiff_plain;f=parse-2002.lisp;h=7dbd52270c2937034e2b79d7061d336fcff144ec;hb=3d32184fac35651bbe223e1a1fdf183ce16c081a;hp=67a4e0274a80acff2b8b8993e88f430f8baf0946;hpb=5a80d916816131a1d56a60790cd1e4b5e9c88810;p=umlisp.git diff --git a/parse-2002.lisp b/parse-2002.lisp index 67a4e02..7dbd522 100644 --- a/parse-2002.lisp +++ b/parse-2002.lisp @@ -5,10 +5,10 @@ ;;;; Name: parse-2002.lisp ;;;; Purpose: Parsing and SQL insertion routines for UMLisp which may ;;;; change from year to year -;;;; Programmer: Kevin M. Rosenberg +;;;; Author: Kevin M. Rosenberg ;;;; Date Started: Apr 2000 ;;;; -;;;; $Id: parse-2002.lisp,v 1.4 2002/10/14 09:25:20 kevin Exp $ +;;;; $Id: parse-2002.lisp,v 1.12 2003/05/08 04:36:12 kevin Exp $ ;;;; ;;;; This file, part of UMLisp, is ;;;; Copyright (c) 2000-2002 by Kevin M. Rosenberg, M.D. @@ -17,11 +17,13 @@ ;;;; as governed by the terms of the GNU General Public License. ;;;; ************************************************************************* -(in-package :umlisp) -(declaim (optimize (speed 3) (safety 1) (compilation-speed 0) (debug 3))) +(in-package #:umlisp) + +(eval-when (:compile-toplevel) + (declaim (optimize (speed 3) (safety 1) (compilation-speed 0) (debug 3)))) ;;; Pre-read data for custom fields into hash tables -(defvar *parse-hash-init?* nil) +(defvar *preparse-hash-init?* nil) (eval-when (:compile-toplevel :load-toplevel :execute) (let ((pfstr-hash nil) ;;; Preferred concept strings by CUI @@ -30,7 +32,7 @@ (cuisui-lrl-hash nil) ;;; LRL by CUISUI (sab-srl-hash nil)) ;;; SRL by SAB - (defun make-parse-hash-table () + (defun make-preparse-hash-table () (if pfstr-hash (progn (clrhash pfstr-hash) @@ -45,10 +47,10 @@ cuisui-lrl-hash (make-hash-table :size 1800000) sab-srl-hash (make-hash-table :size 100 :test 'equal)))) - (defun binit-hash-table (&optional (force-read nil)) - (when (or force-read (not *parse-hash-init?*)) - (make-parse-hash-table) - (setq *parse-hash-init?* t)) + (defun buffered-ensure-preparse (&optional (force-read nil)) + (when (or force-read (not *preparse-hash-init?*)) + (make-preparse-hash-table) + (setq *preparse-hash-init?* t)) (with-buffered-umls-file (line "MRCON") (let ((cui (parse-ui (aref line 0))) (lui (parse-ui (aref line 3))) @@ -67,10 +69,10 @@ (unless (gethash sab sab-srl-hash) ;; if haven't stored (setf (gethash sab sab-srl-hash) (aref line 6)))))) - (defun init-hash-table (&optional (force-read nil)) - (when (or force-read (not *parse-hash-init?*)) - (make-parse-hash-table) - (setq *parse-hash-init?* t)) + (defun ensure-preparse (&optional (force-read nil)) + (when (or force-read (not *preparse-hash-init?*)) + (make-preparse-hash-table) + (setq *preparse-hash-init?* t)) (with-umls-file (line "MRCON") (let ((cui (parse-ui (nth 0 line))) (lui (parse-ui (nth 3 line))) @@ -104,7 +106,7 @@ (gethash cuisui cuisui-lrl-hash)) (defun sab-srl (sab) - (kmrcl:aif (gethash sab sab-srl-hash) kmrcl::it 0)) + (aif (gethash sab sab-srl-hash) it 0)) )) ;; closure (defun set-lrl-hash (key lrl hash) @@ -114,17 +116,6 @@ (setf (gethash key hash) lrl)))) ;; UMLS file and column structures - -(defstruct (umls-file) - "Record for each UMLS File" - fil table des fmt cls rws bts fields colstructs) - -(defstruct (umls-col) - "Record for each UMLS Column in each file" - col des ref min av max fil sqltype - dty ;; new in 2002 umls: suggested SQL datatype - parsefunc quotechar datatype custom-value-func) - ;;; SQL datatypes symbols ;;; sql-u - Unique identifier ;;; sql-s - Small integer (16-bit) @@ -143,7 +134,10 @@ ;;; LEX columns ("EUI" sql-u) ("EUI2" sql-u) ;;; Semantic net columns - ("UI" sql-u) ("UI2" sql-u) ("UI3" sql-u)) + ("UI" sql-u) ("UI2" sql-u) ("UI3" sql-u) + ;; New fields for 2002AD + ("RCUI" sql-u) ("VCUI" sql-u) ("CFR" sql-i) ("TFR" sql-i) + ) "SQL data types for each non-string column") (defparameter +custom-tables+ @@ -157,50 +151,50 @@ '(("MRCON" "KPFSTR" "TEXT" 1024 (lambda (x) (pfstr-hash (parse-ui (nth 0 x))))) ("MRCON" "KCUISUI" "BIGINT" 0 - (lambda (x) (format nil "~d" (make-cuisui (parse-ui (nth 0 x)) (parse-ui (nth 5 x)))))) + (lambda (x) (write-to-string (make-cuisui (parse-ui (nth 0 x)) (parse-ui (nth 5 x)))))) ("MRCON" "KCUILUI" "BIGINT" 0 - (lambda (x) (format nil "~d" (make-cuilui (parse-ui (nth 0 x)) (parse-ui (nth 3 x)))))) + (lambda (x) (write-to-string (make-cuilui (parse-ui (nth 0 x)) (parse-ui (nth 3 x)))))) ("MRCON" "KCUILRL" "INTEGER" 0 - (lambda (x) (format nil "~d" (cui-lrl (parse-ui (nth 0 x)))))) + (lambda (x) (write-to-string (cui-lrl (parse-ui (nth 0 x)))))) ("MRCON" "KLUILRL" "INTEGER" 0 - (lambda (x) (format nil "~d" (lui-lrl (parse-ui (nth 3 x)))))) + (lambda (x) (write-to-string (lui-lrl (parse-ui (nth 3 x)))))) ("MRLO" "KLRL" "INTEGER" 0 - (lambda (x) (format nil "~d" + (lambda (x) (write-to-string (if (zerop (length (nth 4 x))) (cui-lrl (parse-ui (nth 0 x))) (cuisui-lrl (make-cuisui (parse-ui (nth 0 x)) (parse-ui (nth 4 x)))))))) ("MRSTY" "KLRL" "INTEGER" 0 - (lambda (x) (format nil "~d" (cui-lrl (parse-ui (nth 0 x)))))) + (lambda (x) (write-to-string (cui-lrl (parse-ui (nth 0 x)))))) ("MRCOC" "KLRL" "INTEGER" 0 - (lambda (x) (format nil "~d" + (lambda (x) (write-to-string (max (cui-lrl (parse-ui (nth 0 x))) (kmrcl:aif (cui-lrl (parse-ui (nth 1 x))) kmrcl::it 0))))) ("MRSAT" "KSRL" "INTEGER" 0 - (lambda (x) (format nil "~d" (sab-srl (nth 5 x))))) + (lambda (x) (write-to-string (sab-srl (nth 5 x))))) ("MRREL" "KSRL" "INTEGER" 0 - (lambda (x) (format nil "~d" (sab-srl (nth 4 x))))) + (lambda (x) (write-to-string (sab-srl (nth 4 x))))) ("MRRANK" "KSRL" "INTEGER" 0 - (lambda (x) (format nil "~d" (sab-srl (nth 1 x))))) + (lambda (x) (write-to-string (sab-srl (nth 1 x))))) ("MRDEF" "KSRL" "INTEGER" 0 - (lambda (x) (format nil "~d" (sab-srl (nth 1 x))))) + (lambda (x) (write-to-string (sab-srl (nth 1 x))))) ("MRCXT" "KSRL" "INTEGER" 0 - (lambda (x) (format nil "~d" (sab-srl (nth 2 x))))) + (lambda (x) (write-to-string (sab-srl (nth 2 x))))) ("MRATX" "KSRL" "INTEGER" 0 - (lambda (x) (format nil "~d" (sab-srl (nth 1 x))))) + (lambda (x) (write-to-string (sab-srl (nth 1 x))))) ("MRXW.ENG" "KLRL" "INTEGER" 0 - (lambda (x) (format nil "~d" (cuisui-lrl (make-cuisui + (lambda (x) (write-to-string (cuisui-lrl (make-cuisui (parse-ui (nth 2 x)) (parse-ui (nth 4 x))))))) ("MRXW.NONENG" "KLRL" "INTEGER" 0 - (lambda (x) (format nil "~d" (cuisui-lrl (make-cuisui + (lambda (x) (write-to-string (cuisui-lrl (make-cuisui (parse-ui (nth 2 x)) (parse-ui (nth 4 x))))))) ("MRXNW.ENG" "KLRL" "INTEGER" 0 - (lambda (x) (format nil "~d" (cuisui-lrl (make-cuisui + (lambda (x) (write-to-string (cuisui-lrl (make-cuisui (parse-ui (nth 2 x)) (parse-ui (nth 4 x))))))) ("MRXNS.ENG" "KLRL" "INTEGER" 0 - (lambda (x) (format nil "~d" (cuisui-lrl (make-cuisui + (lambda (x) (write-to-string (cuisui-lrl (make-cuisui (parse-ui (nth 2 x)) (parse-ui (nth 4 x))))))) ("MRREL" "KPFSTR2" "TEXT" 1024 @@ -208,26 +202,26 @@ ("MRCOC" "KPFSTR2" "TEXT" 1024 (lambda (x) (pfstr-hash (parse-ui (nth 1 x))))) ("MRCXT" "KCUISUI" "BIGINT" 0 - (lambda (x) (format nil "~d" (make-cuisui (parse-ui (nth 0 x)) (parse-ui (nth 1 x)))))) + (lambda (x) (write-to-string (make-cuisui (parse-ui (nth 0 x)) (parse-ui (nth 1 x)))))) ("MRSAT" "KCUILUI" "BIGINT" 0 - (lambda (x) (format nil "~d" (make-cuilui (parse-ui (nth 0 x)) (parse-ui (nth 1 x)))))) + (lambda (x) (write-to-string (make-cuilui (parse-ui (nth 0 x)) (parse-ui (nth 1 x)))))) ("MRSAT" "KCUISUI" "BIGINT" 0 - (lambda (x) (format nil "~d" (make-cuisui (parse-ui (nth 0 x)) (parse-ui (nth 2 x)))))) + (lambda (x) (write-to-string (make-cuisui (parse-ui (nth 0 x)) (parse-ui (nth 2 x)))))) ("MRSO" "KCUISUI" "BIGINT" 0 - (lambda (x) (format nil "~d" (make-cuisui (parse-ui (nth 0 x)) (parse-ui (nth 2 x)))))) + (lambda (x) (write-to-string (make-cuisui (parse-ui (nth 0 x)) (parse-ui (nth 2 x)))))) ("MRXW.ENG" "KCUISUI" "BIGINT" 0 - (lambda (x) (format nil "~d" (make-cuisui (parse-ui (nth 2 x)) (parse-ui (nth 4 x)))))) + (lambda (x) (write-to-string (make-cuisui (parse-ui (nth 2 x)) (parse-ui (nth 4 x)))))) ("MRXNW.ENG" "KCUISUI" "BIGINT" 0 - (lambda (x) (format nil "~d" (make-cuisui (parse-ui (nth 2 x)) (parse-ui (nth 4 x)))))) + (lambda (x) (write-to-string (make-cuisui (parse-ui (nth 2 x)) (parse-ui (nth 4 x)))))) ("MRXNS.ENG" "KCUISUI" "BIGINT" 0 - (lambda (x) (format nil "~d" (make-cuisui (parse-ui (nth 2 x)) (parse-ui (nth 4 x)))))) + (lambda (x) (write-to-string (make-cuisui (parse-ui (nth 2 x)) (parse-ui (nth 4 x)))))) ("MRXW.NONENG" "LAT" "CHAR" 3 (lambda (x) (nth 0 x))) ("MRXW.NONENG" "WD" "CHAR" 200 (lambda (x) (nth 1 x))) - ("MRXW.NONENG" "CUI" "INTEGER" 0 (lambda (x) (nth 2 x))) - ("MRXW.NONENG" "LUI" "INTEGER" 0 (lambda (x) (nth 3 x))) - ("MRXW.NONENG" "SUI" "INTEGER" 0 (lambda (x) (nth 4 x))) + ("MRXW.NONENG" "CUI" "INTEGER" 0 (lambda (x) (write-to-string (parse-ui (nth 2 x))))) + ("MRXW.NONENG" "LUI" "INTEGER" 0 (lambda (x) (write-to-string (parse-ui (nth 3 x))))) + ("MRXW.NONENG" "SUI" "INTEGER" 0 (lambda (x) (write-to-string (parse-ui (nth 4 x))))) ("MRXW.NONENG" "KCUISUI" "BIGINT" 0 - (lambda (x) (format nil "~d" (make-cuisui (parse-ui (nth 2 x)) (parse-ui (nth 4 x))))))) + (lambda (x) (write-to-string (make-cuisui (parse-ui (nth 2 x)) (parse-ui (nth 4 x))))))) "Custom columns to create.(filename, col, sqltype, value-func).") (defparameter +index-cols+ @@ -255,10 +249,11 @@ ;; Semantic NET indices ("UI" "SRSTRE1") ("UI2" "SRSTRE1") ("UI3" "SRSTRE1") ("STY_RL" "SRDEF") ("RT" "SRDEF") ("STY_RL" "SRSTR") ("STY_RL2" "SRSTR") - ("RL" "SRSTR")) + ("RL" "SRSTR") + ("SRL" "MRSAB") ("RSAB" "MRSAB") ("VSAB" "MRSAB") ("RCUI" "MRSAB") + ("VCUI" "MRSAB") ("LAT" "MRSAB")) "Columns in files to index") - (defparameter +custom-index-cols+ nil #+ignore @@ -267,180 +262,67 @@ ;; File & Column functions -(defun init-umls (&optional (alwaysclear nil)) -"Initialize all UMLS file and column structures if not already initialized" - (when (or alwaysclear (null *umls-files*)) - (init-umls-cols) - (init-umls-files) - (init-field-lengths))) - -(defun init-umls-cols () - (setq *umls-cols* (append - (init-meta-cols) - (init-custom-cols) - (init-generic-cols "LRFLD") - (init-generic-cols "SRFLD")))) +(defun gen-ucols () + (add-ucols (gen-ucols-meta)) + (add-ucols (gen-ucols-custom)) + (add-ucols (gen-ucols-generic "LRFLD")) + (add-ucols (gen-ucols-generic "SRFLD"))) -(defun init-meta-cols () +(defun gen-ucols-meta () "Initialize all umls columns" (let ((cols '())) (with-umls-file (line "MRCOLS") (destructuring-bind (col des ref min av max fil dty) line - (let ((c (make-umls-col - :col col - :des des - :ref ref - :min (parse-integer min) - :av (read-from-string av) - :max (parse-integer max) - :fil fil - :dty dty ;; new in 2002 UMLS - :sqltype "VARCHAR" ; default data type - :parsefunc #'add-sql-quotes - :custom-value-func nil - :quotechar "'"))) - (add-datatype-to-col c (datatype-for-col col)) - (push c cols)))) + (push (make-ucol col des ref (parse-integer min) (read-from-string av) + (parse-integer max) fil dty) + cols))) (nreverse cols))) -(defun init-custom-cols () +(defun gen-ucols-custom () "Initialize umls columns for custom columns" - (let ((cols '())) - (dolist (customcol +custom-cols+) - (let ((c (make-umls-col :col (nth 1 customcol) - :des "" - :ref 0 - :min 0 - :max (nth 3 customcol) - :av 0 - :dty nil - :fil (nth 0 customcol) - :sqltype (nth 2 customcol) - :parsefunc #'add-sql-quotes - :custom-value-func (nth 4 customcol) - :quotechar "'"))) - (add-datatype-to-col c (datatype-for-col (nth 1 customcol))) - (push c cols))) - (nreverse cols))) - -(defun escape-column-name (name) - (substitute #\_ #\/ name)) + (loop for customcol in +custom-cols+ + collect + (make-ucol (nth 1 customcol) "" 0 0 0 (nth 3 customcol) + (nth 0 customcol) nil :sqltype (nth 2 customcol) + :custom-value-fun (nth 4 customcol)))) -(defun init-generic-cols (col-filename) +(defun gen-ucols-generic (col-filename) "Initialize for generic (LEX/NET) columns" (let ((cols '())) (with-umls-file (line col-filename) (destructuring-bind (nam des ref fil) line (setq nam (escape-column-name nam)) (dolist (file (delimited-string-to-list fil #\,)) - (let ((c (make-umls-col - :col nam - :des des - :ref ref - :min nil - :av nil - :max nil - :fil file - :dty nil - :sqltype "VARCHAR" ; default data type - :parsefunc #'add-sql-quotes - :custom-value-func nil - :quotechar "'"))) - (add-datatype-to-col c (datatype-for-col nam)) - (push c cols))))) + (push + (make-ucol nam des ref nil nil nil file nil) + cols)))) (nreverse cols))) -(defun init-umls-files () - (setq *umls-files* (append - (init-generic-files "MRFILES") - (init-generic-files "LRFIL") - (init-generic-files "SRFIL"))) - ;; need to separate this since init-custom-files depends on *umls-files* - (setq *umls-files* (append *umls-files* (init-custom-files)))) +(defun gen-ufiles () + (add-ufiles (gen-ufiles-generic "MRFILES")) + (add-ufiles (gen-ufiles-generic "LRFIL")) + (add-ufiles (gen-ufiles-generic "SRFIL")) + ;; needs to come last + (add-ufiles (gen-ufiles-custom))) -(defun umls-field-string-to-list (fmt) - "Converts a comma delimited list of fields into a list of field names. Will -append a unique number (starting at 2) onto a column name that is repeated in the list" - (let ((field-list (delimited-string-to-list (escape-column-name fmt) #\,)) - (col-count (make-hash-table :test 'equal))) - (dotimes (i (length field-list)) - (declare (fixnum i)) - (let ((col (nth i field-list))) - (multiple-value-bind (key found) (gethash col col-count) - (if found - (let ((next-id (1+ key))) - (setf (nth i field-list) (concatenate 'string - col - (format nil "~D" next-id))) - (setf (gethash col col-count) next-id)) - (setf (gethash col col-count) 1))))) - field-list)) - -(defun init-generic-files (files-filename) + +(defun gen-ufiles-generic (files-filename) "Initialize all LEX file structures" (let ((files '())) - (with-umls-file (line files-filename) - (destructuring-bind (fil des fmt cls rws bts) line - (let ((f (make-umls-file - :fil fil - :table (substitute #\_ #\. fil) - :des des - :fmt (escape-column-name fmt) - :cls (parse-integer cls) - :rws (parse-integer rws) - :bts (parse-integer bts) - :fields (concatenate 'list - (umls-field-string-to-list fmt) - (custom-colnames-for-filename fil))))) - (setf (umls-file-colstructs f) (umls-cols-for-umls-file f)) - (push f files)))) - (nreverse files))) - -(defun init-custom-files () - (let ((ffile (make-umls-file :fil "MRXW.NONENG" - :des "Custom NonEnglish Index" - :table "MRXW_NONENG" - :cls 5 - :rws 0 - :bts 0 - :fields (umls-file-fields (find-umls-file "MRXW.ENG"))))) - (setf (umls-file-colstructs ffile) - (umls-cols-for-umls-file ffile)) - (list ffile))) - -(defun datatype-for-col (colname) -"Return datatype for column name" - (car (cdr (find colname +col-datatypes+ :key #'car :test #'string-equal)))) + (with-umls-file (line files-filename) + (destructuring-bind (fil des fmt cls rws bts) line + (push (make-ufile + fil des (substitute #\_ #\. fil) (parse-integer cls) + (parse-integer rws) (parse-integer bts) + (concatenate 'list (umls-field-string-to-list fmt) + (custom-colnames-for-filename fil))) + files))) + (nreverse files))) -(defun add-datatype-to-col (col datatype) -"Add data type information to column" - (setf (umls-col-datatype col) datatype) - (case datatype - (sql-u (setf (umls-col-sqltype col) "INTEGER" - (umls-col-parsefunc col) #'parse-ui - (umls-col-quotechar col) "")) - (sql-s (setf (umls-col-sqltype col) "SMALLINT" - (umls-col-parsefunc col) #'parse-integer - (umls-col-quotechar col) "")) - (sql-l (setf (umls-col-sqltype col) "BIGINT" - (umls-col-parsefunc col) #'parse-integer - (umls-col-quotechar col) "")) - (sql-i (setf (umls-col-sqltype col) "INTEGER" - (umls-col-parsefunc col) #'parse-integer - (umls-col-quotechar col) "")) - (sql-f (setf (umls-col-sqltype col) "NUMERIC" - (umls-col-parsefunc col) #'read-from-string - (umls-col-quotechar col) "")) - (t ; Default column type, optimized text storage - (setf (umls-col-parsefunc col) #'add-sql-quotes - (umls-col-quotechar col) "'") - (when (and (umls-col-max col) (umls-col-av col)) - (if (> (umls-col-max col) 255) - (setf (umls-col-sqltype col) "TEXT") - (if (< (- (umls-col-max col) (umls-col-av col)) 4) - (setf (umls-col-sqltype col) "CHAR") ; if average bytes wasted < 4 - (setf (umls-col-sqltype col) "VARCHAR"))))))) +(defun gen-ufiles-custom () + (make-ufile "MRXW.NONENG" "Custom NonEnglish Index" "MRXW_NONENG" + 5 0 0 (fields (find-ufile "MRXW.ENG"))))