;;;; Name: parse-2002.lisp
;;;; Purpose: Parsing and SQL insertion routines for UMLisp which may
;;;; change from year to year
-;;;; Programmer: Kevin M. Rosenberg
+;;;; Author: Kevin M. Rosenberg
;;;; Date Started: Apr 2000
;;;;
-;;;; $Id: parse-2002.lisp,v 1.5 2002/11/10 22:39:15 kevin Exp $
+;;;; $Id: parse-2002.lisp,v 1.15 2003/07/21 00:53:27 kevin Exp $
;;;;
;;;; This file, part of UMLisp, is
-;;;; Copyright (c) 2000-2002 by Kevin M. Rosenberg, M.D.
+;;;; Copyright (c) 2000-2003 by Kevin M. Rosenberg, M.D.
;;;;
;;;; UMLisp users are granted the rights to distribute and use this software
;;;; as governed by the terms of the GNU General Public License.
;;;; *************************************************************************
-(in-package :umlisp)
-(declaim (optimize (speed 3) (safety 1) (compilation-speed 0) (debug 3)))
+(in-package #:umlisp)
+
+(eval-when (:compile-toplevel)
+ (declaim (optimize (speed 3) (safety 1) (compilation-speed 0) (debug 3))))
;;; Pre-read data for custom fields into hash tables
-(defvar *parse-hash-init?* nil)
+(defvar *preparse-hash-init?* nil)
(eval-when (:compile-toplevel :load-toplevel :execute)
(let ((pfstr-hash nil) ;;; Preferred concept strings by CUI
(cuisui-lrl-hash nil) ;;; LRL by CUISUI
(sab-srl-hash nil)) ;;; SRL by SAB
- (defun make-parse-hash-table ()
+ (defun make-preparse-hash-table ()
(if pfstr-hash
(progn
(clrhash pfstr-hash)
cuisui-lrl-hash (make-hash-table :size 1800000)
sab-srl-hash (make-hash-table :size 100 :test 'equal))))
- (defun binit-hash-table (&optional (force-read nil))
- (when (or force-read (not *parse-hash-init?*))
- (make-parse-hash-table)
- (setq *parse-hash-init?* t))
+ (defun buffered-ensure-preparse (&optional (force-read nil))
+ (when (or force-read (not *preparse-hash-init?*))
+ (make-preparse-hash-table)
+ (setq *preparse-hash-init?* t))
(with-buffered-umls-file (line "MRCON")
(let ((cui (parse-ui (aref line 0)))
(lui (parse-ui (aref line 3)))
(unless (gethash sab sab-srl-hash) ;; if haven't stored
(setf (gethash sab sab-srl-hash) (aref line 6))))))
- (defun init-hash-table (&optional (force-read nil))
- (when (or force-read (not *parse-hash-init?*))
- (make-parse-hash-table)
- (setq *parse-hash-init?* t))
+ (defun ensure-preparse (&optional (force-read nil))
+ (when (or force-read (not *preparse-hash-init?*))
+ (make-preparse-hash-table)
+ (setq *preparse-hash-init?* t))
(with-umls-file (line "MRCON")
(let ((cui (parse-ui (nth 0 line)))
(lui (parse-ui (nth 3 line)))
(gethash cuisui cuisui-lrl-hash))
(defun sab-srl (sab)
- (kmrcl:aif (gethash sab sab-srl-hash) kmrcl::it 0))
+ (aif (gethash sab sab-srl-hash) it 0))
)) ;; closure
(defun set-lrl-hash (key lrl hash)
(setf (gethash key hash) lrl))))
;; UMLS file and column structures
-
-(defstruct (umls-file)
- "Record for each UMLS File"
- fil table des fmt cls rws bts fields colstructs)
-
-(defstruct (umls-col)
- "Record for each UMLS Column in each file"
- col des ref min av max fil sqltype
- dty ;; new in 2002 umls: suggested SQL datatype
- parsefunc quotechar datatype custom-value-func)
-
;;; SQL datatypes symbols
;;; sql-u - Unique identifier
;;; sql-s - Small integer (16-bit)
'(("MRCON" "KPFSTR" "TEXT" 1024
(lambda (x) (pfstr-hash (parse-ui (nth 0 x)))))
("MRCON" "KCUISUI" "BIGINT" 0
- (lambda (x) (format nil "~d" (make-cuisui (parse-ui (nth 0 x)) (parse-ui (nth 5 x))))))
+ (lambda (x) (write-to-string (make-cuisui (parse-ui (nth 0 x)) (parse-ui (nth 5 x))))))
("MRCON" "KCUILUI" "BIGINT" 0
- (lambda (x) (format nil "~d" (make-cuilui (parse-ui (nth 0 x)) (parse-ui (nth 3 x))))))
+ (lambda (x) (write-to-string (make-cuilui (parse-ui (nth 0 x)) (parse-ui (nth 3 x))))))
("MRCON" "KCUILRL" "INTEGER" 0
- (lambda (x) (format nil "~d" (cui-lrl (parse-ui (nth 0 x))))))
+ (lambda (x) (write-to-string (cui-lrl (parse-ui (nth 0 x))))))
("MRCON" "KLUILRL" "INTEGER" 0
- (lambda (x) (format nil "~d" (lui-lrl (parse-ui (nth 3 x))))))
+ (lambda (x) (write-to-string (lui-lrl (parse-ui (nth 3 x))))))
("MRLO" "KLRL" "INTEGER" 0
- (lambda (x) (format nil "~d"
+ (lambda (x) (write-to-string
(if (zerop (length (nth 4 x)))
(cui-lrl (parse-ui (nth 0 x)))
(cuisui-lrl (make-cuisui (parse-ui (nth 0 x)) (parse-ui (nth 4 x))))))))
("MRSTY" "KLRL" "INTEGER" 0
- (lambda (x) (format nil "~d" (cui-lrl (parse-ui (nth 0 x))))))
+ (lambda (x) (write-to-string (cui-lrl (parse-ui (nth 0 x))))))
("MRCOC" "KLRL" "INTEGER" 0
- (lambda (x) (format nil "~d"
+ (lambda (x) (write-to-string
(max (cui-lrl (parse-ui (nth 0 x)))
(kmrcl:aif (cui-lrl (parse-ui (nth 1 x))) kmrcl::it 0)))))
("MRSAT" "KSRL" "INTEGER" 0
- (lambda (x) (format nil "~d" (sab-srl (nth 5 x)))))
+ (lambda (x) (write-to-string (sab-srl (nth 5 x)))))
("MRREL" "KSRL" "INTEGER" 0
- (lambda (x) (format nil "~d" (sab-srl (nth 4 x)))))
+ (lambda (x) (write-to-string (sab-srl (nth 4 x)))))
("MRRANK" "KSRL" "INTEGER" 0
- (lambda (x) (format nil "~d" (sab-srl (nth 1 x)))))
+ (lambda (x) (write-to-string (sab-srl (nth 1 x)))))
("MRDEF" "KSRL" "INTEGER" 0
- (lambda (x) (format nil "~d" (sab-srl (nth 1 x)))))
+ (lambda (x) (write-to-string (sab-srl (nth 1 x)))))
("MRCXT" "KSRL" "INTEGER" 0
- (lambda (x) (format nil "~d" (sab-srl (nth 2 x)))))
+ (lambda (x) (write-to-string (sab-srl (nth 2 x)))))
("MRATX" "KSRL" "INTEGER" 0
- (lambda (x) (format nil "~d" (sab-srl (nth 1 x)))))
+ (lambda (x) (write-to-string (sab-srl (nth 1 x)))))
("MRXW.ENG" "KLRL" "INTEGER" 0
- (lambda (x) (format nil "~d" (cuisui-lrl (make-cuisui
+ (lambda (x) (write-to-string (cuisui-lrl (make-cuisui
(parse-ui (nth 2 x))
(parse-ui (nth 4 x)))))))
("MRXW.NONENG" "KLRL" "INTEGER" 0
- (lambda (x) (format nil "~d" (cuisui-lrl (make-cuisui
+ (lambda (x) (write-to-string (cuisui-lrl (make-cuisui
(parse-ui (nth 2 x))
(parse-ui (nth 4 x)))))))
("MRXNW.ENG" "KLRL" "INTEGER" 0
- (lambda (x) (format nil "~d" (cuisui-lrl (make-cuisui
+ (lambda (x) (write-to-string (cuisui-lrl (make-cuisui
(parse-ui (nth 2 x))
(parse-ui (nth 4 x)))))))
("MRXNS.ENG" "KLRL" "INTEGER" 0
- (lambda (x) (format nil "~d" (cuisui-lrl (make-cuisui
+ (lambda (x) (write-to-string (cuisui-lrl (make-cuisui
(parse-ui (nth 2 x))
(parse-ui (nth 4 x)))))))
("MRREL" "KPFSTR2" "TEXT" 1024
("MRCOC" "KPFSTR2" "TEXT" 1024
(lambda (x) (pfstr-hash (parse-ui (nth 1 x)))))
("MRCXT" "KCUISUI" "BIGINT" 0
- (lambda (x) (format nil "~d" (make-cuisui (parse-ui (nth 0 x)) (parse-ui (nth 1 x))))))
+ (lambda (x) (write-to-string (make-cuisui (parse-ui (nth 0 x)) (parse-ui (nth 1 x))))))
("MRSAT" "KCUILUI" "BIGINT" 0
- (lambda (x) (format nil "~d" (make-cuilui (parse-ui (nth 0 x)) (parse-ui (nth 1 x))))))
+ (lambda (x) (write-to-string (make-cuilui (parse-ui (nth 0 x)) (parse-ui (nth 1 x))))))
("MRSAT" "KCUISUI" "BIGINT" 0
- (lambda (x) (format nil "~d" (make-cuisui (parse-ui (nth 0 x)) (parse-ui (nth 2 x))))))
+ (lambda (x) (write-to-string (make-cuisui (parse-ui (nth 0 x)) (parse-ui (nth 2 x))))))
("MRSO" "KCUISUI" "BIGINT" 0
- (lambda (x) (format nil "~d" (make-cuisui (parse-ui (nth 0 x)) (parse-ui (nth 2 x))))))
+ (lambda (x) (write-to-string (make-cuisui (parse-ui (nth 0 x)) (parse-ui (nth 2 x))))))
("MRXW.ENG" "KCUISUI" "BIGINT" 0
- (lambda (x) (format nil "~d" (make-cuisui (parse-ui (nth 2 x)) (parse-ui (nth 4 x))))))
+ (lambda (x) (write-to-string (make-cuisui (parse-ui (nth 2 x)) (parse-ui (nth 4 x))))))
("MRXNW.ENG" "KCUISUI" "BIGINT" 0
- (lambda (x) (format nil "~d" (make-cuisui (parse-ui (nth 2 x)) (parse-ui (nth 4 x))))))
+ (lambda (x) (write-to-string (make-cuisui (parse-ui (nth 2 x)) (parse-ui (nth 4 x))))))
("MRXNS.ENG" "KCUISUI" "BIGINT" 0
- (lambda (x) (format nil "~d" (make-cuisui (parse-ui (nth 2 x)) (parse-ui (nth 4 x))))))
+ (lambda (x) (write-to-string (make-cuisui (parse-ui (nth 2 x)) (parse-ui (nth 4 x))))))
("MRXW.NONENG" "LAT" "CHAR" 3 (lambda (x) (nth 0 x)))
("MRXW.NONENG" "WD" "CHAR" 200 (lambda (x) (nth 1 x)))
- ("MRXW.NONENG" "CUI" "INTEGER" 0 (lambda (x) (nth 2 x)))
- ("MRXW.NONENG" "LUI" "INTEGER" 0 (lambda (x) (nth 3 x)))
- ("MRXW.NONENG" "SUI" "INTEGER" 0 (lambda (x) (nth 4 x)))
+ ("MRXW.NONENG" "CUI" "INTEGER" 0 (lambda (x) (write-to-string (parse-ui (nth 2 x)))))
+ ("MRXW.NONENG" "LUI" "INTEGER" 0 (lambda (x) (write-to-string (parse-ui (nth 3 x)))))
+ ("MRXW.NONENG" "SUI" "INTEGER" 0 (lambda (x) (write-to-string (parse-ui (nth 4 x)))))
("MRXW.NONENG" "KCUISUI" "BIGINT" 0
- (lambda (x) (format nil "~d" (make-cuisui (parse-ui (nth 2 x)) (parse-ui (nth 4 x)))))))
+ (lambda (x) (write-to-string (make-cuisui (parse-ui (nth 2 x)) (parse-ui (nth 4 x)))))))
"Custom columns to create.(filename, col, sqltype, value-func).")
(defparameter +index-cols+
("VCUI" "MRSAB") ("LAT" "MRSAB"))
"Columns in files to index")
+
(defparameter +custom-index-cols+
nil
#+ignore
;; File & Column functions
-(defun init-umls (&optional (alwaysclear nil))
-"Initialize all UMLS file and column structures if not already initialized"
- (when (or alwaysclear (null *umls-files*))
- (init-umls-cols)
- (init-umls-files)
- (init-field-lengths)))
-
-(defun init-umls-cols ()
- (setq *umls-cols* (append
- (init-meta-cols)
- (init-custom-cols)
- (init-generic-cols "LRFLD")
- (init-generic-cols "SRFLD"))))
+(defun gen-ucols ()
+ (add-ucols (gen-ucols-meta))
+ (add-ucols (gen-ucols-custom))
+ (add-ucols (gen-ucols-generic "LRFLD"))
+ (add-ucols (gen-ucols-generic "SRFLD")))
-(defun init-meta-cols ()
+(defun gen-ucols-meta ()
"Initialize all umls columns"
(let ((cols '()))
(with-umls-file (line "MRCOLS")
(destructuring-bind (col des ref min av max fil dty) line
- (let ((c (make-umls-col
- :col col
- :des des
- :ref ref
- :min (parse-integer min)
- :av (read-from-string av)
- :max (parse-integer max)
- :fil fil
- :dty dty ;; new in 2002 UMLS
- :sqltype "VARCHAR" ; default data type
- :parsefunc #'add-sql-quotes
- :custom-value-func nil
- :quotechar "'")))
- (add-datatype-to-col c (datatype-for-col col))
- (push c cols))))
+ (push (make-ucol col des ref (parse-integer min) (read-from-string av)
+ (parse-integer max) fil dty)
+ cols)))
(nreverse cols)))
-(defun init-custom-cols ()
+(defun gen-ucols-custom ()
"Initialize umls columns for custom columns"
- (let ((cols '()))
- (dolist (customcol +custom-cols+)
- (let ((c (make-umls-col :col (nth 1 customcol)
- :des ""
- :ref 0
- :min 0
- :max (nth 3 customcol)
- :av 0
- :dty nil
- :fil (nth 0 customcol)
- :sqltype (nth 2 customcol)
- :parsefunc #'add-sql-quotes
- :custom-value-func (nth 4 customcol)
- :quotechar "'")))
- (add-datatype-to-col c (datatype-for-col (nth 1 customcol)))
- (push c cols)))
- (nreverse cols)))
-
-(defun escape-column-name (name)
- (substitute #\_ #\/ name))
+ (loop for customcol in +custom-cols+
+ collect
+ (make-ucol (nth 1 customcol) "" 0 0 0 (nth 3 customcol)
+ (nth 0 customcol) nil :sqltype (nth 2 customcol)
+ :custom-value-fun (nth 4 customcol))))
-(defun init-generic-cols (col-filename)
+(defun gen-ucols-generic (col-filename)
"Initialize for generic (LEX/NET) columns"
(let ((cols '()))
(with-umls-file (line col-filename)
(destructuring-bind (nam des ref fil) line
(setq nam (escape-column-name nam))
(dolist (file (delimited-string-to-list fil #\,))
- (let ((c (make-umls-col
- :col nam
- :des des
- :ref ref
- :min nil
- :av nil
- :max nil
- :fil file
- :dty nil
- :sqltype "VARCHAR" ; default data type
- :parsefunc #'add-sql-quotes
- :custom-value-func nil
- :quotechar "'")))
- (add-datatype-to-col c (datatype-for-col nam))
- (push c cols)))))
+ (push
+ (make-ucol nam des ref nil nil nil file nil)
+ cols))))
(nreverse cols)))
-(defun init-umls-files ()
- (setq *umls-files* (append
- (init-generic-files "MRFILES")
- (init-generic-files "LRFIL")
- (init-generic-files "SRFIL")))
- ;; need to separate this since init-custom-files depends on *umls-files*
- (setq *umls-files* (append *umls-files* (init-custom-files))))
+(defun gen-ufiles ()
+ (add-ufiles (gen-ufiles-generic "MRFILES"))
+ (add-ufiles (gen-ufiles-generic "LRFIL"))
+ (add-ufiles (gen-ufiles-generic "SRFIL"))
+ ;; needs to come last
+ (add-ufiles (gen-ufiles-custom)))
-(defun umls-field-string-to-list (fmt)
- "Converts a comma delimited list of fields into a list of field names. Will
-append a unique number (starting at 2) onto a column name that is repeated in the list"
- (let ((field-list (delimited-string-to-list (escape-column-name fmt) #\,))
- (col-count (make-hash-table :test 'equal)))
- (dotimes (i (length field-list))
- (declare (fixnum i))
- (let ((col (nth i field-list)))
- (multiple-value-bind (key found) (gethash col col-count)
- (if found
- (let ((next-id (1+ key)))
- (setf (nth i field-list) (concatenate 'string
- col
- (format nil "~D" next-id)))
- (setf (gethash col col-count) next-id))
- (setf (gethash col col-count) 1)))))
- field-list))
-
-(defun init-generic-files (files-filename)
+
+(defun gen-ufiles-generic (files-filename)
"Initialize all LEX file structures"
(let ((files '()))
- (with-umls-file (line files-filename)
- (destructuring-bind (fil des fmt cls rws bts) line
- (let ((f (make-umls-file
- :fil fil
- :table (substitute #\_ #\. fil)
- :des des
- :fmt (escape-column-name fmt)
- :cls (parse-integer cls)
- :rws (parse-integer rws)
- :bts (parse-integer bts)
- :fields (concatenate 'list
- (umls-field-string-to-list fmt)
- (custom-colnames-for-filename fil)))))
- (setf (umls-file-colstructs f) (umls-cols-for-umls-file f))
- (push f files))))
- (nreverse files)))
-
-(defun init-custom-files ()
- (let ((ffile (make-umls-file :fil "MRXW.NONENG"
- :des "Custom NonEnglish Index"
- :table "MRXW_NONENG"
- :cls 5
- :rws 0
- :bts 0
- :fields (umls-file-fields (find-umls-file "MRXW.ENG")))))
- (setf (umls-file-colstructs ffile)
- (umls-cols-for-umls-file ffile))
- (list ffile)))
-
-(defun datatype-for-col (colname)
-"Return datatype for column name"
- (car (cdr (find colname +col-datatypes+ :key #'car :test #'string-equal))))
+ (with-umls-file (line files-filename)
+ (destructuring-bind (fil des fmt cls rws bts) line
+ (push (make-ufile
+ fil des (substitute #\_ #\. fil) (parse-integer cls)
+ (parse-integer rws) (parse-integer bts)
+ (concatenate 'list (umls-field-string-to-list fmt)
+ (custom-colnames-for-filename fil)))
+ files)))
+ (nreverse files)))
-(defun add-datatype-to-col (col datatype)
-"Add data type information to column"
- (setf (umls-col-datatype col) datatype)
- (case datatype
- (sql-u (setf (umls-col-sqltype col) "INTEGER"
- (umls-col-parsefunc col) #'parse-ui
- (umls-col-quotechar col) ""))
- (sql-s (setf (umls-col-sqltype col) "SMALLINT"
- (umls-col-parsefunc col) #'parse-integer
- (umls-col-quotechar col) ""))
- (sql-l (setf (umls-col-sqltype col) "BIGINT"
- (umls-col-parsefunc col) #'parse-integer
- (umls-col-quotechar col) ""))
- (sql-i (setf (umls-col-sqltype col) "INTEGER"
- (umls-col-parsefunc col) #'parse-integer
- (umls-col-quotechar col) ""))
- (sql-f (setf (umls-col-sqltype col) "NUMERIC"
- (umls-col-parsefunc col) #'read-from-string
- (umls-col-quotechar col) ""))
- (t ; Default column type, optimized text storage
- (setf (umls-col-parsefunc col) #'add-sql-quotes
- (umls-col-quotechar col) "'")
- (when (and (umls-col-max col) (umls-col-av col))
- (if (> (umls-col-max col) 255)
- (setf (umls-col-sqltype col) "TEXT")
- (if (< (- (umls-col-max col) (umls-col-av col)) 4)
- (setf (umls-col-sqltype col) "CHAR") ; if average bytes wasted < 4
- (setf (umls-col-sqltype col) "VARCHAR")))))))
+(defun gen-ufiles-custom ()
+ (make-ufile "MRXW.NONENG" "Custom NonEnglish Index" "MRXW_NONENG"
+ 5 0 0 (fields (find-ufile "MRXW.ENG"))))