X-Git-Url: http://git.kpe.io/?a=blobdiff_plain;f=parse-2002.lisp;h=aa9c46a6c423dc0bfb00c7f5eea4b4601484f0b7;hb=aeade16272b79115d3f307906c7a3e9597137e97;hp=4024eb2d32a824d5646c16ea2d0a785a746a9353;hpb=e426167f52dd029869b5db3cf36b51124312d7d4;p=umlisp.git diff --git a/parse-2002.lisp b/parse-2002.lisp index 4024eb2..aa9c46a 100644 --- a/parse-2002.lisp +++ b/parse-2002.lisp @@ -2,16 +2,16 @@ ;;;; ************************************************************************* ;;;; FILE IDENTIFICATION ;;;; -;;;; Name: parse-2002.lisp -;;;; Purpose: Parsing and SQL insertion routines for UMLisp which may -;;;; change from year to year -;;;; Author: Kevin M. Rosenberg -;;;; Date Started: Apr 2000 +;;;; Name: parse-2002.lisp +;;;; Purpose: Parsing and SQL insertion routines for UMLisp which may +;;;; change from year to year +;;;; Author: Kevin M. Rosenberg +;;;; Created: Apr 2000 ;;;; -;;;; $Id: parse-2002.lisp,v 1.9 2003/05/06 07:55:15 kevin Exp $ +;;;; $Id$ ;;;; ;;;; This file, part of UMLisp, is -;;;; Copyright (c) 2000-2002 by Kevin M. Rosenberg, M.D. +;;;; Copyright (c) 2000-2004 by Kevin M. Rosenberg, M.D. ;;;; ;;;; UMLisp users are granted the rights to distribute and use this software ;;;; as governed by the terms of the GNU General Public License. @@ -19,11 +19,8 @@ (in-package #:umlisp) -(eval-when (:compile-toplevel) - (declaim (optimize (speed 3) (safety 1) (compilation-speed 0) (debug 3)))) - ;;; Pre-read data for custom fields into hash tables -(defvar *parse-hash-init?* nil) +(defvar *preparse-hash-init?* nil) (eval-when (:compile-toplevel :load-toplevel :execute) (let ((pfstr-hash nil) ;;; Preferred concept strings by CUI @@ -32,7 +29,7 @@ (cuisui-lrl-hash nil) ;;; LRL by CUISUI (sab-srl-hash nil)) ;;; SRL by SAB - (defun make-parse-hash-table () + (defun make-preparse-hash-table () (if pfstr-hash (progn (clrhash pfstr-hash) @@ -47,10 +44,10 @@ cuisui-lrl-hash (make-hash-table :size 1800000) sab-srl-hash (make-hash-table :size 100 :test 'equal)))) - (defun binit-hash-table (&optional (force-read nil)) - (when (or force-read (not *parse-hash-init?*)) - (make-parse-hash-table) - (setq *parse-hash-init?* t)) + (defun buffered-ensure-preparse (&optional (force-read nil)) + (when (or force-read (not *preparse-hash-init?*)) + (make-preparse-hash-table) + (setq *preparse-hash-init?* t)) (with-buffered-umls-file (line "MRCON") (let ((cui (parse-ui (aref line 0))) (lui (parse-ui (aref line 3))) @@ -69,10 +66,10 @@ (unless (gethash sab sab-srl-hash) ;; if haven't stored (setf (gethash sab sab-srl-hash) (aref line 6)))))) - (defun init-hash-table (&optional (force-read nil)) - (when (or force-read (not *parse-hash-init?*)) - (make-parse-hash-table) - (setq *parse-hash-init?* t)) + (defun ensure-preparse (&optional (force-read nil)) + (when (or force-read (not *preparse-hash-init?*)) + (make-preparse-hash-table) + (setq *preparse-hash-init?* t)) (with-umls-file (line "MRCON") (let ((cui (parse-ui (nth 0 line))) (lui (parse-ui (nth 3 line))) @@ -106,7 +103,7 @@ (gethash cuisui cuisui-lrl-hash)) (defun sab-srl (sab) - (kmrcl:aif (gethash sab sab-srl-hash) kmrcl::it 0)) + (aif (gethash sab sab-srl-hash) it 0)) )) ;; closure (defun set-lrl-hash (key lrl hash) @@ -122,11 +119,12 @@ ;;; sql-i - Integer (32-bit) ;;; sql-l - Big integer (64-bit) ;;; sql-f - Floating point +;;; sql-c - Character data (defparameter +col-datatypes+ '(("AV" sql-f) ("BTS" sql-i) ("CLS" sql-i) ("COF" sql-i) ("CUI1" sql-u) ("CUI2" sql-u) ("CUI" sql-u) ("CXN" sql-s) ("FR" sql-i) ("LRL" sql-s) - ("LUI" sql-u) ("MAX" sql-s) ("MIN" sql-s) ("RANK" sql-s) ("REF" sql-s) + ("LUI" sql-u) ("MAX" sql-s) ("MIN" sql-s) ("RANK" sql-s) ("REF" sql-c) ("RNK" sql-s) ("RWS" sql-i) ("SRL" sql-s) ("SUI" sql-u) ("TUI" sql-u) ;;; Custom columns ("KCUISUI" sql-l) ("KCUILUI" sql-l) ("KCUILRL" sql-i) ("KLUILRL" sql-i) @@ -254,6 +252,7 @@ ("VCUI" "MRSAB") ("LAT" "MRSAB")) "Columns in files to index") + (defparameter +custom-index-cols+ nil #+ignore @@ -262,167 +261,67 @@ ;; File & Column functions -(defun init-umls (&optional (alwaysclear nil)) -"Initialize all UMLS file and column structures if not already initialized" - (when (or alwaysclear (null *umls-files*)) - (init-umls-cols) - (init-umls-files) - (init-field-lengths))) - -(defun init-umls-cols () - (setq *umls-cols* (append - (init-meta-cols) - (init-custom-cols) - (init-generic-cols "LRFLD") - (init-generic-cols "SRFLD")))) +(defun gen-ucols () + (add-ucols (gen-ucols-meta)) + (add-ucols (gen-ucols-custom)) + (add-ucols (gen-ucols-generic "LRFLD")) + (add-ucols (gen-ucols-generic "SRFLD"))) -(defun init-meta-cols () +(defun gen-ucols-meta () "Initialize all umls columns" (let ((cols '())) (with-umls-file (line "MRCOLS") (destructuring-bind (col des ref min av max fil dty) line - (let ((c (make-instance 'ucol - :col col - :des des - :ref ref - :min (parse-integer min) - :av (read-from-string av) - :max (parse-integer max) - :fil fil - :dty dty ;; new in 2002 UMLS - :sqltype "VARCHAR" ; default data type - :parse-fun #'add-sql-quotes - :custom-value-fun nil - :quotechar "'"))) - (add-datatype-to-col c (datatype-for-col col)) - (push c cols)))) + (push (make-ucol col des ref (parse-integer min) (read-from-string av) + (parse-integer max) fil dty) + cols))) (nreverse cols))) -(defun init-custom-cols () +(defun gen-ucols-custom () "Initialize umls columns for custom columns" - (let ((cols '())) - (dolist (customcol +custom-cols+) - (let ((c (make-instance 'ucol - :col (nth 1 customcol) :des "" :ref 0 :min 0 :max (nth 3 customcol) - :av 0 :dty nil :fil (nth 0 customcol) :sqltype (nth 2 customcol) - :parse-fun #'add-sql-quotes :custom-value-fun (nth 4 customcol) - :quotechar "'"))) - (add-datatype-to-col c (datatype-for-col (nth 1 customcol))) - (push c cols))) - (nreverse cols))) + (loop for customcol in +custom-cols+ + collect + (make-ucol (nth 1 customcol) "" 0 0 0 (nth 3 customcol) + (nth 0 customcol) nil :sqltype (nth 2 customcol) + :custom-value-fun (nth 4 customcol)))) -(defun escape-column-name (name) - (substitute #\_ #\/ name)) - -(defun init-generic-cols (col-filename) +(defun gen-ucols-generic (col-filename) "Initialize for generic (LEX/NET) columns" (let ((cols '())) (with-umls-file (line col-filename) (destructuring-bind (nam des ref fil) line (setq nam (escape-column-name nam)) (dolist (file (delimited-string-to-list fil #\,)) - (let ((c (make-instance 'ucol - :col nam - :des des - :ref ref - :min nil - :av nil - :max nil - :fil file - :dty nil - :sqltype "VARCHAR" ; default data type - :parse-fun #'add-sql-quotes - :custom-value-fun nil - :quotechar "'"))) - (add-datatype-to-col c (datatype-for-col nam)) - (push c cols))))) + (push + (make-ucol nam des ref nil nil nil file nil) + cols)))) (nreverse cols))) -(defun init-umls-files () - (setq *umls-files* (append - (init-generic-files "MRFILES") - (init-generic-files "LRFIL") - (init-generic-files "SRFIL"))) - ;; need to separate this since init-custom-files depends on *umls-files* - (setq *umls-files* (append *umls-files* (init-custom-files)))) - -(defun umls-field-string-to-list (fmt) - "Converts a comma delimited list of fields into a list of field names. Will -append a unique number (starting at 2) onto a column name that is repeated in the list" - (let ((field-list (delimited-string-to-list (escape-column-name fmt) #\,)) - (col-count (make-hash-table :test 'equal))) - (dotimes (i (length field-list)) - (declare (fixnum i)) - (let ((col (nth i field-list))) - (multiple-value-bind (key found) (gethash col col-count) - (if found - (let ((next-id (1+ key))) - (setf (nth i field-list) (concatenate 'string col (write-to-string next-id))) - (setf (gethash col col-count) next-id)) - (setf (gethash col col-count) 1))))) - field-list)) +(defun gen-ufiles () + (add-ufiles (gen-ufiles-generic "MRFILES")) + (add-ufiles (gen-ufiles-generic "LRFIL")) + (add-ufiles (gen-ufiles-generic "SRFIL")) + ;; needs to come last + (add-ufiles (gen-ufiles-custom))) -(defun init-generic-files (files-filename) + +(defun gen-ufiles-generic (files-filename) "Initialize all LEX file structures" (let ((files '())) - (with-umls-file (line files-filename) - (destructuring-bind (fil des fmt cls rws bts) line - (let ((f (make-instance 'ufile - :fil fil - :table (substitute #\_ #\. fil) - :des des - :fmt (escape-column-name fmt) - :cls (parse-integer cls) - :rws (parse-integer rws) - :bts (parse-integer bts) - :fields (concatenate 'list - (umls-field-string-to-list fmt) - (custom-colnames-for-filename fil))))) - (setf (ucols f) (ucols-for-ufile f)) - (push f files)))) - (nreverse files))) - -(defun init-custom-files () - (let ((ffile (make-instance 'ufile - :fil "MRXW.NONENG" :des "Custom NonEnglish Index" :table "MRXW_NONENG" - :cls 5 :rws 0 :bts 0 :fields (fields (find-ufile "MRXW.ENG"))))) - (setf (ucols ffile) - (ucols-for-ufile ffile)) - (list ffile))) - -(defun datatype-for-col (colname) -"Return datatype for column name" - (car (cdr (find colname +col-datatypes+ :key #'car :test #'string-equal)))) + (with-umls-file (line files-filename) + (destructuring-bind (fil des fmt cls rws bts) line + (push (make-ufile + fil des (substitute #\_ #\. fil) (parse-integer cls) + (parse-integer rws) (parse-integer bts) + (concatenate 'list (umls-field-string-to-list fmt) + (custom-colnames-for-filename fil))) + files))) + (nreverse files))) -(defun add-datatype-to-col (col datatype) -"Add data type information to column" - (setf (datatype col) datatype) - (case datatype - (sql-u (setf (sqltype col) "INTEGER" - (parse-fun col) #'parse-ui - (quotechar col) "")) - (sql-s (setf (sqltype col) "SMALLINT" - (parse-fun col) #'parse-integer - (quotechar col) "")) - (sql-l (setf (sqltype col) "BIGINT" - (parse-fun col) #'parse-integer - (quotechar col) "")) - (sql-i (setf (sqltype col) "INTEGER" - (parse-fun col) #'parse-integer - (quotechar col) "")) - (sql-f (setf (sqltype col) "NUMERIC" - (parse-fun col) #'read-from-string - (quotechar col) "")) - (t ; Default column type, optimized text storage - (setf (parse-fun col) #'add-sql-quotes - (quotechar col) "'") - (when (and (cmax col) (av col)) - (if (> (cmax col) 255) - (setf (sqltype col) "TEXT") - (if (< (- (cmax col) (av col)) 4) - (setf (sqltype col) "CHAR") ; if average bytes wasted < 4 - (setf (sqltype col) "VARCHAR"))))))) +(defun gen-ufiles-custom () + (make-ufile "MRXW.NONENG" "Custom NonEnglish Index" "MRXW_NONENG" + 5 0 0 (fields (find-ufile "MRXW.ENG"))))