From: Kevin M. Rosenberg Date: Mon, 8 Jan 2007 07:53:33 +0000 (+0000) Subject: r11475: add support for SRLUS and LRLUS (USA-based) source restriction levels X-Git-Tag: v2006ac.2~21 X-Git-Url: http://git.kpe.io/?p=umlisp.git;a=commitdiff_plain;h=24e20cbd6508de4fc712696c08a9d7b5fd23219b r11475: add support for SRLUS and LRLUS (USA-based) source restriction levels --- diff --git a/parse-common.lisp b/parse-common.lisp index c9adcf7..96f7b8d 100644 --- a/parse-common.lisp +++ b/parse-common.lisp @@ -277,6 +277,12 @@ append a unique number (starting at 2) onto a column name that is repeated in th (defun canonicalize-column-type (type) (cond + ((string-equal type "TINYINT") + (case *umls-sql-type* + (:mysql "TINYINT") + ((:postgresql :postgresql-socket) "INT1") + (:oracle "NUMBER(3,0)") + (t "INTEGER"))) ((string-equal type "SMALLINT") (case *umls-sql-type* (:mysql "SMALLINT") @@ -332,6 +338,9 @@ append a unique number (starting at 2) onto a column name that is repeated in th (sql-i (setf (sqltype col) (canonicalize-column-type "INTEGER") (parse-fun col) #'parse-integer (quote-str col) "")) + (sql-t (setf (sqltype col) (canonicalize-column-type "TINYINT") + (parse-fun col) #'parse-integer + (quote-str col) "")) (sql-f (setf (sqltype col) (canonicalize-column-type "NUMERIC") (parse-fun col) #'read-from-string (quote-str col) "")) diff --git a/parse-rrf.lisp b/parse-rrf.lisp index 84a4b4b..d54c6b2 100644 --- a/parse-rrf.lisp +++ b/parse-rrf.lisp @@ -23,12 +23,32 @@ (defvar *preparse-hash-init?* nil) (eval-when (:compile-toplevel :load-toplevel :execute) + +(declaim (inline srl-to-srlus)) +(defun srl-to-srlus (srl) + "Convert the standard SRL category to one oriented for use in the United States. +Specifically, SRL 4 in the USA has license restrictions between SRL 1 and 2 when +used in the United States. We create a new scale (SRLUS) where SRL to SRLUS mapping is: +(0->0, 1->1, 4->2, 2->3, 3->4)." + (declare (fixnum srl)) + (cond + ((= srl 4) 2) + ((= srl 2) 3) + ((= srl 3) 4) + (t srl))) + (let ((pfstr-hash nil) ;; Preferred concept strings by CUI (cui-lrl-hash nil) ;; LRL by CUI (lui-lrl-hash nil) ;; LRL by LUI (sui-lrl-hash nil) ;; LRL by SUI (cuisui-lrl-hash nil) ;; LRL by CUISUI - (sab-srl-hash nil)) ;; SRL by SAB + (cui-lrlus-hash nil) ;; LRLUS by CUI + (lui-lrlus-hash nil) ;; LRLUS by LUI + (sui-lrlus-hash nil) ;; LRLUS by SUI + (cuisui-lrlus-hash nil) ;; LRL by CUISUI + + (sab-srl-hash nil) + (sab-srlus-hash nil)) ;; SRL by SAB (defun make-preparse-hash-table () (if sui-lrl-hash @@ -38,14 +58,24 @@ (clrhash lui-lrl-hash) (clrhash sui-lrl-hash) (clrhash cuisui-lrl-hash) - (clrhash sab-srl-hash)) + (clrhash cui-lrlus-hash) + (clrhash lui-lrlus-hash) + (clrhash sui-lrlus-hash) + (clrhash cuisui-lrlus-hash) + (clrhash sab-srl-hash) + (clrhash sab-srlus-hash)) (setf pfstr-hash (make-hash-table :size 1500000) cui-lrl-hash (make-hash-table :size 1500000) lui-lrl-hash (make-hash-table :size 5000000) sui-lrl-hash (make-hash-table :size 6000000) cuisui-lrl-hash (make-hash-table :size 6000000) - sab-srl-hash (make-hash-table :size 200 :test 'equal)))) + cui-lrlus-hash (make-hash-table :size 1500000) + lui-lrlus-hash (make-hash-table :size 5000000) + sui-lrlus-hash (make-hash-table :size 6000000) + cuisui-lrlus-hash (make-hash-table :size 6000000) + sab-srl-hash (make-hash-table :size 200 :test 'equal) + sab-srlus-hash (make-hash-table :size 200 :test 'equal)))) (defun ensure-preparse (&optional (force-read nil)) (when (and *preparse-hash-init?* (not force-read)) @@ -55,15 +85,16 @@ (declare (fixnum counter) (ignorable counter)) (with-umls-file (line "MRCONSO.RRF") - (let ((cui (parse-ui (nth 0 line))) - (lui (parse-ui (nth 3 line))) - (sui (parse-ui (nth 5 line))) - (sab (nth 11 line)) - (srl (parse-integer (nth 15 line)))) + (let* ((cui (parse-ui (nth 0 line))) + (lui (parse-ui (nth 3 line))) + (sui (parse-ui (nth 5 line))) + (sab (nth 11 line)) + (srl (parse-integer (nth 15 line))) + (srlus (srl-to-srlus srl))) #+sbcl (when (= 0 (mod (incf counter) 100000)) (sb-ext:gc :full t)) - ;; pfstr deprecated by KPKENG field in MRCONSO + ;; pfstr deprecated by KPFENG field in MRCONSO #+nil (unless (gethash cui pfstr-hash) ;; if haven't stored pfstr for cui (when (and (string-equal (nth 1 line) "ENG") ; LAT @@ -74,31 +105,49 @@ (set-lrl-hash lui srl lui-lrl-hash) (set-lrl-hash sui srl sui-lrl-hash) (set-lrl-hash (make-cuisui cui sui) srl cuisui-lrl-hash) + (set-lrl-hash cui srlus cui-lrlus-hash) + (set-lrl-hash lui srlus lui-lrlus-hash) + (set-lrl-hash sui srlus sui-lrlus-hash) + (set-lrl-hash (make-cuisui cui sui) srlus cuisui-lrlus-hash) (multiple-value-bind (val found) (gethash sab sab-srl-hash) (declare (ignore val)) (unless found - (setf (gethash sab sab-srl-hash) srl)))))) + (setf (gethash sab sab-srl-hash) srl))) + (multiple-value-bind (val found) (gethash sab sab-srlus-hash) + (declare (ignore val)) + (unless found + (setf (gethash sab sab-srlus-hash) srlus)))))) (setq *preparse-hash-init?* t) t) #+nil (defun pfstr-hash (cui) (gethash cui pfstr-hash)) - (defun cui-lrl (cui) (gethash cui cui-lrl-hash)) - (defun lui-lrl (lui) (gethash lui lui-lrl-hash)) - (defun sui-lrl (sui) (gethash sui sui-lrl-hash)) - (defun sab-srl (sab) (aif (gethash sab sab-srl-hash) it 0)) + (defun cui-lrl (cui) (gethash cui cui-lrl-hash)) + (defun lui-lrl (lui) (gethash lui lui-lrl-hash)) + (defun sui-lrl (sui) (gethash sui sui-lrl-hash)) (defun cuisui-lrl (cuisui) (gethash cuisui cuisui-lrl-hash)) + (defun cui-lrlus (cui) (gethash cui cui-lrlus-hash)) + (defun lui-lrlus (lui) (gethash lui lui-lrlus-hash)) + (defun sui-lrlus (sui) (gethash sui sui-lrlus-hash)) + (defun cuisui-lrlus (cuisui) (gethash cuisui cuisui-lrlus-hash)) + (defun sab-srl (sab) (aif (gethash sab sab-srl-hash) it 0)) + (defun sab-srlus (sab) (aif (gethash sab sab-srlus-hash) it 0)) )) ;; closure -(defun set-lrl-hash (key lrl hash) + +(defun set-lrl-hash (key srl hash) "Set the least restrictive level in hash table" + (declare (fixnum srl)) (multiple-value-bind (hash-lrl found) (gethash key hash) - (if (or (not found) (< lrl hash-lrl)) - (setf (gethash key hash) lrl)))) + (declare (type (or null fixnum) hash-lrl) + (boolean found)) + (if (or (not found) (< srl hash-lrl)) + (setf (gethash key hash) srl)))) ;; UMLS file and column structures ;;; SQL datatypes symbols ;;; sql-u - Unique identifier +;;; sql-t - Tiny integer (8-bit) ;;; sql-s - Small integer (16-bit) ;;; sql-i - Integer (32-bit) ;;; sql-l - Big integer (64-bit) @@ -109,15 +158,16 @@ '(("AV" sql-f) ("BTS" sql-i) ("CLS" sql-i) ("COF" sql-i) ("CUI1" sql-u) ("AUI" sql-u) ("AUI1" sql-u) ("AUI2" sql-u) ("PCUI" sql-u) ("PLUI" sql-u) ("PAUI" sql-u) ("RUI" sql-u) - ("CUI2" sql-u) ("CUI" sql-u) ("CXN" sql-s) ("FR" sql-i) ("LRL" sql-s) + ("CUI2" sql-u) ("CUI" sql-u) ("CXN" sql-s) ("FR" sql-i) ("LUI" sql-u) ("MAX" sql-s) ("MIN" sql-s) ("RANK" sql-s) ("REF" sql-c) ("PTR" sql-c) - ("RNK" sql-s) ("RWS" sql-i) ("SRL" sql-s) ("SUI" sql-u) ("TUI" sql-u) + ("RNK" sql-s) ("RWS" sql-i) ("SRL" sql-t) ("SUI" sql-u) ("TUI" sql-u) ("MAPRANK" sql-s) ;;; Custom columns - ("KCUISUI" sql-l) ("KCUILUI" sql-l) ("KCUILRL" sql-i) ("KLUILRL" sql-i) - ("KSUILRL" sql-i) - ("KSRL" sql-i) ("KLRL" sql-i) + ("KCUISUI" sql-l) ("KCUILUI" sql-l) + ("KSRL" sql-t) ("KSRLUS" sql-t) ("LRL" sql-t) ("LRLUS" sql-t) + ("KCUILRL" sql-t) ("KLUILRL" sql-t) ("KSUILRL" sql-t) ("KLRL" sql-t) + ("KCUILRLUS" sql-t) ("KLUILRLUS" sql-t) ("KSUILRLUS" sql-t) ("KLRLUS" sql-t) ;;; LEX columns ("EUI" sql-u) ("EUI2" sql-u) ;;; Semantic net columns @@ -150,49 +200,94 @@ (lambda (x) (write-to-string (make-cuisui (parse-ui (nth 0 x)) (parse-ui (nth 5 x)))))) ("MRCONSO.RRF" "KCUILUI" "BIGINT" 0 (lambda (x) (write-to-string (make-cuilui (parse-ui (nth 0 x)) (parse-ui (nth 3 x)))))) - ("MRCONSO.RRF" "KCUILRL" "SMALLINT" 0 - (lambda (x) (write-to-string (lui-lrl (parse-ui (nth 0 x)))))) - ("MRCONSO.RRF" "KLUILRL" "SMALLINT" 0 + ("MRCONSO.RRF" "KCUILRL" "TINYINT" 0 + (lambda (x) (write-to-string (cui-lrl (parse-ui (nth 0 x)))))) + ("MRCONSO.RRF" "KCUILRLUS" "TINYINT" 0 + (lambda (x) (write-to-string (cui-lrlus (parse-ui (nth 0 x)))))) + ("MRCONSO.RRF" "KLUILRL" "TINYINT" 0 (lambda (x) (write-to-string (lui-lrl (parse-ui (nth 3 x)))))) - ("MRCONSO.RRF" "KSUILRL" "SMALLINT" 0 + ("MRCONSO.RRF" "KLUILRLUS" "TINYINT" 0 + (lambda (x) (write-to-string (lui-lrlus (parse-ui (nth 3 x)))))) + ("MRCONSO.RRF" "KSUILRL" "TINYINT" 0 (lambda (x) (write-to-string (sui-lrl (parse-ui (nth 5 x)))))) - ("MRSTY.RRF" "KLRL" "SMALLINT" 0 + ("MRCONSO.RRF" "KSUILRLUS" "TINYINT" 0 + (lambda (x) (write-to-string (sui-lrlus (parse-ui (nth 5 x)))))) + ("MRCONSO.RRF" "KSRLUS" "TINYINT" 0 + (lambda (x) (write-to-string (srl-to-srlus (parse-integer (nth 15 x)))))) + ("MRSAB.RRF" "KSRLUS" "TINYINT" 0 + (lambda (x) (write-to-string (srl-to-srlus (parse-integer (nth 3 x)))))) + ("MRSTY.RRF" "KLRL" "TINYINT" 0 (lambda (x) (write-to-string (cui-lrl (parse-ui (nth 0 x)))))) - ("MRCOC.RRF" "KLRL" "SMALLINT" 0 + ("MRSTY.RRF" "KLRLUS" "TINYINT" 0 + (lambda (x) (write-to-string (cui-lrlus (parse-ui (nth 0 x)))))) + ("MRCOC.RRF" "KLRL" "TINYINT" 0 (lambda (x) (write-to-string (max (cui-lrl (parse-ui (nth 0 x))) (kmrcl:aif (cui-lrl (parse-ui (nth 1 x))) kmrcl::it 0))))) - ("MRSAT.RRF" "KSRL" "SMALLINT" 0 + ("MRCOC.RRF" "KLRLUS" "TINYINT" 0 + (lambda (x) (write-to-string + (max (cui-lrlus (parse-ui (nth 0 x))) + (kmrcl:aif (cui-lrl (parse-ui (nth 1 x))) kmrcl::it 0))))) + ("MRSAT.RRF" "KSRL" "TINYINT" 0 (lambda (x) (write-to-string (sab-srl (nth 9 x))))) - ("MRREL.RRF" "KSRL" "SMALLINT" 0 + ("MRSAT.RRF" "KSRLUS" "TINYINT" 0 + (lambda (x) (write-to-string (sab-srlus (nth 9 x))))) + ("MRREL.RRF" "KSRL" "TINYINT" 0 (lambda (x) (write-to-string (sab-srl (nth 10 x))))) - ("MRRANK.RRF" "KSRL" "SMALLINT" 0 + ("MRREL.RRF" "KSRLUS" "TINYINT" 0 + (lambda (x) (write-to-string (sab-srlus (nth 10 x))))) + ("MRRANK.RRF" "KSRL" "TINYINT" 0 (lambda (x) (write-to-string (sab-srl (nth 1 x))))) - ("MRHIER.RRF" "KSRL" "SMALLINT" 0 + ("MRRANK.RRF" "KSRLUS" "TINYINT" 0 + (lambda (x) (write-to-string (sab-srlus (nth 1 x))))) + ("MRHIER.RRF" "KSRL" "TINYINT" 0 (lambda (x) (write-to-string (sab-srl (nth 4 x))))) - ("MRMAP.RRF" "KSRL" "SMALLINT" 0 + ("MRHIER.RRF" "KSRLUS" "TINYINT" 0 + (lambda (x) (write-to-string (sab-srlus (nth 4 x))))) + ("MRMAP.RRF" "KSRL" "TINYINT" 0 (lambda (x) (write-to-string (sab-srl (nth 1 x))))) - ("MRSMAP.RRF" "KSRL" "SMALLINT" 0 + ("MRMAP.RRF" "KSRLUS" "TINYINT" 0 + (lambda (x) (write-to-string (sab-srlus (nth 1 x))))) + ("MRSMAP.RRF" "KSRL" "TINYINT" 0 (lambda (x) (write-to-string (sab-srl (nth 1 x))))) - ("MRDEF.RRF" "KSRL" "SMALLINT" 0 + ("MRSMAP.RRF" "KSRLUS" "TINYINT" 0 + (lambda (x) (write-to-string (sab-srlus (nth 1 x))))) + ("MRDEF.RRF" "KSRL" "TINYINT" 0 (lambda (x) (write-to-string (sab-srl (nth 4 x))))) - #+nil ("MRCXT.RRF" "KSRL" "SMALLINT" 0 (lambda (x) (write-to-string (sab-srl (nth 2 x))))) - ("MRXW_ENG.RRF" "KLRL" "SMALLINT" 0 + ("MRDEF.RRF" "KSRLUS" "TINYINT" 0 + (lambda (x) (write-to-string (sab-srlus (nth 4 x))))) + ("MRXW_ENG.RRF" "KLRL" "TINYINT" 0 (lambda (x) (write-to-string (cuisui-lrl (make-cuisui (parse-ui (nth 2 x)) (parse-ui (nth 4 x))))))) - ("MRXW_NONENG.RRF" "KLRL" "SMALLINT" 0 + ("MRXW_ENG.RRF" "KLRLUS" "TINYINT" 0 + (lambda (x) (write-to-string (cuisui-lrlus (make-cuisui + (parse-ui (nth 2 x)) + (parse-ui (nth 4 x))))))) + ("MRXW_NONENG.RRF" "KLRL" "TINYINT" 0 (lambda (x) (write-to-string (cuisui-lrl (make-cuisui (parse-ui (nth 2 x)) (parse-ui (nth 4 x))))))) - ("MRXNW_ENG.RRF" "KLRL" "SMALLINT" 0 + ("MRXW_NONENG.RRF" "KLRLUS" "TINYINT" 0 + (lambda (x) (write-to-string (cuisui-lrlus (make-cuisui + (parse-ui (nth 2 x)) + (parse-ui (nth 4 x))))))) + ("MRXNW_ENG.RRF" "KLRL" "TINYINT" 0 (lambda (x) (write-to-string (cuisui-lrl (make-cuisui (parse-ui (nth 2 x)) (parse-ui (nth 4 x))))))) - ("MRXNS_ENG.RRF" "KLRL" "SMALLINT" 0 + ("MRXNW_ENG.RRF" "KLRLUS" "TINYINT" 0 + (lambda (x) (write-to-string (cuisui-lrlus (make-cuisui + (parse-ui (nth 2 x)) + (parse-ui (nth 4 x))))))) + ("MRXNS_ENG.RRF" "KLRL" "TINYINT" 0 (lambda (x) (write-to-string (cuisui-lrl (make-cuisui (parse-ui (nth 2 x)) (parse-ui (nth 4 x))))))) + ("MRXNS_ENG.RRF" "KLRLUS" "TINYINT" 0 + (lambda (x) (write-to-string (cuisui-lrlus (make-cuisui + (parse-ui (nth 2 x)) + (parse-ui (nth 4 x))))))) #+nil ("MRREL.RRF" "KPFSTR2" "TEXT" 1024 (lambda (x) (pfstr-hash (parse-ui (nth 4 x))))) #+nil ("MRCOC.RRF" "KPFSTR2" "TEXT" 1024 (lambda (x) (pfstr-hash (parse-ui (nth 2 x))))) @@ -217,7 +312,7 @@ (defparameter +index-cols+ '(("CUI1" "MRCOC") ("CUI" "MRCONSO") ("LUI" "MRCONSO") - ("SRL" "MRCONSO") ("AUI" "MRCONSO") ("KPFENG" "MRCONSO") + ("SRL" "MRCONSO") ("KSRLUS" "MRCONSO") ("AUI" "MRCONSO") ("KPFENG" "MRCONSO") ("SUI" "MRCONSO") ("SAUI" "MRCONSO") ("CODE" "MRCONSO") ("SCUI" "MRCONSO") ("CUI" "MRDEF") @@ -230,15 +325,18 @@ ("SAB" "MRHIER") #+ignore ("NSTR" "MRXNS_ENG" 10) ("CUI" "MRXNW_ENG") ("NWD" "MRXNW_ENG") ("WD" "MRXW_ENG") - ("KCUISUI" "MRCONSO") ("KCUILUI" "MRCONSO") ("KCUILRL" "MRCONSO") - ("KLUILRL" "MRCONSO") + ("KCUISUI" "MRCONSO") ("KCUILUI" "MRCONSO") + ("KCUILRL" "MRCONSO") ("KLUILRL" "MRCONSO") ("KSUILRL" "MRCONSO") + ("KCUILRLUS" "MRCONSO") ("KLUILRLUS" "MRCONSO") ("KSUILRLUS" "MRCONSO") ("KCUISUI" "MRSAT") ("KCUILUI" "MRSAT") ("KCUISUI" "MRXW_ENG") ("KCUISUI" "MRXNW_ENG") ("KCUISUI" "MRXNS_ENG") ("KCUISUI" "MRXW_NONENG") - ("KSRL" "MRDEF") ("KSRL" "MRRANK") - ("KSRL" "MRREL") ("KSRL" "MRSAT") ("KLRL" "MRCOC") - ("KLRL" "MRSTY") ("KLRL" "MRXW_ENG") ("KLRL" "MRXNW_ENG") + ("KSRL" "MRDEF") ("KSRL" "MRRANK")("KSRL" "MRREL") ("KSRL" "MRSAT") + ("KSRLUS" "MRDEF") ("KSRLUS" "MRRANK")("KSRLUS" "MRREL") ("KSRLUS" "MRSAT") + ("KLRL" "MRCOC") ("KLRL" "MRSTY") ("KLRL" "MRXW_ENG") ("KLRL" "MRXNW_ENG") + ("KLRLUS" "MRCOC") ("KLRLUS" "MRSTY") ("KLRLUS" "MRXW_ENG") ("KLRLUS" "MRXNW_ENG") ("KLRL" "MRXNS_ENG") ("KLRL" "MRXW_NONENG") + ("KLRLUS" "MRXNS_ENG") ("KLRLUS" "MRXW_NONENG") ;; LEX indices ("EUI" "LRABR") ("EUI2" "LRABR") ("EUI" "LRAGR") ("EUI" "LRCMP") ("EUI" "LRMOD") ("EUI" "LRNOM") ("EUI2" "LRNOM") ("EUI" "LRPRN") ("EUI" "LRPRP") ("EUI" "LRSPL") @@ -249,7 +347,7 @@ ("STY_RL" "SRDEF") ("RT" "SRDEF") ("STY_RL" "SRSTR") ("STY_RL2" "SRSTR") ("RL" "SRSTR") - ("SRL" "MRSAB") ("RSAB" "MRSAB") ("VSAB" "MRSAB") ("RCUI" "MRSAB") + ("SRL" "MRSAB") ("KSRLUS" "MRSAB") ("RSAB" "MRSAB") ("VSAB" "MRSAB") ("RCUI" "MRSAB") ("VCUI" "MRSAB") ("LAT" "MRSAB") ("MAPSETCUI" "MRMAP") ("MAPSETCUI" "MRSMAP") ("CUI" "MRHIER")) "Columns in files to index")