X-Git-Url: http://git.kpe.io/?p=umlisp.git;a=blobdiff_plain;f=parse-rrf.lisp;h=4076dc7c47635615d12b3813d2beb2030922f814;hp=4c617a02655c539da1e370a52de14c7faec1dcdc;hb=08af5459d3ba7d229a8339d242ef742cfe846861;hpb=32de5f079093b0ec4141dc797b3acd805c975b48 diff --git a/parse-rrf.lisp b/parse-rrf.lisp index 4c617a0..4076dc7 100644 --- a/parse-rrf.lisp +++ b/parse-rrf.lisp @@ -2,7 +2,7 @@ ;;;; ************************************************************************* ;;;; FILE IDENTIFICATION ;;;; -;;;; Name: parse-2002.lisp +;;;; Name: parse-rrf.lisp ;;;; Purpose: Parsing and SQL insertion routines for UMLisp which may ;;;; change from year to year ;;;; Author: Kevin M. Rosenberg @@ -40,40 +40,47 @@ (clrhash cuisui-lrl-hash) (clrhash sab-srl-hash)) (setf - pfstr-hash (make-hash-table :size 800000) - cui-lrl-hash (make-hash-table :size 800000) - lui-lrl-hash (make-hash-table :size 1500000) - sui-lrl-hash (make-hash-table :size 1500000) - cuisui-lrl-hash (make-hash-table :size 1800000) + pfstr-hash (make-hash-table :size 1300000) + cui-lrl-hash (make-hash-table :size 1300000) + lui-lrl-hash (make-hash-table :size 4600000) + sui-lrl-hash (make-hash-table :size 5100000) + cuisui-lrl-hash (make-hash-table :size 2000000) sab-srl-hash (make-hash-table :size 100 :test 'equal)))) (defun ensure-preparse (&optional (force-read nil)) (when (and *preparse-hash-init?* (not force-read)) (return-from ensure-preparse 'already-done)) (make-preparse-hash-table) - (with-umls-file (line "MRCONSO.RRF") - (let ((cui (parse-ui (nth 0 line))) - (lui (parse-ui (nth 3 line))) - (sui (parse-ui (nth 5 line))) - (sab (nth 11 line)) - (srl (parse-integer (nth 15 line)))) - (unless (gethash cui pfstr-hash) ;; if haven't stored pfstr for cui - (if (and (string-equal (nth 1 line) "ENG") ; LAT - (string-equal (nth 2 line) "P") ; ts - (string-equal (nth 4 line) "PF")) ; stt + (let ((counter 0)) + (declare (fixnum counter)) + (with-umls-file (line "MRCONSO.RRF") + (let ((cui (parse-ui (nth 0 line))) + (lui 
(parse-ui (nth 3 line))) + (sui (parse-ui (nth 5 line))) + (sab (nth 11 line)) + (srl (parse-integer (nth 15 line)))) + #+sbcl + (when (= 0 (mod (incf counter) 100000)) (sb-ext:gc :full t)) + + ;; pfstr deprecated by KPFENG field in MRCONSO + #+nil + (unless (gethash cui pfstr-hash) ;; if haven't stored pfstr for cui + (when (and (string-equal (nth 1 line) "ENG") ; LAT + (string-equal (nth 2 line) "P") ; ts + (string-equal (nth 4 line) "PF")) ; stt (setf (gethash cui pfstr-hash) (nth 14 line)))) - (set-lrl-hash cui srl cui-lrl-hash) - (set-lrl-hash lui srl lui-lrl-hash) - (set-lrl-hash sui srl sui-lrl-hash) - (set-lrl-hash (make-cuisui cui sui) srl cuisui-lrl-hash) - (multiple-value-bind (val found) (gethash sab sab-srl-hash) - (declare (ignore val)) - (unless found - (setf (gethash sab sab-srl-hash) srl))))) + (set-lrl-hash cui srl cui-lrl-hash) + (set-lrl-hash lui srl lui-lrl-hash) + (set-lrl-hash sui srl sui-lrl-hash) + (set-lrl-hash (make-cuisui cui sui) srl cuisui-lrl-hash) + (multiple-value-bind (val found) (gethash sab sab-srl-hash) + (declare (ignore val)) + (unless found + (setf (gethash sab sab-srl-hash) srl)))))) (setq *preparse-hash-init?* t) t) - (defun pfstr-hash (cui) (gethash cui pfstr-hash)) + #+nil (defun pfstr-hash (cui) (gethash cui pfstr-hash)) (defun cui-lrl (cui) (gethash cui cui-lrl-hash)) (defun lui-lrl (lui) (gethash lui lui-lrl-hash)) (defun sui-lrl (sui) (gethash sui sui-lrl-hash)) @@ -100,9 +107,10 @@ (defparameter +col-datatypes+ '(("AV" sql-f) ("BTS" sql-i) ("CLS" sql-i) ("COF" sql-i) ("CUI1" sql-u) ("AUI" sql-u) ("AUI1" sql-u) ("AUI2" sql-u) ("PCUI" sql-u) - ("PLUI" sql-u) ("PAUI" sql-u) + ("PLUI" sql-u) ("PAUI" sql-u) ("RUI" sql-u) ("CUI2" sql-u) ("CUI" sql-u) ("CXN" sql-s) ("FR" sql-i) ("LRL" sql-s) ("LUI" sql-u) ("MAX" sql-s) ("MIN" sql-s) ("RANK" sql-s) ("REF" sql-c) + ("PTR" sql-c) ("RNK" sql-s) ("RWS" sql-i) ("SRL" sql-s) ("SUI" sql-u) ("TUI" sql-u) ("MAPRANK" sql-s) ;;; Custom columns @@ -127,9 +135,16 @@ "Custom tables to 
create") (defparameter +custom-cols+ - '(("MRCONSO.RRF" "KPFSTR" "TEXT" - (slot-value (find-ucol "STR" "MRCONSO.RRF") 'max) - (lambda (x) (pfstr-hash (parse-ui (nth 0 x))))) + '(#+nil ("MRCONSO.RRF" "KPFSTR" "TEXT" + (slot-value (find-ucol "STR" "MRCONSO.RRF") 'max) + (lambda (x) (pfstr-hash (parse-ui (nth 0 x))))) + ;; Set to 1 if term is preferred term for English + ("MRCONSO.RRF" "KPFENG" "TINYINT" 0 + (lambda (x) (if (and (string-equal (nth 1 x) "ENG") ; LAT + (string-equal (nth 2 x) "P") ; ts + (string-equal (nth 4 x) "PF")) ; stt + "1" + "0"))) ("MRCONSO.RRF" "KCUISUI" "BIGINT" 0 (lambda (x) (write-to-string (make-cuisui (parse-ui (nth 0 x)) (parse-ui (nth 5 x)))))) ("MRCONSO.RRF" "KCUILUI" "BIGINT" 0 @@ -140,13 +155,6 @@ (lambda (x) (write-to-string (lui-lrl (parse-ui (nth 3 x)))))) ("MRCONSO.RRF" "KSUILRL" "SMALLINT" 0 (lambda (x) (write-to-string (sui-lrl (parse-ui (nth 5 x)))))) - ;; Deprecated, last in 2004AA -- skip index - #+ignore - ("MRLO.RRF" "KLRL" "SMALLINT" 0 - (lambda (x) (write-to-string - (if (zerop (length (nth 4 x))) - (cui-lrl (parse-ui (nth 0 x))) - (cuisui-lrl (make-cuisui (parse-ui (nth 0 x)) (parse-ui (nth 4 x)))))))) ("MRSTY.RRF" "KLRL" "SMALLINT" 0 (lambda (x) (write-to-string (cui-lrl (parse-ui (nth 0 x)))))) ("MRCOC.RRF" "KLRL" "SMALLINT" 0 @@ -159,10 +167,15 @@ (lambda (x) (write-to-string (sab-srl (nth 10 x))))) ("MRRANK.RRF" "KSRL" "SMALLINT" 0 (lambda (x) (write-to-string (sab-srl (nth 1 x))))) + ("MRHIER.RRF" "KSRL" "SMALLINT" 0 + (lambda (x) (write-to-string (sab-srl (nth 4 x))))) + ("MRMAP.RRF" "KSRL" "SMALLINT" 0 + (lambda (x) (write-to-string (sab-srl (nth 1 x))))) + ("MRSMAP.RRF" "KSRL" "SMALLINT" 0 + (lambda (x) (write-to-string (sab-srl (nth 1 x))))) ("MRDEF.RRF" "KSRL" "SMALLINT" 0 (lambda (x) (write-to-string (sab-srl (nth 4 x))))) - ("MRCXT.RRF" "KSRL" "SMALLINT" 0 - (lambda (x) (write-to-string (sab-srl (nth 2 x))))) + #+nil ("MRCXT.RRF" "KSRL" "SMALLINT" 0 (lambda (x) (write-to-string (sab-srl (nth 2 x))))) 
("MRXW_ENG.RRF" "KLRL" "SMALLINT" 0 (lambda (x) (write-to-string (cuisui-lrl (make-cuisui (parse-ui (nth 2 x)) @@ -179,12 +192,9 @@ (lambda (x) (write-to-string (cuisui-lrl (make-cuisui (parse-ui (nth 2 x)) (parse-ui (nth 4 x))))))) - ("MRREL.RRF" "KPFSTR2" "TEXT" 1024 - (lambda (x) (pfstr-hash (parse-ui (nth 4 x))))) - ("MRCOC.RRF" "KPFSTR2" "TEXT" 1024 - (lambda (x) (pfstr-hash (parse-ui (nth 2 x))))) - ("MRCXT.RRF" "KCUISUI" "BIGINT" 0 - (lambda (x) (write-to-string (make-cuisui (parse-ui (nth 0 x)) (parse-ui (nth 1 x)))))) + + #+nil ("MRREL.RRF" "KPFSTR2" "TEXT" 1024 (lambda (x) (pfstr-hash (parse-ui (nth 4 x))))) + #+nil ("MRCOC.RRF" "KPFSTR2" "TEXT" 1024 (lambda (x) (pfstr-hash (parse-ui (nth 2 x))))) ("MRSAT.RRF" "KCUILUI" "BIGINT" 0 (lambda (x) (write-to-string (make-cuilui (parse-ui (nth 0 x)) (parse-ui (nth 1 x)))))) ("MRSAT.RRF" "KCUISUI" "BIGINT" 0 @@ -206,21 +216,26 @@ (defparameter +index-cols+ '(("CUI1" "MRCOC") ("CUI" "MRCONSO") ("LUI" "MRCONSO") - ("SRL" "MRCONSO") ("AUI" "MRCONSO") - ("SUI" "MRCONSO") ("CUI" "MRCXT") ("CUI" "MRDEF") ("CUI" "MRLO") - ("CUI1" "MRREL") ("CUI" "MRSAT") ("LUI" "MRSAT") ("SUI" "MRSAT") - ("CUI" "MRSTY") - ("TUI" "MRSTY") ("CUI" "MRXNS_ENG") + ("SRL" "MRCONSO") ("AUI" "MRCONSO") ("KPFENG" "MRCONSO") + ("SUI" "MRCONSO") ("SAUI" "MRCONSO") ("CODE" "MRCONSO") + ("SCUI" "MRCONSO") + ("CUI" "MRDEF") + ("CUI1" "MRREL") ("CUI2" "MRREL") ("SAB" "MRREL") + ("RUI" "MRREL") ("AUI1" "MRREL") ("AUI2" "MRREL") + ("CUI" "MRSAT") ("LUI" "MRSAT") ("SUI" "MRSAT") + ("METAUI" "MRSAT") ("ATN" "MRSAT") + ("CUI" "MRSTY") ("TUI" "MRSTY") ("CUI" "MRXNS_ENG") + ("AUI" "MRHIER") ("CUI" "MRHIER") ("CXN" "MRHIER") ("RELA" "MRHIER") ("PAUI" "MRHIER") + ("SAB" "MRHIER") #+ignore ("NSTR" "MRXNS_ENG" 10) ("CUI" "MRXNW_ENG") ("NWD" "MRXNW_ENG") ("WD" "MRXW_ENG") ("KCUISUI" "MRCONSO") ("KCUILUI" "MRCONSO") ("KCUILRL" "MRCONSO") - ("KLUILRL" "MRCONSO") ("KCUISUI" "MRCXT") + ("KLUILRL" "MRCONSO") ("KCUISUI" "MRSAT") ("KCUILUI" "MRSAT") ("KCUISUI" 
"MRXW_ENG") ("KCUISUI" "MRXNW_ENG") ("KCUISUI" "MRXNS_ENG") ("KCUISUI" "MRXW_NONENG") - ("KSRL" "MRCXT") ("KSRL" "MRDEF") ("KSRL" "MRRANK") + ("KSRL" "MRDEF") ("KSRL" "MRRANK") ("KSRL" "MRREL") ("KSRL" "MRSAT") ("KLRL" "MRCOC") - #+ignore ("KLRL" "MRLO") ;; deprecated ("KLRL" "MRSTY") ("KLRL" "MRXW_ENG") ("KLRL" "MRXNW_ENG") ("KLRL" "MRXNS_ENG") ("KLRL" "MRXW_NONENG") ;; LEX indices @@ -235,7 +250,7 @@ ("SRL" "MRSAB") ("RSAB" "MRSAB") ("VSAB" "MRSAB") ("RCUI" "MRSAB") ("VCUI" "MRSAB") ("LAT" "MRSAB") ("MAPSETCUI" "MRMAP") ("MAPSETCUI" "MRSMAP") - ("CUI" "MRHIER") ("AUI" "MRHIER") ("PAUI" "MRHIER")) + ("CUI" "MRHIER")) "Columns in files to index")