;;;; *************************************************************************
;;;; FILE IDENTIFICATION
;;;;
-;;;; Name: parse-2002.lisp
+;;;; Name: parse-rrf.lisp
;;;; Purpose: Parsing and SQL insertion routines for UMLisp which may
;;;; change from year to year
;;;; Author: Kevin M. Rosenberg
(sui-lrl-hash nil) ;; LRL by SUI
(cuisui-lrl-hash nil) ;; LRL by CUISUI
(sab-srl-hash nil)) ;; SRL by SAB
-
+
(defun make-preparse-hash-table ()
(if sui-lrl-hash
(progn
(clrhash cuisui-lrl-hash)
(clrhash sab-srl-hash))
(setf
- pfstr-hash (make-hash-table :size 800000)
- cui-lrl-hash (make-hash-table :size 800000)
- lui-lrl-hash (make-hash-table :size 1500000)
- sui-lrl-hash (make-hash-table :size 1500000)
- cuisui-lrl-hash (make-hash-table :size 1800000)
+ pfstr-hash (make-hash-table :size 1300000)
+ cui-lrl-hash (make-hash-table :size 1300000)
+ lui-lrl-hash (make-hash-table :size 4600000)
+ sui-lrl-hash (make-hash-table :size 5100000)
+ cuisui-lrl-hash (make-hash-table :size 2000000)
sab-srl-hash (make-hash-table :size 100 :test 'equal))))
-
+
(defun ensure-preparse (&optional (force-read nil))
- (when (or force-read (not *preparse-hash-init?*))
- (make-preparse-hash-table))
- (with-umls-file (line "MRCONSO.RRF")
- (let ((cui (parse-ui (nth 0 line)))
- (lui (parse-ui (nth 3 line)))
- (sui (parse-ui (nth 5 line)))
- (sab (nth 11 line))
- (srl (parse-integer (nth 15 line))))
- (unless (gethash cui pfstr-hash) ;; if haven't stored pfstr for cui
- (if (and (string-equal (nth 1 line) "ENG") ; LAT
- (string-equal (nth 2 line) "P") ; ts
- (string-equal (nth 4 line) "PF")) ; stt
+ (when (and *preparse-hash-init?* (not force-read))
+ (return-from ensure-preparse 'already-done))
+ (make-preparse-hash-table)
+ (let ((counter 0))
+ (declare (fixnum counter))
+ (with-umls-file (line "MRCONSO.RRF")
+ (let ((cui (parse-ui (nth 0 line)))
+ (lui (parse-ui (nth 3 line)))
+ (sui (parse-ui (nth 5 line)))
+ (sab (nth 11 line))
+ (srl (parse-integer (nth 15 line))))
+ #+sbcl
+ (when (= 0 (mod (incf counter) 100000)) (sb-ext:gc :full t))
+
+ ;; pfstr deprecated by KPKENG field in MRCONSO
+ #+nil
+ (unless (gethash cui pfstr-hash) ;; if haven't stored pfstr for cui
+ (when (and (string-equal (nth 1 line) "ENG") ; LAT
+ (string-equal (nth 2 line) "P") ; ts
+ (string-equal (nth 4 line) "PF")) ; stt
(setf (gethash cui pfstr-hash) (nth 14 line))))
- (set-lrl-hash cui srl cui-lrl-hash)
- (set-lrl-hash lui srl lui-lrl-hash)
- (set-lrl-hash sui srl sui-lrl-hash)
- (set-lrl-hash (make-cuisui cui sui) srl cuisui-lrl-hash)
- (multiple-value-bind (val found) (gethash sab sab-srl-hash)
- (declare (ignore val))
- (unless found
- (setf (gethash sab sab-srl-hash) srl)))))
- (setq *preparse-hash-init?* t))
-
- (defun pfstr-hash (cui) (gethash cui pfstr-hash))
+ (set-lrl-hash cui srl cui-lrl-hash)
+ (set-lrl-hash lui srl lui-lrl-hash)
+ (set-lrl-hash sui srl sui-lrl-hash)
+ (set-lrl-hash (make-cuisui cui sui) srl cuisui-lrl-hash)
+ (multiple-value-bind (val found) (gethash sab sab-srl-hash)
+ (declare (ignore val))
+ (unless found
+ (setf (gethash sab sab-srl-hash) srl))))))
+ (setq *preparse-hash-init?* t)
+ t)
+
+ #+nil (defun pfstr-hash (cui) (gethash cui pfstr-hash))
(defun cui-lrl (cui) (gethash cui cui-lrl-hash))
(defun lui-lrl (lui) (gethash lui lui-lrl-hash))
(defun sui-lrl (sui) (gethash sui sui-lrl-hash))
(defun sab-srl (sab) (aif (gethash sab sab-srl-hash) it 0))
(defun cuisui-lrl (cuisui) (gethash cuisui cuisui-lrl-hash))
-
+
)) ;; closure
(defun set-lrl-hash (key lrl hash)
(defparameter +col-datatypes+
'(("AV" sql-f) ("BTS" sql-i) ("CLS" sql-i) ("COF" sql-i) ("CUI1" sql-u)
("AUI" sql-u) ("AUI1" sql-u) ("AUI2" sql-u) ("PCUI" sql-u)
- ("PLUI" sql-u) ("PAUI" sql-u)
+ ("PLUI" sql-u) ("PAUI" sql-u) ("RUI" sql-u)
("CUI2" sql-u) ("CUI" sql-u) ("CXN" sql-s) ("FR" sql-i) ("LRL" sql-s)
("LUI" sql-u) ("MAX" sql-s) ("MIN" sql-s) ("RANK" sql-s) ("REF" sql-c)
+ ("PTR" sql-c)
("RNK" sql-s) ("RWS" sql-i) ("SRL" sql-s) ("SUI" sql-u) ("TUI" sql-u)
("MAPRANK" sql-s)
;;; Custom columns
("RCUI" sql-u) ("VCUI" sql-u) ("CFR" sql-i) ("TFR" sql-i)
;; New fields for 2004AA
("MAPSETCUI" sql-u)
- )
+ )
"SQL data types for each non-string column")
(defparameter +custom-tables+
"Custom tables to create")
(defparameter +custom-cols+
- '(("MRCONSO.RRF" "KPFSTR" "TEXT"
- (slot-value (find-ucol "STR" "MRCONSO.RRF") 'max)
- (lambda (x) (pfstr-hash (parse-ui (nth 0 x)))))
+ '(#+nil ("MRCONSO.RRF" "KPFSTR" "TEXT"
+ (slot-value (find-ucol "STR" "MRCONSO.RRF") 'max)
+ (lambda (x) (pfstr-hash (parse-ui (nth 0 x)))))
+ ;; Set to 1 if term is prefered term for english
+ ("MRCONSO.RRF" "KPFENG" "TINYINT" 0
+ (lambda (x) (if (and (string-equal (nth 1 x) "ENG") ; LAT
+ (string-equal (nth 2 x) "P") ; ts
+ (string-equal (nth 4 x) "PF")) ; stt
+ "1"
+ "0")))
("MRCONSO.RRF" "KCUISUI" "BIGINT" 0
(lambda (x) (write-to-string (make-cuisui (parse-ui (nth 0 x)) (parse-ui (nth 5 x))))))
("MRCONSO.RRF" "KCUILUI" "BIGINT" 0
(lambda (x) (write-to-string (lui-lrl (parse-ui (nth 3 x))))))
("MRCONSO.RRF" "KSUILRL" "SMALLINT" 0
(lambda (x) (write-to-string (sui-lrl (parse-ui (nth 5 x))))))
- ;; Deprecated, last in 2004AA -- skip index
- #+ignore
- ("MRLO.RRF" "KLRL" "SMALLINT" 0
- (lambda (x) (write-to-string
- (if (zerop (length (nth 4 x)))
- (cui-lrl (parse-ui (nth 0 x)))
- (cuisui-lrl (make-cuisui (parse-ui (nth 0 x)) (parse-ui (nth 4 x))))))))
("MRSTY.RRF" "KLRL" "SMALLINT" 0
(lambda (x) (write-to-string (cui-lrl (parse-ui (nth 0 x))))))
("MRCOC.RRF" "KLRL" "SMALLINT" 0
- (lambda (x) (write-to-string
+ (lambda (x) (write-to-string
(max (cui-lrl (parse-ui (nth 0 x)))
(kmrcl:aif (cui-lrl (parse-ui (nth 1 x))) kmrcl::it 0)))))
("MRSAT.RRF" "KSRL" "SMALLINT" 0
(lambda (x) (write-to-string (sab-srl (nth 10 x)))))
("MRRANK.RRF" "KSRL" "SMALLINT" 0
(lambda (x) (write-to-string (sab-srl (nth 1 x)))))
+ ("MRHIER.RRF" "KSRL" "SMALLINT" 0
+ (lambda (x) (write-to-string (sab-srl (nth 4 x)))))
+ ("MRMAP.RRF" "KSRL" "SMALLINT" 0
+ (lambda (x) (write-to-string (sab-srl (nth 1 x)))))
+ ("MRSMAP.RRF" "KSRL" "SMALLINT" 0
+ (lambda (x) (write-to-string (sab-srl (nth 1 x)))))
("MRDEF.RRF" "KSRL" "SMALLINT" 0
(lambda (x) (write-to-string (sab-srl (nth 4 x)))))
- ("MRCXT.RRF" "KSRL" "SMALLINT" 0
- (lambda (x) (write-to-string (sab-srl (nth 2 x)))))
+ #+nil ("MRCXT.RRF" "KSRL" "SMALLINT" 0 (lambda (x) (write-to-string (sab-srl (nth 2 x)))))
("MRXW_ENG.RRF" "KLRL" "SMALLINT" 0
- (lambda (x) (write-to-string (cuisui-lrl (make-cuisui
+ (lambda (x) (write-to-string (cuisui-lrl (make-cuisui
(parse-ui (nth 2 x))
(parse-ui (nth 4 x)))))))
("MRXW_NONENG.RRF" "KLRL" "SMALLINT" 0
- (lambda (x) (write-to-string (cuisui-lrl (make-cuisui
+ (lambda (x) (write-to-string (cuisui-lrl (make-cuisui
(parse-ui (nth 2 x))
(parse-ui (nth 4 x)))))))
("MRXNW_ENG.RRF" "KLRL" "SMALLINT" 0
- (lambda (x) (write-to-string (cuisui-lrl (make-cuisui
+ (lambda (x) (write-to-string (cuisui-lrl (make-cuisui
(parse-ui (nth 2 x))
(parse-ui (nth 4 x)))))))
("MRXNS_ENG.RRF" "KLRL" "SMALLINT" 0
- (lambda (x) (write-to-string (cuisui-lrl (make-cuisui
+ (lambda (x) (write-to-string (cuisui-lrl (make-cuisui
(parse-ui (nth 2 x))
(parse-ui (nth 4 x)))))))
- ("MRREL.RRF" "KPFSTR2" "TEXT" 1024
- (lambda (x) (pfstr-hash (parse-ui (nth 4 x)))))
- ("MRCOC.RRF" "KPFSTR2" "TEXT" 1024
- (lambda (x) (pfstr-hash (parse-ui (nth 2 x)))))
- ("MRCXT.RRF" "KCUISUI" "BIGINT" 0
- (lambda (x) (write-to-string (make-cuisui (parse-ui (nth 0 x)) (parse-ui (nth 1 x))))))
+
+ #+nil ("MRREL.RRF" "KPFSTR2" "TEXT" 1024 (lambda (x) (pfstr-hash (parse-ui (nth 4 x)))))
+ #+nil ("MRCOC.RRF" "KPFSTR2" "TEXT" 1024 (lambda (x) (pfstr-hash (parse-ui (nth 2 x)))))
("MRSAT.RRF" "KCUILUI" "BIGINT" 0
(lambda (x) (write-to-string (make-cuilui (parse-ui (nth 0 x)) (parse-ui (nth 1 x))))))
("MRSAT.RRF" "KCUISUI" "BIGINT" 0
("MRXW_NONENG.RRF" "CUI" "INTEGER" 0 (lambda (x) (write-to-string (parse-ui (nth 2 x)))))
("MRXW_NONENG.RRF" "LUI" "INTEGER" 0 (lambda (x) (write-to-string (parse-ui (nth 3 x)))))
("MRXW_NONENG.RRF" "SUI" "INTEGER" 0 (lambda (x) (write-to-string (parse-ui (nth 4 x)))))
- ("MRXW_NONENG.RRF" "KCUISUI" "BIGINT" 0
+ ("MRXW_NONENG.RRF" "KCUISUI" "BIGINT" 0
(lambda (x) (write-to-string (make-cuisui (parse-ui (nth 2 x)) (parse-ui (nth 4 x)))))))
"Custom columns to create.(filename, col, sqltype, value-func).")
(defparameter +index-cols+
- '(("CUI1" "MRCOC") ("CUI" "MRCONSO") ("LUI" "MRCONSO")
- ("SRL" "MRCONSO") ("AUI" "MRCONSO")
- ("SUI" "MRCONSO") ("CUI" "MRCXT") ("CUI" "MRDEF") ("CUI" "MRLO")
- ("CUI1" "MRREL") ("CUI" "MRSAT") ("LUI" "MRSAT") ("SUI" "MRSAT")
- ("CUI" "MRSTY")
- ("TUI" "MRSTY") ("CUI" "MRXNS_ENG")
+ '(("CUI1" "MRCOC") ("CUI" "MRCONSO") ("LUI" "MRCONSO")
+ ("SRL" "MRCONSO") ("AUI" "MRCONSO") ("KPFENG" "MRCONSO")
+ ("SUI" "MRCONSO") ("SAUI" "MRCONSO") ("CODE" "MRCONSO")
+ ("SCUI" "MRCONSO")
+ ("CUI" "MRDEF")
+ ("CUI1" "MRREL") ("CUI2" "MRREL") ("SAB" "MRREL")
+ ("RUI" "MRREL") ("AUI1" "MRREL") ("AUI2" "MRREL")
+ ("CUI" "MRSAT") ("LUI" "MRSAT") ("SUI" "MRSAT")
+ ("METAUI" "MRSAT") ("ATN" "MRSAT")
+ ("CUI" "MRSTY") ("TUI" "MRSTY") ("CUI" "MRXNS_ENG")
+ ("AUI" "MRHIER") ("CUI" "MRHIER") ("CXN" "MRHIER") ("RELA" "MRHIER") ("PAUI" "MRHIER")
+ ("SAB" "MRHIER")
#+ignore ("NSTR" "MRXNS_ENG" 10)
("CUI" "MRXNW_ENG") ("NWD" "MRXNW_ENG") ("WD" "MRXW_ENG")
("KCUISUI" "MRCONSO") ("KCUILUI" "MRCONSO") ("KCUILRL" "MRCONSO")
- ("KLUILRL" "MRCONSO") ("KCUISUI" "MRCXT")
+ ("KLUILRL" "MRCONSO")
("KCUISUI" "MRSAT") ("KCUILUI" "MRSAT")
- ("KCUISUI" "MRXW_ENG") ("KCUISUI" "MRXNW_ENG")
+ ("KCUISUI" "MRXW_ENG") ("KCUISUI" "MRXNW_ENG")
("KCUISUI" "MRXNS_ENG") ("KCUISUI" "MRXW_NONENG")
- ("KSRL" "MRCXT") ("KSRL" "MRDEF") ("KSRL" "MRRANK")
- ("KSRL" "MRREL") ("KSRL" "MRSAT") ("KLRL" "MRCOC")
- #+ignore ("KLRL" "MRLO") ;; deprecated
+ ("KSRL" "MRDEF") ("KSRL" "MRRANK")
+ ("KSRL" "MRREL") ("KSRL" "MRSAT") ("KLRL" "MRCOC")
("KLRL" "MRSTY") ("KLRL" "MRXW_ENG") ("KLRL" "MRXNW_ENG")
("KLRL" "MRXNS_ENG") ("KLRL" "MRXW_NONENG")
;; LEX indices
("EUI" "LRABR") ("EUI2" "LRABR") ("EUI" "LRAGR") ("EUI" "LRCMP") ("EUI" "LRMOD")
("EUI" "LRNOM") ("EUI2" "LRNOM") ("EUI" "LRPRN") ("EUI" "LRPRP") ("EUI" "LRSPL")
("EUI" "LRTRM") ("EUI" "LRTYP") ("EUI" "LRWD") ("WRD" "LRWD")
- ("BAS" "LRABR")
+ ("BAS" "LRABR")
;; Semantic NET indices
- ("UI" "SRSTRE1") ("UI2" "SRSTRE1") ("UI3" "SRSTRE1")
+ ("UI" "SRSTRE1") ("UI2" "SRSTRE1") ("UI3" "SRSTRE1")
("STY_RL" "SRDEF") ("RT" "SRDEF") ("STY_RL" "SRSTR") ("STY_RL2" "SRSTR")
("RL" "SRSTR")
-
+
("SRL" "MRSAB") ("RSAB" "MRSAB") ("VSAB" "MRSAB") ("RCUI" "MRSAB")
("VCUI" "MRSAB") ("LAT" "MRSAB") ("MAPSETCUI" "MRMAP") ("MAPSETCUI" "MRSMAP")
- ("CUI" "MRHIER") ("AUI" "MRHIER") ("PAUI" "MRHIER"))
+ ("CUI" "MRHIER"))
"Columns in files to index")
(add-ucols (gen-ucols-generic "SRFLD")))
(defun gen-ucols-meta ()
-"Initialize all umls columns"
+"Initialize all umls columns"
(let ((cols '()))
(with-umls-file (line "MRCOLS.RRF")
(destructuring-bind (col des ref min av max fil dty) line
(nreverse cols)))
(defun gen-ucols-custom ()
-"Initialize umls columns for custom columns"
+"Initialize umls columns for custom columns"
(loop for customcol in +custom-cols+
collect
(make-ucol (nth 1 customcol) "" 0 0 0 (eval (nth 3 customcol))
:custom-value-fun (nth 4 customcol))))
(defun gen-ucols-generic (col-filename)
-"Initialize for generic (LEX/NET) columns"
+"Initialize for generic (LEX/NET) columns"
(let ((cols '()))
(with-umls-file (line col-filename)
(destructuring-bind (nam des ref fil) line
;; needs to come last
(add-ufiles (gen-ufiles-custom)))
-
+
(defun gen-ufiles-generic (files-filename dir)
-"Initialize all LEX file structures"
+"Initialize all LEX file structures"
(let ((files '()))
(with-umls-file (line files-filename)
(destructuring-bind (fil des fmt cls rws bts) line
(push (make-ufile
- dir fil des
+ dir fil des
(parse-integer cls)
(parse-integer rws) (parse-integer bts)
(concatenate 'list (umls-field-string-to-list fmt)
(nreverse files)))
(defun gen-ufiles-custom ()
- (make-ufile "META" "MRXW_NONENG.RRF" "Custom NonEnglish Index"
+ (make-ufile "META" "MRXW_NONENG.RRF" "Custom NonEnglish Index"
5 0 0 (fields (find-ufile "MRXW_ENG.RRF"))))