X-Git-Url: http://git.kpe.io/?p=umlisp-orf.git;a=blobdiff_plain;f=parse-2002.lisp;h=153b85e797243b1b0a171cc9bc6e9609aca60e75;hp=811d56905733b4ea67d89bbee25e2461163b223d;hb=3c963bdf7389ec0d00b893fb7b7757ab884f3222;hpb=d8fe27c58aa49f4a19f8b0dc11f97e0db7662e9e diff --git a/parse-2002.lisp b/parse-2002.lisp index 811d569..153b85e 100644 --- a/parse-2002.lisp +++ b/parse-2002.lisp @@ -28,80 +28,80 @@ (lui-lrl-hash nil) ;;; LRL by LUI (cuisui-lrl-hash nil) ;;; LRL by CUISUI (sab-srl-hash nil)) ;;; SRL by SAB - + (defun make-preparse-hash-table () (if pfstr-hash - (progn - (clrhash pfstr-hash) - (clrhash cui-lrl-hash) - (clrhash lui-lrl-hash) - (clrhash cuisui-lrl-hash) - (clrhash sab-srl-hash)) + (progn + (clrhash pfstr-hash) + (clrhash cui-lrl-hash) + (clrhash lui-lrl-hash) + (clrhash cuisui-lrl-hash) + (clrhash sab-srl-hash)) (setf - pfstr-hash (make-hash-table :size 800000) - cui-lrl-hash (make-hash-table :size 800000) - lui-lrl-hash (make-hash-table :size 1500000) - cuisui-lrl-hash (make-hash-table :size 1800000) - sab-srl-hash (make-hash-table :size 100 :test 'equal)))) - + pfstr-hash (make-hash-table :size 800000) + cui-lrl-hash (make-hash-table :size 800000) + lui-lrl-hash (make-hash-table :size 1500000) + cuisui-lrl-hash (make-hash-table :size 1800000) + sab-srl-hash (make-hash-table :size 100 :test 'equal)))) + (defun buffered-ensure-preparse (&optional (force-read nil)) (when (or force-read (not *preparse-hash-init?*)) (make-preparse-hash-table) (setq *preparse-hash-init?* t)) (with-buffered-umls-file (line "MRCON") (let ((cui (parse-ui (aref line 0))) - (lui (parse-ui (aref line 3))) - (sui (parse-ui (aref line 5))) - (lrl (parse-integer (aref line 7)))) - (unless (gethash cui pfstr-hash) ;; if haven't stored pfstr for cui - (if (and (string-equal (aref line 1) "ENG") ; LAT - (string-equal (aref line 2) "P") ; ts - (string-equal (aref line 4) "PF")) ; stt - (setf (gethash cui pfstr-hash) (aref line 6)))) - (set-lrl-hash cui lrl cui-lrl-hash) - (set-lrl-hash lui lrl lui-lrl-hash) - (set-lrl-hash (make-cuisui cui sui) lrl cuisui-lrl-hash))) + (lui (parse-ui (aref line 3))) + (sui (parse-ui (aref line 5))) + (lrl (parse-integer (aref line 7)))) + (unless (gethash cui pfstr-hash) ;; if haven't stored pfstr for cui + (if (and (string-equal (aref line 1) "ENG") ; LAT + (string-equal (aref line 2) "P") ; ts + (string-equal (aref line 4) "PF")) ; stt + (setf (gethash cui pfstr-hash) (aref line 6)))) + (set-lrl-hash cui lrl cui-lrl-hash) + (set-lrl-hash lui lrl lui-lrl-hash) + (set-lrl-hash (make-cuisui cui sui) lrl cuisui-lrl-hash))) (with-buffered-umls-file (line "MRSO") (let ((sab (aref line 3))) - (unless (gethash sab sab-srl-hash) ;; if haven't stored - (setf (gethash sab sab-srl-hash) (aref line 6)))))) - + (unless (gethash sab sab-srl-hash) ;; if haven't stored + (setf (gethash sab sab-srl-hash) (aref line 6)))))) + (defun ensure-preparse (&optional (force-read nil)) (when (or force-read (not *preparse-hash-init?*)) (make-preparse-hash-table) (setq *preparse-hash-init?* t)) (with-umls-file (line "MRCON") (let ((cui (parse-ui (nth 0 line))) - (lui (parse-ui (nth 3 line))) - (sui (parse-ui (nth 5 line))) - (lrl (parse-integer (nth 7 line)))) - (unless (gethash cui pfstr-hash) ;; if haven't stored pfstr for cui - (if (and (string-equal (nth 1 line) "ENG") ; LAT - (string-equal (nth 2 line) "P") ; ts - (string-equal (nth 4 line) "PF")) ; stt - (setf (gethash cui pfstr-hash) (nth 6 line)))) - (set-lrl-hash cui lrl cui-lrl-hash) - (set-lrl-hash lui lrl lui-lrl-hash) - (set-lrl-hash (make-cuisui cui sui) lrl cuisui-lrl-hash))) + (lui (parse-ui (nth 3 line))) + (sui (parse-ui (nth 5 line))) + (lrl (parse-integer (nth 7 line)))) + (unless (gethash cui pfstr-hash) ;; if haven't stored pfstr for cui + (if (and (string-equal (nth 1 line) "ENG") ; LAT + (string-equal (nth 2 line) "P") ; ts + (string-equal (nth 4 line) "PF")) ; stt + (setf (gethash cui pfstr-hash) (nth 6 line)))) + (set-lrl-hash cui lrl cui-lrl-hash) + (set-lrl-hash lui lrl lui-lrl-hash) + (set-lrl-hash (make-cuisui cui sui) lrl cuisui-lrl-hash))) (with-umls-file (line "MRSO") (let ((sab (nth 3 line))) - (multiple-value-bind (val found) (gethash sab sab-srl-hash) - (declare (ignore val)) - (unless found - (setf (gethash sab sab-srl-hash) (parse-integer (nth 6 line)))))))) - + (multiple-value-bind (val found) (gethash sab sab-srl-hash) + (declare (ignore val)) + (unless found + (setf (gethash sab sab-srl-hash) (parse-integer (nth 6 line)))))))) + (defun pfstr-hash (cui) (gethash cui pfstr-hash)) - + (defun cui-lrl (cui) (gethash cui cui-lrl-hash)) - + (defun lui-lrl (lui) (gethash lui lui-lrl-hash)) - + (defun cuisui-lrl (cuisui) (gethash cuisui cuisui-lrl-hash)) - + (defun sab-srl (sab) (aif (gethash sab sab-srl-hash) it 0)) )) ;; closure @@ -110,7 +110,7 @@ "Set the least restrictive level in hash table" (multiple-value-bind (hash-lrl found) (gethash key hash) (if (or (not found) (< lrl hash-lrl)) - (setf (gethash key hash) lrl)))) + (setf (gethash key hash) lrl)))) ;; UMLS file and column structures ;;; SQL datatypes symbols @@ -135,7 +135,7 @@ ("UI" sql-u) ("UI2" sql-u) ("UI3" sql-u) ;; New fields for 2002AD ("RCUI" sql-u) ("VCUI" sql-u) ("CFR" sql-i) ("TFR" sql-i) - ) + ) "SQL data types for each non-string column") (defparameter +custom-tables+ @@ -147,7 +147,7 @@ (defparameter +custom-cols+ '(("MRCON" "KPFSTR" "TEXT" 1024 - (lambda (x) (pfstr-hash (parse-ui (nth 0 x))))) + (lambda (x) (pfstr-hash (parse-ui (nth 0 x))))) ("MRCON" "KCUISUI" "BIGINT" 0 (lambda (x) (write-to-string (make-cuisui (parse-ui (nth 0 x)) (parse-ui (nth 5 x)))))) ("MRCON" "KCUILUI" "BIGINT" 0 @@ -157,16 +157,16 @@ ("MRCON" "KLUILRL" "INTEGER" 0 (lambda (x) (write-to-string (lui-lrl (parse-ui (nth 3 x)))))) ("MRLO" "KLRL" "INTEGER" 0 - (lambda (x) (write-to-string - (if (zerop (length (nth 4 x))) - (cui-lrl (parse-ui (nth 0 x))) - (cuisui-lrl (make-cuisui (parse-ui (nth 0 x)) (parse-ui (nth 4 x)))))))) + (lambda (x) (write-to-string + (if (zerop (length (nth 4 x))) + (cui-lrl (parse-ui (nth 0 x))) + (cuisui-lrl (make-cuisui (parse-ui (nth 0 x)) (parse-ui (nth 4 x)))))))) ("MRSTY" "KLRL" "INTEGER" 0 (lambda (x) (write-to-string (cui-lrl (parse-ui (nth 0 x)))))) ("MRCOC" "KLRL" "INTEGER" 0 - (lambda (x) (write-to-string - (max (cui-lrl (parse-ui (nth 0 x))) - (kmrcl:aif (cui-lrl (parse-ui (nth 1 x))) kmrcl::it 0))))) + (lambda (x) (write-to-string + (max (cui-lrl (parse-ui (nth 0 x))) + (kmrcl:aif (cui-lrl (parse-ui (nth 1 x))) kmrcl::it 0))))) ("MRSAT" "KSRL" "INTEGER" 0 (lambda (x) (write-to-string (sab-srl (nth 5 x))))) ("MRREL" "KSRL" "INTEGER" 0 @@ -180,26 +180,26 @@ ("MRATX" "KSRL" "INTEGER" 0 (lambda (x) (write-to-string (sab-srl (nth 1 x))))) ("MRXW.ENG" "KLRL" "INTEGER" 0 - (lambda (x) (write-to-string (cuisui-lrl (make-cuisui - (parse-ui (nth 2 x)) - (parse-ui (nth 4 x))))))) + (lambda (x) (write-to-string (cuisui-lrl (make-cuisui + (parse-ui (nth 2 x)) + (parse-ui (nth 4 x))))))) ("MRXW.NONENG" "KLRL" "INTEGER" 0 - (lambda (x) (write-to-string (cuisui-lrl (make-cuisui - (parse-ui (nth 2 x)) - (parse-ui (nth 4 x))))))) + (lambda (x) (write-to-string (cuisui-lrl (make-cuisui + (parse-ui (nth 2 x)) + (parse-ui (nth 4 x))))))) ("MRXNW.ENG" "KLRL" "INTEGER" 0 - (lambda (x) (write-to-string (cuisui-lrl (make-cuisui - (parse-ui (nth 2 x)) - (parse-ui (nth 4 x))))))) + (lambda (x) (write-to-string (cuisui-lrl (make-cuisui + (parse-ui (nth 2 x)) + (parse-ui (nth 4 x))))))) ("MRXNS.ENG" "KLRL" "INTEGER" 0 - (lambda (x) (write-to-string (cuisui-lrl (make-cuisui - (parse-ui (nth 2 x)) - (parse-ui (nth 4 x))))))) + (lambda (x) (write-to-string (cuisui-lrl (make-cuisui + (parse-ui (nth 2 x)) + (parse-ui (nth 4 x))))))) ("MRREL" "KPFSTR2" "TEXT" 1024 (lambda (x) (pfstr-hash (parse-ui (nth 2 x))))) ("MRCOC" "KPFSTR2" "TEXT" 1024 (lambda (x) (pfstr-hash (parse-ui (nth 1 x))))) - ("MRCXT" "KCUISUI" "BIGINT" 0 + ("MRCXT" "KCUISUI" "BIGINT" 0 (lambda (x) (write-to-string (make-cuisui (parse-ui (nth 0 x)) (parse-ui (nth 1 x)))))) ("MRSAT" "KCUILUI" "BIGINT" 0 (lambda (x) (write-to-string (make-cuilui (parse-ui (nth 0 x)) (parse-ui (nth 1 x)))))) @@ -218,35 +218,35 @@ ("MRXW.NONENG" "CUI" "INTEGER" 0 (lambda (x) (write-to-string (parse-ui (nth 2 x))))) ("MRXW.NONENG" "LUI" "INTEGER" 0 (lambda (x) (write-to-string (parse-ui (nth 3 x))))) ("MRXW.NONENG" "SUI" "INTEGER" 0 (lambda (x) (write-to-string (parse-ui (nth 4 x))))) - ("MRXW.NONENG" "KCUISUI" "BIGINT" 0 + ("MRXW.NONENG" "KCUISUI" "BIGINT" 0 (lambda (x) (write-to-string (make-cuisui (parse-ui (nth 2 x)) (parse-ui (nth 4 x))))))) "Custom columns to create.(filename, col, sqltype, value-func).") (defparameter +index-cols+ - '(("CUI" "MRATX") ("CUI1" "MRCOC") ("CUI" "MRCON") ("LUI" "MRCON") + '(("CUI" "MRATX") ("CUI1" "MRCOC") ("CUI" "MRCON") ("LUI" "MRCON") ("LRL" "MRCON") ("SUI" "MRCON") ("CUI" "MRCXT") ("CUI" "MRDEF") ("CUI" "MRLO") ("CUI1" "MRREL") ("CUI" "MRSAT") ("LUI" "MRSAT") ("SUI" "MRSAT") ("CUI" "MRSO") ("SAB" "MRSO") ("SRL" "MRSO") ("CUI" "MRSTY") - ("TUI" "MRSTY") ("CUI" "MRXNS_ENG") + ("TUI" "MRSTY") ("CUI" "MRXNS_ENG") #+ignore ("NSTR" "MRXNS_ENG" 10) ("CUI" "MRXNW_ENG") ("NWD" "MRXNW_ENG") ("WD" "MRXW_ENG") ("KCUISUI" "MRCON") ("KCUILUI" "MRCON") ("KCUILRL" "MRCON") - ("KLUILRL" "MRCON") ("KCUISUI" "MRCXT") + ("KLUILRL" "MRCON") ("KCUISUI" "MRCXT") ("KCUISUI" "MRSO") ("KCUISUI" "MRSAT") ("KCUILUI" "MRSAT") - ("KCUISUI" "MRXW_ENG") ("KCUISUI" "MRXNW_ENG") + ("KCUISUI" "MRXW_ENG") ("KCUISUI" "MRXNW_ENG") ("KCUISUI" "MRXNS_ENG") ("KCUISUI" "MRXW_NONENG") - ("KSRL" "MRATX") ("KSRL" "MRCXT") ("KSRL" "MRDEF") ("KSRL" "MRRANK") - ("KSRL" "MRREL") ("KSRL" "MRSAT") ("KLRL" "MRCOC") + ("KSRL" "MRATX") ("KSRL" "MRCXT") ("KSRL" "MRDEF") ("KSRL" "MRRANK") + ("KSRL" "MRREL") ("KSRL" "MRSAT") ("KLRL" "MRCOC") ("KLRL" "MRLO") ("KLRL" "MRSTY") ("KLRL" "MRXW_ENG") ("KLRL" "MRXNW_ENG") ("KLRL" "MRXNS_ENG") ("KLRL" "MRXW_NONENG") ;; LEX indices ("EUI" "LRABR") ("EUI2" "LRABR") ("EUI" "LRAGR") ("EUI" "LRCMP") ("EUI" "LRMOD") ("EUI" "LRNOM") ("EUI2" "LRNOM") ("EUI" "LRPRN") ("EUI" "LRPRP") ("EUI" "LRSPL") ("EUI" "LRTRM") ("EUI" "LRTYP") ("EUI" "LRWD") ("WRD" "LRWD") - ("BAS" "LRABR") + ("BAS" "LRABR") ;; Semantic NET indices - ("UI" "SRSTRE1") ("UI2" "SRSTRE1") ("UI3" "SRSTRE1") + ("UI" "SRSTRE1") ("UI2" "SRSTRE1") ("UI3" "SRSTRE1") ("STY_RL" "SRDEF") ("RT" "SRDEF") ("STY_RL" "SRSTR") ("STY_RL2" "SRSTR") ("RL" "SRSTR") ("SRL" "MRSAB") ("RSAB" "MRSAB") ("VSAB" "MRSAB") ("RCUI" "MRSAB") @@ -269,33 +269,33 @@ (add-ucols (gen-ucols-generic "SRFLD"))) (defun gen-ucols-meta () -"Initialize all umls columns" +"Initialize all umls columns" (let ((cols '())) (with-umls-file (line "MRCOLS") (destructuring-bind (col des ref min av max fil dty) line - (push (make-ucol col des ref (parse-integer min) (read-from-string av) - (parse-integer max) fil dty) - cols))) + (push (make-ucol col des ref (parse-integer min) (read-from-string av) + (parse-integer max) fil dty) + cols))) (nreverse cols))) (defun gen-ucols-custom () -"Initialize umls columns for custom columns" +"Initialize umls columns for custom columns" (loop for customcol in +custom-cols+ - collect - (make-ucol (nth 1 customcol) "" 0 0 0 (nth 3 customcol) - (nth 0 customcol) nil :sqltype (nth 2 customcol) - :custom-value-fun (nth 4 customcol)))) + collect + (make-ucol (nth 1 customcol) "" 0 0 0 (nth 3 customcol) + (nth 0 customcol) nil :sqltype (nth 2 customcol) + :custom-value-fun (nth 4 customcol)))) (defun gen-ucols-generic (col-filename) -"Initialize for generic (LEX/NET) columns" +"Initialize for generic (LEX/NET) columns" (let ((cols '())) (with-umls-file (line col-filename) (destructuring-bind (nam des ref fil) line - (setq nam (escape-column-name nam)) - (dolist (file (delimited-string-to-list fil #\,)) - (push - (make-ucol nam des ref nil nil nil file nil) - cols)))) + (setq nam (escape-column-name nam)) + (dolist (file (delimited-string-to-list fil #\,)) + (push + (make-ucol nam des ref nil nil nil file nil) + cols)))) (nreverse cols))) @@ -306,23 +306,23 @@ ;; needs to come last (add-ufiles (gen-ufiles-custom))) - + (defun gen-ufiles-generic (files-filename) -"Initialize all LEX file structures" +"Initialize all LEX file structures" (let ((files '())) (with-umls-file (line files-filename) (destructuring-bind (fil des fmt cls rws bts) line - (push (make-ufile - fil des (substitute #\_ #\. fil) (parse-integer cls) - (parse-integer rws) (parse-integer bts) - (concatenate 'list (umls-field-string-to-list fmt) - (custom-colnames-for-filename fil))) - files))) + (push (make-ufile + fil des (substitute #\_ #\. fil) (parse-integer cls) + (parse-integer rws) (parse-integer bts) + (concatenate 'list (umls-field-string-to-list fmt) + (custom-colnames-for-filename fil))) + files))) (nreverse files))) (defun gen-ufiles-custom () (make-ufile "MRXW.NONENG" "Custom NonEnglish Index" "MRXW_NONENG" - 5 0 0 (fields (find-ufile "MRXW.ENG")))) + 5 0 0 (fields (find-ufile "MRXW.ENG"))))