1 ;;;; -*- Mode: Lisp; Syntax: ANSI-Common-Lisp; Base: 10; Package: umlisp -*-
2 ;;;; *************************************************************************
3 ;;;; FILE IDENTIFICATION
5 ;;;; Name: parse-rrf.lisp
6 ;;;; Purpose: Parsing and SQL insertion routines for UMLisp which may
7 ;;;; change from year to year
8 ;;;; Author: Kevin M. Rosenberg
13 ;;;; This file, part of UMLisp, is
14 ;;;; Copyright (c) 2000-2006 by Kevin M. Rosenberg, M.D.
16 ;;;; UMLisp users are granted the rights to distribute and use this software
17 ;;;; as governed by the terms of the GNU General Public License.
18 ;;;; *************************************************************************
22 ;;; Pre-read data for custom fields into hash tables
23 (defvar *preparse-hash-init?* nil)
25 (eval-when (:compile-toplevel :load-toplevel :execute)
26 (let ((pfstr-hash nil) ;; Preferred concept strings by CUI
27 (cui-lrl-hash nil) ;; LRL by CUI
28 (lui-lrl-hash nil) ;; LRL by LUI
29 (sui-lrl-hash nil) ;; LRL by SUI
30 (cuisui-lrl-hash nil) ;; LRL by CUISUI
31 (sab-srl-hash nil)) ;; SRL by SAB
33 (defun make-preparse-hash-table ()
37 (clrhash cui-lrl-hash)
38 (clrhash lui-lrl-hash)
39 (clrhash sui-lrl-hash)
40 (clrhash cuisui-lrl-hash)
41 (clrhash sab-srl-hash))
43 pfstr-hash (make-hash-table :size 1500000)
44 cui-lrl-hash (make-hash-table :size 1500000)
45 lui-lrl-hash (make-hash-table :size 5000000)
46 sui-lrl-hash (make-hash-table :size 6000000)
47 cuisui-lrl-hash (make-hash-table :size 6000000)
48 sab-srl-hash (make-hash-table :size 200 :test 'equal))))
50 (defun ensure-preparse (&optional (force-read nil))
51 (when (and *preparse-hash-init?* (not force-read))
52 (return-from ensure-preparse 'already-done))
53 (make-preparse-hash-table)
55 (declare (fixnum counter)
57 (with-umls-file (line "MRCONSO.RRF")
58 (let ((cui (parse-ui (nth 0 line)))
59 (lui (parse-ui (nth 3 line)))
60 (sui (parse-ui (nth 5 line)))
62 (srl (parse-integer (nth 15 line))))
64 (when (= 0 (mod (incf counter) 100000)) (sb-ext:gc :full t))
66 ;; pfstr deprecated by KPKENG field in MRCONSO
68 (unless (gethash cui pfstr-hash) ;; if haven't stored pfstr for cui
69 (when (and (string-equal (nth 1 line) "ENG") ; LAT
70 (string-equal (nth 2 line) "P") ; ts
71 (string-equal (nth 4 line) "PF")) ; stt
72 (setf (gethash cui pfstr-hash) (nth 14 line))))
73 (set-lrl-hash cui srl cui-lrl-hash)
74 (set-lrl-hash lui srl lui-lrl-hash)
75 (set-lrl-hash sui srl sui-lrl-hash)
76 (set-lrl-hash (make-cuisui cui sui) srl cuisui-lrl-hash)
77 (multiple-value-bind (val found) (gethash sab sab-srl-hash)
78 (declare (ignore val))
80 (setf (gethash sab sab-srl-hash) srl))))))
81 (setq *preparse-hash-init?* t)
84 #+nil (defun pfstr-hash (cui) (gethash cui pfstr-hash))
85 (defun cui-lrl (cui) (gethash cui cui-lrl-hash))
86 (defun lui-lrl (lui) (gethash lui lui-lrl-hash))
87 (defun sui-lrl (sui) (gethash sui sui-lrl-hash))
88 (defun sab-srl (sab) (aif (gethash sab sab-srl-hash) it 0))
89 (defun cuisui-lrl (cuisui) (gethash cuisui cuisui-lrl-hash))
93 (defun set-lrl-hash (key lrl hash)
94 "Set the least restrictive level in hash table"
95 (multiple-value-bind (hash-lrl found) (gethash key hash)
96 (if (or (not found) (< lrl hash-lrl))
97 (setf (gethash key hash) lrl))))
99 ;; UMLS file and column structures
100 ;;; SQL datatypes symbols
101 ;;; sql-u - Unique identifier
102 ;;; sql-s - Small integer (16-bit)
103 ;;; sql-i - Integer (32-bit)
104 ;;; sql-l - Big integer (64-bit)
105 ;;; sql-f - Floating point
106 ;;; sql-c - Character data
108 (defparameter +col-datatypes+
109 '(("AV" sql-f) ("BTS" sql-i) ("CLS" sql-i) ("COF" sql-i) ("CUI1" sql-u)
110 ("AUI" sql-u) ("AUI1" sql-u) ("AUI2" sql-u) ("PCUI" sql-u)
111 ("PLUI" sql-u) ("PAUI" sql-u) ("RUI" sql-u)
112 ("CUI2" sql-u) ("CUI" sql-u) ("CXN" sql-s) ("FR" sql-i) ("LRL" sql-s)
113 ("LUI" sql-u) ("MAX" sql-s) ("MIN" sql-s) ("RANK" sql-s) ("REF" sql-c)
115 ("RNK" sql-s) ("RWS" sql-i) ("SRL" sql-s) ("SUI" sql-u) ("TUI" sql-u)
118 ("KCUISUI" sql-l) ("KCUILUI" sql-l) ("KCUILRL" sql-i) ("KLUILRL" sql-i)
120 ("KSRL" sql-i) ("KLRL" sql-i)
122 ("EUI" sql-u) ("EUI2" sql-u)
123 ;;; Semantic net columns
124 ("UI" sql-u) ("UI2" sql-u) ("UI3" sql-u)
125 ;; New fields for 2002AD
126 ("RCUI" sql-u) ("VCUI" sql-u) ("CFR" sql-i) ("TFR" sql-i)
127 ;; New fields for 2004AA
130 "SQL data types for each non-string column")
132 (defparameter +custom-tables+
135 '(("KCON" "SELECT CUI,STR FROM MRCONSO WHERE STT='PF' AND TS='P' AND ISPREF='Y' AND LAT='ENG'"))
136 "Custom tables to create")
138 (defparameter +custom-cols+
139 '(#+nil ("MRCONSO.RRF" "KPFSTR" "TEXT"
140 (slot-value (find-ucol "STR" "MRCONSO.RRF") 'max)
141 (lambda (x) (pfstr-hash (parse-ui (nth 0 x)))))
142 ;; Set to 1 if term is prefered term for english
143 ("MRCONSO.RRF" "KPFENG" "TINYINT" 0
144 (lambda (x) (if (and (string-equal (nth 1 x) "ENG") ; LAT
145 (string-equal (nth 2 x) "P") ; ts
146 (string-equal (nth 4 x) "PF")) ; stt
149 ("MRCONSO.RRF" "KCUISUI" "BIGINT" 0
150 (lambda (x) (write-to-string (make-cuisui (parse-ui (nth 0 x)) (parse-ui (nth 5 x))))))
151 ("MRCONSO.RRF" "KCUILUI" "BIGINT" 0
152 (lambda (x) (write-to-string (make-cuilui (parse-ui (nth 0 x)) (parse-ui (nth 3 x))))))
153 ("MRCONSO.RRF" "KCUILRL" "SMALLINT" 0
154 (lambda (x) (write-to-string (lui-lrl (parse-ui (nth 0 x))))))
155 ("MRCONSO.RRF" "KLUILRL" "SMALLINT" 0
156 (lambda (x) (write-to-string (lui-lrl (parse-ui (nth 3 x))))))
157 ("MRCONSO.RRF" "KSUILRL" "SMALLINT" 0
158 (lambda (x) (write-to-string (sui-lrl (parse-ui (nth 5 x))))))
159 ("MRSTY.RRF" "KLRL" "SMALLINT" 0
160 (lambda (x) (write-to-string (cui-lrl (parse-ui (nth 0 x))))))
161 ("MRCOC.RRF" "KLRL" "SMALLINT" 0
162 (lambda (x) (write-to-string
163 (max (cui-lrl (parse-ui (nth 0 x)))
164 (kmrcl:aif (cui-lrl (parse-ui (nth 1 x))) kmrcl::it 0)))))
165 ("MRSAT.RRF" "KSRL" "SMALLINT" 0
166 (lambda (x) (write-to-string (sab-srl (nth 9 x)))))
167 ("MRREL.RRF" "KSRL" "SMALLINT" 0
168 (lambda (x) (write-to-string (sab-srl (nth 10 x)))))
169 ("MRRANK.RRF" "KSRL" "SMALLINT" 0
170 (lambda (x) (write-to-string (sab-srl (nth 1 x)))))
171 ("MRHIER.RRF" "KSRL" "SMALLINT" 0
172 (lambda (x) (write-to-string (sab-srl (nth 4 x)))))
173 ("MRMAP.RRF" "KSRL" "SMALLINT" 0
174 (lambda (x) (write-to-string (sab-srl (nth 1 x)))))
175 ("MRSMAP.RRF" "KSRL" "SMALLINT" 0
176 (lambda (x) (write-to-string (sab-srl (nth 1 x)))))
177 ("MRDEF.RRF" "KSRL" "SMALLINT" 0
178 (lambda (x) (write-to-string (sab-srl (nth 4 x)))))
179 #+nil ("MRCXT.RRF" "KSRL" "SMALLINT" 0 (lambda (x) (write-to-string (sab-srl (nth 2 x)))))
180 ("MRXW_ENG.RRF" "KLRL" "SMALLINT" 0
181 (lambda (x) (write-to-string (cuisui-lrl (make-cuisui
183 (parse-ui (nth 4 x)))))))
184 ("MRXW_NONENG.RRF" "KLRL" "SMALLINT" 0
185 (lambda (x) (write-to-string (cuisui-lrl (make-cuisui
187 (parse-ui (nth 4 x)))))))
188 ("MRXNW_ENG.RRF" "KLRL" "SMALLINT" 0
189 (lambda (x) (write-to-string (cuisui-lrl (make-cuisui
191 (parse-ui (nth 4 x)))))))
192 ("MRXNS_ENG.RRF" "KLRL" "SMALLINT" 0
193 (lambda (x) (write-to-string (cuisui-lrl (make-cuisui
195 (parse-ui (nth 4 x)))))))
197 #+nil ("MRREL.RRF" "KPFSTR2" "TEXT" 1024 (lambda (x) (pfstr-hash (parse-ui (nth 4 x)))))
198 #+nil ("MRCOC.RRF" "KPFSTR2" "TEXT" 1024 (lambda (x) (pfstr-hash (parse-ui (nth 2 x)))))
199 ("MRSAT.RRF" "KCUILUI" "BIGINT" 0
200 (lambda (x) (write-to-string (make-cuilui (parse-ui (nth 0 x)) (parse-ui (nth 1 x))))))
201 ("MRSAT.RRF" "KCUISUI" "BIGINT" 0
202 (lambda (x) (write-to-string (make-cuisui (parse-ui (nth 0 x)) (parse-ui (nth 2 x))))))
203 ("MRXW_ENG.RRF" "KCUISUI" "BIGINT" 0
204 (lambda (x) (write-to-string (make-cuisui (parse-ui (nth 2 x)) (parse-ui (nth 4 x))))))
205 ("MRXNW_ENG.RRF" "KCUISUI" "BIGINT" 0
206 (lambda (x) (write-to-string (make-cuisui (parse-ui (nth 2 x)) (parse-ui (nth 4 x))))))
207 ("MRXNS_ENG.RRF" "KCUISUI" "BIGINT" 0
208 (lambda (x) (write-to-string (make-cuisui (parse-ui (nth 2 x)) (parse-ui (nth 4 x))))))
209 ("MRXW_NONENG.RRF" "LAT" "VARCHAR" 3 (lambda (x) (nth 0 x)))
210 ("MRXW_NONENG.RRF" "WD" "VARCHAR" 200 (lambda (x) (nth 1 x)))
211 ("MRXW_NONENG.RRF" "CUI" "INTEGER" 0 (lambda (x) (write-to-string (parse-ui (nth 2 x)))))
212 ("MRXW_NONENG.RRF" "LUI" "INTEGER" 0 (lambda (x) (write-to-string (parse-ui (nth 3 x)))))
213 ("MRXW_NONENG.RRF" "SUI" "INTEGER" 0 (lambda (x) (write-to-string (parse-ui (nth 4 x)))))
214 ("MRXW_NONENG.RRF" "KCUISUI" "BIGINT" 0
215 (lambda (x) (write-to-string (make-cuisui (parse-ui (nth 2 x)) (parse-ui (nth 4 x)))))))
216 "Custom columns to create.(filename, col, sqltype, value-func).")
218 (defparameter +index-cols+
219 '(("CUI1" "MRCOC") ("CUI" "MRCONSO") ("LUI" "MRCONSO")
220 ("SRL" "MRCONSO") ("AUI" "MRCONSO") ("KPFENG" "MRCONSO")
221 ("SUI" "MRCONSO") ("SAUI" "MRCONSO") ("CODE" "MRCONSO")
224 ("CUI1" "MRREL") ("CUI2" "MRREL") ("SAB" "MRREL")
225 ("RUI" "MRREL") ("AUI1" "MRREL") ("AUI2" "MRREL")
226 ("CUI" "MRSAT") ("LUI" "MRSAT") ("SUI" "MRSAT")
227 ("METAUI" "MRSAT") ("ATN" "MRSAT")
228 ("CUI" "MRSTY") ("TUI" "MRSTY") ("CUI" "MRXNS_ENG")
229 ("AUI" "MRHIER") ("CUI" "MRHIER") ("CXN" "MRHIER") ("RELA" "MRHIER") ("PAUI" "MRHIER")
231 #+ignore ("NSTR" "MRXNS_ENG" 10)
232 ("CUI" "MRXNW_ENG") ("NWD" "MRXNW_ENG") ("WD" "MRXW_ENG")
233 ("KCUISUI" "MRCONSO") ("KCUILUI" "MRCONSO") ("KCUILRL" "MRCONSO")
234 ("KLUILRL" "MRCONSO")
235 ("KCUISUI" "MRSAT") ("KCUILUI" "MRSAT")
236 ("KCUISUI" "MRXW_ENG") ("KCUISUI" "MRXNW_ENG")
237 ("KCUISUI" "MRXNS_ENG") ("KCUISUI" "MRXW_NONENG")
238 ("KSRL" "MRDEF") ("KSRL" "MRRANK")
239 ("KSRL" "MRREL") ("KSRL" "MRSAT") ("KLRL" "MRCOC")
240 ("KLRL" "MRSTY") ("KLRL" "MRXW_ENG") ("KLRL" "MRXNW_ENG")
241 ("KLRL" "MRXNS_ENG") ("KLRL" "MRXW_NONENG")
243 ("EUI" "LRABR") ("EUI2" "LRABR") ("EUI" "LRAGR") ("EUI" "LRCMP") ("EUI" "LRMOD")
244 ("EUI" "LRNOM") ("EUI2" "LRNOM") ("EUI" "LRPRN") ("EUI" "LRPRP") ("EUI" "LRSPL")
245 ("EUI" "LRTRM") ("EUI" "LRTYP") ("EUI" "LRWD") ("WRD" "LRWD")
247 ;; Semantic NET indices
248 ("UI" "SRSTRE1") ("UI2" "SRSTRE1") ("UI3" "SRSTRE1")
249 ("STY_RL" "SRDEF") ("RT" "SRDEF") ("STY_RL" "SRSTR") ("STY_RL2" "SRSTR")
252 ("SRL" "MRSAB") ("RSAB" "MRSAB") ("VSAB" "MRSAB") ("RCUI" "MRSAB")
253 ("VCUI" "MRSAB") ("LAT" "MRSAB") ("MAPSETCUI" "MRMAP") ("MAPSETCUI" "MRSMAP")
255 "Columns in files to index")
258 (defparameter +custom-index-cols+
261 '(("CUI" "KCON") ("LRL" "KCON"))
262 "Indexes to custom tables")
264 ;; File & Column functions
267 (add-ucols (gen-ucols-meta))
268 (add-ucols (gen-ucols-custom))
269 (add-ucols (gen-ucols-generic "LRFLD"))
270 (add-ucols (gen-ucols-generic "SRFLD")))
272 (defun gen-ucols-meta ()
273 "Initialize all umls columns"
275 (with-umls-file (line "MRCOLS.RRF")
276 (destructuring-bind (col des ref min av max fil dty) line
277 (push (make-ucol col des ref (parse-integer min) (read-from-string av)
278 (parse-integer max) fil dty)
282 (defun gen-ucols-custom ()
283 "Initialize umls columns for custom columns"
284 (loop for customcol in +custom-cols+
286 (make-ucol (nth 1 customcol) "" 0 0 0 (eval (nth 3 customcol))
287 (nth 0 customcol) nil :sqltype (canonicalize-column-type (nth 2 customcol))
288 :custom-value-fun (nth 4 customcol))))
290 (defun gen-ucols-generic (col-filename)
291 "Initialize for generic (LEX/NET) columns"
293 (with-umls-file (line col-filename)
294 (destructuring-bind (nam des ref fil) line
295 (setq nam (escape-column-name nam))
296 (dolist (file (delimited-string-to-list fil #\,))
298 (make-ucol nam des ref nil nil nil file nil)
304 (add-ufiles (gen-ufiles-generic "MRFILES.RRF" "META"))
305 (add-ufiles (gen-ufiles-generic "LRFIL" "LEX"))
306 (add-ufiles (gen-ufiles-generic "SRFIL" "NET"))
307 ;; needs to come last
308 (add-ufiles (gen-ufiles-custom)))
311 (defun gen-ufiles-generic (files-filename dir)
312 "Initialize generic UMLS file structures"
314 (with-umls-file (line files-filename)
315 (destructuring-bind (fil des fmt cls rws bts) line
319 (parse-integer rws) (parse-integer bts)
320 (concatenate 'list (umls-field-string-to-list fmt)
321 (custom-colnames-for-filename fil)))
325 (defun gen-ufiles-custom ()
326 (make-ufile "META" "MRXW_NONENG.RRF" "Custom NonEnglish Index"
327 5 0 0 (fields (find-ufile "MRXW_ENG.RRF"))))