1 ;;;; -*- Mode: Lisp; Syntax: ANSI-Common-Lisp; Base: 10; Package: umlisp -*-
2 ;;;; *************************************************************************
3 ;;;; FILE IDENTIFICATION
5 ;;;; Name: parse-rrf.lisp
6 ;;;; Purpose: Parsing and SQL insertion routines for UMLisp which may
7 ;;;; change from year to year
8 ;;;; Author: Kevin M. Rosenberg
13 ;;;; This file, part of UMLisp, is
14 ;;;; Copyright (c) 2000-2006 by Kevin M. Rosenberg, M.D.
16 ;;;; UMLisp users are granted the rights to distribute and use this software
17 ;;;; as governed by the terms of the GNU General Public License.
18 ;;;; *************************************************************************
22 ;;; Pre-read data for custom fields into hash tables
(defvar *preparse-hash-init?* nil
  "Non-NIL once the pre-parse hash tables have been filled.
ENSURE-PREPARSE sets this to T after reading and checks it to avoid
re-reading unless a forced read is requested.")
25 (eval-when (:compile-toplevel :load-toplevel :execute)
27 (declaim (inline srl-to-srlus))
28 (defun srl-to-srlus (srl)
29 "Convert the standard SRL category to one oriented for use in the United States.
30 Specifically, SRL 4 in the USA has license restrictions between SRL 1 and 2 when
31 used in the United States. We create a new scale (SRLUS) where SRL to SRLUS mapping is:
32 (0->0, 1->1, 4->2, 2->3, 3->4)."
33 (declare (fixnum srl))
40 (let ((pfstr-hash nil) ;; Preferred concept strings by CUI
41 (cui-lrl-hash nil) ;; LRL by CUI
42 (lui-lrl-hash nil) ;; LRL by LUI
43 (sui-lrl-hash nil) ;; LRL by SUI
44 (cuisui-lrl-hash nil) ;; LRL by CUISUI
45 (cui-lrlus-hash nil) ;; LRLUS by CUI
46 (lui-lrlus-hash nil) ;; LRLUS by LUI
47 (sui-lrlus-hash nil) ;; LRLUS by SUI
48 (cuisui-lrlus-hash nil) ;; LRL by CUISUI
51 (sab-srlus-hash nil)) ;; SRL by SAB
53 (defun make-preparse-hash-table ()
57 (clrhash cui-lrl-hash)
58 (clrhash lui-lrl-hash)
59 (clrhash sui-lrl-hash)
60 (clrhash cuisui-lrl-hash)
61 (clrhash cui-lrlus-hash)
62 (clrhash lui-lrlus-hash)
63 (clrhash sui-lrlus-hash)
64 (clrhash cuisui-lrlus-hash)
65 (clrhash sab-srl-hash)
66 (clrhash sab-srlus-hash))
68 pfstr-hash (make-hash-table :size 1500000)
69 cui-lrl-hash (make-hash-table :size 1500000)
70 lui-lrl-hash (make-hash-table :size 5000000)
71 sui-lrl-hash (make-hash-table :size 6000000)
72 cuisui-lrl-hash (make-hash-table :size 6000000)
73 cui-lrlus-hash (make-hash-table :size 1500000)
74 lui-lrlus-hash (make-hash-table :size 5000000)
75 sui-lrlus-hash (make-hash-table :size 6000000)
76 cuisui-lrlus-hash (make-hash-table :size 6000000)
77 sab-srl-hash (make-hash-table :size 200 :test 'equal)
78 sab-srlus-hash (make-hash-table :size 200 :test 'equal))))
80 (defun ensure-preparse (&optional (force-read nil))
81 (when (and *preparse-hash-init?* (not force-read))
82 (return-from ensure-preparse 'already-done))
83 (make-preparse-hash-table)
85 (declare (fixnum counter)
87 (with-umls-file (line "MRCONSO.RRF")
88 (let* ((cui (parse-ui (nth 0 line)))
89 (lui (parse-ui (nth 3 line)))
90 (sui (parse-ui (nth 5 line)))
92 (srl (parse-integer (nth 15 line)))
93 (srlus (srl-to-srlus srl)))
95 (when (= 0 (mod (incf counter) 100000)) (sb-ext:gc :full t))
97 ;; pfstr deprecated by KPFENG field in MRCONSO
99 (unless (gethash cui pfstr-hash) ;; if haven't stored pfstr for cui
100 (when (and (string-equal (nth 1 line) "ENG") ; LAT
101 (string-equal (nth 2 line) "P") ; ts
102 (string-equal (nth 4 line) "PF")) ; stt
103 (setf (gethash cui pfstr-hash) (nth 14 line))))
104 (set-lrl-hash cui srl cui-lrl-hash)
105 (set-lrl-hash lui srl lui-lrl-hash)
106 (set-lrl-hash sui srl sui-lrl-hash)
107 (set-lrl-hash (make-cuisui cui sui) srl cuisui-lrl-hash)
108 (set-lrl-hash cui srlus cui-lrlus-hash)
109 (set-lrl-hash lui srlus lui-lrlus-hash)
110 (set-lrl-hash sui srlus sui-lrlus-hash)
111 (set-lrl-hash (make-cuisui cui sui) srlus cuisui-lrlus-hash)
112 (multiple-value-bind (val found) (gethash sab sab-srl-hash)
113 (declare (ignore val))
115 (setf (gethash sab sab-srl-hash) srl)))
116 (multiple-value-bind (val found) (gethash sab sab-srlus-hash)
117 (declare (ignore val))
119 (setf (gethash sab sab-srlus-hash) srlus))))))
120 (setq *preparse-hash-init?* t)
123 #+nil (defun pfstr-hash (cui) (gethash cui pfstr-hash))
;; Readers for the least-restrictive-level tables filled by ENSURE-PREPARSE.
;; Each one is a plain GETHASH, so it yields the stored level as the primary
;; value and the usual found-p flag as the second value.
(defun cui-lrl (cui)
  "Least restrictive SRL recorded for CUI."
  (gethash cui cui-lrl-hash))

(defun lui-lrl (lui)
  "Least restrictive SRL recorded for LUI."
  (gethash lui lui-lrl-hash))

(defun sui-lrl (sui)
  "Least restrictive SRL recorded for SUI."
  (gethash sui sui-lrl-hash))

(defun cuisui-lrl (cuisui)
  "Least restrictive SRL recorded for the combined CUISUI key."
  (gethash cuisui cuisui-lrl-hash))

(defun cui-lrlus (cui)
  "Least restrictive SRLUS recorded for CUI."
  (gethash cui cui-lrlus-hash))

(defun lui-lrlus (lui)
  "Least restrictive SRLUS recorded for LUI."
  (gethash lui lui-lrlus-hash))

(defun sui-lrlus (sui)
  "Least restrictive SRLUS recorded for SUI."
  (gethash sui sui-lrlus-hash))

(defun cuisui-lrlus (cuisui)
  "Least restrictive SRLUS recorded for the combined CUISUI key."
  (gethash cuisui cuisui-lrlus-hash))
(defun sab-srl (sab)
  "SRL stored for source abbreviation SAB, or 0 when none is stored."
  ;; OR passes on only the primary value of GETHASH, falling back to 0 on NIL.
  (or (gethash sab sab-srl-hash) 0))

(defun sab-srlus (sab)
  "SRLUS stored for source abbreviation SAB, or 0 when none is stored."
  (or (gethash sab sab-srlus-hash) 0))
138 (defun set-lrl-hash (key srl hash)
139 "Set the least restrictive level in hash table"
140 (declare (fixnum srl))
141 (multiple-value-bind (hash-lrl found) (gethash key hash)
142 (declare (type (or null fixnum) hash-lrl)
144 (if (or (not found) (< srl hash-lrl))
145 (setf (gethash key hash) srl))))
147 ;; UMLS file and column structures
;;; SQL datatype symbols
149 ;;; sql-u - Unique identifier
150 ;;; sql-t - Tiny integer (8-bit)
151 ;;; sql-s - Small integer (16-bit)
152 ;;; sql-i - Integer (32-bit)
153 ;;; sql-l - Big integer (64-bit)
154 ;;; sql-f - Floating point
155 ;;; sql-c - Character data
157 (defparameter +col-datatypes+
158 '(("AV" sql-f) ("BTS" sql-i) ("CLS" sql-i) ("COF" sql-i) ("CUI1" sql-u)
159 ("AUI" sql-u) ("AUI1" sql-u) ("AUI2" sql-u) ("PCUI" sql-u)
160 ("PLUI" sql-u) ("PAUI" sql-u) ("RUI" sql-u)
161 ("CUI2" sql-u) ("CUI" sql-u) ("CXN" sql-s) ("FR" sql-i)
162 ("LUI" sql-u) ("MAX" sql-s) ("MIN" sql-s) ("RANK" sql-s) ("REF" sql-c)
164 ("RNK" sql-s) ("RWS" sql-i) ("SRL" sql-t) ("SUI" sql-u) ("TUI" sql-u)
167 ("KCUISUI" sql-l) ("KCUILUI" sql-l)
168 ("KSRL" sql-t) ("KSRLUS" sql-t) ("LRL" sql-t) ("LRLUS" sql-t)
169 ("KCUILRL" sql-t) ("KLUILRL" sql-t) ("KSUILRL" sql-t) ("KLRL" sql-t)
170 ("KCUILRLUS" sql-t) ("KLUILRLUS" sql-t) ("KSUILRLUS" sql-t) ("KLRLUS" sql-t)
172 ("EUI" sql-u) ("EUI2" sql-u)
173 ;;; Semantic net columns
174 ("UI" sql-u) ("UI2" sql-u) ("UI3" sql-u)
175 ;; New fields for 2002AD
176 ("RCUI" sql-u) ("VCUI" sql-u) ("CFR" sql-i) ("TFR" sql-i)
177 ;; New fields for 2004AA
180 "SQL data types for each non-string column")
182 (defparameter +custom-tables+
185 '(("KCON" "SELECT CUI,STR FROM MRCONSO WHERE STT='PF' AND TS='P' AND ISPREF='Y' AND LAT='ENG'"))
186 "Custom tables to create")
188 (defparameter +custom-cols+
189 '(#+nil ("MRCONSO.RRF" "KPFSTR" "TEXT"
190 (slot-value (find-ucol "STR" "MRCONSO.RRF") 'max)
191 (lambda (x) (pfstr-hash (parse-ui (nth 0 x)))))
;; Set to 1 if the term is the preferred term for English
193 ("MRCONSO.RRF" "KPFENG" "TINYINT" 0
194 (lambda (x) (if (and (string-equal (nth 1 x) "ENG") ; LAT
195 (string-equal (nth 2 x) "P") ; ts
196 (string-equal (nth 4 x) "PF")) ; stt
199 ("MRCONSO.RRF" "KCUISUI" "BIGINT" 0
200 (lambda (x) (write-to-string (make-cuisui (parse-ui (nth 0 x)) (parse-ui (nth 5 x))))))
201 ("MRCONSO.RRF" "KCUILUI" "BIGINT" 0
202 (lambda (x) (write-to-string (make-cuilui (parse-ui (nth 0 x)) (parse-ui (nth 3 x))))))
203 ("MRCONSO.RRF" "KCUILRL" "TINYINT" 0
204 (lambda (x) (write-to-string (cui-lrl (parse-ui (nth 0 x))))))
205 ("MRCONSO.RRF" "KCUILRLUS" "TINYINT" 0
206 (lambda (x) (write-to-string (cui-lrlus (parse-ui (nth 0 x))))))
207 ("MRCONSO.RRF" "KLUILRL" "TINYINT" 0
208 (lambda (x) (write-to-string (lui-lrl (parse-ui (nth 3 x))))))
209 ("MRCONSO.RRF" "KLUILRLUS" "TINYINT" 0
210 (lambda (x) (write-to-string (lui-lrlus (parse-ui (nth 3 x))))))
211 ("MRCONSO.RRF" "KSUILRL" "TINYINT" 0
212 (lambda (x) (write-to-string (sui-lrl (parse-ui (nth 5 x))))))
213 ("MRCONSO.RRF" "KSUILRLUS" "TINYINT" 0
214 (lambda (x) (write-to-string (sui-lrlus (parse-ui (nth 5 x))))))
215 ("MRCONSO.RRF" "KSRLUS" "TINYINT" 0
216 (lambda (x) (write-to-string (srl-to-srlus (parse-integer (nth 15 x))))))
217 ("MRSAB.RRF" "KSRLUS" "TINYINT" 0
218 (lambda (x) (write-to-string (srl-to-srlus (parse-integer (nth 3 x))))))
219 ("MRSTY.RRF" "KLRL" "TINYINT" 0
220 (lambda (x) (write-to-string (cui-lrl (parse-ui (nth 0 x))))))
221 ("MRSTY.RRF" "KLRLUS" "TINYINT" 0
222 (lambda (x) (write-to-string (cui-lrlus (parse-ui (nth 0 x))))))
223 ("MRCOC.RRF" "KLRL" "TINYINT" 0
224 (lambda (x) (write-to-string
225 (max (cui-lrl (parse-ui (nth 0 x)))
226 (kmrcl:aif (cui-lrl (parse-ui (nth 1 x))) kmrcl::it 0)))))
;; KLRLUS for a co-occurrence row: the larger (more restrictive) of the
;; US-scale least restrictive levels of the two referenced concepts.
;; The second lookup used to call CUI-LRL (the non-US scale), so MAX compared
;; values from two different scales; both lookups now use CUI-LRLUS.
;; NOTE(review): the second concept is read from (nth 1 x). If MRCOC.RRF is
;; laid out CUI1|AUI1|CUI2|..., that field is AUI1 rather than CUI2 -- confirm
;; against the column layout (the sibling KLRL entry uses the same index).
("MRCOC.RRF" "KLRLUS" "TINYINT" 0
 (lambda (x) (write-to-string
              (max (cui-lrlus (parse-ui (nth 0 x)))
                   ;; a concept with no recorded level counts as 0
                   (kmrcl:aif (cui-lrlus (parse-ui (nth 1 x))) kmrcl::it 0)))))
231 ("MRSAT.RRF" "KSRL" "TINYINT" 0
232 (lambda (x) (write-to-string (sab-srl (nth 9 x)))))
233 ("MRSAT.RRF" "KSRLUS" "TINYINT" 0
234 (lambda (x) (write-to-string (sab-srlus (nth 9 x)))))
235 ("MRREL.RRF" "KSRL" "TINYINT" 0
236 (lambda (x) (write-to-string (sab-srl (nth 10 x)))))
237 ("MRREL.RRF" "KSRLUS" "TINYINT" 0
238 (lambda (x) (write-to-string (sab-srlus (nth 10 x)))))
239 ("MRRANK.RRF" "KSRL" "TINYINT" 0
240 (lambda (x) (write-to-string (sab-srl (nth 1 x)))))
241 ("MRRANK.RRF" "KSRLUS" "TINYINT" 0
242 (lambda (x) (write-to-string (sab-srlus (nth 1 x)))))
243 ("MRHIER.RRF" "KSRL" "TINYINT" 0
244 (lambda (x) (write-to-string (sab-srl (nth 4 x)))))
245 ("MRHIER.RRF" "KSRLUS" "TINYINT" 0
246 (lambda (x) (write-to-string (sab-srlus (nth 4 x)))))
247 ("MRMAP.RRF" "KSRL" "TINYINT" 0
248 (lambda (x) (write-to-string (sab-srl (nth 1 x)))))
249 ("MRMAP.RRF" "KSRLUS" "TINYINT" 0
250 (lambda (x) (write-to-string (sab-srlus (nth 1 x)))))
251 ("MRSMAP.RRF" "KSRL" "TINYINT" 0
252 (lambda (x) (write-to-string (sab-srl (nth 1 x)))))
253 ("MRSMAP.RRF" "KSRLUS" "TINYINT" 0
254 (lambda (x) (write-to-string (sab-srlus (nth 1 x)))))
255 ("MRDEF.RRF" "KSRL" "TINYINT" 0
256 (lambda (x) (write-to-string (sab-srl (nth 4 x)))))
257 ("MRDEF.RRF" "KSRLUS" "TINYINT" 0
258 (lambda (x) (write-to-string (sab-srlus (nth 4 x)))))
259 ("MRXW_ENG.RRF" "KLRL" "TINYINT" 0
260 (lambda (x) (write-to-string (cuisui-lrl (make-cuisui
262 (parse-ui (nth 4 x)))))))
263 ("MRXW_ENG.RRF" "KLRLUS" "TINYINT" 0
264 (lambda (x) (write-to-string (cuisui-lrlus (make-cuisui
266 (parse-ui (nth 4 x)))))))
267 ("MRXW_NONENG.RRF" "KLRL" "TINYINT" 0
268 (lambda (x) (write-to-string (cuisui-lrl (make-cuisui
270 (parse-ui (nth 4 x)))))))
271 ("MRXW_NONENG.RRF" "KLRLUS" "TINYINT" 0
272 (lambda (x) (write-to-string (cuisui-lrlus (make-cuisui
274 (parse-ui (nth 4 x)))))))
275 ("MRXNW_ENG.RRF" "KLRL" "TINYINT" 0
276 (lambda (x) (write-to-string (cuisui-lrl (make-cuisui
278 (parse-ui (nth 4 x)))))))
279 ("MRXNW_ENG.RRF" "KLRLUS" "TINYINT" 0
280 (lambda (x) (write-to-string (cuisui-lrlus (make-cuisui
282 (parse-ui (nth 4 x)))))))
283 ("MRXNS_ENG.RRF" "KLRL" "TINYINT" 0
284 (lambda (x) (write-to-string (cuisui-lrl (make-cuisui
286 (parse-ui (nth 4 x)))))))
287 ("MRXNS_ENG.RRF" "KLRLUS" "TINYINT" 0
288 (lambda (x) (write-to-string (cuisui-lrlus (make-cuisui
290 (parse-ui (nth 4 x)))))))
292 #+nil ("MRREL.RRF" "KPFSTR2" "TEXT" 1024 (lambda (x) (pfstr-hash (parse-ui (nth 4 x)))))
293 #+nil ("MRCOC.RRF" "KPFSTR2" "TEXT" 1024 (lambda (x) (pfstr-hash (parse-ui (nth 2 x)))))
294 ("MRSAT.RRF" "KCUILUI" "BIGINT" 0
295 (lambda (x) (write-to-string (make-cuilui (parse-ui (nth 0 x)) (parse-ui (nth 1 x))))))
296 ("MRSAT.RRF" "KCUISUI" "BIGINT" 0
297 (lambda (x) (write-to-string (make-cuisui (parse-ui (nth 0 x)) (parse-ui (nth 2 x))))))
298 ("MRXW_ENG.RRF" "KCUISUI" "BIGINT" 0
299 (lambda (x) (write-to-string (make-cuisui (parse-ui (nth 2 x)) (parse-ui (nth 4 x))))))
300 ("MRXNW_ENG.RRF" "KCUISUI" "BIGINT" 0
301 (lambda (x) (write-to-string (make-cuisui (parse-ui (nth 2 x)) (parse-ui (nth 4 x))))))
302 ("MRXNS_ENG.RRF" "KCUISUI" "BIGINT" 0
303 (lambda (x) (write-to-string (make-cuisui (parse-ui (nth 2 x)) (parse-ui (nth 4 x))))))
304 ("MRXW_NONENG.RRF" "LAT" "VARCHAR" 3 (lambda (x) (nth 0 x)))
305 ("MRXW_NONENG.RRF" "WD" "VARCHAR" 200 (lambda (x) (nth 1 x)))
306 ("MRXW_NONENG.RRF" "CUI" "INTEGER" 0 (lambda (x) (write-to-string (parse-ui (nth 2 x)))))
307 ("MRXW_NONENG.RRF" "LUI" "INTEGER" 0 (lambda (x) (write-to-string (parse-ui (nth 3 x)))))
308 ("MRXW_NONENG.RRF" "SUI" "INTEGER" 0 (lambda (x) (write-to-string (parse-ui (nth 4 x)))))
309 ("MRXW_NONENG.RRF" "KCUISUI" "BIGINT" 0
310 (lambda (x) (write-to-string (make-cuisui (parse-ui (nth 2 x)) (parse-ui (nth 4 x)))))))
311 "Custom columns to create.(filename, col, sqltype, value-func).")
313 (defparameter +index-cols+
314 '(("CUI1" "MRCOC") ("CUI" "MRCONSO") ("LUI" "MRCONSO")
315 ("SRL" "MRCONSO") ("KSRLUS" "MRCONSO") ("AUI" "MRCONSO") ("KPFENG" "MRCONSO")
316 ("SUI" "MRCONSO") ("SAUI" "MRCONSO") ("CODE" "MRCONSO")
319 ("CUI1" "MRREL") ("CUI2" "MRREL") ("SAB" "MRREL")
320 ("RUI" "MRREL") ("AUI1" "MRREL") ("AUI2" "MRREL")
321 ("CUI" "MRSAT") ("LUI" "MRSAT") ("SUI" "MRSAT")
322 ("METAUI" "MRSAT") ("ATN" "MRSAT")
323 ("CUI" "MRSTY") ("TUI" "MRSTY") ("CUI" "MRXNS_ENG")
324 ("AUI" "MRHIER") ("CUI" "MRHIER") ("CXN" "MRHIER") ("RELA" "MRHIER") ("PAUI" "MRHIER")
326 #+ignore ("NSTR" "MRXNS_ENG" 10)
327 ("CUI" "MRXNW_ENG") ("NWD" "MRXNW_ENG") ("WD" "MRXW_ENG")
328 ("KCUISUI" "MRCONSO") ("KCUILUI" "MRCONSO")
329 ("KCUILRL" "MRCONSO") ("KLUILRL" "MRCONSO") ("KSUILRL" "MRCONSO")
330 ("KCUILRLUS" "MRCONSO") ("KLUILRLUS" "MRCONSO") ("KSUILRLUS" "MRCONSO")
331 ("KCUISUI" "MRSAT") ("KCUILUI" "MRSAT")
332 ("KCUISUI" "MRXW_ENG") ("KCUISUI" "MRXNW_ENG")
333 ("KCUISUI" "MRXNS_ENG") ("KCUISUI" "MRXW_NONENG")
334 ("KSRL" "MRDEF") ("KSRL" "MRRANK")("KSRL" "MRREL") ("KSRL" "MRSAT")
335 ("KSRLUS" "MRDEF") ("KSRLUS" "MRRANK")("KSRLUS" "MRREL") ("KSRLUS" "MRSAT")
336 ("KLRL" "MRCOC") ("KLRL" "MRSTY") ("KLRL" "MRXW_ENG") ("KLRL" "MRXNW_ENG")
337 ("KLRLUS" "MRCOC") ("KLRLUS" "MRSTY") ("KLRLUS" "MRXW_ENG") ("KLRLUS" "MRXNW_ENG")
338 ("KLRL" "MRXNS_ENG") ("KLRL" "MRXW_NONENG")
339 ("KLRLUS" "MRXNS_ENG") ("KLRLUS" "MRXW_NONENG")
341 ("EUI" "LRABR") ("EUI2" "LRABR") ("EUI" "LRAGR") ("EUI" "LRCMP") ("EUI" "LRMOD")
342 ("EUI" "LRNOM") ("EUI2" "LRNOM") ("EUI" "LRPRN") ("EUI" "LRPRP") ("EUI" "LRSPL")
343 ("EUI" "LRTRM") ("EUI" "LRTYP") ("EUI" "LRWD") ("WRD" "LRWD")
345 ;; Semantic NET indices
346 ("UI" "SRSTRE1") ("UI2" "SRSTRE1") ("UI3" "SRSTRE1")
347 ("STY_RL" "SRDEF") ("RT" "SRDEF") ("STY_RL" "SRSTR") ("STY_RL2" "SRSTR")
350 ("SRL" "MRSAB") ("KSRLUS" "MRSAB") ("RSAB" "MRSAB") ("VSAB" "MRSAB") ("RCUI" "MRSAB")
351 ("VCUI" "MRSAB") ("LAT" "MRSAB") ("MAPSETCUI" "MRMAP") ("MAPSETCUI" "MRSMAP")
353 "Columns in files to index")
356 (defparameter +custom-index-cols+
359 '(("CUI" "KCON") ("LRL" "KCON"))
360 "Indexes to custom tables")
362 ;; File & Column functions
365 (add-ucols (gen-ucols-meta))
366 (add-ucols (gen-ucols-custom))
367 (add-ucols (gen-ucols-generic "LRFLD"))
368 (add-ucols (gen-ucols-generic "SRFLD")))
370 (defun gen-ucols-meta ()
371 "Initialize all umls columns"
373 (with-umls-file (line "MRCOLS.RRF")
374 (destructuring-bind (col des ref min av max fil dty) line
375 (push (make-ucol col des ref (parse-integer min) (read-from-string av)
376 (parse-integer max) fil dty)
380 (defun gen-ucols-custom ()
381 "Initialize umls columns for custom columns"
382 (loop for customcol in +custom-cols+
384 (make-ucol (nth 1 customcol) "" 0 0 0 (eval (nth 3 customcol))
385 (nth 0 customcol) nil :sqltype (canonicalize-column-type (nth 2 customcol))
386 :custom-value-fun (nth 4 customcol))))
388 (defun gen-ucols-generic (col-filename)
389 "Initialize for generic (LEX/NET) columns"
391 (with-umls-file (line col-filename)
392 (destructuring-bind (nam des ref fil) line
393 (setq nam (escape-column-name nam))
394 (dolist (file (delimited-string-to-list fil #\,))
396 (make-ucol nam des ref nil nil nil file nil)
402 (add-ufiles (gen-ufiles-generic "MRFILES.RRF" "META"))
403 (add-ufiles (gen-ufiles-generic "LRFIL" "LEX"))
404 (add-ufiles (gen-ufiles-generic "SRFIL" "NET"))
405 ;; needs to come last
406 (add-ufiles (gen-ufiles-custom)))
409 (defun gen-ufiles-generic (files-filename dir)
410 "Initialize generic UMLS file structures"
412 (with-umls-file (line files-filename)
413 (destructuring-bind (fil des fmt cls rws bts) line
417 (parse-integer rws) (parse-integer bts)
418 (concatenate 'list (umls-field-string-to-list fmt)
419 (custom-colnames-for-filename fil)))
(defun gen-ufiles-custom ()
  "Build the ufile description for the custom MRXW_NONENG.RRF index file.
Its field list is taken from the ufile found for MRXW_ENG.RRF, which is
presumably why the caller notes that this step needs to come last.
Returns the result of MAKE-UFILE."
  (let* ((english-index (find-ufile "MRXW_ENG.RRF"))
         (shared-fields (fields english-index)))
    (make-ufile "META" "MRXW_NONENG.RRF" "Custom NonEnglish Index"
                5 0 0 shared-fields)))