1 ;;;; -*- Mode: Lisp; Syntax: ANSI-Common-Lisp; Base: 10; Package: umlisp -*-
2 ;;;; *************************************************************************
3 ;;;; FILE IDENTIFICATION
5 ;;;; Name: parse-2002.lisp
6 ;;;; Purpose: Parsing and SQL insertion routines for UMLisp which may
7 ;;;; change from year to year
8 ;;;; Author: Kevin M. Rosenberg
13 ;;;; This file, part of UMLisp, is
14 ;;;; Copyright (c) 2000-2004 by Kevin M. Rosenberg, M.D.
16 ;;;; UMLisp users are granted the rights to distribute and use this software
17 ;;;; as governed by the terms of the GNU General Public License.
18 ;;;; *************************************************************************
20 (in-package #:umlisp-orf)
22 ;;; Pre-read data for custom fields into hash tables
23 (defvar *preparse-hash-init?* nil)
25 (eval-when (:compile-toplevel :load-toplevel :execute)
26 (let ((pfstr-hash nil) ;;; Preferred concept strings by CUI
27 (cui-lrl-hash nil) ;;; LRL by CUI
28 (lui-lrl-hash nil) ;;; LRL by LUI
29 (cuisui-lrl-hash nil) ;;; LRL by CUISUI
30 (sab-srl-hash nil)) ;;; SRL by SAB
32 (defun make-preparse-hash-table ()
36 (clrhash cui-lrl-hash)
37 (clrhash lui-lrl-hash)
38 (clrhash cuisui-lrl-hash)
39 (clrhash sab-srl-hash))
41 pfstr-hash (make-hash-table :size 800000)
42 cui-lrl-hash (make-hash-table :size 800000)
43 lui-lrl-hash (make-hash-table :size 1500000)
44 cuisui-lrl-hash (make-hash-table :size 1800000)
45 sab-srl-hash (make-hash-table :size 100 :test 'equal))))
47 (defun buffered-ensure-preparse (&optional (force-read nil))
48 (when (or force-read (not *preparse-hash-init?*))
49 (make-preparse-hash-table)
50 (setq *preparse-hash-init?* t))
51 (with-buffered-umls-file (line "MRCON")
52 (let ((cui (parse-ui (aref line 0)))
53 (lui (parse-ui (aref line 3)))
54 (sui (parse-ui (aref line 5)))
55 (lrl (parse-integer (aref line 7))))
56 (unless (gethash cui pfstr-hash) ;; if haven't stored pfstr for cui
57 (if (and (string-equal (aref line 1) "ENG") ; LAT
58 (string-equal (aref line 2) "P") ; ts
59 (string-equal (aref line 4) "PF")) ; stt
60 (setf (gethash cui pfstr-hash) (aref line 6))))
61 (set-lrl-hash cui lrl cui-lrl-hash)
62 (set-lrl-hash lui lrl lui-lrl-hash)
63 (set-lrl-hash (make-cuisui cui sui) lrl cuisui-lrl-hash)))
64 (with-buffered-umls-file (line "MRSO")
65 (let ((sab (aref line 3)))
66 (unless (gethash sab sab-srl-hash) ;; if haven't stored
67 (setf (gethash sab sab-srl-hash) (aref line 6))))))
69 (defun ensure-preparse (&optional (force-read nil))
70 (when (or force-read (not *preparse-hash-init?*))
71 (make-preparse-hash-table)
72 (setq *preparse-hash-init?* t))
73 (with-umls-file (line "MRCON")
74 (let ((cui (parse-ui (nth 0 line)))
75 (lui (parse-ui (nth 3 line)))
76 (sui (parse-ui (nth 5 line)))
77 (lrl (parse-integer (nth 7 line))))
78 (unless (gethash cui pfstr-hash) ;; if haven't stored pfstr for cui
79 (if (and (string-equal (nth 1 line) "ENG") ; LAT
80 (string-equal (nth 2 line) "P") ; ts
81 (string-equal (nth 4 line) "PF")) ; stt
82 (setf (gethash cui pfstr-hash) (nth 6 line))))
83 (set-lrl-hash cui lrl cui-lrl-hash)
84 (set-lrl-hash lui lrl lui-lrl-hash)
85 (set-lrl-hash (make-cuisui cui sui) lrl cuisui-lrl-hash)))
86 (with-umls-file (line "MRSO")
87 (let ((sab (nth 3 line)))
88 (multiple-value-bind (val found) (gethash sab sab-srl-hash)
89 (declare (ignore val))
91 (setf (gethash sab sab-srl-hash) (parse-integer (nth 6 line))))))))
93 (defun pfstr-hash (cui)
94 (gethash cui pfstr-hash))
97 (gethash cui cui-lrl-hash))
100 (gethash lui lui-lrl-hash))
102 (defun cuisui-lrl (cuisui)
103 (gethash cuisui cuisui-lrl-hash))
106 (aif (gethash sab sab-srl-hash) it 0))
109 (defun set-lrl-hash (key lrl hash)
110 "Set the least restrictive level in hash table"
111 (multiple-value-bind (hash-lrl found) (gethash key hash)
112 (if (or (not found) (< lrl hash-lrl))
113 (setf (gethash key hash) lrl))))
115 ;; UMLS file and column structures
116 ;;; SQL datatypes symbols
117 ;;; sql-u - Unique identifier
118 ;;; sql-s - Small integer (16-bit)
119 ;;; sql-i - Integer (32-bit)
120 ;;; sql-l - Big integer (64-bit)
121 ;;; sql-f - Floating point
122 ;;; sql-c - Character data
124 (defparameter +col-datatypes+
125 '(("AV" sql-f) ("BTS" sql-i) ("CLS" sql-i) ("COF" sql-i) ("CUI1" sql-u)
126 ("CUI2" sql-u) ("CUI" sql-u) ("CXN" sql-s) ("FR" sql-i) ("LRL" sql-s)
127 ("LUI" sql-u) ("MAX" sql-s) ("MIN" sql-s) ("RANK" sql-s) ("REF" sql-c)
128 ("RNK" sql-s) ("RWS" sql-i) ("SRL" sql-s) ("SUI" sql-u) ("TUI" sql-u)
130 ("KCUISUI" sql-l) ("KCUILUI" sql-l) ("KCUILRL" sql-i) ("KLUILRL" sql-i)
131 ("KSRL" sql-i) ("KLRL" sql-i)
133 ("EUI" sql-u) ("EUI2" sql-u)
134 ;;; Semantic net columns
135 ("UI" sql-u) ("UI2" sql-u) ("UI3" sql-u)
136 ;; New fields for 2002AD
137 ("RCUI" sql-u) ("VCUI" sql-u) ("CFR" sql-i) ("TFR" sql-i)
139 "SQL data types for each non-string column")
141 (defparameter +custom-tables+
144 '(("MRCONSO" "SELECT m.CUI, m.LAT, m.TS, m.LUI, m.STT, m.SUI, m.STR, m.LRL, s.SAB, s.TTY, s.SCD, s.SRL FROM MRCON m, MRSO s WHERE m.CUI=s.CUI AND m.LUI=s.LUI AND m.SUI=s.SUI")
145 ("MRCONFULL" "SELECT m.CUI, m.LAT, m.TS, m.LUI, m.STT, m.SUI, m.STR, m.LRL, s.SAB, s.TTY, s.SCD, s.SRL, t.TUI FROM MRCON m, MRSO s, MRSTY t WHERE m.CUI=s.CUI AND m.LUI=s.LUI AND m.SUI=s.SUI AND m.CUI=t.CUI AND s.CUI=t.CUI"))
146 "Custom tables to create")
148 (defparameter +custom-cols+
149 '(("MRCON" "KPFSTR" "TEXT" 1024
150 (lambda (x) (pfstr-hash (parse-ui (nth 0 x)))))
151 ("MRCON" "KCUISUI" "BIGINT" 0
152 (lambda (x) (write-to-string (make-cuisui (parse-ui (nth 0 x)) (parse-ui (nth 5 x))))))
153 ("MRCON" "KCUILUI" "BIGINT" 0
154 (lambda (x) (write-to-string (make-cuilui (parse-ui (nth 0 x)) (parse-ui (nth 3 x))))))
155 ("MRCON" "KCUILRL" "INTEGER" 0
156 (lambda (x) (write-to-string (cui-lrl (parse-ui (nth 0 x))))))
157 ("MRCON" "KLUILRL" "INTEGER" 0
158 (lambda (x) (write-to-string (lui-lrl (parse-ui (nth 3 x))))))
159 ("MRLO" "KLRL" "INTEGER" 0
160 (lambda (x) (write-to-string
161 (if (zerop (length (nth 4 x)))
162 (cui-lrl (parse-ui (nth 0 x)))
163 (cuisui-lrl (make-cuisui (parse-ui (nth 0 x)) (parse-ui (nth 4 x))))))))
164 ("MRSTY" "KLRL" "INTEGER" 0
165 (lambda (x) (write-to-string (cui-lrl (parse-ui (nth 0 x))))))
166 ("MRCOC" "KLRL" "INTEGER" 0
167 (lambda (x) (write-to-string
168 (max (cui-lrl (parse-ui (nth 0 x)))
169 (kmrcl:aif (cui-lrl (parse-ui (nth 1 x))) kmrcl::it 0)))))
170 ("MRSAT" "KSRL" "INTEGER" 0
171 (lambda (x) (write-to-string (sab-srl (nth 5 x)))))
172 ("MRREL" "KSRL" "INTEGER" 0
173 (lambda (x) (write-to-string (sab-srl (nth 4 x)))))
174 ("MRRANK" "KSRL" "INTEGER" 0
175 (lambda (x) (write-to-string (sab-srl (nth 1 x)))))
176 ("MRDEF" "KSRL" "INTEGER" 0
177 (lambda (x) (write-to-string (sab-srl (nth 1 x)))))
178 ("MRCXT" "KSRL" "INTEGER" 0
179 (lambda (x) (write-to-string (sab-srl (nth 2 x)))))
180 ("MRATX" "KSRL" "INTEGER" 0
181 (lambda (x) (write-to-string (sab-srl (nth 1 x)))))
182 ("MRXW.ENG" "KLRL" "INTEGER" 0
183 (lambda (x) (write-to-string (cuisui-lrl (make-cuisui
185 (parse-ui (nth 4 x)))))))
186 ("MRXW.NONENG" "KLRL" "INTEGER" 0
187 (lambda (x) (write-to-string (cuisui-lrl (make-cuisui
189 (parse-ui (nth 4 x)))))))
190 ("MRXNW.ENG" "KLRL" "INTEGER" 0
191 (lambda (x) (write-to-string (cuisui-lrl (make-cuisui
193 (parse-ui (nth 4 x)))))))
194 ("MRXNS.ENG" "KLRL" "INTEGER" 0
195 (lambda (x) (write-to-string (cuisui-lrl (make-cuisui
197 (parse-ui (nth 4 x)))))))
198 ("MRREL" "KPFSTR2" "TEXT" 1024
199 (lambda (x) (pfstr-hash (parse-ui (nth 2 x)))))
200 ("MRCOC" "KPFSTR2" "TEXT" 1024
201 (lambda (x) (pfstr-hash (parse-ui (nth 1 x)))))
202 ("MRCXT" "KCUISUI" "BIGINT" 0
203 (lambda (x) (write-to-string (make-cuisui (parse-ui (nth 0 x)) (parse-ui (nth 1 x))))))
204 ("MRSAT" "KCUILUI" "BIGINT" 0
205 (lambda (x) (write-to-string (make-cuilui (parse-ui (nth 0 x)) (parse-ui (nth 1 x))))))
206 ("MRSAT" "KCUISUI" "BIGINT" 0
207 (lambda (x) (write-to-string (make-cuisui (parse-ui (nth 0 x)) (parse-ui (nth 2 x))))))
208 ("MRSO" "KCUISUI" "BIGINT" 0
209 (lambda (x) (write-to-string (make-cuisui (parse-ui (nth 0 x)) (parse-ui (nth 2 x))))))
210 ("MRXW.ENG" "KCUISUI" "BIGINT" 0
211 (lambda (x) (write-to-string (make-cuisui (parse-ui (nth 2 x)) (parse-ui (nth 4 x))))))
212 ("MRXNW.ENG" "KCUISUI" "BIGINT" 0
213 (lambda (x) (write-to-string (make-cuisui (parse-ui (nth 2 x)) (parse-ui (nth 4 x))))))
214 ("MRXNS.ENG" "KCUISUI" "BIGINT" 0
215 (lambda (x) (write-to-string (make-cuisui (parse-ui (nth 2 x)) (parse-ui (nth 4 x))))))
216 ("MRXW.NONENG" "LAT" "VARCHAR" 3 (lambda (x) (nth 0 x)))
217 ("MRXW.NONENG" "WD" "VARCHAR" 200 (lambda (x) (nth 1 x)))
218 ("MRXW.NONENG" "CUI" "INTEGER" 0 (lambda (x) (write-to-string (parse-ui (nth 2 x)))))
219 ("MRXW.NONENG" "LUI" "INTEGER" 0 (lambda (x) (write-to-string (parse-ui (nth 3 x)))))
220 ("MRXW.NONENG" "SUI" "INTEGER" 0 (lambda (x) (write-to-string (parse-ui (nth 4 x)))))
221 ("MRXW.NONENG" "KCUISUI" "BIGINT" 0
222 (lambda (x) (write-to-string (make-cuisui (parse-ui (nth 2 x)) (parse-ui (nth 4 x)))))))
223 "Custom columns to create.(filename, col, sqltype, value-func).")
225 (defparameter +index-cols+
226 '(("CUI" "MRATX") ("CUI1" "MRCOC") ("CUI" "MRCON") ("LUI" "MRCON")
228 ("SUI" "MRCON") ("CUI" "MRCXT") ("CUI" "MRDEF") ("CUI" "MRLO")
229 ("CUI1" "MRREL") ("CUI" "MRSAT") ("LUI" "MRSAT") ("SUI" "MRSAT")
230 ("CUI" "MRSO") ("SAB" "MRSO") ("SRL" "MRSO") ("CUI" "MRSTY")
231 ("TUI" "MRSTY") ("CUI" "MRXNS_ENG")
232 #+ignore ("NSTR" "MRXNS_ENG" 10)
233 ("CUI" "MRXNW_ENG") ("NWD" "MRXNW_ENG") ("WD" "MRXW_ENG")
234 ("KCUISUI" "MRCON") ("KCUILUI" "MRCON") ("KCUILRL" "MRCON")
235 ("KLUILRL" "MRCON") ("KCUISUI" "MRCXT")
236 ("KCUISUI" "MRSO") ("KCUISUI" "MRSAT") ("KCUILUI" "MRSAT")
237 ("KCUISUI" "MRXW_ENG") ("KCUISUI" "MRXNW_ENG")
238 ("KCUISUI" "MRXNS_ENG") ("KCUISUI" "MRXW_NONENG")
239 ("KSRL" "MRATX") ("KSRL" "MRCXT") ("KSRL" "MRDEF") ("KSRL" "MRRANK")
240 ("KSRL" "MRREL") ("KSRL" "MRSAT") ("KLRL" "MRCOC")
241 ("KLRL" "MRLO") ("KLRL" "MRSTY") ("KLRL" "MRXW_ENG") ("KLRL" "MRXNW_ENG")
242 ("KLRL" "MRXNS_ENG") ("KLRL" "MRXW_NONENG")
244 ("EUI" "LRABR") ("EUI2" "LRABR") ("EUI" "LRAGR") ("EUI" "LRCMP") ("EUI" "LRMOD")
245 ("EUI" "LRNOM") ("EUI2" "LRNOM") ("EUI" "LRPRN") ("EUI" "LRPRP") ("EUI" "LRSPL")
246 ("EUI" "LRTRM") ("EUI" "LRTYP") ("EUI" "LRWD") ("WRD" "LRWD")
248 ;; Semantic NET indices
249 ("UI" "SRSTRE1") ("UI2" "SRSTRE1") ("UI3" "SRSTRE1")
250 ("STY_RL" "SRDEF") ("RT" "SRDEF") ("STY_RL" "SRSTR") ("STY_RL2" "SRSTR")
252 ("SRL" "MRSAB") ("RSAB" "MRSAB") ("VSAB" "MRSAB") ("RCUI" "MRSAB")
253 ("VCUI" "MRSAB") ("LAT" "MRSAB"))
254 "Columns in files to index")
257 (defparameter +custom-index-cols+
260 '(("CUI" "MRCONFULL") ("SAB" "MRCONFULL") ("TUI" "MRCONFULL"))
261 "Indexes to custom tables")
263 ;; File & Column functions
266 (add-ucols (gen-ucols-meta))
267 (add-ucols (gen-ucols-custom))
268 (add-ucols (gen-ucols-generic "LRFLD"))
269 (add-ucols (gen-ucols-generic "SRFLD")))
271 (defun gen-ucols-meta ()
272 "Initialize all umls columns"
274 (with-umls-file (line "MRCOLS")
275 (destructuring-bind (col des ref min av max fil dty) line
276 (push (make-ucol col des ref (parse-integer min) (read-from-string av)
277 (parse-integer max) fil dty)
281 (defun gen-ucols-custom ()
282 "Initialize umls columns for custom columns"
283 (loop for customcol in +custom-cols+
285 (make-ucol (nth 1 customcol) "" 0 0 0 (nth 3 customcol)
286 (nth 0 customcol) nil :sqltype (nth 2 customcol)
287 :custom-value-fun (nth 4 customcol))))
289 (defun gen-ucols-generic (col-filename)
290 "Initialize for generic (LEX/NET) columns"
292 (with-umls-file (line col-filename)
293 (destructuring-bind (nam des ref fil) line
294 (setq nam (escape-column-name nam))
295 (dolist (file (delimited-string-to-list fil #\,))
297 (make-ucol nam des ref nil nil nil file nil)
303 (add-ufiles (gen-ufiles-generic "MRFILES"))
304 (add-ufiles (gen-ufiles-generic "LRFIL"))
305 (add-ufiles (gen-ufiles-generic "SRFIL"))
306 ;; needs to come last
307 (add-ufiles (gen-ufiles-custom)))
310 (defun gen-ufiles-generic (files-filename)
311 "Initialize all LEX file structures"
313 (with-umls-file (line files-filename)
314 (destructuring-bind (fil des fmt cls rws bts) line
316 fil des (substitute #\_ #\. fil) (parse-integer cls)
317 (parse-integer rws) (parse-integer bts)
318 (concatenate 'list (umls-field-string-to-list fmt)
319 (custom-colnames-for-filename fil)))
323 (defun gen-ufiles-custom ()
324 (make-ufile "MRXW.NONENG" "Custom NonEnglish Index" "MRXW_NONENG"
325 5 0 0 (fields (find-ufile "MRXW.ENG"))))