1 ;;;; -*- Mode: Lisp; Syntax: ANSI-Common-Lisp; Base: 10; Package: umlisp -*-
2 ;;;; *************************************************************************
3 ;;;; FILE IDENTIFICATION
5 ;;;; Name: parse-2002.lisp
6 ;;;; Purpose: Parsing and SQL insertion routines for UMLisp which may
7 ;;;; change from year to year
8 ;;;; Author: Kevin M. Rosenberg
13 ;;;; This file, part of UMLisp, is
14 ;;;; Copyright (c) 2000-2004 by Kevin M. Rosenberg, M.D.
16 ;;;; UMLisp users are granted the rights to distribute and use this software
17 ;;;; as governed by the terms of the GNU General Public License.
18 ;;;; *************************************************************************
22 ;;; Pre-read data for custom fields into hash tables
23 (defvar *preparse-hash-init?* nil)
25 (eval-when (:compile-toplevel :load-toplevel :execute)
26 (let ((pfstr-hash nil) ;;; Preferred concept strings by CUI
27 (cui-lrl-hash nil) ;;; LRL by CUI
28 (lui-lrl-hash nil) ;;; LRL by LUI
29 (cuisui-lrl-hash nil) ;;; LRL by CUISUI
30 (sab-srl-hash nil)) ;;; SRL by SAB
32 (defun make-preparse-hash-table ()
36 (clrhash cui-lrl-hash)
37 (clrhash lui-lrl-hash)
38 (clrhash cuisui-lrl-hash)
39 (clrhash sab-srl-hash))
41 pfstr-hash (make-hash-table :size 800000)
42 cui-lrl-hash (make-hash-table :size 800000)
43 lui-lrl-hash (make-hash-table :size 1500000)
44 cuisui-lrl-hash (make-hash-table :size 1800000)
45 sab-srl-hash (make-hash-table :size 100 :test 'equal))))
47 (defun ensure-preparse (&optional (force-read nil))
48 (when (or force-read (not *preparse-hash-init?*))
49 (make-preparse-hash-table)
50 (setq *preparse-hash-init?* t))
51 (with-umls-file (line "MRCONSO.RRF")
52 (let ((cui (parse-ui (nth 0 line)))
53 (lui (parse-ui (nth 3 line)))
54 (sui (parse-ui (nth 5 line)))
56 (srl (parse-integer (nth 15 line))))
57 (unless (gethash cui pfstr-hash) ;; if haven't stored pfstr for cui
58 (if (and (string-equal (nth 1 line) "ENG") ; LAT
59 (string-equal (nth 2 line) "P") ; ts
60 (string-equal (nth 4 line) "PF")) ; stt
61 (setf (gethash cui pfstr-hash) (nth 14 line))))
62 (set-lrl-hash cui srl cui-lrl-hash)
63 (set-lrl-hash lui srl lui-lrl-hash)
64 (set-lrl-hash (make-cuisui cui sui) srl cuisui-lrl-hash)
65 (multiple-value-bind (val found) (gethash sab sab-srl-hash)
66 (declare (ignore val))
68 (setf (gethash sab sab-srl-hash) srl))))))
70 (defun pfstr-hash (cui)
71 (gethash cui pfstr-hash))
74 (gethash cui cui-lrl-hash))
77 (gethash lui lui-lrl-hash))
79 (defun cuisui-lrl (cuisui)
80 (gethash cuisui cuisui-lrl-hash))
83 (aif (gethash sab sab-srl-hash) it 0))
86 (defun set-lrl-hash (key lrl hash)
87 "Set the least restrictive level in hash table"
88 (multiple-value-bind (hash-lrl found) (gethash key hash)
89 (if (or (not found) (< lrl hash-lrl))
90 (setf (gethash key hash) lrl))))
92 ;; UMLS file and column structures
93 ;;; SQL datatypes symbols
94 ;;; sql-u - Unique identifier
95 ;;; sql-s - Small integer (16-bit)
96 ;;; sql-i - Integer (32-bit)
97 ;;; sql-l - Big integer (64-bit)
98 ;;; sql-f - Floating point
99 ;;; sql-c - Character data
101 (defparameter +col-datatypes+
102 '(("AV" sql-f) ("BTS" sql-i) ("CLS" sql-i) ("COF" sql-i) ("CUI1" sql-u)
103 ("AUI" sql-u) ("AUI1" sql-u) ("AUI2" sql-u)
104 ("CUI2" sql-u) ("CUI" sql-u) ("CXN" sql-s) ("FR" sql-i) ("LRL" sql-s)
105 ("LUI" sql-u) ("MAX" sql-s) ("MIN" sql-s) ("RANK" sql-s) ("REF" sql-c)
106 ("RNK" sql-s) ("RWS" sql-i) ("SRL" sql-s) ("SUI" sql-u) ("TUI" sql-u)
108 ("KCUISUI" sql-l) ("KCUILUI" sql-l) ("KCUILRL" sql-i) ("KLUILRL" sql-i)
109 ("KSRL" sql-i) ("KLRL" sql-i)
111 ("EUI" sql-u) ("EUI2" sql-u)
112 ;;; Semantic net columns
113 ("UI" sql-u) ("UI2" sql-u) ("UI3" sql-u)
114 ;; New fields for 2002AD
115 ("RCUI" sql-u) ("VCUI" sql-u) ("CFR" sql-i) ("TFR" sql-i)
117 "SQL data types for each non-string column")
119 (defparameter +custom-tables+
121 "Custom tables to create")
123 (defparameter +custom-cols+
124 '(("MRCONSO.RRF" "KPFSTR" "TEXT" 1024
125 (lambda (x) (pfstr-hash (parse-ui (nth 0 x)))))
126 ("MRCONSO.RRF" "KCUISUI" "BIGINT" 0
127 (lambda (x) (write-to-string (make-cuisui (parse-ui (nth 0 x)) (parse-ui (nth 5 x))))))
128 ("MRCONSO.RRF" "KCUILUI" "BIGINT" 0
129 (lambda (x) (write-to-string (make-cuilui (parse-ui (nth 0 x)) (parse-ui (nth 3 x))))))
130 ("MRCONSO.RRF" "KCUILRL" "INTEGER" 0
131 (lambda (x) (write-to-string (cui-lrl (parse-ui (nth 0 x))))))
132 ("MRCONSO.RRF" "KLUILRL" "INTEGER" 0
133 (lambda (x) (write-to-string (lui-lrl (parse-ui (nth 3 x))))))
134 ;; Deprecated, last in 2004AA -- skip index
136 ("MRLO.RRF" "KLRL" "INTEGER" 0
137 (lambda (x) (write-to-string
138 (if (zerop (length (nth 4 x)))
139 (cui-lrl (parse-ui (nth 0 x)))
140 (cuisui-lrl (make-cuisui (parse-ui (nth 0 x)) (parse-ui (nth 4 x))))))))
141 ("MRSTY.RRF" "KLRL" "INTEGER" 0
142 (lambda (x) (write-to-string (cui-lrl (parse-ui (nth 0 x))))))
143 ("MRCOC.RRF" "KLRL" "INTEGER" 0
144 (lambda (x) (write-to-string
145 (max (cui-lrl (parse-ui (nth 0 x)))
146 (kmrcl:aif (cui-lrl (parse-ui (nth 1 x))) kmrcl::it 0)))))
147 ("MRSAT.RRF" "KSRL" "INTEGER" 0
148 (lambda (x) (write-to-string (sab-srl (nth 9 x)))))
149 ("MRREL.RRF" "KSRL" "INTEGER" 0
150 (lambda (x) (write-to-string (sab-srl (nth 10 x)))))
151 ("MRRANK.RRF" "KSRL" "INTEGER" 0
152 (lambda (x) (write-to-string (sab-srl (nth 1 x)))))
153 ("MRDEF.RRF" "KSRL" "INTEGER" 0
154 (lambda (x) (write-to-string (sab-srl (nth 4 x)))))
155 ("MRCXT.RRF" "KSRL" "INTEGER" 0
156 (lambda (x) (write-to-string (sab-srl (nth 2 x)))))
157 ("MRATX.RRF" "KSRL" "INTEGER" 0
158 (lambda (x) (write-to-string (sab-srl (nth 1 x)))))
159 ("MRXW_ENG.RRF" "KLRL" "INTEGER" 0
160 (lambda (x) (write-to-string (cuisui-lrl (make-cuisui
162 (parse-ui (nth 4 x)))))))
163 ("MRXW_NONENG.RRF" "KLRL" "INTEGER" 0
164 (lambda (x) (write-to-string (cuisui-lrl (make-cuisui
166 (parse-ui (nth 4 x)))))))
167 ("MRXNW_ENG.RRF" "KLRL" "INTEGER" 0
168 (lambda (x) (write-to-string (cuisui-lrl (make-cuisui
170 (parse-ui (nth 4 x)))))))
171 ("MRXNS_ENG.RRF" "KLRL" "INTEGER" 0
172 (lambda (x) (write-to-string (cuisui-lrl (make-cuisui
174 (parse-ui (nth 4 x)))))))
175 ("MRREL.RRF" "KPFSTR2" "TEXT" 1024
176 (lambda (x) (pfstr-hash (parse-ui (nth 4 x)))))
177 ("MRCOC.RRF" "KPFSTR2" "TEXT" 1024
178 (lambda (x) (pfstr-hash (parse-ui (nth 2 x)))))
179 ("MRCXT.RRF" "KCUISUI" "BIGINT" 0
180 (lambda (x) (write-to-string (make-cuisui (parse-ui (nth 0 x)) (parse-ui (nth 1 x))))))
181 ("MRSAT.RRF" "KCUILUI" "BIGINT" 0
182 (lambda (x) (write-to-string (make-cuilui (parse-ui (nth 0 x)) (parse-ui (nth 1 x))))))
183 ("MRSAT.RRF" "KCUISUI" "BIGINT" 0
184 (lambda (x) (write-to-string (make-cuisui (parse-ui (nth 0 x)) (parse-ui (nth 2 x))))))
185 ("MRXW_ENG.RRF" "KCUISUI" "BIGINT" 0
186 (lambda (x) (write-to-string (make-cuisui (parse-ui (nth 2 x)) (parse-ui (nth 4 x))))))
187 ("MRXNW_ENG.RRF" "KCUISUI" "BIGINT" 0
188 (lambda (x) (write-to-string (make-cuisui (parse-ui (nth 2 x)) (parse-ui (nth 4 x))))))
189 ("MRXNS_ENG.RRF" "KCUISUI" "BIGINT" 0
190 (lambda (x) (write-to-string (make-cuisui (parse-ui (nth 2 x)) (parse-ui (nth 4 x))))))
191 ("MRXW_NONENG.RRF" "LAT" "VARCHAR" 3 (lambda (x) (nth 0 x)))
192 ("MRXW_NONENG.RRF" "WD" "VARCHAR" 200 (lambda (x) (nth 1 x)))
193 ("MRXW_NONENG.RRF" "CUI" "INTEGER" 0 (lambda (x) (write-to-string (parse-ui (nth 2 x)))))
194 ("MRXW_NONENG.RRF" "LUI" "INTEGER" 0 (lambda (x) (write-to-string (parse-ui (nth 3 x)))))
195 ("MRXW_NONENG.RRF" "SUI" "INTEGER" 0 (lambda (x) (write-to-string (parse-ui (nth 4 x)))))
196 ("MRXW_NONENG.RRF" "KCUISUI" "BIGINT" 0
197 (lambda (x) (write-to-string (make-cuisui (parse-ui (nth 2 x)) (parse-ui (nth 4 x)))))))
198 "Custom columns to create.(filename, col, sqltype, value-func).")
200 (defparameter +index-cols+
201 '(("CUI" "MRATX") ("CUI1" "MRCOC") ("CUI" "MRCONSO") ("LUI" "MRCONSO")
202 ("SRL" "MRCONSO") ("AUI" "MRCONSO")
203 ("SUI" "MRCONSO") ("CUI" "MRCXT") ("CUI" "MRDEF") ("CUI" "MRLO")
204 ("CUI1" "MRREL") ("CUI" "MRSAT") ("LUI" "MRSAT") ("SUI" "MRSAT")
206 ("TUI" "MRSTY") ("CUI" "MRXNS_ENG")
207 #+ignore ("NSTR" "MRXNS_ENG" 10)
208 ("CUI" "MRXNW_ENG") ("NWD" "MRXNW_ENG") ("WD" "MRXW_ENG")
209 ("KCUISUI" "MRCON") ("KCUILUI" "MRCON") ("KCUILRL" "MRCON")
210 ("KLUILRL" "MRCON") ("KCUISUI" "MRCXT")
211 ("KCUISUI" "MRSO") ("KCUISUI" "MRSAT") ("KCUILUI" "MRSAT")
212 ("KCUISUI" "MRXW_ENG") ("KCUISUI" "MRXNW_ENG")
213 ("KCUISUI" "MRXNS_ENG") ("KCUISUI" "MRXW_NONENG")
214 ("KSRL" "MRATX") ("KSRL" "MRCXT") ("KSRL" "MRDEF") ("KSRL" "MRRANK")
215 ("KSRL" "MRREL") ("KSRL" "MRSAT") ("KLRL" "MRCOC")
216 #+ignore ("KLRL" "MRLO") ;; deprecated
217 ("KLRL" "MRSTY") ("KLRL" "MRXW_ENG") ("KLRL" "MRXNW_ENG")
218 ("KLRL" "MRXNS_ENG") ("KLRL" "MRXW_NONENG")
220 ("EUI" "LRABR") ("EUI2" "LRABR") ("EUI" "LRAGR") ("EUI" "LRCMP") ("EUI" "LRMOD")
221 ("EUI" "LRNOM") ("EUI2" "LRNOM") ("EUI" "LRPRN") ("EUI" "LRPRP") ("EUI" "LRSPL")
222 ("EUI" "LRTRM") ("EUI" "LRTYP") ("EUI" "LRWD") ("WRD" "LRWD")
224 ;; Semantic NET indices
225 ("UI" "SRSTRE1") ("UI2" "SRSTRE1") ("UI3" "SRSTRE1")
226 ("STY_RL" "SRDEF") ("RT" "SRDEF") ("STY_RL" "SRSTR") ("STY_RL2" "SRSTR")
228 ("SRL" "MRSAB") ("RSAB" "MRSAB") ("VSAB" "MRSAB") ("RCUI" "MRSAB")
229 ("VCUI" "MRSAB") ("LAT" "MRSAB"))
230 "Columns in files to index")
233 (defparameter +custom-index-cols+
236 '(("CUI" "MRCONFULL") ("SAB" "MRCONFULL") ("TUI" "MRCONFULL"))
237 "Indexes to custom tables")
239 ;; File & Column functions
242 (add-ucols (gen-ucols-meta))
243 (add-ucols (gen-ucols-custom))
244 (add-ucols (gen-ucols-generic "LRFLD"))
245 (add-ucols (gen-ucols-generic "SRFLD")))
247 (defun gen-ucols-meta ()
248 "Initialize all umls columns"
250 (with-umls-file (line "MRCOLS.RRF")
251 (destructuring-bind (col des ref min av max fil dty) line
252 (push (make-ucol col des ref (parse-integer min) (read-from-string av)
253 (parse-integer max) fil dty)
257 (defun gen-ucols-custom ()
258 "Initialize umls columns for custom columns"
259 (loop for customcol in +custom-cols+
261 (make-ucol (nth 1 customcol) "" 0 0 0 (nth 3 customcol)
262 (nth 0 customcol) nil :sqltype (nth 2 customcol)
263 :custom-value-fun (nth 4 customcol))))
265 (defun gen-ucols-generic (col-filename)
266 "Initialize for generic (LEX/NET) columns"
268 (with-umls-file (line col-filename)
269 (destructuring-bind (nam des ref fil) line
270 (setq nam (escape-column-name nam))
271 (dolist (file (delimited-string-to-list fil #\,))
273 (make-ucol nam des ref nil nil nil file nil)
279 (add-ufiles (gen-ufiles-generic "MRFILES.RRF" "META"))
280 (add-ufiles (gen-ufiles-generic "LRFIL" "LEX"))
281 (add-ufiles (gen-ufiles-generic "SRFIL" "NET"))
282 ;; needs to come last
283 (add-ufiles (gen-ufiles-custom)))
286 (defun gen-ufiles-generic (files-filename dir)
287 "Initialize all LEX file structures"
289 (with-umls-file (line files-filename)
290 (destructuring-bind (fil des fmt cls rws bts) line
294 (parse-integer rws) (parse-integer bts)
295 (concatenate 'list (umls-field-string-to-list fmt)
296 (custom-colnames-for-filename fil)))
300 (defun gen-ufiles-custom ()
301 (make-ufile "META" "MRXW_NONENG.RRF" "Custom NonEnglish Index"
302 5 0 0 (fields (find-ufile "MRXW_ENG.RRF"))))