;;;; Name: parse-2002.lisp
;;;; Purpose: Parsing and SQL insertion routines for UMLisp which may
;;;; change from year to year
-;;;; Programmer: Kevin M. Rosenberg
+;;;; Author: Kevin M. Rosenberg
;;;; Date Started: Apr 2000
;;;;
-;;;; $Id: parse-2002.lisp,v 1.3 2002/10/09 23:03:41 kevin Exp $
+;;;; $Id: parse-2002.lisp,v 1.8 2003/05/06 07:44:07 kevin Exp $
;;;;
;;;; This file, part of UMLisp, is
;;;; Copyright (c) 2000-2002 by Kevin M. Rosenberg, M.D.
;;;; as governed by the terms of the GNU General Public License.
;;;; *************************************************************************
-(in-package :umlisp)
-(declaim (optimize (speed 3) (safety 1) (compilation-speed 0) (debug 3)))
+(in-package #:umlisp)
+
+(eval-when (:compile-toplevel)
+ (declaim (optimize (speed 3) (safety 1) (compilation-speed 0) (debug 3))))
;;; Pre-read data for custom fields into hash tables
(defvar *parse-hash-init?* nil)
(setq *parse-hash-init?* t))
(with-buffered-umls-file (line "MRCON")
(let ((cui (parse-ui (aref line 0)))
- (lui (parse-ui (nth 3 line)))
- (sui (parse-ui (nth 5 line)))
- (lrl (parse-integer (nth 7 line))))
+ (lui (parse-ui (aref line 3)))
+ (sui (parse-ui (aref line 5)))
+ (lrl (parse-integer (aref line 7))))
(unless (gethash cui pfstr-hash) ;; if haven't stored pfstr for cui
(if (and (string-equal (aref line 1) "ENG") ; LAT
(string-equal (aref line 2) "P") ; ts
(set-lrl-hash lui lrl lui-lrl-hash)
(set-lrl-hash (make-cuisui cui sui) lrl cuisui-lrl-hash)))
(with-buffered-umls-file (line "MRSO")
- (let ((sab (aref 3 line)))
+ (let ((sab (aref line 3)))
(unless (gethash sab sab-srl-hash) ;; if haven't stored
- (setf (gethash sab sab-srl-hash) (aref 6 line))))))
+ (setf (gethash sab sab-srl-hash) (aref line 6))))))
(defun init-hash-table (&optional (force-read nil))
(when (or force-read (not *parse-hash-init?*))
(setf (gethash key hash) lrl))))
;; UMLS file and column structures
-
-(defstruct (umls-file)
- "Record for each UMLS File"
- fil table des fmt cls rws bts fields colstructs)
-
-(defstruct (umls-col)
- "Record for each UMLS Column in each file"
- col des ref min av max fil sqltype
- dty ;; new in 2002 umls: suggested SQL datatype
- parsefunc quotechar datatype custom-value-func)
-
;;; SQL datatypes symbols
;;; sql-u - Unique identifier
;;; sql-s - Small integer (16-bit)
;;; sql-l - Big integer (64-bit)
;;; sql-f - Floating point
-(defconstant +col-datatypes+
+(defparameter +col-datatypes+
'(("AV" sql-f) ("BTS" sql-i) ("CLS" sql-i) ("COF" sql-i) ("CUI1" sql-u)
("CUI2" sql-u) ("CUI" sql-u) ("CXN" sql-s) ("FR" sql-i) ("LRL" sql-s)
("LUI" sql-u) ("MAX" sql-s) ("MIN" sql-s) ("RANK" sql-s) ("REF" sql-s)
;;; LEX columns
("EUI" sql-u) ("EUI2" sql-u)
;;; Semantic net columns
- ("UI" sql-u) ("UI2" sql-u) ("UI3" sql-u))
+ ("UI" sql-u) ("UI2" sql-u) ("UI3" sql-u)
+ ;; New fields for 2002AD
+ ("RCUI" sql-u) ("VCUI" sql-u) ("CFR" sql-i) ("TFR" sql-i)
+ )
"SQL data types for each non-string column")
-(defconstant +custom-tables+
+(defparameter +custom-tables+
nil
#+ignore
'(("MRCONSO" "SELECT m.CUI, m.LAT, m.TS, m.LUI, m.STT, m.SUI, m.STR, m.LRL, s.SAB, s.TTY, s.SCD, s.SRL FROM MRCON m, MRSO s WHERE m.CUI=s.CUI AND m.LUI=s.LUI AND m.SUI=s.SUI")
("MRCONFULL" "SELECT m.CUI, m.LAT, m.TS, m.LUI, m.STT, m.SUI, m.STR, m.LRL, s.SAB, s.TTY, s.SCD, s.SRL, t.TUI FROM MRCON m, MRSO s, MRSTY t WHERE m.CUI=s.CUI AND m.LUI=s.LUI AND m.SUI=s.SUI AND m.CUI=t.CUI AND s.CUI=t.CUI"))
"Custom tables to create")
-(defconstant +custom-cols+
+(defparameter +custom-cols+
'(("MRCON" "KPFSTR" "TEXT" 1024
(lambda (x) (pfstr-hash (parse-ui (nth 0 x)))))
("MRCON" "KCUISUI" "BIGINT" 0
(lambda (x) (format nil "~d" (make-cuisui (parse-ui (nth 2 x)) (parse-ui (nth 4 x))))))
("MRXW.NONENG" "LAT" "CHAR" 3 (lambda (x) (nth 0 x)))
("MRXW.NONENG" "WD" "CHAR" 200 (lambda (x) (nth 1 x)))
- ("MRXW.NONENG" "CUI" "INTEGER" 0 (lambda (x) (nth 2 x)))
- ("MRXW.NONENG" "LUI" "INTEGER" 0 (lambda (x) (nth 3 x)))
- ("MRXW.NONENG" "SUI" "INTEGER" 0 (lambda (x) (nth 4 x)))
+ ("MRXW.NONENG" "CUI" "INTEGER" 0 (lambda (x) (parse-ui (nth 2 x))))
+ ("MRXW.NONENG" "LUI" "INTEGER" 0 (lambda (x) (parse-ui (nth 3 x))))
+ ("MRXW.NONENG" "SUI" "INTEGER" 0 (lambda (x) (parse-ui (nth 4 x))))
("MRXW.NONENG" "KCUISUI" "BIGINT" 0
(lambda (x) (format nil "~d" (make-cuisui (parse-ui (nth 2 x)) (parse-ui (nth 4 x)))))))
"Custom columns to create.(filename, col, sqltype, value-func).")
-(defconstant +index-cols+
+(defparameter +index-cols+
'(("CUI" "MRATX") ("CUI1" "MRCOC") ("CUI" "MRCON") ("LUI" "MRCON")
("LRL" "MRCON")
("SUI" "MRCON") ("CUI" "MRCXT") ("CUI" "MRDEF") ("CUI" "MRLO")
;; Semantic NET indices
("UI" "SRSTRE1") ("UI2" "SRSTRE1") ("UI3" "SRSTRE1")
("STY_RL" "SRDEF") ("RT" "SRDEF") ("STY_RL" "SRSTR") ("STY_RL2" "SRSTR")
- ("RL" "SRSTR"))
+ ("RL" "SRSTR")
+ ("SRL" "MRSAB") ("RSAB" "MRSAB") ("VSAB" "MRSAB") ("RCUI" "MRSAB")
+ ("VCUI" "MRSAB") ("LAT" "MRSAB"))
"Columns in files to index")
-
-(defconstant +custom-index-cols+
+(defparameter +custom-index-cols+
nil
#+ignore
'(("CUI" "MRCONFULL") ("SAB" "MRCONFULL") ("TUI" "MRCONFULL"))
(let ((cols '()))
(with-umls-file (line "MRCOLS")
(destructuring-bind (col des ref min av max fil dty) line
- (let ((c (make-umls-col
+ (let ((c (make-instance 'ucol
:col col
:des des
:ref ref
:fil fil
:dty dty ;; new in 2002 UMLS
:sqltype "VARCHAR" ; default data type
- :parsefunc #'add-sql-quotes
- :custom-value-func nil
+ :parse-fun #'add-sql-quotes
+ :custom-value-fun nil
:quotechar "'")))
(add-datatype-to-col c (datatype-for-col col))
(push c cols))))
"Initialize umls columns for custom columns"
(let ((cols '()))
(dolist (customcol +custom-cols+)
- (let ((c (make-umls-col :col (nth 1 customcol)
- :des ""
- :ref 0
- :min 0
- :max (nth 3 customcol)
- :av 0
- :dty nil
- :fil (nth 0 customcol)
- :sqltype (nth 2 customcol)
- :parsefunc #'add-sql-quotes
- :custom-value-func (nth 4 customcol)
+ (let ((c (make-instance 'ucol
+ :col (nth 1 customcol) :des "" :ref 0 :min 0 :max (nth 3 customcol)
+ :av 0 :dty nil :fil (nth 0 customcol) :sqltype (nth 2 customcol)
+ :parse-fun #'add-sql-quotes :custom-value-fun (nth 4 customcol)
:quotechar "'")))
(add-datatype-to-col c (datatype-for-col (nth 1 customcol)))
(push c cols)))
(destructuring-bind (nam des ref fil) line
(setq nam (escape-column-name nam))
(dolist (file (delimited-string-to-list fil #\,))
- (let ((c (make-umls-col
+ (let ((c (make-instance 'ucol
:col nam
:des des
:ref ref
:fil file
:dty nil
:sqltype "VARCHAR" ; default data type
- :parsefunc #'add-sql-quotes
- :custom-value-func nil
+ :parse-fun #'add-sql-quotes
+ :custom-value-fun nil
:quotechar "'")))
(add-datatype-to-col c (datatype-for-col nam))
(push c cols)))))
(let ((files '()))
(with-umls-file (line files-filename)
(destructuring-bind (fil des fmt cls rws bts) line
- (let ((f (make-umls-file
+ (let ((f (make-instance 'ufile
:fil fil
:table (substitute #\_ #\. fil)
:des des
:fields (concatenate 'list
(umls-field-string-to-list fmt)
(custom-colnames-for-filename fil)))))
- (setf (umls-file-colstructs f) (umls-cols-for-umls-file f))
+ (setf (ucols f) (ucols-for-ufile f))
(push f files))))
(nreverse files)))
(defun init-custom-files ()
- (let ((ffile (make-umls-file :fil "MRXW.NONENG"
- :des "Custom NonEnglish Index"
- :table "MRXW_NONENG"
- :cls 5
- :rws 0
- :bts 0
- :fields (umls-file-fields (find-umls-file "MRXW.ENG")))))
- (setf (umls-file-colstructs ffile)
- (umls-cols-for-umls-file ffile))
+ (let ((ffile (make-instance 'ufile
+ :fil "MRXW.NONENG" :des "Custom NonEnglish Index" :table "MRXW_NONENG"
+ :cls 5 :rws 0 :bts 0 :fields (fields (find-ufile "MRXW.ENG")))))
+ (setf (ucols ffile)
+ (ucols-for-ufile ffile))
(list ffile)))
(defun datatype-for-col (colname)
(defun add-datatype-to-col (col datatype)
"Add data type information to column"
- (setf (umls-col-datatype col) datatype)
+ (setf (datatype col) datatype)
(case datatype
- (sql-u (setf (umls-col-sqltype col) "INTEGER"
- (umls-col-parsefunc col) #'parse-ui
- (umls-col-quotechar col) ""))
- (sql-s (setf (umls-col-sqltype col) "SMALLINT"
- (umls-col-parsefunc col) #'parse-integer
- (umls-col-quotechar col) ""))
- (sql-l (setf (umls-col-sqltype col) "BIGINT"
- (umls-col-parsefunc col) #'parse-integer
- (umls-col-quotechar col) ""))
- (sql-i (setf (umls-col-sqltype col) "INTEGER"
- (umls-col-parsefunc col) #'parse-integer
- (umls-col-quotechar col) ""))
- (sql-f (setf (umls-col-sqltype col) "NUMERIC"
- (umls-col-parsefunc col) #'read-from-string
- (umls-col-quotechar col) ""))
+ (sql-u (setf (sqltype col) "INTEGER"
+ (parse-fun col) #'parse-ui
+ (quotechar col) ""))
+ (sql-s (setf (sqltype col) "SMALLINT"
+ (parse-fun col) #'parse-integer
+ (quotechar col) ""))
+ (sql-l (setf (sqltype col) "BIGINT"
+ (parse-fun col) #'parse-integer
+ (quotechar col) ""))
+ (sql-i (setf (sqltype col) "INTEGER"
+ (parse-fun col) #'parse-integer
+ (quotechar col) ""))
+ (sql-f (setf (sqltype col) "NUMERIC"
+ (parse-fun col) #'read-from-string
+ (quotechar col) ""))
(t ; Default column type, optimized text storage
- (setf (umls-col-parsefunc col) #'add-sql-quotes
- (umls-col-quotechar col) "'")
- (when (and (umls-col-max col) (umls-col-av col))
- (if (> (umls-col-max col) 255)
- (setf (umls-col-sqltype col) "TEXT")
- (if (< (- (umls-col-max col) (umls-col-av col)) 4)
- (setf (umls-col-sqltype col) "CHAR") ; if average bytes wasted < 4
- (setf (umls-col-sqltype col) "VARCHAR")))))))
+ (setf (parse-fun col) #'add-sql-quotes
+ (quotechar col) "'")
+ (when (and (cmax col) (av col))
+ (if (> (cmax col) 255)
+ (setf (sqltype col) "TEXT")
+ (if (< (- (cmax col) (av col)) 4)
+ (setf (sqltype col) "CHAR") ; if average bytes wasted < 4
+ (setf (sqltype col) "VARCHAR")))))))