;;;; -*- Mode: Lisp; Syntax: ANSI-Common-Lisp; Base: 10; Package: umlisp -*- ;;;; ************************************************************************* ;;;; FILE IDENTIFICATION ;;;; ;;;; Name: parse-2002.lisp ;;;; Purpose: Parsing and SQL insertion routines for UMLisp which may ;;;; change from year to year ;;;; Programmer: Kevin M. Rosenberg ;;;; Date Started: Apr 2000 ;;;; ;;;; $Id: parse-2002.lisp,v 1.4 2002/10/14 09:25:20 kevin Exp $ ;;;; ;;;; This file, part of UMLisp, is ;;;; Copyright (c) 2000-2002 by Kevin M. Rosenberg, M.D. ;;;; ;;;; UMLisp users are granted the rights to distribute and use this software ;;;; as governed by the terms of the GNU General Public License. ;;;; ************************************************************************* (in-package :umlisp) (declaim (optimize (speed 3) (safety 1) (compilation-speed 0) (debug 3))) ;;; Pre-read data for custom fields into hash tables (defvar *parse-hash-init?* nil) (eval-when (:compile-toplevel :load-toplevel :execute) (let ((pfstr-hash nil) ;;; Preferred concept strings by CUI (cui-lrl-hash nil) ;;; LRL by CUI (lui-lrl-hash nil) ;;; LRL by LUI (cuisui-lrl-hash nil) ;;; LRL by CUISUI (sab-srl-hash nil)) ;;; SRL by SAB (defun make-parse-hash-table () (if pfstr-hash (progn (clrhash pfstr-hash) (clrhash cui-lrl-hash) (clrhash lui-lrl-hash) (clrhash cuisui-lrl-hash) (clrhash sab-srl-hash)) (setf pfstr-hash (make-hash-table :size 800000) cui-lrl-hash (make-hash-table :size 800000) lui-lrl-hash (make-hash-table :size 1500000) cuisui-lrl-hash (make-hash-table :size 1800000) sab-srl-hash (make-hash-table :size 100 :test 'equal)))) (defun binit-hash-table (&optional (force-read nil)) (when (or force-read (not *parse-hash-init?*)) (make-parse-hash-table) (setq *parse-hash-init?* t)) (with-buffered-umls-file (line "MRCON") (let ((cui (parse-ui (aref line 0))) (lui (parse-ui (aref line 3))) (sui (parse-ui (aref line 5))) (lrl (parse-integer (aref line 7)))) (unless (gethash cui pfstr-hash) ;; if haven't stored pfstr for cui (if (and (string-equal (aref line 1) "ENG") ; LAT (string-equal (aref line 2) "P") ; ts (string-equal (aref line 4) "PF")) ; stt (setf (gethash cui pfstr-hash) (aref line 6)))) (set-lrl-hash cui lrl cui-lrl-hash) (set-lrl-hash lui lrl lui-lrl-hash) (set-lrl-hash (make-cuisui cui sui) lrl cuisui-lrl-hash))) (with-buffered-umls-file (line "MRSO") (let ((sab (aref line 3))) (unless (gethash sab sab-srl-hash) ;; if haven't stored (setf (gethash sab sab-srl-hash) (aref line 6)))))) (defun init-hash-table (&optional (force-read nil)) (when (or force-read (not *parse-hash-init?*)) (make-parse-hash-table) (setq *parse-hash-init?* t)) (with-umls-file (line "MRCON") (let ((cui (parse-ui (nth 0 line))) (lui (parse-ui (nth 3 line))) (sui (parse-ui (nth 5 line))) (lrl (parse-integer (nth 7 line)))) (unless (gethash cui pfstr-hash) ;; if haven't stored pfstr for cui (if (and (string-equal (nth 1 line) "ENG") ; LAT (string-equal (nth 2 line) "P") ; ts (string-equal (nth 4 line) "PF")) ; stt (setf (gethash cui pfstr-hash) (nth 6 line)))) (set-lrl-hash cui lrl cui-lrl-hash) (set-lrl-hash lui lrl lui-lrl-hash) (set-lrl-hash (make-cuisui cui sui) lrl cuisui-lrl-hash))) (with-umls-file (line "MRSO") (let ((sab (nth 3 line))) (multiple-value-bind (val found) (gethash sab sab-srl-hash) (declare (ignore val)) (unless found (setf (gethash sab sab-srl-hash) (parse-integer (nth 6 line)))))))) (defun pfstr-hash (cui) (gethash cui pfstr-hash)) (defun cui-lrl (cui) (gethash cui cui-lrl-hash)) (defun lui-lrl (lui) (gethash lui lui-lrl-hash)) (defun cuisui-lrl (cuisui) (gethash cuisui cuisui-lrl-hash)) (defun sab-srl (sab) (kmrcl:aif (gethash sab sab-srl-hash) kmrcl::it 0)) )) ;; closure (defun set-lrl-hash (key lrl hash) "Set the least restrictive level in hash table" (multiple-value-bind (hash-lrl found) (gethash key hash) (if (or (not found) (< lrl hash-lrl)) (setf (gethash key hash) lrl)))) ;; UMLS file and column structures (defstruct (umls-file) "Record for each UMLS File" fil table des fmt cls rws bts fields colstructs) (defstruct (umls-col) "Record for each UMLS Column in each file" col des ref min av max fil sqltype dty ;; new in 2002 umls: suggested SQL datatype parsefunc quotechar datatype custom-value-func) ;;; SQL datatypes symbols ;;; sql-u - Unique identifier ;;; sql-s - Small integer (16-bit) ;;; sql-i - Integer (32-bit) ;;; sql-l - Big integer (64-bit) ;;; sql-f - Floating point (defparameter +col-datatypes+ '(("AV" sql-f) ("BTS" sql-i) ("CLS" sql-i) ("COF" sql-i) ("CUI1" sql-u) ("CUI2" sql-u) ("CUI" sql-u) ("CXN" sql-s) ("FR" sql-i) ("LRL" sql-s) ("LUI" sql-u) ("MAX" sql-s) ("MIN" sql-s) ("RANK" sql-s) ("REF" sql-s) ("RNK" sql-s) ("RWS" sql-i) ("SRL" sql-s) ("SUI" sql-u) ("TUI" sql-u) ;;; Custom columns ("KCUISUI" sql-l) ("KCUILUI" sql-l) ("KCUILRL" sql-i) ("KLUILRL" sql-i) ("KSRL" sql-i) ("KLRL" sql-i) ;;; LEX columns ("EUI" sql-u) ("EUI2" sql-u) ;;; Semantic net columns ("UI" sql-u) ("UI2" sql-u) ("UI3" sql-u)) "SQL data types for each non-string column") (defparameter +custom-tables+ nil #+ignore '(("MRCONSO" "SELECT m.CUI, m.LAT, m.TS, m.LUI, m.STT, m.SUI, m.STR, m.LRL, s.SAB, s.TTY, s.SCD, s.SRL FROM MRCON m, MRSO s WHERE m.CUI=s.CUI AND m.LUI=s.LUI AND m.SUI=s.SUI") ("MRCONFULL" "SELECT m.CUI, m.LAT, m.TS, m.LUI, m.STT, m.SUI, m.STR, m.LRL, s.SAB, s.TTY, s.SCD, s.SRL, t.TUI FROM MRCON m, MRSO s, MRSTY t WHERE m.CUI=s.CUI AND m.LUI=s.LUI AND m.SUI=s.SUI AND m.CUI=t.CUI AND s.CUI=t.CUI")) "Custom tables to create") (defparameter +custom-cols+ '(("MRCON" "KPFSTR" "TEXT" 1024 (lambda (x) (pfstr-hash (parse-ui (nth 0 x))))) ("MRCON" "KCUISUI" "BIGINT" 0 (lambda (x) (format nil "~d" (make-cuisui (parse-ui (nth 0 x)) (parse-ui (nth 5 x)))))) ("MRCON" "KCUILUI" "BIGINT" 0 (lambda (x) (format nil "~d" (make-cuilui (parse-ui (nth 0 x)) (parse-ui (nth 3 x)))))) ("MRCON" "KCUILRL" "INTEGER" 0 (lambda (x) (format nil "~d" (cui-lrl (parse-ui (nth 0 x)))))) ("MRCON" "KLUILRL" "INTEGER" 0 (lambda (x) (format nil "~d" (lui-lrl (parse-ui (nth 3 x)))))) ("MRLO" "KLRL" "INTEGER" 0 (lambda (x) (format nil "~d" (if (zerop (length (nth 4 x))) (cui-lrl (parse-ui (nth 0 x))) (cuisui-lrl (make-cuisui (parse-ui (nth 0 x)) (parse-ui (nth 4 x)))))))) ("MRSTY" "KLRL" "INTEGER" 0 (lambda (x) (format nil "~d" (cui-lrl (parse-ui (nth 0 x)))))) ("MRCOC" "KLRL" "INTEGER" 0 (lambda (x) (format nil "~d" (max (cui-lrl (parse-ui (nth 0 x))) (kmrcl:aif (cui-lrl (parse-ui (nth 1 x))) kmrcl::it 0))))) ("MRSAT" "KSRL" "INTEGER" 0 (lambda (x) (format nil "~d" (sab-srl (nth 5 x))))) ("MRREL" "KSRL" "INTEGER" 0 (lambda (x) (format nil "~d" (sab-srl (nth 4 x))))) ("MRRANK" "KSRL" "INTEGER" 0 (lambda (x) (format nil "~d" (sab-srl (nth 1 x))))) ("MRDEF" "KSRL" "INTEGER" 0 (lambda (x) (format nil "~d" (sab-srl (nth 1 x))))) ("MRCXT" "KSRL" "INTEGER" 0 (lambda (x) (format nil "~d" (sab-srl (nth 2 x))))) ("MRATX" "KSRL" "INTEGER" 0 (lambda (x) (format nil "~d" (sab-srl (nth 1 x))))) ("MRXW.ENG" "KLRL" "INTEGER" 0 (lambda (x) (format nil "~d" (cuisui-lrl (make-cuisui (parse-ui (nth 2 x)) (parse-ui (nth 4 x))))))) ("MRXW.NONENG" "KLRL" "INTEGER" 0 (lambda (x) (format nil "~d" (cuisui-lrl (make-cuisui (parse-ui (nth 2 x)) (parse-ui (nth 4 x))))))) ("MRXNW.ENG" "KLRL" "INTEGER" 0 (lambda (x) (format nil "~d" (cuisui-lrl (make-cuisui (parse-ui (nth 2 x)) (parse-ui (nth 4 x))))))) ("MRXNS.ENG" "KLRL" "INTEGER" 0 (lambda (x) (format nil "~d" (cuisui-lrl (make-cuisui (parse-ui (nth 2 x)) (parse-ui (nth 4 x))))))) ("MRREL" "KPFSTR2" "TEXT" 1024 (lambda (x) (pfstr-hash (parse-ui (nth 2 x))))) ("MRCOC" "KPFSTR2" "TEXT" 1024 (lambda (x) (pfstr-hash (parse-ui (nth 1 x))))) ("MRCXT" "KCUISUI" "BIGINT" 0 (lambda (x) (format nil "~d" (make-cuisui (parse-ui (nth 0 x)) (parse-ui (nth 1 x)))))) ("MRSAT" "KCUILUI" "BIGINT" 0 (lambda (x) (format nil "~d" (make-cuilui (parse-ui (nth 0 x)) (parse-ui (nth 1 x)))))) ("MRSAT" "KCUISUI" "BIGINT" 0 (lambda (x) (format nil "~d" (make-cuisui (parse-ui (nth 0 x)) (parse-ui (nth 2 x)))))) ("MRSO" "KCUISUI" "BIGINT" 0 (lambda (x) (format nil "~d" (make-cuisui (parse-ui (nth 0 x)) (parse-ui (nth 2 x)))))) ("MRXW.ENG" "KCUISUI" "BIGINT" 0 (lambda (x) (format nil "~d" (make-cuisui (parse-ui (nth 2 x)) (parse-ui (nth 4 x)))))) ("MRXNW.ENG" "KCUISUI" "BIGINT" 0 (lambda (x) (format nil "~d" (make-cuisui (parse-ui (nth 2 x)) (parse-ui (nth 4 x)))))) ("MRXNS.ENG" "KCUISUI" "BIGINT" 0 (lambda (x) (format nil "~d" (make-cuisui (parse-ui (nth 2 x)) (parse-ui (nth 4 x)))))) ("MRXW.NONENG" "LAT" "CHAR" 3 (lambda (x) (nth 0 x))) ("MRXW.NONENG" "WD" "CHAR" 200 (lambda (x) (nth 1 x))) ("MRXW.NONENG" "CUI" "INTEGER" 0 (lambda (x) (nth 2 x))) ("MRXW.NONENG" "LUI" "INTEGER" 0 (lambda (x) (nth 3 x))) ("MRXW.NONENG" "SUI" "INTEGER" 0 (lambda (x) (nth 4 x))) ("MRXW.NONENG" "KCUISUI" "BIGINT" 0 (lambda (x) (format nil "~d" (make-cuisui (parse-ui (nth 2 x)) (parse-ui (nth 4 x))))))) "Custom columns to create.(filename, col, sqltype, value-func).") (defparameter +index-cols+ '(("CUI" "MRATX") ("CUI1" "MRCOC") ("CUI" "MRCON") ("LUI" "MRCON") ("LRL" "MRCON") ("SUI" "MRCON") ("CUI" "MRCXT") ("CUI" "MRDEF") ("CUI" "MRLO") ("CUI1" "MRREL") ("CUI" "MRSAT") ("LUI" "MRSAT") ("SUI" "MRSAT") ("CUI" "MRSO") ("SAB" "MRSO") ("SRL" "MRSO") ("CUI" "MRSTY") ("TUI" "MRSTY") ("CUI" "MRXNS_ENG") ("NSTR" "MRXNS_ENG" 10) ("CUI" "MRXNW_ENG") ("NWD" "MRXNW_ENG") ("WD" "MRXW_ENG") ("KCUISUI" "MRCON") ("KCUILUI" "MRCON") ("KCUILRL" "MRCON") ("KLUILRL" "MRCON") ("KCUISUI" "MRCXT") ("KCUISUI" "MRSO") ("KCUISUI" "MRSAT") ("KCUILUI" "MRSAT") ("KCUISUI" "MRXW_ENG") ("KCUISUI" "MRXNW_ENG") ("KCUISUI" "MRXNS_ENG") ("KCUISUI" "MRXW_NONENG") ("KSRL" "MRATX") ("KSRL" "MRCXT") ("KSRL" "MRDEF") ("KSRL" "MRRANK") ("KSRL" "MRREL") ("KSRL" "MRSAT") ("KLRL" "MRCOC") ("KLRL" "MRLO") ("KLRL" "MRSTY") ("KLRL" "MRXW_ENG") ("KLRL" "MRXNW_ENG") ("KLRL" "MRXNS_ENG") ("KLRL" "MRXW_NONENG") ;; LEX indices ("EUI" "LRABR") ("EUI2" "LRABR") ("EUI" "LRAGR") ("EUI" "LRCMP") ("EUI" "LRMOD") ("EUI" "LRNOM") ("EUI2" "LRNOM") ("EUI" "LRPRN") ("EUI" "LRPRP") ("EUI" "LRSPL") ("EUI" "LRTRM") ("EUI" "LRTYP") ("EUI" "LRWD") ("WRD" "LRWD") ("BAS" "LRABR") ;; Semantic NET indices ("UI" "SRSTRE1") ("UI2" "SRSTRE1") ("UI3" "SRSTRE1") ("STY_RL" "SRDEF") ("RT" "SRDEF") ("STY_RL" "SRSTR") ("STY_RL2" "SRSTR") ("RL" "SRSTR")) "Columns in files to index") (defparameter +custom-index-cols+ nil #+ignore '(("CUI" "MRCONFULL") ("SAB" "MRCONFULL") ("TUI" "MRCONFULL")) "Indexes to custom tables") ;; File & Column functions (defun init-umls (&optional (alwaysclear nil)) "Initialize all UMLS file and column structures if not already initialized" (when (or alwaysclear (null *umls-files*)) (init-umls-cols) (init-umls-files) (init-field-lengths))) (defun init-umls-cols () (setq *umls-cols* (append (init-meta-cols) (init-custom-cols) (init-generic-cols "LRFLD") (init-generic-cols "SRFLD")))) (defun init-meta-cols () "Initialize all umls columns" (let ((cols '())) (with-umls-file (line "MRCOLS") (destructuring-bind (col des ref min av max fil dty) line (let ((c (make-umls-col :col col :des des :ref ref :min (parse-integer min) :av (read-from-string av) :max (parse-integer max) :fil fil :dty dty ;; new in 2002 UMLS :sqltype "VARCHAR" ; default data type :parsefunc #'add-sql-quotes :custom-value-func nil :quotechar "'"))) (add-datatype-to-col c (datatype-for-col col)) (push c cols)))) (nreverse cols))) (defun init-custom-cols () "Initialize umls columns for custom columns" (let ((cols '())) (dolist (customcol +custom-cols+) (let ((c (make-umls-col :col (nth 1 customcol) :des "" :ref 0 :min 0 :max (nth 3 customcol) :av 0 :dty nil :fil (nth 0 customcol) :sqltype (nth 2 customcol) :parsefunc #'add-sql-quotes :custom-value-func (nth 4 customcol) :quotechar "'"))) (add-datatype-to-col c (datatype-for-col (nth 1 customcol))) (push c cols))) (nreverse cols))) (defun escape-column-name (name) (substitute #\_ #\/ name)) (defun init-generic-cols (col-filename) "Initialize for generic (LEX/NET) columns" (let ((cols '())) (with-umls-file (line col-filename) (destructuring-bind (nam des ref fil) line (setq nam (escape-column-name nam)) (dolist (file (delimited-string-to-list fil #\,)) (let ((c (make-umls-col :col nam :des des :ref ref :min nil :av nil :max nil :fil file :dty nil :sqltype "VARCHAR" ; default data type :parsefunc #'add-sql-quotes :custom-value-func nil :quotechar "'"))) (add-datatype-to-col c (datatype-for-col nam)) (push c cols))))) (nreverse cols))) (defun init-umls-files () (setq *umls-files* (append (init-generic-files "MRFILES") (init-generic-files "LRFIL") (init-generic-files "SRFIL"))) ;; need to separate this since init-custom-files depends on *umls-files* (setq *umls-files* (append *umls-files* (init-custom-files)))) (defun umls-field-string-to-list (fmt) "Converts a comma delimited list of fields into a list of field names. Will append a unique number (starting at 2) onto a column name that is repeated in the list" (let ((field-list (delimited-string-to-list (escape-column-name fmt) #\,)) (col-count (make-hash-table :test 'equal))) (dotimes (i (length field-list)) (declare (fixnum i)) (let ((col (nth i field-list))) (multiple-value-bind (key found) (gethash col col-count) (if found (let ((next-id (1+ key))) (setf (nth i field-list) (concatenate 'string col (format nil "~D" next-id))) (setf (gethash col col-count) next-id)) (setf (gethash col col-count) 1))))) field-list)) (defun init-generic-files (files-filename) "Initialize all LEX file structures" (let ((files '())) (with-umls-file (line files-filename) (destructuring-bind (fil des fmt cls rws bts) line (let ((f (make-umls-file :fil fil :table (substitute #\_ #\. fil) :des des :fmt (escape-column-name fmt) :cls (parse-integer cls) :rws (parse-integer rws) :bts (parse-integer bts) :fields (concatenate 'list (umls-field-string-to-list fmt) (custom-colnames-for-filename fil))))) (setf (umls-file-colstructs f) (umls-cols-for-umls-file f)) (push f files)))) (nreverse files))) (defun init-custom-files () (let ((ffile (make-umls-file :fil "MRXW.NONENG" :des "Custom NonEnglish Index" :table "MRXW_NONENG" :cls 5 :rws 0 :bts 0 :fields (umls-file-fields (find-umls-file "MRXW.ENG"))))) (setf (umls-file-colstructs ffile) (umls-cols-for-umls-file ffile)) (list ffile))) (defun datatype-for-col (colname) "Return datatype for column name" (car (cdr (find colname +col-datatypes+ :key #'car :test #'string-equal)))) (defun add-datatype-to-col (col datatype) "Add data type information to column" (setf (umls-col-datatype col) datatype) (case datatype (sql-u (setf (umls-col-sqltype col) "INTEGER" (umls-col-parsefunc col) #'parse-ui (umls-col-quotechar col) "")) (sql-s (setf (umls-col-sqltype col) "SMALLINT" (umls-col-parsefunc col) #'parse-integer (umls-col-quotechar col) "")) (sql-l (setf (umls-col-sqltype col) "BIGINT" (umls-col-parsefunc col) #'parse-integer (umls-col-quotechar col) "")) (sql-i (setf (umls-col-sqltype col) "INTEGER" (umls-col-parsefunc col) #'parse-integer (umls-col-quotechar col) "")) (sql-f (setf (umls-col-sqltype col) "NUMERIC" (umls-col-parsefunc col) #'read-from-string (umls-col-quotechar col) "")) (t ; Default column type, optimized text storage (setf (umls-col-parsefunc col) #'add-sql-quotes (umls-col-quotechar col) "'") (when (and (umls-col-max col) (umls-col-av col)) (if (> (umls-col-max col) 255) (setf (umls-col-sqltype col) "TEXT") (if (< (- (umls-col-max col) (umls-col-av col)) 4) (setf (umls-col-sqltype col) "CHAR") ; if average bytes wasted < 4 (setf (umls-col-sqltype col) "VARCHAR")))))))