;;;; Author: Kevin M. Rosenberg
;;;; Date Started: Apr 2000
;;;;
-;;;; $Id: data-structures.lisp,v 1.7 2003/05/06 06:09:29 kevin Exp $
+;;;; $Id: data-structures.lisp,v 1.8 2003/05/06 07:17:35 kevin Exp $
;;;;
;;;; This file, part of UMLisp, is
;;;; Copyright (c) 2000-2002 by Kevin M. Rosenberg, M.D.
;; Preliminary objects to replace structures
(defclass ufile ()
- ((fil :initarg :fil)
- (table :initarg :table)
- (des :initarg :des)
- (fmt :initarg :fmt)
- (cls :initarg :cls)
- (rws :initarg :rws)
- (bts :initarg :bts)
- (fields :initarg :fields)
- (ucols :initarg ucols))
- (:documentation "A UMLS File"))
+ ((fil :initarg :fil :accessor fil)
+ (table :initarg :table :accessor table)
+ (des :initarg :des :accessor des)
+ (fmt :initarg :fmt :accessor fmt)
+ (cls :initarg :cls :accessor cls)
+ (rws :initarg :rws :accessor rws)
+ (bts :initarg :bts :accessor bts)
+ (fields :initarg :fields :accessor fields)
+ (ucols :initarg :ucols :accessor ucols))
+ (:default-initargs :fil nil :table nil :des nil :fmt nil :cls nil :rws nil :bts nil
+ :fields nil :ucols nil)
+ (:documentation "UMLS File"))
(defclass ucol ()
- ((col :initarg :col)
- (des :initarg des)
- (ref :initarg ref)
- (min :initarg :min)
- (av :initarg :av)
- (max :initarg :max)
- (fil :initarg :fil)
- (sqltype :initarg :sqltype)
- (dty :initarg :dty :documentation "new in 2002: suggested SQL datatype")
- (parsefunc :initarg :parsefunc)
- (quotechar :initarg :quotechar)
- (datatype :initarg :datatype)
- (custom-value-func :initarg :custom-value-func))
- (:documentation "A UMLS column"))
-
-(defstruct (umls-file)
- "Record for each UMLS File"
- fil table des fmt cls rws bts fields colstructs)
-
-(defstruct (umls-col)
- "Record for each UMLS Column in each file"
- col des ref min av max fil sqltype
- dty ;; new in 2002 umls: suggested SQL datatype
- parsefunc quotechar datatype custom-value-func)
+ ((col :initarg :col :accessor col)
+ (des :initarg :des :accessor des)
+ (ref :initarg :ref :accessor ref)
+ (min :initarg :min :accessor cmin)
+ (av :initarg :av :accessor av)
+ (max :initarg :max :accessor cmax)
+ (fil :initarg :fil :accessor fil)
+ (sqltype :initarg :sqltype :accessor sqltype)
+ (dty :initarg :dty :accessor dty :documentation "new in 2002: suggested SQL datatype")
+ (parse-fun :initarg :parse-fun :accessor parse-fun)
+ (quotechar :initarg :quotechar :accessor quotechar)
+ (datatype :initarg :datatype :accessor datatype)
+ (custom-value-fun :initarg :custom-value-fun :accessor custom-value-fun))
+ (:default-initargs :col nil :des nil :ref nil :min nil :av nil :max nil :fil nil
+ :sqltype nil :dty nil :parse-fun nil :datatype nil
+ :custom-value-fun nil)
+ (:documentation "UMLS column"))
;;;; Programmer: Kevin M. Rosenberg
;;;; Date Started: Apr 2000
;;;;
-;;;; $Id: parse-2002.lisp,v 1.6 2003/05/06 06:09:29 kevin Exp $
+;;;; $Id: parse-2002.lisp,v 1.7 2003/05/06 07:17:35 kevin Exp $
;;;;
;;;; This file, part of UMLisp, is
;;;; Copyright (c) 2000-2002 by Kevin M. Rosenberg, M.D.
(let ((cols '()))
(with-umls-file (line "MRCOLS")
(destructuring-bind (col des ref min av max fil dty) line
- (let ((c (make-umls-col
+ (let ((c (make-instance 'ucol
:col col
:des des
:ref ref
:fil fil
:dty dty ;; new in 2002 UMLS
:sqltype "VARCHAR" ; default data type
- :parsefunc #'add-sql-quotes
- :custom-value-func nil
+ :parse-fun #'add-sql-quotes
+ :custom-value-fun nil
:quotechar "'")))
(add-datatype-to-col c (datatype-for-col col))
(push c cols))))
"Initialize umls columns for custom columns"
(let ((cols '()))
(dolist (customcol +custom-cols+)
- (let ((c (make-umls-col :col (nth 1 customcol)
- :des ""
- :ref 0
- :min 0
- :max (nth 3 customcol)
- :av 0
- :dty nil
- :fil (nth 0 customcol)
- :sqltype (nth 2 customcol)
- :parsefunc #'add-sql-quotes
- :custom-value-func (nth 4 customcol)
+ (let ((c (make-instance 'ucol
+ :col (nth 1 customcol) :des "" :ref 0 :min 0 :max (nth 3 customcol)
+ :av 0 :dty nil :fil (nth 0 customcol) :sqltype (nth 2 customcol)
+ :parse-fun #'add-sql-quotes :custom-value-fun (nth 4 customcol)
:quotechar "'")))
(add-datatype-to-col c (datatype-for-col (nth 1 customcol)))
(push c cols)))
(destructuring-bind (nam des ref fil) line
(setq nam (escape-column-name nam))
(dolist (file (delimited-string-to-list fil #\,))
- (let ((c (make-umls-col
+ (let ((c (make-instance 'ucol
:col nam
:des des
:ref ref
:fil file
:dty nil
:sqltype "VARCHAR" ; default data type
- :parsefunc #'add-sql-quotes
- :custom-value-func nil
+ :parse-fun #'add-sql-quotes
+ :custom-value-fun nil
:quotechar "'")))
(add-datatype-to-col c (datatype-for-col nam))
(push c cols)))))
(let ((files '()))
(with-umls-file (line files-filename)
(destructuring-bind (fil des fmt cls rws bts) line
- (let ((f (make-umls-file
+ (let ((f (make-instance 'ufile
:fil fil
:table (substitute #\_ #\. fil)
:des des
:fields (concatenate 'list
(umls-field-string-to-list fmt)
(custom-colnames-for-filename fil)))))
- (setf (umls-file-colstructs f) (umls-cols-for-umls-file f))
+ (setf (ucols f) (ucols-for-umls-file f))
(push f files))))
(nreverse files)))
(defun init-custom-files ()
- (let ((ffile (make-umls-file :fil "MRXW.NONENG"
- :des "Custom NonEnglish Index"
- :table "MRXW_NONENG"
- :cls 5
- :rws 0
- :bts 0
- :fields (umls-file-fields (find-umls-file "MRXW.ENG")))))
- (setf (umls-file-colstructs ffile)
- (umls-cols-for-umls-file ffile))
+ (let ((ffile (make-instance 'ufile
+ :fil "MRXW.NONENG" :des "Custom NonEnglish Index" :table "MRXW_NONENG"
+ :cls 5 :rws 0 :bts 0 :fields (fields (find-umls-file "MRXW.ENG")))))
+ (setf (ucols ffile)
+ (ucols-for-umls-file ffile))
(list ffile)))
(defun datatype-for-col (colname)
(defun add-datatype-to-col (col datatype)
"Add data type information to column"
- (setf (umls-col-datatype col) datatype)
+ (setf (datatype col) datatype)
(case datatype
- (sql-u (setf (umls-col-sqltype col) "INTEGER"
- (umls-col-parsefunc col) #'parse-ui
- (umls-col-quotechar col) ""))
- (sql-s (setf (umls-col-sqltype col) "SMALLINT"
- (umls-col-parsefunc col) #'parse-integer
- (umls-col-quotechar col) ""))
- (sql-l (setf (umls-col-sqltype col) "BIGINT"
- (umls-col-parsefunc col) #'parse-integer
- (umls-col-quotechar col) ""))
- (sql-i (setf (umls-col-sqltype col) "INTEGER"
- (umls-col-parsefunc col) #'parse-integer
- (umls-col-quotechar col) ""))
- (sql-f (setf (umls-col-sqltype col) "NUMERIC"
- (umls-col-parsefunc col) #'read-from-string
- (umls-col-quotechar col) ""))
+ (sql-u (setf (sqltype col) "INTEGER"
+ (parse-fun col) #'parse-ui
+ (quotechar col) ""))
+ (sql-s (setf (sqltype col) "SMALLINT"
+ (parse-fun col) #'parse-integer
+ (quotechar col) ""))
+ (sql-l (setf (sqltype col) "BIGINT"
+ (parse-fun col) #'parse-integer
+ (quotechar col) ""))
+ (sql-i (setf (sqltype col) "INTEGER"
+ (parse-fun col) #'parse-integer
+ (quotechar col) ""))
+ (sql-f (setf (sqltype col) "NUMERIC"
+ (parse-fun col) #'read-from-string
+ (quotechar col) ""))
(t ; Default column type, optimized text storage
- (setf (umls-col-parsefunc col) #'add-sql-quotes
- (umls-col-quotechar col) "'")
- (when (and (umls-col-max col) (umls-col-av col))
- (if (> (umls-col-max col) 255)
- (setf (umls-col-sqltype col) "TEXT")
- (if (< (- (umls-col-max col) (umls-col-av col)) 4)
- (setf (umls-col-sqltype col) "CHAR") ; if average bytes wasted < 4
- (setf (umls-col-sqltype col) "VARCHAR")))))))
+ (setf (parse-fun col) #'add-sql-quotes
+ (quotechar col) "'")
+ (when (and (cmax col) (av col))
+ (if (> (cmax col) 255)
+ (setf (sqltype col) "TEXT")
+ (if (< (- (cmax col) (av col)) 4)
+ (setf (sqltype col) "CHAR") ; if average bytes wasted < 4
+ (setf (sqltype col) "VARCHAR")))))))
;;;; Programmer: Kevin M. Rosenberg
;;;; Date Started: Apr 2000
;;;;
-;;;; $Id: parse-common.lisp,v 1.6 2003/05/06 01:34:57 kevin Exp $
+;;;; $Id: parse-common.lisp,v 1.7 2003/05/06 07:17:35 kevin Exp $
;;;;
;;;; This file, part of UMLisp, is
;;;; Copyright (c) 2000-2002 by Kevin M. Rosenberg, M.D.
(defun file-field-lengths (files)
(let ((lengths '()))
(dolist (file files)
- (setq file (umls-file-fil file))
+ (setq file (fil file))
(let (max-field count-field num-fields (count-lines 0))
(with-umls-file (fields file)
(unless num-fields
Currently, these are the LEX and NET files."
(let ((measure-files '()))
(dolist (file *umls-files*)
- (let ((filename (umls-file-fil file)))
+ (let ((filename (fil file)))
(unless (or (char= #\M (char filename 0))
(char= #\m (char filename 0)))
(push file measure-files))))
(av-field (caddr length-list))
(file (find-umls-file filename)))
(when file
- (if (/= (length max-field) (length (umls-file-fields file)))
+ (if (/= (length max-field) (length (fields file)))
(format t "Warning: Number of file fields ~A doesn't match length of fields in file structure ~S"
max-field file)
- (dotimes (i (max (length max-field) (length (umls-file-fields file))))
+ (dotimes (i (max (length max-field) (length (fields file))))
(declare (fixnum i))
- (let* ((field (nth i (umls-file-fields file)))
+ (let* ((field (nth i (fields file)))
(col (find-umls-col field filename)))
(if col
(progn
- (setf (umls-col-max col) (aref max-field i))
- (setf (umls-col-av col) (aref av-field i))
- (add-datatype-to-col col (datatype-for-col (umls-col-col col))))
+ (setf (cmax col) (aref max-field i))
+ (setf (av col) (aref av-field i))
+ (add-datatype-to-col col (datatype-for-col (col col))))
(error "can't find column ~A" field)))))))))))
(defun find-col-in-columns (colname filename cols)
"Returns list of umls-col structure for a column name and a filename"
(dolist (col cols)
- (when (and (string-equal filename (umls-col-fil col))
- (string-equal colname (umls-col-col col)))
+ (when (and (string-equal filename (fil col))
+ (string-equal colname (col col)))
(return-from find-col-in-columns col)))
nil)
(let ((base-colname (subseq colname 0 (1- (length colname)))))
(setq col (find-col-in-columns base-colname filename cols))
(if col
- (let ((new-col (make-umls-col
+ (let ((new-col (make-instance 'ucol
:col (copy-seq colname)
- :des (copy-seq (umls-col-des col))
- :ref (copy-seq (umls-col-ref col))
- :min (umls-col-min col)
- :max (umls-col-max col)
- :fil (copy-seq (umls-col-fil col))
- :sqltype (copy-seq (umls-col-sqltype col))
- :dty (copy-seq (umls-col-dty col))
- :parsefunc (umls-col-parsefunc col)
- :quotechar (copy-seq (umls-col-quotechar col))
- :datatype (umls-col-datatype col)
- :custom-value-func (umls-col-custom-value-func col))))
+ :des (copy-seq (des col))
+ :ref (copy-seq (ref col))
+ :min (cmin col)
+ :max (cmax col)
+ :fil (copy-seq (fil col))
+ :sqltype (copy-seq (sqltype col))
+ :dty (copy-seq (dty col))
+ :parse-fun (parse-fun col)
+ :quotechar (copy-seq (quotechar col))
+ :datatype (datatype col)
+ :custom-value-fun (custom-value-fun col))))
(push new-col *umls-cols*)
new-col)
(error "Couldn't find a base column for col ~A in file ~A"
colname filename)))
- (let ((new-col (make-umls-col
+ (let ((new-col (make-instance 'ucol
:col (copy-seq colname)
:des "Unknown"
:ref ""
:fil filename
:sqltype "VARCHAR"
:dty nil
- :parsefunc #'add-sql-quotes
+ :parse-fun #'add-sql-quotes
:quotechar "'"
:datatype nil
- :custom-value-func nil)))
+ :custom-value-fun nil)))
(push new-col *umls-cols*)
new-col))))))
(defun find-umls-file (filename)
"Returns umls-file structure for a filename"
- (find-if (lambda (f) (string-equal filename (umls-file-fil f))) *umls-files*))
+ (find-if (lambda (f) (string-equal filename (fil f))) *umls-files*))
-(defun umls-cols-for-umls-file (file)
+(defun ucols-for-umls-file (file)
"Returns list of umls-cols for a file structure"
- (let ((filename (umls-file-fil file)))
+ (let ((filename (fil file)))
(mapcar (lambda (col) (find-umls-col col filename))
- (umls-file-fields file))))
+ (fields file))))
;;;; Programmer: Kevin M. Rosenberg
;;;; Date Started: Apr 2000
;;;;
-;;;; $Id: sql-create.lisp,v 1.18 2003/05/06 06:44:17 kevin Exp $
+;;;; $Id: sql-create.lisp,v 1.19 2003/05/06 07:17:35 kevin Exp $
;;;;
;;;; This file, part of UMLisp, is
;;;; Copyright (c) 2000-2002 by Kevin M. Rosenberg, M.D.
"Return sql command to create a table"
(let ((col-func
(lambda (c)
- (let ((sqltype (umls-col-sqltype c)))
+ (let ((sqltype (sqltype c)))
(concatenate 'string
- (umls-col-col c)
+ (col c)
" "
(if (or (string-equal sqltype "VARCHAR")
(string-equal sqltype "CHAR"))
- (format nil "~a (~a)" sqltype (umls-col-max c))
+ (format nil "~a (~a)" sqltype (cmax c))
sqltype))))))
- (format nil "CREATE TABLE ~a (~{~a~^,~})" (umls-file-table file)
- (mapcar col-func (umls-cols-for-umls-file file)))))
+ (format nil "CREATE TABLE ~a (~{~a~^,~})" (table file)
+ (mapcar col-func (ucols-for-umls-file file)))))
(defun create-custom-table-cmd (tablename sql-cmd)
"Return SQL command to create a custom table"
(format nil "CREATE TABLE ~a AS ~a;" tablename sql-cmd))
(defun insert-col-value (col value)
- (if (null (umls-col-parsefunc col))
+ (if (null (parse-fun col))
value
- (format nil "~A" (funcall (umls-col-parsefunc col) value))))
+ (format nil "~A" (funcall (parse-fun col) value))))
(defun insert-values-cmd (file values)
"Return sql insert command for a row of values"
(let ((insert-func
(lambda (col value)
- (let ((q (umls-col-quotechar col)))
+ (let ((q (quotechar col)))
(concatenate 'string q (insert-col-value col value) q)))))
(format
nil "INSERT INTO ~a (~{~a~^,~}) VALUES (~A)"
- (umls-file-table file)
- (umls-file-fields file)
+ (table file)
+ (fields file)
(concat-separated-strings
","
- (mapcar insert-func (remove-custom-cols (umls-file-colstructs file)) values)
- (custom-col-values (custom-colstructs-for-file file) values t)))))
+ (mapcar insert-func (remove-custom-cols (ucols file)) values)
+ (custom-col-values (custom-ucols-for-file file) values t)))))
(defun custom-col-value (col values doquote)
- (let ((custom-value (funcall (umls-col-custom-value-func col) values)))
+ (let ((custom-value (funcall (custom-value-fun col) values)))
(if custom-value
(if doquote
- (let ((q (umls-col-quotechar col)))
+ (let ((q (quotechar col)))
(concatenate 'string q (escape-backslashes custom-value) q))
(escape-backslashes custom-value))
"")))
-(defun custom-col-values (colstructs values doquote)
+(defun custom-col-values (ucols values doquote)
"Returns a list of string column values for SQL inserts for custom columns"
- (loop for col in colstructs collect (custom-col-value col values doquote)))
+ (loop for col in ucols collect (custom-col-value col values doquote)))
(defun remove-custom-cols (cols)
"Remove custom cols from a list col umls-cols"
- (remove-if #'umls-col-custom-value-func cols))
+ (remove-if #'custom-value-fun cols))
(defun find-custom-cols-for-filename (filename)
(remove-if-not (lambda (x) (string-equal filename (car x))) +custom-cols+))
(defun custom-colnames-for-filename (filename)
(mapcar #'cadr (find-custom-cols-for-filename filename)))
-(defun custom-colstructs-for-file (file)
- (remove-if-not #'umls-col-custom-value-func (umls-file-colstructs file)))
+(defun custom-ucols-for-file (file)
+ (remove-if-not #'custom-value-fun (ucols file)))
(defun noneng-lang-index-files ()
(remove-if-not
- (lambda (f) (and (> (length (umls-file-fil f)) 4)
- (string-equal (umls-file-fil f) "MRXW." :end1 5)
- (not (string-equal (umls-file-fil f) "MRXW.ENG"))
- (not (string-equal (umls-file-fil f) "MRXW.NONENG"))))
+ (lambda (f) (and (> (length (fil f)) 4)
+ (string-equal (fil f) "MRXW." :end1 5)
+ (not (string-equal (fil f) "MRXW.ENG"))
+ (not (string-equal (fil f) "MRXW.NONENG"))))
*umls-files*))
;;; SQL Command Functions
"SQL Databases: drop all tables"
(dolist (file *umls-files*)
(ignore-errors
- (sql-execute (format nil "DROP TABLE ~a" (umls-file-table file)) conn))))
+ (sql-execute (format nil "DROP TABLE ~a" (table file)) conn))))
(defun sql-create-tables (conn)
"SQL Databases: create all tables"
(defun sql-insert-values (conn file)
"SQL Databases: inserts all values for a file"
- (with-umls-file (line (umls-file-fil file))
+ (with-umls-file (line (fil file))
(sql-execute (insert-values-cmd file line) conn)))
(defun sql-insert-all-values (conn)
(defun translate-umls-file (file extension)
"Translate a umls file into a format suitable for sql copy cmd"
- (translate-files (umls-file-fil file) extension (list file)))
+ (translate-files (fil file) extension (list file)))
(defun make-noneng-index-file (extension)
"Make non-english index file"
(format t "File ~A already exists: skipping~%" output-path)
(with-open-file (ostream output-path :direction :output)
(dolist (input-file input-files)
- (with-umls-file (line (umls-file-fil input-file))
+ (with-umls-file (line (fil input-file))
(umls-translate input-file line ostream)
(princ #\newline ostream)))))))
"Return postgresql copy statement for a file"
(format
nil "COPY ~a FROM '~a' using delimiters '|' with null as ''"
- (umls-file-table file) (umls-pathname (umls-file-fil file) extension)))
+ (table file) (umls-pathname (fil file) extension)))
(defun mysql-copy-cmd (file extension)
"Return mysql copy statement for a file"
(format
nil
"LOAD DATA LOCAL INFILE \"~a\" INTO TABLE ~a FIELDS TERMINATED BY \"|\""
- (umls-pathname (umls-file-fil file) extension) (umls-file-table file)))
+ (umls-pathname (fil file) extension) (table file)))
(defun col-value (col value)
- (if (eq (umls-col-datatype col) 'sql-u)
+ (if (eq (datatype col) 'sql-u)
(let ((ui (parse-ui value "")))
(if (stringp ui)
ui
"Translate a single line for sql output"
(print-separated-strings
strm "|"
- (mapcar #'col-value (remove-custom-cols (umls-file-colstructs file)) line)
- (custom-col-values (custom-colstructs-for-file file) line nil)))
+ (mapcar #'col-value (remove-custom-cols (ucols file)) line)
+ (custom-col-values (custom-ucols-for-file file) line nil)))
;;; Routines for analyzing cost of fixed size storage
(unavoidable '())
(avoidable '()))
(dolist (file *umls-files*)
- (dolist (col (umls-file-colstructs file))
- (let* ((avwaste (- (umls-col-max col) (umls-col-av col)))
- (cwaste (* avwaste (umls-file-rws file))))
+ (dolist (col (ucols file))
+ (let* ((avwaste (- (cmax col) (av col)))
+ (cwaste (* avwaste (rws file))))
(when (plusp cwaste)
(if (<= avwaste 6)
(progn
(incf totalunavoidable cwaste)
- (push (list (umls-file-fil file) (umls-col-col col)
+ (push (list (fil file) (col col)
avwaste cwaste)
unavoidable))
(progn
(incf totalavoidable cwaste)
- (push (list (umls-file-fil file) (umls-col-col col)
+ (push (list (fil file) (col col)
avwaste cwaste)
avoidable)))
(incf totalwaste cwaste)))))
(let ((max 0))
(declare (fixnum max))
(dolist (col *umls-cols*)
- (when (> (umls-col-max col) max)
- (setq max (umls-col-max col))))
+ (when (> (cmax col) max)
+ (setq max (cmax col))))
max))
(defun max-umls-row ()
(let ((rowsizes '()))
(dolist (file *umls-files*)
(let ((row 0)
- (fields (umls-file-colstructs file)))
+ (fields (ucols file)))
(dolist (field fields)
- (incf row (1+ (umls-col-max field))))
+ (incf row (1+ (cmax field))))
(push row rowsizes)))
(car (sort rowsizes #'>))))