From: Kevin M. Rosenberg Date: Tue, 6 May 2003 07:17:35 +0000 (+0000) Subject: r4840: Auto commit for Debian build X-Git-Tag: v2006ac.2~186 X-Git-Url: http://git.kpe.io/?p=umlisp.git;a=commitdiff_plain;h=ebbaacd0a589db2c590846742da33e3b4bf25d02 r4840: Auto commit for Debian build --- diff --git a/data-structures.lisp b/data-structures.lisp index ca9ef9b..c06b2ca 100644 --- a/data-structures.lisp +++ b/data-structures.lisp @@ -7,7 +7,7 @@ ;;;; Author: Kevin M. Rosenberg ;;;; Date Started: Apr 2000 ;;;; -;;;; $Id: data-structures.lisp,v 1.7 2003/05/06 06:09:29 kevin Exp $ +;;;; $Id: data-structures.lisp,v 1.8 2003/05/06 07:17:35 kevin Exp $ ;;;; ;;;; This file, part of UMLisp, is ;;;; Copyright (c) 2000-2002 by Kevin M. Rosenberg, M.D. @@ -57,40 +57,35 @@ ;; Preliminary objects to replace structures (defclass ufile () - ((fil :initarg :fil) - (table :initarg :table) - (des :initarg :des) - (fmt :initarg :fmt) - (cls :initarg :cls) - (rws :initarg :rws) - (bts :initarg :bts) - (fields :initarg :fields) - (ucols :initarg ucols)) - (:documentation "A UMLS File")) + ((fil :initarg :fil :accessor fil) + (table :initarg :table :accessor table) + (des :initarg :des :accessor des) + (fmt :initarg :fmt :accessor fmt) + (cls :initarg :cls :accessor cls) + (rws :initarg :rws :accessor rws) + (bts :initarg :bts :accessor bts) + (fields :initarg :fields :accessor fields) + (ucols :initarg :ucols :accessor ucols)) + (:default-initargs :fil nil :table nil :des nil :fmt nil :cls nil :rws nil :bts nil + :fields nil :ucols nil) + (:documentation "UMLS File")) (defclass ucol () - ((col :initarg :col) - (des :initarg des) - (ref :initarg ref) - (min :initarg :min) - (av :initarg :av) - (max :initarg :max) - (fil :initarg :fil) - (sqltype :initarg :sqltype) - (dty :initarg :dty :documentation "new in 2002: suggested SQL datatype") - (parsefunc :initarg :parsefunc) - (quotechar :initarg :quotechar) - (datatype :initarg :datatype) - (custom-value-func :initarg :custom-value-func)) - (:documentation "A UMLS column")) - -(defstruct (umls-file) - "Record for each UMLS File" - fil table des fmt cls rws bts fields colstructs) - -(defstruct (umls-col) - "Record for each UMLS Column in each file" - col des ref min av max fil sqltype - dty ;; new in 2002 umls: suggested SQL datatype - parsefunc quotechar datatype custom-value-func) + ((col :initarg :col :accessor col) + (des :initarg :des :accessor des) + (ref :initarg :ref :accessor ref) + (min :initarg :min :accessor cmin) + (av :initarg :av :accessor av) + (max :initarg :max :accessor cmax) + (fil :initarg :fil :accessor fil) + (sqltype :initarg :sqltype :accessor sqltype) + (dty :initarg :dty :accessor dty :documentation "new in 2002: suggested SQL datatype") + (parse-fun :initarg :parse-fun :accessor parse-fun) + (quotechar :initarg :quotechar :accessor quotechar) + (datatype :initarg :datatype :accessor datatype) + (custom-value-fun :initarg :custom-value-fun :accessor custom-value-fun)) + (:default-initargs :col nil :des nil :ref nil :min nil :av nil :max nil :fil nil + :sqltype nil :dty nil :parse-fun nil :datatype nil + :custom-value-fun nil) + (:documentation "UMLS column")) diff --git a/parse-2002.lisp b/parse-2002.lisp index 79119fd..8bbb009 100644 --- a/parse-2002.lisp +++ b/parse-2002.lisp @@ -8,7 +8,7 @@ ;;;; Programmer: Kevin M. Rosenberg ;;;; Date Started: Apr 2000 ;;;; -;;;; $Id: parse-2002.lisp,v 1.6 2003/05/06 06:09:29 kevin Exp $ +;;;; $Id: parse-2002.lisp,v 1.7 2003/05/06 07:17:35 kevin Exp $ ;;;; ;;;; This file, part of UMLisp, is ;;;; Copyright (c) 2000-2002 by Kevin M. Rosenberg, M.D. @@ -279,7 +279,7 @@ (let ((cols '())) (with-umls-file (line "MRCOLS") (destructuring-bind (col des ref min av max fil dty) line - (let ((c (make-umls-col + (let ((c (make-instance 'ucol :col col :des des :ref ref @@ -289,8 +289,8 @@ :fil fil :dty dty ;; new in 2002 UMLS :sqltype "VARCHAR" ; default data type - :parsefunc #'add-sql-quotes - :custom-value-func nil + :parse-fun #'add-sql-quotes + :custom-value-fun nil :quotechar "'"))) (add-datatype-to-col c (datatype-for-col col)) (push c cols)))) @@ -300,17 +300,10 @@ "Initialize umls columns for custom columns" (let ((cols '())) (dolist (customcol +custom-cols+) - (let ((c (make-umls-col :col (nth 1 customcol) - :des "" - :ref 0 - :min 0 - :max (nth 3 customcol) - :av 0 - :dty nil - :fil (nth 0 customcol) - :sqltype (nth 2 customcol) - :parsefunc #'add-sql-quotes - :custom-value-func (nth 4 customcol) + (let ((c (make-instance 'ucol + :col (nth 1 customcol) :des "" :ref 0 :min 0 :max (nth 3 customcol) + :av 0 :dty nil :fil (nth 0 customcol) :sqltype (nth 2 customcol) + :parse-fun #'add-sql-quotes :custom-value-fun (nth 4 customcol) :quotechar "'"))) (add-datatype-to-col c (datatype-for-col (nth 1 customcol))) (push c cols))) @@ -326,7 +319,7 @@ (destructuring-bind (nam des ref fil) line (setq nam (escape-column-name nam)) (dolist (file (delimited-string-to-list fil #\,)) - (let ((c (make-umls-col + (let ((c (make-instance 'ucol :col nam :des des :ref ref @@ -336,8 +329,8 @@ :fil file :dty nil :sqltype "VARCHAR" ; default data type - :parsefunc #'add-sql-quotes - :custom-value-func nil + :parse-fun #'add-sql-quotes + :custom-value-fun nil :quotechar "'"))) (add-datatype-to-col c (datatype-for-col nam)) (push c cols))))) @@ -375,7 +368,7 @@ append a unique number (starting at 2) onto a column name that is repeated in th (let ((files '())) (with-umls-file (line files-filename) (destructuring-bind (fil des fmt cls rws bts) line - (let ((f (make-umls-file + (let ((f (make-instance 'ufile :fil fil :table (substitute #\_ #\. fil) :des des @@ -386,20 +379,16 @@ append a unique number (starting at 2) onto a column name that is repeated in th :fields (concatenate 'list (umls-field-string-to-list fmt) (custom-colnames-for-filename fil))))) - (setf (umls-file-colstructs f) (umls-cols-for-umls-file f)) + (setf (ucols f) (ucols-for-umls-file f)) (push f files)))) (nreverse files))) (defun init-custom-files () - (let ((ffile (make-umls-file :fil "MRXW.NONENG" - :des "Custom NonEnglish Index" - :table "MRXW_NONENG" - :cls 5 - :rws 0 - :bts 0 - :fields (umls-file-fields (find-umls-file "MRXW.ENG"))))) - (setf (umls-file-colstructs ffile) - (umls-cols-for-umls-file ffile)) + (let ((ffile (make-instance 'ufile + :fil "MRXW.NONENG" :des "Custom NonEnglish Index" :table "MRXW_NONENG" + :cls 5 :rws 0 :bts 0 :fields (fields (find-umls-file "MRXW.ENG"))))) + (setf (ucols ffile) + (ucols-for-umls-file ffile)) (list ffile))) (defun datatype-for-col (colname) @@ -408,32 +397,32 @@ append a unique number (starting at 2) onto a column name that is repeated in th (defun add-datatype-to-col (col datatype) "Add data type information to column" - (setf (umls-col-datatype col) datatype) + (setf (datatype col) datatype) (case datatype - (sql-u (setf (umls-col-sqltype col) "INTEGER" - (umls-col-parsefunc col) #'parse-ui - (umls-col-quotechar col) "")) - (sql-s (setf (umls-col-sqltype col) "SMALLINT" - (umls-col-parsefunc col) #'parse-integer - (umls-col-quotechar col) "")) - (sql-l (setf (umls-col-sqltype col) "BIGINT" - (umls-col-parsefunc col) #'parse-integer - (umls-col-quotechar col) "")) - (sql-i (setf (umls-col-sqltype col) "INTEGER" - (umls-col-parsefunc col) #'parse-integer - (umls-col-quotechar col) "")) - (sql-f (setf (umls-col-sqltype col) "NUMERIC" - (umls-col-parsefunc col) #'read-from-string - (umls-col-quotechar col) "")) + (sql-u (setf (sqltype col) "INTEGER" + (parse-fun col) #'parse-ui + (quotechar col) "")) + (sql-s (setf (sqltype col) "SMALLINT" + (parse-fun col) #'parse-integer + (quotechar col) "")) + (sql-l (setf (sqltype col) "BIGINT" + (parse-fun col) #'parse-integer + (quotechar col) "")) + (sql-i (setf (sqltype col) "INTEGER" + (parse-fun col) #'parse-integer + (quotechar col) "")) + (sql-f (setf (sqltype col) "NUMERIC" + (parse-fun col) #'read-from-string + (quotechar col) "")) (t ; Default column type, optimized text storage - (setf (umls-col-parsefunc col) #'add-sql-quotes - (umls-col-quotechar col) "'") - (when (and (umls-col-max col) (umls-col-av col)) - (if (> (umls-col-max col) 255) - (setf (umls-col-sqltype col) "TEXT") - (if (< (- (umls-col-max col) (umls-col-av col)) 4) - (setf (umls-col-sqltype col) "CHAR") ; if average bytes wasted < 4 - (setf (umls-col-sqltype col) "VARCHAR"))))))) + (setf (parse-fun col) #'add-sql-quotes + (quotechar col) "'") + (when (and (cmax col) (av col)) + (if (> (cmax col) 255) + (setf (sqltype col) "TEXT") + (if (< (- (cmax col) (av col)) 4) + (setf (sqltype col) "CHAR") ; if average bytes wasted < 4 + (setf (sqltype col) "VARCHAR"))))))) diff --git a/parse-common.lisp b/parse-common.lisp index 1e40444..1e397bf 100644 --- a/parse-common.lisp +++ b/parse-common.lisp @@ -7,7 +7,7 @@ ;;;; Programmer: Kevin M. Rosenberg ;;;; Date Started: Apr 2000 ;;;; -;;;; $Id: parse-common.lisp,v 1.6 2003/05/06 01:34:57 kevin Exp $ +;;;; $Id: parse-common.lisp,v 1.7 2003/05/06 07:17:35 kevin Exp $ ;;;; ;;;; This file, part of UMLisp, is ;;;; Copyright (c) 2000-2002 by Kevin M. Rosenberg, M.D. @@ -51,7 +51,7 @@ (defun file-field-lengths (files) (let ((lengths '())) (dolist (file files) - (setq file (umls-file-fil file)) + (setq file (fil file)) (let (max-field count-field num-fields (count-lines 0)) (with-umls-file (fields file) (unless num-fields @@ -77,7 +77,7 @@ Currently, these are the LEX and NET files." (let ((measure-files '())) (dolist (file *umls-files*) - (let ((filename (umls-file-fil file))) + (let ((filename (fil file))) (unless (or (char= #\M (char filename 0)) (char= #\m (char filename 0))) (push file measure-files)))) @@ -88,18 +88,18 @@ Currently, these are the LEX and NET files." (av-field (caddr length-list)) (file (find-umls-file filename))) (when file - (if (/= (length max-field) (length (umls-file-fields file))) + (if (/= (length max-field) (length (fields file))) (format t "Warning: Number of file fields ~A doesn't match length of fields in file structure ~S" max-field file) - (dotimes (i (max (length max-field) (length (umls-file-fields file)))) + (dotimes (i (max (length max-field) (length (fields file)))) (declare (fixnum i)) - (let* ((field (nth i (umls-file-fields file))) + (let* ((field (nth i (fields file))) (col (find-umls-col field filename))) (if col (progn - (setf (umls-col-max col) (aref max-field i)) - (setf (umls-col-av col) (aref av-field i)) - (add-datatype-to-col col (datatype-for-col (umls-col-col col)))) + (setf (cmax col) (aref max-field i)) + (setf (av col) (aref av-field i)) + (add-datatype-to-col col (datatype-for-col (col col)))) (error "can't find column ~A" field))))))))))) @@ -109,8 +109,8 @@ Currently, these are the LEX and NET files." (defun find-col-in-columns (colname filename cols) "Returns list of umls-col structure for a column name and a filename" (dolist (col cols) - (when (and (string-equal filename (umls-col-fil col)) - (string-equal colname (umls-col-col col))) + (when (and (string-equal filename (fil col)) + (string-equal colname (col col))) (return-from find-col-in-columns col))) nil) @@ -125,24 +125,24 @@ Currently, these are the LEX and NET files." (let ((base-colname (subseq colname 0 (1- (length colname))))) (setq col (find-col-in-columns base-colname filename cols)) (if col - (let ((new-col (make-umls-col + (let ((new-col (make-instance 'ucol :col (copy-seq colname) - :des (copy-seq (umls-col-des col)) - :ref (copy-seq (umls-col-ref col)) - :min (umls-col-min col) - :max (umls-col-max col) - :fil (copy-seq (umls-col-fil col)) - :sqltype (copy-seq (umls-col-sqltype col)) - :dty (copy-seq (umls-col-dty col)) - :parsefunc (umls-col-parsefunc col) - :quotechar (copy-seq (umls-col-quotechar col)) - :datatype (umls-col-datatype col) - :custom-value-func (umls-col-custom-value-func col)))) + :des (copy-seq (des col)) + :ref (copy-seq (ref col)) + :min (cmin col) + :max (cmax col) + :fil (copy-seq (fil col)) + :sqltype (copy-seq (sqltype col)) + :dty (copy-seq (dty col)) + :parse-fun (parse-fun col) + :quotechar (copy-seq (quotechar col)) + :datatype (datatype col) + :custom-value-fun (custom-value-fun col)))) (push new-col *umls-cols*) new-col) (error "Couldn't find a base column for col ~A in file ~A" colname filename))) - (let ((new-col (make-umls-col + (let ((new-col (make-instance 'ucol :col (copy-seq colname) :des "Unknown" :ref "" @@ -151,10 +151,10 @@ Currently, these are the LEX and NET files." :fil filename :sqltype "VARCHAR" :dty nil - :parsefunc #'add-sql-quotes + :parse-fun #'add-sql-quotes :quotechar "'" :datatype nil - :custom-value-func nil))) + :custom-value-fun nil))) (push new-col *umls-cols*) new-col)))))) @@ -164,12 +164,12 @@ Currently, these are the LEX and NET files." (defun find-umls-file (filename) "Returns umls-file structure for a filename" - (find-if (lambda (f) (string-equal filename (umls-file-fil f))) *umls-files*)) + (find-if (lambda (f) (string-equal filename (fil f))) *umls-files*)) -(defun umls-cols-for-umls-file (file) +(defun ucols-for-umls-file (file) "Returns list of umls-cols for a file structure" - (let ((filename (umls-file-fil file))) + (let ((filename (fil file))) (mapcar (lambda (col) (find-umls-col col filename)) - (umls-file-fields file)))) + (fields file)))) diff --git a/sql-create.lisp b/sql-create.lisp index 8a77db3..af0dd99 100644 --- a/sql-create.lisp +++ b/sql-create.lisp @@ -7,7 +7,7 @@ ;;;; Programmer: Kevin M. Rosenberg ;;;; Date Started: Apr 2000 ;;;; -;;;; $Id: sql-create.lisp,v 1.18 2003/05/06 06:44:17 kevin Exp $ +;;;; $Id: sql-create.lisp,v 1.19 2003/05/06 07:17:35 kevin Exp $ ;;;; ;;;; This file, part of UMLisp, is ;;;; Copyright (c) 2000-2002 by Kevin M. Rosenberg, M.D. @@ -25,58 +25,58 @@ "Return sql command to create a table" (let ((col-func (lambda (c) - (let ((sqltype (umls-col-sqltype c))) + (let ((sqltype (sqltype c))) (concatenate 'string - (umls-col-col c) + (col c) " " (if (or (string-equal sqltype "VARCHAR") (string-equal sqltype "CHAR")) - (format nil "~a (~a)" sqltype (umls-col-max c)) + (format nil "~a (~a)" sqltype (cmax c)) sqltype)))))) - (format nil "CREATE TABLE ~a (~{~a~^,~})" (umls-file-table file) - (mapcar col-func (umls-cols-for-umls-file file))))) + (format nil "CREATE TABLE ~a (~{~a~^,~})" (table file) + (mapcar col-func (ucols-for-umls-file file))))) (defun create-custom-table-cmd (tablename sql-cmd) "Return SQL command to create a custom table" (format nil "CREATE TABLE ~a AS ~a;" tablename sql-cmd)) (defun insert-col-value (col value) - (if (null (umls-col-parsefunc col)) + (if (null (parse-fun col)) value - (format nil "~A" (funcall (umls-col-parsefunc col) value)))) + (format nil "~A" (funcall (parse-fun col) value)))) (defun insert-values-cmd (file values) "Return sql insert command for a row of values" (let ((insert-func (lambda (col value) - (let ((q (umls-col-quotechar col))) + (let ((q (quotechar col))) (concatenate 'string q (insert-col-value col value) q))))) (format nil "INSERT INTO ~a (~{~a~^,~}) VALUES (~A)" - (umls-file-table file) - (umls-file-fields file) + (table file) + (fields file) (concat-separated-strings "," - (mapcar insert-func (remove-custom-cols (umls-file-colstructs file)) values) - (custom-col-values (custom-colstructs-for-file file) values t))))) + (mapcar insert-func (remove-custom-cols (ucols file)) values) + (custom-col-values (custom-ucols-for-file file) values t))))) (defun custom-col-value (col values doquote) - (let ((custom-value (funcall (umls-col-custom-value-func col) values))) + (let ((custom-value (funcall (custom-value-fun col) values))) (if custom-value (if doquote - (let ((q (umls-col-quotechar col))) + (let ((q (quotechar col))) (concatenate 'string q (escape-backslashes custom-value) q)) (escape-backslashes custom-value)) ""))) -(defun custom-col-values (colstructs values doquote) +(defun custom-col-values (ucols values doquote) "Returns a list of string column values for SQL inserts for custom columns" - (loop for col in colstructs collect (custom-col-value col values doquote))) + (loop for col in ucols collect (custom-col-value col values doquote))) (defun remove-custom-cols (cols) "Remove custom cols from a list col umls-cols" - (remove-if #'umls-col-custom-value-func cols)) + (remove-if #'custom-value-fun cols)) (defun find-custom-cols-for-filename (filename) (remove-if-not (lambda (x) (string-equal filename (car x))) +custom-cols+)) @@ -89,15 +89,15 @@ (defun custom-colnames-for-filename (filename) (mapcar #'cadr (find-custom-cols-for-filename filename))) -(defun custom-colstructs-for-file (file) - (remove-if-not #'umls-col-custom-value-func (umls-file-colstructs file))) +(defun custom-ucols-for-file (file) + (remove-if-not #'custom-value-fun (ucols file))) (defun noneng-lang-index-files () (remove-if-not - (lambda (f) (and (> (length (umls-file-fil f)) 4) - (string-equal (umls-file-fil f) "MRXW." :end1 5) - (not (string-equal (umls-file-fil f) "MRXW.ENG")) - (not (string-equal (umls-file-fil f) "MRXW.NONENG")))) + (lambda (f) (and (> (length (fil f)) 4) + (string-equal (fil f) "MRXW." :end1 5) + (not (string-equal (fil f) "MRXW.ENG")) + (not (string-equal (fil f) "MRXW.NONENG")))) *umls-files*)) ;;; SQL Command Functions @@ -119,7 +119,7 @@ "SQL Databases: drop all tables" (dolist (file *umls-files*) (ignore-errors - (sql-execute (format nil "DROP TABLE ~a" (umls-file-table file)) conn)))) + (sql-execute (format nil "DROP TABLE ~a" (table file)) conn)))) (defun sql-create-tables (conn) "SQL Databases: create all tables" @@ -133,7 +133,7 @@ (defun sql-insert-values (conn file) "SQL Databases: inserts all values for a file" - (with-umls-file (line (umls-file-fil file)) + (with-umls-file (line (fil file)) (sql-execute (insert-values-cmd file line) conn))) (defun sql-insert-all-values (conn) @@ -196,7 +196,7 @@ This is much faster that using create-umls-db-insert." (defun translate-umls-file (file extension) "Translate a umls file into a format suitable for sql copy cmd" - (translate-files (umls-file-fil file) extension (list file))) + (translate-files (fil file) extension (list file))) (defun make-noneng-index-file (extension) "Make non-english index file" @@ -209,7 +209,7 @@ This is much faster that using create-umls-db-insert." (format t "File ~A already exists: skipping~%" output-path) (with-open-file (ostream output-path :direction :output) (dolist (input-file input-files) - (with-umls-file (line (umls-file-fil input-file)) + (with-umls-file (line (fil input-file)) (umls-translate input-file line ostream) (princ #\newline ostream))))))) @@ -217,17 +217,17 @@ This is much faster that using create-umls-db-insert." "Return postgresql copy statement for a file" (format nil "COPY ~a FROM '~a' using delimiters '|' with null as ''" - (umls-file-table file) (umls-pathname (umls-file-fil file) extension))) + (table file) (umls-pathname (fil file) extension))) (defun mysql-copy-cmd (file extension) "Return mysql copy statement for a file" (format nil "LOAD DATA LOCAL INFILE \"~a\" INTO TABLE ~a FIELDS TERMINATED BY \"|\"" - (umls-pathname (umls-file-fil file) extension) (umls-file-table file))) + (umls-pathname (fil file) extension) (table file))) (defun col-value (col value) - (if (eq (umls-col-datatype col) 'sql-u) + (if (eq (datatype col) 'sql-u) (let ((ui (parse-ui value ""))) (if (stringp ui) ui @@ -238,8 +238,8 @@ This is much faster that using create-umls-db-insert." "Translate a single line for sql output" (print-separated-strings strm "|" - (mapcar #'col-value (remove-custom-cols (umls-file-colstructs file)) line) - (custom-col-values (custom-colstructs-for-file file) line nil))) + (mapcar #'col-value (remove-custom-cols (ucols file)) line) + (custom-col-values (custom-ucols-for-file file) line nil))) ;;; Routines for analyzing cost of fixed size storage @@ -253,19 +253,19 @@ This is much faster that using create-umls-db-insert." (unavoidable '()) (avoidable '())) (dolist (file *umls-files*) - (dolist (col (umls-file-colstructs file)) - (let* ((avwaste (- (umls-col-max col) (umls-col-av col))) - (cwaste (* avwaste (umls-file-rws file)))) + (dolist (col (ucols file)) + (let* ((avwaste (- (cmax col) (av col))) + (cwaste (* avwaste (rws file)))) (when (plusp cwaste) (if (<= avwaste 6) (progn (incf totalunavoidable cwaste) - (push (list (umls-file-fil file) (umls-col-col col) + (push (list (fil file) (col col) avwaste cwaste) unavoidable)) (progn (incf totalavoidable cwaste) - (push (list (umls-file-fil file) (umls-col-col col) + (push (list (fil file) (col col) avwaste cwaste) avoidable))) (incf totalwaste cwaste))))) @@ -294,8 +294,8 @@ This is much faster that using create-umls-db-insert." (let ((max 0)) (declare (fixnum max)) (dolist (col *umls-cols*) - (when (> (umls-col-max col) max) - (setq max (umls-col-max col)))) + (when (> (cmax col) max) + (setq max (cmax col)))) max)) (defun max-umls-row () @@ -305,8 +305,8 @@ This is much faster that using create-umls-db-insert." (let ((rowsizes '())) (dolist (file *umls-files*) (let ((row 0) - (fields (umls-file-colstructs file))) + (fields (ucols file))) (dolist (field fields) - (incf row (1+ (umls-col-max field)))) + (incf row (1+ (cmax field)))) (push row rowsizes))) (car (sort rowsizes #'>))))