;;;; Author: Kevin M. Rosenberg
;;;; Created: Apr 2000
;;;;
-;;;; $Id$
-;;;;
;;;; This file, part of UMLisp, is
-;;;; Copyright (c) 2000-2006 by Kevin M. Rosenberg, M.D.
+;;;; Copyright (c) 2000-2010 by Kevin M. Rosenberg, M.D.
;;;;
;;;; UMLisp users are granted the rights to distribute and use this software
;;;; as governed by the terms of the GNU General Public License.
(setq sqltype "VARCHAR2(20)")))))
(concatenate 'string
- (col c)
+ (sqlname c)
" "
(if (or (string-equal sqltype "VARCHAR")
(string-equal sqltype "CHAR"))
" MAX_ROWS=200000000"
"")
(if (eq *umls-sql-type* :mysql)
- " TYPE=MYISAM CHARACTER SET utf8"
+ " ENGINE=MYISAM CHARACTER SET utf8 COLLATE utf8_bin"
""))))
(defun create-custom-table-cmd (tablename sql-cmd)
(format nil "DROP INDEX ~a"
(concatenate 'string tablename "_" colname "_X")))))
+(defun sql-create-indexes-mysql (conn indexes verbose)
+ (let ((tables nil)
+ (table-cols nil))
+ (dolist (idx indexes)
+ (pushnew (second idx) tables :test 'string-equal)
+ (let ((table-col (find (second idx) table-cols :key 'car :test 'string-equal)))
+ (if table-col
+ (vector-push-extend (cons (first idx) (third idx)) (second table-col))
+ (push (list (second idx) (make-array (list 1) :initial-contents (list (cons (first idx) (third idx)))
+ :adjustable t :fill-pointer 1))
+ table-cols))))
+ (dolist (table tables)
+ (let ((table-col (find table table-cols :key 'car :test 'string-equal))
+ (first t)
+ (str (format nil "ALTER TABLE ~A" table)))
+ (loop for col across (second table-col)
+ do
+ (let ((colname (car col))
+ (length (cdr col)))
+ (ignore-errors (sql-execute (drop-index-cmd colname table) conn))
+ (setq str (concatenate 'string
+ str
+ (if first
+ (progn
+ (setq first nil)
+ " ")
+ ", ")
+ (format nil "ADD INDEX ~A (~A)"
+ (concatenate 'string table "_" colname "_X")
+ (concatenate 'string
+ colname
+ (if (integerp length)
+ (format nil " (~d)" length)
+ "")))))))
+ (when verbose
+ (format t "UMLS Import: Creating indexes for columns ~A on table ~A.~%"
+ (mapcar 'car (coerce (second table-col) 'list)) table))
+ (when conn
+ (sql-execute str conn))
+ ))))
+
(defun sql-create-indexes (conn &key (indexes +index-cols+) verbose)
"SQL Databases: create all indexes"
- (dolist (idx indexes)
- (when verbose (format t "UMLS Import: Creating index for column ~A on table ~A.~%"
- (first idx) (second idx)))
- (ignore-errors (sql-execute (drop-index-cmd (car idx) (cadr idx)) conn))
- (sql-execute (create-index-cmd (car idx) (cadr idx) (caddr idx)) conn)))
+ (if (eql :mysql *umls-sql-type*)
+ (sql-create-indexes-mysql conn indexes verbose)
+ (dolist (idx indexes)
+ (when verbose (format t "UMLS Import: Creating index for column ~A on table ~A.~%"
+ (first idx) (second idx)))
+ (ignore-errors (sql-execute (drop-index-cmd (car idx) (cadr idx)) conn))
+ (sql-execute (create-index-cmd (car idx) (cadr idx) (caddr idx)) conn))))
(defun make-usrl (conn)
(if (eql :mysql *umls-sql-type*)
(sql-execute "drop table if exists USRL" conn)
(ignore-errors (sql-execute "drop table USRL" conn)))
- (sql-execute "create table USRL (sab varchar(80), srl integer)" conn)
+ (sql-execute
+ (concatenate 'string
+ "create table USRL (sab varchar(80), srl integer)"
+ (if (eq *umls-sql-type* :mysql)
+ " ENGINE=MYISAM CHARACTER SET utf8 COLLATE utf8_bin"
+ ""))
+ conn)
(dolist (tuple (mutex-sql-query
"select distinct SAB,SRL from MRCONSO order by SAB asc"))
(sql-execute (format nil "insert into USRL (sab,srl) values ('~a',~d)"
(car tuple) (ensure-integer (cadr tuple)))
conn)))
+(defun make-ustats (conn)
+ (ignore-errors (sql-execute "drop table USTATS" conn))
+ (sql-execute
+ (concatenate 'string"create table USTATS (NAME varchar(160), COUNT bigint, SRL integer)"
+ (if (eq *umls-sql-type* :mysql)
+ " ENGINE=MYISAM CHARACTER SET utf8 COLLATE utf8_bin"
+ ""))
+ conn)
+
+ (dolist (srl '(0 1 2 3 4 9))
+ (insert-ustats-count conn "Concept Count" "MRCONSO" "distinct CUI" "KCUILRL" srl)
+ (insert-ustats-count conn "Term Count" "MRCONSO" "distinct KCUILUI" "KCUILRL" srl)
+ (insert-ustats-count conn "Distinct Term Count" "MRCONSO" "distinct LUI" "KLUILRL" srl)
+ (insert-ustats-count conn "String Count" "MRCONSO" "*" "KSUILRL" srl)
+ (insert-ustats-count conn "Distinct String Count" "MRCONSO" "distinct SUI" "KSUILRL" srl)
+ (insert-ustats-count conn "Hierarchcy" "MRHIER" "*" "KSRL" srl)
+ (insert-ustats-count conn "Mappings" "MRMAP" "*" "KSRL" srl)
+ (insert-ustats-count conn "Simple Mappings" "MRSMAP" "*" "KSRL" srl)
+#+mrcoc (insert-ustats-count conn "Co-occuring Concept Count" "MRCOC" "*" "KLRL" srl)
+ (insert-ustats-count conn "Definition Count" "MRDEF" "*" "KSRL" srl)
+ (insert-ustats-count conn "Rank Count" "MRRANK" "*" "KSRL" srl)
+ (insert-ustats-count conn "Relationship Count" "MRREL" "*" "KSRL" srl)
+ (insert-ustats-count conn "Semantic Type Count" "MRSTY" "*" "KLRL" srl)
+ (insert-ustats-count conn "Simple Attribute Count" "MRSAT" "*" "KSRL" srl)
+ (insert-ustats-count conn "Source Abbreviation Count" "MRSAB" "*" "SRL" srl)
+ (insert-ustats-count conn "Word Index Count" "MRXW_ENG" "*" "KLRL" srl)
+ (insert-ustats-count conn "Normalized Word Index Count" "MRXNW_ENG" "*" "KLRL" srl)
+ (insert-ustats-count conn "Normalized String Index Count" "MRXNS_ENG" "*" "KLRL" srl))
+ (sql-execute "create index USTATS_SRL on USTATS (SRL)" conn)
+ (find-ustats-all))
+
+(defun insert-ustats-count (conn name table count-variable srl-control srl)
+ (insert-ustats conn name (find-count-table conn table srl count-variable srl-control) srl))
+
(defun sql-create-special-tables (conn)
(make-usrl conn)
- (make-ustats))
+ (make-ustats conn))
(defun create-umls-db-by-insert (&key verbose)
"SQL Databases: initializes entire database via SQL insert commands"
(dolist (file *umls-files*)
(when verbose (format t "UMLS Import: Importing file ~A to SQL.~%" (fil file)))
(sql-execute (funcall copy-cmd file extension) conn))
- (When verbose (format t "UMLS Import: Creating SQL indices.~%"))
+ (when verbose (format t "UMLS Import: Creating SQL indices.~%"))
(sql-create-indexes conn :verbose verbose)
- (When verbose (format t "UMLS Import: Creating custom tables.~%"))
+ (when verbose (format t "UMLS Import: Creating custom tables.~%"))
(sql-create-custom-tables conn)
- (When verbose (format t "UMLS Import: Creating custom indices.~%"))
+ (when verbose (format t "UMLS Import: Creating custom indices.~%"))
(sql-create-indexes conn :indexes +custom-index-cols+ :verbose verbose)
- (When verbose (format t "UMLS Import: Creating special tables.~%"))
+ (when verbose (format t "UMLS Import: Creating special tables.~%"))
(sql-create-special-tables conn)))
- (When verbose (format t "UMLS Import: Completed.~%"))
+ (when verbose (format t "UMLS Import: Completed.~%"))
t)
(defun translate-all-files (&key (extension "-trans") verbose force)
(let ((translated-lines 0)
(input-lines 0)
(eof (cons nil nil)))
- (catch 'done-counting
- (with-open-file (ts output-path :direction :input
- #+(and clisp unicode) :external-format
- #+(and clisp unicode) charset:utf-8)
- (do ()
- ((eq (read-line ts nil eof) eof))
- (incf translated-lines)))
- (dolist (input-ufile input-ufiles)
- (with-umls-ufile (line input-ufile)
- (incf input-lines)
- (when (> input-lines translated-lines)
- (throw 'done-counting 'incomplete)))))
+ (with-open-file (ts output-path :direction :input
+ #+(and sbcl sb-unicode) :external-format
+ #+(and sbcl sb-unicode) :UTF-8
+ #+(and allegro ics) :external-format
+ #+(and allegro ics) :UTF-8
+ #+lispworks :external-format
+ #+lispworks :UTF-8
+ #+(and clisp unicode) :external-format
+ #+(and clisp unicode) charset:utf-8)
+ (do ((c (read-char ts nil eof) (read-char ts nil eof)))
+ ((eq c eof))
+ (when (eql c #\newline)
+ (incf translated-lines))))
+ (dolist (input-ufile input-ufiles)
+ (with-umls-ufile (line input-ufile)
+ (incf input-lines)
+ (when (> input-lines translated-lines)
+ (return))))
(cond
((< input-lines translated-lines)
(format t "Translated file ~A incomplete, deleting...~%" output-path)
(delete-file output-path)
nil)
((eql input-lines translated-lines)
- (format t "Translated file ~A already exists: skipping...~%" output-path)
+ (format t "Translated file ~A exists and is proper number of lines: skipping...~%" output-path)
t)
((eql input-lines 0)
(warn "The number of input lines is 0 for output file ~A." output-path)
(with-open-file (ostream output-path :direction :output
:if-exists :overwrite
:if-does-not-exist :create
+ #+(and sbcl sb-unicode) :external-format
+ #+(and sbcl sb-unicode) :UTF-8
+ #+(and allegro ics) :external-format
+ #+(and allegro ics) :UTF-8
+ #+lispworks :external-format
+ #+lispworks :UTF-8
#+(and clisp unicode) :external-format
#+(and clisp unicode) charset:utf-8)
(dolist (input-ufile input-ufiles)