X-Git-Url: http://git.kpe.io/?p=umlisp.git;a=blobdiff_plain;f=create-sql.lisp;h=3a1902019df6632feb72dcfecb2d3595913912c7;hp=93c90d541af858522d8ec657aca23e6e15aa7b13;hb=HEAD;hpb=01888d11a058ecc8c1dcde23291f9cfbb3a307c0 diff --git a/create-sql.lisp b/create-sql.lisp index 93c90d5..3a19020 100644 --- a/create-sql.lisp +++ b/create-sql.lisp @@ -7,10 +7,8 @@ ;;;; Author: Kevin M. Rosenberg ;;;; Created: Apr 2000 ;;;; -;;;; $Id$ -;;;; ;;;; This file, part of UMLisp, is -;;;; Copyright (c) 2000-2006 by Kevin M. Rosenberg, M.D. +;;;; Copyright (c) 2000-2010 by Kevin M. Rosenberg, M.D. ;;;; ;;;; UMLisp users are granted the rights to distribute and use this software ;;;; as governed by the terms of the GNU General Public License. @@ -32,7 +30,7 @@ (setq sqltype "VARCHAR2(20)"))))) (concatenate 'string - (col c) + (sqlname c) " " (if (or (string-equal sqltype "VARCHAR") (string-equal sqltype "CHAR")) @@ -46,7 +44,7 @@ " MAX_ROWS=200000000" "") (if (eq *umls-sql-type* :mysql) - " TYPE=MYISAM CHARACTER SET utf8" + " ENGINE=MYISAM CHARACTER SET utf8 COLLATE utf8_bin" "")))) (defun create-custom-table-cmd (tablename sql-cmd) @@ -208,28 +206,111 @@ (format nil "DROP INDEX ~a" (concatenate 'string tablename "_" colname "_X"))))) +(defun sql-create-indexes-mysql (conn indexes verbose) + (let ((tables nil) + (table-cols nil)) + (dolist (idx indexes) + (pushnew (second idx) tables :test 'string-equal) + (let ((table-col (find (second idx) table-cols :key 'car :test 'string-equal))) + (if table-col + (vector-push-extend (cons (first idx) (third idx)) (second table-col)) + (push (list (second idx) (make-array (list 1) :initial-contents (list (cons (first idx) (third idx))) + :adjustable t :fill-pointer 1)) + table-cols)))) + (dolist (table tables) + (let ((table-col (find table table-cols :key 'car :test 'string-equal)) + (first t) + (str (format nil "ALTER TABLE ~A" table))) + (loop for col across (second table-col) + do + (let ((colname (car col)) + (length (cdr col))) + (ignore-errors (sql-execute (drop-index-cmd colname table) conn)) + (setq str (concatenate 'string + str + (if first + (progn + (setq first nil) + " ") + ", ") + (format nil "ADD INDEX ~A (~A)" + (concatenate 'string table "_" colname "_X") + (concatenate 'string + colname + (if (integerp length) + (format nil " (~d)" length) + ""))))))) + (when verbose + (format t "UMLS Import: Creating indexes for columns ~A on table ~A.~%" + (mapcar 'car (coerce (second table-col) 'list)) table)) + (when conn + (sql-execute str conn)) + )))) + (defun sql-create-indexes (conn &key (indexes +index-cols+) verbose) "SQL Databases: create all indexes" - (dolist (idx indexes) - (when verbose (format t "UMLS Import: Creating index for column ~A on table ~A.~%" - (first idx) (second idx))) - (ignore-errors (sql-execute (drop-index-cmd (car idx) (cadr idx)) conn)) - (sql-execute (create-index-cmd (car idx) (cadr idx) (caddr idx)) conn))) + (if (eql :mysql *umls-sql-type*) + (sql-create-indexes-mysql conn indexes verbose) + (dolist (idx indexes) + (when verbose (format t "UMLS Import: Creating index for column ~A on table ~A.~%" + (first idx) (second idx))) + (ignore-errors (sql-execute (drop-index-cmd (car idx) (cadr idx)) conn)) + (sql-execute (create-index-cmd (car idx) (cadr idx) (caddr idx)) conn)))) (defun make-usrl (conn) (if (eql :mysql *umls-sql-type*) (sql-execute "drop table if exists USRL" conn) (ignore-errors (sql-execute "drop table USRL" conn))) - (sql-execute "create table USRL (sab varchar(80), srl integer)" conn) + (sql-execute + (concatenate 'string + "create table USRL (sab varchar(80), srl integer)" + (if (eq *umls-sql-type* :mysql) + " ENGINE=MYISAM CHARACTER SET utf8 COLLATE utf8_bin" + "")) + conn) (dolist (tuple (mutex-sql-query "select distinct SAB,SRL from MRCONSO order by SAB asc")) (sql-execute (format nil "insert into USRL (sab,srl) values ('~a',~d)" (car tuple) (ensure-integer (cadr tuple))) conn))) +(defun make-ustats (conn) + (ignore-errors (sql-execute "drop table USTATS" conn)) + (sql-execute + (concatenate 'string"create table USTATS (NAME varchar(160), COUNT bigint, SRL integer)" + (if (eq *umls-sql-type* :mysql) + " ENGINE=MYISAM CHARACTER SET utf8 COLLATE utf8_bin" + "")) + conn) + + (dolist (srl '(0 1 2 3 4 9)) + (insert-ustats-count conn "Concept Count" "MRCONSO" "distinct CUI" "KCUILRL" srl) + (insert-ustats-count conn "Term Count" "MRCONSO" "distinct KCUILUI" "KCUILRL" srl) + (insert-ustats-count conn "Distinct Term Count" "MRCONSO" "distinct LUI" "KLUILRL" srl) + (insert-ustats-count conn "String Count" "MRCONSO" "*" "KSUILRL" srl) + (insert-ustats-count conn "Distinct String Count" "MRCONSO" "distinct SUI" "KSUILRL" srl) + (insert-ustats-count conn "Hierarchcy" "MRHIER" "*" "KSRL" srl) + (insert-ustats-count conn "Mappings" "MRMAP" "*" "KSRL" srl) + (insert-ustats-count conn "Simple Mappings" "MRSMAP" "*" "KSRL" srl) +#+mrcoc (insert-ustats-count conn "Co-occuring Concept Count" "MRCOC" "*" "KLRL" srl) + (insert-ustats-count conn "Definition Count" "MRDEF" "*" "KSRL" srl) + (insert-ustats-count conn "Rank Count" "MRRANK" "*" "KSRL" srl) + (insert-ustats-count conn "Relationship Count" "MRREL" "*" "KSRL" srl) + (insert-ustats-count conn "Semantic Type Count" "MRSTY" "*" "KLRL" srl) + (insert-ustats-count conn "Simple Attribute Count" "MRSAT" "*" "KSRL" srl) + (insert-ustats-count conn "Source Abbreviation Count" "MRSAB" "*" "SRL" srl) + (insert-ustats-count conn "Word Index Count" "MRXW_ENG" "*" "KLRL" srl) + (insert-ustats-count conn "Normalized Word Index Count" "MRXNW_ENG" "*" "KLRL" srl) + (insert-ustats-count conn "Normalized String Index Count" "MRXNS_ENG" "*" "KLRL" srl)) + (sql-execute "create index USTATS_SRL on USTATS (SRL)" conn) + (find-ustats-all)) + +(defun insert-ustats-count (conn name table count-variable srl-control srl) + (insert-ustats conn name (find-count-table conn table srl count-variable srl-control) srl)) + (defun sql-create-special-tables (conn) (make-usrl conn) - (make-ustats)) + (make-ustats conn)) (defun create-umls-db-by-insert (&key verbose) "SQL Databases: initializes entire database via SQL insert commands" @@ -264,15 +345,15 @@ This is much faster that using create-umls-db-insert." (dolist (file *umls-files*) (when verbose (format t "UMLS Import: Importing file ~A to SQL.~%" (fil file))) (sql-execute (funcall copy-cmd file extension) conn)) - (When verbose (format t "UMLS Import: Creating SQL indices.~%")) + (when verbose (format t "UMLS Import: Creating SQL indices.~%")) (sql-create-indexes conn :verbose verbose) - (When verbose (format t "UMLS Import: Creating custom tables.~%")) + (when verbose (format t "UMLS Import: Creating custom tables.~%")) (sql-create-custom-tables conn) - (When verbose (format t "UMLS Import: Creating custom indices.~%")) + (when verbose (format t "UMLS Import: Creating custom indices.~%")) (sql-create-indexes conn :indexes +custom-index-cols+ :verbose verbose) - (When verbose (format t "UMLS Import: Creating special tables.~%")) + (when verbose (format t "UMLS Import: Creating special tables.~%")) (sql-create-special-tables conn))) - (When verbose (format t "UMLS Import: Completed.~%")) + (when verbose (format t "UMLS Import: Completed.~%")) t) (defun translate-all-files (&key (extension "-trans") verbose force) @@ -298,25 +379,31 @@ This is much faster that using create-umls-db-insert." (let ((translated-lines 0) (input-lines 0) (eof (cons nil nil))) - (catch 'done-counting - (with-open-file (ts output-path :direction :input - #+(and clisp unicode) :external-format - #+(and clisp unicode) charset:utf-8) - (do () - ((eq (read-line ts nil eof) eof)) - (incf translated-lines))) - (dolist (input-ufile input-ufiles) - (with-umls-ufile (line input-ufile) - (incf input-lines) - (when (> input-lines translated-lines) - (throw 'done-counting 'incomplete))))) + (with-open-file (ts output-path :direction :input + #+(and sbcl sb-unicode) :external-format + #+(and sbcl sb-unicode) :UTF-8 + #+(and allegro ics) :external-format + #+(and allegro ics) :UTF-8 + #+lispworks :external-format + #+lispworks :UTF-8 + #+(and clisp unicode) :external-format + #+(and clisp unicode) charset:utf-8) + (do ((c (read-char ts nil eof) (read-char ts nil eof))) + ((eq c eof)) + (when (eql c #\newline) + (incf translated-lines)))) + (dolist (input-ufile input-ufiles) + (with-umls-ufile (line input-ufile) + (incf input-lines) + (when (> input-lines translated-lines) + (return)))) (cond ((< input-lines translated-lines) (format t "Translated file ~A incomplete, deleting...~%" output-path) (delete-file output-path) nil) ((eql input-lines translated-lines) - (format t "Translated file ~A already exists: skipping...~%" output-path) + (format t "Translated file ~A exists and is proper number of lines: skipping...~%" output-path) t) ((eql input-lines 0) (warn "The number of input lines is 0 for output file ~A." output-path) @@ -335,6 +422,12 @@ This is much faster that using create-umls-db-insert." (with-open-file (ostream output-path :direction :output :if-exists :overwrite :if-does-not-exist :create + #+(and sbcl sb-unicode) :external-format + #+(and sbcl sb-unicode) :UTF-8 + #+(and allegro ics) :external-format + #+(and allegro ics) :UTF-8 + #+lispworks :external-format + #+lispworks :UTF-8 #+(and clisp unicode) :external-format #+(and clisp unicode) charset:utf-8) (dolist (input-ufile input-ufiles) @@ -366,7 +459,7 @@ This is much faster that using create-umls-db-insert." "Return mysql copy statement for a file" (format nil - "LOAD DATA ~AINFILE \"~a\" INTO TABLE ~a FIELDS TERMINATED BY \"|\"" + "LOAD DATA ~AINFILE '~a' INTO TABLE ~a FIELDS TERMINATED BY '|'" (if local-file "LOCAL " "") (namestring (ufile-pathname file extension)) (table file)))