X-Git-Url: http://git.kpe.io/?p=umlisp.git;a=blobdiff_plain;f=create-sql.lisp;h=4a4413a1aa2e93e7f1f4431c7fb6ed66fedbfcf2;hp=4978843e5bb280230387844eab008ee9544f5f6d;hb=57c4b059639968903aec88e65103a7263cb97535;hpb=188873f068b0c53febe4ee0ededbc755fce4869d diff --git a/create-sql.lisp b/create-sql.lisp index 4978843..4a4413a 100644 --- a/create-sql.lisp +++ b/create-sql.lisp @@ -10,7 +10,7 @@ ;;;; $Id$ ;;;; ;;;; This file, part of UMLisp, is -;;;; Copyright (c) 2000-2004 by Kevin M. Rosenberg, M.D. +;;;; Copyright (c) 2000-2006 by Kevin M. Rosenberg, M.D. ;;;; ;;;; UMLisp users are granted the rights to distribute and use this software ;;;; as governed by the terms of the GNU General Public License. @@ -20,8 +20,8 @@ (defun create-table-cmd (file) "Return sql command to create a table" - (let ((col-func - (lambda (c) + (let ((col-func + (lambda (c) (let ((sqltype (sqltype c))) (case *umls-sql-type* (:oracle @@ -30,7 +30,7 @@ (setq sqltype "VARCHAR2")) ((string-equal sqltype "BIGINT") (setq sqltype "VARCHAR2(20)"))))) - + (concatenate 'string (col c) " " @@ -38,20 +38,28 @@ (string-equal sqltype "CHAR")) (format nil "~a (~a)" sqltype (cmax c)) sqltype)))))) - (format nil "CREATE TABLE ~a (~{~a~^,~})" (table file) - (mapcar col-func (ucols file))))) + (format nil "CREATE TABLE ~a (~{~a~^,~})~A~A" + (table file) + (mapcar col-func (ucols file)) + (if (and (eq *umls-sql-type* :mysql) + (string-equal (table file) "MRCXT")) + " MAX_ROWS=200000000" + "") + (if (eq *umls-sql-type* :mysql) + " TYPE=MYISAM CHARACTER SET utf8" + "")))) (defun create-custom-table-cmd (tablename sql-cmd) "Return SQL command to create a custom table" (format nil "CREATE TABLE ~a AS ~a;" tablename sql-cmd)) (defun insert-col-value (col value) - (if (null (parse-fun col)) + (if (null (parse-fun col)) value (format nil "~A" (funcall (parse-fun col) value)))) (defun insert-values-cmd (file values) - "Return sql insert command for a row of values" + "Return sql insert command for a row of values" (let ((insert-func (lambda (col value) (concatenate 'string (quote-str col) @@ -62,7 +70,7 @@ (table file) (fields file) (concat-separated-strings - "," + "," (mapcar insert-func (remove-custom-cols (ucols file)) values) (custom-col-values (custom-ucols-for-file file) values t))))) @@ -101,7 +109,7 @@ (defun noneng-lang-index-files () (remove-if-not (lambda (f) (and (> (length (fil f)) 4) - (string-equal (fil f) "MRXW_" :end1 5) + (string-equal (fil f) "MRXW_" :end1 5) (not (string-equal (fil f) "MRXW_ENG.RRF")) (not (string-equal (fil f) "MRXW_NONENG.RRF")))) *umls-files*)) @@ -112,7 +120,7 @@ "Return sql create index command" (format nil "CREATE INDEX ~a ON ~a (~a)" (concatenate 'string tablename "_" colname "_X") - tablename + tablename (case *umls-sql-type* (:mysql (concatenate 'string colname @@ -136,26 +144,56 @@ (defun sql-drop-tables (conn) "SQL Databases: drop all tables" (dolist (file *umls-files*) - (ignore-errors + (ignore-errors (sql-execute (format nil "DROP TABLE ~a" (table file)) conn)))) (defun sql-create-tables (conn) - "SQL Databases: create all tables" + "SQL Databases: create all tables" (dolist (file *umls-files*) (sql-execute (create-table-cmd file) conn))) +#+ignore +(defun sql-create-kcon-table (conn) + "Create concept table, one row per concept." + (ignore-errors (execute-command "DROP TABLE KCON" :database conn)) + (execute-command + (format nil "CREATE TABLE KCON (CUI INTEGER, STR ~A, LRL ~A)" + (case *umls-sql-type* + (:oracle + (format nil "VARCHAR2(~D)" + (slot-value (find-ucol "STR" "MRCONSO.RRF") 'max))) + (t "TEXT")) + (case *umls-sql-type* + (:mysql "TINYINT") + ((:postgresql :postgresql-socket) "INT2") + (:oracle "NUMBER(2,0)") + (t "INTEGER"))) + :database conn) + ;; KCON deprecated by KPFENG field in MRCONSO + #+nil + (dolist (tuple (query "select distinct cui from MRCONSO order by cui" + :database conn)) + (let ((cui (car tuple))) + (execute-command + (format nil "INSERT into KCON VALUES (~D,'~A',~D)" + cui + (add-sql-quotes (pfstr-hash cui) ) + (cui-lrl cui)) + :database conn)))) + (defun sql-create-custom-tables (conn) "SQL Databases: create all custom tables" + ;;(sql-create-kcon-table conn) (dolist (ct +custom-tables+) (sql-execute (create-custom-table-cmd (car ct) (cadr ct)) conn))) - + (defun sql-insert-values (conn file) - "SQL Databases: inserts all values for a file" + "SQL Databases: inserts all values for a file" (with-umls-file (line (fil file)) (sql-execute (insert-values-cmd file line) conn))) (defun sql-insert-all-values (conn) - "SQL Databases: inserts all values for all files" + "SQL Databases: inserts all values for all files" (dolist (file *umls-files*) (sql-insert-values conn file))) @@ -174,7 +212,7 @@ "SQL Databases: create all indexes" (dolist (idx indexes) (ignore-errors (sql-execute (drop-index-cmd (car idx) (cadr idx)) conn)) - (sql-execute (create-index-cmd (car idx) (cadr idx) (caddr idx)) conn))) + (sql-execute (create-index-cmd (car idx) (cadr idx) (caddr idx)) conn))) (defun make-usrl (conn) (if (eql :mysql *umls-sql-type*) @@ -182,8 +220,8 @@ (ignore-errors (sql-execute "drop table USRL" conn))) (sql-execute "create table USRL (sab varchar(80), srl integer)" conn) (dolist (tuple (mutex-sql-query - "select distinct SAB,SRL from MRSO order by SAB asc")) - (sql-execute (format nil "insert into USRL (sab,srl) values ('~a',~d)" + "select distinct SAB,SRL from MRCONSO order by SAB asc")) + (sql-execute (format nil "insert into USRL (sab,srl) values ('~a',~d)" (car tuple) (ensure-integer (cadr tuple))) conn))) @@ -204,8 +242,8 @@ (sql-create-indexes conn +custom-index-cols+) (sql-create-special-tables conn))) -(defun create-umls-db (&key (extension ".trans") (skip-translation nil)) - "SQL Databases: initializes entire database via SQL copy commands. +(defun create-umls-db (&key (extension "-trans") (skip-translation nil)) + "SQL Databases: initializes entire database via SQL copy commands. This is much faster that using create-umls-db-insert." (ensure-ucols+ufiles) (ensure-preparse) @@ -226,7 +264,7 @@ This is much faster that using create-umls-db-insert." (sql-create-indexes conn +custom-index-cols+) (sql-create-special-tables conn)))) -(defun translate-all-files (&optional (extension ".trans")) +(defun translate-all-files (&optional (extension "-trans")) "Copy translated files and return postgresql copy commands to import" (make-noneng-index-file extension) (dolist (f (remove "MRXW_NONENG.RRF" *umls-files* :test #'string= :key #'fil)) @@ -241,16 +279,53 @@ This is much faster that using create-umls-db-insert." (translate-files (find-ufile "MRXW_NONENG.RRF") extension (noneng-lang-index-files))) +(defun verify-translation-file (output-path input-ufiles) + "Returns t if translation file exists and is correct size. Warns and deletes incomplete translation file." + (when (probe-file output-path) + (let ((translated-lines 0) + (input-lines 0) + (eof (cons nil nil))) + (catch 'done-counting + (with-open-file (ts output-path :direction :input + #+(and clisp unicode) :external-format + #+(and clisp unicode) charset:utf-8) + (do () + ((eq (read-line ts nil eof) eof)) + (incf translated-lines))) + (dolist (input-ufile input-ufiles) + (with-umls-ufile (line input-ufile) + (incf input-lines) + (when (> input-lines translated-lines) + (throw 'done-counting 'incomplete))))) + (cond + ((eql input-lines 0) + (error "Input lines is 0") + nil) + ((< input-lines translated-lines) + (format t "Translated file ~A incomplete, deleting...~%" output-path) + (delete-file output-path) + nil) + ((eql input-lines translated-lines) + (format t "Translated file ~A already exists: skipping...~%" output-path) + t) + ((> translated-lines input-lines) + (error "Shouldn't happen. Translated lines of ~A is ~D, greater than input lines ~D" + output-path translated-lines input-lines) + (delete-file output-path) + nil))))) + (defun translate-files (out-ufile extension input-ufiles) "Translate a umls file into a format suitable for sql copy cmd" (let ((output-path (ufile-pathname out-ufile extension))) - (if (probe-file output-path) - (format t "File ~A already exists: skipping~%" output-path) - (with-open-file (ostream output-path :direction :output) - (dolist (input-ufile input-ufiles) - (with-umls-ufile (line input-ufile) - (translate-line out-ufile line ostream) - (princ #\newline ostream))))))) + (when (verify-translation-file output-path input-ufiles) + (return-from translate-files output-path)) + (with-open-file (ostream output-path :direction :output + #+(and clisp unicode) :external-format + #+(and clisp unicode) charset:utf-8) + (dolist (input-ufile input-ufiles) + (with-umls-ufile (line input-ufile) + (translate-line out-ufile line ostream) + (princ #\newline ostream)))))) (defun translate-line (file line strm) "Translate a single line for sql output" @@ -262,25 +337,25 @@ This is much faster that using create-umls-db-insert." (write-to-string ui))) (escape-backslashes value)))) (print-separated-strings - strm "|" + strm "|" (mapcar #'col-value (remove-custom-cols (ucols file)) line) (custom-col-values (custom-ucols-for-file file) line nil)))) (defun pg-copy-cmd (file extension) - "Return postgresql copy statement for a file" + "Return postgresql copy statement for a file" (format nil "COPY ~a FROM '~a' using delimiters '|' with null as ''" - (table file) (umls-pathname (fil file) extension))) + (table file) (ufile-pathname file extension))) -(defun mysql-copy-cmd (file extension &key local-file) - "Return mysql copy statement for a file" +(defun mysql-copy-cmd (file extension &key (local-file t)) + "Return mysql copy statement for a file" (format nil "LOAD DATA ~AINFILE \"~a\" INTO TABLE ~a FIELDS TERMINATED BY \"|\"" (if local-file "LOCAL " "") - (umls-pathname (fil file) extension) (table file))) + (namestring (ufile-pathname file extension)) (table file))) + - ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; ;;; Routines for analyzing cost of fixed size storage