X-Git-Url: http://git.kpe.io/?p=umlisp.git;a=blobdiff_plain;f=create-sql.lisp;h=04bc2b6bed3e0f676ddeb64761509a7360fd39e3;hp=d145ad9255f8c08c8c60d593800cb6c2ec5cc1ab;hb=e6ef11f4c42de5a6754f73d9a714a0213b3a0133;hpb=cdaa9cb65482eaca5a8eafbbe7b3bec9fb157512 diff --git a/create-sql.lisp b/create-sql.lisp index d145ad9..04bc2b6 100644 --- a/create-sql.lisp +++ b/create-sql.lisp @@ -208,9 +208,11 @@ (format nil "DROP INDEX ~a" (concatenate 'string tablename "_" colname "_X"))))) -(defun sql-create-indexes (conn &optional (indexes +index-cols+)) +(defun sql-create-indexes (conn &key (indexes +index-cols+) verbose) "SQL Databases: create all indexes" (dolist (idx indexes) + (when verbose (format t "UMLS Import: Creating index for column ~A on table ~A.~%" + (first idx) (second idx))) (ignore-errors (sql-execute (drop-index-cmd (car idx) (cadr idx)) conn)) (sql-execute (create-index-cmd (car idx) (cadr idx) (caddr idx)) conn))) @@ -229,7 +231,7 @@ (make-usrl conn) (make-ustats)) -(defun create-umls-db-by-insert () +(defun create-umls-db-by-insert (&key verbose) "SQL Databases: initializes entire database via SQL insert commands" (ensure-ucols+ufiles) (ensure-preparse) @@ -239,16 +241,18 @@ (sql-insert-all-values conn) (sql-create-indexes conn) (sql-create-custom-tables conn) - (sql-create-indexes conn +custom-index-cols+) + (sql-create-indexes conn :indexes +custom-index-cols+ :verbose verbose) (sql-create-special-tables conn))) -(defun create-umls-db (&key (extension "-trans") (skip-translation nil)) +(defun create-umls-db (&key (extension "-trans") (force-translation nil) (verbose nil)) "SQL Databases: initializes entire database via SQL copy commands. This is much faster that using create-umls-db-insert." + (when verbose (format t "UMLS Import: Starting.~%")) (ensure-ucols+ufiles) + (when verbose (format t "UMLS Import: Preparsing files.~%")) (ensure-preparse) - (unless skip-translation - (translate-all-files extension)) + (when verbose (format t "UMLS Import: Converting text UMLS files to optimized format.~%")) + (translate-all-files :extension extension :verbose verbose :force force-translation) (let ((copy-cmd (ecase (umls-sql-type) (:mysql #'mysql-copy-cmd) @@ -258,27 +262,35 @@ This is much faster that using create-umls-db-insert." (sql-drop-tables conn) (sql-create-tables conn) (dolist (file *umls-files*) + (when verbose (format t "UMLS Import: Importing file ~A to SQL.~%" (fil file))) (sql-execute (funcall copy-cmd file extension) conn)) - (sql-create-indexes conn) + (When verbose (format t "UMLS Import: Creating SQL indices.~%")) + (sql-create-indexes conn :verbose verbose) + (When verbose (format t "UMLS Import: Creating custom tables.~%")) (sql-create-custom-tables conn) - (sql-create-indexes conn +custom-index-cols+) + (When verbose (format t "UMLS Import: Creating custom indices.~%")) + (sql-create-indexes conn :indexes +custom-index-cols+ :verbose verbose) + (When verbose (format t "UMLS Import: Creating special tables.~%")) (sql-create-special-tables conn))) + (When verbose (format t "UMLS Import: Completed.~%")) t) -(defun translate-all-files (&optional (extension "-trans")) - "Copy translated files and return postgresql copy commands to import" - (make-noneng-index-file extension) +(defun translate-all-files (&key (extension "-trans") verbose force) + "Translate all *umls-files* to optimized import format." + (when verbose (format t "UMLS Import: Translating file ~A.~%" (fil (find-ufile "MRXW_NONENG.RRF")))) + (make-noneng-index-file extension :force force) (dolist (f (remove "MRXW_NONENG.RRF" *umls-files* :test #'string= :key #'fil)) - (translate-umls-file f extension))) + (when verbose (format t "UMLS Import: Translating file ~A.~%" (fil f))) + (translate-umls-file f extension :force force))) -(defun translate-umls-file (file extension) +(defun translate-umls-file (file extension &key force) "Translate a umls file into a format suitable for sql copy cmd" - (translate-files file extension (list file))) + (translate-files file extension (list file) :force force)) -(defun make-noneng-index-file (extension) +(defun make-noneng-index-file (extension &key force) "Make non-english index file" (translate-files (find-ufile "MRXW_NONENG.RRF") - extension (noneng-lang-index-files))) + extension (noneng-lang-index-files) :force force)) (defun verify-translation-file (output-path input-ufiles) "Returns t if translation file exists and is correct size. Warns and deletes incomplete translation file." @@ -315,12 +327,14 @@ This is much faster that using create-umls-db-insert." (delete-file output-path) nil))))) -(defun translate-files (out-ufile extension input-ufiles) +(defun translate-files (out-ufile extension input-ufiles &key force) "Translate a umls file into a format suitable for sql copy cmd" (let ((output-path (ufile-pathname out-ufile extension))) - (when (verify-translation-file output-path input-ufiles) + (when (and (not force) (verify-translation-file output-path input-ufiles)) (return-from translate-files output-path)) (with-open-file (ostream output-path :direction :output + :if-exists :overwrite + :if-does-not-exist :create #+(and clisp unicode) :external-format #+(and clisp unicode) charset:utf-8) (dolist (input-ufile input-ufiles) @@ -409,9 +423,9 @@ This is much faster that using create-umls-db-insert." (declare (optimize (speed 3) (space 0))) (ensure-ucols+ufiles) (let ((max 0)) - (declare (fixnum max)) + (declare (type (integer 0 1000000) max)) (dolist (ucol *umls-cols*) - (when (> (cmax ucol) max) + (when (> (the (integer 0 1000000) (cmax ucol)) max) (setq max (cmax ucol)))) max)) @@ -422,7 +436,11 @@ This is much faster that using create-umls-db-insert." (let ((rowsizes '())) (dolist (file *umls-files*) (let ((row 0)) + (declare (type (integer 0 1000000) row)) (dolist (ucol (ucols file)) - (incf row (1+ (cmax ucol)))) + (let* ((col-max (cmax ucol)) + (max-with-delim (1+ col-max))) + (declare (type (integer 0 1000000) col-max max-with-delim)) + (incf row max-with-delim))) (push row rowsizes))) (car (sort rowsizes #'>))))