X-Git-Url: http://git.kpe.io/?p=umlisp-orf.git;a=blobdiff_plain;f=create-sql.lisp;h=a20adce6c63c17a6d80624b5fec88f432ea5a506;hp=e417b1a0498f9f1ef68fc4924012f9e9cda14e16;hb=3c963bdf7389ec0d00b893fb7b7757ab884f3222;hpb=d8fe27c58aa49f4a19f8b0dc11f97e0db7662e9e diff --git a/create-sql.lisp b/create-sql.lisp index e417b1a..a20adce 100644 --- a/create-sql.lisp +++ b/create-sql.lisp @@ -20,41 +20,41 @@ (defun create-table-cmd (file) "Return sql command to create a table" - (let ((col-func - (lambda (c) - (let ((sqltype (sqltype c))) - (concatenate 'string - (col c) - " " - (if (or (string-equal sqltype "VARCHAR") - (string-equal sqltype "CHAR")) - (format nil "~a (~a)" sqltype (cmax c)) - sqltype)))))) + (let ((col-func + (lambda (c) + (let ((sqltype (sqltype c))) + (concatenate 'string + (col c) + " " + (if (or (string-equal sqltype "VARCHAR") + (string-equal sqltype "CHAR")) + (format nil "~a (~a)" sqltype (cmax c)) + sqltype)))))) (format nil "CREATE TABLE ~a (~{~a~^,~})" (table file) - (mapcar col-func (ucols file))))) + (mapcar col-func (ucols file))))) (defun create-custom-table-cmd (tablename sql-cmd) "Return SQL command to create a custom table" (format nil "CREATE TABLE ~a AS ~a;" tablename sql-cmd)) (defun insert-col-value (col value) - (if (null (parse-fun col)) + (if (null (parse-fun col)) value (format nil "~A" (funcall (parse-fun col) value)))) (defun insert-values-cmd (file values) - "Return sql insert command for a row of values" + "Return sql insert command for a row of values" (let ((insert-func - (lambda (col value) - (concatenate 'string (quote-str col) - (insert-col-value col value) - (quote-str col))))) + (lambda (col value) + (concatenate 'string (quote-str col) + (insert-col-value col value) + (quote-str col))))) (format nil "INSERT INTO ~a (~{~a~^,~}) VALUES (~A)" (table file) (fields file) (concat-separated-strings - "," + "," (mapcar insert-func (remove-custom-cols (ucols file)) values) (custom-col-values (custom-ucols-for-file file) values t))))) @@ -62,12 +62,12 @@ (defun custom-col-value (col values doquote) (let ((custom-value (funcall (custom-value-fun col) values))) (if custom-value - (if doquote - (concatenate 'string (quote-str col) - (escape-backslashes custom-value) - (quote-str col)) - (escape-backslashes custom-value)) - ""))) + (if doquote + (concatenate 'string (quote-str col) + (escape-backslashes custom-value) + (quote-str col)) + (escape-backslashes custom-value)) + ""))) (defun custom-col-values (ucols values doquote) "Returns a list of string column values for SQL inserts for custom columns" @@ -82,7 +82,7 @@ (defun find-custom-col (filename col) (find-if (lambda (x) (and (string-equal filename (car x)) - (string-equal col (cadr x)))) +custom-cols+)) + (string-equal col (cadr x)))) +custom-cols+)) (defun custom-colnames-for-filename (filename) (mapcar #'cadr (find-custom-cols-for-filename filename))) @@ -93,9 +93,9 @@ (defun noneng-lang-index-files () (remove-if-not (lambda (f) (and (> (length (fil f)) 4) - (string-equal (fil f) "MRXW." :end1 5) - (not (string-equal (fil f) "MRXW.ENG")) - (not (string-equal (fil f) "MRXW.NONENG")))) + (string-equal (fil f) "MRXW." :end1 5) + (not (string-equal (fil f) "MRXW.ENG")) + (not (string-equal (fil f) "MRXW.NONENG")))) *umls-files*)) ;;; SQL Command Functions @@ -103,21 +103,21 @@ (defun create-index-cmd (colname tablename length) "Return sql create index command" (format nil "CREATE INDEX ~a ON ~a (~a)" - (concatenate 'string tablename "_" colname "_X") - tablename - (case *umls-sql-type* - (:mysql - (concatenate 'string colname - (if (integerp length) - (format nil " (~d)" length) - ""))) - ((:postgresql :postgresql-socket) - ;; FIXME: incorrect syntax - (if (integerp length) - (format nil "substr((~A)::text,1,~D)" colname length) - colname)) - (t - colname)))) + (concatenate 'string tablename "_" colname "_X") + tablename + (case *umls-sql-type* + (:mysql + (concatenate 'string colname + (if (integerp length) + (format nil " (~d)" length) + ""))) + ((:postgresql :postgresql-socket) + ;; FIXME: incorrect syntax + (if (integerp length) + (format nil "substr((~A)::text,1,~D)" colname length) + colname)) + (t + colname)))) (defun create-all-tables-cmdfile () "Return sql commands to create all tables. Not need for automated SQL import" @@ -128,11 +128,11 @@ (defun sql-drop-tables (conn) "SQL Databases: drop all tables" (dolist (file *umls-files*) - (ignore-errors + (ignore-errors (sql-execute (format nil "DROP TABLE ~a" (table file)) conn)))) (defun sql-create-tables (conn) - "SQL Databases: create all tables" + "SQL Databases: create all tables" (dolist (file *umls-files*) (sql-execute (create-table-cmd file) conn))) @@ -140,14 +140,14 @@ "SQL Databases: create all custom tables" (dolist (ct +custom-tables+) (sql-execute (create-custom-table-cmd (car ct) (cadr ct)) conn))) - + (defun sql-insert-values (conn file) - "SQL Databases: inserts all values for a file" + "SQL Databases: inserts all values for a file" (with-umls-file (line (fil file)) (sql-execute (insert-values-cmd file line) conn))) (defun sql-insert-all-values (conn) - "SQL Databases: inserts all values for all files" + "SQL Databases: inserts all values for all files" (dolist (file *umls-files*) (sql-insert-values conn file))) @@ -156,17 +156,17 @@ (case *umls-sql-type* (:mysql (format nil "DROP INDEX ~a ON ~a" - (concatenate 'string tablename "_" colname "_X") - tablename)) + (concatenate 'string tablename "_" colname "_X") + tablename)) (t (format nil "DROP INDEX ~a" - (concatenate 'string tablename "_" colname "_X"))))) + (concatenate 'string tablename "_" colname "_X"))))) (defun sql-create-indexes (conn &optional (indexes +index-cols+)) "SQL Databases: create all indexes" (dolist (idx indexes) (ignore-errors (sql-execute (drop-index-cmd (car idx) (cadr idx)) conn)) - (sql-execute (create-index-cmd (car idx) (cadr idx) (caddr idx)) conn))) + (sql-execute (create-index-cmd (car idx) (cadr idx) (caddr idx)) conn))) (defun make-usrl (conn) (if (eql :mysql *umls-sql-type*) @@ -174,10 +174,10 @@ (ignore-errors (sql-execute "drop table USRL" conn))) (sql-execute "create table USRL (sab varchar(80), srl integer)" conn) (dolist (tuple (mutex-sql-query - "select distinct SAB,SRL from MRSO order by SAB asc")) - (sql-execute (format nil "insert into USRL (sab,srl) values ('~a',~d)" - (car tuple) (ensure-integer (cadr tuple))) - conn))) + "select distinct SAB,SRL from MRSO order by SAB asc")) + (sql-execute (format nil "insert into USRL (sab,srl) values ('~a',~d)" + (car tuple) (ensure-integer (cadr tuple))) + conn))) (defun sql-create-special-tables (conn) (make-usrl conn) @@ -197,22 +197,22 @@ (sql-create-special-tables conn))) (defun create-umls-db (&key (extension ".trans") (skip-translation nil)) - "SQL Databases: initializes entire database via SQL copy commands. + "SQL Databases: initializes entire database via SQL copy commands. This is much faster that using create-umls-db-insert." (ensure-ucols+ufiles) (ensure-preparse) (unless skip-translation (translate-all-files extension)) (let ((copy-cmd - (ecase (umls-sql-type) - (:mysql #'mysql-copy-cmd) - (:postgresql #'pg-copy-cmd)))) + (ecase (umls-sql-type) + (:mysql #'mysql-copy-cmd) + (:postgresql #'pg-copy-cmd)))) (with-sql-connection (conn) (clsql:truncate-database :database conn) (sql-drop-tables conn) (sql-create-tables conn) (dolist (file *umls-files*) - (sql-execute (funcall copy-cmd file extension) conn)) + (sql-execute (funcall copy-cmd file extension) conn)) (sql-create-indexes conn) (sql-create-custom-tables conn) (sql-create-indexes conn +custom-index-cols+) @@ -231,48 +231,48 @@ This is much faster that using create-umls-db-insert." (defun make-noneng-index-file (extension) "Make non-english index file" (translate-files (find-ufile "MRXW.NONENG") - extension (noneng-lang-index-files))) + extension (noneng-lang-index-files))) (defun translate-files (out-ufile extension input-ufiles) "Translate a umls file into a format suitable for sql copy cmd" (let ((output-path (umls-pathname (fil out-ufile) extension))) (if (probe-file output-path) - (format t "File ~A already exists: skipping~%" output-path) + (format t "File ~A already exists: skipping~%" output-path) (with-open-file (ostream output-path :direction :output) - (dolist (input-ufile input-ufiles) - (with-umls-file (line (fil input-ufile)) - (translate-line out-ufile line ostream) - (princ #\newline ostream))))))) + (dolist (input-ufile input-ufiles) + (with-umls-file (line (fil input-ufile)) + (translate-line out-ufile line ostream) + (princ #\newline ostream))))))) (defun translate-line (file line strm) "Translate a single line for sql output" (flet ((col-value (col value) - (if (eq (datatype col) 'sql-u) - (let ((ui (parse-ui value ""))) - (if (stringp ui) - ui - (write-to-string ui))) - (escape-backslashes value)))) + (if (eq (datatype col) 'sql-u) + (let ((ui (parse-ui value ""))) + (if (stringp ui) + ui + (write-to-string ui))) + (escape-backslashes value)))) (print-separated-strings - strm "|" + strm "|" (mapcar #'col-value (remove-custom-cols (ucols file)) line) (custom-col-values (custom-ucols-for-file file) line nil)))) (defun pg-copy-cmd (file extension) - "Return postgresql copy statement for a file" + "Return postgresql copy statement for a file" (format nil "COPY ~a FROM '~a' using delimiters '|' with null as ''" (table file) (umls-pathname (fil file) extension))) (defun mysql-copy-cmd (file extension &key local-file) - "Return mysql copy statement for a file" + "Return mysql copy statement for a file" (format nil - "LOAD DATA ~AINFILE \"~a\" INTO TABLE ~a FIELDS TERMINATED BY \"|\"" + "LOAD DATA ~AINFILE '~a' INTO TABLE ~a FIELDS TERMINATED BY '|'" (if local-file "LOCAL " "") (umls-pathname (fil file) extension) (table file))) - + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; ;;; Routines for analyzing cost of fixed size storage @@ -282,29 +282,29 @@ This is much faster that using create-umls-db-insert." (defun umls-fixed-size-waste () "Display storage waste if using all fixed size storage" (let ((totalwaste 0) - (totalunavoidable 0) - (totalavoidable 0) - (unavoidable '()) - (avoidable '())) + (totalunavoidable 0) + (totalavoidable 0) + (unavoidable '()) + (avoidable '())) (dolist (file *umls-files*) (dolist (col (ucols file)) - (let* ((avwaste (- (cmax col) (av col))) - (cwaste (* avwaste (rws file)))) - (when (plusp cwaste) - (if (<= avwaste 6) - (progn - (incf totalunavoidable cwaste) - (push (list (fil file) (col col) - avwaste cwaste) - unavoidable)) - (progn - (incf totalavoidable cwaste) - (push (list (fil file) (col col) - avwaste cwaste) - avoidable))) - (incf totalwaste cwaste))))) + (let* ((avwaste (- (cmax col) (av col))) + (cwaste (* avwaste (rws file)))) + (when (plusp cwaste) + (if (<= avwaste 6) + (progn + (incf totalunavoidable cwaste) + (push (list (fil file) (col col) + avwaste cwaste) + unavoidable)) + (progn + (incf totalavoidable cwaste) + (push (list (fil file) (col col) + avwaste cwaste) + avoidable))) + (incf totalwaste cwaste))))) (values totalwaste totalavoidable totalunavoidable - (nreverse avoidable) (nreverse unavoidable)))) + (nreverse avoidable) (nreverse unavoidable)))) (defun display-waste () (ensure-ucols+ufiles) @@ -328,7 +328,7 @@ This is much faster that using create-umls-db-insert." (declare (fixnum max)) (dolist (ucol *umls-cols*) (when (> (cmax ucol) max) - (setq max (cmax ucol)))) + (setq max (cmax ucol)))) max)) (defun max-umls-row () @@ -338,7 +338,7 @@ This is much faster that using create-umls-db-insert." (let ((rowsizes '())) (dolist (file *umls-files*) (let ((row 0)) - (dolist (ucol (ucols file)) - (incf row (1+ (cmax ucol)))) - (push row rowsizes))) + (dolist (ucol (ucols file)) + (incf row (1+ (cmax ucol)))) + (push row rowsizes))) (car (sort rowsizes #'>))))