X-Git-Url: http://git.kpe.io/?a=blobdiff_plain;ds=sidebyside;f=create-sql.lisp;fp=create-sql.lisp;h=e417b1a0498f9f1ef68fc4924012f9e9cda14e16;hb=d8fe27c58aa49f4a19f8b0dc11f97e0db7662e9e;hp=0000000000000000000000000000000000000000;hpb=020186ffddfabdeb617ec0fca3dec958dddce961;p=umlisp-orf.git diff --git a/create-sql.lisp b/create-sql.lisp new file mode 100644 index 0000000..e417b1a --- /dev/null +++ b/create-sql.lisp @@ -0,0 +1,344 @@ +;;;; -*- Mode: Lisp; Syntax: ANSI-Common-Lisp; Base: 10; Package: umlisp -*- +;;;; ************************************************************************* +;;;; FILE IDENTIFICATION +;;;; +;;;; Name: sql-create +;;;; Purpose: Create SQL database for UMLisp +;;;; Author: Kevin M. Rosenberg +;;;; Created: Apr 2000 +;;;; +;;;; $Id$ +;;;; +;;;; This file, part of UMLisp, is +;;;; Copyright (c) 2000-2004 by Kevin M. Rosenberg, M.D. +;;;; +;;;; UMLisp users are granted the rights to distribute and use this software +;;;; as governed by the terms of the GNU General Public License. +;;;; ************************************************************************* + +(in-package #:umlisp-orf) + +(defun create-table-cmd (file) + "Return sql command to create a table" + (let ((col-func + (lambda (c) + (let ((sqltype (sqltype c))) + (concatenate 'string + (col c) + " " + (if (or (string-equal sqltype "VARCHAR") + (string-equal sqltype "CHAR")) + (format nil "~a (~a)" sqltype (cmax c)) + sqltype)))))) + (format nil "CREATE TABLE ~a (~{~a~^,~})" (table file) + (mapcar col-func (ucols file))))) + +(defun create-custom-table-cmd (tablename sql-cmd) + "Return SQL command to create a custom table" + (format nil "CREATE TABLE ~a AS ~a;" tablename sql-cmd)) + +(defun insert-col-value (col value) + (if (null (parse-fun col)) + value + (format nil "~A" (funcall (parse-fun col) value)))) + +(defun insert-values-cmd (file values) + "Return sql insert command for a row of values" + (let ((insert-func + (lambda (col value) + (concatenate 'string (quote-str col) + (insert-col-value col value) + (quote-str col))))) + (format + nil "INSERT INTO ~a (~{~a~^,~}) VALUES (~A)" + (table file) + (fields file) + (concat-separated-strings + "," + (mapcar insert-func (remove-custom-cols (ucols file)) values) + (custom-col-values (custom-ucols-for-file file) values t))))) + + +(defun custom-col-value (col values doquote) + (let ((custom-value (funcall (custom-value-fun col) values))) + (if custom-value + (if doquote + (concatenate 'string (quote-str col) + (escape-backslashes custom-value) + (quote-str col)) + (escape-backslashes custom-value)) + ""))) + +(defun custom-col-values (ucols values doquote) + "Returns a list of string column values for SQL inserts for custom columns" + (loop for col in ucols collect (custom-col-value col values doquote))) + +(defun remove-custom-cols (cols) + "Remove custom cols from a list col umls-cols" + (remove-if #'custom-value-fun cols)) + +(defun find-custom-cols-for-filename (filename) + (remove-if-not (lambda (x) (string-equal filename (car x))) +custom-cols+)) + +(defun find-custom-col (filename col) + (find-if (lambda (x) (and (string-equal filename (car x)) + (string-equal col (cadr x)))) +custom-cols+)) + +(defun custom-colnames-for-filename (filename) + (mapcar #'cadr (find-custom-cols-for-filename filename))) + +(defun custom-ucols-for-file (file) + (remove-if-not #'custom-value-fun (ucols file))) + +(defun noneng-lang-index-files () + (remove-if-not + (lambda (f) (and (> (length (fil f)) 4) + (string-equal (fil f) "MRXW." :end1 5) + (not (string-equal (fil f) "MRXW.ENG")) + (not (string-equal (fil f) "MRXW.NONENG")))) + *umls-files*)) + +;;; SQL Command Functions + +(defun create-index-cmd (colname tablename length) + "Return sql create index command" + (format nil "CREATE INDEX ~a ON ~a (~a)" + (concatenate 'string tablename "_" colname "_X") + tablename + (case *umls-sql-type* + (:mysql + (concatenate 'string colname + (if (integerp length) + (format nil " (~d)" length) + ""))) + ((:postgresql :postgresql-socket) + ;; FIXME: incorrect syntax + (if (integerp length) + (format nil "substr((~A)::text,1,~D)" colname length) + colname)) + (t + colname)))) + +(defun create-all-tables-cmdfile () + "Return sql commands to create all tables. Not need for automated SQL import" + (mapcar (lambda (f) (format nil "~a~%~%" (create-table-cmd f))) *umls-files*)) + +;; SQL Execution functions + +(defun sql-drop-tables (conn) + "SQL Databases: drop all tables" + (dolist (file *umls-files*) + (ignore-errors + (sql-execute (format nil "DROP TABLE ~a" (table file)) conn)))) + +(defun sql-create-tables (conn) + "SQL Databases: create all tables" + (dolist (file *umls-files*) + (sql-execute (create-table-cmd file) conn))) + +(defun sql-create-custom-tables (conn) + "SQL Databases: create all custom tables" + (dolist (ct +custom-tables+) + (sql-execute (create-custom-table-cmd (car ct) (cadr ct)) conn))) + +(defun sql-insert-values (conn file) + "SQL Databases: inserts all values for a file" + (with-umls-file (line (fil file)) + (sql-execute (insert-values-cmd file line) conn))) + +(defun sql-insert-all-values (conn) + "SQL Databases: inserts all values for all files" + (dolist (file *umls-files*) + (sql-insert-values conn file))) + +(defun drop-index-cmd (colname tablename) + "Return sql create index command" + (case *umls-sql-type* + (:mysql + (format nil "DROP INDEX ~a ON ~a" + (concatenate 'string tablename "_" colname "_X") + tablename)) + (t + (format nil "DROP INDEX ~a" + (concatenate 'string tablename "_" colname "_X"))))) + +(defun sql-create-indexes (conn &optional (indexes +index-cols+)) + "SQL Databases: create all indexes" + (dolist (idx indexes) + (ignore-errors (sql-execute (drop-index-cmd (car idx) (cadr idx)) conn)) + (sql-execute (create-index-cmd (car idx) (cadr idx) (caddr idx)) conn))) + +(defun make-usrl (conn) + (if (eql :mysql *umls-sql-type*) + (sql-execute "drop table if exists USRL" conn) + (ignore-errors (sql-execute "drop table USRL" conn))) + (sql-execute "create table USRL (sab varchar(80), srl integer)" conn) + (dolist (tuple (mutex-sql-query + "select distinct SAB,SRL from MRSO order by SAB asc")) + (sql-execute (format nil "insert into USRL (sab,srl) values ('~a',~d)" + (car tuple) (ensure-integer (cadr tuple))) + conn))) + +(defun sql-create-special-tables (conn) + (make-usrl conn) + (make-ustats)) + +(defun create-umls-db-by-insert () + "SQL Databases: initializes entire database via SQL insert commands" + (ensure-ucols+ufiles) + (ensure-preparse) + (with-sql-connection (conn) + (sql-drop-tables conn) + (sql-create-tables conn) + (sql-insert-all-values conn) + (sql-create-indexes conn) + (sql-create-custom-tables conn) + (sql-create-indexes conn +custom-index-cols+) + (sql-create-special-tables conn))) + +(defun create-umls-db (&key (extension ".trans") (skip-translation nil)) + "SQL Databases: initializes entire database via SQL copy commands. +This is much faster that using create-umls-db-insert." + (ensure-ucols+ufiles) + (ensure-preparse) + (unless skip-translation + (translate-all-files extension)) + (let ((copy-cmd + (ecase (umls-sql-type) + (:mysql #'mysql-copy-cmd) + (:postgresql #'pg-copy-cmd)))) + (with-sql-connection (conn) + (clsql:truncate-database :database conn) + (sql-drop-tables conn) + (sql-create-tables conn) + (dolist (file *umls-files*) + (sql-execute (funcall copy-cmd file extension) conn)) + (sql-create-indexes conn) + (sql-create-custom-tables conn) + (sql-create-indexes conn +custom-index-cols+) + (sql-create-special-tables conn)))) + +(defun translate-all-files (&optional (extension ".trans")) + "Copy translated files and return postgresql copy commands to import" + (make-noneng-index-file extension) + (dolist (f (remove "MRXW.NONENG" *umls-files* :test #'string= :key #'fil)) + (translate-umls-file f extension))) + +(defun translate-umls-file (file extension) + "Translate a umls file into a format suitable for sql copy cmd" + (translate-files file extension (list file))) + +(defun make-noneng-index-file (extension) + "Make non-english index file" + (translate-files (find-ufile "MRXW.NONENG") + extension (noneng-lang-index-files))) + +(defun translate-files (out-ufile extension input-ufiles) + "Translate a umls file into a format suitable for sql copy cmd" + (let ((output-path (umls-pathname (fil out-ufile) extension))) + (if (probe-file output-path) + (format t "File ~A already exists: skipping~%" output-path) + (with-open-file (ostream output-path :direction :output) + (dolist (input-ufile input-ufiles) + (with-umls-file (line (fil input-ufile)) + (translate-line out-ufile line ostream) + (princ #\newline ostream))))))) + +(defun translate-line (file line strm) + "Translate a single line for sql output" + (flet ((col-value (col value) + (if (eq (datatype col) 'sql-u) + (let ((ui (parse-ui value ""))) + (if (stringp ui) + ui + (write-to-string ui))) + (escape-backslashes value)))) + (print-separated-strings + strm "|" + (mapcar #'col-value (remove-custom-cols (ucols file)) line) + (custom-col-values (custom-ucols-for-file file) line nil)))) + +(defun pg-copy-cmd (file extension) + "Return postgresql copy statement for a file" + (format + nil "COPY ~a FROM '~a' using delimiters '|' with null as ''" + (table file) (umls-pathname (fil file) extension))) + +(defun mysql-copy-cmd (file extension &key local-file) + "Return mysql copy statement for a file" + (format + nil + "LOAD DATA ~AINFILE \"~a\" INTO TABLE ~a FIELDS TERMINATED BY \"|\"" + (if local-file "LOCAL " "") + (umls-pathname (fil file) extension) (table file))) + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; +;;; Routines for analyzing cost of fixed size storage +;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(defun umls-fixed-size-waste () + "Display storage waste if using all fixed size storage" + (let ((totalwaste 0) + (totalunavoidable 0) + (totalavoidable 0) + (unavoidable '()) + (avoidable '())) + (dolist (file *umls-files*) + (dolist (col (ucols file)) + (let* ((avwaste (- (cmax col) (av col))) + (cwaste (* avwaste (rws file)))) + (when (plusp cwaste) + (if (<= avwaste 6) + (progn + (incf totalunavoidable cwaste) + (push (list (fil file) (col col) + avwaste cwaste) + unavoidable)) + (progn + (incf totalavoidable cwaste) + (push (list (fil file) (col col) + avwaste cwaste) + avoidable))) + (incf totalwaste cwaste))))) + (values totalwaste totalavoidable totalunavoidable + (nreverse avoidable) (nreverse unavoidable)))) + +(defun display-waste () + (ensure-ucols+ufiles) + (multiple-value-bind (tw ta tu al ul) (umls-fixed-size-waste) + (format t "Total waste: ~d~%" tw) + (format t "Total avoidable: ~d~%" ta) + (format t "Total unavoidable: ~d~%" tu) + (format t "Avoidable:~%") + (dolist (w al) + (format t " (~a,~a): ~a,~a~%" (car w) (cadr w) (caddr w) (cadddr w))) + (format t "Unavoidable:~%") + (dolist (w ul) + (format t " (~a,~a): ~a,~a~%" (car w) (cadr w) (caddr w) (cadddr w))) + )) + +(defun max-umls-field () + "Return length of longest field" + (declare (optimize (speed 3) (space 0))) + (ensure-ucols+ufiles) + (let ((max 0)) + (declare (fixnum max)) + (dolist (ucol *umls-cols*) + (when (> (cmax ucol) max) + (setq max (cmax ucol)))) + max)) + +(defun max-umls-row () + "Return length of longest row" + (declare (optimize (speed 3) (space 0))) + (ensure-ucols+ufiles) + (let ((rowsizes '())) + (dolist (file *umls-files*) + (let ((row 0)) + (dolist (ucol (ucols file)) + (incf row (1+ (cmax ucol)))) + (push row rowsizes))) + (car (sort rowsizes #'>))))