X-Git-Url: http://git.kpe.io/?p=umlisp.git;a=blobdiff_plain;f=create-sql.lisp;h=20964f5c4ae3a6a8f4023ecf033582a759061856;hp=88fa898fb89814d881ec3b1c6b3fbe551551fc02;hb=HEAD;hpb=f2f3771917e7d8c2999615d3f30641c8ee251872 diff --git a/create-sql.lisp b/create-sql.lisp index 88fa898..3a19020 100644 --- a/create-sql.lisp +++ b/create-sql.lisp @@ -30,7 +30,7 @@ (setq sqltype "VARCHAR2(20)"))))) (concatenate 'string - (col c) + (sqlname c) " " (if (or (string-equal sqltype "VARCHAR") (string-equal sqltype "CHAR")) @@ -44,7 +44,7 @@ " MAX_ROWS=200000000" "") (if (eq *umls-sql-type* :mysql) - " TYPE=MYISAM CHARACTER SET utf8" + " ENGINE=MYISAM CHARACTER SET utf8 COLLATE utf8_bin" "")))) (defun create-custom-table-cmd (tablename sql-cmd) @@ -261,16 +261,56 @@ (if (eql :mysql *umls-sql-type*) (sql-execute "drop table if exists USRL" conn) (ignore-errors (sql-execute "drop table USRL" conn))) - (sql-execute "create table USRL (sab varchar(80), srl integer)" conn) + (sql-execute + (concatenate 'string + "create table USRL (sab varchar(80), srl integer)" + (if (eq *umls-sql-type* :mysql) + " ENGINE=MYISAM CHARACTER SET utf8 COLLATE utf8_bin" + "")) + conn) (dolist (tuple (mutex-sql-query "select distinct SAB,SRL from MRCONSO order by SAB asc")) (sql-execute (format nil "insert into USRL (sab,srl) values ('~a',~d)" (car tuple) (ensure-integer (cadr tuple))) conn))) +(defun make-ustats (conn) + (ignore-errors (sql-execute "drop table USTATS" conn)) + (sql-execute + (concatenate 'string"create table USTATS (NAME varchar(160), COUNT bigint, SRL integer)" + (if (eq *umls-sql-type* :mysql) + " ENGINE=MYISAM CHARACTER SET utf8 COLLATE utf8_bin" + "")) + conn) + + (dolist (srl '(0 1 2 3 4 9)) + (insert-ustats-count conn "Concept Count" "MRCONSO" "distinct CUI" "KCUILRL" srl) + (insert-ustats-count conn "Term Count" "MRCONSO" "distinct KCUILUI" "KCUILRL" srl) + (insert-ustats-count conn "Distinct Term Count" "MRCONSO" "distinct LUI" "KLUILRL" srl) + (insert-ustats-count conn "String Count" "MRCONSO" "*" "KSUILRL" srl) + (insert-ustats-count conn "Distinct String Count" "MRCONSO" "distinct SUI" "KSUILRL" srl) + (insert-ustats-count conn "Hierarchcy" "MRHIER" "*" "KSRL" srl) + (insert-ustats-count conn "Mappings" "MRMAP" "*" "KSRL" srl) + (insert-ustats-count conn "Simple Mappings" "MRSMAP" "*" "KSRL" srl) +#+mrcoc (insert-ustats-count conn "Co-occuring Concept Count" "MRCOC" "*" "KLRL" srl) + (insert-ustats-count conn "Definition Count" "MRDEF" "*" "KSRL" srl) + (insert-ustats-count conn "Rank Count" "MRRANK" "*" "KSRL" srl) + (insert-ustats-count conn "Relationship Count" "MRREL" "*" "KSRL" srl) + (insert-ustats-count conn "Semantic Type Count" "MRSTY" "*" "KLRL" srl) + (insert-ustats-count conn "Simple Attribute Count" "MRSAT" "*" "KSRL" srl) + (insert-ustats-count conn "Source Abbreviation Count" "MRSAB" "*" "SRL" srl) + (insert-ustats-count conn "Word Index Count" "MRXW_ENG" "*" "KLRL" srl) + (insert-ustats-count conn "Normalized Word Index Count" "MRXNW_ENG" "*" "KLRL" srl) + (insert-ustats-count conn "Normalized String Index Count" "MRXNS_ENG" "*" "KLRL" srl)) + (sql-execute "create index USTATS_SRL on USTATS (SRL)" conn) + (find-ustats-all)) + +(defun insert-ustats-count (conn name table count-variable srl-control srl) + (insert-ustats conn name (find-count-table conn table srl count-variable srl-control) srl)) + (defun sql-create-special-tables (conn) (make-usrl conn) - (make-ustats)) + (make-ustats conn)) (defun create-umls-db-by-insert (&key verbose) "SQL Databases: initializes entire database via SQL insert commands" @@ -339,32 +379,31 @@ This is much faster that using create-umls-db-insert." (let ((translated-lines 0) (input-lines 0) (eof (cons nil nil))) - (catch 'done-counting - (with-open-file (ts output-path :direction :input - #+(and sbcl sb-unicode) :external-format - #+(and sbcl sb-unicode) :UTF-8 - #+(and allegro ics) :external-format - #+(and allegro ics) :UTF-8 - #+lispworks :external-format - #+lispworks :UTF-8 - #+(and clisp unicode) :external-format - #+(and clisp unicode) charset:utf-8) - (do ((c (read-char ts nil eof) (read-char ts nil eof))) - ((eq c eof)) - (when (eql c #\newline) - (incf translated-lines)))) - (dolist (input-ufile input-ufiles) - (with-umls-ufile (line input-ufile) - (incf input-lines) - (when (> input-lines translated-lines) - (throw 'done-counting 'incomplete))))) + (with-open-file (ts output-path :direction :input + #+(and sbcl sb-unicode) :external-format + #+(and sbcl sb-unicode) :UTF-8 + #+(and allegro ics) :external-format + #+(and allegro ics) :UTF-8 + #+lispworks :external-format + #+lispworks :UTF-8 + #+(and clisp unicode) :external-format + #+(and clisp unicode) charset:utf-8) + (do ((c (read-char ts nil eof) (read-char ts nil eof))) + ((eq c eof)) + (when (eql c #\newline) + (incf translated-lines)))) + (dolist (input-ufile input-ufiles) + (with-umls-ufile (line input-ufile) + (incf input-lines) + (when (> input-lines translated-lines) + (return)))) (cond ((< input-lines translated-lines) (format t "Translated file ~A incomplete, deleting...~%" output-path) (delete-file output-path) nil) ((eql input-lines translated-lines) - (format t "Translated file ~A already exists: skipping...~%" output-path) + (format t "Translated file ~A exists and is proper number of lines: skipping...~%" output-path) t) ((eql input-lines 0) (warn "The number of input lines is 0 for output file ~A." output-path)