(setq sqltype "VARCHAR2(20)")))))
(concatenate 'string
- (col c)
+ (sqlname c)
" "
(if (or (string-equal sqltype "VARCHAR")
(string-equal sqltype "CHAR"))
" MAX_ROWS=200000000"
"")
(if (eq *umls-sql-type* :mysql)
- " TYPE=MYISAM CHARACTER SET utf8"
+ " ENGINE=MYISAM CHARACTER SET utf8 COLLATE utf8_bin"
""))))
(defun create-custom-table-cmd (tablename sql-cmd)
(format nil "DROP INDEX ~a"
(concatenate 'string tablename "_" colname "_X")))))
+(defun sql-create-indexes-mysql (conn indexes verbose)
+ (let ((tables nil)
+ (table-cols nil))
+ (dolist (idx indexes)
+ (pushnew (second idx) tables :test 'string-equal)
+ (let ((table-col (find (second idx) table-cols :key 'car :test 'string-equal)))
+ (if table-col
+ (vector-push-extend (cons (first idx) (third idx)) (second table-col))
+ (push (list (second idx) (make-array (list 1) :initial-contents (list (cons (first idx) (third idx)))
+ :adjustable t :fill-pointer 1))
+ table-cols))))
+ (dolist (table tables)
+ (let ((table-col (find table table-cols :key 'car :test 'string-equal))
+ (first t)
+ (str (format nil "ALTER TABLE ~A" table)))
+ (loop for col across (second table-col)
+ do
+ (let ((colname (car col))
+ (length (cdr col)))
+ (ignore-errors (sql-execute (drop-index-cmd colname table) conn))
+ (setq str (concatenate 'string
+ str
+ (if first
+ (progn
+ (setq first nil)
+ " ")
+ ", ")
+ (format nil "ADD INDEX ~A (~A)"
+ (concatenate 'string table "_" colname "_X")
+ (concatenate 'string
+ colname
+ (if (integerp length)
+ (format nil " (~d)" length)
+ "")))))))
+ (when verbose
+ (format t "UMLS Import: Creating indexes for columns ~A on table ~A.~%"
+ (mapcar 'car (coerce (second table-col) 'list)) table))
+ (when conn
+ (sql-execute str conn))
+ ))))
+
(defun sql-create-indexes (conn &key (indexes +index-cols+) verbose)
"SQL Databases: create all indexes"
- (dolist (idx indexes)
- (when verbose (format t "UMLS Import: Creating index for column ~A on table ~A.~%"
- (first idx) (second idx)))
- (ignore-errors (sql-execute (drop-index-cmd (car idx) (cadr idx)) conn))
- (sql-execute (create-index-cmd (car idx) (cadr idx) (caddr idx)) conn)))
+ (if (eql :mysql *umls-sql-type*)
+ (sql-create-indexes-mysql conn indexes verbose)
+ (dolist (idx indexes)
+ (when verbose (format t "UMLS Import: Creating index for column ~A on table ~A.~%"
+ (first idx) (second idx)))
+ (ignore-errors (sql-execute (drop-index-cmd (car idx) (cadr idx)) conn))
+ (sql-execute (create-index-cmd (car idx) (cadr idx) (caddr idx)) conn))))
(defun make-usrl (conn)
(if (eql :mysql *umls-sql-type*)
(sql-execute "drop table if exists USRL" conn)
(ignore-errors (sql-execute "drop table USRL" conn)))
- (sql-execute "create table USRL (sab varchar(80), srl integer)" conn)
+ (sql-execute
+ (concatenate 'string
+ "create table USRL (sab varchar(80), srl integer)"
+ (if (eq *umls-sql-type* :mysql)
+ " ENGINE=MYISAM CHARACTER SET utf8 COLLATE utf8_bin"
+ ""))
+ conn)
(dolist (tuple (mutex-sql-query
"select distinct SAB,SRL from MRCONSO order by SAB asc"))
(sql-execute (format nil "insert into USRL (sab,srl) values ('~a',~d)"
(car tuple) (ensure-integer (cadr tuple)))
conn)))
+(defun make-ustats (conn)
+ (ignore-errors (sql-execute "drop table USTATS" conn))
+ (sql-execute
+ (concatenate 'string"create table USTATS (NAME varchar(160), COUNT bigint, SRL integer)"
+ (if (eq *umls-sql-type* :mysql)
+ " ENGINE=MYISAM CHARACTER SET utf8 COLLATE utf8_bin"
+ ""))
+ conn)
+
+ (dolist (srl '(0 1 2 3 4 9))
+ (insert-ustats-count conn "Concept Count" "MRCONSO" "distinct CUI" "KCUILRL" srl)
+ (insert-ustats-count conn "Term Count" "MRCONSO" "distinct KCUILUI" "KCUILRL" srl)
+ (insert-ustats-count conn "Distinct Term Count" "MRCONSO" "distinct LUI" "KLUILRL" srl)
+ (insert-ustats-count conn "String Count" "MRCONSO" "*" "KSUILRL" srl)
+ (insert-ustats-count conn "Distinct String Count" "MRCONSO" "distinct SUI" "KSUILRL" srl)
+ (insert-ustats-count conn "Hierarchcy" "MRHIER" "*" "KSRL" srl)
+ (insert-ustats-count conn "Mappings" "MRMAP" "*" "KSRL" srl)
+ (insert-ustats-count conn "Simple Mappings" "MRSMAP" "*" "KSRL" srl)
+#+mrcoc (insert-ustats-count conn "Co-occuring Concept Count" "MRCOC" "*" "KLRL" srl)
+ (insert-ustats-count conn "Definition Count" "MRDEF" "*" "KSRL" srl)
+ (insert-ustats-count conn "Rank Count" "MRRANK" "*" "KSRL" srl)
+ (insert-ustats-count conn "Relationship Count" "MRREL" "*" "KSRL" srl)
+ (insert-ustats-count conn "Semantic Type Count" "MRSTY" "*" "KLRL" srl)
+ (insert-ustats-count conn "Simple Attribute Count" "MRSAT" "*" "KSRL" srl)
+ (insert-ustats-count conn "Source Abbreviation Count" "MRSAB" "*" "SRL" srl)
+ (insert-ustats-count conn "Word Index Count" "MRXW_ENG" "*" "KLRL" srl)
+ (insert-ustats-count conn "Normalized Word Index Count" "MRXNW_ENG" "*" "KLRL" srl)
+ (insert-ustats-count conn "Normalized String Index Count" "MRXNS_ENG" "*" "KLRL" srl))
+ (sql-execute "create index USTATS_SRL on USTATS (SRL)" conn)
+ (find-ustats-all))
+
+(defun insert-ustats-count (conn name table count-variable srl-control srl)
+ (insert-ustats conn name (find-count-table conn table srl count-variable srl-control) srl))
+
(defun sql-create-special-tables (conn)
(make-usrl conn)
- (make-ustats))
+ (make-ustats conn))
(defun create-umls-db-by-insert (&key verbose)
"SQL Databases: initializes entire database via SQL insert commands"
(let ((translated-lines 0)
(input-lines 0)
(eof (cons nil nil)))
- (catch 'done-counting
- (with-open-file (ts output-path :direction :input
- #+(and sbcl sb-unicode) :external-format
- #+(and sbcl sb-unicode) :UTF-8
- #+(and allegro ics) :external-format
- #+(and allegro ics) :UTF-8
- #+lispworks :external-format
- #+lispworks :UTF-8
- #+(and clisp unicode) :external-format
- #+(and clisp unicode) charset:utf-8)
- (do ()
- ((eq (read-line ts nil eof) eof))
- (incf translated-lines)))
- (dolist (input-ufile input-ufiles)
- (with-umls-ufile (line input-ufile)
- (incf input-lines)
- (when (> input-lines translated-lines)
- (throw 'done-counting 'incomplete)))))
+ (with-open-file (ts output-path :direction :input
+ #+(and sbcl sb-unicode) :external-format
+ #+(and sbcl sb-unicode) :UTF-8
+ #+(and allegro ics) :external-format
+ #+(and allegro ics) :UTF-8
+ #+lispworks :external-format
+ #+lispworks :UTF-8
+ #+(and clisp unicode) :external-format
+ #+(and clisp unicode) charset:utf-8)
+ (do ((c (read-char ts nil eof) (read-char ts nil eof)))
+ ((eq c eof))
+ (when (eql c #\newline)
+ (incf translated-lines))))
+ (dolist (input-ufile input-ufiles)
+ (with-umls-ufile (line input-ufile)
+ (incf input-lines)
+ (when (> input-lines translated-lines)
+ (return))))
(cond
((< input-lines translated-lines)
(format t "Translated file ~A incomplete, deleting...~%" output-path)
(delete-file output-path)
nil)
((eql input-lines translated-lines)
- (format t "Translated file ~A already exists: skipping...~%" output-path)
+ (format t "Translated file ~A exists and is proper number of lines: skipping...~%" output-path)
t)
((eql input-lines 0)
(warn "The number of input lines is 0 for output file ~A." output-path)