X-Git-Url: http://git.kpe.io/?a=blobdiff_plain;f=parse-2002.lisp;h=726764fd63b112f4d5280b5115eab0ff0b3a16fe;hb=3199369942d2e5ab4f5b060c2c6b655caf505944;hp=4024eb2d32a824d5646c16ea2d0a785a746a9353;hpb=7e088e3bca94e4cba5a00f3c3734db4ea57f4e2f;p=umlisp.git diff --git a/parse-2002.lisp b/parse-2002.lisp index 4024eb2..726764f 100644 --- a/parse-2002.lisp +++ b/parse-2002.lisp @@ -8,7 +8,7 @@ ;;;; Author: Kevin M. Rosenberg ;;;; Date Started: Apr 2000 ;;;; -;;;; $Id: parse-2002.lisp,v 1.9 2003/05/06 07:55:15 kevin Exp $ +;;;; $Id: parse-2002.lisp,v 1.10 2003/05/07 21:57:06 kevin Exp $ ;;;; ;;;; This file, part of UMLisp, is ;;;; Copyright (c) 2000-2002 by Kevin M. Rosenberg, M.D. @@ -262,167 +262,66 @@ ;; File & Column functions -(defun init-umls (&optional (alwaysclear nil)) -"Initialize all UMLS file and column structures if not already initialized" - (when (or alwaysclear (null *umls-files*)) - (init-umls-cols) - (init-umls-files) - (init-field-lengths))) +(defun gen-ucols () + (add-ucols (gen-ucols-meta)) + (add-ucols (gen-ucols-custom)) + (add-ucols (gen-ucols-generic "LRFLD")) + (add-ucols (gen-ucols-generic "SRFLD"))) -(defun init-umls-cols () - (setq *umls-cols* (append - (init-meta-cols) - (init-custom-cols) - (init-generic-cols "LRFLD") - (init-generic-cols "SRFLD")))) - -(defun init-meta-cols () +(defun gen-ucols-meta () "Initialize all umls columns" (let ((cols '())) (with-umls-file (line "MRCOLS") (destructuring-bind (col des ref min av max fil dty) line - (let ((c (make-instance 'ucol - :col col - :des des - :ref ref - :min (parse-integer min) - :av (read-from-string av) - :max (parse-integer max) - :fil fil - :dty dty ;; new in 2002 UMLS - :sqltype "VARCHAR" ; default data type - :parse-fun #'add-sql-quotes - :custom-value-fun nil - :quotechar "'"))) - (add-datatype-to-col c (datatype-for-col col)) - (push c cols)))) + (push (make-ucol col des ref (parse-integer min) (read-from-string av) + (parse-integer max) fil dty) + cols))) (nreverse cols))) -(defun init-custom-cols () +(defun gen-ucols-custom () "Initialize umls columns for custom columns" - (let ((cols '())) - (dolist (customcol +custom-cols+) - (let ((c (make-instance 'ucol - :col (nth 1 customcol) :des "" :ref 0 :min 0 :max (nth 3 customcol) - :av 0 :dty nil :fil (nth 0 customcol) :sqltype (nth 2 customcol) - :parse-fun #'add-sql-quotes :custom-value-fun (nth 4 customcol) - :quotechar "'"))) - (add-datatype-to-col c (datatype-for-col (nth 1 customcol))) - (push c cols))) - (nreverse cols))) - -(defun escape-column-name (name) - (substitute #\_ #\/ name)) + (loop for customcol in +custom-cols+ + collect + (make-ucol (nth 1 customcol) "" 0 0 0 (nth 3 customcol) + (nth 0 customcol) nil :sqltype (nth 2 customcol)))) -(defun init-generic-cols (col-filename) +(defun gen-ucols-generic (col-filename) "Initialize for generic (LEX/NET) columns" (let ((cols '())) (with-umls-file (line col-filename) (destructuring-bind (nam des ref fil) line (setq nam (escape-column-name nam)) (dolist (file (delimited-string-to-list fil #\,)) - (let ((c (make-instance 'ucol - :col nam - :des des - :ref ref - :min nil - :av nil - :max nil - :fil file - :dty nil - :sqltype "VARCHAR" ; default data type - :parse-fun #'add-sql-quotes - :custom-value-fun nil - :quotechar "'"))) - (add-datatype-to-col c (datatype-for-col nam)) - (push c cols))))) + (push + (make-ucol nam des ref nil nil nil file nil) + cols)))) (nreverse cols))) -(defun init-umls-files () - (setq *umls-files* (append - (init-generic-files "MRFILES") - (init-generic-files "LRFIL") - (init-generic-files "SRFIL"))) - ;; need to separate this since init-custom-files depends on *umls-files* - (setq *umls-files* (append *umls-files* (init-custom-files)))) +(defun gen-ufiles () + (add-ufiles (gen-ufiles-generic "MRFILES")) + (add-ufiles (gen-ufiles-generic "LRFIL")) + (add-ufiles (gen-ufiles-generic "SRFIL")) + ;; needs to come last + (add-ufiles (gen-ufiles-custom))) -(defun umls-field-string-to-list (fmt) - "Converts a comma delimited list of fields into a list of field names. Will -append a unique number (starting at 2) onto a column name that is repeated in the list" - (let ((field-list (delimited-string-to-list (escape-column-name fmt) #\,)) - (col-count (make-hash-table :test 'equal))) - (dotimes (i (length field-list)) - (declare (fixnum i)) - (let ((col (nth i field-list))) - (multiple-value-bind (key found) (gethash col col-count) - (if found - (let ((next-id (1+ key))) - (setf (nth i field-list) (concatenate 'string col (write-to-string next-id))) - (setf (gethash col col-count) next-id)) - (setf (gethash col col-count) 1))))) - field-list)) - -(defun init-generic-files (files-filename) + +(defun gen-ufiles-generic (files-filename) "Initialize all LEX file structures" (let ((files '())) - (with-umls-file (line files-filename) - (destructuring-bind (fil des fmt cls rws bts) line - (let ((f (make-instance 'ufile - :fil fil - :table (substitute #\_ #\. fil) - :des des - :fmt (escape-column-name fmt) - :cls (parse-integer cls) - :rws (parse-integer rws) - :bts (parse-integer bts) - :fields (concatenate 'list - (umls-field-string-to-list fmt) - (custom-colnames-for-filename fil))))) - (setf (ucols f) (ucols-for-ufile f)) - (push f files)))) - (nreverse files))) - -(defun init-custom-files () - (let ((ffile (make-instance 'ufile - :fil "MRXW.NONENG" :des "Custom NonEnglish Index" :table "MRXW_NONENG" - :cls 5 :rws 0 :bts 0 :fields (fields (find-ufile "MRXW.ENG"))))) - (setf (ucols ffile) - (ucols-for-ufile ffile)) - (list ffile))) - -(defun datatype-for-col (colname) -"Return datatype for column name" - (car (cdr (find colname +col-datatypes+ :key #'car :test #'string-equal)))) + (with-umls-file (line files-filename) + (destructuring-bind (fil des fmt cls rws bts) line + (push (make-ufile + fil des (substitute #\_ #\. fil) (parse-integer cls) + (parse-integer rws) (parse-integer bts) + (concatenate 'list (umls-field-string-to-list fmt) + (custom-colnames-for-filename fil))) + files))) + (nreverse files))) -(defun add-datatype-to-col (col datatype) -"Add data type information to column" - (setf (datatype col) datatype) - (case datatype - (sql-u (setf (sqltype col) "INTEGER" - (parse-fun col) #'parse-ui - (quotechar col) "")) - (sql-s (setf (sqltype col) "SMALLINT" - (parse-fun col) #'parse-integer - (quotechar col) "")) - (sql-l (setf (sqltype col) "BIGINT" - (parse-fun col) #'parse-integer - (quotechar col) "")) - (sql-i (setf (sqltype col) "INTEGER" - (parse-fun col) #'parse-integer - (quotechar col) "")) - (sql-f (setf (sqltype col) "NUMERIC" - (parse-fun col) #'read-from-string - (quotechar col) "")) - (t ; Default column type, optimized text storage - (setf (parse-fun col) #'add-sql-quotes - (quotechar col) "'") - (when (and (cmax col) (av col)) - (if (> (cmax col) 255) - (setf (sqltype col) "TEXT") - (if (< (- (cmax col) (av col)) 4) - (setf (sqltype col) "CHAR") ; if average bytes wasted < 4 - (setf (sqltype col) "VARCHAR"))))))) +(defun gen-ufiles-custom () + (make-ufile "MRXW.NONENG" "Custom NonEnglish Index" "MRXW_NONENG" + 5 0 0 (fields (find-ufile "MRXW.ENG"))))