X-Git-Url: http://git.kpe.io/?p=umlisp.git;a=blobdiff_plain;f=parse-common.lisp;h=b1395665b68eb612ef4de7fcfccaa16436ce2981;hp=ae7af531d762865617de025b4ad5bc2d9b998acc;hb=HEAD;hpb=cb84d39847530c3317d03230eb82af671c71ef79 diff --git a/parse-common.lisp b/parse-common.lisp index ae7af53..f7fab1e 100644 --- a/parse-common.lisp +++ b/parse-common.lisp @@ -25,10 +25,10 @@ (gen-ucols) (set-ucols-for-ufiles *umls-files*) (ensure-field-lengths)) - (error (e) - (warn "Error reading ucols+ufiles: ~A." e) - (setf *umls-cols* nil *umls-files* nil) - nil)) + (error (e) + (warn "Error reading ucols+ufiles: ~A." e) + (setf *umls-cols* nil *umls-files* nil) + nil)) t) @@ -89,23 +89,26 @@ "Initial colstruct field lengths for files that don't have a measurement. Currently, these are the LEX and NET files." (dolist (length-list (ufiles-field-lengths (ufiles-to-measure))) - (destructuring-bind (filename fields-max fields-av) length-list + (destructuring-bind (filename fields-max fields-av count-lines) length-list (let ((file (find-ufile filename))) (unless file (error "Can't find ~A filename in ufiles" filename)) - (unless (= (length fields-max) (length (fields file))) - (error - "Number of file fields ~A not equal to field count in ufile ~S" - fields-max file)) - (dotimes (i (length (fields file))) - (declare (fixnum i)) - (let* ((field (nth i (fields file))) - (col (find-ucol field filename))) - (unless col - (error "can't find column ~A" field)) - (setf (cmax col) (aref fields-max i)) - (setf (av col) (aref fields-av i)) - (ensure-ucol-datatype col (datatype-for-colname (col col))))))))) + (if (zerop count-lines) + (warn "File ~A is empty." filename) + (progn + (unless (= (length fields-max) (length (fields file))) + (error + "Number of file fields ~A not equal to field count in ufile ~S" + fields-max file)) + (dotimes (i (length (fields file))) + (declare (fixnum i)) + (let* ((field (nth i (fields file))) + (col (find-ucol field filename))) + (unless col + (error "can't find column ~A" field)) + (setf (cmax col) (aref fields-max i)) + (setf (av col) (aref fields-av i)) + (ensure-ucol-datatype col (datatype-for-colname (col col))))))))))) (defun ufiles-to-measure () "Returns a list of ufiles to measure" @@ -131,7 +134,7 @@ Currently, these are the LEX and NET files." :initial-element 0)) (setq fields-av (make-array num-fields :element-type '(or integer float) :initial-element 0))) - (dotimes (i num-fields) + (dotimes (i (or num-fields 0)) (declare (fixnum i)) (let* ((str (nth i line)) (len (length #-(and clisp unicode) str @@ -145,9 +148,12 @@ Currently, these are the LEX and NET files." (when (> len (aref fields-max i)) (setf (aref fields-max i) len)))) (incf count-lines)) - (dotimes (i num-fields) - (setf (aref fields-av i) (float (/ (aref fields-av i) count-lines)))) - (list (fil ufile) fields-max fields-av))) + (dotimes (i (or num-fields 0)) + (setf (aref fields-av i) + (if (plusp count-lines) + (float (/ (aref fields-av i) count-lines)) + 0))) + (list (fil ufile) fields-max fields-av count-lines))) ;;; UMLS column/file functions @@ -372,3 +378,17 @@ append a unique number (starting at 2) onto a column name that is repeated in th (defun escape-column-name (name) (substitute #\_ #\/ name)) + +;; SQLNAME is required for collision of SQL reserved words (MYSQL 8: RANK) +;; and column names in UMLS (RANK in MRRANK) +(defvar *sql-reserved-names* '("RANK")) +(defmethod sqlname ((c ucol)) + (sqlname (col c))) +(defmethod sqlname ((name string)) + (if (find name *sql-reserved-names* :test #'string-equal) + (concatenate 'string "_" name) + name)) +(defmethod sqlname ((l list)) + (mapcar #'sqlname l)) +(defmethod sqlname ((s symbol)) + (sqlname (symbol-name s)))