(gen-ucols)
(set-ucols-for-ufiles *umls-files*)
(ensure-field-lengths))
- (error (e)
- (warn "Error reading ucols+ufiles: ~A." e)
- (setf *umls-cols* nil *umls-files* nil)
- nil))
+ (error (e)
+ (warn "Error reading ucols+ufiles: ~A." e)
+ (setf *umls-cols* nil *umls-files* nil)
+ nil))
t)
(defun ufile-pathname (ufile &optional (extension ""))
"Return pathname for a umls filename with an optional extension"
(assert (typep ufile 'ufile))
- (let* ((dirs (append (list (dir ufile))
- (awhen (subdir ufile) (list it))))
+ (let* ((dirs (nconc (list (dir ufile))
+ (awhen (subdir ufile) (list it))))
(name-list (delimited-string-to-list (fil ufile) #\.))
(name (if (second name-list)
(first name-list)
"Initial colstruct field lengths for files that don't have a measurement.
Currently, these are the LEX and NET files."
(dolist (length-list (ufiles-field-lengths (ufiles-to-measure)))
- (destructuring-bind (filename fields-max fields-av) length-list
+ (destructuring-bind (filename fields-max fields-av count-lines) length-list
(let ((file (find-ufile filename)))
(unless file
(error "Can't find ~A filename in ufiles" filename))
- (unless (= (length fields-max) (length (fields file)))
- (error
- "Number of file fields ~A not equal to field count in ufile ~S"
- fields-max file))
- (dotimes (i (length (fields file)))
- (declare (fixnum i))
- (let* ((field (nth i (fields file)))
- (col (find-ucol field filename)))
- (unless col
- (error "can't find column ~A" field))
- (setf (cmax col) (aref fields-max i))
- (setf (av col) (aref fields-av i))
- (ensure-ucol-datatype col (datatype-for-colname (col col)))))))))
+ (if (zerop count-lines)
+ (warn "File ~A is empty." filename)
+ (progn
+ (unless (= (length fields-max) (length (fields file)))
+ (error
+ "Number of file fields ~A not equal to field count in ufile ~S"
+ fields-max file))
+ (dotimes (i (length (fields file)))
+ (declare (fixnum i))
+ (let* ((field (nth i (fields file)))
+ (col (find-ucol field filename)))
+ (unless col
+ (error "can't find column ~A" field))
+ (setf (cmax col) (aref fields-max i))
+ (setf (av col) (aref fields-av i))
+ (ensure-ucol-datatype col (datatype-for-colname (col col)))))))))))
(defun ufiles-to-measure ()
"Returns a list of ufiles to measure"
:initial-element 0))
(setq fields-av (make-array num-fields :element-type '(or integer float)
:initial-element 0)))
- (dotimes (i num-fields)
+ (dotimes (i (or num-fields 0))
(declare (fixnum i))
(let* ((str (nth i line))
(len (length #-(and clisp unicode) str
(when (> len (aref fields-max i))
(setf (aref fields-max i) len))))
(incf count-lines))
- (dotimes (i num-fields)
- (setf (aref fields-av i) (float (/ (aref fields-av i) count-lines))))
- (list (fil ufile) fields-max fields-av)))
+ (dotimes (i (or num-fields 0))
+ (setf (aref fields-av i)
+ (if (plusp count-lines)
+ (float (/ (aref fields-av i) count-lines))
+ 0)))
+ (list (fil ufile) fields-max fields-av count-lines)))
;;; UMLS column/file functions
(defun escape-column-name (name)
(substitute #\_ #\/ name))
+
+;; SQLNAME is required for collision of SQL reserved words (MYSQL 8: RANK)
+;; and column names in UMLS (RANK in MRRANK)
+(defvar *sql-reserved-names* '("RANK"))
+(defmethod sqlname ((c ucol))
+ (sqlname (col c)))
+(defmethod sqlname ((name string))
+ (if (find name *sql-reserved-names* :test #'string-equal)
+ (concatenate 'string "_" name)
+ name))
+(defmethod sqlname ((l list))
+ (mapcar #'sqlname l))
+(defmethod sqlname ((s symbol))
+ (sqlname (symbol-name s)))