1 ;;;; -*- Mode: Lisp; Syntax: ANSI-Common-Lisp; Base: 10; Package: umlisp -*-
2 ;;;; *************************************************************************
3 ;;;; FILE IDENTIFICATION
5 ;;;; Name: parse-common.lisp
6 ;;;; Purpose: Common, stable parsing routines for UMLisp
7 ;;;; Programmer: Kevin M. Rosenberg
8 ;;;; Date Started: Apr 2000
10 ;;;; $Id: parse-common.lisp,v 1.4 2002/10/21 02:23:46 kevin Exp $
12 ;;;; This file, part of UMLisp, is
13 ;;;; Copyright (c) 2000-2002 by Kevin M. Rosenberg, M.D.
15 ;;;; UMLisp users are granted the rights to distribute and use this software
16 ;;;; as governed by the terms of the GNU General Public License.
17 ;;;; *************************************************************************
20 (declaim (optimize (speed 3) (safety 1) (compilation-speed 0) (debug 3)))
22 (defun umls-pathname (filename &optional (extension ""))
23 "Return pathname for a umls filename"
27 (make-pathname :name (concatenate 'string filename extension))
28 (case (char filename 0)
40 (defun read-umls-line (strm)
41 "Read a line from a UMLS stream, split into fields"
42 (let ((line (read-line strm nil 'eof)))
43 (if (stringp line) ;; ensure not 'eof
44 (let* ((len (length line))
45 (maybe-remove-terminal ;; LRWD doesn't have '|' at end of line
46 (if (char= #\| (char line (1- len)))
47 (subseq line 0 (1- len))
49 (declare (fixnum len))
50 (delimited-string-to-list maybe-remove-terminal #\|))
54 ;;; Find field lengths for LEX and NET files
56 (defun file-field-lengths (files)
59 (setq file (umls-file-fil file))
60 (let (max-field count-field num-fields (count-lines 0))
61 (with-umls-file (fields file)
63 (setq num-fields (length fields))
64 (setq max-field (make-array num-fields :element-type 'fixnum
66 (setq count-field (make-array num-fields :element-type 'number
68 (dotimes (i (length fields))
70 (let ((len (length (nth i fields))))
71 (incf (aref count-field i) len)
72 (when (> len (aref max-field i))
73 (setf (aref max-field i) len))))
75 (dotimes (i num-fields)
76 (setf (aref count-field i) (float (/ (aref count-field i) count-lines))))
77 (push (list file max-field count-field) lengths)))
80 (defun init-field-lengths ()
81 "Initial colstruct field lengths for files that don't have a measurement.
82 Currently, these are the LEX and NET files."
83 (let ((measure-files '()))
84 (dolist (file *umls-files*)
85 (let ((filename (umls-file-fil file)))
86 (unless (or (char= #\M (char filename 0))
87 (char= #\m (char filename 0)))
88 (push file measure-files))))
89 (let ((length-lists (file-field-lengths measure-files)))
90 (dolist (length-list length-lists)
91 (let* ((filename (car length-list))
92 (max-field (cadr length-list))
93 (av-field (caddr length-list))
94 (file (find-umls-file filename)))
96 (if (/= (length max-field) (length (umls-file-fields file)))
97 (format t "Warning: Number of file fields ~A doesn't match length of fields in file structure ~S"
99 (dotimes (i (max (length max-field) (length (umls-file-fields file))))
101 (let* ((field (nth i (umls-file-fields file)))
102 (col (find-umls-col field filename)))
105 (setf (umls-col-max col) (aref max-field i))
106 (setf (umls-col-av col) (aref av-field i))
107 (add-datatype-to-col col (datatype-for-col (umls-col-col col))))
108 (error "can't find column ~A" field)))))))))))
112 ;;; UMLS column/file functions
114 (defun find-col-in-columns (colname filename cols)
115 "Returns list of umls-col structure for a column name and a filename"
117 (when (and (string-equal filename (umls-col-fil col))
118 (string-equal colname (umls-col-col col)))
119 (return-from find-col-in-columns col)))
122 (defun find-or-make-col-in-columns (colname filename cols)
123 (let ((col (find-col-in-columns colname filename cols)))
126 ;; try to find column name without a terminal digit
127 (let* ((last-char (char colname (1- (length colname))))
128 (digit (- (char-code last-char) (char-code #\0))))
129 (if (and (>= digit 0) (<= digit 9))
130 (let ((base-colname (subseq colname 0 (1- (length colname)))))
131 (setq col (find-col-in-columns base-colname filename cols))
133 (let ((new-col (make-umls-col
134 :col (copy-seq colname)
135 :des (copy-seq (umls-col-des col))
136 :ref (copy-seq (umls-col-ref col))
137 :min (umls-col-min col)
138 :max (umls-col-max col)
139 :fil (copy-seq (umls-col-fil col))
140 :sqltype (copy-seq (umls-col-sqltype col))
141 :dty (copy-seq (umls-col-dty col))
142 :parsefunc (umls-col-parsefunc col)
143 :quotechar (copy-seq (umls-col-quotechar col))
144 :datatype (umls-col-datatype col)
145 :custom-value-func (umls-col-custom-value-func col))))
146 (push new-col *umls-cols*)
148 (error "Couldn't find a base column for col ~A in file ~A"
150 (let ((new-col (make-umls-col
151 :col (copy-seq colname)
159 :parsefunc #'add-sql-quotes
162 :custom-value-func nil)))
163 (push new-col *umls-cols*)
166 (defun find-umls-col (colname filename)
167 "Returns list of umls-col structure for a column name and a filename"
168 (find-or-make-col-in-columns colname filename *umls-cols*))
170 (defun find-umls-file (filename)
171 "Returns umls-file structure for a filename"
172 (find-if (lambda (f) (string-equal filename (umls-file-fil f))) *umls-files*))
174 (defun umls-cols-for-umls-file (file)
175 "Returns list of umls-cols for a file structure"
176 (let ((filename (umls-file-fil file)))
177 (mapcar (lambda (col) (find-umls-col col filename))
178 (umls-file-fields file))))