X-Git-Url: http://git.kpe.io/?p=kmrcl.git;a=blobdiff_plain;f=strmatch.lisp;h=e48e2309728be96d07adbdbc2416e09cafbb94a6;hp=a56f3bf75930677c54a82d707991d7a934fc50a0;hb=6bdd43a1b1daa21561706ba5213e1442e4971351;hpb=5738e60dc3724dc7d022d0fd2d5f2dbe337be470 diff --git a/strmatch.lisp b/strmatch.lisp index a56f3bf..e48e230 100644 --- a/strmatch.lisp +++ b/strmatch.lisp @@ -7,7 +7,7 @@ ;;;; Programmer: Kevin M. Rosenberg ;;;; Date Started: Apr 2000 ;;;; -;;;; $Id: strmatch.lisp,v 1.1 2003/07/21 00:52:56 kevin Exp $ +;;;; $Id$ ;;;; ;;;; This file, part of KMRCL, is Copyright (c) 2002 by Kevin M. Rosenberg ;;;; @@ -20,55 +20,61 @@ (defun score-multiword-match (s1 s2) - "Score a match between two strings with s1 being reference string" - (let* ((word-list-1 (split-alphanumeric-string s1)) - (word-list-2 (split-alphanumeric-string s2)) - (n1 (length word-list-1)) - (n2 (length word-list-2)) - (unmatched n1) - (score 0) - (nlong 0) - (nshort 0) - short-list long-list) - (declare (fixnum n1 n2 nshort nlong score unmatched)) - (if (> n1 n2) - (progn - (setq nlong n1) - (setq nshort n2) - (setq long-list word-list-1) - (setq short-list word-list-2)) - (progn - (setq nlong n2) - (setq nshort n1) - (setq long-list word-list-2) - (setq short-list word-list-1))) - (decf score (* 3 (- nlong nshort))) ;; reduce score for extra words - (dotimes (iword nshort) + "Score a match between two strings with s1 being reference string. +S1 can be a string or a list or strings/conses" + (let* ((word-list-1 (if (stringp s1) + (split-alphanumeric-string s1) + s1)) + (word-list-2 (split-alphanumeric-string s2)) + (n1 (length word-list-1)) + (n2 (length word-list-2)) + (unmatched n1) + (score 0)) + (declare (fixnum n1 n2 score unmatched)) + (decf score (* 4 (abs (- n1 n2)))) + (dotimes (iword n1) (declare (fixnum iword)) - (kmrcl:aif (position (nth iword short-list) long-list :test #'string-equal) - (progn - (incf score (- 20 (abs (- kmrcl::it iword)))) - (decf unmatched)))) - (decf score (* 5 unmatched)) + (let ((w1 (nth iword word-list-1)) + pos) + (cond + ((consp w1) + (let ((first t)) + (dotimes (i-alt (length w1)) + (setq pos + (position (nth i-alt w1) word-list-2 + :test #'string-equal)) + (when pos + (incf score (- 30 + (if first 0 5) + (abs (- iword pos)))) + (decf unmatched) + (return)) + (setq first nil)))) + ((stringp w1) + (kmrcl:awhen (position w1 word-list-2 + :test #'string-equal) + (incf score (- 30 (abs (- kmrcl::it iword)))) + (decf unmatched)))))) + (decf score (* 4 unmatched)) score)) (defun multiword-match (s1 s2) "Matches two multiword strings, ignores case, word position, punctuation" (let* ((word-list-1 (split-alphanumeric-string s1)) - (word-list-2 (split-alphanumeric-string s2)) - (n1 (length word-list-1)) - (n2 (length word-list-2))) + (word-list-2 (split-alphanumeric-string s2)) + (n1 (length word-list-1)) + (n2 (length word-list-2))) (when (= n1 n2) ;; remove each word from word-list-2 as walk word-list-1 (dolist (w word-list-1) - (let ((p (position w word-list-2 :test #'string-equal))) - (unless p - (return-from multiword-match nil)) - (setf (nth p word-list-2) ""))) + (let ((p (position w word-list-2 :test #'string-equal))) + (unless p + (return-from multiword-match nil)) + (setf (nth p word-list-2) ""))) t))) - - - + + +