tweak(cli): use fancier string-dist suggestion alg
To improve the quality of "did you mean?"-style suggestions, shift from using Ratcliff-Obershelp similarity to the Restricted Damerau-Levenshtein string distance (also known as Optimal String Alignment). This code is a translation of a Julia implementation that I wrote a while ago: https://github.com/tecosaur/DataToolkitBase.jl/blob/v0.4.1/src/model/utils.jl#L40-L107 See https://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance#Optimal_string_alignment_distance and https://en.wikipedia.org/wiki/Gestalt_pattern_matching for more information on these algorithms.
This commit is contained in:
parent
1b906f6130
commit
bbf8934fd3
1 changed files with 63 additions and 26 deletions
|
@ -183,32 +183,69 @@ OPTIONS:
|
||||||
input (doom-cli-command-string (cdr command)))
|
input (doom-cli-command-string (cdr command)))
|
||||||
command)))))
|
command)))))
|
||||||
|
|
||||||
(defun doom-cli-help--similarity (s1 s2)
|
(defun doom-cli-help--similarity (a b)
|
||||||
;; Ratcliff-Obershelp similarity
|
(- 1 (/ (float (doom-cli-help--string-distance a b))
|
||||||
(let* ((s1 (downcase s1))
|
(max (length a) (length b)))))
|
||||||
(s2 (downcase s2))
|
|
||||||
(s1len (length s1))
|
(defun doom-cli-help--string-distance (a b)
|
||||||
(s2len (length s2)))
|
"Calculate the Restricted Damerau-Levenshtein distance between A and B.
|
||||||
(if (or (zerop s1len)
|
This is also known as the Optimal String Alignment algorithm.
|
||||||
(zerop s2len))
|
|
||||||
0.0
|
It is assumed that A and B are both strings, and before processing both are
|
||||||
(/ (let ((i 0) (j 0) (score 0) jlast)
|
converted to lowercase.
|
||||||
(while (< i s1len)
|
|
||||||
(unless jlast (setq jlast j))
|
This returns the minimum number of edits required to transform A
|
||||||
(if (and (< j s2len)
|
to B, where each edit is a deletion, insertion, substitution, or
|
||||||
(= (aref s1 i) (aref s2 j)))
|
transposition of a character, with the restriction that no
|
||||||
(progn (cl-incf score)
|
substring is edited more than once."
|
||||||
(cl-incf i)
|
(let ((a (downcase a))
|
||||||
(cl-incf j))
|
(b (downcase b))
|
||||||
(setq m 0)
|
(alen (length a))
|
||||||
(cl-incf j)
|
(blen (length b))
|
||||||
(when (>= j s2len)
|
(start 0))
|
||||||
(setq j (or jlast j)
|
(when (> alen blen)
|
||||||
jlast nil)
|
(let ((c a)
|
||||||
(cl-incf i))))
|
(clen alen))
|
||||||
(* 2.0 score))
|
(setq a b alen blen
|
||||||
(+ (length s1)
|
b c blen clen)))
|
||||||
(length s2))))))
|
(while (and (< start (min alen blen))
|
||||||
|
(= (aref a start) (aref b start)))
|
||||||
|
(cl-incf start))
|
||||||
|
(cl-decf start)
|
||||||
|
(if (= (1+ start) alen)
|
||||||
|
(- blen start)
|
||||||
|
(let ((v0 (make-vector (- blen start) 0))
|
||||||
|
(v1 (make-vector (- blen start) 0))
|
||||||
|
(a_i (aref a (max 0 start)))
|
||||||
|
(current 0)
|
||||||
|
a_i-1 b_j b_j-1
|
||||||
|
left transition-next
|
||||||
|
above this-transition)
|
||||||
|
(dotimes (vi (length v0))
|
||||||
|
(aset v0 vi (1+ vi)))
|
||||||
|
(dolist (i (number-sequence (1+ start) (1- alen)))
|
||||||
|
(setq a_i-1 a_i
|
||||||
|
a_i (aref a i)
|
||||||
|
b_j (aref b (max 0 start))
|
||||||
|
left (- i start 1)
|
||||||
|
current (- i start)
|
||||||
|
transition-next 0)
|
||||||
|
(dolist (j (number-sequence (1+ start) (1- blen)))
|
||||||
|
(setq b_j-1 b_j
|
||||||
|
b_j (aref b j)
|
||||||
|
above current
|
||||||
|
current left
|
||||||
|
this-transition transition-next
|
||||||
|
transition-next (aref v1 (- j start)))
|
||||||
|
(aset v1 (- j start) current)
|
||||||
|
(setq left (aref v0 (- j start)))
|
||||||
|
(unless (= a_i b_j)
|
||||||
|
;; Minimum between substitution, deletion, and insertion
|
||||||
|
(setq current (min (1+ current) (1+ above) (1+ left)))
|
||||||
|
(when (and (> i (1+ start)) (> j (1+ start)) (= a_i b_j-1) (= a_i-1 b_j))
|
||||||
|
(setq current (min current (cl-incf this-transition)))))
|
||||||
|
(aset v0 (- j start) current)))
|
||||||
|
current))))
|
||||||
|
|
||||||
;;; Help: printers
|
;;; Help: printers
|
||||||
;; TODO Parameterize optional args with `cl-defun'
|
;; TODO Parameterize optional args with `cl-defun'
|
||||||
|
|
Loading…
Reference in a new issue