strutil/subsequence: rank exact matches above non-exact matches

Signed-off-by: Julien Pivotto <291750+roidelapluie@users.noreply.github.com>
2026-06-11 09:30:13 -04:00 · 2026-04-24 12:25:09 +02:00 · 2026-04-24 12:25:09 +02:00 · a597d2250a
commit a597d2250a
parent 551b5b1c56
2 changed files with 37 additions and 23 deletions
--- a/util/strutil/subsequence.go
+++ b/util/strutil/subsequence.go
@@ -19,6 +19,9 @@ package strutil

 import "strings"

+// Non-exact matches are scaled below 1.0 so rounded scores stay distinguishable from exact matches.
+const subsequenceNonExactScoreScale = 0.999
+
 // SubsequenceMatcher pre-computes the encoding of a fixed search pattern so
 // that it can be scored against many candidate strings without repeating the
 // ASCII check or rune conversion on the pattern for every call. The first
@@ -52,7 +55,7 @@ func NewSubsequenceMatcher(pattern string) *SubsequenceMatcher {
 // text in more than one way.
 //
 // The raw scoring formula is: Σ(interval_size²) − Σ(gap_size / text_length) − trailing_gap / (2 * text_length).
-// The result is normalized by pattern_length² (the maximum possible raw score).
+// The result is normalized by pattern_length² and scaled below 1.0 for non-exact matches.
 func (m *SubsequenceMatcher) Score(text string) float64 {
 	if m.pattern == "" {
 		return 1.0
@@ -184,7 +187,7 @@ func matchSubsequenceString(pattern, text string) float64 {
 	if bestScore < 0 {
 		return 0.0
 	}
-	return bestScore / float64(patternLen*patternLen)
+	return normalizeSubsequenceScore(bestScore, patternLen)
 }

 // matchSubsequenceRunes implements the scoring algorithm over pre-converted
@@ -267,6 +270,10 @@ func matchSubsequenceRunes(patternSlice, textSlice []rune) float64 {
 		return 0.0
 	}

-	// Normalize by pattern_length² (the maximum possible raw score).
-	return bestScore / float64(patternLen*patternLen)
+	return normalizeSubsequenceScore(bestScore, patternLen)
+}
+
+func normalizeSubsequenceScore(rawScore float64, patternLen int) float64 {
+	score := rawScore / float64(patternLen*patternLen)
+	return score * subsequenceNonExactScoreScale
 }
--- a/util/strutil/subsequence_test.go
+++ b/util/strutil/subsequence_test.go
@@ -135,28 +135,35 @@ func TestSubsequenceScore(t *testing.T) {
 			name:    "prefix match",
 			pattern: "my",
 			text:    "my awesome text",
-			// intervals [0,1], leading=0, trailing=13. raw = 4 - 13/30, normalized by 4.
-			wantScore: 107.0 / 120.0,
+			// Intervals [0,1], leading=0, trailing=13. raw = 4 - 13/30, normalized by 4 and scaled by 0.999.
+			wantScore: 107.0 / 120.0 * 0.999,
 		},
 		{
 			name:    "substring match",
 			pattern: "tex",
 			text:    "my awesome text",
-			// intervals [11,13], leading=11, trailing=1. raw = 9 - 11/15 - 1/30, normalized by 9.
-			wantScore: 247.0 / 270.0,
+			// Intervals [11,13], leading=11, trailing=1. raw = 9 - 11/15 - 1/30, normalized by 9.
+			wantScore: 247.0 / 270.0 * 0.999,
 		},
 		{
 			name:    "fuzzy match picks best starting position",
 			pattern: "met",
 			text:    "my awesome text",
-			// intervals [8,9] and [11,11], leading=8, inner gap=1, trailing=3. raw = 5 - 9/15 - 3/30, normalized by 9.
-			wantScore: 43.0 / 90.0,
+			// Intervals [8,9] and [11,11], leading=8, inner gap=1, trailing=3. raw = 5 - 9/15 - 3/30, normalized by 9.
+			wantScore: 43.0 / 90.0 * 0.999,
 		},
 		{
 			name:      "prefers later position with better consecutive run",
 			pattern:   "bac",
 			text:      "babac",
-			wantScore: 43.0 / 45.0, // match at [2,4], leading gap=2, trailing=0. raw = 9 - 2/5, normalized by 9.
+			wantScore: 43.0 / 45.0 * 0.999, // Match at [2,4], leading gap=2, trailing=0. raw = 9 - 2/5, normalized by 9 and scaled by 0.999.
+		},
+		{
+			name:    "longer prefix match stays below exact match",
+			pattern: "handler1",
+			text:    "handler10",
+			// Intervals [0,7], leading=0, trailing=1. raw = 64 - 1/18, normalized by 64 and scaled.
+			wantScore: 1149849.0 / 1152000.0,
 		},
 		{
 			name:     "pattern longer than text",
@@ -192,8 +199,8 @@ func TestSubsequenceScore(t *testing.T) {
 			name:    "unicode prefix match",
 			pattern: "éà",
 			text:    "éàü",
-			// intervals [0,1], leading=0, trailing=1. raw = 4 - 1/6, normalized by 4.
-			wantScore: 23.0 / 24.0,
+			// Intervals [0,1], leading=0, trailing=1. raw = 4 - 1/6, normalized by 4.
+			wantScore: 23.0 / 24.0 * 0.999,
 		},
 		{
 			name:     "unicode no match",
@@ -211,16 +218,16 @@ func TestSubsequenceScore(t *testing.T) {
 			name:    "unicode fuzzy match with gap between intervals",
 			pattern: "éü",
 			text:    "éàü",
-			// intervals [0,0] and [2,2], leading=0, inner gap=1, trailing=0.
-			// raw = 1 + 1 - 1/3, normalized by 4.
-			wantScore: 5.0 / 12.0,
+			// Intervals [0,0] and [2,2], leading=0, inner gap=1, trailing=0.
+			// Raw = 1 + 1 - 1/3, normalized by 4 and scaled by 0.999.
+			wantScore: 5.0 / 12.0 * 0.999,
 		},
 		{
 			name:    "mixed ascii and unicode",
 			pattern: "aé",
 			text:    "aéb",
-			// intervals [0,1], leading=0, trailing=1. raw = 4 - 1/6, normalized by 4.
-			wantScore: 23.0 / 24.0,
+			// Intervals [0,1], leading=0, trailing=1. raw = 4 - 1/6, normalized by 4.
+			wantScore: 23.0 / 24.0 * 0.999,
 		},
 		{
 			name: "unicode chars sharing leading utf-8 byte do not match",
@@ -241,18 +248,18 @@ func TestSubsequenceScore(t *testing.T) {
 			pattern: "oa",
 			text:    "goat",
 			// 'o'(1),'a'(2) form one interval [1,2], leading gap=1, trailing=1.
-			// raw = 2² - 1/4 - 1/8 = 29/8, normalized by 2² = 4.
-			wantScore: 29.0 / 32.0,
+			// Raw = 2² - 1/4 - 1/8 = 29/8, normalized by 2² = 4 and scaled by 0.999.
+			wantScore: 29.0 / 32.0 * 0.999,
 		},
 		{
 			name:    "repeated chars use greedy match",
 			pattern: "abaa",
 			text:    "abbaa",
 			// Matches 'a'(0),'b'(1),'a'(3),'a'(4) as intervals [0,1] and [3,4].
-			// raw = 2² + 2² - 1/5, normalized by 4² = 16.
+			// Raw = 2² + 2² - 1/5, normalized by 4² = 16 and scaled by 0.999.
 			// A better match exists at 'a'(0),'b'(2),'a'(3),'a'(4), which would score 49/80,
 			// but this test documents the current greedy behavior.
-			wantScore: 39.0 / 80.0,
+			wantScore: 39.0 / 80.0 * 0.999,
 		},
 	}

@@ -271,7 +278,7 @@ func TestSubsequenceScore(t *testing.T) {
 func TestSubsequenceScoreProperties(t *testing.T) {
 	// Prefix match scores below 1.0; only exact match scores 1.0.
 	// "pro" in "prometheus": intervals [0,2], trailing=7. raw = 9 - 7/20, normalized by 9.
-	require.InDelta(t, 173.0/180.0, NewSubsequenceMatcher("pro").Score("prometheus"), 1e-9)
+	require.InDelta(t, 173.0/180.0*0.999, NewSubsequenceMatcher("pro").Score("prometheus"), 1e-9)

 	// Exact match always scores 1.0.
 	require.Equal(t, 1.0, NewSubsequenceMatcher("prometheus").Score("prometheus"))