diff options
author | Phil Jones <philj56@gmail.com> | 2022-10-22 18:32:16 +0100 |
---|---|---|
committer | Phil Jones <philj56@gmail.com> | 2022-10-22 18:32:16 +0100 |
commit | 820fb11b9bb034a1ee2685ff9272f031d27dcd38 (patch) | |
tree | eed9e25434c2394dbfb0d7e0e4cc297694e00443 | |
parent | d1b94b4487c72b13320a281ad6a2fc291e4c79c7 (diff) |
Limit max line length for perfect fuzzy matching.
Fuzzy matching could previously take a very long time for long input
lines, as the number of possible matches quickly explodes. This commit
sets a line length limit of 100 characters - longer lines will only
return the first fuzzy match, rather than the best one.
-rw-r--r-- | src/fuzzy_match.c | 35 |
1 files changed, 33 insertions, 2 deletions
diff --git a/src/fuzzy_match.c b/src/fuzzy_match.c index b52aa13..2b6518f 100644 --- a/src/fuzzy_match.c +++ b/src/fuzzy_match.c @@ -20,6 +20,7 @@ static int32_t fuzzy_match_recurse( const char *restrict pattern, const char *restrict str, int32_t score, + bool first_match_only, bool first_char); /* @@ -93,8 +94,32 @@ int32_t fuzzy_match(const char *restrict pattern, const char *restrict str) /* We can already penalise any unused letters. */ score += unmatched_letter_penalty * (int32_t)(slen - plen); + /* + * If the string is more than 100 characters, just find the first fuzzy + * match rather than the best. + * + * This is required as the number of possible matches (for patterns and + * strings all consisting of one letter) scales something like: + * + * slen! / (plen! (slen - plen)!) ~ slen^plen for plen << slen + * + * This quickly grinds everything to a halt. 100 is chosen fairly + * arbitrarily from the following logic: + * + * - e is the most common character in English, at around 13% of + * letters. Depending on the context, let's say this be up to 20%. + * - 100 * 0.20 = 20 repeats of the same character. + * - In the worst case here, 20! / (10! 10!) ~200,000 possible matches, + * which is "slow but not frozen" for my machine. + * + * In reality, this worst case shouldn't be hit, and finding the "best" + * fuzzy match in lines of text > 100 characters isn't really in scope + * for a dmenu clone. + */ + bool first_match_only = slen > 100; + /* Perform the match. */ - score = fuzzy_match_recurse(pattern, str, score, true); + score = fuzzy_match_recurse(pattern, str, score, first_match_only, true); return score; } @@ -112,6 +137,7 @@ int32_t fuzzy_match_recurse( const char *restrict pattern, const char *restrict str, int32_t score, + bool first_match_only, bool first_char) { if (*pattern == '\0') { @@ -137,9 +163,14 @@ int32_t fuzzy_match_recurse( utf8_next_char(pattern), utf8_next_char(match), compute_score(jump, first_char, match), + first_match_only, false); best_score = MAX(best_score, subscore); - match++; + match = utf8_next_char(match); + + if (first_match_only) { + break; + } } if (best_score == INT32_MIN) { |