diff options
author | Phil Jones <philj56@gmail.com> | 2023-04-17 23:43:05 +0100 |
---|---|---|
committer | Phil Jones <philj56@gmail.com> | 2023-04-17 23:43:05 +0100 |
commit | 574eff0df1aff9bdc6d32939a03312cc08803de3 (patch) | |
tree | 5aeca72f70314bee3bf95db99f10d89f0a7b4032 | |
parent | 71a4801d20d8904cfcfa5e92c96d53ee06a2c69f (diff) |
Add --matching-algorithm option.
This replaces the --fuzzy-match algorithm. Available choices are normal,
prefix and fuzzy. Levenshtein distance was investigated, but it seems
pretty rubbish for tofi's use case, where you normally want a good match when
you've only typed a small portion of the target string.
-rw-r--r-- | completions/tofi | 1 | ||||
-rw-r--r-- | doc/tofi.5.md | 14 | ||||
-rw-r--r-- | doc/tofi.5.scd | 13 | ||||
-rw-r--r-- | meson.build | 4 | ||||
-rw-r--r-- | src/config.c | 30 | ||||
-rw-r--r-- | src/desktop_vec.c | 21 | ||||
-rw-r--r-- | src/desktop_vec.h | 3 | ||||
-rw-r--r-- | src/fuzzy_match.h | 10 | ||||
-rw-r--r-- | src/input.c | 8 | ||||
-rw-r--r-- | src/main.c | 2 | ||||
-rw-r--r-- | src/matching.c (renamed from src/fuzzy_match.c) | 79 | ||||
-rw-r--r-- | src/matching.h | 14 | ||||
-rw-r--r-- | src/string_vec.c | 10 | ||||
-rw-r--r-- | src/string_vec.h | 3 | ||||
-rw-r--r-- | src/tofi.h | 3 | ||||
-rw-r--r-- | test/config.c | 6 | ||||
-rw-r--r-- | test/utf8.c | 36 |
17 files changed, 182 insertions, 75 deletions
diff --git a/completions/tofi b/completions/tofi index 3983090..e588694 100644 --- a/completions/tofi +++ b/completions/tofi @@ -76,6 +76,7 @@ _tofi() --hide-cursor --history --history-file + --matching-algorithm --fuzzy-match --require-match --auto-accept-single diff --git a/doc/tofi.5.md b/doc/tofi.5.md index e976ef7..30ffb98 100644 --- a/doc/tofi.5.md +++ b/doc/tofi.5.md @@ -73,8 +73,22 @@ options. > > > > > > tofi-drun: *\$XDG_STATE_HOME/tofi-drun-history* +**matching-algorithm**=*normal\|prefix\|fuzzy* + +> Select the matching algorithm used. If *normal*, substring matching is +> used, weighted to favour matches closer to the beginning of the +> string. If *prefix*, only substrings at the beginning of the string +> are matched. If *fuzzy*, searching is performed via a simple fuzzy +> matching algorithm. +> +> Default: normal + **fuzzy-match**=*true\|false* +> **WARNING**: This option is deprecated, and may be removed in a future +> version of tofi. You should use the **matching-algorithm** option +> instead. +> > If true, searching is performed via a simple fuzzy matching algorithm. > If false, substring matching is used, weighted to favour matches > closer to the beginning of the string. diff --git a/doc/tofi.5.scd b/doc/tofi.5.scd index 29547e0..013dc57 100644 --- a/doc/tofi.5.scd +++ b/doc/tofi.5.scd @@ -61,8 +61,21 @@ options. - tofi-run: _$XDG_STATE_HOME/tofi-history_ - tofi-drun: _$XDG_STATE_HOME/tofi-drun-history_ +*matching-algorithm*=_normal|prefix|fuzzy_ + Select the matching algorithm used. + If _normal_, substring matching is used, weighted to favour matches + closer to the beginning of the string. + If _prefix_, only substrings at the beginning of the string are matched. + If _fuzzy_, searching is performed via a simple fuzzy matching + algorithm. + + Default: normal *fuzzy-match*=_true|false_ + *WARNING*: This option is deprecated, and may be removed in a future + version of tofi. You should use the *matching-algorithm* + option instead. + If true, searching is performed via a simple fuzzy matching algorithm. If false, substring matching is used, weighted to favour matches closer to the beginning of the string. diff --git a/meson.build b/meson.build index 71f256d..0aa4949 100644 --- a/meson.build +++ b/meson.build @@ -104,7 +104,7 @@ common_sources = files( 'src/entry.c', 'src/entry_backend/pango.c', 'src/entry_backend/harfbuzz.c', - 'src/fuzzy_match.c', + 'src/matching.c', 'src/history.c', 'src/input.c', 'src/lock.c', @@ -121,7 +121,7 @@ common_sources = files( compgen_sources = files( 'src/main_compgen.c', 'src/compgen.c', - 'src/fuzzy_match.c', + 'src/matching.c', 'src/log.c', 'src/mkdirp.c', 'src/string_vec.c', diff --git a/src/config.c b/src/config.c index b78e3af..2b85028 100644 --- a/src/config.c +++ b/src/config.c @@ -79,6 +79,8 @@ static uint32_t fixup_percentage(uint32_t value, uint32_t base, bool is_percent) static uint32_t parse_anchor(const char *filename, size_t lineno, const char *str, bool *err); static enum cursor_style parse_cursor_style(const char *filename, size_t lineno, const char *str, bool *err); +static enum matching_algorithm parse_matching_algorithm(const char *filename, size_t lineno, const char *str, bool *err); + static bool parse_bool(const char *filename, size_t lineno, const char *str, bool *err); static uint32_t parse_char(const char *filename, size_t lineno, const char *str, bool *err); static struct color parse_color(const char *filename, size_t lineno, const char *str, bool *err); @@ -683,10 +685,18 @@ bool parse_option(struct tofi *tofi, const char *filename, size_t lineno, const } } else if (strcasecmp(option, "history-file") == 0) { snprintf(tofi->history_file, N_ELEM(tofi->history_file), "%s", value); + } else if (strcasecmp(option, "matching-algorithm") == 0) { + enum matching_algorithm val = parse_matching_algorithm(filename, lineno, value, &err); + if (!err) { + tofi->matching_algorithm= val; + } } else if (strcasecmp(option, "fuzzy-match") == 0) { + log_warning("The \"fuzzy-match\" option is deprecated, and may be removed in future. Please switch to \"matching-algorithm\".\n"); bool val = parse_bool(filename, lineno, value, &err); if (!err) { - tofi->fuzzy_match = val; + if (val) { + tofi->matching_algorithm = MATCHING_ALGORITHM_FUZZY; + } } } else if (strcasecmp(option, "require-match") == 0) { bool val = parse_bool(filename, lineno, value, &err); @@ -983,6 +993,24 @@ enum cursor_style parse_cursor_style(const char *filename, size_t lineno, const return 0; } +enum matching_algorithm parse_matching_algorithm(const char *filename, size_t lineno, const char *str, bool *err) +{ + if(strcasecmp(str, "normal") == 0) { + return MATCHING_ALGORITHM_NORMAL; + } + if(strcasecmp(str, "fuzzy") == 0) { + return MATCHING_ALGORITHM_FUZZY; + } + if(strcasecmp(str, "prefix") == 0) { + return MATCHING_ALGORITHM_PREFIX; + } + PARSE_ERROR(filename, lineno, "Invalid matching algorithm \"%s\".\n", str); + if (err) { + *err = true; + } + return 0; +} + struct color parse_color(const char *filename, size_t lineno, const char *str, bool *err) { struct color color = hex_to_color(str); diff --git a/src/desktop_vec.c b/src/desktop_vec.c index c9a0b86..1fd0b41 100644 --- a/src/desktop_vec.c +++ b/src/desktop_vec.c @@ -1,7 +1,7 @@ #include <glib.h> #include <stdbool.h> #include "desktop_vec.h" -#include "fuzzy_match.h" +#include "matching.h" #include "log.h" #include "string_vec.h" #include "unicode.h" @@ -150,31 +150,20 @@ struct desktop_entry *desktop_vec_find_sorted(struct desktop_vec *restrict vec, struct string_ref_vec desktop_vec_filter( const struct desktop_vec *restrict vec, const char *restrict substr, - bool fuzzy) + enum matching_algorithm algorithm) { struct string_ref_vec filt = string_ref_vec_create(); for (size_t i = 0; i < vec->count; i++) { int32_t search_score; - if (fuzzy) { - search_score = fuzzy_match_words(substr, vec->buf[i].name); - } else { - search_score = fuzzy_match_simple_words(substr, vec->buf[i].name); - } + search_score = match_words(algorithm, substr, vec->buf[i].name); if (search_score != INT32_MIN) { string_ref_vec_add(&filt, vec->buf[i].name); - /* - * Store the position of the match in the string as - * its search_score, for later sorting. - */ + /* Store the score of the match for later sorting. */ filt.buf[filt.count - 1].search_score = search_score; filt.buf[filt.count - 1].history_score = vec->buf[i].history_score; } else { /* If we didn't match the name, check the keywords. */ - if (fuzzy) { - search_score = fuzzy_match_words(substr, vec->buf[i].keywords); - } else { - search_score = fuzzy_match_simple_words(substr, vec->buf[i].keywords); - } + search_score = match_words(algorithm, substr, vec->buf[i].keywords); if (search_score != INT32_MIN) { string_ref_vec_add(&filt, vec->buf[i].name); /* diff --git a/src/desktop_vec.h b/src/desktop_vec.h index f8d0b79..cdc0d6c 100644 --- a/src/desktop_vec.h +++ b/src/desktop_vec.h @@ -5,6 +5,7 @@ #include <stddef.h> #include <stdio.h> #include <stdint.h> +#include "matching.h" struct desktop_entry { char *id; @@ -37,7 +38,7 @@ struct desktop_entry *desktop_vec_find_sorted(struct desktop_vec *restrict vec, struct string_ref_vec desktop_vec_filter( const struct desktop_vec *restrict vec, const char *restrict substr, - bool fuzzy); + enum matching_algorithm algorithm); struct desktop_vec desktop_vec_load(FILE *file); void desktop_vec_save(struct desktop_vec *restrict vec, FILE *restrict file); diff --git a/src/fuzzy_match.h b/src/fuzzy_match.h deleted file mode 100644 index d427bc6..0000000 --- a/src/fuzzy_match.h +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef FUZZY_MATCH_H -#define FUZZY_MATCH_H - -#include <stdint.h> - -int32_t fuzzy_match_simple_words(const char *restrict patterns, const char *restrict str); -int32_t fuzzy_match_words(const char *restrict patterns, const char *restrict str); -int32_t fuzzy_match(const char *restrict pattern, const char *restrict str); - -#endif /* FUZZY_MATCH_H */ diff --git a/src/input.c b/src/input.c index 1049fa9..ad89dbd 100644 --- a/src/input.c +++ b/src/input.c @@ -148,12 +148,12 @@ void add_character(struct tofi *tofi, xkb_keycode_t keycode) entry->input_utf8_length += len; if (entry->drun) { - struct string_ref_vec results = desktop_vec_filter(&entry->apps, entry->input_utf8, tofi->fuzzy_match); + struct string_ref_vec results = desktop_vec_filter(&entry->apps, entry->input_utf8, tofi->matching_algorithm); string_ref_vec_destroy(&entry->results); entry->results = results; } else { struct string_ref_vec tmp = entry->results; - entry->results = string_ref_vec_filter(&entry->results, entry->input_utf8, tofi->fuzzy_match); + entry->results = string_ref_vec_filter(&entry->results, entry->input_utf8, tofi->matching_algorithm); string_ref_vec_destroy(&tmp); } @@ -186,9 +186,9 @@ void input_refresh_results(struct tofi *tofi) entry->input_utf8_length = bytes_written; string_ref_vec_destroy(&entry->results); if (entry->drun) { - entry->results = desktop_vec_filter(&entry->apps, entry->input_utf8, tofi->fuzzy_match); + entry->results = desktop_vec_filter(&entry->apps, entry->input_utf8, tofi->matching_algorithm); } else { - entry->results = string_ref_vec_filter(&entry->commands, entry->input_utf8, tofi->fuzzy_match); + entry->results = string_ref_vec_filter(&entry->commands, entry->input_utf8, tofi->matching_algorithm); } reset_selection(tofi); @@ -834,7 +834,6 @@ static void usage(bool err) " --output <name> Name of output to display window on.\n" " --anchor <position> Location on screen to anchor window.\n" " --horizontal <true|false> List results horizontally.\n" -" --fuzzy-match <true|false> Use fuzzy matching for searching.\n" "\n" "All options listed in \"man 5 tofi\" are also accpted in the form \"--key=value\".\n" ); @@ -912,6 +911,7 @@ const struct option long_options[] = { {"history", required_argument, NULL, 0}, {"history-file", required_argument, NULL, 0}, {"fuzzy-match", required_argument, NULL, 0}, + {"matching-algorithm", required_argument, NULL, 0}, {"require-match", required_argument, NULL, 0}, {"auto-accept-single", required_argument, NULL, 0}, {"hide-input", required_argument, NULL, 0}, diff --git a/src/fuzzy_match.c b/src/matching.c index e9f1dcb..b0d184f 100644 --- a/src/fuzzy_match.c +++ b/src/matching.c @@ -4,17 +4,28 @@ #include <stdint.h> #include <string.h> -#include "fuzzy_match.h" +#include "matching.h" #include "unicode.h" #include "xmalloc.h" #undef MAX #define MAX(a, b) ((a) > (b) ? (a) : (b)) -static int32_t compute_score( - int32_t jump, - bool first_char, - const char *restrict match); +static int32_t simple_match_words( + const char *restrict patterns, + const char *restrict str); + +static int32_t prefix_match_words( + const char *restrict patterns, + const char *restrict str); + +static int32_t fuzzy_match_words( + const char *restrict patterns, + const char *restrict str); + +static int32_t fuzzy_match( + const char *restrict pattern, + const char *restrict str); static int32_t fuzzy_match_recurse( const char *restrict pattern, @@ -23,12 +34,39 @@ static int32_t fuzzy_match_recurse( bool first_match_only, bool first_char); +static int32_t compute_score( + int32_t jump, + bool first_char, + const char *restrict match); + +/* + * Select the appropriate algorithm, and return its score. + * Each algorithm returns larger scores for better matches, + * and returns INT32_MIN if a word is not found. + */ +int32_t match_words( + enum matching_algorithm algorithm, + const char *restrict patterns, + const char *restrict str) +{ + switch (algorithm) { + case MATCHING_ALGORITHM_NORMAL: + return simple_match_words(patterns, str); + case MATCHING_ALGORITHM_PREFIX: + return prefix_match_words(patterns, str); + case MATCHING_ALGORITHM_FUZZY: + return fuzzy_match_words(patterns, str); + default: + return INT32_MIN; + } +} + /* * Split patterns into words, and perform simple matching against str for each. - * Returns the sum of substring distances from the start of str. + * Returns the negative sum of substring distances from the start of str. * If a word is not found, returns INT32_MIN. */ -int32_t fuzzy_match_simple_words(const char *restrict patterns, const char *restrict str) +int32_t simple_match_words(const char *restrict patterns, const char *restrict str) { int32_t score = 0; char *saveptr = NULL; @@ -40,7 +78,32 @@ int32_t fuzzy_match_simple_words(const char *restrict patterns, const char *rest score = INT32_MIN; break; } else { - score += str - c; + score -= c - str; + } + pattern = strtok_r(NULL, " ", &saveptr); + } + free(tmp); + return score; +} + +/* + * Split patterns into words, and perform prefix matching against str for each. + * Returns the negative sum of remaining string suffix lengths. + * If a word is not found, returns INT32_MIN. + */ +int32_t prefix_match_words(const char *restrict patterns, const char *restrict str) +{ + int32_t score = 0; + char *saveptr = NULL; + char *tmp = utf8_normalize(patterns); + char *pattern = strtok_r(tmp, " ", &saveptr); + while (pattern != NULL) { + char *c = utf8_strcasestr(str, pattern); + if (c != str) { + score = INT32_MIN; + break; + } else { + score -= utf8_strlen(str) - utf8_strlen(pattern); } pattern = strtok_r(NULL, " ", &saveptr); } diff --git a/src/matching.h b/src/matching.h new file mode 100644 index 0000000..f619804 --- /dev/null +++ b/src/matching.h @@ -0,0 +1,14 @@ +#ifndef MATCHING_H +#define MATCHING_H + +#include <stdint.h> + +enum matching_algorithm { + MATCHING_ALGORITHM_NORMAL, + MATCHING_ALGORITHM_PREFIX, + MATCHING_ALGORITHM_FUZZY +}; + +int32_t match_words(enum matching_algorithm algorithm, const char *restrict patterns, const char *restrict str); + +#endif /* MATCHING_H */ diff --git a/src/string_vec.c b/src/string_vec.c index 039d191..7a8493f 100644 --- a/src/string_vec.c +++ b/src/string_vec.c @@ -5,8 +5,8 @@ #include <stdlib.h> #include <string.h> #include <sys/mman.h> -#include "fuzzy_match.h" #include "history.h" +#include "matching.h" #include "string_vec.h" #include "unicode.h" #include "xmalloc.h" @@ -181,7 +181,7 @@ struct scored_string_ref *string_ref_vec_find_sorted(struct string_ref_vec *rest struct string_ref_vec string_ref_vec_filter( const struct string_ref_vec *restrict vec, const char *restrict substr, - bool fuzzy) + enum matching_algorithm algorithm) { if (substr[0] == '\0') { return string_ref_vec_copy(vec); @@ -189,11 +189,7 @@ struct string_ref_vec string_ref_vec_filter( struct string_ref_vec filt = string_ref_vec_create(); for (size_t i = 0; i < vec->count; i++) { int32_t search_score; - if (fuzzy) { - search_score = fuzzy_match_words(substr, vec->buf[i].string); - } else { - search_score = fuzzy_match_simple_words(substr, vec->buf[i].string); - } + search_score = match_words(algorithm, substr, vec->buf[i].string); if (search_score != INT32_MIN) { string_ref_vec_add(&filt, vec->buf[i].string); filt.buf[filt.count - 1].search_score = search_score; diff --git a/src/string_vec.h b/src/string_vec.h index 6f54d56..4d77ad3 100644 --- a/src/string_vec.h +++ b/src/string_vec.h @@ -6,6 +6,7 @@ #include <stdint.h> #include <stdio.h> #include "history.h" +#include "matching.h" struct scored_string { char *string; @@ -73,7 +74,7 @@ struct scored_string_ref *string_ref_vec_find_sorted(struct string_ref_vec *rest struct string_ref_vec string_ref_vec_filter( const struct string_ref_vec *restrict vec, const char *restrict substr, - bool fuzzy); + enum matching_algorithm algorithm); [[nodiscard("memory leaked")]] struct string_ref_vec string_ref_vec_from_buffer(char *buffer); @@ -8,6 +8,7 @@ #include "clipboard.h" #include "color.h" #include "entry.h" +#include "matching.h" #include "surface.h" #include "wlr-layer-shell-unstable-v1.h" #include "fractional-scale-v1.h" @@ -90,6 +91,7 @@ struct tofi { /* Options */ uint32_t anchor; + enum matching_algorithm matching_algorithm; bool ascii_input; bool hide_cursor; bool use_history; @@ -97,7 +99,6 @@ struct tofi { bool late_keyboard_init; bool drun_launch; bool drun_print_exec; - bool fuzzy_match; bool require_match; bool auto_accept_single; bool multiple_instance; diff --git a/test/config.c b/test/config.c index 79f90e5..c15d66d 100644 --- a/test/config.c +++ b/test/config.c @@ -45,6 +45,12 @@ int main(int argc, char *argv[]) is_valid("text-cursor-style", "underscore", "Text cursor underscore"); isnt_valid("text-cursor-style", "blocky", "Invalid text cursor style"); + /* Matching algorithms */ + is_valid("matching-algorithm", "normal", "Normal matching"); + is_valid("matching-algorithm", "fuzzy", "Fuzzy matching"); + is_valid("matching-algorithm", "prefix", "Prefix matching"); + isnt_valid("matching-algorithm", "regex", "Regex matching"); + /* Bools */ is_valid("horizontal", "tRuE", "Boolean true"); is_valid("horizontal", "fAlSe", "Boolean false"); diff --git a/test/utf8.c b/test/utf8.c index f18eca0..5d75cb6 100644 --- a/test/utf8.c +++ b/test/utf8.c @@ -3,43 +3,33 @@ #include <stdio.h> #include <stdlib.h> #include <string.h> -#include "fuzzy_match.h" +#include "matching.h" #include "tap.h" -void is_simple_match(const char *pattern, const char *str, const char *message) +void is_single_match(enum matching_algorithm algorithm, const char *pattern, const char *str, const char *message) { - int32_t res = fuzzy_match_simple_words(pattern, str); + int32_t res = match_words(algorithm, pattern, str); tap_isnt(res, INT32_MIN, message); } -void isnt_simple_match(const char *pattern, const char *str, const char *message) +void isnt_single_match(enum matching_algorithm algorithm, const char *pattern, const char *str, const char *message) { - int32_t res = fuzzy_match_simple_words(pattern, str); - tap_is(res, INT32_MIN, message); -} - -void is_fuzzy_match(const char *pattern, const char *str, const char *message) -{ - int32_t res = fuzzy_match_words(pattern, str); - tap_isnt(res, INT32_MIN, message); -} - -void isnt_fuzzy_match(const char *pattern, const char *str, const char *message) -{ - int32_t res = fuzzy_match_words(pattern, str); + int32_t res = match_words(algorithm, pattern, str); tap_is(res, INT32_MIN, message); } void is_match(const char *pattern, const char *str, const char *message) { - is_simple_match(pattern, str, message); - is_fuzzy_match(pattern, str, message); + is_single_match(MATCHING_ALGORITHM_NORMAL, pattern, str, message); + is_single_match(MATCHING_ALGORITHM_PREFIX, pattern, str, message); + is_single_match(MATCHING_ALGORITHM_FUZZY, pattern, str, message); } void isnt_match(const char *pattern, const char *str, const char *message) { - isnt_simple_match(pattern, str, message); - isnt_fuzzy_match(pattern, str, message); + isnt_single_match(MATCHING_ALGORITHM_NORMAL, pattern, str, message); + isnt_single_match(MATCHING_ALGORITHM_PREFIX, pattern, str, message); + isnt_single_match(MATCHING_ALGORITHM_FUZZY, pattern, str, message); } int main(int argc, char *argv[]) @@ -56,9 +46,9 @@ int main(int argc, char *argv[]) /* Combining diacritics. */ isnt_match("o", "ọ", "Single character with composed diacritic"); - isnt_simple_match("ạ", "aọ", "Decomposed diacritics, character mismatch"); + isnt_single_match(MATCHING_ALGORITHM_NORMAL, "ạ", "aọ", "Decomposed diacritics, character mismatch"); tap_todo("Needs composed character comparison"); - isnt_fuzzy_match("ạ", "aọ", "Decomposed diacritics, character mismatch"); + isnt_single_match(MATCHING_ALGORITHM_FUZZY, "ạ", "aọ", "Decomposed diacritics, character mismatch"); tap_plan(); |