diff options
Diffstat (limited to 'src/fuzzy_match.c')
-rw-r--r-- | src/fuzzy_match.c | 92 |
1 files changed, 92 insertions, 0 deletions
diff --git a/src/fuzzy_match.c b/src/fuzzy_match.c new file mode 100644 index 0000000..1283151 --- /dev/null +++ b/src/fuzzy_match.c @@ -0,0 +1,92 @@ +#include <ctype.h> +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> +#include <string.h> + +#include "fuzzy_match.h" + +#undef MAX +#define MAX(a, b) ((a) > (b) ? (a) : (b)) + +/* + * Returns score if each character in pattern is found sequentially within str. + * Returns INT32_MIN otherwise. + * + * The scoring system is taken from fts_fuzzy_match v0.1.0 by Forrest Smith, + * which is licensed to the public domain. + * + * The factors affecting score are: + * - Bonuses: + * - If there are multiple adjacent matches. + * - If a match occurs after a separator character. + * - If a match is uppercase, and the previous character is lowercase. + * + * - Penalties: + * - If there are letters before the first match. + * - If there are superfluous characters in str. + */ +int32_t fuzzy_match(const char *restrict pattern, const char *restrict str) +{ + if (*pattern == '\0') { + return 0; + } + if (strlen(str) < strlen(pattern)) { + return INT32_MIN; + } + + const int adjacency_bonus = 5; + const int separator_bonus = 10; + const int camel_bonus = 10; + + const int leading_letter_penalty = -3; + const int max_leading_letter_penalty = -9; + const int unmatched_letter_penalty = -1; + + int32_t score = 0; + const char *last_match = str; + + /* + * Iterate through pattern, jumping to the next matching character in + * str. This will only ever find the first fuzzy match, not the best, + * but it's good enough and very quick. + */ + for (size_t pidx = 0; pattern[pidx] != '\0'; pidx++) { + char search[2] = { pattern[pidx], '\0' }; + char *match = strcasestr(last_match, search); + if (match == NULL) { + return INT32_MIN; + } + + int32_t jump = match - last_match; + + /* Apply bonuses. */ + if (pidx > 0 && jump == 0) { + score += adjacency_bonus; + } + if (pidx > 0 || jump > 0) { + if (isupper(*match) && islower(*(match - 1))) { + score += camel_bonus; + } + if (isalnum(*match) && !isalnum(*(match - 1))) { + score += separator_bonus; + } + } + if (pidx == 0 && jump == 0) { + /* Match at start of string gets separator bonus. */ + score += separator_bonus; + } + + /* Apply penalties. */ + if (pidx == 0) { + score += MAX(leading_letter_penalty * jump, + max_leading_letter_penalty); + } + + last_match = match + 1; + } + + score += unmatched_letter_penalty * (strlen(str) - strlen(pattern)); + + return score; +} |