summaryrefslogtreecommitdiff
path: root/src/fuzzy_match.c
blob: 1283151abe39670490e30ddcf23a667ab7e1a3c0 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
#include <ctype.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>

#include "fuzzy_match.h"

#undef MAX
#define MAX(a, b) ((a) > (b) ? (a) : (b))

/*
 * Returns score if each character in pattern is found sequentially within str.
 * Returns INT32_MIN otherwise.
 *
 * The scoring system is taken from fts_fuzzy_match v0.1.0 by Forrest Smith,
 * which is licensed to the public domain.
 *
 * The factors affecting score are:
 *   - Bonuses:
 *     - If there are multiple adjacent matches.
 *     - If a match occurs after a separator character.
 *     - If a match is uppercase, and the previous character is lowercase.
 *
 *   - Penalties:
 *     - If there are letters before the first match.
 *     - If there are superfluous characters in str.
 */
int32_t fuzzy_match(const char *restrict pattern, const char *restrict str)
{
	if (*pattern == '\0') {
		return 0;
	}
	if (strlen(str) < strlen(pattern)) {
		return INT32_MIN;
	}

	const int adjacency_bonus = 5;
	const int separator_bonus = 10;
	const int camel_bonus = 10;

	const int leading_letter_penalty = -3;
	const int max_leading_letter_penalty = -9;
	const int unmatched_letter_penalty = -1;

	int32_t score = 0;
	const char *last_match = str;

	/*
	 * Iterate through pattern, jumping to the next matching character in
	 * str. This will only ever find the first fuzzy match, not the best,
	 * but it's good enough and very quick.
	 */
	for (size_t pidx = 0; pattern[pidx] != '\0'; pidx++) {
		char search[2] = { pattern[pidx], '\0' };
		char *match = strcasestr(last_match, search);
		if (match == NULL) {
			return INT32_MIN;
		}

		int32_t jump = match - last_match;

		/* Apply bonuses. */
		if (pidx > 0 && jump == 0) {
			score += adjacency_bonus;
		}
		if (pidx > 0 || jump > 0) {
			if (isupper(*match) && islower(*(match - 1))) {
				score += camel_bonus;
			}
			if (isalnum(*match) && !isalnum(*(match - 1))) {
				score += separator_bonus;
			}
		}
		if (pidx == 0 && jump == 0) {
			/* Match at start of string gets separator bonus. */
			score += separator_bonus;
		}

		/* Apply penalties. */
		if (pidx == 0) {
			score += MAX(leading_letter_penalty * jump,
					max_leading_letter_penalty);
		}

		last_match = match + 1;
	}

	score += unmatched_letter_penalty * (strlen(str) - strlen(pattern));

	return score;
}