diff options
author | Phil Jones <philj56@gmail.com> | 2022-10-18 19:33:41 +0100 |
---|---|---|
committer | Phil Jones <philj56@gmail.com> | 2022-10-18 19:33:41 +0100 |
commit | 5482f0be746a98bdd6b2c54183b54dd2ff2a0192 (patch) | |
tree | dc58c6a1c486432f853bd3a5f9f7c78767c7292a /src/string_vec.c | |
parent | 8872f664671711b97e02fe97f31746b5e158e627 (diff) |
Improve UTF-8 handling.
This should allow case-insensitive matching for non-Latin characters,
and fix matching for characters with diacritics.
Diffstat (limited to 'src/string_vec.c')
-rw-r--r-- | src/string_vec.c | 6 |
1 files changed, 5 insertions, 1 deletions
```diff
diff --git a/src/string_vec.c b/src/string_vec.c
index 50dd813..2ef40a2 100644
--- a/src/string_vec.c
+++ b/src/string_vec.c
@@ -6,6 +6,7 @@
 #include <sys/mman.h>
 #include "fuzzy_match.h"
 #include "string_vec.h"
+#include "utf8.h"
 #include "xmalloc.h"
 
 static int cmpstringp(const void *restrict a, const void *restrict b)
@@ -80,7 +81,10 @@ void string_vec_add(struct string_vec *restrict vec, const char *restrict str)
 		vec->size *= 2;
 		vec->buf = xrealloc(vec->buf, vec->size * sizeof(vec->buf[0]));
 	}
-	vec->buf[vec->count].string = xstrdup(str);
+	vec->buf[vec->count].string = utf8_normalize(str);
+	if (vec->buf[vec->count].string == NULL) {
+		vec->buf[vec->count].string = xstrdup(str);
+	}
 	vec->buf[vec->count].search_score = 0;
 	vec->buf[vec->count].history_score = 0;
 	vec->count++;
```