From 5482f0be746a98bdd6b2c54183b54dd2ff2a0192 Mon Sep 17 00:00:00 2001
From: Phil Jones
Date: Tue, 18 Oct 2022 19:33:41 +0100
Subject: Improve UTF-8 handling.

This should allow case-insensitive matching for non-Latin characters, and
fix matching for characters with diacritics.
---
 src/string_vec.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'src/string_vec.c')

diff --git a/src/string_vec.c b/src/string_vec.c
index 50dd813..2ef40a2 100644
--- a/src/string_vec.c
+++ b/src/string_vec.c
@@ -6,6 +6,7 @@
 #include
 #include "fuzzy_match.h"
 #include "string_vec.h"
+#include "utf8.h"
 #include "xmalloc.h"
 
 static int cmpstringp(const void *restrict a, const void *restrict b)
@@ -80,7 +81,10 @@ void string_vec_add(struct string_vec *restrict vec, const char *restrict str)
 		vec->size *= 2;
 		vec->buf = xrealloc(vec->buf, vec->size * sizeof(vec->buf[0]));
 	}
-	vec->buf[vec->count].string = xstrdup(str);
+	vec->buf[vec->count].string = utf8_normalize(str);
+	if (vec->buf[vec->count].string == NULL) {
+		vec->buf[vec->count].string = xstrdup(str);
+	}
 	vec->buf[vec->count].search_score = 0;
 	vec->buf[vec->count].history_score = 0;
 	vec->count++;
-- 
cgit v1.2.3