summary | refs | log | tree | commit | diff
path: root/src/string_vec.c
diff options
context:
space:
mode:
author Phil Jones <philj56@gmail.com> 2022-10-18 19:33:41 +0100
committer Phil Jones <philj56@gmail.com> 2022-10-18 19:33:41 +0100
commit 5482f0be746a98bdd6b2c54183b54dd2ff2a0192 (patch)
tree dc58c6a1c486432f853bd3a5f9f7c78767c7292a /src/string_vec.c
parent 8872f664671711b97e02fe97f31746b5e158e627 (diff)
Improve UTF-8 handling.
This should allow case-insensitive matching for non-Latin characters, and fix matching for characters with diacritics.
Diffstat (limited to 'src/string_vec.c')
-rw-r--r-- src/string_vec.c 6
1 file changed, 5 insertions, 1 deletion
diff --git a/src/string_vec.c b/src/string_vec.c
index 50dd813..2ef40a2 100644
--- a/src/string_vec.c
+++ b/src/string_vec.c
@@ -6,6 +6,7 @@
#include <sys/mman.h>
#include "fuzzy_match.h"
#include "string_vec.h"
+#include "utf8.h"
#include "xmalloc.h"
static int cmpstringp(const void *restrict a, const void *restrict b)
@@ -80,7 +81,10 @@ void string_vec_add(struct string_vec *restrict vec, const char *restrict str)
vec->size *= 2;
vec->buf = xrealloc(vec->buf, vec->size * sizeof(vec->buf[0]));
}
- vec->buf[vec->count].string = xstrdup(str);
+ vec->buf[vec->count].string = utf8_normalize(str);
+ if (vec->buf[vec->count].string == NULL) {
+ vec->buf[vec->count].string = xstrdup(str);
+ }
vec->buf[vec->count].search_score = 0;
vec->buf[vec->count].history_score = 0;
vec->count++;