diff options
-rw-r--r-- | src/string_vec.c | 27 | ||||
-rw-r--r-- | src/unicode.c | 6 | ||||
-rw-r--r-- | src/unicode.h | 2 |
3 files changed, 33 insertions, 2 deletions
diff --git a/src/string_vec.c b/src/string_vec.c index 8c59607..342fd9f 100644 --- a/src/string_vec.c +++ b/src/string_vec.c @@ -83,6 +83,9 @@ struct string_vec string_vec_copy(const struct string_vec *restrict vec) void string_vec_add(struct string_vec *restrict vec, const char *restrict str) { + if (!utf8_validate(str)) { + return; + } if (vec->count == vec->size) { vec->size *= 2; vec->buf = xrealloc(vec->buf, vec->size * sizeof(vec->buf[0])); @@ -96,6 +99,19 @@ void string_vec_add(struct string_vec *restrict vec, const char *restrict str) vec->count++; } +/* Same as string_vec_add(), but assume str is normalized for speed. */ +static void string_vec_add_normalized(struct string_vec *restrict vec, const char *restrict str) +{ + if (vec->count == vec->size) { + vec->size *= 2; + vec->buf = xrealloc(vec->buf, vec->size * sizeof(vec->buf[0])); + } + vec->buf[vec->count].string = xstrdup(str); + vec->buf[vec->count].search_score = 0; + vec->buf[vec->count].history_score = 0; + vec->count++; +} + void string_vec_sort(struct string_vec *restrict vec) { qsort(vec->buf, vec->count, sizeof(vec->buf[0]), cmpstringp); @@ -160,7 +176,11 @@ struct string_vec string_vec_filter( search_score = fuzzy_match_simple_words(substr, vec->buf[i].string); } if (search_score != INT32_MIN) { - string_vec_add(&filt, vec->buf[i].string); + /* + * Assume that the vector we're filtering is already + * normalized. + */ + string_vec_add_normalized(&filt, vec->buf[i].string); /* * Store the position of the match in the string as * its search_score, for later sorting. @@ -191,7 +211,10 @@ struct string_vec string_vec_load(FILE *file) if (line[bytes_read - 1] == '\n') { line[bytes_read - 1] = '\0'; } - string_vec_add(&vec, line); + /* + * Assume that the vector we're loading is already normalized. + */ + string_vec_add_normalized(&vec, line); } free(line); diff --git a/src/unicode.c b/src/unicode.c index 3833fb6..7ddc0d5 100644 --- a/src/unicode.c +++ b/src/unicode.c @@ -1,3 +1,4 @@ +#include <stdbool.h> #include <string.h> #include "unicode.h" @@ -115,3 +116,8 @@ char *utf8_compose(const char *s) { return g_utf8_normalize(s, -1, G_NORMALIZE_DEFAULT_COMPOSE); } + +bool utf8_validate(const char *s) +{ + return g_utf8_validate(s, -1, NULL); +} diff --git a/src/unicode.h b/src/unicode.h index e198231..d32303d 100644 --- a/src/unicode.h +++ b/src/unicode.h @@ -2,6 +2,7 @@ #define UNICODE_H #include <glib.h> +#include <stdbool.h> #include <stdint.h> uint8_t utf32_to_utf8(uint32_t c, char *buf); @@ -24,5 +25,6 @@ size_t utf8_strlen(const char *s); char *utf8_strcasestr(const char * restrict haystack, const char * restrict needle); char *utf8_normalize(const char *s); char *utf8_compose(const char *s); +bool utf8_validate(const char *s); #endif /* UNICODE_H */ |