diff options
author | Phil Jones <philj56@gmail.com> | 2022-11-24 12:48:39 +0000 |
---|---|---|
committer | Phil Jones <philj56@gmail.com> | 2022-11-24 12:48:39 +0000 |
commit | 03fc67cc74cc08fff5eb61e855b54174683614ff (patch) | |
tree | dc8376aa977a0cff31ebc0d4283cb77cce0e934e | |
parent | e19d821a7ec5d8b8d528be93626bbc2effd15320 (diff) |
Speed up string vector filtering and loading.
We can assume that string vectors generated by tofi are already
normalised, so don't waste time normalising them again.
Also validate user-supplied strings as UTF-8 in string_vec_add(), silently skipping any that are invalid.
-rw-r--r-- | src/string_vec.c | 27 | ||||
-rw-r--r-- | src/unicode.c | 6 | ||||
-rw-r--r-- | src/unicode.h | 2 |
3 files changed, 33 insertions, 2 deletions
diff --git a/src/string_vec.c b/src/string_vec.c index 8c59607..342fd9f 100644 --- a/src/string_vec.c +++ b/src/string_vec.c @@ -83,6 +83,9 @@ struct string_vec string_vec_copy(const struct string_vec *restrict vec) void string_vec_add(struct string_vec *restrict vec, const char *restrict str) { + if (!utf8_validate(str)) { + return; + } if (vec->count == vec->size) { vec->size *= 2; vec->buf = xrealloc(vec->buf, vec->size * sizeof(vec->buf[0])); @@ -96,6 +99,19 @@ void string_vec_add(struct string_vec *restrict vec, const char *restrict str) vec->count++; } +/* Same as string_vec_add(), but assume str is normalized for speed. */ +static void string_vec_add_normalized(struct string_vec *restrict vec, const char *restrict str) +{ + if (vec->count == vec->size) { + vec->size *= 2; + vec->buf = xrealloc(vec->buf, vec->size * sizeof(vec->buf[0])); + } + vec->buf[vec->count].string = xstrdup(str); + vec->buf[vec->count].search_score = 0; + vec->buf[vec->count].history_score = 0; + vec->count++; +} + void string_vec_sort(struct string_vec *restrict vec) { qsort(vec->buf, vec->count, sizeof(vec->buf[0]), cmpstringp); @@ -160,7 +176,11 @@ struct string_vec string_vec_filter( search_score = fuzzy_match_simple_words(substr, vec->buf[i].string); } if (search_score != INT32_MIN) { - string_vec_add(&filt, vec->buf[i].string); + /* + * Assume that the vector we're filtering is already + * normalized. + */ + string_vec_add_normalized(&filt, vec->buf[i].string); /* * Store the position of the match in the string as * its search_score, for later sorting. @@ -191,7 +211,10 @@ struct string_vec string_vec_load(FILE *file) if (line[bytes_read - 1] == '\n') { line[bytes_read - 1] = '\0'; } - string_vec_add(&vec, line); + /* + * Assume that the vector we're loading is already normalized. 
+ */ + string_vec_add_normalized(&vec, line); } free(line); diff --git a/src/unicode.c b/src/unicode.c index 3833fb6..7ddc0d5 100644 --- a/src/unicode.c +++ b/src/unicode.c @@ -1,3 +1,4 @@ +#include <stdbool.h> #include <string.h> #include "unicode.h" @@ -115,3 +116,8 @@ char *utf8_compose(const char *s) { return g_utf8_normalize(s, -1, G_NORMALIZE_DEFAULT_COMPOSE); } + +bool utf8_validate(const char *s) +{ + return g_utf8_validate(s, -1, NULL); +} diff --git a/src/unicode.h b/src/unicode.h index e198231..d32303d 100644 --- a/src/unicode.h +++ b/src/unicode.h @@ -2,6 +2,7 @@ #define UNICODE_H #include <glib.h> +#include <stdbool.h> #include <stdint.h> uint8_t utf32_to_utf8(uint32_t c, char *buf); @@ -24,5 +25,6 @@ size_t utf8_strlen(const char *s); char *utf8_strcasestr(const char * restrict haystack, const char * restrict needle); char *utf8_normalize(const char *s); char *utf8_compose(const char *s); +bool utf8_validate(const char *s); #endif /* UNICODE_H */ |