From 03fc67cc74cc08fff5eb61e855b54174683614ff Mon Sep 17 00:00:00 2001
From: Phil Jones
Date: Thu, 24 Nov 2022 12:48:39 +0000
Subject: Speed up string vector filtering and loading.

We can assume that string vectors generated by tofi are already
normalised, so don't waste time normalising them again. Also make sure
to validate strings from the user.
---
 src/string_vec.c | 27 +++++++++++++++++++++++++--
 1 file changed, 25 insertions(+), 2 deletions(-)

(limited to 'src/string_vec.c')

diff --git a/src/string_vec.c b/src/string_vec.c
index 8c59607..342fd9f 100644
--- a/src/string_vec.c
+++ b/src/string_vec.c
@@ -83,6 +83,9 @@ struct string_vec string_vec_copy(const struct string_vec *restrict vec)
 
 void string_vec_add(struct string_vec *restrict vec, const char *restrict str)
 {
+	if (!utf8_validate(str)) {
+		return;
+	}
 	if (vec->count == vec->size) {
 		vec->size *= 2;
 		vec->buf = xrealloc(vec->buf, vec->size * sizeof(vec->buf[0]));
@@ -96,6 +99,19 @@ void string_vec_add(struct string_vec *restrict vec, const char *restrict str)
 	vec->count++;
 }
 
+/* Same as string_vec_add(), but assume str is normalized for speed. */
+static void string_vec_add_normalized(struct string_vec *restrict vec, const char *restrict str)
+{
+	if (vec->count == vec->size) {
+		vec->size *= 2;
+		vec->buf = xrealloc(vec->buf, vec->size * sizeof(vec->buf[0]));
+	}
+	vec->buf[vec->count].string = xstrdup(str);
+	vec->buf[vec->count].search_score = 0;
+	vec->buf[vec->count].history_score = 0;
+	vec->count++;
+}
+
 void string_vec_sort(struct string_vec *restrict vec)
 {
 	qsort(vec->buf, vec->count, sizeof(vec->buf[0]), cmpstringp);
@@ -160,7 +176,11 @@ struct string_vec string_vec_filter(
 			search_score = fuzzy_match_simple_words(substr, vec->buf[i].string);
 		}
 		if (search_score != INT32_MIN) {
-			string_vec_add(&filt, vec->buf[i].string);
+			/*
+			 * Assume that the vector we're filtering is already
+			 * normalized.
+			 */
+			string_vec_add_normalized(&filt, vec->buf[i].string);
 			/*
 			 * Store the position of the match in the string as
 			 * its search_score, for later sorting.
@@ -191,7 +211,10 @@ struct string_vec string_vec_load(FILE *file)
 		if (line[bytes_read - 1] == '\n') {
 			line[bytes_read - 1] = '\0';
 		}
-		string_vec_add(&vec, line);
+		/*
+		 * Assume that the vector we're loading is already normalized.
+		 */
+		string_vec_add_normalized(&vec, line);
 	}
 
 	free(line);
-- 
cgit v1.2.3