From 3861e8289ae40bac168275ce6f10b231e11baa55 Mon Sep 17 00:00:00 2001 From: Phil Jones Date: Mon, 28 Nov 2022 22:19:12 +0000 Subject: Refactor string vector code. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously, string vectors were built by reading input line by line, and multiple copies of string vectors were made when searching. Now, input is read into one big buffer, and string vectors only contain references to the strings in this buffer. This both speeds up reading of input, and avoids unnecessary copying of strings in various places. The main downside currently is that input read from stdin is no longer UTF-8 normalised. This means, for example, that a search for `e` won't necessarily match `é`. Normalisation is very slow relative to the rest of tofi, however, and not needed for most use-cases. This could either be solved by accepting the slowdown, or making this an option, such as --unicode or --unicode-normalize. --- src/string_vec.h | 49 ++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 40 insertions(+), 9 deletions(-) (limited to 'src/string_vec.h') diff --git a/src/string_vec.h b/src/string_vec.h index f04b385..6f54d56 100644 --- a/src/string_vec.h +++ b/src/string_vec.h @@ -24,27 +24,58 @@ struct string_vec string_vec_create(void); void string_vec_destroy(struct string_vec *restrict vec); -[[nodiscard("memory leaked")]] -struct string_vec string_vec_copy(const struct string_vec *restrict vec); - void string_vec_add(struct string_vec *restrict vec, const char *restrict str); void string_vec_sort(struct string_vec *restrict vec); -void string_vec_history_sort(struct string_vec *restrict vec, struct history *history); +struct scored_string *string_vec_find_sorted(struct string_vec *restrict vec, const char *str); + + +/* + * Like a string_vec, but only store a reference to the corresponding string + * rather than copying it. 
Although compatible with the string_vec struct, we + * create a new struct to make the compiler complain if we mix them up. + */ +struct scored_string_ref { + char *string; + int32_t search_score; + int32_t history_score; +}; + +struct string_ref_vec { + size_t count; + size_t size; + struct scored_string_ref *buf; +}; + +/* + * Although some of these functions are identical to the corresponding + * string_vec ones, we create new functions to avoid potentially mixing up + * the two. + */ +[[nodiscard("memory leaked")]] +struct string_ref_vec string_ref_vec_create(void); + +void string_ref_vec_destroy(struct string_ref_vec *restrict vec); + +[[nodiscard("memory leaked")]] +struct string_ref_vec string_ref_vec_copy(const struct string_ref_vec *restrict vec); + +void string_ref_vec_add(struct string_ref_vec *restrict vec, char *restrict str); + +void string_ref_vec_history_sort(struct string_ref_vec *restrict vec, struct history *history); void string_vec_uniq(struct string_vec *restrict vec); -struct scored_string *string_vec_find_sorted(struct string_vec *restrict vec, const char *str); +struct scored_string_ref *string_ref_vec_find_sorted(struct string_ref_vec *restrict vec, const char *str); [[nodiscard("memory leaked")]] -struct string_vec string_vec_filter( - const struct string_vec *restrict vec, +struct string_ref_vec string_ref_vec_filter( + const struct string_ref_vec *restrict vec, const char *restrict substr, bool fuzzy); [[nodiscard("memory leaked")]] -struct string_vec string_vec_load(FILE *file); -void string_vec_save(struct string_vec *restrict vec, FILE *restrict file); +struct string_ref_vec string_ref_vec_from_buffer(char *buffer); #endif /* STRING_VEC_H */ -- cgit v1.2.3