summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- src/string_vec.c 27
-rw-r--r-- src/unicode.c 6
-rw-r--r-- src/unicode.h 2
3 files changed, 33 insertions, 2 deletions
diff --git a/src/string_vec.c b/src/string_vec.c
index 8c59607..342fd9f 100644
--- a/src/string_vec.c
+++ b/src/string_vec.c
@@ -83,6 +83,9 @@ struct string_vec string_vec_copy(const struct string_vec *restrict vec)
void string_vec_add(struct string_vec *restrict vec, const char *restrict str)
{
+ if (!utf8_validate(str)) {
+ return;
+ }
if (vec->count == vec->size) {
vec->size *= 2;
vec->buf = xrealloc(vec->buf, vec->size * sizeof(vec->buf[0]));
@@ -96,6 +99,19 @@ void string_vec_add(struct string_vec *restrict vec, const char *restrict str)
vec->count++;
}
+/* Same as string_vec_add(), but for speed skips its utf8_validate() check and assumes str is already normalized; stores xstrdup(str) with both scores zeroed. */
+static void string_vec_add_normalized(struct string_vec *restrict vec, const char *restrict str)
+{
+ if (vec->count == vec->size) { /* buffer is full: double its capacity before appending */
+ vec->size *= 2;
+ vec->buf = xrealloc(vec->buf, vec->size * sizeof(vec->buf[0]));
+ }
+ vec->buf[vec->count].string = xstrdup(str); /* the entry gets its own xstrdup() result, not the caller's pointer */
+ vec->buf[vec->count].search_score = 0;
+ vec->buf[vec->count].history_score = 0;
+ vec->count++;
+}
+
void string_vec_sort(struct string_vec *restrict vec)
{
qsort(vec->buf, vec->count, sizeof(vec->buf[0]), cmpstringp);
@@ -160,7 +176,11 @@ struct string_vec string_vec_filter(
search_score = fuzzy_match_simple_words(substr, vec->buf[i].string);
}
if (search_score != INT32_MIN) {
- string_vec_add(&filt, vec->buf[i].string);
+ /*
+ * Assume that the vector we're filtering is already
+ * normalized.
+ */
+ string_vec_add_normalized(&filt, vec->buf[i].string);
/*
* Store the position of the match in the string as
* its search_score, for later sorting.
@@ -191,7 +211,10 @@ struct string_vec string_vec_load(FILE *file)
if (line[bytes_read - 1] == '\n') {
line[bytes_read - 1] = '\0';
}
- string_vec_add(&vec, line);
+ /*
+ * Assume that the vector we're loading is already normalized.
+ */
+ string_vec_add_normalized(&vec, line);
}
free(line);
diff --git a/src/unicode.c b/src/unicode.c
index 3833fb6..7ddc0d5 100644
--- a/src/unicode.c
+++ b/src/unicode.c
@@ -1,3 +1,4 @@
+#include <stdbool.h>
#include <string.h>
#include "unicode.h"
@@ -115,3 +116,8 @@ char *utf8_compose(const char *s)
{
return g_utf8_normalize(s, -1, G_NORMALIZE_DEFAULT_COMPOSE);
}
+
+bool utf8_validate(const char *s) /* Thin wrapper: reports whether s passes g_utf8_validate(). */
+{
+ return g_utf8_validate(s, -1, NULL); /* per GLib docs: -1 means s is NUL-terminated, NULL means the end-of-valid-data pointer is not wanted */
+}
diff --git a/src/unicode.h b/src/unicode.h
index e198231..d32303d 100644
--- a/src/unicode.h
+++ b/src/unicode.h
@@ -2,6 +2,7 @@
#define UNICODE_H
#include <glib.h>
+#include <stdbool.h>
#include <stdint.h>
uint8_t utf32_to_utf8(uint32_t c, char *buf);
@@ -24,5 +25,6 @@ size_t utf8_strlen(const char *s);
char *utf8_strcasestr(const char * restrict haystack, const char * restrict needle);
char *utf8_normalize(const char *s);
char *utf8_compose(const char *s);
+bool utf8_validate(const char *s);
#endif /* UNICODE_H */