1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
|
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include "fuzzy_match.h"
#include "string_vec.h"
#include "unicode.h"
#include "xmalloc.h"
static int cmpstringp(const void *restrict a, const void *restrict b)
{
/*
* For qsort we receive pointers to the array elements (which are
* pointers to char), so convert and dereference them for comparison.
*/
const char *restrict str1 = *(const char **)a;
const char *restrict str2 = *(const char **)b;
/*
* Ensure any NULL strings are shoved to the end.
*/
if (str1 == NULL) {
return 1;
}
if (str2 == NULL) {
return -1;
}
return strcmp(str1, str2);
}
static int cmpscorep(const void *restrict a, const void *restrict b)
{
struct scored_string *restrict str1 = (struct scored_string *)a;
struct scored_string *restrict str2 = (struct scored_string *)b;
int hist_diff = str2->history_score - str1->history_score;
int search_diff = str2->search_score - str1->search_score;
return hist_diff + search_diff;
}
struct string_vec string_vec_create(void)
{
struct string_vec vec = {
.count = 0,
.size = 128,
.buf = xcalloc(128, sizeof(*vec.buf)),
};
return vec;
}
void string_vec_destroy(struct string_vec *restrict vec)
{
for (size_t i = 0; i < vec->count; i++) {
free(vec->buf[i].string);
}
free(vec->buf);
}
struct string_vec string_vec_copy(const struct string_vec *restrict vec)
{
struct string_vec copy = {
.count = vec->count,
.size = vec->size,
.buf = xcalloc(vec->size, sizeof(*copy.buf)),
};
for (size_t i = 0; i < vec->count; i++) {
copy.buf[i].string = xstrdup(vec->buf[i].string);
copy.buf[i].search_score = vec->buf[i].search_score;
copy.buf[i].history_score = vec->buf[i].history_score;
}
return copy;
}
void string_vec_add(struct string_vec *restrict vec, const char *restrict str)
{
if (vec->count == vec->size) {
vec->size *= 2;
vec->buf = xrealloc(vec->buf, vec->size * sizeof(vec->buf[0]));
}
vec->buf[vec->count].string = utf8_normalize(str);
if (vec->buf[vec->count].string == NULL) {
vec->buf[vec->count].string = xstrdup(str);
}
vec->buf[vec->count].search_score = 0;
vec->buf[vec->count].history_score = 0;
vec->count++;
}
void string_vec_sort(struct string_vec *restrict vec)
{
qsort(vec->buf, vec->count, sizeof(vec->buf[0]), cmpstringp);
}
void string_vec_uniq(struct string_vec *restrict vec)
{
size_t count = vec->count;
for (size_t i = 1; i < vec->count; i++) {
if (!strcmp(vec->buf[i].string, vec->buf[i-1].string)) {
free(vec->buf[i-1].string);
vec->buf[i-1].string = NULL;
count--;
}
}
string_vec_sort(vec);
vec->count = count;
}
char **string_vec_find(struct string_vec *restrict vec, const char * str)
{
return bsearch(&str, vec->buf, vec->count, sizeof(vec->buf[0]), cmpstringp);
}
struct string_vec string_vec_filter(
const struct string_vec *restrict vec,
const char *restrict substr,
bool fuzzy)
{
if (substr[0] == '\0') {
return string_vec_copy(vec);
}
struct string_vec filt = string_vec_create();
for (size_t i = 0; i < vec->count; i++) {
int32_t search_score;
if (fuzzy) {
search_score = fuzzy_match_words(substr, vec->buf[i].string);
} else {
search_score = fuzzy_match_simple_words(substr, vec->buf[i].string);
}
if (search_score != INT32_MIN) {
string_vec_add(&filt, vec->buf[i].string);
/*
* Store the position of the match in the string as
* its search_score, for later sorting.
*/
filt.buf[filt.count - 1].search_score = search_score;
filt.buf[filt.count - 1].history_score = vec->buf[i].history_score;
}
}
/*
* Sort the results by this search_score. This moves matches at the beginnings
* of words to the front of the result list.
*/
qsort(filt.buf, filt.count, sizeof(filt.buf[0]), cmpscorep);
return filt;
}
struct string_vec string_vec_load(FILE *file)
{
struct string_vec vec = string_vec_create();
if (file == NULL) {
return vec;
}
ssize_t bytes_read;
char *line = NULL;
size_t len;
while ((bytes_read = getline(&line, &len, file)) != -1) {
if (line[bytes_read - 1] == '\n') {
line[bytes_read - 1] = '\0';
}
string_vec_add(&vec, line);
}
free(line);
return vec;
}
void string_vec_save(struct string_vec *restrict vec, FILE *restrict file)
{
for (size_t i = 0; i < vec->count; i++) {
fputs(vec->buf[i].string, file);
fputc('\n', file);
}
}
|