/*
 * src/utf8.h - declarations for UTF-8 character and string helpers.
 *
 * Provenance (from the patch that introduced this file):
 *   Commit:  5482f0be746a98bdd6b2c54183b54dd2ff2a0192
 *   Author:  Phil Jones
 *   Date:    Tue, 18 Oct 2022 19:33:41 +0100
 *   Subject: Improve UTF-8 handling.
 *
 *   This should allow case-insensitive matching for non-Latin characters,
 *   and fix matching for characters with diacritics.
 *
 *   New file, mode 100644, blob b6ee986, 22 insertions.
 *
 * Only prototypes live here; the definitions are in another translation
 * unit that is not visible from this header.
 */
#ifndef UTF8_H
#define UTF8_H

/*
 * NOTE(review): the header names on these two directives were missing from
 * the extracted text. <stddef.h> (size_t) and <stdint.h> (uint32_t) are the
 * ones the declarations below require - confirm against upstream.
 */
#include <stddef.h>
#include <stdint.h>

/*
 * Per-character classification and case conversion.
 *
 * Each function takes and returns a uint32_t.
 * NOTE(review): `c` is presumably a decoded Unicode code point, and the
 * is*() functions presumably return non-zero for "true" - confirm against
 * the implementation.
 */
uint32_t utf8_isupper(uint32_t c);
uint32_t utf8_islower(uint32_t c);
uint32_t utf8_isalnum(uint32_t c);
uint32_t utf8_toupper(uint32_t c);
uint32_t utf8_tolower(uint32_t c);

/*
 * String helpers operating on `const char *` input.
 *
 * The pointer-returning functions return a non-const `char *` even though
 * their input is const-qualified, so callers must not write through the
 * result unless the underlying buffer is actually writable.
 *
 * NOTE(review): the names mirror libc's strchr/strlen/strcasestr; whether
 * lengths are counted in bytes or characters, what is returned when nothing
 * is found, and how malformed UTF-8 is handled cannot be determined from
 * this header - confirm against the implementation.
 */
uint32_t utf8_get_char(const char *s);
char *utf8_next_char(const char *s);
char *utf8_prev_char(const char *s);
char *utf8_strchr(const char *s, uint32_t c);
char *utf8_strcasechr(const char *s, uint32_t c);
size_t utf8_strlen(const char *s);

/* `restrict`: the caller promises that haystack and needle do not alias. */
char *utf8_strcasestr(const char * restrict haystack, const char * restrict needle);

/*
 * NOTE(review): presumably returns a newly allocated, normalized copy of
 * `s` that the caller owns - ownership and the normalization form are not
 * visible here; TODO confirm.
 */
char *utf8_normalize(const char *s);

#endif /* UTF8_H */