diff options
author | ZachIR <zachir@librem.one> | 2025-07-08 21:48:39 -0500 |
---|---|---|
committer | ZachIR <zachir@librem.one> | 2025-07-08 21:48:39 -0500 |
commit | cbffff166748e7dada4b66ed1c4f10f495eefa97 (patch) | |
tree | e5b15a872bb79b4beb56c8177b229c58b356bdb5 /generate-rowcolumn-helpers.py | |
parent | d79cc4cea16572f7f80c3842031406172630f697 (diff) |
Add in st-graphics repo
Diffstat (limited to 'generate-rowcolumn-helpers.py')
-rwxr-xr-x | generate-rowcolumn-helpers.py | 111 |
1 files changed, 111 insertions, 0 deletions
#!/usr/bin/env python3
"""Generate helpers that map row/column diacritics to row/column numbers.

The script reads ``rowcolumn-diacritics.txt`` from the current directory and
produces the following files:

- ``rowcolumn_diacritics_helpers.c`` - contains a helper function to convert
  from diacritics to row/column numbers.
- ``rowcolumn_diacritics.sh`` - contains an array of row/column diacritics
  (can be used by shell scripts to generate image placeholders).

The script also checks some desirable properties of row/column diacritics,
e.g. that image placeholders are in normal form.
"""

import sys
import unicodedata

# Every normalization form the placeholders must already satisfy.
NORMAL_FORMS = ("NFC", "NFKC", "NFD", "NFKD")

# Code point of the image placeholder character the diacritics attach to.
IMAGE_PLACEHOLDER = 0x10EEEE

# Canonical combining class every row/column diacritic must have.
REQUIRED_COMBINING_CLASS = 230

# First letters of the general categories treated as "somewhat normal"
# characters (letters, punctuation, numbers, symbols).
NORMAL_CATEGORIES = ("L", "P", "N", "S")


def parse_diacritic_codes(lines):
    """Return the code points listed in the diacritics data file.

    ``lines`` is any iterable of text lines. Lines starting with ``#`` and
    blank lines are skipped; for every other line the first ``;``-separated
    field is parsed as a hexadecimal code point.

    Raises ``ValueError`` if a field is not valid hexadecimal or if the
    character does not have canonical combining class 230.
    """
    codes = []
    for lineno, line in enumerate(lines, start=1):
        if line.startswith("#") or not line.strip():
            continue
        field = line.split(";")[0]
        try:
            code = int(field, 16)
        except ValueError:
            raise ValueError(
                f"line {lineno}: {field!r} is not a hexadecimal code point"
            ) from None
        combining = unicodedata.combining(chr(code))
        # Explicit raise instead of assert: asserts vanish under `python -O`.
        if combining != REQUIRED_COMBINING_CLASS:
            raise ValueError(
                f"line {lineno}: {hex(code)} has combining class {combining}, "
                f"expected {REQUIRED_COMBINING_CLASS}"
            )
        codes.append(code)
    return codes


def group_code_ranges(codes):
    """Group ``codes`` into runs of consecutive code points.

    Returns a list of ``(first, end, number)`` tuples where ``first`` is the
    first code point of the run, ``end`` is one past its last code point and
    ``number`` is the 1-based row/column number assigned to ``first`` (i.e.
    the position of ``first`` within ``codes``).
    """
    ranges = []
    for number, code in enumerate(codes, start=1):
        if ranges and ranges[-1][1] == code:
            # The code continues the current run: just move its end.
            first, _, first_number = ranges[-1]
            ranges[-1] = (first, code + 1, first_number)
        else:
            ranges.append((code, code + 1, number))
    return ranges


def render_c_helper(codes):
    """Return the C source of ``diacritic_to_num`` for the given codes.

    Each run of consecutive code points shares a single ``return`` statement
    that computes the number from the offset within the run. Unknown code
    points make the generated function return 0.
    """
    lines = [
        "#include <stdint.h>",
        "",
        "uint16_t diacritic_to_num(uint32_t code)",
        "{",
        "\tswitch (code) {",
    ]
    for first, end, number in group_code_ranges(codes):
        lines.extend("\tcase " + hex(code) + ":" for code in range(first, end))
        lines.append(
            "\t\treturn code - " + hex(first) + " + " + str(number) + ";"
        )
    lines.extend(["\t}", "\treturn 0;", "}"])
    return "\n".join(lines) + "\n"


def render_shell_array(codes):
    """Return the shell definition of the ``ROWCOLUMN_DIACRITICS`` array."""
    entries = "".join('"\\U' + format(code, "x") + '" ' for code in codes)
    return "ROWCOLUMN_DIACRITICS=(" + entries + ")\n"


def find_unnormalized_placeholder(codes, image_code=IMAGE_PLACEHOLDER):
    """Find the first placeholder cell that is not in normal form.

    A cell is the image placeholder character followed by a row diacritic and
    a column diacritic. Returns ``(form, row_code, col_code)`` for the first
    cell/form combination that is not normalized, or ``None`` if every
    combination is already normalized in all of ``NORMAL_FORMS``.
    """
    img_char = chr(image_code)
    for row_code in codes:
        prefix = img_char + chr(row_code)
        for col_code in codes:
            cell = prefix + chr(col_code)
            for nf in NORMAL_FORMS:
                if not unicodedata.is_normalized(nf, cell):
                    return nf, row_code, col_code
    return None


def collect_normal_symbols(limit=sys.maxunicode + 1):
    """Return code points below ``limit`` that are "somewhat normal".

    A code point qualifies when its general category is a letter,
    punctuation, number or symbol and the single character is already
    normalized in every form of ``NORMAL_FORMS``.
    """
    symbols = []
    for num in range(limit):
        char = chr(num)
        if unicodedata.category(char)[0] not in NORMAL_CATEGORIES:
            continue
        if all(unicodedata.is_normalized(nf, char) for nf in NORMAL_FORMS):
            symbols.append(num)
    return symbols


def find_fusions(code, symbols):
    """Yield ``(num, form, normalized)`` for symbols that fuse with ``code``.

    For every code point ``num`` in ``symbols`` the string ``chr(num) +
    chr(code)`` is checked against each normalization form; one item is
    yielded per form in which the string is not already normalized.
    """
    mark = chr(code)
    for num in symbols:
        string = chr(num) + mark
        for nf in NORMAL_FORMS:
            if not unicodedata.is_normalized(nf, string):
                yield num, nf, unicodedata.normalize(nf, string)


def main():
    """Generate the helper files and run the normalization checks."""
    with open("./rowcolumn-diacritics.txt", "r", encoding="utf-8") as file:
        codes = parse_diacritic_codes(file)

    print("Generating ./rowcolumn_diacritics_helpers.c")
    with open("./rowcolumn_diacritics_helpers.c", "w", encoding="utf-8") as file:
        file.write(render_c_helper(codes))

    print("Generating ./rowcolumn_diacritics.sh")
    with open("./rowcolumn_diacritics.sh", "w", encoding="utf-8") as file:
        file.write(render_shell_array(codes))

    print("Checking that image placeholder cannot be normalized further")
    failure = find_unnormalized_placeholder(codes)
    if failure is not None:
        nf, row_code, col_code = failure
        cell = chr(IMAGE_PLACEHOLDER) + chr(row_code) + chr(col_code)
        print(cell)
        print("unnormalized!", nf,
              [hex(IMAGE_PLACEHOLDER), hex(row_code), hex(col_code)])
        normalized = unicodedata.normalize(nf, cell)
        print("normalized:", [hex(ord(c)) for c in normalized])
        sys.exit(1)

    print("Checking that the row/column marks are not fused with anything "
          "letter-like during normalization")
    normal_symbols = collect_normal_symbols()
    for code in codes:
        # "\r" keeps the progress indicator on a single terminal line.
        print("Checking " + hex(code), end="\r")
        for num, _nf, normalized in find_fusions(code, normal_symbols):
            print("WARNING: " + hex(num) + " + " + hex(code) +
                  " is normalized to " + normalized,
                  " ".join(hex(ord(c)) for c in normalized))


if __name__ == "__main__":
    main()