summaryrefslogtreecommitdiff
path: root/generate-rowcolumn-helpers.py
diff options
context:
space:
mode:
Diffstat (limited to 'generate-rowcolumn-helpers.py')
-rwxr-xr-xgenerate-rowcolumn-helpers.py111
1 files changed, 111 insertions, 0 deletions
diff --git a/generate-rowcolumn-helpers.py b/generate-rowcolumn-helpers.py
new file mode 100755
index 0000000..fd8d48c
--- /dev/null
+++ b/generate-rowcolumn-helpers.py
@@ -0,0 +1,111 @@
+#!/usr/bin/env python3
+
+# This script generates functions to convert row/column numbers encoded as
+# diacritics to actual numbers.
+# It reads the file rowcolumn-diacritics.txt from the currend directory and
+# produces the following files:
+# - rowcolumn_diacritics_helpers.c - contains a helper function to convert from
+# diacritics to row/column numbers.
+# - rowcolumn_diacritics.sh - contains an array of row/column diacritics (can be
+# used by shell scripts to generate image placeholders).
+#
+# The script also checks some desirable properties of row/column diacritics,
+# e.g. that image placeholders are in normal form.
+
+import unicodedata
+import sys
+
+# codes of all row/column diacritics
+codes = []
+
+with open("./rowcolumn-diacritics.txt", "r") as file:
+ for line in file.readlines():
+ if line.startswith('#'):
+ continue
+ code = int(line.split(";")[0], 16)
+ char = chr(code)
+ assert unicodedata.combining(char) == 230
+ codes.append(code)
+
+print("Generating ./rowcolumn_diacritics_helpers.c")
+with open("./rowcolumn_diacritics_helpers.c", "w") as file:
+ range_start_num = 1
+ range_start = 0
+ range_end = 0
+
+ def print_range():
+ if range_start >= range_end:
+ return
+ for code in range(range_start, range_end):
+ print("\tcase " + hex(code) + ":", file=file)
+ print("\t\treturn code - " + hex(range_start) + " + " +
+ str(range_start_num) + ";",
+ file=file)
+
+ print("#include <stdint.h>\n", file=file)
+ print("uint16_t diacritic_to_num(uint32_t code)\n{", file=file)
+ print("\tswitch (code) {", file=file)
+
+ for code in codes:
+ if range_end == code:
+ range_end += 1
+ else:
+ print_range()
+ range_start_num += range_end - range_start
+ range_start = code
+ range_end = code + 1
+ print_range()
+
+ print("\t}", file=file)
+ print("\treturn 0;", file=file)
+ print("}", file=file)
+
+print("Generating ./rowcolumn_diacritics.sh")
+with open("./rowcolumn_diacritics.sh", "w") as file:
+ print("ROWCOLUMN_DIACRITICS=(", file=file, end="")
+ for code in codes:
+ print('"\\U' + format(code, 'x') + '" ', file=file, end="")
+ print(")", file=file)
+
+print("Checking that image placeholder cannot be normalized further")
+
+img_char = chr(0x10EEEE)
+for row_code in codes:
+ row_char = chr(row_code)
+ for col_code in codes:
+ col_char = chr(col_code)
+ cell = img_char + row_char + col_char
+ for nf in ["NFC", "NFKC", "NFD", "NFKD"]:
+ if not unicodedata.is_normalized(nf, cell):
+ print(cell)
+ print("unnormalized!", nf, [hex(ord(img_char)), hex(row_code), hex(col_code)])
+ normalized = unicodedata.normalize(nf, cell)
+ print("normalized:", [hex(ord(c)) for c in normalized])
+ exit(1)
+
+print("Checking that the row/column marks are not fused with anything "
+ "letter-like during normalization")
+
+# Collect somewhat normal characters.
+normal_symbols = []
+for i in range(sys.maxunicode):
+ string = chr(i)
+ if unicodedata.category(string)[0] not in ['L', 'P', 'N', 'S']:
+ continue
+ is_normalized = True
+ for nf in ["NFC", "NFKC", "NFD", "NFKD"]:
+ if not unicodedata.is_normalized(nf, string):
+ is_normalized = False
+ if is_normalized:
+ normal_symbols.append(i)
+
+for code in codes:
+ print("Checking " + hex(code), end="\r")
+ for num in normal_symbols:
+ string = chr(num) + chr(code)
+ for nf in ["NFC", "NFKC", "NFD", "NFKD"]:
+ if not unicodedata.is_normalized(nf, string):
+ normalized = unicodedata.normalize(nf, string)
+ print("WARNING: " + hex(num) + " + " + hex(code) +
+ " is normalized to " + normalized,
+ " ".join(hex(ord(c)) for c in normalized))