[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[no subject]
From: Arnold Robbins
Date: Thu, 24 Nov 2022 12:42:35 -0500 (EST)
branch: texindex-unicode
commit 4cc6ebc49bf99d1aad7a528f8a0924947581caae
Author: Arnold D. Robbins <arnold@skeeve.com>
AuthorDate: Thu Nov 24 12:42:02 2022 -0500
Add unicode.awk POC file for unicode characters.
---
texindex/unicode.awk | 20 ++++++++++++++++++++
1 file changed, 20 insertions(+)
diff --git a/texindex/unicode.awk b/texindex/unicode.awk
new file mode 100644
index 0000000000..c03baaa531
--- /dev/null
+++ b/texindex/unicode.awk
@@ -0,0 +1,20 @@
+BEGIN {	# POC: print a few code-point constants, then count values accepted by isvalid()
+    printf("0x10FFFF is %d\n", 0x10FFFF)
+    printf("0xFFFD is %d\n", 0xFFFD)
+    printf("0xFFFD is '%c'\n", 0xFFFD)
+    invalid = sprintf("%c\n", 0xFFFD)	# NOTE(review): assigned but not referenced again in this file
+
+    count = 0
+    for (i = 0; i <= 0x10FFFF; i++) {	# walks every value from 0 through 0x10FFFF inclusive
+        char = sprintf("%c", i)	# %c with a numeric argument yields the character with that code
+        if (isvalid(char))
+            count++
+    }
+
+    printf("%d valid characters between 0 and %d\n", count, 0x10FFFF)	# range matches the loop bounds
+}
+
+function isvalid(c)	# returns 1 if c contains a character in any listed POSIX class, else 0
+{
+    return c ~ /[[:alpha:][:blank:][:cntrl:][:digit:][:graph:][:lower:][:print:][:punct:][:space:][:upper:]]/
+}