Code relating to the ZSCII character set.


§1. The old Inform 7 template defined the following constant for the benefit, presumably, of conditional compilation. It's probably no longer useful.

! Marker constant declared without a value. Nothing in this section reads it;
! presumably it exists so that other code can test for it with conditional
! compilation (to be confirmed against callers).
Constant ZSCII_Tables;

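§2. We must create two arrays: CharCasingChart0, which lists the lower-case letters, and CharCasingChart1, which lists the upper-case letters.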

Each array is a sequence of three-word records, consisting of the start of a character range, the size of the range (the number of characters in it), and the numerical offset to convert to the opposite case. For instance, the sequence \((97, 26, -32)\) means the 26 lower-case letters "a" to "z", and marks them as convertible to upper case by subtracting 32 from the character code (so "a", 97, becomes "A", 65). If the size of the range is negative, this indicates that only every alternate code is valid: for instance, \((201, -3, 1)\) covers the three codes 201 to 203 but marks only 201 and 203 as letters, each convertible by adding 1. (This makes for efficient storage since there are large parts of the Unicode number-space in which upper and lower case letters alternate.)

An offset of UNIC_NCT means no case change is possible; and any character not included in the ranges below is not a letter.

! Sentinel stored in the offset field of a casing-table record to mean
! "no case change": the range is listed but has no opposite-case form.
Constant UNIC_NCT = 10000;

§3. ZSCII Casing Tables.

! Casing table for lower-case ZSCII letters. Each record is three words:
! range start, range size (negative = only every alternate code is valid),
! and the offset to add to reach the upper-case form (UNIC_NCT = none).
Array CharCasingChart0 -->
    $0061 (  26) (     -32)     ! 97-122: a to z
    $009b (   3) (       3)     ! 155-157: a, o, u umlaut
    $00a1 (   1)   UNIC_NCT     ! 161: no case change
    $00a4 (   2) (       3)     ! 164-165: e, i umlaut
    $00a6 (   1)   UNIC_NCT     ! 166: no case change
    $00a9 (   6) (       6)     ! 169-174: a, e, i, o, u, y acute
    $00b5 (   5) (       5)     ! 181-185: a, e, i, o, u grave
    $00bf (   5) (       5)     ! 191-195: a, e, i, o, u circumflex
    $00c9 (  -3) (       1)     ! 201, 203: a circle, o slash
    $00cd (   3) (       3)     ! 205-207: a, n, o tilde
    $00d3 (  -3) (       1)     ! 211, 213: ae ligature, c cedilla
    $00d7 (   2) (       2)     ! 215-216: thorn, eth
    $00dc (   1) (       1)     ! 220: oe ligature
    $0000                       ! presumably the end-of-table marker
;

! Casing table for upper-case ZSCII letters. Each record is three words:
! range start, range size (negative = only every alternate code is valid),
! and the offset to add to reach the lower-case form.
Array CharCasingChart1 -->
    $0041 (  26) (      32)     ! 65-90: A to Z
    $009e (   3) (      -3)     ! 158-160: A, O, U umlaut
    $00a7 (   2) (      -3)     ! 167-168: E, I umlaut
    $00af (   6) (      -6)     ! 175-180: A, E, I, O, U, Y acute
    $00ba (   5) (      -5)     ! 186-190: A, E, I, O, U grave
    $00c4 (   5) (      -5)     ! 196-200: A, E, I, O, U circumflex
    $00ca (  -3) (      -1)     ! 202, 204: A circle, O slash
    $00d0 (   3) (      -3)     ! 208-210: A, N, O tilde
    $00d4 (  -3) (      -1)     ! 212, 214: AE ligature, C cedilla
    $00d9 (   2) (      -2)     ! 217-218: Thorn, Eth
    $00dd (   1) (      -1)     ! 221: OE ligature
    $0000                       ! presumably the end-of-table marker
;

§4. The following are the equivalent of tolower and toupper, the traditional C library functions for forcing letters into lower and upper case form, for the ZSCII character set.

! ZSCII equivalent of C's tolower: given a character code c, returns the
! lower-case form if c is an upper-case letter, and c unchanged otherwise.
[ VM_UpperToLowerCase c;
    switch (c) {
        'A' to 'Z':     return c + 32;
        158 to 160:     return c - 3;   ! A, O, U umlaut
        167, 168:       return c - 3;   ! E, I umlaut
        175 to 180:     return c - 6;   ! A, E, I, O, U, Y acute
        186 to 190:     return c - 5;   ! A, E, I, O, U grave
        196 to 200:     return c - 5;   ! A, E, I, O, U circumflex
        202, 204:       return c - 1;   ! A circle, O slash
        208 to 210:     return c - 3;   ! A, N, O tilde
        212, 214:       return c - 1;   ! AE ligature, C cedilla
        217, 218:       return c - 2;   ! Thorn, Eth
        221:            return c - 1;   ! OE ligature
    }
    ! Anything else is not an upper-case letter: pass it through untouched.
    return c;
];

! ZSCII equivalent of C's toupper: given a character code c, returns the
! upper-case form if c is a lower-case letter, and c unchanged otherwise.
[ VM_LowerToUpperCase c;
    switch (c) {
        'a' to 'z':     return c - 32;
        155 to 157:     return c + 3;   ! a, o, u umlaut
        164, 165:       return c + 3;   ! e, i umlaut
        169 to 174:     return c + 6;   ! a, e, i, o, u, y acute
        181 to 185:     return c + 5;   ! a, e, i, o, u grave
        191 to 195:     return c + 5;   ! a, e, i, o, u circumflex
        201, 203:       return c + 1;   ! a circle, o slash
        205 to 207:     return c + 3;   ! a, n, o tilde
        211, 213:       return c + 1;   ! ae ligature, c cedilla
        215, 216:       return c + 2;   ! thorn, eth
        220:            return c + 1;   ! oe ligature
    }
    ! Anything else is not a lower-case letter with an upper-case form.
    return c;
];

§5. It's convenient to provide this relatively fast routine to reverse the case of a letter, since this is an operation used frequently in regular expression matching.

! Reverses the case of a ZSCII character: given character code ch, returns
! the upper-case form of a lower-case letter, the lower-case form of an
! upper-case letter, and ch unchanged for anything that has no opposite case.
[ TEXT_TY_RevCase ch;
    if (ch < 'A') return ch;                            ! below every letter
    if ((ch >= 'a') && (ch <= 'z')) return ch-'a'+'A';
    if (ch <= 'Z') return ch-'A'+'a';                   ! ch >= 'A' already known
    if (ch < 128) return ch;                            ! no other letters below 128
    if ((ch >= 155) && (ch <= 157)) return ch+3;        ! a, o, u umlaut in ZSCII
    if ((ch >= 158) && (ch <= 160)) return ch-3;        ! A, O, U umlaut
    if ((ch >= 164) && (ch <= 165)) return ch+3;        ! e, i umlaut
    if ((ch >= 167) && (ch <= 168)) return ch-3;        ! E, I umlaut
    if ((ch >= 169) && (ch <= 174)) return ch+6;        ! a, e, i, o, u, y acute
    if ((ch >= 175) && (ch <= 180)) return ch-6;        ! A, E, I, O, U, Y acute
    if ((ch >= 181) && (ch <= 185)) return ch+5;        ! a, e, i, o, u grave
    if ((ch >= 186) && (ch <= 190)) return ch-5;        ! A, E, I, O, U grave
    if ((ch >= 191) && (ch <= 195)) return ch+5;        ! a, e, i, o, u circumflex
    if ((ch >= 196) && (ch <= 200)) return ch-5;        ! A, E, I, O, U circumflex
    if (ch == 201) return 202;                          ! a circle
    if (ch == 202) return 201;                          ! A circle
    if (ch == 203) return 204;                          ! o slash
    if (ch == 204) return 203;                          ! O slash
    if ((ch >= 205) && (ch <= 207)) return ch+3;        ! a, n, o tilde
    if ((ch >= 208) && (ch <= 210)) return ch-3;        ! A, N, O tilde
    if (ch == 211) return 212;                          ! ae ligature
    if (ch == 212) return 211;                          ! AE ligature
    if (ch == 213) return 214;                          ! c cedilla
    if (ch == 214) return 213;                          ! C cedilla
    if (ch == 215 or 216) return ch+2;                  ! thorn, eth
    if (ch == 217 or 218) return ch-2;                  ! Thorn, Eth
    if (ch == 220) return 221;                          ! oe ligature
    if (ch == 221) return 220;                          ! OE ligature
    return ch;
];