Support for parsing integers, truth states and real numbers.


§1. Understanding. In our target virtual machines, numbers are stored in twos-complement form, so that a 16-bit VM can hold the range of integers \(-2^{15} = -32768\) to \(2^{15}-1 = +32767\), while a 32-bit VM can hold \(-2^{31} = -2147483648\) to \(2^{31}-1 = +2147483647\): the token below accepts exactly those ranges.

! DECIMAL_TOKEN: grammar token matching a decimal integer at word number wn.
!
! Three attempts are made, each starting again from the same word:
!   (1) DECIMAL_TOKEN_INNER() - any result other than GPR_FAIL is returned;
!   (2) the elementary NUMBER_TOKEN - accepted unless it reports exactly 10000;
!   (3) a digit-by-digit parse of the word's text, with an optional leading
!       '-', which refuses any value outside the signed range of the VM word
!       (-32768..32767 if WORDSIZE is 2, -2147483648..2147483647 otherwise).
!
! Returns GPR_NUMBER with the value in parsed_number on success, or GPR_FAIL.
! When attempt (3) succeeds, wn is advanced past the single word consumed.
!
! The locals wna, original_wn and group_wn are not read by this routine;
! they are retained so that the routine's list of locals is unchanged.
[ DECIMAL_TOKEN wnc wna r n wa wl sign base digit digit_count original_wn group_wn ch;
    wnc = wn; original_wn = wn; group_wn = wn;

    ! Attempt (1): the inner token routine.
    r = DECIMAL_TOKEN_INNER();
    if (r ~= GPR_FAIL) return r;

    ! Attempt (2): the elementary number token. A result of exactly 10000 is
    ! not trusted and falls through to the precise parse below.
    ! NOTE(review): presumably NUMBER_TOKEN caps large values at 10000 -
    ! confirm against ParseTokenStopped.
    wn = wnc;
    r = ParseTokenStopped(ELEMENTARY_TT, NUMBER_TOKEN);
    if ((r == GPR_NUMBER) && (parsed_number ~= 10000)) return r;

    ! Attempt (3): read the characters of the word directly.
    wn = wnc;
    wa = WordAddress(wn);
    wl = WordLength(wn);
    sign = 1; base = 10; digit_count = 0;

    ! Characters are read as bytes when CHARSIZE is 1, and as words otherwise.
    #Iftrue CHARSIZE == 1;
    ch = wa->0;
    #Ifnot;
    ch = wa-->0;
    #Endif; ! CHARSIZE

    ! The word must open with a minus sign, a dollar sign or a digit.
    ! (base is never changed from 10 here, so a word opening with '$' is
    ! rejected by the digit test inside the loop below.)
    if (ch ~= '-' or '$' or '0' or '1' or '2' or '3' or '4'
        or '5' or '6' or '7' or '8' or '9')
        return GPR_FAIL;

    if (ch == '-') {
        ! Step over the sign: one byte, or one word, to match how ch is read.
        sign = -1; wl--;
        #Iftrue CHARSIZE == 1;
        wa++;
        #Ifnot;
        wa = wa + WORDSIZE;
        #Endif; ! CHARSIZE
    }

    ! A lone "-" is not a number.
    if (wl == 0) return GPR_FAIL;

    n = 0;
    while (wl > 0) {
        #Iftrue CHARSIZE == 1;
        ch = wa->0;
        #Ifnot;
        ch = wa-->0;
        #Endif; ! CHARSIZE

        ! 'a' upwards would be digit values 10, 11, ... in a higher base.
        if (ch >= 'a') digit = ch - 'a' + 10;
        else digit = ch - '0';
        digit_count++;

        ! Refuse, before accumulating this digit, anything that cannot fit.
        switch (base) {
            2:  if (digit_count == 17) return GPR_FAIL;
            10:
                #Iftrue WORDSIZE == 2;
                ! Six digits never fit. With five, the first four (now in n)
                ! may be at most 3276, and after exactly 3276 the last digit
                ! may be at most 7 (for +32767) or 8 (for -32768).
                if (digit_count == 6) return GPR_FAIL;
                if (digit_count == 5) {
                    if (n > 3276) return GPR_FAIL;
                    if (n == 3276) {
                        if (sign == 1 && digit > 7) return GPR_FAIL;
                        if (sign == -1 && digit > 8) return GPR_FAIL;
                    }
                }
                #Ifnot; ! i.e., if (WORDSIZE == 4)
                ! Eleven digits never fit. With ten, the first nine may be at
                ! most 214748364, and after exactly that the last digit may
                ! be at most 7 (for +2147483647) or 8 (for -2147483648).
                if (digit_count == 11) return GPR_FAIL;
                if (digit_count == 10) {
                    if (n > 214748364) return GPR_FAIL;
                    if (n == 214748364) {
                        if (sign == 1 && digit > 7) return GPR_FAIL;
                        if (sign == -1 && digit > 8) return GPR_FAIL;
                    }
                }
                #Endif; ! WORDSIZE
            16: if (digit_count == 5) return GPR_FAIL;
        }

        ! Accumulate the digit, or fail on any character invalid in this base.
        if (digit >= 0 && digit < base) n = base*n + digit;
        else return GPR_FAIL;

        ! Move on to the next character.
        wl--;
        #Iftrue CHARSIZE == 1;
        wa++;
        #Ifnot;
        wa = wa + WORDSIZE;
        #Endif; ! CHARSIZE
    }

    parsed_number = n*sign; wn++;
    return GPR_NUMBER;
];

§2. Truth states. And although truth states are not strictly speaking numbers, this seems as good a point as any to parse them:

! TRUTH_STATE_TOKEN: grammar token matching a truth state at word number wn.
! TRUTH_STATE_TOKEN_INNER() is consulted first, and any result it gives other
! than GPR_FAIL is returned unchanged. Failing that, the single word 'true'
! or 'false' is accepted, setting parsed_number to 1 or 0 respectively and
! returning GPR_NUMBER. On failure wn is put back where it started and
! GPR_FAIL is returned.
[ TRUTH_STATE_TOKEN start_wn verdict;
    start_wn = wn;
    verdict = TRUTH_STATE_TOKEN_INNER();
    if (verdict == GPR_FAIL) {
        ! Rewind, then look at the next word of the command directly.
        wn = start_wn;
        switch (NextWordStopped()) {
            'true':
                parsed_number = 1;
                return GPR_NUMBER;
            'false':
                parsed_number = 0;
                return GPR_NUMBER;
        }
        ! Neither word matched: leave wn as it was on entry.
        wn = start_wn;
        return GPR_FAIL;
    }
    return verdict;
];

§3. Real numbers. An I6 general parsing routine (GPR) for floats. On success, this returns GPR_NUMBER and stores a value in the global parsed_number.

This is quite a nuisance, actually, because "." is a word separator. Also, we want to accept command sequences like "type 4. look". So we need to collect a set of words made up of digits, signs, periods, and the letter "e", but without any intervening whitespace, and excluding a trailing period.

(This will fail to correctly parse "type 4.e", but I think that is a small flaw: a player would more likely try "type 4. e" or, really, not concatenate commands at all. It will also parse "type 4. on keyboard" as two commands, even though "4." is a legitimate float literal. Conversely, "type 4. x me" will be taken as one command, because the "x" *could* be a continuation of the float, and I don't back up when it turns out not to be. I don't plan to worry about these cases.)

#Iftrue WORDSIZE > 2;
! FLOAT_TOKEN: grammar token matching a floating-point literal starting at
! word number wn. Compiled only when WORDSIZE exceeds 2.
!
! Consecutive words are gathered into one run of characters for as long as
! each word consists solely of digits and the characters - + E e . x X * $D7
! (the multiplication sign) and $5E (the caret), and follows on from the
! previous word without whitespace. Whitespace is tolerated only next to a
! multiplication sign (* x X or $D7). A final word which is a lone period is
! dropped, so that it can end the command instead. The run is then handed to
! FloatParse.
!
! Returns GPR_NUMBER with the value in parsed_number, or GPR_FAIL if no words
! were gathered or FloatParse produced FLOAT_NAN.
!
! NOTE(review): when FloatParse gives FLOAT_NAN, wn is left past the gathered
! words rather than restored to its value on entry - confirm that callers do
! not rely on wn after a GPR_FAIL.
! NOTE(review): characters are read with --> and stepped by WORDSIZE
! throughout, i.e. this assumes word-sized characters - confirm for any
! configuration in which CHARSIZE is 1.
[ FLOAT_TOKEN buf bufend ix ch firstwd newstart newlen lastchar lastwasdot;
    if (wn > num_words)
        return GPR_FAIL;

    ! We're going to collect a set of words. Start with zero words.
    firstwd = wn;
    buf = WordAddress(wn);
    bufend = buf;
    lastchar = 0;
    lastwasdot = false;

    while (wn <= num_words) {
        newstart = WordAddress(wn);
        if (newstart ~= bufend) {
            ! There's whitespace between the previous word and this one.
            ! Whitespace is okay around an asterisk (or x, X, $D7)...
            if ((lastchar ~= '*' or 'x' or 'X' or $D7)
                && (newstart-->0 ~= '*' or 'x' or 'X' or $D7)) {
                ! But around any other character, it's not.
                ! Don't include the new word.
                break;
            }
        }
        newlen = WordLength(wn);
        ! Scan for the first character which cannot belong to a float.
        for (ix=0 : ix<newlen : ix++) {
            ch = newstart-->ix;
            if (~~((ch >= '0' && ch <= '9')
                || (ch == '-' or '+' or 'E' or 'e' or '.' or 'x' or 'X' or '*' or $D7 or $5E)))
                break;
        }
        if (ix < newlen) {
            ! This word contains an invalid character.
            ! Don't include the new word.
            break;
        }
        ! Okay, include it.
        bufend = newstart + newlen*WORDSIZE;
        wn++;
        ! Remember the last character gathered, and whether the word just
        ! added was nothing but a period.
        lastchar = (bufend-WORDSIZE)-->0;
        lastwasdot = (newlen == 1 && lastchar == '.');
    }

    if (wn > firstwd && lastwasdot) {
        ! Exclude a trailing period.
        wn--;
        bufend = bufend - WORDSIZE;
    }

    if (wn == firstwd) {
        ! No words accepted.
        return GPR_FAIL;
    }

    ! The second argument is the number of characters gathered.
    parsed_number = FloatParse(buf, (bufend-buf)/WORDSIZE, true);
    if (parsed_number == FLOAT_NAN)
        return GPR_FAIL;
    return GPR_NUMBER;
];
#Endif; ! WORDSIZE