Defining the programming languages supported by Inweb, loading in their definitions from files.


§1. Languages. Programming languages are identified by name: for example, C++ or Perl.

§2.

/* The fallback directory for language definition files: NULL until set by
   Languages::set_default_directory. */
pathname *default_programming_language_path = NULL;

/* Records P as the fallback directory, which Languages::find_by_name and
   Languages::read_definitions consult when given no other usable path. */
void Languages::set_default_directory(pathname *P) {
    default_programming_language_path = P;
}

/* Finds the language called lname, comparing names without regard to case.
   A language already loaded is returned as it stands; otherwise the file
   lname.ildf is looked for in P and then in the default directory, and read.
   If no such file exists, NULL is returned, and if error_if_not_found is set
   then a fatal error is issued first. */
programming_language *Languages::find_by_name(text_stream *lname, pathname *P,
    int error_if_not_found) {
    programming_language *pl;
    If this is the name of a language already known, return that2.1;
    Read the language definition file with this name2.2;
    /* the file must declare the same name as the one it was found under */
    if (Str::ne_insensitive(pl->language_name, lname))
        Errors::fatal_with_text(
            "definition of programming language '%S' is for something else", lname);
    return pl;
}

§2.1. If this is the name of a language already known, return that2.1 =

    LOOP_OVER(pl, programming_language)
        if (Str::eq_insensitive(lname, pl->language_name))
            return pl;

§2.2. Read the language definition file with this name2.2 =

    /* F remains NULL until one of the two attempts below finds a file which
       actually exists */
    filename *F = NULL;
    if (P) Try P2.2.1;
    /* second attempt: the default directory, if there is one */
    P = default_programming_language_path;
    if (P) Try P2.2.1;
    if (F == NULL) {
        if (error_if_not_found)
            Errors::fatal_with_text(
                "unsupported programming language '%S'", lname);
        return NULL;
    }
    pl = Languages::read_definition(F);

§2.2.1. Try P2.2.1 =

    /* Does nothing if a file has already been found. Otherwise F is set to
       the file lname.ildf inside P, but only if that file exists. */
    if (F == NULL) {
        TEMPORARY_TEXT(leaf)
        WRITE_TO(leaf, "%S.ildf", lname);
        filename *candidate = Filenames::in(P, leaf);
        DISCARD_TEXT(leaf)
        if (TextFiles::exists(candidate)) F = candidate;
    }

§3.

/* Writes a list of every programming language currently loaded, one per line
   in the form "name: details", sorted by name without regard to case. */
void Languages::show(OUTPUT_STREAM) {
    WRITE("I can see the following programming language definitions:\n\n");

    /* collect pointers to all of the languages into a temporary array */
    int count = NUMBER_CREATED(programming_language);
    int bytes = count*((int) sizeof(programming_language *));
    programming_language **table =
        Memory::calloc(count, (int) sizeof(programming_language *), ARRAY_SORTING_MREASON);
    int filled = 0;
    programming_language *each;
    LOOP_OVER(each, programming_language) table[filled++] = each;

    qsort(table, (size_t) count, sizeof(programming_language *), Languages::compare_names);

    for (int n=0; n<count; n++) {
        WRITE("%S: %S\n", table[n]->language_name, table[n]->language_details);
    }

    /* the array was only needed for sorting, so give it back */
    Memory::I7_free(table, ARRAY_SORTING_MREASON, bytes);
}

§4.

/* Comparison function for qsort: each argument points to an array entry
   which is itself a pointer to a programming_language, and the two are
   ordered by language name, ignoring case. */
int Languages::compare_names(const void *ent1, const void *ent2) {
    const programming_language *first = *((const programming_language **) ent1);
    const programming_language *second = *((const programming_language **) ent2);
    return Str::cmp_insensitive(first->language_name, second->language_name);
}

§5. We can read every language in a directory:

/* Reads each file in the directory P as a language definition. If P is NULL
   the default directory is used instead, and it is an internal error if
   that is NULL too. Directory entries whose names end with a folder
   separator are passed over. */
void Languages::read_definitions(pathname *P) {
    pathname *dir = P;
    if (dir == NULL) dir = default_programming_language_path;
    if (dir == NULL) internal_error("no path for definitions");

    scan_directory *scan = Directories::open(dir);
    TEMPORARY_TEXT(leafname)
    while (Directories::next(scan, leafname)) {
        if (Platform::is_folder_separator(Str::get_last_char(leafname))) continue;
        Languages::read_definition(Filenames::in(dir, leafname));
    }
    DISCARD_TEXT(leafname)
    Directories::close(scan);
}

§6. So, then, languages are defined by files which are read in, and parsed into the following structure (one per language):

/* Everything known about one programming language. An instance is created
   by Languages::read_definition each time a definition file is read, and is
   then filled in line by line from that file. */
typedef struct programming_language {
    text_stream *language_name;  identifies it: see above

     then a great many fields set directly in the definition file:
    text_stream *file_extension;  by default output to a file whose name has this extension
    text_stream *language_details;  brief explanation of what language is
    int supports_namespaces;
    text_stream *line_comment;
    text_stream *whole_line_comment;
    text_stream *multiline_comment_open;
    text_stream *multiline_comment_close;
    text_stream *string_literal;
    text_stream *string_literal_escape;
    text_stream *character_literal;
    text_stream *character_literal_escape;
    text_stream *binary_literal_prefix;
    text_stream *octal_literal_prefix;
    text_stream *hexadecimal_literal_prefix;
    text_stream *negative_literal_prefix;
    text_stream *shebang;
    text_stream *line_marker;
    /* these two are set by the keys "Before Named Paragraph Expansion" and
       "After Named Paragraph Expansion" */
    text_stream *before_macro_expansion;
    text_stream *after_macro_expansion;
    text_stream *start_definition;
    text_stream *prolong_definition;
    text_stream *end_definition;
    text_stream *start_ifdef;
    text_stream *end_ifdef;
    text_stream *start_ifndef;
    text_stream *end_ifndef;
    /* regular expressions, held in the converted form which Languages::regexp
       writes; empty (first character 0) if the file does not give them */
    inchar32_t type_notation[MAX_ILDF_REGEXP_LENGTH];
    inchar32_t function_notation[MAX_ILDF_REGEXP_LENGTH];

    int suppress_disclaimer;
    int C_like;  languages with this set have access to extra features

    struct linked_list *reserved_words;  of reserved_word
    /* each reserved word is also marked here, with its colour, by
       Languages::reserved */
    struct hash_table built_in_keywords;
    struct colouring_language_block *program;  algorithm for syntax colouring
    /* made by Methods::new_set when the language is initialised */
    struct method_set *methods;
    CLASS_DEFINITION
} programming_language;

§7. This is a simple one-pass compiler. The language_reader_state provides the only state preserved as we work through line by line, except of course that we are also working on the programming language it is defining. The current_block is the braced block of colouring instructions we are currently inside.

/* State carried from line to line while a definition file is read: it is
   handed to Languages::read_definition_line as a void pointer. */
typedef struct language_reader_state {
    /* the language being built from the file */
    struct programming_language *defining;
    /* innermost colouring block now open, or NULL when outside the program */
    struct colouring_language_block *current_block;
} language_reader_state;

/* Creates a new programming_language, sets it to plain-text defaults, and
   then feeds each line of the file F to Languages::read_definition_line to
   fill it in. Returns the new language. Note that a new language is created
   on every call, whether or not one with the same name already exists. */
programming_language *Languages::read_definition(filename *F) {
    programming_language *pl = CREATE(programming_language);
    Initialise the language to a plain-text state7.1;
    /* the reader begins outside of any colouring program */
    language_reader_state lrs;
    lrs.defining = pl;
    lrs.current_block = NULL;
    TextFiles::read(F, FALSE, "can't open programming language definition file",
        TRUE, Languages::read_definition_line, NULL, (void *) &lrs);
    Add method calls to the language7.2;
    return pl;
}

§7.1. Initialise the language to a plain-text state7.1 =

    /* Every field which a definition file can set is given a neutral default
       here, so that a file which sets nothing describes plain text. */
    pl->language_name = NULL;
    /* language_details is printed by Languages::show, and is otherwise set
       only if the file has a "Details" key, so it must not be left
       uninitialised */
    pl->language_details = NULL;
    pl->file_extension = NULL;
    pl->supports_namespaces = FALSE;
    pl->line_comment = NULL;
    pl->whole_line_comment = NULL;
    pl->multiline_comment_open = NULL;
    pl->multiline_comment_close = NULL;
    pl->string_literal = NULL;
    pl->string_literal_escape = NULL;
    pl->character_literal = NULL;
    pl->character_literal_escape = NULL;
    pl->binary_literal_prefix = NULL;
    pl->octal_literal_prefix = NULL;
    pl->hexadecimal_literal_prefix = NULL;
    pl->negative_literal_prefix = NULL;
    pl->shebang = NULL;
    pl->line_marker = NULL;
    pl->before_macro_expansion = NULL;
    pl->after_macro_expansion = NULL;
    pl->start_definition = NULL;
    pl->prolong_definition = NULL;
    pl->end_definition = NULL;
    pl->start_ifdef = NULL;
    pl->end_ifdef = NULL;
    pl->start_ifndef = NULL;
    pl->end_ifndef = NULL;
    pl->C_like = FALSE;
    pl->suppress_disclaimer = FALSE;
    /* a first character of 0 makes each regular expression empty */
    pl->type_notation[0] = 0;
    pl->function_notation[0] = 0;

    /* no reserved words, no colouring program, and a newly made method set */
    pl->reserved_words = NEW_LINKED_LIST(reserved_word);
    ReservedWords::initialise_hash_table(&(pl->built_in_keywords));
    pl->program = NULL;
    pl->methods = Methods::new_set();

§7.2. Note that there are two levels of extra privilege: any language calling itself C-like has functionality for function and structure definitions; the language whose name is InC gets even more, without having to ask.

Languages have effect through their method calls, which is how those extra features are provided. The call to ACMESupport::add_fallbacks adds generic method calls to give effect to the settings in the definition.

Add method calls to the language7.2 =

    /* All of this is compiled only when THIS_IS_INWEB is defined. The C-like
       and InC features are added first, and the fallbacks last. */
    #ifdef THIS_IS_INWEB
    if (pl->C_like) CLike::make_c_like(pl);
    if (Str::eq(pl->language_name, I"InC")) InCSupport::add_features(pl);
    ACMESupport::add_fallbacks(pl);
    #endif

§8. So, then, the above reads the file and feeds it line by line to this:

/* Handles one line of a language definition file. The argument v_state is
   the language_reader_state set up by Languages::read_definition; tfp is
   passed on to the error routines when a problem with the line is reported.
   Note that the white space trimming is applied to line itself. */
void Languages::read_definition_line(text_stream *line, text_file_position *tfp, void *v_state) {
    language_reader_state *state = (language_reader_state *) v_state;
    programming_language *pl = state->defining;

    Str::trim_white_space(line);  ignore trailing space
    if (Str::len(line) == 0) return;  ignore blank lines
    if (Str::get_first_char(line) == '#') return;  lines opening with # are comments

    /* which syntax applies depends on whether a colouring block is open */
    match_results mr = Regexp::create_mr();
    if (state->current_block) Syntax inside a colouring program8.2
    else Syntax outside a colouring program8.1;
    Regexp::dispose_of(&mr);
}

§8.1. Outside a colouring program, you can do three things: start a program, declare a reserved keyword, or set a key to a value.

Syntax outside a colouring program8.1 =

    /* The patterns are tried in order: the opening of the colouring program,
       then the two forms of keyword declaration (the one giving a colour
       first), then "Key: value" settings. Anything else is an error. */
    if (Regexp::match(&mr, line, U"colouring {")) {
        /* a second program is reported as an error, but then replaces the first */
        if (pl->program) Errors::in_text_file("duplicate colouring program", tfp);
        pl->program = Languages::new_block(NULL, WHOLE_LINE_CRULE_RUN);
        state->current_block = pl->program;
    } else if (Regexp::match(&mr, line, U"keyword (%C+) of (%c+?)")) {
        Languages::reserved(pl, Languages::text(mr.exp[0], tfp, FALSE), Languages::colour(mr.exp[1], tfp), tfp);
    } else if (Regexp::match(&mr, line, U"keyword (%C+)")) {
        /* with no colour given, the keyword is coloured as reserved */
        Languages::reserved(pl, Languages::text(mr.exp[0], tfp, FALSE), RESERVED_COLOUR, tfp);
    } else if (Regexp::match(&mr, line, U"(%c+) *: *(%c+?)")) {
        /* key names are compared exactly, so they are case-sensitive; text
           values are parsed with unquoted spaces allowed */
        text_stream *key = mr.exp[0], *value = Str::duplicate(mr.exp[1]);
        if (Str::eq(key, I"Name")) pl->language_name = Languages::text(value, tfp, TRUE);
        else if (Str::eq(key, I"Details"))
            pl->language_details = Languages::text(value, tfp, TRUE);
        else if (Str::eq(key, I"Extension"))
            pl->file_extension = Languages::text(value, tfp, TRUE);
        else if (Str::eq(key, I"Line Comment"))
            pl->line_comment = Languages::text(value, tfp, TRUE);
        else if (Str::eq(key, I"Whole Line Comment"))
            pl->whole_line_comment = Languages::text(value, tfp, TRUE);
        else if (Str::eq(key, I"Multiline Comment Open"))
            pl->multiline_comment_open = Languages::text(value, tfp, TRUE);
        else if (Str::eq(key, I"Multiline Comment Close"))
            pl->multiline_comment_close = Languages::text(value, tfp, TRUE);
        else if (Str::eq(key, I"String Literal"))
            pl->string_literal = Languages::text(value, tfp, TRUE);
        else if (Str::eq(key, I"String Literal Escape"))
            pl->string_literal_escape = Languages::text(value, tfp, TRUE);
        else if (Str::eq(key, I"Character Literal"))
            pl->character_literal = Languages::text(value, tfp, TRUE);
        else if (Str::eq(key, I"Character Literal Escape"))
            pl->character_literal_escape = Languages::text(value, tfp, TRUE);
        else if (Str::eq(key, I"Binary Literal Prefix"))
            pl->binary_literal_prefix = Languages::text(value, tfp, TRUE);
        else if (Str::eq(key, I"Octal Literal Prefix"))
            pl->octal_literal_prefix = Languages::text(value, tfp, TRUE);
        else if (Str::eq(key, I"Hexadecimal Literal Prefix"))
            pl->hexadecimal_literal_prefix = Languages::text(value, tfp, TRUE);
        else if (Str::eq(key, I"Negative Literal Prefix"))
            pl->negative_literal_prefix = Languages::text(value, tfp, TRUE);
        else if (Str::eq(key, I"Shebang"))
            pl->shebang = Languages::text(value, tfp, TRUE);
        else if (Str::eq(key, I"Line Marker"))
            pl->line_marker = Languages::text(value, tfp, TRUE);
        else if (Str::eq(key, I"Before Named Paragraph Expansion"))
            pl->before_macro_expansion = Languages::text(value, tfp, TRUE);
        else if (Str::eq(key, I"After Named Paragraph Expansion"))
            pl->after_macro_expansion = Languages::text(value, tfp, TRUE);
        else if (Str::eq(key, I"Start Definition"))
            pl->start_definition = Languages::text(value, tfp, TRUE);
        else if (Str::eq(key, I"Prolong Definition"))
            pl->prolong_definition = Languages::text(value, tfp, TRUE);
        else if (Str::eq(key, I"End Definition"))
            pl->end_definition = Languages::text(value, tfp, TRUE);
        else if (Str::eq(key, I"Start Ifdef"))
            pl->start_ifdef = Languages::text(value, tfp, TRUE);
        else if (Str::eq(key, I"Start Ifndef"))
            pl->start_ifndef = Languages::text(value, tfp, TRUE);
        else if (Str::eq(key, I"End Ifdef"))
            pl->end_ifdef = Languages::text(value, tfp, TRUE);
        else if (Str::eq(key, I"End Ifndef"))
            pl->end_ifndef = Languages::text(value, tfp, TRUE);
        else if (Str::eq(key, I"C-Like"))
            pl->C_like = Languages::boolean(value, tfp);
        else if (Str::eq(key, I"Suppress Disclaimer"))
            pl->suppress_disclaimer = Languages::boolean(value, tfp);
        else if (Str::eq(key, I"Supports Namespaces"))
            pl->supports_namespaces = Languages::boolean(value, tfp);
        else if (Str::eq(key, I"Function Declaration Notation"))
            Languages::regexp(pl->function_notation, value, tfp);
        else if (Str::eq(key, I"Type Declaration Notation"))
            Languages::regexp(pl->type_notation, value, tfp);
        else {
            Errors::in_text_file("unknown property name before ':'", tfp);
        }
    } else {
        Errors::in_text_file("line in language definition illegible", tfp);
    }

§8.2. Inside a colouring program, you can close the current block (which may be the entire program), open a new block to apply to each character or to runs of a given colour, or give an if-X-then-Y rule:

Syntax inside a colouring program8.2 =

    /* A lone close brace ends the current block. Each of the block-opening
       forms adds a rule to the current block whose conclusion is a new
       sub-block, and then makes that sub-block current. Any other line must
       be a rule of the form "premiss => conclusion". */
    if (Str::eq(line, I"}")) {
        /* the parent of the outermost block is NULL, which ends the program */
        state->current_block = state->current_block->parent;
    } else if (Regexp::match(&mr, line, U"characters {")) {
        colouring_rule *rule = Languages::new_rule(state->current_block);
        rule->execute_block =
            Languages::new_block(state->current_block, CHARACTERS_CRULE_RUN);
        state->current_block = rule->execute_block;
    } else if (Regexp::match(&mr, line, U"characters in (%c+) {")) {
        colouring_rule *rule = Languages::new_rule(state->current_block);
        rule->execute_block =
            Languages::new_block(state->current_block, CHARACTERS_IN_CRULE_RUN);
        rule->execute_block->char_set = Languages::text(mr.exp[0], tfp, FALSE);
        state->current_block = rule->execute_block;
    } else if (Regexp::match(&mr, line, U"runs of (%c+) {")) {
        colouring_rule *rule = Languages::new_rule(state->current_block);
        /* here the run value of the block is a colour: either the pseudo-colour
           for "unquoted", or whatever colour is named */
        inchar32_t r = UNQUOTED_COLOUR;
        if (Str::ne(mr.exp[0], I"unquoted")) r = Languages::colour(mr.exp[0], tfp);
        rule->execute_block = Languages::new_block(state->current_block, (int) r);
        state->current_block = rule->execute_block;
    } else if (Regexp::match(&mr, line, U"instances of (%c+) {")) {
        colouring_rule *rule = Languages::new_rule(state->current_block);
        rule->execute_block = Languages::new_block(state->current_block, INSTANCES_CRULE_RUN);
        rule->execute_block->run_instance = Languages::text(mr.exp[0], tfp, FALSE);
        state->current_block = rule->execute_block;
    } else if (Regexp::match(&mr, line, U"matches of (%c+) {")) {
        colouring_rule *rule = Languages::new_rule(state->current_block);
        rule->execute_block = Languages::new_block(state->current_block, MATCHES_CRULE_RUN);
        Languages::regexp(rule->execute_block->match_regexp_text, mr.exp[0], tfp);
        state->current_block = rule->execute_block;
    } else if (Regexp::match(&mr, line, U"brackets in (%c+) {")) {
        colouring_rule *rule = Languages::new_rule(state->current_block);
        rule->execute_block = Languages::new_block(state->current_block, BRACKETS_CRULE_RUN);
        Languages::regexp(rule->execute_block->match_regexp_text, mr.exp[0], tfp);
        state->current_block = rule->execute_block;
    } else {
        /* look for "=>" outside of double-quoted text, skipping over the
           character after any backslash inside quotes; if there are several
           such arrows, at ends up as the position of the last */
        int at = -1, quoted = FALSE;
        for (int i=0; i<Str::len(line)-1; i++) {
            if (Str::get_at(line, i) == '"') quoted = quoted?FALSE:TRUE;
            if ((quoted) && (Str::get_at(line, i) == '\\')) i++;
            if ((quoted == FALSE) &&
                (Str::get_at(line, i) == '=') && (Str::get_at(line, i+1) == '>')) at = i;
        }
        if (at >= 0) {
            /* split the line into the text before and after the arrow */
            TEMPORARY_TEXT(premiss)
            TEMPORARY_TEXT(conclusion)
            Str::substr(premiss, Str::start(line), Str::at(line, at));
            Str::substr(conclusion, Str::at(line, at+2), Str::end(line));
            Languages::parse_rule(state, premiss, conclusion, tfp);
            DISCARD_TEXT(conclusion)
            DISCARD_TEXT(premiss)
        } else {
            Errors::in_text_file("line in colouring block illegible", tfp);
        }
    }

§9. Blocks. These are code blocks of colouring instructions. A block whose parent is NULL represents a complete program.

/* The run value of a block is either one of these negative codes or else a
   colour. */
define WHOLE_LINE_CRULE_RUN -1  This block applies to the whole snippet being coloured
define CHARACTERS_CRULE_RUN -2  This block applies to each character in turn
define CHARACTERS_IN_CRULE_RUN -3  This block applies to each character from a set in turn
define INSTANCES_CRULE_RUN -4  This block applies to each instance in turn
define MATCHES_CRULE_RUN -5  This block applies to each match against a regexp in turn
define BRACKETS_CRULE_RUN -6  This block applies to bracketed subexpressions in a regexp
/* One braced block of colouring rules, as created by Languages::new_block. */
typedef struct colouring_language_block {
    struct linked_list *rules;  of colouring_rule
    struct colouring_language_block *parent;  or NULL for the topmost one
    int run;  one of the *_CRULE_RUN values, or else a colour
    struct text_stream *run_instance;  used only for INSTANCES_CRULE_RUN
    struct text_stream *char_set;  used only for CHARACTERS_IN_CRULE_RUN
    inchar32_t match_regexp_text[MAX_ILDF_REGEXP_LENGTH];  used for MATCHES_CRULE_RUN, BRACKETS_CRULE_RUN

     workspace during painting
    struct match_results mr;  of a regular expression
    CLASS_DEFINITION
} colouring_language_block;

§10.

/* Makes a new, empty block of colouring rules lying inside the block within
   (or NULL for the outermost block of a program), and with run value r: that
   is, one of the *_CRULE_RUN codes, or a colour. */
colouring_language_block *Languages::new_block(colouring_language_block *within, int r) {
    colouring_language_block *B = CREATE(colouring_language_block);

    /* where the block sits, and what it applies to */
    B->parent = within;
    B->run = r;

    /* no rules yet, and none of the optional details */
    B->rules = NEW_LINKED_LIST(colouring_rule);
    B->char_set = NULL;
    B->run_instance = NULL;
    B->match_regexp_text[0] = 0;

    /* workspace for later use */
    B->mr = Regexp::create_mr();
    return B;
}

§11. Colouring Rules. Each individual rule has the form: if a premiss, then a conclusion. It will be applied to a snippet of text, and the premiss can test that, together with a little context before it (where available).

Note that rules can be unconditional, in that the premiss always passes.

/* Possible values for the match_prefix field of a rule; note that the
   suffix forms are stored in that same field. */
define NOT_A_RULE_PREFIX 1  this isn't a prefix rule
define UNSPACED_RULE_PREFIX 2  for prefix P
define SPACED_RULE_PREFIX 3  for spaced prefix P
define OPTIONALLY_SPACED_RULE_PREFIX 4  for optionally spaced prefix P
define UNSPACED_RULE_SUFFIX 5  for suffix P
define SPACED_RULE_SUFFIX 6  for spaced suffix P
define OPTIONALLY_SPACED_RULE_SUFFIX 7  for optionally spaced suffix P
/* The size, in characters, of every buffer holding a converted regular
   expression. */
define MAX_ILDF_REGEXP_LENGTH 64
/* A single rule within a block, as created by Languages::new_rule and then
   filled in by Languages::parse_rule or by the opening of a sub-block. */
typedef struct colouring_rule {
     the premiss:
    int sense;  FALSE to negate the condition
    inchar32_t match_colour;  for coloured C, or else NOT_A_COLOUR
    inchar32_t match_keyword_of_colour;  for keyword C, or else NOT_A_COLOUR
    struct text_stream *match_text;  or length 0 to mean "anything"
    int match_prefix;  one of the *_RULE_PREFIX values above
    inchar32_t match_regexp_text[MAX_ILDF_REGEXP_LENGTH];
    int number;  for number N rules; 0 for others
    int number_of;  for number N of M rules; 0 for others

     the conclusion:
    struct colouring_language_block *execute_block;  or NULL, in which case...
    inchar32_t set_to_colour;  ...paint the snippet in this colour
    inchar32_t set_prefix_to_colour;  ...also paint this (same for suffix)
    int debug;  ...or print debugging text to console

     workspace during painting
    int fix_position;  where the prefix or suffix started
    struct match_results mr;  of a regular expression
    CLASS_DEFINITION
} colouring_rule;

§12.

/* Makes a new rule and adds it to the end of the rules for the block within,
   which must not be NULL. The rule begins with a premiss which tests nothing
   and a conclusion which does nothing. */
colouring_rule *Languages::new_rule(colouring_language_block *within) {
    if (within == NULL) internal_error("rule outside block");
    colouring_rule *R = CREATE(colouring_rule);

    /* the premiss: positive sense, and no tests */
    R->sense = TRUE;
    R->match_colour = NOT_A_COLOUR;
    R->match_keyword_of_colour = NOT_A_COLOUR;
    R->match_text = NULL;
    R->match_prefix = NOT_A_RULE_PREFIX;
    R->match_regexp_text[0] = 0;
    R->number = 0;
    R->number_of = 0;

    /* the conclusion: no sub-block, no painting, no debugging */
    R->execute_block = NULL;
    R->set_to_colour = NOT_A_COLOUR;
    R->set_prefix_to_colour = NOT_A_COLOUR;
    R->debug = FALSE;

    /* workspace for later use */
    R->fix_position = 0;
    R->mr = Regexp::create_mr();

    ADD_TO_LINKED_LIST(R, colouring_rule, within->rules);
    return R;
}

§13.

/* Adds a rule to the current block of state, parsed from the text on either
   side of a "=>": premiss is what came before the arrow, and action what
   came after. Both texts are trimmed of white space in place, and premiss
   may be further rewritten as it is parsed. Errors are reported at tfp. */
void Languages::parse_rule(language_reader_state *state, text_stream *premiss,
    text_stream *action, text_file_position *tfp) {
    match_results mr = Regexp::create_mr();
    colouring_rule *rule = Languages::new_rule(state->current_block);
    Str::trim_white_space(premiss); Str::trim_white_space(action);
    Parse the premiss13.1;
    Parse the conclusion13.2;
    Regexp::dispose_of(&mr);
}

§13.1. Parse the premiss13.1 =

    /* Each leading "not" is removed from the premiss, and flips the sense
       of the rule. */
    while (Regexp::match(&mr, premiss, U"not (%c+)")) {
        rule->sense = (rule->sense)?FALSE:TRUE;
        Str::clear(premiss); Str::copy(premiss, mr.exp[0]);
    }
    /* The forms of premiss are then tried in the order below. */
    /* NOTE(review): this order presumably relies on Regexp::match having to
       match the whole of the text, so that for example "number (%d+)" cannot
       claim a "number N of M" premiss - confirm before reordering. */
    if (Regexp::match(&mr, premiss, U"number (%d+)")) {
        rule->number = Str::atoi(mr.exp[0], 0);
    } else if (Regexp::match(&mr, premiss, U"number (%d+) of (%d+)")) {
        rule->number = Str::atoi(mr.exp[0], 0);
        rule->number_of = Str::atoi(mr.exp[1], 0);
    } else if (Regexp::match(&mr, premiss, U"keyword of (%c+)")) {
        rule->match_keyword_of_colour = Languages::colour(mr.exp[0], tfp);
    } else if (Regexp::match(&mr, premiss, U"keyword")) {
        Errors::in_text_file("ambiguous: make it keyword of !reserved or \"keyword\"", tfp);
    } else if (Regexp::match(&mr, premiss, U"prefix (%c+)")) {
        rule->match_prefix = UNSPACED_RULE_PREFIX;
        rule->match_text = Languages::text(mr.exp[0], tfp, FALSE);
    } else if (Regexp::match(&mr, premiss, U"matching (%c+)")) {
        Languages::regexp(rule->match_regexp_text, mr.exp[0], tfp);
    } else if (Regexp::match(&mr, premiss, U"spaced prefix (%c+)")) {
        rule->match_prefix = SPACED_RULE_PREFIX;
        rule->match_text = Languages::text(mr.exp[0], tfp, FALSE);
    } else if (Regexp::match(&mr, premiss, U"optionally spaced prefix (%c+)")) {
        rule->match_prefix = OPTIONALLY_SPACED_RULE_PREFIX;
        rule->match_text = Languages::text(mr.exp[0], tfp, FALSE);
    } else if (Regexp::match(&mr, premiss, U"suffix (%c+)")) {
        rule->match_prefix = UNSPACED_RULE_SUFFIX;
        rule->match_text = Languages::text(mr.exp[0], tfp, FALSE);
    } else if (Regexp::match(&mr, premiss, U"spaced suffix (%c+)")) {
        rule->match_prefix = SPACED_RULE_SUFFIX;
        rule->match_text = Languages::text(mr.exp[0], tfp, FALSE);
    } else if (Regexp::match(&mr, premiss, U"optionally spaced suffix (%c+)")) {
        rule->match_prefix = OPTIONALLY_SPACED_RULE_SUFFIX;
        rule->match_text = Languages::text(mr.exp[0], tfp, FALSE);
    } else if (Regexp::match(&mr, premiss, U"coloured (%c+)")) {
        rule->match_colour = Languages::colour(mr.exp[0], tfp);
    } else if (Str::len(premiss) > 0) {
        /* anything else is text to match; an empty premiss leaves the rule
           with nothing to test */
        rule->match_text = Languages::text(premiss, tfp, FALSE);
    }

§13.2. Parse the conclusion13.2 =

    /* The conclusion is one of: an open brace, beginning a sub-block to be
       run on the whole snippet; a colour, perhaps followed by "on prefix",
       "on suffix" or "on both"; or the word "debug". */
    if (Str::eq(action, I"{")) {
        rule->execute_block =
            Languages::new_block(state->current_block, WHOLE_LINE_CRULE_RUN);
        state->current_block = rule->execute_block;
    } else if (Regexp::match(&mr, action, U"(!%c+) on prefix")) {
        rule->set_prefix_to_colour = Languages::colour(mr.exp[0], tfp);
    } else if (Regexp::match(&mr, action, U"(!%c+) on suffix")) {
        /* the same field serves for suffixes as for prefixes */
        rule->set_prefix_to_colour = Languages::colour(mr.exp[0], tfp);
    } else if (Regexp::match(&mr, action, U"(!%c+) on both")) {
        rule->set_to_colour = Languages::colour(mr.exp[0], tfp);
        rule->set_prefix_to_colour = rule->set_to_colour;
    } else if (Str::get_first_char(action) == '!') {
        rule->set_to_colour = Languages::colour(action, tfp);
    } else if (Str::eq(action, I"debug")) {
        rule->debug = TRUE;
    } else {
        Errors::in_text_file("action after '=>' illegible", tfp);
    }

§14. Reserved words. Note that these can come in any colour, though usually it's !reserved.

/* One reserved word of a language, as created by Languages::reserved. */
typedef struct reserved_word {
    /* a copy of the word's text, made when the reserved word is created */
    struct text_stream *word;
    /* the colour it is to be given, stored as an int */
    int colour;
    CLASS_DEFINITION
} reserved_word;

/* Declares W to be a reserved word of the language pl, with colour C, and
   returns the new reserved_word. If W has been declared already, an error
   is reported at tfp, but the new declaration is made all the same. */
reserved_word *Languages::reserved(programming_language *pl, text_stream *W, inchar32_t C,
    text_file_position *tfp) {
    reserved_word *existing;
    LOOP_OVER_LINKED_LIST(existing, reserved_word, pl->reserved_words) {
        if (Str::eq(existing->word, W))
            Errors::in_text_file("duplicate reserved word", tfp);
    }

    int colour_code = (int) C;
    reserved_word *fresh = CREATE(reserved_word);
    fresh->word = Str::duplicate(W);
    fresh->colour = colour_code;

    /* record the word both in the list and in the hash table of the language */
    ADD_TO_LINKED_LIST(fresh, reserved_word, pl->reserved_words);
    ReservedWords::mark_reserved_word(&(pl->built_in_keywords), fresh->word, colour_code);
    return fresh;
}

§15. Expressions. Language definition files have three types of data: colours, booleans, and text. Colours first. Note that there are two pseudo-colours used above which are not expressible in the syntax of this file.

/* Each colour is represented by a single character. Those down to
   COMMENT_COLOUR are the ones which Languages::colour can return. */
define DEFINITION_COLOUR   'd'
define FUNCTION_COLOUR     'f'
define RESERVED_COLOUR     'r'
define ELEMENT_COLOUR      'e'
define IDENTIFIER_COLOUR   'i'
define CHARACTER_COLOUR     'c'
define CONSTANT_COLOUR     'n'
define STRING_COLOUR       's'
define PLAIN_COLOUR            'p'
define EXTRACT_COLOUR      'x'
define COMMENT_COLOUR      '!'
define NEWLINE_COLOUR      '\n'
/* NOT_A_COLOUR is the value of the colour fields of a rule which are not in
   use; UNQUOTED_COLOUR is what "runs of unquoted" uses as its run value. */
define NOT_A_COLOUR ' '
define UNQUOTED_COLOUR '_'
/* Converts the name of a colour, such as !string, into its one-character
   code. A name which does not begin with an exclamation mark, or which is
   not recognised, is reported as an error at tfp, and PLAIN_COLOUR is
   then returned. */
inchar32_t Languages::colour(text_stream *T, text_file_position *tfp) {
    if (Str::get_first_char(T) != '!') {
        Errors::in_text_file("colour names must begin with !", tfp);
        return PLAIN_COLOUR;
    }

    if (Str::eq(T, I"!string")) return STRING_COLOUR;
    if (Str::eq(T, I"!function")) return FUNCTION_COLOUR;
    if (Str::eq(T, I"!definition")) return DEFINITION_COLOUR;
    if (Str::eq(T, I"!reserved")) return RESERVED_COLOUR;
    if (Str::eq(T, I"!element")) return ELEMENT_COLOUR;
    if (Str::eq(T, I"!identifier")) return IDENTIFIER_COLOUR;
    if (Str::eq(T, I"!character")) return CHARACTER_COLOUR;
    if (Str::eq(T, I"!constant")) return CONSTANT_COLOUR;
    if (Str::eq(T, I"!plain")) return PLAIN_COLOUR;
    if (Str::eq(T, I"!extract")) return EXTRACT_COLOUR;
    if (Str::eq(T, I"!comment")) return COMMENT_COLOUR;

    Errors::in_text_file("no such !colour", tfp);
    return PLAIN_COLOUR;
}

§16. A boolean must be written as true or false.

/* Converts the text "true" or "false" into TRUE or FALSE. Anything else is
   reported as an error at tfp, and is then treated as FALSE. */
int Languages::boolean(text_stream *T, text_file_position *tfp) {
    if (Str::eq(T, I"true")) return TRUE;
    if (Str::eq(T, I"false")) return FALSE;
    Errors::in_text_file("must be true or false", tfp);
    return FALSE;
}

§17. In text, \n represents a newline, \s a space and \t a tab. Spaces can be given in the ordinary way inside a text in any case. \\ is a literal backslash. Inside text written in double-quotation marks, \" is a literal double-quotation mark.

/* Converts T, a piece of text as written in a definition file, into a new
   text_stream holding what it means, and returns that: the result is empty
   if T is. T is either enclosed in double-quotation marks or else is a
   "bareword". If allow is FALSE, then a bareword containing a space is
   reported as an error; problems are reported at tfp, and characters at
   fault are left out of the result. */
text_stream *Languages::text(text_stream *T, text_file_position *tfp, int allow) {
    text_stream *V = Str::new();
    if (Str::len(T) > 0) {
        int bareword = TRUE, spaced = FALSE, from = 0, to = Str::len(T)-1;
        /* strip a pair of enclosing quotation marks, if there is one */
        if ((to > from) &&
            (Str::get_at(T, from) == '"') && (Str::get_at(T, to) == '"')) {
            bareword = FALSE; from++; to--;
        }
        for (int i=from; i<=to; i++) {
            inchar32_t c = Str::get_at(T, i);
            if (c == ' ') spaced = TRUE;
            /* a recognised escape uses up two characters of T, hence the i++ */
            if ((c == '\\') && (Str::get_at(T, i+1) == 'n')) {
                PUT_TO(V, '\n');
                i++;
            } else if ((c == '\\') && (Str::get_at(T, i+1) == 's')) {
                PUT_TO(V, ' ');
                i++;
            } else if ((c == '\\') && (Str::get_at(T, i+1) == 't')) {
                PUT_TO(V, '\t');
                i++;
            } else if ((c == '\\') && (Str::get_at(T, i+1) == '\\')) {
                PUT_TO(V, '\\');
                i++;
            } else if ((bareword == FALSE) && (c == '\\') && (Str::get_at(T, i+1) == '"')) {
                PUT_TO(V, '"');
                i++;
            } else if ((bareword == FALSE) && (c == '"')) {
                Errors::in_text_file(
                    "backslash needed before internal double-quotation mark", tfp);
            } else if ((bareword) && (c == '!') && (i == from)) {
                Errors::in_text_file(
                    "a literal starting with ! must be in double-quotation marks", tfp);
            } else if ((bareword) && (c == '/')) {
                Errors::in_text_file(
                    "forward slashes can only be used in quoted strings", tfp);
            } else if ((bareword) && (c == '"')) {
                Errors::in_text_file(
                    "double-quotation marks can only be used in quoted strings", tfp);
            } else {
                PUT_TO(V, c);
            }
        }
        if ((bareword) && (spaced) && (allow == FALSE)) {
            TEMPORARY_TEXT(err)
            WRITE_TO(err, "'%S' seems to be literal text, but if so it needs double-quotation marks", T);
            Errors::in_text_file_S(err, tfp);
            DISCARD_TEXT(err)
        }
        /* a bareword which is one of the words of the definition syntax is
           reported as an error, though it is still returned */
        if (bareword) {
            int rw = FALSE;
            if (Str::eq(V, I"both")) rw = TRUE;
            if (Str::eq(V, I"brackets")) rw = TRUE;
            if (Str::eq(V, I"characters")) rw = TRUE;
            if (Str::eq(V, I"coloured")) rw = TRUE;
            if (Str::eq(V, I"colouring")) rw = TRUE;
            if (Str::eq(V, I"debug")) rw = TRUE;
            if (Str::eq(V, I"false")) rw = TRUE;
            if (Str::eq(V, I"in")) rw = TRUE;
            if (Str::eq(V, I"instances")) rw = TRUE;
            if (Str::eq(V, I"keyword")) rw = TRUE;
            if (Str::eq(V, I"matches")) rw = TRUE;
            if (Str::eq(V, I"matching")) rw = TRUE;
            if (Str::eq(V, I"not")) rw = TRUE;
            if (Str::eq(V, I"of")) rw = TRUE;
            if (Str::eq(V, I"on")) rw = TRUE;
            if (Str::eq(V, I"optionally")) rw = TRUE;
            if (Str::eq(V, I"prefix")) rw = TRUE;
            if (Str::eq(V, I"runs")) rw = TRUE;
            if (Str::eq(V, I"spaced")) rw = TRUE;
            if (Str::eq(V, I"suffix")) rw = TRUE;
            if (Str::eq(V, I"true")) rw = TRUE;
            if (Str::eq(V, I"unquoted")) rw = TRUE;

            if (rw) {
                TEMPORARY_TEXT(err)
                WRITE_TO(err, "'%S' is a reserved word, so you should put it in double-quotation marks", V);
                Errors::in_text_file_S(err, tfp);
                DISCARD_TEXT(err)
            }
        }
    }
    return V;
}

§18. And regular expressions.

/* Converts the regular expression T, which must be written between slashes,
   from the notation of definition files into the internal notation, where
   escapes begin with a percent sign. The result is stored in write_to, a
   buffer of MAX_ILDF_REGEXP_LENGTH characters which must not be NULL, and is
   always zero-terminated. If T is empty or not in slashes, the result is
   empty; if it is too long to fit, an error is reported at tfp and the
   result is cut short. */
void Languages::regexp(inchar32_t *write_to, text_stream *T, text_file_position *tfp) {
    if (write_to == NULL) internal_error("no buffer");
    write_to[0] = 0;
    if (Str::len(T) > 0) {
        int from = 0, to = Str::len(T)-1, x = 0;
        if ((to > from) &&
            (Str::get_at(T, from) == '/') && (Str::get_at(T, to) == '/')) {
            from++; to--;
            for (int i=from; i<=to; i++) {
                inchar32_t c = Str::get_at(T, i);
                if (c == '\\') {
                    /* a backslash uses up the character after it as well */
                    inchar32_t w = Str::get_at(T, i+1);
                    if (w == '\\') {
                        x = Languages::add_to_regexp(write_to, x, w);
                    } else if (w == 'd') {
                        x = Languages::add_escape_to_regexp(write_to, x, 'd');
                    } else if (w == 't') {
                        x = Languages::add_escape_to_regexp(write_to, x, 't');
                    } else if (w == 's') {
                        x = Languages::add_to_regexp(write_to, x, ' ');
                    } else if (w == 'S') {
                        x = Languages::add_escape_to_regexp(write_to, x, 'C');
                    } else if (w == '"') {
                        x = Languages::add_escape_to_regexp(write_to, x, 'q');
                    } else {
                        x = Languages::add_escape_to_regexp(write_to, x, w);
                    }
                    i++;
                    continue;
                }
                if (c == '.') {
                    x = Languages::add_escape_to_regexp(write_to, x, 'c');
                    continue;
                }
                if (c == '%') {
                    /* a percent sign has to be escaped in the internal notation */
                    x = Languages::add_escape_to_regexp(write_to, x, '%');
                    continue;
                }
                x = Languages::add_to_regexp(write_to, x, c);
            }
        } else {
            Errors::in_text_file(
                "the expression to match must be in slashes '/'", tfp);
        }
        if (x >= MAX_ILDF_REGEXP_LENGTH) {
            Errors::in_text_file(
                "the expression to match is too long", tfp);
            /* give up the last character to make room for the terminator */
            x = MAX_ILDF_REGEXP_LENGTH - 1;
        }
        /* terminate explicitly, rather than trusting the buffer to have been
           zeroed: it may hold a longer expression from an earlier setting */
        write_to[x] = 0;
    }
}

/* Stores the character c at position i of the buffer write_to, and returns
   the position after it; but if i is at or beyond the end of the buffer,
   nothing is stored and i is returned unchanged. */
int Languages::add_to_regexp(inchar32_t *write_to, int i, inchar32_t c) {
    if (i >= MAX_ILDF_REGEXP_LENGTH) return i;
    write_to[i] = c;
    return i + 1;
}

/* Stores a percent sign and then the character c in the buffer write_to,
   starting at position i, and returns the position reached. Each of the two
   characters is stored only if there is room for it. */
int Languages::add_escape_to_regexp(inchar32_t *write_to, int i, inchar32_t c) {
    int after_percent = Languages::add_to_regexp(write_to, i, '%');
    return Languages::add_to_regexp(write_to, after_percent, c);
}