A simple, general-purpose preprocessor for text files, expanding macros and performing repetitions.


§1. Scanner. Writing a general-purpose preprocessor really is coding like it's 1974, but it turns out to be useful for multiple applications in the Inform project, and saves us having to have dependencies on behemoths like the mighty m4.

To use the preprocessor, call:

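Preprocessor::preprocess(from, to, header, special_macros, specifics, comment_char, encoding)

where from and to are filenames, header is text to place at the top of the file (if any), special_macros is a linked_list of preprocessor_macros set up with special meanings to the situation, specifics is a general pointer to any data those special meanings need to use, and comment_char is the character which marks a line as a comment when it is the first non-whitespace character on that line. encoding should be one of UTF8_ENC or ISO_ENC. The return value is a linked_list of any preprocessor_errors found.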

If to is NULL, we write to standard output instead of a file.

define PROTECTED_OPEN_BRACE_PPCHAR 0x25A0 /* stands in for a backslash-escaped |{| until final output */
define PROTECTED_CLOSE_BRACE_PPCHAR 0x25A1 /* stands in for a backslash-escaped |}| until final output */
define PROTECTED_BLANK_PPCHAR 0x25A2 /* marks a character consumed by an escape: never printed */
linked_list *Preprocessor::preprocess(filename *prototype, filename *F, text_stream *header,
    linked_list *special_macros, general_pointer specifics, inchar32_t comment_char,
    int encoding) {
    /* Choose where the finished text goes: the file |F|, or standard output if |F| is null. */
    struct text_stream processed_file;
    struct text_stream *OUT = STDOUT;
    if (F) {
        OUT = &processed_file;
        if (STREAM_OPEN_TO_FILE(&processed_file, F, encoding) == FALSE)
            Errors::fatal_with_file("unable to write tangled file", F);
    }
    WRITE("%S", header);

    /* Expand the whole prototype, line by line, into |PPS.dest|. */
    preprocessor_state PPS;
    Initialise the preprocessor state1.1;
    TextFiles::read(prototype, FALSE, "can't open prototype file",
        TRUE, Preprocessor::scan_line, NULL, &PPS);

    /* Copy the result out: protected braces become real ones, protected blanks vanish. */
    for (int i=0; i<Str::len(PPS.dest); i++) {
        inchar32_t c = Str::get_at(PPS.dest, i);
        switch (c) {
            case PROTECTED_OPEN_BRACE_PPCHAR: PUT('{'); break;
            case PROTECTED_CLOSE_BRACE_PPCHAR: PUT('}'); break;
            case PROTECTED_BLANK_PPCHAR: break;
            default: PUT(c); break;
        }
    }
    if (F) STREAM_CLOSE(OUT);
    return PPS.errors;
}

§2. The following imposing-looking set of state data is used as we work through the prototype file line-by-line:

define MAX_PREPROCESSOR_LOOP_DEPTH 8
typedef struct preprocessor_state {
    struct text_stream *dest; /* where expanded text is currently being written */
    struct preprocessor_macro *defining; /* a "define" body being scanned */
    int repeat_sp; /* number of entries of |repeat_data| currently in use */
    int shadow_sp; /* nesting depth among loops seen but not yet acted on */
    struct preprocessor_loop repeat_data[MAX_PREPROCESSOR_LOOP_DEPTH];
    int suppress_newline; /* at the end of this line */
    int last_line_was_blank; /* used to suppress runs of multiple blank lines */
    struct preprocessor_variable_set *global_variables; /* the outermost variable set */
    struct preprocessor_variable_set *stack_frame; /* the variable set searched first when expanding */
    struct linked_list *known_macros; /* of |preprocessor_macro| */
    struct general_pointer specifics; /* as supplied by the caller of the preprocessor */
    struct linked_list *errors; /* of |preprocessor_error| */
    inchar32_t comment_character; /* lines whose first non-whitespace character is this are skipped */
} preprocessor_state;

typedef struct preprocessor_loop {
    struct text_stream *loop_var_name; /* name of the variable run through the iterations */
    struct linked_list *iterations; /* of |text_stream| */
    int repeat_is_block; /* |FALSE| if the loop was opened by a span macro, otherwise |TRUE| */
    struct text_stream *repeat_saved_dest; /* the |dest| to go back to when the loop ends */
} preprocessor_loop;

§1.1. Initialise the preprocessor state1.1 =

    /* Output accumulates here; treating the "previous line" as blank means that
       blank lines at the very start produce no newline. */
    PPS.dest = Str::new();
    PPS.last_line_was_blank = TRUE;
    PPS.suppress_newline = FALSE;

    /* We begin outside of any definition or loop. */
    PPS.defining = NULL;
    PPS.shadow_sp = 0;
    PPS.repeat_sp = 0;

    /* At first the only variable set in scope is the global one. */
    PPS.stack_frame = Preprocessor::new_variable_set(NULL);
    PPS.global_variables = PPS.stack_frame;

    /* Macros, the caller's data, the comment character, and an empty list of errors. */
    PPS.known_macros = Preprocessor::list_of_reserved_macros(special_macros);
    PPS.specifics = specifics;
    PPS.comment_character = comment_char;
    PPS.errors = NEW_LINKED_LIST(preprocessor_error);

§3. Conceptually, each loop runs a variable with a given name through a series of textual values in sequence, and we store that data here:

void Preprocessor::set_loop_var_name(preprocessor_loop *loop, text_stream *name) {
    /* The loop holds its own duplicate of the name, not the text passed in. */
    text_stream *own_copy = Str::duplicate(name);
    loop->loop_var_name = own_copy;
}
void Preprocessor::add_loop_iteration(preprocessor_loop *loop, text_stream *value) {
    /* A duplicate of the value is appended to the loop's list of iterations. */
    text_stream *own_copy = Str::duplicate(value);
    ADD_TO_LINKED_LIST(own_copy, text_stream, loop->iterations);
}

§4. Lines from the prototype (or sometimes from files spliced in) are read, one at a time, by the following.

Note that define and end-define are not themselves macros, and are handled directly here. So you cannot use repeat loops to define multiple macros with parametrised names: but then, nor should you.

void Preprocessor::scan_line(text_stream *line, text_file_position *tfp, void *X) {
    /* |X| is the untyped state pointer, which is cast back to a |preprocessor_state|. */
    preprocessor_state *PPS = (preprocessor_state *) X;
    /* The first and third steps below may return early, ending work on this line. */
    Skip comments4.1;
    Make backslash literals safe4.2;
    Deal with textual definitions of new macros4.3;
    /* Anything else is ordinary material, expanded into |PPS->dest|. */
    Preprocessor::expand(line, tfp, PPS);
    Sometimes, but only sometimes, output a newline4.4;
}

§4.1. A line is a comment to the preprocessor if its first non-whitespace character is the special comment character: often #, but not necessarily.

Skip comments4.1 =

    LOOP_THROUGH_TEXT(pos, line) {
        inchar32_t c = Str::get(pos);
        if (c == PPS->comment_character) return;
        if (Characters::is_whitespace(c) == FALSE) break;
    }

§4.2. Make backslash literals safe4.2 =

    /* Rewrite each two-character backslash escape in place. The line keeps its
       length: the second character becomes a protected blank. */
    for (int at = 0; at < Str::len(line); at++) {
        if (Str::get_at(line, at) != '\\') continue;
        inchar32_t follower = Str::get_at(line, at+1);
        inchar32_t stand_in = 0; /* what replaces the backslash, or 0 to leave it alone */
        int is_escape = TRUE;
        if (follower == '{') stand_in = PROTECTED_OPEN_BRACE_PPCHAR;
        else if (follower == '}') stand_in = PROTECTED_CLOSE_BRACE_PPCHAR;
        else if (follower == 'n') stand_in = '\n';
        else if (follower == 't') stand_in = '\t';
        else if (follower != '\\') is_escape = FALSE;
        if (is_escape) {
            /* for a doubled backslash, the first one simply stays as it is */
            if (stand_in != 0) Str::put_at(line, at, stand_in);
            Str::put_at(line, at+1, PROTECTED_BLANK_PPCHAR);
        } else if ((follower != ' ') && (follower != '\t') && (follower != '\n') &&
            (follower != '\r') && (follower != 0)) {
            /* a backslash before whitespace or at the end of the line is let through silently */
            Preprocessor::error(PPS, tfp,
                I"backslash '\\' must be followed by 'n', 't', '{', '}' or '\\'");
        }
    }

§4.3. Deal with textual definitions of new macros4.3 =

    /* Each of the four cases below, if it applies, disposes of |mr| and returns,
       so that the line goes no further through the scanner. */
    match_results mr = Regexp::create_mr();
    if (Regexp::match(&mr, line, U" *{define: *(%C+) *} *")) Begin a bare definition4.3.1;
    if (Regexp::match(&mr, line, U" *{define: *(%C+) (%c*)} *")) Begin a definition4.3.2;
    /* |{end-define}| is tested before the line can be added to a definition body */
    if (Regexp::match(&mr, line, U" *{end-define} *")) End a definition4.3.4;
    if (PPS->defining) Continue a definition4.3.3;
    Regexp::dispose_of(&mr);

§4.3.1. Begin a bare definition4.3.1 =

    /* A definition cannot open inside another: we complain, but still create the macro. */
    if (PPS->defining != NULL)
        Preprocessor::error(PPS, tfp, I"nested definitions are not allowed");
    /* Being bare, this definition has an empty parameter specification. */
    PPS->defining = Preprocessor::new_internal_macro(PPS, PPS->known_macros, mr.exp[0],
        Str::new(), Preprocessor::default_expander, tfp);
    Regexp::dispose_of(&mr);
    return;

§4.3.2. Begin a definition4.3.2 =

    /* A definition cannot open inside another: we complain, but still create the macro. */
    if (PPS->defining != NULL)
        Preprocessor::error(PPS, tfp, I"nested definitions are not allowed");
    /* The first bracketed match is the macro name, the second its parameter specification. */
    PPS->defining = Preprocessor::new_internal_macro(PPS, PPS->known_macros, mr.exp[0],
        mr.exp[1], Preprocessor::default_expander, tfp);
    Regexp::dispose_of(&mr);
    return;

§4.3.3. Continue a definition4.3.3 =

    /* The match results are no longer needed: the line itself joins the macro body. */
    Regexp::dispose_of(&mr);
    Preprocessor::add_line_to_macro(PPS, PPS->defining, line, tfp);
    return;

§4.3.4. End a definition4.3.4 =

    if (PPS->defining == NULL)
        Preprocessor::error(PPS, tfp, I"{end-define} without {define: ...}");
    PPS->defining = NULL;
    Regexp::dispose_of(&mr);
    return;

§4.4. Sometimes, but only sometimes, output a newline4.4 =

    if (PPS->suppress_newline == FALSE) {
        text_stream *OUT = PPS->dest;
        int this_line_blank = (Str::len(line) == 0)?TRUE:FALSE;
        /* a blank line gets a newline only if the line before it was not blank */
        if ((this_line_blank == FALSE) || (PPS->last_line_was_blank == FALSE)) WRITE("\n");
        PPS->last_line_was_blank = this_line_blank;
    }
    /* suppression applies to one line only */
    PPS->suppress_newline = FALSE;

§5. The expander works on material fed to it which:

  1. Does not contain any newlines;
  2. Contains braces { ... } used in nested pairs (unless there is a syntax error in the prototype, in which case we must complain).

The idea is to pass everything straight through except any braced matter, which needs special attention.

/* Expand |text| into |PPS->dest|. The text is split at its first top-level
   braced item into the matter before it, the matter inside the braces, and
   the matter after; if there is such an item it is then expanded, and
   otherwise the text is copied through unchanged. Unbalanced braces are
   reported as errors via |Preprocessor::error|, at most once of each sort
   per call. */
void Preprocessor::expand(text_stream *text, text_file_position *tfp, preprocessor_state *PPS) {
    TEMPORARY_TEXT(before_matter)
    TEMPORARY_TEXT(braced_matter)
    TEMPORARY_TEXT(after_matter)
    int bl = 0; /* brace level: how many braces are currently open */
    int after_times = FALSE; /* has the first top-level braced item been closed? */
    int unmatched_close = FALSE; /* did a |}| ever arrive with no |{| open? */
    for (int i = 0; i < Str::len(text); i++) {
        inchar32_t c = Str::get_at(text, i);
        if (after_times) PUT_TO(after_matter, c);
        else if (c == '{') {
            bl++;
            if (bl > 1) PUT_TO(braced_matter, c); /* the outermost brace itself is not kept */
        } else if (c == '}') {
            bl--;
            /* Note this at the moment it happens, so that a stray |}| is caught
               even as the last character, and is reported only once. */
            if (bl < 0) unmatched_close = TRUE;
            if (bl == 0) after_times = TRUE;
            else PUT_TO(braced_matter, c);
        } else {
            if (bl == 0) PUT_TO(before_matter, c);
            else PUT_TO(braced_matter, c);
        }
    }
    if (unmatched_close) Preprocessor::error(PPS, tfp, I"too many '}'s");
    if (bl > 0) Preprocessor::error(PPS, tfp, I"too many '{'s");
    if (after_times) {
        Expand braced matter5.1;
    } else {
        WRITE_TO(PPS->dest, "%S", text);
    }
    DISCARD_TEXT(before_matter)
    DISCARD_TEXT(braced_matter)
    DISCARD_TEXT(after_matter)
}

§5.1. Suppose we are expanding the text this {ADJECTIVE} ocean {BEHAVIOUR}: then the before_matter will be this , the braced_matter will be ADJECTIVE, and the after_matter will be ocean {BEHAVIOUR}.

Expand braced matter5.1 =

    /* Braced matter made only of the characters allowed in a variable name is
       read as a variable; anything else is read as a macro usage. */
    if (Preprocessor::acceptable_variable_name(braced_matter)) {
        Expand a variable name5.1.2;
    } else {
        text_stream *identifier = braced_matter;
        text_stream *parameter_settings = NULL;
        match_results mr = Regexp::create_mr();
        /* Split off the macro name from its settings. In the first form the
           colon belongs to the settings, not to the name. */
        if (Regexp::match(&mr, identifier, U"(%C+)(: %c*)")) {
            identifier = mr.exp[0];
            parameter_settings = mr.exp[1];
        } else if (Regexp::match(&mr, identifier, U"(%C+) (%c*)")) {
            identifier = mr.exp[0];
            parameter_settings = mr.exp[1];
        }
        Work out which macro identifier is meant by a loop name5.1.1;

        preprocessor_macro *mm = Preprocessor::find_macro(PPS->known_macros, identifier);
        if (mm == NULL) {
            /* note that in this case nothing on the line is written to |PPS->dest| */
            TEMPORARY_TEXT(erm)
            WRITE_TO(erm, "unknown macro '%S'", identifier);
            Preprocessor::error(PPS, tfp, erm);
            DISCARD_TEXT(erm)
        } else {
            Expand a macro5.1.3;
        }
        Regexp::dispose_of(&mr);
    }

§5.1.1. So, for example, the identifier repeat would be changed here either to repeat-block or repeat-span: see the section on macros below for an explanation.

Work out which macro identifier is meant by a loop name5.1.1 =

    /* Only macros belonging to a loop construct (those with a loop name) are
       of interest here. */
    preprocessor_macro *loop_mm;
    LOOP_OVER_LINKED_LIST(loop_mm, preprocessor_macro, PPS->known_macros)
        if (Str::len(loop_mm->loop_name) > 0) {
            /* A loop opening: the block form is chosen if only whitespace follows
               on the line, and the span form otherwise. */
            if (Str::eq(identifier, loop_mm->loop_name)) {
                if (Str::is_whitespace(after_matter)) {
                    if ((loop_mm->span == FALSE) && (loop_mm->begins_loop))
                        identifier = loop_mm->identifier;
                } else {
                    if ((loop_mm->span) && (loop_mm->begins_loop))
                        identifier = loop_mm->identifier;
                }
            }
            /* A loop ending: the form is chosen to agree with the innermost
               loop currently open, and is the span form if none is. */
            TEMPORARY_TEXT(end_name)
            WRITE_TO(end_name, "end-%S", loop_mm->loop_name);
            if (Str::eq(identifier, end_name)) {
                if ((PPS->repeat_sp > 0) &&
                    (PPS->repeat_data[PPS->repeat_sp-1].repeat_is_block)) {
                    if ((loop_mm->span == FALSE) && (loop_mm->ends_loop))
                        identifier = loop_mm->identifier;
                } else {
                    if ((loop_mm->span) && (loop_mm->ends_loop))
                        identifier = loop_mm->identifier;
                }
            }
            DISCARD_TEXT(end_name)
        }

§5.1.2. Note that if we are inside a loop, we do not perform expansion on the variable name, and instead pass it through unchanged -- still as, say, {NAME}. This is because it won't be expanded until later, when the expander reaches the end of the loop body.

Expand a variable name5.1.2 =

    Preprocessor::expand(before_matter, tfp, PPS);
    if (PPS->repeat_sp <= 0) {
        Definitely expand a variable name5.1.2.1;
    } else {
        /* inside a loop body: put the usage back exactly as it was */
        WRITE_TO(PPS->dest, "{%S}", braced_matter);
    }
    Preprocessor::expand(after_matter, tfp, PPS);

§5.1.3. Similarly, we don't expand macros inside the body of a loop, except that we need to expand the {end-repeat-block} (or similar) which closes that loop body, so that we can escape back into normal mode. Because loop constructs may be nested, we need to react to (but not expand) loop openings, too. The "shadow stack pointer" shows how deep we are inside these shadowy, not-yet-acted-on, loops.

Expand a macro5.1.3 =

    if (mm->suppress_whitespace_when_expanding) {
        while (Characters::is_whitespace(Str::get_last_char(before_matter)))
            Str::delete_last_character(before_matter);
        while (Characters::is_whitespace(Str::get_first_char(after_matter)))
            Str::delete_first_character(after_matter);
    }
    Preprocessor::expand(before_matter, tfp, PPS);
    int divert_if_repeating = TRUE;
    if ((mm) && (mm->begins_loop)) {
        PPS->shadow_sp++;
    }
    if ((mm) && (mm->ends_loop)) {
        PPS->shadow_sp--;
        if (PPS->shadow_sp == 0) divert_if_repeating = FALSE;
    }

    if ((divert_if_repeating) && (PPS->repeat_sp > 0)) {
        WRITE_TO(PPS->dest, "{%S}", braced_matter);
    } else {
        Definitely expand a macro5.1.3.1;
        if (mm->suppress_newline_after_expanding) PPS->suppress_newline = TRUE;
    }
    Preprocessor::expand(after_matter, tfp, PPS);

§5.1.2.1. We can now forget about the before_matter, the after_matter, or whether we ought not to expand after all: that's all taken care of. A variable expands to its value:

Definitely expand a variable name5.1.2.1 =

    /* The search starts in the current stack frame and works outwards. */
    preprocessor_variable *var =
        Preprocessor::find_variable(braced_matter, PPS->stack_frame);
    if (var == NULL) {
        TEMPORARY_TEXT(erm)
        WRITE_TO(erm, "unknown variable '%S'", braced_matter);
        Preprocessor::error(PPS, tfp, erm);
        DISCARD_TEXT(erm)
    } else {
        WRITE_TO(PPS->dest, "%S", Preprocessor::read_variable(var));
    }

§5.1.3.1. This looks fussy, but really it delegates the work by calling a function attached to the macro, the expander.

Definitely expand a macro5.1.3.1 =

    /* Parameter values are indexed as in |mm->parameters|; any which are not
       supplied are left as |NULL|. */
    text_stream *parameter_values[MAX_PP_MACRO_PARAMETERS];
    for (int i=0; i<MAX_PP_MACRO_PARAMETERS; i++) parameter_values[i] = NULL;
    Parse the parameters supplied5.1.3.1.1;
    Check that all compulsory parameters have been supplied5.1.3.1.2;

    preprocessor_loop *loop = NULL;
    if (mm->begins_loop) Initialise repetition data for the loop5.1.3.1.3;

    /* If a loop was to begin but no loop record could be set up, an error has
       already been issued, and |loop| is still |NULL|: the expander for a loop
       opening writes into that record, so it must not then be called. */
    if (!((mm->begins_loop) && (loop == NULL)))
        (*(mm->expander))(mm, PPS, parameter_values, loop, tfp);

§5.1.3.1.1. Note that textual values of the parameters are themselves expanded before use: they might contain variables, or even macros. Parameter names are not. So you can have in: {WHATEVER} but not {WHATEVER}: this.

Parse the parameters supplied5.1.3.1.1 =

    /* Each pass of this loop takes one |name: value| setting off the front of
       |parameter_settings|, which is then overwritten with what remains. */
    match_results mr = Regexp::create_mr();
    while (Regexp::match(&mr, parameter_settings, U" *(%C*): *(%c*)")) {
        text_stream *setting = mr.exp[0];
        text_stream *value = mr.exp[1];
        text_stream *remainder = NULL;
        /* The value runs up to the next thing which looks like a parameter name:
           non-whitespace ending in a colon which is not followed by a slash
           (presumably so that URLs in values are not mistaken for names). */
        match_results mr3 = Regexp::create_mr();
        if (Regexp::match(&mr3, value, U"(%c+?) *(%C+:[^/]%c*)")) {
            value = mr3.exp[0];
            remainder = mr3.exp[1];
        }
        int found = FALSE;
        for (int i=0; i<mm->no_parameters; i++)
            if (Str::eq(setting, mm->parameters[i]->name)) {
                found = TRUE;
                /* expand the value into a fresh text, by temporarily making
                   that text the destination for output */
                parameter_values[i] = Str::new();
                text_stream *saved = PPS->dest;
                PPS->dest = parameter_values[i];
                Preprocessor::expand(value, tfp, PPS);
                PPS->dest = saved;
            }
        if (found == FALSE) {
            TEMPORARY_TEXT(erm)
            WRITE_TO(erm, "unknown parameter '%S:' of '%S'", setting, mm->identifier);
            Preprocessor::error(PPS, tfp, erm);
            DISCARD_TEXT(erm)
        }
        Str::clear(parameter_settings);
        Str::copy(parameter_settings, remainder);
        Regexp::dispose_of(&mr3);
    }
    Regexp::dispose_of(&mr);
    /* anything left which is not a setting means the list was badly formed */
    if (Str::is_whitespace(parameter_settings) == FALSE)
        Preprocessor::error(PPS, tfp, I"parameter list is malformed");

§5.1.3.1.2. Check that all compulsory parameters have been supplied5.1.3.1.2 =

    /* A parameter with no value is an error unless it was declared optional. */
    for (int p=0; p<mm->no_parameters; p++) {
        if ((parameter_values[p] != NULL) || (mm->parameters[p]->optional)) continue;
        TEMPORARY_TEXT(erm)
        WRITE_TO(erm, "compulsory parameter '%S:' of '%S' not given",
            mm->parameters[p]->name, mm->identifier);
        Preprocessor::error(PPS, tfp, erm);
        DISCARD_TEXT(erm)
    }

§5.1.3.1.3. The following code is a little misleading. At present, PPS->repeat_sp is always either 0 or 1, no matter how deep loop nesting is: but that's just an artefact of the current scanning algorithm, which might some day change.

Initialise repetition data for the loop5.1.3.1.3 =

    if (PPS->repeat_sp < MAX_PREPROCESSOR_LOOP_DEPTH) {
        /* claim the next free loop record */
        loop = &(PPS->repeat_data[PPS->repeat_sp]);
        PPS->repeat_sp++;
        PPS->shadow_sp = 1;
        Preprocessor::set_loop_var_name(loop, I"NAME");
        loop->iterations = NEW_LINKED_LIST(text_stream);
        loop->repeat_is_block = (mm->span)?FALSE:TRUE;
        /* from now on output is collected in a fresh text: the old destination
           is remembered in the loop record */
        loop->repeat_saved_dest = PPS->dest;
        PPS->dest = Str::new();
    } else {
        Preprocessor::error(PPS, tfp, I"repetition too deep");
    }

§6. Errors.

typedef struct preprocessor_error {
    struct text_stream *message; /* the text of the error */
    struct text_file_position at; /* a copy of the file position at which it was issued */
    CLASS_DEFINITION
} preprocessor_error;

void Preprocessor::error(preprocessor_state *PPS, text_file_position *tfp, text_stream *text) {
    /* With no state to add the error to, it is issued straight away instead. */
    if (PPS == NULL) {
        Errors::in_text_file_S(text, tfp);
        return;
    }
    /* Otherwise a record of it joins the state's list of errors. */
    preprocessor_error *err = CREATE(preprocessor_error);
    err->at = *tfp;
    err->message = Str::duplicate(text);
    ADD_TO_LINKED_LIST(err, preprocessor_error, PPS->errors);
}

§7. Variables. Names of variables should conform to:

int Preprocessor::acceptable_variable_name(text_stream *name) {
    /* Every character must be a digit, a capital letter A to Z, or an underscore.
       Note that the empty text passes this test. */
    LOOP_THROUGH_TEXT(pos, name) {
        inchar32_t c = Str::get(pos);
        int is_digit = ((c >= '0') && (c <= '9'))?TRUE:FALSE;
        int is_capital = ((c >= 'A') && (c <= 'Z'))?TRUE:FALSE;
        if ((is_digit == FALSE) && (is_capital == FALSE) && (c != '_')) return FALSE;
    }
    return TRUE;
}

§8. Variables are all textual:

typedef struct preprocessor_variable {
    struct text_stream *name; /* the name by which the variable is found */
    struct text_stream *value; /* its current value */
    CLASS_DEFINITION
} preprocessor_variable;

text_stream *Preprocessor::read_variable(preprocessor_variable *var) {
    /* It is an internal error to read from no variable at all. */
    if (var == NULL) internal_error("no such pp variable");
    text_stream *current = var->value;
    return current;
}
void Preprocessor::write_variable(preprocessor_variable *var, text_stream *val) {
    /* It is an internal error to write to no variable at all. */
    if (var == NULL) internal_error("no such pp variable");
    /* the variable is given a duplicate of |val|, not |val| itself */
    text_stream *fresh = Str::duplicate(val);
    var->value = fresh;
}

§9. Each variable belongs to a single "set". If EXAMPLE has one meaning outside a definition and another inside, that's two variables with a common name, not one variable belonging to two sets at once.

typedef struct preprocessor_variable_set {
    struct linked_list *variables; /* of |preprocessor_variable| */
    struct preprocessor_variable_set *outer; /* the set searched next if a name is not found here, or |NULL| */
    CLASS_DEFINITION
} preprocessor_variable_set;

preprocessor_variable_set *Preprocessor::new_variable_set(preprocessor_variable_set *outer) {
    /* A new set starts with no variables, and sits inside |outer| (which may be |NULL|). */
    preprocessor_variable_set *fresh = CREATE(preprocessor_variable_set);
    fresh->outer = outer;
    fresh->variables = NEW_LINKED_LIST(preprocessor_variable);
    return fresh;
}

preprocessor_variable *Preprocessor::find_variable_in_one(text_stream *name,
    preprocessor_variable_set *set) {
    /* Search this one set only, ignoring any outer sets: |NULL| means not found. */
    if (set) {
        preprocessor_variable *candidate;
        LOOP_OVER_LINKED_LIST(candidate, preprocessor_variable, set->variables)
            if (Str::eq(name, candidate->name))
                return candidate;
    }
    return NULL;
}

preprocessor_variable *Preprocessor::find_variable(text_stream *name,
    preprocessor_variable_set *set) {
    /* Work outwards from |set|, so that inner variables hide outer ones of the same name. */
    for (preprocessor_variable_set *frame = set; frame; frame = frame->outer) {
        preprocessor_variable *found = Preprocessor::find_variable_in_one(name, frame);
        if (found) return found;
    }
    return NULL;
}

§10. This creates a variable if it doesn't already exist in the given set. (If it exists in some outer set, that doesn't count.)

preprocessor_variable *Preprocessor::ensure_variable(text_stream *name,
    preprocessor_variable_set *in_set) {
    if (in_set == NULL) internal_error("variable without set");
    /* If the set already has a variable of this name, that is the answer. */
    preprocessor_variable *existing = Preprocessor::find_variable_in_one(name, in_set);
    if (existing) return existing;
    /* Otherwise make one, with the empty text as its value, and add it to the set. */
    preprocessor_variable *created = CREATE(preprocessor_variable);
    created->name = Str::duplicate(name);
    Preprocessor::write_variable(created, I"");
    ADD_TO_LINKED_LIST(created, preprocessor_variable, in_set->variables);
    return created;
}

§11. Macros. For the most part, each macro seen by users corresponds to a single preprocessor_macro, but loop constructs are an exception. When the user types {repeat ...}, this is a reference to repeat-block if the body of what to repeat occupies multiple lines, but to repeat-span if only one.

For example, the first repeat loop here uses the macros repeat-block and end-repeat-block, and the second uses repeat-span and end-repeat-span.

    {repeat with SEA in Black, Caspian}
    Welcome to the {SEA} Sea.
    {end-repeat}
    ...
    Seas available:{repeat with SEA in Sargasso, Libyan} {SEA} Sea;{end-repeat}

§12. There are (for now, anyway) hard but harmlessly large limits on the number of parameters and the length of a macro:

define MAX_PP_MACRO_PARAMETERS 8
define MAX_PP_MACRO_LINES 128
typedef struct preprocessor_macro {
    /* syntax */
    struct text_stream *identifier; /* the name by which the macro is looked up */
    struct preprocessor_macro_parameter *parameters[MAX_PP_MACRO_PARAMETERS];
    int no_parameters; /* how many entries of |parameters| are in use */

    /* meaning */
    struct text_stream *lines[MAX_PP_MACRO_LINES]; /* lines of the definition, if it has any */
    int no_lines; /* how many entries of |lines| are in use */
    /* the function called to expand a usage: its arguments are the macro, the
       state, the parameter values, the loop record (if any), and the file position */
    void (*expander)(struct preprocessor_macro *, struct preprocessor_state *, struct text_stream **, struct preprocessor_loop *, struct text_file_position *);

    /* loop construct if any */
    int begins_loop;               /* |TRUE| for e.g. |repeat-block| or |repeat-span| */
    int ends_loop;                 /* |TRUE| for e.g. |end-repeat-block| */
    struct text_stream *loop_name; /* e.g. |repeat| */
    int span;                      /* |TRUE| for e.g. |end-repeat-span| or |repeat-span| */

    /* textual behaviour */
    int suppress_newline_after_expanding; /* if set, no newline ends a line on which this is expanded */
    int suppress_whitespace_when_expanding; /* if set, whitespace next to a usage is removed */

    CLASS_DEFINITION
} preprocessor_macro;

typedef struct preprocessor_macro_parameter {
    struct text_stream *name; /* as written in a usage, without any leading |?| */
    struct text_stream *definition_token; /* name of the variable holding the value inside the definition */
    int optional; /* |TRUE| if the name was given with a leading |?| */
    CLASS_DEFINITION
} preprocessor_macro_parameter;

§13. The following creates a new macro and adds it to the list L. By default, it has an empty definition (i.e., no lines), but may have a meaning provided by its expander function regardless. The parameter_specification is as in the textual declaration: for example, in: IN ?towards: WAY would be valid, with in being compulsory and towards optional when the macro is used.

If we expected 10000 macros, a dictionary would be better than a list. But in fact we expect more like 10.

preprocessor_macro *Preprocessor::new_macro(linked_list *L, text_stream *name,
    text_stream *parameter_specification,
    void (*expander)(preprocessor_macro *, preprocessor_state *, text_stream **, preprocessor_loop *, text_file_position *),
    text_file_position *tfp) {
    /* The same as creating an internal macro, but with no preprocessor state:
       so any errors are issued directly rather than being added to a list. */
    preprocessor_macro *created = Preprocessor::new_internal_macro(NULL, L, name,
        parameter_specification, expander, tfp);
    return created;
}

preprocessor_macro *Preprocessor::new_internal_macro(preprocessor_state *PPS,
    linked_list *L, text_stream *name, text_stream *parameter_specification,
    void (*expander)(preprocessor_macro *, preprocessor_state *, text_stream **, preprocessor_loop *, text_file_position *),
    text_file_position *tfp) {
    /* A duplicate name is an error, but the new macro is created and added all the same. */
    if (Preprocessor::find_macro(L, name))
        Preprocessor::error(PPS, tfp, I"a macro with this name already exists");
    preprocessor_macro *new_macro = CREATE(preprocessor_macro);
    Initialise the macro13.1;
    Parse the parameter list13.2;
    ADD_TO_LINKED_LIST(new_macro, preprocessor_macro, L);
    return new_macro;
}

§13.1. Initialise the macro13.1 =

    /* syntax: no parameters as yet */
    new_macro->identifier = Str::duplicate(name);
    new_macro->no_parameters = 0;

    /* meaning: no lines of definition as yet */
    new_macro->expander = expander;
    new_macro->no_lines = 0;

    /* by default, a macro is not part of any loop construct */
    new_macro->loop_name = NULL;
    new_macro->begins_loop = FALSE;
    new_macro->ends_loop = FALSE;
    new_macro->span = FALSE;

    /* by default, both sorts of suppression are on */
    new_macro->suppress_whitespace_when_expanding = TRUE;
    new_macro->suppress_newline_after_expanding = TRUE;

§13.2. Parse the parameter list13.2 =

    /* Work on a copy of the specification, taking one |name: TOKEN| pair off the
       front of it on each pass until no more can be found. */
    text_stream *spec = Str::duplicate(parameter_specification);
    match_results mr = Regexp::create_mr();
    while (Regexp::match(&mr, spec, U" *(%C*): *(%C+) *(%c*)")) {
        text_stream *par_name = mr.exp[0];
        text_stream *token_name = mr.exp[1];
        Str::clear(spec);
        Str::copy(spec, mr.exp[2]);
        if (new_macro->no_parameters < MAX_PP_MACRO_PARAMETERS) {
            Add parameter to macro13.2.1;
        } else {
            Preprocessor::error(PPS, tfp, I"too many parameters in this definition");
        }
    }
    Regexp::dispose_of(&mr);
    /* anything left over which is not whitespace could not be understood */
    if (Str::is_whitespace(spec) == FALSE)
        Preprocessor::error(PPS, tfp, I"parameter list for this definition is malformed");

§13.2.1. Add parameter to macro13.2.1 =

    preprocessor_macro_parameter *new_parameter = CREATE(preprocessor_macro_parameter);
    new_parameter->name = Str::duplicate(par_name);
    new_parameter->definition_token = Str::duplicate(token_name);
    new_parameter->optional = FALSE;
    if (Str::get_first_char(new_parameter->name) == '?') {
        new_parameter->optional = TRUE;
        Str::delete_first_character(new_parameter->name);
    }
    new_macro->parameters[new_macro->no_parameters++] = new_parameter;

§14. We can then add lines to a macro (though this will only have an effect if its expander function is Preprocessor::default_expander).

void Preprocessor::add_line_to_macro(preprocessor_state *PPS, preprocessor_macro *mm,
    text_stream *line, text_file_position *tfp) {
    /* A definition which is already full takes no more lines. */
    if (mm->no_lines >= MAX_PP_MACRO_LINES) {
        Preprocessor::error(PPS, tfp, I"too many lines in this definition");
        return;
    }
    /* the macro keeps a duplicate of the line, not the line itself */
    mm->lines[mm->no_lines] = Str::duplicate(line);
    mm->no_lines++;
}

§15. Reserved macros. A few macros are "reserved", that is, have built-in meanings, and use expander functions other than Preprocessor::default_expander.

Some of these, the special_macros, are supplied by the code calling the preprocessor. Those will provide domain-specific functionality. But a few are built in here and therefore work in every domain:

linked_list *Preprocessor::list_of_reserved_macros(linked_list *special_macros) {
    linked_list *reserved = NEW_LINKED_LIST(preprocessor_macro);

    /* first the macros which are built in here... */
    Preprocessor::new_loop_macro(reserved, I"repeat", I"with: WITH in: IN",
        Preprocessor::repeat_expander, NULL);
    Preprocessor::new_macro(reserved, I"set", I"name: NAME value: VALUE",
        Preprocessor::set_expander, NULL);

    /* ...and then those supplied by the caller, in the order given */
    preprocessor_macro *special;
    LOOP_OVER_LINKED_LIST(special, preprocessor_macro, special_macros)
        ADD_TO_LINKED_LIST(special, preprocessor_macro, reserved);
    return reserved;
}

void Preprocessor::do_not_suppress_whitespace(preprocessor_macro *mm) {
    /* Turn off both sorts of suppression, each of which is on by default. */
    mm->suppress_whitespace_when_expanding = FALSE;
    mm->suppress_newline_after_expanding = FALSE;
}

void Preprocessor::new_loop_macro(linked_list *L, text_stream *name,
    text_stream *parameter_specification,
    void (*expander)(preprocessor_macro *, preprocessor_state *, text_stream **, preprocessor_loop *, text_file_position *),
    text_file_position *tfp) {
    /* A loop construct is four macros: an opening and a closing macro for the
       block form, and then the same again for the span form. */
    TEMPORARY_TEXT(subname)
    for (int as_span = 0; as_span < 2; as_span++) {
        /* the opening macro takes the parameters and the expander supplied */
        Str::clear(subname);
        if (as_span) WRITE_TO(subname, "%S-span", name);
        else WRITE_TO(subname, "%S-block", name);
        preprocessor_macro *opener =
            Preprocessor::new_macro(L, subname, parameter_specification, expander, tfp);
        opener->begins_loop = TRUE;
        opener->loop_name = Str::duplicate(name);
        if (as_span) {
            opener->span = TRUE;
            Preprocessor::do_not_suppress_whitespace(opener);
        }

        /* the closing macro has no parameters, and always the same expander */
        Str::clear(subname);
        if (as_span) WRITE_TO(subname, "end-%S-span", name);
        else WRITE_TO(subname, "end-%S-block", name);
        preprocessor_macro *closer =
            Preprocessor::new_macro(L, subname, NULL, Preprocessor::end_loop_expander, tfp);
        closer->ends_loop = TRUE;
        closer->loop_name = Str::duplicate(name);
        if (as_span) {
            closer->span = TRUE;
            Preprocessor::do_not_suppress_whitespace(closer);
        }
    }
    DISCARD_TEXT(subname)
}

§16. Finding a macro in a list:

preprocessor_macro *Preprocessor::find_macro(linked_list *L, text_stream *name) {
    /* The first macro in the list with this identifier, or |NULL| if there is none. */
    preprocessor_macro *candidate;
    LOOP_OVER_LINKED_LIST(candidate, preprocessor_macro, L) {
        if (Str::eq(candidate->identifier, name)) return candidate;
    }
    return NULL;
}

§17. The expander for user-defined macros. All macros created by {define: ...} are expanded by the following function. It creates a local "stack frame" making the parameters available as variables, then runs the definition lines through the scanner, then dismantles the stack frame again.

void Preprocessor::default_expander(preprocessor_macro *mm, preprocessor_state *PPS,
    text_stream **parameter_values, preprocessor_loop *loop, text_file_position *tfp) {
    /* Open a new stack frame inside the current one... */
    preprocessor_variable_set *frame = Preprocessor::new_variable_set(PPS->stack_frame);
    PPS->stack_frame = frame;
    /* ...in which each parameter's token is a variable holding the value supplied... */
    for (int p=0; p<mm->no_parameters; p++)
        Preprocessor::write_variable(
            Preprocessor::ensure_variable(mm->parameters[p]->definition_token, frame),
            parameter_values[p]);
    /* ...then scan the lines of the definition as if they were lines of the file... */
    for (int n=0; n<mm->no_lines; n++)
        Preprocessor::scan_line(mm->lines[n], tfp, (void *) PPS);
    /* ...and finally step back out of the frame. */
    PPS->stack_frame = PPS->stack_frame->outer;
}

§18. The set expander. An easy one.

void Preprocessor::set_expander(preprocessor_macro *mm, preprocessor_state *PPS,
    text_stream **parameter_values, preprocessor_loop *loop, text_file_position *tfp) {
    /* the parameters are |name:| and |value:|, in that order */
    text_stream *name = parameter_values[0], *value = parameter_values[1];

    /* An improper name is an error, but the variable is still set. */
    int proper = Preprocessor::acceptable_variable_name(name);
    if (proper == FALSE) Preprocessor::error(PPS, tfp, I"improper variable name");

    /* The variable is created in the current stack frame if not already there. */
    Preprocessor::write_variable(
        Preprocessor::ensure_variable(name, PPS->stack_frame), value);
}

§19. The repeat expander.

void Preprocessor::repeat_expander(preprocessor_macro *mm, preprocessor_state *PPS,
    text_stream **parameter_values, preprocessor_loop *loop, text_file_position *tfp) {
    /* the parameters are |with:| and |in:|, in that order */
    text_stream *loop_variable = parameter_values[0];
    text_stream *remaining = parameter_values[1];
    Preprocessor::set_loop_var_name(loop, loop_variable);

    /* Take the text before each comma off the front of the list, trimmed of
       white space, as an iteration value... */
    match_results mr = Regexp::create_mr();
    while (Regexp::match(&mr, remaining, U"(%c*?),(%c*)")) {
        Str::trim_white_space(mr.exp[0]);
        Preprocessor::add_loop_iteration(loop, mr.exp[0]);
        Str::clear(remaining);
        Str::copy(remaining, mr.exp[1]);
    }
    Regexp::dispose_of(&mr);

    /* ...and whatever is left after the last comma is the final value. */
    Str::trim_white_space(remaining);
    Preprocessor::add_loop_iteration(loop, remaining);
}

§20. The expander used for all loop ends. The macros which open a loop just store up the name of the variable and the range of its values: otherwise, they do nothing. It's only when the end of a loop is reached that any expansion happens, and this is where.

We create a new stack frame inside the current one, and put the loop variable into it. Then we run through the iteration values, setting the variable to each in turn, and expand the material.

void Preprocessor::end_loop_expander(preprocessor_macro *mm, preprocessor_state *PPS,
    text_stream **parameter_values, preprocessor_loop *loop, text_file_position *tfp) {
    PPS->shadow_sp = 0;
    if (PPS->repeat_sp == 0) {
        Preprocessor::error(PPS, tfp, I"{end-repeat} without {repeat}");
        return;
    }

    /* Pop the innermost loop. The |loop| argument is not used: the record is
       taken from the top of the repeat stack instead. */
    PPS->repeat_sp--;
    preprocessor_loop *innermost = &(PPS->repeat_data[PPS->repeat_sp]);

    /* What was collected since the loop opened is its body: output now goes
       back to where it went before. */
    text_stream *matter = PPS->dest;
    PPS->dest = innermost->repeat_saved_dest;

    /* The loop variable lives in a new stack frame, dismantled afterwards. */
    PPS->stack_frame = Preprocessor::new_variable_set(PPS->stack_frame);
    preprocessor_variable *loop_var =
        Preprocessor::ensure_variable(innermost->loop_var_name, PPS->stack_frame);
    text_stream *value;
    LOOP_OVER_LINKED_LIST(value, text_stream, innermost->iterations)
        Iterate with this value20.1;
    PPS->stack_frame = PPS->stack_frame->outer;
}

§20.1. Iterate with this value20.1 =

    Preprocessor::write_variable(loop_var, value);
    if (mm->span == FALSE) {
        /* A block body is fed back through the scanner a line at a time. Each
           newline ends a line: any text after the last newline is not scanned. */
        TEMPORARY_TEXT(line)
        LOOP_THROUGH_TEXT(pos, matter) {
            inchar32_t c = Str::get(pos);
            if (c != '\n') {
                PUT_TO(line, c);
            } else {
                Preprocessor::scan_line(line, tfp, (void *) PPS);
                Str::clear(line);
            }
        }
        DISCARD_TEXT(line)
    } else {
        /* A span body is a fragment of a single line, and is expanded directly. */
        Preprocessor::expand(matter, tfp, PPS);
    }