To parse pipelines from text files.


§1. How pipelines are stored. An inter_pipeline is a linked list of pipeline_step structures, together with some associated storage used when it runs: this is for storing variables and pointers to trees being worked on.

/* A parsed pipeline: its steps, in order, plus the state which the parser in
   this section keeps on it. Instances are made by ParsingPipelines::new_pipeline. */
typedef struct inter_pipeline {
    struct linked_list *steps;  of pipeline_step
    /* dictionary consulted by ParsingPipelines::text_arg when an argument begins with an asterisk */
    struct dictionary *variables;
    int erroneous;  a syntax error occurred when parsing this
    struct pipeline_ephemera ephemera;  temporary storage when running
    struct linked_list *search_list;  used when parsing only
    /* directory in which "run pipeline NAME" looks for NAME.interpipeline;
       ParsingPipelines::from_file sets it to the directory holding the file,
       ParsingPipelines::from_text leaves it NULL */
    struct pathname *local;
    /* how many "run pipeline" reads are currently nested: see parse_instruction */
    int run_depth;
    CLASS_DEFINITION
} inter_pipeline;

/* Make a fresh pipeline with an empty list of steps. D becomes its variables
   dictionary, L its search list and local its local directory; it starts out
   not erroneous and at run depth 0, and is handed to
   RunningPipelines::clean_pipeline before being returned. */
inter_pipeline *ParsingPipelines::new_pipeline(dictionary *D, linked_list *L, pathname *local) {
    inter_pipeline *pipeline = CREATE(inter_pipeline);
    pipeline->steps = NEW_LINKED_LIST(pipeline_step);
    pipeline->variables = D;
    pipeline->search_list = L;
    pipeline->local = local;
    pipeline->erroneous = FALSE;
    pipeline->run_depth = 0;
    RunningPipelines::clean_pipeline(pipeline);
    return pipeline;
}

§2. A pipeline_step is really only a choice of pipeline_stage, but comes along with a wide variety of options and parameter settings, so that it looks much more complicated than it actually is.

/* One step of a pipeline: a stage together with the arguments parsed for it
   by ParsingPipelines::parse_step. Instances are made by ParsingPipelines::new_step. */
typedef struct pipeline_step {
    /* the pipeline this step was created for */
    struct inter_pipeline *pipeline;
    /* the stage to perform, as found by ParsingPipelines::parse_stage */
    struct pipeline_stage *step_stage;
    /* the text written after a left or right arrow, once passed through text_arg */
    struct text_stream *step_argument;
    /* the generator named before a right arrow, or NULL if none was named */
    struct code_generator *generator_argument;
    /* set to TRUE when a right arrow was used without naming a generator */
    int take_generator_argument_from_VM;
    /* the package text following the stage name (or following "N:"), if any */
    struct text_stream *package_URL_argument;
    /* the tree number following the stage name; new_step initialises this to 0 */
    int tree_argument;
    struct pipeline_step_ephemera ephemera;  temporary storage when running
    CLASS_DEFINITION
} pipeline_step;

/* Make a blank step belonging to the given pipeline: no stage, no arguments,
   tree number 0, and no generator. The step is passed to
   RunningPipelines::clean_step before being returned. */
pipeline_step *ParsingPipelines::new_step(inter_pipeline *pipeline) {
    pipeline_step *blank = CREATE(pipeline_step);
    blank->pipeline = pipeline;
    blank->step_stage = NULL;
    blank->tree_argument = 0;
    blank->package_URL_argument = NULL;
    blank->step_argument = NULL;
    blank->take_generator_argument_from_VM = FALSE;
    blank->generator_argument = NULL;
    RunningPipelines::clean_step(blank);
    return blank;
}

§3. And a pipeline_stage is simply a choice of what to do. For example, eliminate-redundant-labels is a pipeline stage. This would need to be combined with details of what tree to apply to in order to become a step.

enum NO_STAGE_ARG from 1
enum GENERAL_STAGE_ARG
enum FILE_STAGE_ARG
enum TEXT_OUT_STAGE_ARG
enum OPTIONAL_TEXT_OUT_STAGE_ARG
enum EXT_FILE_STAGE_ARG
enum EXT_TEXT_OUT_STAGE_ARG
enum KIT_STAGE_ARG
/* A stage which a step can choose. The values enumerated above classify the
   kind of argument a stage takes: see ParsingPipelines::will_read_a_file and
   ParsingPipelines::will_write_a_file for how some of them are grouped.
   Instances are made by ParsingPipelines::new_stage. */
typedef struct pipeline_stage {
    /* the name by which ParsingPipelines::parse_stage recognises this stage */
    struct text_stream *stage_name;
    /* the function performing the stage; new_stage is given a function taking
       a pipeline_step pointer and stores it here cast to take a void pointer */
    int (*execute)(void *);
    int stage_arg;  one of the *_ARG values above
    /* if nonzero, parse_step requires the step to use a left arrow and a
       source; if zero, parse_step rejects a left arrow */
    int takes_tree;
    CLASS_DEFINITION
} pipeline_stage;

/* Make a new stage called name (the text is duplicated, so the caller keeps
   its own copy), performed by the function X, taking the argument type arg
   (one of the *_STAGE_ARG values), and with tr saying whether it takes a tree. */
pipeline_stage *ParsingPipelines::new_stage(text_stream *name,
    int (*X)(struct pipeline_step *), int arg, int tr) {
    pipeline_stage *created = CREATE(pipeline_stage);
    created->takes_tree = tr;
    created->stage_arg = arg;
    /* stored with a void pointer parameter, which is how the structure declares it */
    created->execute = (int (*)(void *)) X;
    created->stage_name = Str::duplicate(name);
    return created;
}

§4. Lumping some of those argument types together:

/* Returns TRUE if the stage of this step has argument type FILE_STAGE_ARG or
   EXT_FILE_STAGE_ARG, and FALSE otherwise. The step must already have a stage. */
int ParsingPipelines::will_read_a_file(pipeline_step *step) {
    switch (step->step_stage->stage_arg) {
        case FILE_STAGE_ARG:
        case EXT_FILE_STAGE_ARG:
            return TRUE;
    }
    return FALSE;
}

/* Returns TRUE if the stage of this step has argument type TEXT_OUT_STAGE_ARG,
   OPTIONAL_TEXT_OUT_STAGE_ARG or EXT_TEXT_OUT_STAGE_ARG, and FALSE otherwise.
   The step must already have a stage. */
int ParsingPipelines::will_write_a_file(pipeline_step *step) {
    switch (step->step_stage->stage_arg) {
        case TEXT_OUT_STAGE_ARG:
        case OPTIONAL_TEXT_OUT_STAGE_ARG:
        case EXT_TEXT_OUT_STAGE_ARG:
            return TRUE;
    }
    return FALSE;
}

§5. Parsing. All pipelines originate as textual descriptions, either from a text file or supplied on the command line. Here, we turn such a description — in effect a program for a very simple programming language — into an inter_pipeline.

/* Parse the pipeline described by the text file F, using D as its variables
   dictionary and search_list as its search list. The directory holding F
   becomes the pipeline's local directory. Returns NULL if the pipeline was
   marked erroneous during parsing. */
inter_pipeline *ParsingPipelines::from_file(filename *F, dictionary *D,
    linked_list *search_list) {
    pathname *home = Filenames::up(F);
    inter_pipeline *pipeline = ParsingPipelines::new_pipeline(D, search_list, home);
    TextFiles::read(F, FALSE, "can't open inter pipeline file",
        TRUE, ParsingPipelines::scan_line, NULL, (void *) pipeline);
    return (pipeline->erroneous) ? NULL : pipeline;
}

/* Callback given to TextFiles::read: X is the pipeline being built, and each
   line read is passed on to ParsingPipelines::parse_line. */
void ParsingPipelines::scan_line(text_stream *line, text_file_position *tfp, void *X) {
    ParsingPipelines::parse_line((inter_pipeline *) X, line, tfp);
}

/* Parse a pipeline from a single text of instructions, using D as its
   variables dictionary. The pipeline has no search list and no local
   directory, and no file position is available for error messages. Returns
   NULL if the pipeline was marked erroneous during parsing. */
inter_pipeline *ParsingPipelines::from_text(text_stream *instructions, dictionary *D) {
    inter_pipeline *pipeline = ParsingPipelines::new_pipeline(D, NULL, NULL);
    ParsingPipelines::parse_line(pipeline, instructions, NULL);
    return (pipeline->erroneous) ? NULL : pipeline;
}

§6. Either way, then, a sequence of 1 or more textual lines of description is passed to the following. It breaks down the line into 1 or more instructions, divided by commas.

/* Split one line of description into instructions and parse each in turn.
   tfp is the file position for error messages, or NULL if there is none. */
void ParsingPipelines::parse_line(inter_pipeline *pipeline, text_stream *instructions,
    text_file_position *tfp) {
    /* work on a copy in which every whitespace character has become a plain space */
    TEMPORARY_TEXT(flattened)
    LOOP_THROUGH_TEXT(pos, instructions) {
        if (Characters::is_babel_whitespace(Str::get(pos))) {
            PUT_TO(flattened, ' ');
        } else {
            PUT_TO(flattened, Str::get(pos));
        }
    }
    match_results mr = Regexp::create_mr();
    /* peel off the instruction before the first run of commas, then carry on
       with whatever follows, until no comma-separated instruction remains */
    for (;;) {
        if (!(Regexp::match(&mr, flattened, U" *(%c+?) *,+ *(%c*?) *"))) break;
        ParsingPipelines::parse_instruction(pipeline, mr.exp[0], tfp);
        Str::copy(flattened, mr.exp[1]);
    }
    /* what is left, if not empty, is the final instruction */
    if (Regexp::match(&mr, flattened, U" *(%c+?) *")) {
        ParsingPipelines::parse_instruction(pipeline, mr.exp[0], tfp);
    }
    Regexp::dispose_of(&mr);
    DISCARD_TEXT(flattened)
}

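§7. Instructions are mostly steps, but there are two exceptions: an instruction beginning with an exclamation mark is a comment and is ignored, and an instruction of the form "run pipeline NAME" reads in the instructions from another pipeline file at this point.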

/* Parse a single instruction T for the given pipeline. tfp is the file
   position used in error messages. An instruction is either a comment, a
   request to run another pipeline file, or a step; on any failure the
   pipeline is marked as erroneous. */
void ParsingPipelines::parse_instruction(inter_pipeline *pipeline, text_stream *T,
    text_file_position *tfp) {
    match_results mr = Regexp::create_mr();
    if (Regexp::match(&mr, T, U"!%c*")) {
        /* an instruction beginning with an exclamation mark: deliberately do nothing */
        ;
    } else if (Regexp::match(&mr, T, U"run pipeline (%c*)")) {
        filename *F = NULL;
        /* when the supervisor module is present, ask it first, passing the search list */
        #ifdef SUPERVISOR_MODULE
        F = InterSkill::filename_of_pipeline(mr.exp[0], pipeline->search_list);
        #endif
        if (F == NULL) {
            /* otherwise fall back on NAME.interpipeline in the local directory */
            text_stream *leafname = Str::new();
            WRITE_TO(leafname, "%S.interpipeline", mr.exp[0]);
            F = Filenames::in(pipeline->local, leafname);
        }
        if (F == NULL) {
            PipelineErrors::syntax_with(tfp, T,
                "unable to find the pipeline '%S'", mr.exp[0]);
            pipeline->erroneous = TRUE;
        } else {
            /* run_depth counts the nested reads in progress: the file read
               below can reach this function again through scan_line, so a
               pipeline which runs itself would otherwise never finish */
            if (pipeline->run_depth++ > 100) {
                PipelineErrors::syntax_with(tfp, T,
                    "pipeline seems to have become circular: '%S'", mr.exp[0]);
                pipeline->erroneous = TRUE;
            } else {
                TextFiles::read(F, FALSE, "can't open inter pipeline file",
                    TRUE, ParsingPipelines::scan_line, NULL, (void *) pipeline);
            }
            /* balances the increment made in the test above, on both branches */
            pipeline->run_depth--;
        }
    } else {
        /* anything else must be a step: a NULL result means it failed to parse */
        pipeline_step *ST = ParsingPipelines::parse_step(pipeline, T, tfp);
        if (ST) ADD_TO_LINKED_LIST(ST, pipeline_step, pipeline->steps);
        else pipeline->erroneous = TRUE;
    }
    Regexp::dispose_of(&mr);
}

§8. Finally, an individual textual description S of a step is turned into a pipeline_step.

For documentation on the syntax here, see Pipelines and Stages (in inter).

/* Turn the textual description S of one step into a pipeline_step for the
   given pipeline, or return NULL if it is not valid. tfp is the file position
   used in error messages. Note that S is altered as parsing proceeds: each
   recognised piece of notation is removed by copying what remains back into S. */
pipeline_step *ParsingPipelines::parse_step(inter_pipeline *pipeline, text_stream *S,
    text_file_position *tfp) {
    dictionary *D = pipeline->variables;
    pipeline_step *step = ParsingPipelines::new_step(pipeline);
    /* keep the full original text for error messages, since S is about to be cut down */
    text_stream *syntax = Str::duplicate(S);
    match_results mr = Regexp::create_mr();

    /* this flag is passed to text_arg, where it permits a variable not to exist */
    int allow_unknown = FALSE;
    if (Regexp::match(&mr, S, U"optionally-%c+")) allow_unknown = TRUE;

    /* first remove any arrow and what follows it, leaving the part before it in S */
    int left_arrow_used = FALSE;
    if (Regexp::match(&mr, S,      U"(%c+?) *<- *(%c*)"))       Left arrow notation8.1
    else if (Regexp::match(&mr, S, U"(%c+?) (%C+) *-> *(%c*)")) Right arrow notation with generator8.2
    else if (Regexp::match(&mr, S, U"(%c+?) *-> *(%c*)"))       Right arrow notation without generator8.3;

    /* then remove any tree number or package, leaving only the stage name in S */
    if (Regexp::match(&mr, S,      U"(%C+?) (%d)"))             Tree number as argument8.4
    else if (Regexp::match(&mr, S, U"(%C+?) (%d):(%c*)"))       Tree number and package as arguments8.5
    else if (Regexp::match(&mr, S, U"(%C+?) (%c+)"))            Package as argument8.6;

    step->step_stage = ParsingPipelines::parse_stage(S);
    Make consistency checks8.7;

    /* NOTE(review): the paragraphs used above return NULL directly on an
       error, and those returns do not pass through the disposal below -
       confirm whether that matters */
    Regexp::dispose_of(&mr);
    return step;
}

§8.1. Left arrow notation8.1 =

    if (Str::len(mr.exp[1]) > 0) {
        step->step_argument = ParsingPipelines::text_arg(mr.exp[1], D, tfp, syntax, allow_unknown);
        if (step->step_argument == NULL) return NULL;
    } else {
        PipelineErrors::syntax(tfp, syntax, "no source to right of arrow");
        return NULL;
    }
    Str::copy(S, mr.exp[0]);
    left_arrow_used = TRUE;

§8.2. Right arrow notation with generator8.2 =

    /* The word before the arrow must name a known generator; the text after
       the arrow becomes the step argument, and the text before the generator
       name is what remains to be parsed. */
    code_generator *named = Generators::find(mr.exp[1]);
    if (named == NULL) {
        PipelineErrors::syntax_with(tfp, syntax,
            "no such code generation format as '%S'", mr.exp[1]);
        return NULL;
    }
    step->generator_argument = named;
    step->step_argument = ParsingPipelines::text_arg(mr.exp[2], D, tfp, syntax, allow_unknown);
    if (step->step_argument == NULL) return NULL;
    Str::copy(S, mr.exp[0]);

§8.3. Right arrow notation without generator8.3 =

    /* No generator was named, so record that it should be taken from the VM
       instead. The text after the arrow becomes the step argument. */
    step->generator_argument = NULL;
    step->take_generator_argument_from_VM = TRUE;
    step->step_argument = ParsingPipelines::text_arg(mr.exp[1], D, tfp, syntax, allow_unknown);
    if (step->step_argument == NULL) return NULL;
    /* the text before the arrow is what remains to be parsed */
    Str::copy(S, mr.exp[0]);

§8.4. Tree number as argument8.4 =

    /* The second match is the tree number; the first is the stage name, which is all that S keeps. */
    step->tree_argument = Str::atoi(mr.exp[1], 0);
    Str::copy(S, mr.exp[0]);

§8.5. Tree number and package as arguments8.5 =

    /* The second match is the tree number and the third, after the colon, is
       the package; an empty package is allowed and leaves the package argument unset. */
    step->tree_argument = Str::atoi(mr.exp[1], 0);
    if (Str::len(mr.exp[2]) > 0) {
        step->package_URL_argument =
            ParsingPipelines::text_arg(mr.exp[2], D, tfp, syntax, allow_unknown);
        if (step->package_URL_argument == NULL) return NULL;
    }
    /* the first match is the stage name, which is all that S keeps */
    Str::copy(S, mr.exp[0]);

§8.6. Package as argument8.6 =

    /* Everything after the stage name is taken as the package argument. */
    step->package_URL_argument =
        ParsingPipelines::text_arg(mr.exp[1], D, tfp, syntax, allow_unknown);
    if (step->package_URL_argument == NULL) return NULL;
    /* the first match is the stage name, which is all that S keeps */
    Str::copy(S, mr.exp[0]);

§8.7. Make consistency checks8.7 =

    /* By now S should hold only a stage name, and parse_stage has looked it up. */
    if (step->step_stage == NULL) {
        PipelineErrors::syntax_with(tfp, syntax, "no such stage as '%S'", S);
        return NULL;
    }
    /* A left arrow is compulsory for a stage which takes a tree... */
    if ((step->step_stage->takes_tree) && (left_arrow_used == FALSE)) {
        PipelineErrors::syntax(tfp, syntax,
            "this stage should take a left arrow and a source");
        return NULL;
    }
    /* ...and forbidden for one which does not. */
    if ((!(step->step_stage->takes_tree)) && (left_arrow_used)) {
        PipelineErrors::syntax(tfp, syntax,
            "this stage should not take a left arrow and a source");
        return NULL;
    }

§9. A textual argument beginning with an asterisk means "expand to the value of this variable", which is required to exist unless allow_unknown is set. If allow_unknown is set and the variable does not exist, then an empty text results as the argument.

/* Work out the value of the textual argument from.

   If from does not begin with an asterisk, a duplicate of it is returned.
   If it does, it is looked up as a variable name in the dictionary D:
   (a) if the variable exists, a duplicate of its value is returned;
   (b) if it does not exist but allow_unknown is set, an empty text is returned;
   (c) otherwise a syntax error is reported, using tfp and syntax to say
       where, and NULL is returned.

   Every caller tests the result against NULL in order to abandon the step, so
   case (c) has to return NULL: if it instead fell through to the final return,
   the error would be printed but the step would be accepted with the literal
   variable name as its argument. */
text_stream *ParsingPipelines::text_arg(text_stream *from, dictionary *D,
    text_file_position *tfp, text_stream *syntax, int allow_unknown) {
    if (Str::get_first_char(from) == '*') {
        text_stream *find = Dictionaries::get_text(D, from);
        if (find) return Str::duplicate(find);
        if (allow_unknown) return I"";
        PipelineErrors::syntax_with(tfp, syntax,
            "no such pipeline variable as '%S'", from);
        return NULL;
    }
    return Str::duplicate(from);
}

§10. Stages. Stages are a fixed set within this compiler: there's no way for a pipeline file to specify a new one.

/* Find the stage whose name is exactly the text from, or return NULL if there
   is none. The first call creates all of the stages before searching. */
pipeline_stage *ParsingPipelines::parse_stage(text_stream *from) {
    static int stages_made = FALSE;
    if (!stages_made) {
        /* the flag is set before the stages are created, not after */
        stages_made = TRUE;
        SimpleStages::create_pipeline_stages();
        CodeGen::create_pipeline_stage();
        NewStage::create_pipeline_stage();
        LoadBinaryKitsStage::create_pipeline_stage();
        CompileSplatsStage::create_pipeline_stage();
        DetectIndirectCallsStage::create_pipeline_stage();
        EliminateRedundantMatterStage::create_pipeline_stage();
        ShortenWiringStage::create_pipeline_stage();
        EliminateRedundantLabelsStage::create_pipeline_stage();
        EliminateRedundantOperationsStage::create_pipeline_stage();
        MakeSynopticModuleStage::create_pipeline_stage();
        ParsingStages::create_pipeline_stage();
        ResolveConditionalsStage::create_pipeline_stage();
        ReconcileVerbsStage::create_pipeline_stage();
        MakeIdentifiersUniqueStage::create_pipeline_stage();
        IndexStage::create_pipeline_stage();
    }
    pipeline_stage *candidate;
    LOOP_OVER(candidate, pipeline_stage) {
        if (Str::eq(from, candidate->stage_name)) {
            return candidate;
        }
    }
    return NULL;
}

§11. Starting a variables dictionary. Note that the above ways to create a pipeline all expect a dictionary of variable names and their values to exist. These dictionaries are typically very small, and by convention the main variable is *out, the leafname to write output to. So the following utility is convenient for getting started.

/* Make a new variables dictionary. If leafname is non-empty, the variable
   *out is created with that value; the variable *log is always created, with
   the text "*log" as its value. */
dictionary *ParsingPipelines::basic_dictionary(text_stream *leafname) {
    dictionary *vars = Dictionaries::new(16, TRUE);
    if (Str::len(leafname) > 0) {
        text_stream *out_value = Dictionaries::create_text(vars, I"*out");
        Str::copy(out_value, leafname);
    }
    text_stream *log_value = Dictionaries::create_text(vars, I"*log");
    Str::copy(log_value, I"*log");
    return vars;
}

§12. Back to text. Here we write a textual description to a string, which is useful for logging:

/* Write a textual description of the step to the output stream: the stage
   name, then, unless the stage takes no argument, the tree number and/or
   package, the left-arrow source if the stage takes a tree, and the generator
   with its right-arrow destination if a generator is set. The step must
   already have a stage. */
void ParsingPipelines::write_step(OUTPUT_STREAM, pipeline_step *step) {
    pipeline_stage *stage = step->step_stage;
    WRITE("%S", stage->stage_name);
    if (stage->stage_arg == NO_STAGE_ARG) return;

    /* a package follows a tree number after a colon, but stands alone after a space */
    int package_given = (Str::len(step->package_URL_argument) > 0) ? TRUE : FALSE;
    if (step->tree_argument > 0) {
        WRITE(" %d", step->tree_argument);
        if (package_given) WRITE(":%S", step->package_URL_argument);
    } else if (package_given) {
        WRITE(" %S", step->package_URL_argument);
    }

    if (stage->takes_tree) {
        WRITE(" <- %S", step->step_argument);
    }
    if (step->generator_argument) {
        WRITE(" %S -> %S",
            step->generator_argument->generator_name, step->step_argument);
    }
}