To parse pipelines from text files.
- §1. How pipelines are stored
- §5. Parsing
- §10. Stages
- §11. Starting a variables dictionary
- §12. Back to text
§1. How pipelines are stored. An inter_pipeline is a linked list of pipeline_steps, together with some associated storage used when it runs: this is for storing variables and pointers to trees being worked on.
typedef struct inter_pipeline { struct linked_list *steps; of pipeline_step struct dictionary *variables; int erroneous; a syntax error occurred when parsing this struct pipeline_ephemera ephemera; temporary storage when running struct linked_list *search_list; used when parsing only struct pathname *local; int run_depth; CLASS_DEFINITION } inter_pipeline; inter_pipeline *ParsingPipelines::new_pipeline(dictionary *D, linked_list *L, pathname *local) { inter_pipeline *S = CREATE(inter_pipeline); S->variables = D; S->steps = NEW_LINKED_LIST(pipeline_step); S->erroneous = FALSE; S->search_list = L; S->local = local; S->run_depth = 0; RunningPipelines::clean_pipeline(S); return S; }
- The structure inter_pipeline is accessed in 2/rp, 2/pe, 2/ns, 2/rmss, 3/ps, 3/rccs, 3/css, 4/lbks, 4/sws, 4/dics, 4/mius, 4/rvs, 5/msms, 6/erms, 6/erls, 6/eros and here.
§2. A pipeline_step is really only a choice of pipeline_stage, but comes along with a wide variety of options and parameter settings, so that it looks much more complicated than it actually is.
typedef struct pipeline_step { struct inter_pipeline *pipeline; struct pipeline_stage *step_stage; struct text_stream *step_argument; struct code_generator *generator_argument; int take_generator_argument_from_VM; struct text_stream *package_URL_argument; int tree_argument; struct pipeline_step_ephemera ephemera; temporary storage when running CLASS_DEFINITION } pipeline_step; pipeline_step *ParsingPipelines::new_step(inter_pipeline *pipeline) { pipeline_step *step = CREATE(pipeline_step); step->pipeline = pipeline; step->step_stage = NULL; step->step_argument = NULL; step->package_URL_argument = NULL; step->tree_argument = 0; step->generator_argument = NULL; step->take_generator_argument_from_VM = FALSE; RunningPipelines::clean_step(step); return step; }
- The structure pipeline_step is accessed in 2/rp, 2/pe, 2/ns, 2/rmss, 3/ps, 3/rccs, 3/css, 4/lbks, 4/sws, 4/dics, 4/mius, 4/rvs, 5/msms, 6/erms, 6/erls, 6/eros and here.
§3. And a pipeline_stage is simply a choice of what to do. For example, eliminate-redundant-labels is a pipeline stage. This would need to be combined with details of what tree to apply to in order to become a step.
/* Kinds of argument a stage can take. Each |pipeline_stage| records one of
   these in its |stage_arg| field. |ParsingPipelines::will_read_a_file| groups
   the two |*FILE_STAGE_ARG| kinds, and |ParsingPipelines::will_write_a_file|
   groups the three |*TEXT_OUT_STAGE_ARG| kinds. */
enum NO_STAGE_ARG from 1
enum GENERAL_STAGE_ARG
enum FILE_STAGE_ARG
enum TEXT_OUT_STAGE_ARG
enum OPTIONAL_TEXT_OUT_STAGE_ARG
enum EXT_FILE_STAGE_ARG
enum EXT_TEXT_OUT_STAGE_ARG
enum KIT_STAGE_ARG
typedef struct pipeline_stage { struct text_stream *stage_name; int (*execute)(void *); int stage_arg; one of the *_ARG values above int takes_tree; CLASS_DEFINITION } pipeline_stage; pipeline_stage *ParsingPipelines::new_stage(text_stream *name, int (*X)(struct pipeline_step *), int arg, int tr) { pipeline_stage *stage = CREATE(pipeline_stage); stage->stage_name = Str::duplicate(name); stage->execute = (int (*)(void *)) X; stage->stage_arg = arg; stage->takes_tree = tr; return stage; }
- The structure pipeline_stage is accessed in 2/rp and here.
§4. Lumping some of those argument types together:
/* TRUE if the stage of |step| has one of the two file-reading argument kinds. */
int ParsingPipelines::will_read_a_file(pipeline_step *step) {
	int kind = step->step_stage->stage_arg;
	return ((kind == FILE_STAGE_ARG) || (kind == EXT_FILE_STAGE_ARG)) ? TRUE : FALSE;
}

/* TRUE if the stage of |step| has one of the three text-output argument kinds. */
int ParsingPipelines::will_write_a_file(pipeline_step *step) {
	int kind = step->step_stage->stage_arg;
	if (kind == TEXT_OUT_STAGE_ARG) return TRUE;
	if (kind == OPTIONAL_TEXT_OUT_STAGE_ARG) return TRUE;
	if (kind == EXT_TEXT_OUT_STAGE_ARG) return TRUE;
	return FALSE;
}
§5. Parsing. All pipelines originate as textual descriptions, either from a text file or supplied on the command line. Here, we turn such a description — in effect a program for a very simple programming language — into an inter_pipeline.
/* Read a pipeline from the text file |F|, one line at a time.
   The directory containing |F| becomes the pipeline's |local| path.
   Returns NULL if any syntax error was recorded while parsing. */
inter_pipeline *ParsingPipelines::from_file(filename *F, dictionary *D,
	linked_list *search_list) {
	pathname *home = Filenames::up(F);
	inter_pipeline *pipeline = ParsingPipelines::new_pipeline(D, search_list, home);
	TextFiles::read(F, FALSE, "can't open inter pipeline file", TRUE,
		ParsingPipelines::scan_line, NULL, (void *) pipeline);
	return (pipeline->erroneous) ? NULL : pipeline;
}

/* Line-by-line callback for |TextFiles::read|: |X| is the pipeline being built. */
void ParsingPipelines::scan_line(text_stream *line, text_file_position *tfp, void *X) {
	ParsingPipelines::parse_line((inter_pipeline *) X, line, tfp);
}

/* Make a pipeline from the single text |instructions|, with no search list,
   no local directory and no file position for error messages.
   Returns NULL if any syntax error was recorded while parsing. */
inter_pipeline *ParsingPipelines::from_text(text_stream *instructions, dictionary *D) {
	inter_pipeline *pipeline = ParsingPipelines::new_pipeline(D, NULL, NULL);
	ParsingPipelines::parse_line(pipeline, instructions, NULL);
	return (pipeline->erroneous) ? NULL : pipeline;
}
§6. Either way, then, a sequence of 1 or more textual lines of description is passed to the following. It breaks down the line into 1 or more instructions, divided by commas.
/* Parse one line of pipeline description into |pipeline|.
   The line is divided at commas, and each piece is handed in turn to
   |ParsingPipelines::parse_instruction|. |tfp| is passed through unchanged
   for use in error messages, and is NULL when the text did not come from a file. */
void ParsingPipelines::parse_line(inter_pipeline *pipeline, text_stream *instructions, text_file_position *tfp) {
	TEMPORARY_TEXT(T)
	/* Work on a copy, |T|, in which every whitespace character is a plain space */
	LOOP_THROUGH_TEXT(P, instructions)
		if (Characters::is_babel_whitespace(Str::get(P)))
			PUT_TO(T, ' ');
		else
			PUT_TO(T, Str::get(P));
	match_results mr = Regexp::create_mr();
	/* While |T| has the form "text, remainder": parse the text, then carry on
	   with |T| replaced by the remainder */
	while (Regexp::match(&mr, T, U" *(%c+?) *,+ *(%c*?) *")) {
		ParsingPipelines::parse_instruction(pipeline, mr.exp[0], tfp);
		Str::copy(T, mr.exp[1]);
	}
	/* No comma is left: if this last pattern matches, what remains is the final instruction */
	if (Regexp::match(&mr, T, U" *(%c+?) *"))
		ParsingPipelines::parse_instruction(pipeline, mr.exp[0], tfp);
	Regexp::dispose_of(&mr);
	DISCARD_TEXT(T)
}
§7. Instructions are mostly steps, but:
- (a) A line beginning with an ! is a comment,
- (b) run pipeline X means to incorporate pipeline X here.
/* Act on a single instruction |T|, which is one of:
   (a) a comment, beginning with |!|, which is ignored;
   (b) "run pipeline X", which reads the lines of pipeline X into |pipeline|; or
   (c) a step, which is parsed and appended to the pipeline's list of steps.
   Any failure sets |pipeline->erroneous|. */
void ParsingPipelines::parse_instruction(inter_pipeline *pipeline, text_stream *T,
	text_file_position *tfp) {
	match_results mr = Regexp::create_mr();
	if (Regexp::match(&mr, T, U"!%c*")) {
		/* a comment: nothing to do */
	} else if (Regexp::match(&mr, T, U"run pipeline (%c*)")) {
		text_stream *wanted = mr.exp[0];
		filename *F = NULL;
		#ifdef SUPERVISOR_MODULE
		F = InterSkill::filename_of_pipeline(wanted, pipeline->search_list);
		#endif
		if (F == NULL) {
			/* fall back on "X.interpipeline" in the pipeline's local directory */
			text_stream *leafname = Str::new();
			WRITE_TO(leafname, "%S.interpipeline", wanted);
			F = Filenames::in(pipeline->local, leafname);
		}
		if (F) {
			/* the depth is compared as it stood before this inclusion, then
			   raised for the duration of the nested read */
			int depth_on_entry = pipeline->run_depth;
			pipeline->run_depth = depth_on_entry + 1;
			if (depth_on_entry <= 100) {
				TextFiles::read(F, FALSE, "can't open inter pipeline file", TRUE,
					ParsingPipelines::scan_line, NULL, (void *) pipeline);
			} else {
				PipelineErrors::syntax_with(tfp, T,
					"pipeline seems to have become circular: '%S'", wanted);
				pipeline->erroneous = TRUE;
			}
			pipeline->run_depth--;
		} else {
			PipelineErrors::syntax_with(tfp, T,
				"unable to find the pipeline '%S'", wanted);
			pipeline->erroneous = TRUE;
		}
	} else {
		pipeline_step *ST = ParsingPipelines::parse_step(pipeline, T, tfp);
		if (ST == NULL) {
			pipeline->erroneous = TRUE;
		} else {
			ADD_TO_LINKED_LIST(ST, pipeline_step, pipeline->steps);
		}
	}
	Regexp::dispose_of(&mr);
}
§8. Finally, an individual textual description S of a step is turned into a pipeline_step.
For documentation on the syntax here, see Pipelines and Stages (in inter).
/* Turn the textual description |S| of a single step into a |pipeline_step|.
   |S| is modified in place: each paragraph which matches strips off the part
   it has dealt with by copying the remaining text back into |S|, so that by
   the time |ParsingPipelines::parse_stage| is called only the stage name is
   expected to be left. Returns the new step, or NULL if a syntax error is
   found; the named paragraphs below return NULL directly when that happens. */
pipeline_step *ParsingPipelines::parse_step(inter_pipeline *pipeline, text_stream *S, text_file_position *tfp) {
	dictionary *D = pipeline->variables;
	pipeline_step *step = ParsingPipelines::new_step(pipeline);
	/* keep the unmodified text, for quoting in error messages */
	text_stream *syntax = Str::duplicate(S);
	match_results mr = Regexp::create_mr();

	/* a step beginning "optionally-" may refer to variables which do not exist;
	   note that the prefix is not removed from |S| here */
	int allow_unknown = FALSE;
	if (Regexp::match(&mr, S, U"optionally-%c+")) allow_unknown = TRUE;

	/* First, at most one arrow: "stage <- source", "stage generator -> destination"
	   or "stage -> destination" */
	int left_arrow_used = FALSE;
	if (Regexp::match(&mr, S, U"(%c+?) *<- *(%c*)")) Left arrow notation8.1
	else if (Regexp::match(&mr, S, U"(%c+?) (%C+) *-> *(%c*)")) Right arrow notation with generator8.2
	else if (Regexp::match(&mr, S, U"(%c+?) *-> *(%c*)")) Right arrow notation without generator8.3;

	/* Then, on what is left: "stage N", "stage N:package" or "stage package" */
	if (Regexp::match(&mr, S, U"(%C+?) (%d)")) Tree number as argument8.4
	else if (Regexp::match(&mr, S, U"(%C+?) (%d):(%c*)")) Tree number and package as arguments8.5
	else if (Regexp::match(&mr, S, U"(%C+?) (%c+)")) Package as argument8.6;

	/* What remains of |S| is looked up as a stage name */
	step->step_stage = ParsingPipelines::parse_stage(S);
	Make consistency checks8.7;

	Regexp::dispose_of(&mr);
	return step;
}
§8.1. Left arrow notation8.1 =
if (Str::len(mr.exp[1]) > 0) { step->step_argument = ParsingPipelines::text_arg(mr.exp[1], D, tfp, syntax, allow_unknown); if (step->step_argument == NULL) return NULL; } else { PipelineErrors::syntax(tfp, syntax, "no source to right of arrow"); return NULL; } Str::copy(S, mr.exp[0]); left_arrow_used = TRUE;
- This code is used in §8.
§8.2. Right arrow notation with generator8.2 =
/* Here |mr.exp[0]| is the text before the generator name, |mr.exp[1]| the
   generator name and |mr.exp[2]| the text to the right of the arrow. On
   either failure the match results are disposed of before leaving the
   function, since the call to |Regexp::dispose_of| at the end of the
   function will not be reached. */
code_generator *cgt = Generators::find(mr.exp[1]);
if (cgt == NULL) {
	/* the message quotes |mr.exp[1]|, so dispose only after issuing it */
	PipelineErrors::syntax_with(tfp, syntax,
		"no such code generation format as '%S'", mr.exp[1]);
	Regexp::dispose_of(&mr);
	return NULL;
}
step->generator_argument = cgt;
step->step_argument =
	ParsingPipelines::text_arg(mr.exp[2], D, tfp, syntax, allow_unknown);
if (step->step_argument == NULL) {
	Regexp::dispose_of(&mr);
	return NULL;
}
/* continue parsing with just the text before the generator name */
Str::copy(S, mr.exp[0]);
- This code is used in §8.
§8.3. Right arrow notation without generator8.3 =
/* Here |mr.exp[0]| is the text to the left of the arrow and |mr.exp[1]| the
   text to its right. No generator is named, so the step is marked as taking
   its generator from the VM. On failure the match results are disposed of
   before leaving the function, since the call to |Regexp::dispose_of| at the
   end of the function will not be reached. */
step->generator_argument = NULL;
step->take_generator_argument_from_VM = TRUE;
step->step_argument =
	ParsingPipelines::text_arg(mr.exp[1], D, tfp, syntax, allow_unknown);
if (step->step_argument == NULL) {
	Regexp::dispose_of(&mr);
	return NULL;
}
/* continue parsing with just the left-hand side */
Str::copy(S, mr.exp[0]);
- This code is used in §8.
§8.4. Tree number as argument8.4 =
step->tree_argument = Str::atoi(mr.exp[1], 0); Str::copy(S, mr.exp[0]);
- This code is used in §8.
§8.5. Tree number and package as arguments8.5 =
/* Here |mr.exp[0]| is the stage name, |mr.exp[1]| the text matched by |%d|
   (read as the tree number) and |mr.exp[2]| the package text after the colon,
   which may be empty. On failure the match results are disposed of before
   leaving the function, since the call to |Regexp::dispose_of| at the end of
   the function will not be reached. */
step->tree_argument = Str::atoi(mr.exp[1], 0);
if (Str::len(mr.exp[2]) > 0) {
	step->package_URL_argument =
		ParsingPipelines::text_arg(mr.exp[2], D, tfp, syntax, allow_unknown);
	if (step->package_URL_argument == NULL) {
		Regexp::dispose_of(&mr);
		return NULL;
	}
}
/* only the stage name is left to parse */
Str::copy(S, mr.exp[0]);
- This code is used in §8.
§8.6. Package as argument8.6 =
/* Here |mr.exp[0]| is the stage name and |mr.exp[1]| everything after it,
   taken as the package argument. On failure the match results are disposed
   of before leaving the function, since the call to |Regexp::dispose_of| at
   the end of the function will not be reached. */
step->package_URL_argument =
	ParsingPipelines::text_arg(mr.exp[1], D, tfp, syntax, allow_unknown);
if (step->package_URL_argument == NULL) {
	Regexp::dispose_of(&mr);
	return NULL;
}
/* only the stage name is left to parse */
Str::copy(S, mr.exp[0]);
- This code is used in §8.
§8.7. Make consistency checks8.7 =
/* The stage must exist, and a left arrow must have been used if and only if
   the stage takes a tree. Each failure disposes of the match results before
   leaving the function, since the call to |Regexp::dispose_of| at the end of
   the function will not be reached. */
if (step->step_stage == NULL) {
	/* by now |S| holds whatever was left to serve as the stage name */
	PipelineErrors::syntax_with(tfp, syntax, "no such stage as '%S'", S);
	Regexp::dispose_of(&mr);
	return NULL;
}
if ((step->step_stage->takes_tree) && (left_arrow_used == FALSE)) {
	PipelineErrors::syntax(tfp, syntax,
		"this stage should take a left arrow and a source");
	Regexp::dispose_of(&mr);
	return NULL;
}
if ((!(step->step_stage->takes_tree)) && (left_arrow_used)) {
	PipelineErrors::syntax(tfp, syntax,
		"this stage should not take a left arrow and a source");
	Regexp::dispose_of(&mr);
	return NULL;
}
- This code is used in §8.
§9. A textual argument beginning with an asterisk means "expand to the value of this variable". The variable is required to exist unless allow_unknown is set; if allow_unknown is set and the variable does not exist, an empty text results as the argument.
/* Work out the value of the textual argument |from|.
   Text not beginning with an asterisk is returned as a fresh copy. Text
   beginning with an asterisk is looked up, asterisk included, as a variable
   in |D|, and a fresh copy of its value is returned. If there is no such
   variable, the result is the empty text when |allow_unknown| is set;
   otherwise an error quoting |syntax| is issued at |tfp| and NULL is
   returned, which is what callers in this section test for in order to
   abandon the step. */
text_stream *ParsingPipelines::text_arg(text_stream *from, dictionary *D,
	text_file_position *tfp, text_stream *syntax, int allow_unknown) {
	if (Str::get_first_char(from) != '*') return Str::duplicate(from);

	text_stream *find = Dictionaries::get_text(D, from);
	if (find) return Str::duplicate(find);

	if (allow_unknown) return I"";

	PipelineErrors::syntax_with(tfp, syntax,
		"no such pipeline variable as '%S'", from);
	/* report failure, rather than passing the unexpanded |*name| off as a value */
	return NULL;
}
§10. Stages. Stages are a fixed set within this compiler: there's no way for a pipeline file to specify a new one.
/* Find the stage whose name is exactly |from|, or return NULL if there is none.
   The stages are created the first time this function is called. */
pipeline_stage *ParsingPipelines::parse_stage(text_stream *from) {
	static int stages_made = FALSE;
	if (stages_made == FALSE) {
		/* set the flag first, then create the stages in this fixed order */
		stages_made = TRUE;
		SimpleStages::create_pipeline_stages();
		CodeGen::create_pipeline_stage();
		NewStage::create_pipeline_stage();
		LoadBinaryKitsStage::create_pipeline_stage();
		CompileSplatsStage::create_pipeline_stage();
		DetectIndirectCallsStage::create_pipeline_stage();
		EliminateRedundantMatterStage::create_pipeline_stage();
		ShortenWiringStage::create_pipeline_stage();
		EliminateRedundantLabelsStage::create_pipeline_stage();
		EliminateRedundantOperationsStage::create_pipeline_stage();
		MakeSynopticModuleStage::create_pipeline_stage();
		ParsingStages::create_pipeline_stage();
		ResolveConditionalsStage::create_pipeline_stage();
		ReconcileVerbsStage::create_pipeline_stage();
		MakeIdentifiersUniqueStage::create_pipeline_stage();
		IndexStage::create_pipeline_stage();
	}
	pipeline_stage *candidate;
	LOOP_OVER(candidate, pipeline_stage) {
		if (Str::eq(from, candidate->stage_name)) return candidate;
	}
	return NULL;
}
§11. Starting a variables dictionary. Note that the above ways to create a pipeline all expect a dictionary of variable names and their values to exist. These dictionaries are typically very small, and by convention the main variable is *out, the leafname to write output to. So the following utility is convenient for getting started.
/* Make a small starter dictionary of pipeline variables.
   |*out| is set to |leafname|, but only if that is non-empty; |*log| is
   always set, to the text "*log". */
dictionary *ParsingPipelines::basic_dictionary(text_stream *leafname) {
	dictionary *D = Dictionaries::new(16, TRUE);
	if (Str::len(leafname) > 0) {
		text_stream *out_value = Dictionaries::create_text(D, I"*out");
		Str::copy(out_value, leafname);
	}
	text_stream *log_value = Dictionaries::create_text(D, I"*log");
	Str::copy(log_value, I"*log");
	return D;
}
§12. Back to text. Here we write a textual description to a string, which is useful for logging:
/* Write a textual description of |step| to the output stream.
   The stage name always appears. Unless the stage takes no argument, it is
   followed by the tree number and/or package (if any), then a left arrow and
   source if the stage takes a tree, then the generator name and a right arrow
   if a generator has been set. */
void ParsingPipelines::write_step(OUTPUT_STREAM, pipeline_step *step) {
	pipeline_stage *stage = step->step_stage;
	WRITE("%S", stage->stage_name);
	if (stage->stage_arg == NO_STAGE_ARG) return;

	int has_package = (Str::len(step->package_URL_argument) > 0) ? TRUE : FALSE;
	if (step->tree_argument > 0) {
		/* "N" or "N:package" */
		WRITE(" %d", step->tree_argument);
		if (has_package) { WRITE(":%S", step->package_URL_argument); }
	} else if (has_package) {
		WRITE(" %S", step->package_URL_argument);
	}

	if (stage->takes_tree) { WRITE(" <- %S", step->step_argument); }
	if (step->generator_argument) {
		WRITE(" %S -> %S",
			step->generator_argument->generator_name, step->step_argument);
	}
}