Building an inter schema, a form of annotated syntax tree.
§1. Introduction. See What This Module Does for an introduction to what an Inter schema is. For this section of code, it's sufficient to know that an inter_schema is an annotated syntax tree made up of inter_schema_node nodes.
Relatively few inter schemas are generated during Inform's run — typically a few hundred — so they do not need to be stored compactly or compiled quickly.
classdef inter_schema {
struct text_stream *converted_from; /* a copy of the source notation */
struct inter_schema_node *node_tree; /* the structure */
int mid_case; /* does this seem to be used inside a switch case? */
int dereference_mode; /* emit from this in dereference-pointers mode */
struct linked_list *parsing_errors; /* of schema_parsing_error */
struct text_provenance provenance;
}
- The structure inter_schema is accessed in 2/pis, 2/tkn, 2/rmf, 2/eis, 2/i6se and here.
inter_schema *InterSchemas::new(text_stream *source, text_provenance provenance) { inter_schema *sch = CREATE(inter_schema); sch->converted_from = Str::duplicate(source); sch->node_tree = NULL; sch->mid_case = FALSE; sch->dereference_mode = FALSE; sch->parsing_errors = NULL; sch->provenance = provenance; return sch; }
§3. Each node has one of the following *_ISNT types. (For a while eliminated nodes
were retained in the tree but given the no-op type OH_NO_IT_ISNT, but unfortunately
we now more efficiently remove them.)
enumerate EXPRESSION_ISNT 1
enumerate CODE_ISNT
enumerate EVAL_ISNT
enumerate STATEMENT_ISNT
enumerate OPERATION_ISNT
enumerate SUBEXPRESSION_ISNT
enumerate DIRECTIVE_ISNT
enumerate ASSEMBLY_ISNT
enumerate LABEL_ISNT
enumerate CALL_ISNT
enumerate MESSAGE_ISNT
enumerate CALLMESSAGE_ISNT
classdef inter_schema_node { struct inter_schema *parent_schema; struct inter_schema_node *parent_node; struct inter_schema_node *child_node; struct inter_schema_node *next_node; int node_marked; /* used fleetingly during traverses */ int isn_type; /* one of the*_ISNTvalues */ inter_ti isn_clarifier; /* forSTATEMENT_ISNTandOPERATION_ISNTonly */ int dir_clarifier; /* forDIRECTIVE_ISNTonly */ struct inter_schema_token *expression_tokens; /* forEXPRESSION_ISNTonly */ int semicolon_terminated; /* forEXPRESSION_ISNTonly */ int unclosed; /* forCODE_ISNTonly */ int unopened; /* forCODE_ISNTonly */ int blocked_by_conditional; /* used in code generation */ struct text_provenance provenance; /* used for error reporting */ }
- The structure inter_schema_node is accessed in 2/rmf, 2/eis, 2/i6se and here.
inter_schema_node *InterSchemas::new_node(inter_schema *sch, int isnt, inter_schema_token *near_here) { inter_schema_node *isn = CREATE(inter_schema_node); isn->parent_schema = sch; isn->parent_node = NULL; isn->child_node = NULL; isn->next_node = NULL; isn->node_marked = FALSE; isn->isn_type = isnt; isn->expression_tokens = NULL; isn->isn_clarifier = 0; isn->dir_clarifier = -1; isn->semicolon_terminated = FALSE; isn->unclosed = FALSE; isn->unopened = FALSE; isn->blocked_by_conditional = FALSE; isn->provenance = (sch)?(sch->provenance):(Provenance::nowhere()); if (near_here) Provenance::advance_line(&(isn->provenance), near_here->line_offset); return isn; } inter_schema_node *InterSchemas::new_node_near_node(inter_schema *sch, int isnt, inter_schema_node *near_here) { inter_schema_node *isn = InterSchemas::new_node(sch, isnt, NULL); if (near_here) isn->provenance = near_here->provenance; return isn; }
§5. Ordinarily, a CODE_ISNT node represents a complete block of I6 code.
For example, in
if (x == 1) { print "Hello!"; }
the print statement occurs inside a complete block, which will eventually
be represented as a CODE_ISNT. But some inline phrase definitions leave
blocks half-open. For example,
if (x == 1)
is a legal phrase definition: we read it as
if (x == 1) {
and the schema has to contain a CODE_ISNT marked as having been left
unclosed. Similarly, some definitions close an I6 block which is assumed
as having been opened by an earlier phrase invocation.
Here we mark a code node as being unclosed. Clearly, if a node is unclosed then any parent code nodes of it must also be unclosed, so we recurse upwards.
void InterSchemas::mark_unclosed(inter_schema_node *isn) { while (isn) { if (isn->isn_type == CODE_ISNT) isn->unclosed = TRUE; isn = isn->parent_node; } }
§6. The situation with unopened nodes is different: we must ensure that a single code node is left unopened, even if we have to create a code node at the top of the tree to serve that role.
void InterSchemas::mark_unopened(inter_schema_node *isn) { while (isn) { if (isn->isn_type == CODE_ISNT) { isn->unopened = TRUE; return; } if (isn->parent_node == NULL) { inter_schema *sch = isn->parent_schema; inter_schema_node *top = sch->node_tree; inter_schema_node *code_isn = InterSchemas::new_node(isn->parent_schema, CODE_ISNT, NULL); code_isn->child_node = top; sch->node_tree = code_isn; for (inter_schema_node *n = top; n; n = n->next_node) n->parent_node = code_isn; code_isn->unopened = TRUE; return; } isn = isn->parent_node; } }
§7. A further complication is that some schemas arise from definitions used only in the middle of switch statements; they contain neither the start nor the end of the construct, so the above mechanisms are not sufficient. For example, the inline definition
{X}:
implies, by use of the colon, that it's a switch case. We can't conveniently associate this with any single node, so we mark the schema as a whole.
void InterSchemas::mark_case_closed(inter_schema_node *isn) { if (isn) isn->parent_schema->mid_case = TRUE; }
§8. EXPRESSION_ISNT nodes carry a linked list of tokens with them. When we
begin compiling a schema, we form a single expression node with a long list
of tokens: gradually this is transformed into a tree of nodes with more,
but much shorter, expression nodes, and in the process most of the token
types are eliminated. The following types are present only during the
compilation process, and never survive into the final schema:
enumerate RAW_ISTT 1 /* something unidentified as yet */
enumerate WHITE_SPACE_ISTT /* a stretch of white space */
enumerate RESERVED_ISTT /* am I6 reserved word such as while */
enumerate OPERATOR_ISTT /* an I6 operator such as-->or+*/
enumerate DIVIDER_ISTT /* a semicolon used to divide I6 statements */
enumerate OPEN_ROUND_ISTT /* open round bracket */
enumerate CLOSE_ROUND_ISTT /* close round bracket */
enumerate OPEN_BRACE_ISTT /* open brace bracket */
enumerate CLOSE_BRACE_ISTT /* close brace bracket */
enumerate COMMA_ISTT /* comma */
enumerate COLON_ISTT /* colon */
enumerate DCOLON_ISTT /* double-colon */
enumerate IDENTIFIER_ISTT /* an I6 identifier such as my_var12 */
enumerate OPCODE_ISTT /* an Inform assembly language opcode such as @pull */
enumerate DIRECTIVE_ISTT /* an Inform compiler directive such as #iftrue */
enumerate NUMBER_ISTT /* a constant number */
enumerate BIN_NUMBER_ISTT /* a constant number */
enumerate HEX_NUMBER_ISTT /* a constant number */
enumerate REAL_NUMBER_ISTT /* a constant number */
enumerate DQUOTED_ISTT /* a constant piece of text "like this" */
enumerate SQUOTED_ISTT /* a constant piece of text such as 'x' */
enumerate I7_ISTT /* I7 material in (+ ... +) notation */
enumerate INLINE_ISTT /* an inline command such as {-my:1} */
enumerate ASM_ARROW_ISTT /* the arrow sign -> used in assembly language only */
enumerate ASM_SP_ISTT /* the stack pointer pseudo-variable sp */
enumerate ASM_LABEL_ISTT /* the label sign ? used in assembly language only */
enumerate ASM_NEGATED_LABEL_ISTT /* the label sign ?~ used in assembly language only */
classdef inter_schema_token { struct inter_schema_node *owner; /* these form a linked list attached to the owner node */ struct inter_schema_token *next; int ist_type; /* one of the*_ISTTvalues above */ struct text_stream *material; /* textual form of token */ inter_ti operation_primitive; /*OPERATOR_ISTTonly: e.g.PLUS_BIPfor+*/ int reserved_word; /*RESERVED_ISTTonly: which one */ int constant_number; /*NUMBER_ISTTonly: if non-negative, value of number */ struct inter_name *as_quoted; /*IDENTIFIER_ISTTonly: the identified symbol if known */ int inline_command; /*INLINE_ISTTonly: one of the*_ISINCvalues */ int inline_modifiers; int inline_subcommand; /*INLINE_ISTTonly: one of the*_ISINSCvalues */ struct text_stream *bracing; struct text_stream *command; struct text_stream *operand; struct text_stream *operand2; #ifdef CORE_MODULE struct property *extremal_property; int extremal_property_sign; #endif int preinsert; /* fleeting markers only */ int postinsert; int line_offset; /* counting lines for error message use */ }
- The structure inter_schema_token is accessed in 2/tkn, 2/rmf, 2/eis, 2/if, 2/i6a and here.
inter_schema_token *InterSchemas::new_token(int type, text_stream *material, inter_ti operation_primitive, int reserved_word, int n, int offset) { inter_schema_token *t = CREATE(inter_schema_token); t->ist_type = type; t->material = Str::duplicate(material); t->bracing = NULL; t->command = NULL; t->operand = NULL; t->operand2 = NULL; #ifdef CORE_MODULE t->extremal_property = NULL; /* that is, none given */ t->extremal_property_sign = MEASURE_T_EXACTLY; /* that is, none given */ #endif t->inline_command = no_ISINC; t->inline_subcommand = no_ISINSC; t->next = NULL; t->owner = NULL; t->operation_primitive = operation_primitive; t->as_quoted = NULL; t->reserved_word = reserved_word; t->constant_number = n; t->preinsert = FALSE; t->postinsert = FALSE; t->line_offset = offset; return t; }
enumerate IF_I6RW 1
enumerate ELSE_I6RW
enumerate STYLE_I6RW
enumerate RETURN_I6RW
enumerate RTRUE_I6RW
enumerate RFALSE_I6RW
enumerate FOR_I6RW
enumerate OBJECTLOOP_I6RW
enumerate WHILE_I6RW
enumerate DO_I6RW
enumerate UNTIL_I6RW
enumerate PRINT_I6RW
enumerate PRINTRET_I6RW
enumerate NEWLINE_I6RW
enumerate GIVE_I6RW
enumerate MOVE_I6RW
enumerate REMOVE_I6RW
enumerate JUMP_I6RW
enumerate SWITCH_I6RW
enumerate DEFAULT_I6RW
enumerate FONT_I6RW
enumerate BREAK_I6RW
enumerate CONTINUE_I6RW
enumerate QUIT_I6RW
enumerate RESTORE_I6RW
enumerate SPACES_I6RW
enumerate READ_I6RW
enumerate INVERSION_I6RW
enumerate IFDEF_I6RW
enumerate IFNDEF_I6RW
enumerate IFTRUE_I6RW
enumerate IFFALSE_I6RW
enumerate IFNOT_I6RW
enumerate ENDIF_I6RW
enumerate ORIGSOURCE_I6RW
enumerate no_ISINC 1
enumerate primitive_definition_ISINC
enumerate new_ISINC
enumerate new_list_of_ISINC
enumerate printing_routine_ISINC
enumerate next_routine_ISINC
enumerate previous_routine_ISINC
enumerate ranger_routine_ISINC
enumerate indexing_routine_ISINC
enumerate strong_kind_ISINC
enumerate weak_kind_ISINC
enumerate object_kind_ISINC
enumerate backspace_ISINC
enumerate erase_ISINC
enumerate open_brace_ISINC
enumerate close_brace_ISINC
enumerate label_ISINC
enumerate counter_ISINC
enumerate counter_storage_ISINC
enumerate counter_up_ISINC
enumerate counter_down_ISINC
enumerate counter_makes_array_ISINC
enumerate by_reference_ISINC
enumerate by_reference_blank_out_ISINC
enumerate reference_exists_ISINC
enumerate lvalue_by_reference_ISINC
enumerate by_value_ISINC
enumerate make_true_ISINC
enumerate box_quotation_text_ISINC
enumerate try_action_ISINC
enumerate try_action_silently_ISINC
enumerate return_value_ISINC
enumerate return_value_from_rule_ISINC
enumerate property_holds_block_value_ISINC
enumerate mark_event_used_ISINC
enumerate rtp_code_ISINC
enumerate rtp_location_ISINC
enumerate my_ISINC
enumerate unprotect_ISINC
enumerate copy_ISINC
enumerate initialise_ISINC
enumerate match_right_relation_domain_ISINC
enumerate match_left_relation_domain_ISINC
enumerate matches_description_ISINC
enumerate now_matches_description_ISINC
enumerate arithmetic_operation_ISINC
enumerate say_ISINC
enumerate show_me_ISINC
enumerate segment_count_ISINC
enumerate final_segment_marker_ISINC
enumerate list_together_ISINC
enumerate rescale_ISINC
enumerate unknown_ISINC
enumerate substitute_ISINC
enumerate combine_ISINC
enumerate no_ISINSC 1
enumerate unarticled_ISINSC
enumerate articled_ISINSC
enumerate repeat_through_ISINSC
enumerate repeat_through_list_ISINSC
enumerate number_of_ISINSC
enumerate random_of_ISINSC
enumerate total_of_ISINSC
enumerate extremal_ISINSC
enumerate function_application_ISINSC
enumerate description_application_ISINSC
enumerate solve_equation_ISINSC
enumerate switch_ISINSC
enumerate break_ISINSC
enumerate verbose_checking_ISINSC
void InterSchemas::add_token(inter_schema *sch, inter_schema_token *t) { if (sch->node_tree == NULL) sch->node_tree = InterSchemas::new_node(sch, EXPRESSION_ISNT, t); InterSchemas::add_token_to_node(sch->node_tree, t); } void InterSchemas::add_token_to_node(inter_schema_node *isn, inter_schema_token *t) { if (isn->expression_tokens == NULL) isn->expression_tokens = t; else { inter_schema_token *p = isn->expression_tokens; while ((p) && (p->next)) p = p->next; p->next = t; } t->owner = isn; } void InterSchemas::add_token_after(inter_schema_token *t, inter_schema_token *existing) { if (existing == NULL) internal_error("can't add after null element"); inter_schema_token *was = existing->next; existing->next = t; t->next = was; t->owner = existing->owner; }
void InterSchemas::changed_tokens_on(inter_schema_node *isn) { if (isn) for (inter_schema_token *l = isn->expression_tokens; l; l=l->next) l->owner = isn; }
int InterSchemas::opening_reserved_word(inter_schema_node *node) { inter_schema_token *f = InterSchemas::first_dark_token(node); if ((f) && (f->ist_type == RESERVED_ISTT)) return f->reserved_word; return 0; } int InterSchemas::opening_directive_word(inter_schema_node *node) { inter_schema_token *f = InterSchemas::first_dark_token(node); if ((f) && (f->ist_type == DIRECTIVE_ISTT)) return f->reserved_word; return 0; } inter_schema_token *InterSchemas::first_dark_token(inter_schema_node *node) { if (node == NULL) return NULL; inter_schema_token *n = node->expression_tokens; while ((n) && (n->ist_type == WHITE_SPACE_ISTT)) n = n->next; return n; } inter_schema_token *InterSchemas::next_dark_token(inter_schema_token *t) { if (t == NULL) return NULL; inter_schema_token *n = t->next; while ((n) && (n->ist_type == WHITE_SPACE_ISTT)) n = n->next; return n; } inter_schema_token *InterSchemas::second_dark_token(inter_schema_node *node) { return InterSchemas::next_dark_token(InterSchemas::first_dark_token(node)); }
§17. Logging. It is invaluable to be able to see compiled schemas in the debugging log, so we go to some trouble here.
void InterSchemas::log(OUTPUT_STREAM, void *vis) { inter_schema *sch = (inter_schema *) vis; if (sch == NULL) LOG("<null schema>\n"); else if (sch->node_tree == NULL) LOG("<schema without nodes>\n"); else InterSchemas::log_depth(sch->node_tree, 0); } void InterSchemas::log_depth(inter_schema_node *isn, int depth) { for (; isn; isn=isn->next_node) InterSchemas::log_just(isn, depth); } void InterSchemas::log_just(inter_schema_node *isn, int depth) { if (Provenance::is_somewhere(isn->provenance)) { LOG("%04d ", Provenance::get_line(isn->provenance)); } else { LOG(".... "); } if (isn->blocked_by_conditional) LOG("XX"); else LOG(" "); for (int d = 0; d < depth; d++) LOG(" "); switch (isn->isn_type) { case STATEMENT_ISNT: LOG("* (statement) %S\n", Primitives::BIP_to_name(isn->isn_clarifier)); break; case OPERATION_ISNT: LOG("* (operation) %S\n", Primitives::BIP_to_name(isn->isn_clarifier)); break; case CODE_ISNT: LOG("* (code)"); if (isn->unclosed) LOG(" <"); if (isn->unopened) LOG(" >"); LOG("\n"); break; case EVAL_ISNT: LOG("* (eval)\n"); break; case ASSEMBLY_ISNT: LOG("* (assembly)\n"); break; case DIRECTIVE_ISNT: LOG("* (directive) "); switch(isn->dir_clarifier) { case IFDEF_I6RW: LOG("#ifdef"); break; case IFNDEF_I6RW: LOG("#ifndef"); break; case IFTRUE_I6RW: LOG("#iftrue"); break; case IFFALSE_I6RW: LOG("#iffalse"); break; case IFNOT_I6RW: LOG("#ifnot"); break; case ENDIF_I6RW: LOG("#endif"); break; case ORIGSOURCE_I6RW: LOG("#origsource"); break; default: LOG("<unknown>"); break; } LOG("\n"); break; case LABEL_ISNT: LOG("* (label)\n"); break; case CALL_ISNT: LOG("* (call)\n"); break; case MESSAGE_ISNT: LOG("* (message)\n"); break; case CALLMESSAGE_ISNT: LOG("* (call-message)\n"); break; case SUBEXPRESSION_ISNT: LOG("* (subexpression)\n"); break; case EXPRESSION_ISNT: LOG("* (expr)"); if (isn->semicolon_terminated) LOG(" ;"); if (isn->unclosed) LOG(" <"); if (isn->unopened) LOG(" >"); if (isn->expression_tokens == NULL) LOG(" - empty"); LOG("\n"); for (inter_schema_token *t = isn->expression_tokens; t; t=t->next) { if (Provenance::is_somewhere(isn->provenance)) { LOG("%04d ", Provenance::get_line(isn->provenance)); } else { LOG(".... "); } for (int d = 0; d < depth + 1; d++) LOG(" "); InterSchemas::log_ist(t); if (isn != t->owner) LOG(" !!! ownership incorrect here"); LOG("\n"); } break; } InterSchemas::log_depth(isn->child_node, depth+1); } void InterSchemas::log_ist(inter_schema_token *t) { if (t == NULL) { LOG("<NULL-IST>"); return; } switch (t->ist_type) { case RAW_ISTT: LOG("RAW "); break; case OPERATOR_ISTT: LOG("OPERATOR "); break; case OPCODE_ISTT: LOG("OPCODE "); break; case DIRECTIVE_ISTT: LOG("DIRECTIVE "); break; case IDENTIFIER_ISTT: LOG("IDENTIFIER "); break; case RESERVED_ISTT: LOG("RESERVED "); break; case NUMBER_ISTT: LOG("NUMBER "); break; case BIN_NUMBER_ISTT: LOG("BIN_NUMBER "); break; case HEX_NUMBER_ISTT: LOG("HEX_NUMBER "); break; case REAL_NUMBER_ISTT: LOG("REAL_NUMBER "); break; case DQUOTED_ISTT: LOG("DQUOTED "); break; case SQUOTED_ISTT: LOG("SQUOTED "); break; case WHITE_SPACE_ISTT: LOG("WHITE_SPACE "); break; case DIVIDER_ISTT: LOG("DIVIDER "); break; case OPEN_ROUND_ISTT: LOG("OPEN_ROUND "); break; case CLOSE_ROUND_ISTT: LOG("CLOSE_ROUND "); break; case OPEN_BRACE_ISTT: LOG("OPEN_BRACE "); break; case CLOSE_BRACE_ISTT: LOG("CLOSE_BRACE "); break; case COMMA_ISTT: LOG("COMMA "); break; case COLON_ISTT: LOG("COLON "); break; case DCOLON_ISTT: LOG("DCOLON "); break; case I7_ISTT: LOG("I7 "); break; case INLINE_ISTT: LOG("INLINE "); break; case ASM_ARROW_ISTT: LOG("ASM_ARROW "); break; case ASM_SP_ISTT: LOG("ASM_SP "); break; case ASM_LABEL_ISTT: LOG("ASM_LABEL "); break; case ASM_NEGATED_LABEL_ISTT: LOG("NEGASM_LABEL "); break; default: LOG("<unknown>"); break; } LOG("%S", t->material); if (t->inline_modifiers & GIVE_KIND_ID_ISSBM) LOG(" GIVE_KIND_ID"); if (t->inline_modifiers & GIVE_COMPARISON_ROUTINE_ISSBM) LOG(" GIVE_COMPARISON_ROUTINE"); if (t->inline_modifiers & DEREFERENCE_PROPERTY_ISSBM) LOG(" DEREFERENCE_PROPERTY"); if (t->inline_modifiers & ADOPT_LOCAL_STACK_FRAME_ISSBM) LOG(" ADOPT_LOCAL_STACK_FRAME"); if (t->inline_modifiers & CAST_TO_KIND_OF_OTHER_TERM_ISSBM) LOG(" CAST_TO_KIND_OF_OTHER_TERM"); if (t->inline_modifiers & BY_REFERENCE_ISSBM) LOG(" BY_REFERENCE"); }
§18. Lint. As can be seen, the inter_schema structure is quite complicated, and there
are numerous invariants it has to satisfy. As a precaution, then, we check that
all of these invariants hold before shipping out a compiled schema. This is
where the check is done:
void InterSchemas::lint(inter_schema *sch) { if ((sch) && (sch->parsing_errors == NULL)) { text_stream *err = InterSchemas::lint_isn(sch->node_tree, 0); if (err) { LOG("Lint fail: %S\n$1\n", err, sch); WRITE_TO(STDERR, "Lint fail: %S\n%S\n", err, sch->converted_from); internal_error("inter schema failed lint"); } } } text_stream *InterSchemas::lint_isn(inter_schema_node *isn, int depth) { for (; isn; isn=isn->next_node) { if (isn->isn_type == EXPRESSION_ISNT) { int asm = FALSE; for (inter_schema_token *t = isn->expression_tokens; t; t=t->next) { switch (t->ist_type) { case OPCODE_ISTT: if (t == isn->expression_tokens) asm = TRUE; else return I"contains loose assembler"; break; case RAW_ISTT: return I"contains raw node"; case OPEN_BRACE_ISTT: return I"contains open-brace node"; case CLOSE_BRACE_ISTT: return I"contains close-brace node"; case OPEN_ROUND_ISTT: return I"contains open-round node"; case CLOSE_ROUND_ISTT: return I"contains close-round node"; case COMMA_ISTT: return I"contains comma node"; case DIVIDER_ISTT: return I"contains divider node"; case RESERVED_ISTT: return I"contains reserved word node"; case COLON_ISTT: return I"contains colon node"; case DCOLON_ISTT: return I"contains double-colon node"; case OPERATOR_ISTT: return I"contains operator node"; } if ((t->ist_type == INLINE_ISTT) && (t->inline_command == open_brace_ISINC)) return I"contains manual open-brace"; if ((t->ist_type == INLINE_ISTT) && (t->inline_command == close_brace_ISINC)) return I"contains manual close-brace"; if ((t->ist_type == NUMBER_ISTT) && (t->next) && (t->next->ist_type == NUMBER_ISTT) && (asm == FALSE)) return I"two consecutive numbers"; if (isn != t->owner) return I"ownership linkage broken"; } if (isn->child_node) return I"expression has child nodes"; } else { if (isn->expression_tokens) return I"non-expression has elements"; } if ((isn->child_node) && (isn->child_node->parent_node != isn)) return I"child-parent linkage broken"; if ((isn->isn_clarifier) && (isn->expression_tokens)) return I"ambiguous is-node"; text_stream *R = InterSchemas::lint_isn(isn->child_node, depth+1); if (R) return R; } return NULL; }