Reading the rawtext in, breaking it up into blocks, and sending them for output as formatted documentation.


§1. The rawtext files. This reads an entire rawtext volume.

/* Render a whole rawtext volume: the volume's rawtext file is scanned into
   blocks, rendered as full blocks rather than as a partial file, with no
   example attached, and then the formatted file is closed. The stream handed
   back by |Renderer::close_formatted_file| is returned. */
text_stream *Rawtext::process_large_rawtext_file(OUTPUT_STREAM, volume *V) {
    OUT = Rawtext::turn_rawtext_into_blocks(OUT, V, FALSE, V->vol_rawtext_filename, NULL);
    OUT = Renderer::close_formatted_file(OUT);
    return OUT;
}

§2. The other source of rawtext is an Example file. These, however, start with a three-line header containing metadata — we need to skip this before running the rawtext scanner. Examples are rendered as partial files, not as multi-section rawtext volumes.

/* Render the rawtext of example |E|, read from |E->ex_filename|, as a partial
   file. The stream returned by the scanner is passed straight back. */
text_stream *Rawtext::process_example_rawtext_file(OUTPUT_STREAM,
    volume *V, example *E) {
    return Rawtext::turn_rawtext_into_blocks(OUT, V, TRUE, E->ex_filename, E);
}

§3. The scanner. And here is the common scanner used for both.

"Rawtext" is the very lightly marked-up form of plain text in which the Inform manuals are written. Perhaps I should have used Markdown or REST, but those formats were less well-known in the early 2000s, so rawtext is its own unique flower.

A rawtext file is divided up into one or more blocks. The first of these can optionally be introduced by a block heading line; any subsequent ones must be. (A block ends when a new heading line appears, or at end of file.)

/* Scan the rawtext file |name| line by line, rendering each block as it is
   completed. |E| is the example being read, or |NULL|; if
   |render_as_partial_file_only| is set, only the text of each block is
   rendered. Returns the output stream as it stands after the last block. */
text_stream *Rawtext::turn_rawtext_into_blocks(OUTPUT_STREAM,
    volume *V, int render_as_partial_file_only, filename *name, example *E) {
    rawtext_helper_state scan_state;
    rawtext_helper_state *rhs = &scan_state;

    /* What is being read, and where the result goes */
    rhs->V = V;
    rhs->E = E;
    rhs->OUT = OUT;
    rhs->partial_only = render_as_partial_file_only;

    /* Nothing has been read or written yet */
    rhs->this_is_first_block_in_file = TRUE;
    rhs->skipping_current_block = FALSE;
    rhs->no_blocks_written = 0;
    rhs->no_chapters_read_in_current_rawtext = 0;
    rhs->no_blocks_read_in_current_chapter = 0;
    rhs->no_pars_read_in_current_block = 0;
    rhs->title_of_block_being_read = Str::new(); /* untitled until a block heading is found */

    /* An example file opens with a header which the scanner must pass over */
    rhs->skip_opening_lines = (E)?(E->ex_header_length):0;

    Prepare to read a new chapter of rawtext3.1;
    Prepare to read a new block of rawtext3.2;

    Scan the file and render blocks as they complete3.4;

    Render the block just completed, unless it's empty3.3;
    Str::dispose_of(rhs->title_of_block_being_read);
    return OUT;
}

typedef struct rawtext_helper_state {
    struct text_stream *OUT;
    struct volume *V;
    struct example *E;
    int skipping_current_block;
    int skip_opening_lines;
    int no_blocks_written;
    int this_is_first_block_in_file;
    int no_chapters_read_in_current_rawtext;
    int no_blocks_read_in_current_chapter;
    int no_pars_read_in_current_block;
    int partial_only;
    struct text_stream *title_of_block_being_read;
} rawtext_helper_state;

§3.1. Prepare to read a new chapter of rawtext3.1 =

    /* No blocks have yet been read in the chapter now beginning. */
    rhs->no_blocks_read_in_current_chapter = 0;

§3.2. Prepare to read a new block of rawtext3.2 =

    /* Clear the renderer's block buffer, start the paragraph count again from
       zero, and count the new block towards the current chapter. */
    Renderer::clear_block_buffer();
    rhs->no_pars_read_in_current_block = 0;
    rhs->no_blocks_read_in_current_chapter += 1;

§3.3. Render the block just completed, unless it's empty3.3 =

    /* A block in which no paragraphs were read is not rendered at all. */
    if (rhs->no_pars_read_in_current_block > 0) {
        /* When reading an example, drop the final buffered paragraph if it
           is empty and was not shortened. */
        if ((rhs->E) && (no_paras_in_block_buffer > 0)) {
            int last_para = no_paras_in_block_buffer - 1;
            if ((Str::len(paragraphs[last_para].par_texts) == 0) &&
                (paragraphs[last_para].par_shortened == FALSE)) {
                no_paras_in_block_buffer = last_para;
            }
        }
        if (rhs->partial_only == FALSE) {
            /* A full block is rendered against its section of the volume. */
            index_to_examples = TRUE;
            OUT = Renderer::render_block(OUT, rhs->V,
                (rhs->V)?(rhs->V->sections[rhs->no_blocks_written]):NULL);
        } else {
            OUT = Renderer::render_text_of_block(OUT, rhs->V, NULL);
        }
        /* Record the stream handed back by the renderer, and count the block. */
        rhs->no_blocks_written++;
        rhs->this_is_first_block_in_file = FALSE;
        rhs->OUT = OUT;
    }

§3.4. Scan the file and render blocks as they complete3.4 =

    /* Each line of the file is passed to |Rawtext::process_large_helper|, with
       |rhs| as its state. The helper can change |rhs->OUT| when it renders a
       block, so the current stream is picked up again afterwards. */
    TextFiles::read(name, FALSE, "can't open rawtext file",
        TRUE, Rawtext::process_large_helper, NULL, rhs);
    OUT = rhs->OUT;

§4.

/* The callback given to |TextFiles::read| for each line of a rawtext file.
   |rawl| is the line, which is modified in place; |tfp| is not used here;
   |v_rhs| is the |rawtext_helper_state| for the scan. A line is either a
   block heading, which ends the previous block, or else (unless the current
   block is being skipped) a paragraph to add to the block buffer. */
void Rawtext::process_large_helper(text_stream *rawl, text_file_position *tfp,
    void *v_rhs) {
    rawtext_helper_state *rhs = (rawtext_helper_state *) v_rhs;
    /* Pass over exactly |skip_opening_lines| lines. The count must be strictly
       positive for a line to be skipped: otherwise one line too many is lost,
       including the first line of a file which has no header at all. */
    if (rhs->skip_opening_lines > 0) {
        rhs->skip_opening_lines--; return;
    }
    int shortened = Str::trim_white_space_at_end(rawl);
    match_results mr = Regexp::create_mr();
    if (Regexp::match(&mr, rawl, U"%[(%c*?)%] (%c*)"))
        Deal with a block heading4.1
    else if (rhs->skipping_current_block == FALSE) {
        int suppress_p_tag = FALSE;
        TEMPORARY_TEXT(HTML_prefix)
        TEMPORARY_TEXT(css_style)
        match_results mr2 = Regexp::create_mr();
        Deal with any permitted markup4.2;
        /* Insertion notations are not looked for in tab-indented lines when
           code is being treated as verbatim. */
        if ((indoc_settings->treat_code_as_verbatim == FALSE) || (Str::get_first_char(rawl) != '\t')) {
            Deal with an insert-change-log notation4.3;
            Deal with an insert-image notation4.5;
        }
        /* A conditional paragraph tag can set |abandon_para|, in which case
           the paragraph is thrown away. */
        int abandon_para = FALSE;
        Deal with paragraph tags4.6;
        if (abandon_para == FALSE) Deal with a regular paragraph4.7;
        DISCARD_TEXT(HTML_prefix)
        DISCARD_TEXT(css_style)
        Regexp::dispose_of(&mr2);
    }
    Regexp::dispose_of(&mr);
}

§4.1. Block headings are paragraphs beginning with square-bracketed material:

    [x] The footwear kind

This one is a typical section heading. The [x] marks it as being a mere level-B heading in the book, and "The footwear kind" is the text of the title. (A heading can also end with a braced documentation reference, such as {kind_footwear}, as described below.)

The x text is a meaningless placeholder. The way to get this noticed is to write something like:

    [Chapter: Bananas] Introduction to soft yellow fruit

which creates a new chapter called "Bananas", within which this block will be the first section.

Deal with a block heading4.1 =

    /* |mr| holds the two parts of the heading line matched by the caller. */
    text_stream *block_header = mr.exp[0];  The text in the square brackets
    text_stream *title = mr.exp[1];

    /* A header of the form |{symbol:}rest| makes the block conditional. The
       braced part is removed from the header, and the block is skipped if
       the test on the symbol fails. */
    rhs->skipping_current_block = FALSE;
    match_results mr2 = Regexp::create_mr();
    if (Regexp::match(&mr2, block_header, U"{(%c*?):}(%c*?)")) {
        Str::copy(block_header, mr2.exp[1]);
        if (Symbols::perform_ifdef(mr2.exp[0]) == FALSE) {
            rhs->skipping_current_block = TRUE;
        }
    }

    /* The heading of a block we are keeping ends the previous block, which is
       rendered now, before the counters are reset for the new one. */
    if (rhs->skipping_current_block == FALSE) {
        text_stream *OUT = rhs->OUT;
        Render the block just completed, unless it's empty3.3;
        rhs->OUT = OUT;
        /* References are stripped out of the title before it is recorded. */
        Take note of documentation references4.1.1;
        Str::copy(rhs->title_of_block_being_read, title);

        /* A "Chapter: ..." header begins a new chapter as well as a new block. */
        if (Regexp::match(&mr2, block_header, U"Chapter: (%c*)")) {
            ++(rhs->no_chapters_read_in_current_rawtext);
            Prepare to read a new chapter of rawtext3.1;
        }
        Prepare to read a new block of rawtext3.2;
    }
    Regexp::dispose_of(&mr2);

§4.1.1. Section headings can be marked with braced documentation references:

    [x] The footwear kind {kind_footwear}

Take note of documentation references4.1.1 =

    /* Strip one trailing braced reference from the title on each pass, and
       register it against the section indexed by the number of blocks
       written so far. */
    while (Regexp::match(&mr2, title, U"(%c*) {(%C+)} *")) {
        Str::copy(title, mr2.exp[0]);
        Updater::add_reference_symbol(mr2.exp[1], rhs->V,
            (rhs->V)?(rhs->V->sections[rhs->no_blocks_written]):NULL);
    }

§4.2. Rawtext is not allowed to contain direct HTML markup, but it can contain "span notations", which can in turn be configured to look like HTML markup. So, for instance, the Inform documentation uses <b>...</b> for bold and <i>...</i> for italic, but this is only because its instructions say so.

(We also look for indexing markup, and we need to do that first, because smoke-test indexing mode applies direct markup to make its smoky black rectangles.)

Deal with any permitted markup4.2 =

    if ((indoc_settings->treat_code_as_verbatim == FALSE) || (Str::get_first_char(rawl) != '\t')) {
        Indexes::scan_indexingnotations(rawl, rhs->V,
            (rhs->V)?(rhs->V->sections[rhs->no_blocks_written]):NULL, rhs->E);
        CSS::expand_spannotations(rawl, MARKUP_SPP);
    }

    if (indoc_settings->format == HTML_FORMAT) Regexp::replace(rawl, U"<(%c*?)>", U"&lt;%0&gt;", REP_REPEATING);

    inchar32_t *replacement = U"%1";
    if (indoc_settings->format == HTML_FORMAT) replacement = U"<span class=\"%0\">%1</span>";
    Regexp::replace(rawl, U"___mu___(%c*?)___mo___(%c*?)___mc___", replacement, REP_REPEATING);

§4.3. The notation ///6X12.txt/// means "insert the change log for build 6X12 here". It should be the only thing on its line.

Deal with an insert-change-log notation4.3 =

    /* The notation is |///name.txt///|, optionally followed by spaces, at the
       end of the line. Both delimiters are three slashes, so that the leaf
       name captured does not begin with stray slashes. */
    if (Regexp::match(&mr2, rawl, U"(%c*?)///(%c*?.txt)/// *")) {
        Str::copy(rawl, mr2.exp[0]);
        if (indoc_settings->format == HTML_FORMAT) {
            /* In HTML the line is replaced by a rule and a preformatted box. */
            Str::clear(rawl);
            HTML::hr(rawl, NULL);
            HTML::open(rawl, "pre", I"class='changelog'", __FILE__, __LINE__);
            suppress_p_tag = TRUE;
        }
        /* The change log is read from the change logs folder and appended,
           line by line, to |rawl|. */
        filename *cl = Filenames::in(indoc_settings->change_logs_folder, mr2.exp[1]);
        TextFiles::read(cl, FALSE, "can't open change log file",
            TRUE, Rawtext::process_change_log_helper, NULL, rawl);
        if (indoc_settings->format == HTML_FORMAT) {
            WRITE_TO(rawl, "\n");
            HTML::close(rawl, "pre", __FILE__, __LINE__);
        }
    }

§4.4. Where, almost verbatim, we copy from the change log into the raw-line:

/* The callback given to |TextFiles::read| for each line |sml| of a change log.
   The line is appended, followed by a newline, to the stream passed as
   |v_rawl|. For HTML output its angle brackets are turned into entities
   first. |tfp| is not used. */
void Rawtext::process_change_log_helper(text_stream *sml, text_file_position *tfp,
    void *v_rawl) {
    text_stream *destination = (text_stream *) v_rawl;
    int writing_html = (indoc_settings->format == HTML_FORMAT)?TRUE:FALSE;
    if (writing_html) {
        Regexp::replace(sml, U"<", U"&lt;", REP_REPEATING);
        Regexp::replace(sml, U">", U"&gt;", REP_REPEATING);
    }
    WRITE_TO(destination, "%S\n", sml);
}

§4.5. Images are embedded with the notation

    ///filename.extension///

though only one of these may appear in each line. If the form

    ///classname:filename.extension///

is used, then the image is styled as img.classname.

Deal with an insert-image notation4.5 =

    /* Each pass replaces one |///name///| notation in the line. Both
       delimiters are three slashes, so that a lone slash elsewhere in the
       line is not mistaken for the start of an image. */
    while (Regexp::match(&mr2, rawl, U"(%c*?)///(%c*?)///(%c*)")) {
        text_stream *left = mr2.exp[0];
        text_stream *name = mr2.exp[1];
        text_stream *right = mr2.exp[2];
        /* In the form |classname:filename|, split off the class name. */
        TEMPORARY_TEXT(cl)
        match_results mr3 = Regexp::create_mr();
        if (Regexp::match(&mr3, name, U"(%c*?): *(%c*)")) {
            Str::copy(cl, mr3.exp[0]); Str::copy(name, mr3.exp[1]);
            Regexp::dispose_of(&mr3);
        }
        TEMPORARY_TEXT(url)
        HTMLUtilities::image_URL(url, name);
        /* Rebuild the line with the notation replaced. */
        Str::clear(rawl);
        if (indoc_settings->format == HTML_FORMAT) {
            WRITE_TO(rawl, "%S", left);
            TEMPORARY_TEXT(details)
            WRITE_TO(details, "alt=\"%S\" src=\"%S\"", name, url);
            if (Str::len(cl) > 0) WRITE_TO(details, " class=\"%S\"", cl);
            HTML::tag_sc(rawl, "img", details);
            DISCARD_TEXT(details)
            WRITE_TO(rawl, "%S", right);
        } else {
            WRITE_TO(rawl, "%S(Image %S here)%S", left, name, right);
        }
        DISCARD_TEXT(cl)
        DISCARD_TEXT(url)
    }

§4.6. A paragraph beginning with braced material, {thus}, is "tagged". There can be multiple tags, in principle, which is why this is arranged as a loop, though it's not often needed more than once. Tags are simply markers which annotate the paragraph, so we extract each in turn from the left-hand side, then act accordingly.

Deal with paragraph tags4.6 =

    /* Tags are peeled off the line one at a time: |mr3| splits the line into
       tag and remainder, and |mr4| is used by the handlers to examine the tag.
       Each handler which recognises the tag either |continue|s to the next
       tag or |break|s out of the loop, so the error is reached only for a
       tag which none of them recognises. */
    match_results mr3 = Regexp::create_mr();
    match_results mr4 = Regexp::create_mr();
    while (Regexp::match(&mr3, rawl, U"{(%c*?)}(%c*)")) {
        text_stream *paragraph_tag = mr3.exp[0];
        Str::copy(rawl, mr3.exp[1]);

        Deal with a conditional paragraph tag4.6.1;
        Deal with a phrase definition paragraph tag4.6.2;
        Deal with a CSS-styling paragraph tag4.6.3;
        Errors::with_text("{%S} is not a tag I know", paragraph_tag);
    }
    Regexp::dispose_of(&mr3);
    Regexp::dispose_of(&mr4);

§4.6.1. One use of paragraph tags is to mark a paragraph as being relevant only to one of the platforms on which Inform runs. (We've already seen this done for whole blocks of documentation: this is much finer control.) For example, documentation might say:

    {Windows:}The My Documents folder can be reached using...

If we're generating for Windows, we ignore the tag: this looks like a regular paragraph to us. If we're generating for some other platform, we throw the whole paragraph away. If we're generating for no specific platform (for example, for the Inform website), we keep the paragraph but annotate it.

Deal with a conditional paragraph tag4.6.1 =

    /* A tag ending in a colon is a condition. If the test on it fails, the
       paragraph is abandoned and no further tags are read; otherwise we go
       on to the next tag. */
    if (Regexp::match(&mr4, paragraph_tag, U"(%c*):")) {
        if (Symbols::perform_ifdef(mr4.exp[0]) == FALSE) {
            abandon_para = TRUE; break;
        }
        continue;
    }

§4.6.2. Tags also mark the presence of phrase explanations in the main WWI:

    {defn ph_letdefault}let (a name not so far used) be (name of kind)
    ...
    {end}

Deal with a phrase definition paragraph tag4.6.2 =

    /* A |defn| tag opens a definition box. The rest of the line, with any
       trailing white space removed, is handed over as the heading of the box,
       and the line itself is replaced by whatever the box-maker writes. */
    if (Regexp::match(&mr4, paragraph_tag, U"defn *(%c*?)")) {
        text_stream *defn = mr4.exp[0];
        TEMPORARY_TEXT(head)
        Str::copy(head, rawl);
        while (Characters::is_whitespace(Str::get_last_char(head)))
            Str::delete_last_character(head);
        Updater::add_reference_symbol(defn, rhs->V, (rhs->V)?(rhs->V->sections[rhs->no_blocks_written]):NULL);
        Str::clear(rawl);
        HTMLUtilities::definition_box(rawl, head, defn, rhs->V,
            (rhs->V)?(rhs->V->sections[rhs->no_blocks_written]):NULL);
        suppress_p_tag = TRUE;
        /* The temporary text must be discarded before leaving this block. */
        DISCARD_TEXT(head)
        continue;
    }
    /* An |end| tag closes the box again. */
    if (Str::eq_wide_string(paragraph_tag, U"end")) {
        Str::clear(rawl);
        HTMLUtilities::end_definition_box(rawl);
        suppress_p_tag = TRUE;
        continue;
    }

§4.6.3. Deal with a CSS-styling paragraph tag4.6.3 =

    /* A tag ending in a slash gives a style for the paragraph: the text
       before the slash is recorded in |css_style|, and we go on to the next tag. */
    if (Regexp::match(&mr4, paragraph_tag, U"(%c*)/")) {
        Str::copy(css_style, mr4.exp[0]);
        continue;
    }

§4.7. Finally, then, we're left with a regular paragraph. It was never a block heading, and whatever tags it once had have been removed.

Deal with a regular paragraph4.7 =

    /* Measure and remove the indentation, treat the text for the output
       format, then add the paragraph to the block buffer and count it
       towards the current block. */
    int indentation_count = 0;
    Establish the indentation level4.7.1;
    Treat the text as necessary4.7.2;
    Renderer::add_para_to_block_buffer(rawl, indentation_count, suppress_p_tag,
        HTML_prefix, css_style, shortened);
    rhs->no_pars_read_in_current_block++;

§4.7.1. Initial tab characters (alone) are read as indentation.

Establish the indentation level4.7.1 =

    while (Str::get_first_char(rawl) == '\t') {
        indentation_count++;
        Str::delete_first_character(rawl);
    }

§4.7.2. In the case of HTML, we need to be careful not to turn double-quotes used in tag elements into &quot; escapes.

Treat the text as necessary4.7.2 =

    /* For HTML, the line is rebuilt piece by piece: text lying outside of
       angle-bracketed tags is escaped, while the tags themselves are copied
       across untouched, so that quotation marks inside them survive. */
    if (indoc_settings->format == HTML_FORMAT) {
        TEMPORARY_TEXT(dequotee)
        Str::copy(dequotee, rawl);
        Str::clear(rawl);
        match_results mr4 = Regexp::create_mr();
        while (Regexp::match(&mr4, dequotee, U"(%c*?)<(%c*?)>(%c*)")) {
            text_stream *L = mr4.exp[0]; text_stream *M = mr4.exp[1]; text_stream *R = mr4.exp[2];
            Rawtext::escape_HTML_characters_in(L);
            WRITE_TO(rawl, "%S<%S>", L, M);
            Str::copy(dequotee, R);
        }
        /* Whatever follows the last tag is plain text, and is escaped too. */
        Rawtext::escape_HTML_characters_in(dequotee);
        WRITE_TO(rawl, "%S", dequotee);
        /* Clean up the match results and the temporary text. */
        Regexp::dispose_of(&mr4);
        DISCARD_TEXT(dequotee)
    }

§5.

/* When the output format is HTML, rewrite |text| in place so that double
   quotes and angle brackets become entities. An ampersand is left alone if
   it is followed by a hash sign, or by one or more alphanumeric characters
   and then a semicolon; otherwise it becomes |&amp;|. For other formats,
   |text| is left unchanged. */
void Rawtext::escape_HTML_characters_in(text_stream *text) {
    if (indoc_settings->format != HTML_FORMAT) return;
    TEMPORARY_TEXT(escaped)
    int length = Str::len(text);
    for (int pos = 0; pos < length; pos++) {
        inchar32_t c = Str::get_at(text, pos);
        if (c == '\"') {
            WRITE_TO(escaped, "&quot;");
        } else if (c == '<') {
            WRITE_TO(escaped, "&lt;");
        } else if (c == '>') {
            WRITE_TO(escaped, "&gt;");
        } else if (c == '&') {
            /* Does this ampersand already begin an entity? */
            int begins_entity = FALSE;
            if (Str::get_at(text, pos+1) == '#') {
                begins_entity = TRUE;
            } else {
                int end = pos+1;
                while (Characters::isalnum(Str::get_at(text, end))) end++;
                if ((end > pos+1) && (Str::get_at(text, end) == ';')) begins_entity = TRUE;
            }
            if (begins_entity) {
                PUT_TO(escaped, c);
            } else {
                WRITE_TO(escaped, "&amp;");
            }
        } else {
            PUT_TO(escaped, c);
        }
    }
    Str::copy(text, escaped);
    DISCARD_TEXT(escaped)
}