To transcribe a version of the text in the web into a form which can be compiled as a program.


§1. The Master Tangler. Here's what has happened so far, on a -tangle run of Inweb: on any other sort of run, of course, we would never be in this section of code. The web was read completely into memory, and then fully parsed, with all of the arrays and hashes populated. Program Control then sent us straight here for the tangling to begin...

/* Tangle the part of web W which belongs to target, writing it to dest_file;
   then copy any imported headers, write any extract files, and finally give
   the target's language the chance to do additional tangling of its own.
   Failure to open dest_file for writing is a fatal error. */
void Tangler::tangle(web *W, tangle_target *target, filename *dest_file) {
    programming_language *lang = target->tangle_language;
    PRINT("  tangling <%/f> (written in %S)\n", dest_file, lang->language_name);

    text_stream TO_struct;
    text_stream *OUT = &TO_struct; /* the stream which the main tangle is written to */
    if (STREAM_OPEN_TO_FILE(OUT, dest_file, ISO_ENC) == FALSE)
        Errors::fatal_with_file("unable to write tangled file", dest_file);
    Perform the actual tangle1.1;
    STREAM_CLOSE(OUT);

    /* OUT is closed by now: what follows writes or copies other files */
    Tangle any imported headers1.2;
    Tangle any extract files not part of the target itself1.3;
    LanguageMethods::additional_tangling(lang, W, target);
}

§1.1. All of the sections are tangled together into one big file, the structure of which can be seen below.

/* Loop P through the paragraphs of each section S, in each chapter C of the web,
   for which S->sect_target is T. The web is referred to as W, so a variable of
   that name must be in scope; the statement following the macro is the loop body. */
define LOOP_OVER_PARAGRAPHS(C, S, T, P)
    LOOP_OVER_LINKED_LIST(C, chapter, W->chapters)
        LOOP_OVER_LINKED_LIST(S, section, C->sections)
            if (S->sect_target == T)
                LOOP_OVER_LINKED_LIST(P, paragraph, S->paragraphs)

Perform the actual tangle1.1 =

    /* Paragraphs are tangled in three passes over the target: very early, early,
       then all the rest. A paragraph which defines a macro is skipped by every
       pass, since it is tangled instead wherever that macro is expanded. */

     (a) The shebang line, a header for scripting languages, and other heading matter
    LanguageMethods::shebang(OUT, lang, W, target);
    LanguageMethods::disclaimer(OUT, lang, W, target);
    LanguageMethods::additional_early_matter(OUT, lang, W, target);
    chapter *C; section *S; paragraph *P; /* loop variables for LOOP_OVER_PARAGRAPHS */
    LOOP_OVER_PARAGRAPHS(C, S, target, P)
        if ((P->placed_very_early) && (P->defines_macro == NULL))
            Tangler::tangle_paragraph(OUT, P);

     (b) Results of @d declarations
    Tangle all the constant definitions in section order1.1.1;

     (c) Miscellaneous automated C predeclarations
    LanguageMethods::additional_predeclarations(OUT, lang, W);

     (d) Above-the-bar code from all of the sections (global variables, and such)
    LOOP_OVER_PARAGRAPHS(C, S, target, P)
        if ((P->placed_early) && (P->defines_macro == NULL))
            Tangler::tangle_paragraph(OUT, P);

     (e) Below-the-bar code: the bulk of the program itself
    LOOP_OVER_PARAGRAPHS(C, S, target, P)
        if ((P->placed_early == FALSE) && (P->placed_very_early == FALSE) && (P->defines_macro == NULL))
            Tangler::tangle_paragraph(OUT, P);

     (f) Opposite of the shebang: a footer
    LanguageMethods::gnabehs(OUT, lang, W);

§1.1.1. This is the result of all those @d definitions; note that these sometimes extend across multiple lines.

Tangle all the constant definitions in section order1.1.1 =

    /* Definitions are written in two passes over the lines of the target: first
       the ordinary ones, then the default ones, each of which is wrapped in a
       conditional opened and closed by the language. */
    chapter *C;
    section *S;
    LOOP_WITHIN_TANGLE(C, S, target)
        if (L->category == BEGIN_DEFINITION_LCAT)
            if (L->default_defn == FALSE)
                Define the constant1.1.1.1;
    LOOP_WITHIN_TANGLE(C, S, target)
        if (L->category == BEGIN_DEFINITION_LCAT)
            if (L->default_defn) {
                /* Defining the constant moves L on to the last continuation line of
                   a multi-line definition, so the name being defined is noted now:
                   the conditional must be closed with the same name it was opened
                   with, not with the operand of whatever line L ends up at. */
                text_stream *defined_name = L->text_operand;
                /* NOTE(review): the final FALSE presumably selects the "if not
                   defined" sense of the conditional -- confirm in LanguageMethods */
                LanguageMethods::open_ifdef(OUT, lang, defined_name, FALSE);
                Define the constant1.1.1.1;
                LanguageMethods::close_ifdef(OUT, lang, defined_name, FALSE);
            }
    Enumerations::define_extents(OUT, target, lang);

§1.1.1.1. Define the constant1.1.1.1 =

    if (L->owning_paragraph == NULL) Main::error_in_web(I"misplaced definition", L);
    else Tags::open_ifdefs(OUT, L->owning_paragraph);
    LanguageMethods::start_definition(OUT, lang,
        L->text_operand,
        L->text_operand2, S, L);
    while ((L->next_line) && (L->next_line->category == CONT_DEFINITION_LCAT)) {
        L = L->next_line;
        LanguageMethods::prolong_definition(OUT, lang, L->text, S, L);
    }
    LanguageMethods::end_definition(OUT, lang, S, L);
    if (L->owning_paragraph) Tags::close_ifdefs(OUT, L->owning_paragraph);

§1.2. Tangle any imported headers1.2 =

    /* Each header file of the web is copied into the web's tangled folder */
    filename *header_file;
    LOOP_OVER_LINKED_LIST(header_file, filename, W->headers) {
        Shell::copy(header_file, Reader::tangled_folder(W), "");
    }

§1.3. The following simple implementation splices raw lines from text (probably code, or configuration gobbledegook) marked as "to ...", giving a leafname. We place files of those leafnames in the same directory as the tangle target.

define MAX_EXTRACT_FILES 10 /* most distinct extract files one tangle can write: needing more is a fatal error */

Tangle any extract files not part of the target itself1.3 =

    text_stream *extract_names[MAX_EXTRACT_FILES];
    text_stream extract_files[MAX_EXTRACT_FILES];
    int no_extract_files = 0;
    chapter *C; section *S; paragraph *P;
    LOOP_OVER_PARAGRAPHS(C, S, target, P)
        for (source_line *L = P->first_line_in_paragraph;
            ((L) && (L->owning_paragraph == P)); L = L->next_line)
                if (Str::len(L->extract_to) > 0) {
                    int j = no_extract_files;
                    for (int i=0; i<no_extract_files; i++)
                        if (Str::eq(L->extract_to, extract_names[i])) j = i;
                    if (j == no_extract_files) {
                        if (j == MAX_EXTRACT_FILES)
                            Errors::fatal("too many extract files in tangle");
                        extract_names[j] = Str::duplicate(L->extract_to);
                        filename *F = Filenames::in(Filenames::up(dest_file), L->extract_to);
                        if (STREAM_OPEN_TO_FILE(&(extract_files[j]), F, UTF8_ENC) == FALSE)
                            Errors::fatal_with_file("unable to write extract file", F);
                        no_extract_files++;
                    }
                    WRITE_TO(&(extract_files[j]), "%S\n", L->text);
                }
    for (int i=0; i<no_extract_files; i++) STREAM_CLOSE(&(extract_files[i]));

§2. So here is the main tangler for a single paragraph. We basically expect to act only on CODE_BODY_LCAT lines (those containing actual code), unless something quirky has been done to support a language feature.

/* Tangle the lines of paragraph P to OUT, between the opening and closing of
   whatever ifdefs Tags supplies for P. The language of the section holding P
   is consulted for every line; beyond that, only CODE_BODY_LCAT lines whose
   tangling is not suppressed are written, each followed by a newline. */
void Tangler::tangle_paragraph(OUTPUT_STREAM, paragraph *P) {
    Tags::open_ifdefs(OUT, P);
    /* contiguous is TRUE while the lines being written carry on from the last
       line marker inserted, so that no new marker is needed */
    int contiguous = FALSE;
    for (source_line *L = P->first_line_in_paragraph;
        ((L) && (L->owning_paragraph == P)); L = L->next_line) {
        /* The language may have something of its own to insert for this line */
        if (LanguageMethods::will_insert_in_tangle(P->under_section->sect_language, L)) {
            Insert line marker if necessary to show the origin of this code2.1;
            LanguageMethods::insert_in_tangle(OUT, P->under_section->sect_language, L);
        }
        if ((L->category != CODE_BODY_LCAT) || (L->suppress_tangling)) {
            /* A line which is not tangled breaks the run, so the next line which
               is tangled will need a marker of its own */
            contiguous = FALSE;
        } else {
            Insert line marker if necessary to show the origin of this code2.1;
            Tangler::tangle_line(OUT, L->text, P->under_section, L); WRITE("\n");
        }
    }
    Tags::close_ifdefs(OUT, P);
}

§2.1. The tangled file is, as the term suggests, a tangle, with lines coming from many different origins. Some programming languages (C, for instance) support a notation to tell the compiler that code has come from somewhere else; if so, here's where we use it.

Insert line marker if necessary to show the origin of this code2.1 =

    /* A marker is wanted only where a fresh run of lines begins: once it has
       been inserted, the lines which follow are contiguous with it */
    if (!contiguous) {
        LanguageMethods::insert_line_marker(OUT, P->under_section->sect_language, L);
        contiguous = TRUE;
    }

§3. The Code Tangler. All of the final tangled code passes through the following routine. Almost all of the time, it simply prints original verbatim to the file OUT.

/* Write the text original to OUT on behalf of section S, expanding the first
   paragraph macro or double-square command found in it, if there is one. L is
   the source line which this text comes from: the expansions below use it for
   error reporting and for line markers. Each expansion calls this function
   again on the text which follows it, so that every escape on the line is
   dealt with in turn. */
void Tangler::tangle_line(OUTPUT_STREAM, text_stream *original, section *S, source_line *L) {
    int mlen, slen;
    /* NOTE(review): find_expansion appears to return the position at which the
       first such bracketed expansion begins, or -1 if there is none, and to
       store its length through the final argument -- confirm against Regexp */
    int mpos = Regexp::find_expansion(original, '@', '<', '@', '>', &mlen);
    int spos = Regexp::find_expansion(original, '[', '[', ']', ']', &slen);
    /* A macro takes precedence if it begins no later than any double-square
       command, but only if the language allows expansion in this text */
    if ((mpos >= 0) && ((spos == -1) || (mpos <= spos)) &&
        (LanguageMethods::allow_expansion(S->sect_language, original)))
        Expand a paragraph macro3.1
    else if (spos >= 0)
        Expand a double-square command3.2
    else
        LanguageMethods::tangle_line(OUT, S->sect_language, original);  this is usually what happens
}

§3.1. The first form of escape is a paragraph macro in the middle of code. For example, we handle

    if (banana_count == 0) @<Yes, we have no bananas@>;

by calling the lower-level tangler on if (banana_count == 0) (a substring which we know can't involve any macros, since we are detecting macros from left to right, and this is to the left of the one we found); then by tangling the definition of "Yes, we have no bananas"; then by calling the upper-level code tangler on ;. (In this case, of course, there's nothing much there, but in principle it could contain further macros.)

Note that when we've expanded "Yes, we have no bananas" we have certainly placed code into the tangled file from a different location; that will insert a #line marker for the definition location; and we don't want the eventual C compiler to think that the code which follows is also from that location. So we insert a fresh line marker.

Expand a paragraph macro3.1 =

    /* temp is used twice over: first it holds the text to the left of the macro,
       which is tangled at once, and then it is overwritten with the macro's name */
    TEMPORARY_TEXT(temp)
    Str::copy(temp, original); Str::truncate(temp, mpos);
    LanguageMethods::tangle_line(OUT, S->sect_language, temp);

    programming_language *lang = S->sect_language;
    /* The name is copied from two characters after mpos, and is mlen-4 characters
       long: presumably mlen counts the two-character brackets at each end as well
       as the name between them -- confirm against Regexp::find_expansion */
    for (int i=0; i<mlen-4; i++) Str::put_at(temp, i, Str::get_at(original, mpos+2+i));
    Str::truncate(temp, mlen-4);
    para_macro *pmac = Macros::find_by_name(temp, S);
    if (pmac) {
        LanguageMethods::before_macro_expansion(OUT, lang, pmac);
        Tangler::tangle_paragraph(OUT, pmac->defining_paragraph);
        LanguageMethods::after_macro_expansion(OUT, lang, pmac);
        /* What follows the macro comes from line L again, not from the definition */
        LanguageMethods::insert_line_marker(OUT, lang, L);
    } else {
        Main::error_in_web(I"unknown macro", L);
        WRITE_TO(STDERR, "Macro is '%S'\n", temp);
        LanguageMethods::comment(OUT, lang, temp);  recover by putting macro name in comment
    }
    /* The text after the macro may contain further escapes, so it is sent back
       through Tangler::tangle_line and not written out directly */
    TEMPORARY_TEXT(rest)
    Str::substr(rest, Str::at(original, mpos + mlen), Str::end(original));
    Tangler::tangle_line(OUT, rest, S, L);
    DISCARD_TEXT(rest)
    DISCARD_TEXT(temp)

§3.2. This is a similar matter, except that it expands bibliographic data:

    printf("This is build [[Build Number]].\n");

takes the bibliographic data for "Build Number" (as set on the web's contents page) and substitutes that, so that we end up with (say)

    printf("This is build 5Q47.\n");

In some languages there are also special expansions (for example, in InC [[nonterminals]] has a special meaning).

If the text in double-squares isn't recognised, that's not an error: it simply passes straight through. So [[water]] becomes just [[water]].

Expand a double-square command3.2 =

    web *W = S->owning_web;

    TEMPORARY_TEXT(temp)
    for (int i=0; i<spos; i++) PUT_TO(temp, Str::get_at(original, i));
    LanguageMethods::tangle_line(OUT, S->sect_language, temp);

    for (int i=0; i<slen-4; i++) Str::put_at(temp, i, Str::get_at(original, spos+2+i));
    Str::truncate(temp, slen-4);
    if (LanguageMethods::special_tangle_command(OUT, S->sect_language, temp) == FALSE) {
        if (Bibliographic::look_up_datum(W->md, temp))
            WRITE("%S", Bibliographic::get_datum(W->md, temp));
        else
            WRITE("[[%S]]", temp);
    }

    TEMPORARY_TEXT(rest)
    Str::substr(rest, Str::at(original, spos + slen), Str::end(original));
    Tangler::tangle_line(OUT, rest, S, L);
    DISCARD_TEXT(rest)
    DISCARD_TEXT(temp)

§4. Primary target. The first target in a web is always the one for the main program.

/* Return the first tangle target in the list belonging to web W; it is an
   internal error to call this with no web at all. */
tangle_target *Tangler::primary_target(web *W) {
    if (!W) internal_error("no such web");
    tangle_target *first = FIRST_IN_LINKED_LIST(tangle_target, W->tangle_targets);
    return first;
}