Web Structure

To read the structure of a literate programming web from a path in the file system.

§1. Web objects.Each web loaded in produces a single instance of the following. If W is an ls_web, note that W->chapters is the full list of all chapters in its program, including those imported from other webs: this may be different from W->main_module->chapters, which contains just its own chapters.

In fact, the W->chapters list is arguably redundant, since it's just a concatenation of the chapter lists of the modules, but it's much more convenient to store this redundant copy than to have to keep traversing the module tree.

typedef struct ls_web {
    struct ls_module *main_module;  the root of a small dependency graph
    struct linked_list *chapters;  of ls_chapter

    struct pathname *path_to_web;  relative to the current working directory
    struct filename *single_file;  relative to the current working directory
    struct linked_list *bibliographic_data;  of web_bibliographic_datum
    struct semantic_version_number version_number;  as deduced from bibliographic data
    struct ls_syntax *web_syntax;  which version syntax the sections will have
    int chaptered;  has the author explicitly divided it into named chapters?

    struct programming_language *web_language;  in which most of the sections are written
    struct linked_list *tangle_target_names;  of text_stream
    struct linked_list *tangle_targets;  of tangle_target

    struct filename *contents_filename;  or NULL for a single-file web
    struct linked_list *header_filenames;  of filename

    void *weaving_ref;
    void *tangling_ref;
    void *analysis_ref;
    CLASS_DEFINITION
} ls_web;

ls_web *WebStructure::new_ls_web(pathname *P, filename *c_F, filename *alt_F,
    ls_syntax *syntax_version) {
    ls_web *W = CREATE(ls_web);
    W->bibliographic_data = NEW_LINKED_LIST(web_bibliographic_datum);
    Bibliographic::initialise_data(W);
    if (P) {
        W->path_to_web = P;
        W->single_file = NULL;
        W->contents_filename = c_F;
    } else {
        W->path_to_web = Filenames::up(alt_F);
        W->single_file = alt_F;
        W->contents_filename = NULL;
    }
    W->version_number = VersionNumbers::null();
    W->web_syntax = syntax_version;
    W->chaptered = FALSE;
    W->chapters = NEW_LINKED_LIST(ls_chapter);
    W->tangle_target_names = NEW_LINKED_LIST(text_stream);
    W->tangle_targets = NEW_LINKED_LIST(tangle_target);
    W->web_language = NULL;
    W->header_filenames = NEW_LINKED_LIST(filename);
    W->main_module = WebModules::create_main_module(W);
    W->weaving_ref = NULL;
    W->tangling_ref = NULL;
    W->analysis_ref = NULL;
    return W;
}

The structure ls_web is accessed in 8/bdfw, 8/wcp, 8/sw, 8/wm, 8/wr, 8/ls, 8/we, 8/ws2, 8/tt, 8/tt2, 8/bf, 9/ca, 9/taf, 9/cs, 9/lm, 9/as, 9/cl, 9/is, 10/wd, 10/ts, 10/pt, 10/tc, 10/tw, 10/twot, 10/tf, 10/hf, 11/mk, 11/cl and here.

§2. Statistics:

int WebStructure::chapter_count(ls_web *W) {
    int n = 0;
    ls_chapter *C;
    LOOP_OVER_LINKED_LIST(C, ls_chapter, W->chapters) n++;
    return n;
}
int WebStructure::section_count(ls_web *W) {
    int n = 0;
    ls_chapter *C; ls_section *S;
    LOOP_OVER_LINKED_LIST(C, ls_chapter, W->chapters)
        LOOP_OVER_LINKED_LIST(S, ls_section, C->sections)
            n++;
    return n;
}

int WebStructure::has_only_one_section(ls_web *W) {
    if (WebStructure::section_count(W) == 1) return TRUE;
    return FALSE;
}

int WebStructure::has_errors(ls_web *W) {
    ls_chapter *C; ls_section *S;
    LOOP_OVER_LINKED_LIST(C, ls_chapter, W->chapters)
        LOOP_OVER_LINKED_LIST(S, ls_section, C->sections)
            if (LiterateSource::unit_has_errors(S->literate_source))
                return TRUE;
    return FALSE;
}

§3. This really serves no purpose, but seems to boost morale.

void WebStructure::print_statistics(ls_web *W) {
    int s = 0, c = 0, n = 0, lc = 0;
    ls_chapter *C;
    LOOP_OVER_LINKED_LIST(C, ls_chapter, W->chapters) {
        c++;
        ls_section *S;
        LOOP_OVER_LINKED_LIST(S, ls_section, C->sections) {
            s++;
            for (ls_paragraph *par = S->literate_source->first_par; par; par = par->next_par)
                n++;
            lc += S->literate_source->lines_read;
        }
    }
    PRINT("web \"%S\"", Bibliographic::get_datum(W, I"Title"));
    if (W->web_syntax) PRINT(" (%S", W->web_syntax->name); else PRINT(" (no syntax");
    if (W->web_language) PRINT(", %S)", W->web_language->language_name); else PRINT(", no language)");
    PRINT(": ");
    if (W->chaptered) PRINT("%d chapter%s : ",
        c, (c == 1)?"":"s");
    PRINT("%d section%s : %d paragraph%s : %d line%s\n",
        s, (s == 1)?"":"s",
        n, (n == 1)?"":"s",
        lc, (lc == 1)?"":"s");
}

§4. This is really for debugging:

void WebStructure::write_literate_source(OUTPUT_STREAM, ls_web *W) {
    ls_chapter *C;
    ls_section *S;
    LOOP_OVER_LINKED_LIST(C, ls_chapter, W->chapters)
        LOOP_OVER_LINKED_LIST(S, ls_section, C->sections)
            LiterateSource::write_lsu(OUT, S->literate_source);
}

§5. Chapter objects.The chapters list in a ls_web contains these as its entries. Instances of ls_chapter are never created for any other purpose, so they can exist only as part of an ls_web; and once added they are never removed.

typedef struct ls_chapter {
    struct ls_web *owning_web;
    struct ls_module *owning_module;
    int imported;  did this originate in a different web?
    struct linked_list *sections;  of ls_section

    struct text_stream *ch_range;  e.g., P for Preliminaries, 7 for Chapter 7, C for Appendix C
    struct text_stream *ch_title;  e.g., "Chapter 3: Fresh Water Fish"
    struct text_stream *ch_basic_title;  e.g., "Chapter 3"
    struct text_stream *ch_decorated_title;  e.g., "Fresh Water Fish"
    struct text_stream *rubric;  optional; without double-quotation marks

    struct text_stream *ch_language_name;  in which most of the sections are written
    struct programming_language *ch_language;  in which this chapter is written


    void *weaving_ref;
    void *tangling_ref;
    void *analysis_ref;
    CLASS_DEFINITION
} ls_chapter;

ls_chapter *WebStructure::new_ls_chapter(ls_web *W, text_stream *range, text_stream *titling) {
    if (W == NULL) internal_error("no web for chapter");
    ls_chapter *C = CREATE(ls_chapter);
    C->ch_range = Str::duplicate(range);
    C->ch_title = Str::duplicate(titling);
    match_results mr = Regexp::create_mr();
    if (Regexp::match(&mr, C->ch_title, U"(%c*?): *(%c*)")) {
        C->ch_basic_title = Str::duplicate(mr.exp[0]);
        C->ch_decorated_title = Str::duplicate(mr.exp[1]);
    } else {
        C->ch_basic_title = Str::duplicate(C->ch_title);
        C->ch_decorated_title = Str::new();
    }
    Regexp::dispose_of(&mr);
    C->rubric = Str::new();
    C->ch_language_name = NULL;
    C->ch_language = NULL;
    C->imported = FALSE;
    C->sections = NEW_LINKED_LIST(ls_section);
    C->owning_web = W;
    C->owning_module = NULL;
    C->weaving_ref = NULL;
    C->tangling_ref = NULL;
    C->analysis_ref = NULL;

    ADD_TO_LINKED_LIST(C, ls_chapter, W->chapters);
    return C;
}

The structure ls_chapter is accessed in 8/wcp, 8/sw, 8/wm, 8/wr, 8/ls, 8/we, 8/tt, 8/tt2, 9/ca, 9/taf, 9/lm, 9/cl, 9/is, 10/wd, 10/ts, 10/tc, 10/tw, 10/ptf, 10/tf, 10/hf, 10/df, 11/mk, 11/cl and here.

§6. Section objects.The chapters list in an ls_chapter contains these as its entries. Instances of ls_section are never created for any other purpose, so they can exist only as part of an ls_chapter; and once added they are never removed.

typedef struct ls_section {
    struct ls_chapter *owning_chapter;

    struct text_stream *sect_title;  e.g., "Program Control"
    struct text_stream *sect_range;  e.g., "2/ct"

    struct text_stream *titling_line_to_insert;
    struct ls_unit *literate_source;

    struct filename *source_file_for_section;
    int skip_from;  ignore lines numbered in this inclusive range
    int skip_to;
    int sect_extent;  total number of lines read from a file (including skipped ones)

    struct text_stream *tag_name;

    struct programming_language *sect_language;  in which this section is written
    struct text_stream *sect_language_name;
    struct text_stream *sect_independent_language;
    struct tangle_target *sect_target;  NULL unless this section produces a tangle of its own
    int paragraph_numbers_visible;

    int scratch_flag;  temporary workspace

    void *weaving_ref;
    void *tangling_ref;
    void *analysis_ref;
    CLASS_DEFINITION
} ls_section;

ls_section *WebStructure::new_ls_section(ls_chapter *C, text_stream *titling) {
    if (C == NULL) internal_error("no chapter for section");
    ls_section *S = CREATE(ls_section);
    S->source_file_for_section = NULL;
    S->skip_from = 0;
    S->skip_to = 0;
    S->titling_line_to_insert = NULL;
    S->sect_range = Str::new();
    S->literate_source = NULL;
    S->sect_target = NULL;
    S->paragraph_numbers_visible = TRUE;

    match_results mr = Regexp::create_mr();
    if (Regexp::match(&mr, titling, U"(%c+) %^\"(%c+)\" *")) {
        S->sect_title = Str::duplicate(mr.exp[0]);
        S->tag_name = Str::duplicate(mr.exp[1]);
    } else {
        S->sect_title = Str::duplicate(titling);
        S->tag_name = NULL;
    }
    Regexp::dispose_of(&mr);
    S->owning_chapter = C;

    S->scratch_flag = FALSE;
    S->sect_extent = 0;

    S->weaving_ref = NULL;
    S->tangling_ref = NULL;
    S->analysis_ref = NULL;

    ADD_TO_LINKED_LIST(S, ls_section, C->sections);
    return S;
}

int WebStructure::paragraph_count_within_section(ls_section *S) {
    int n = 0;
    for (ls_paragraph *par = S->literate_source->first_par; par; par = par->next_par)
        n++;
    return n;
}

The structure ls_section is accessed in 5/htm, 8/wcp, 8/sw, 8/wm, 8/wr, 8/ls, 8/we, 8/tt, 8/tt2, 9/ca, 9/cc, 9/taf, 9/cs, 9/lm, 9/as, 9/cl, 9/is, 10/wd, 10/ts, 10/tc, 10/tw, 10/ptf, 10/tf, 10/hf, 10/df, 11/mk, 11/cl and here.

§7. Woven and Tangled folders.We abstract these in order to be able to respond well to their not existing:

pathname *WebStructure::woven_folder(ls_web *W) {
    pathname *P = Pathnames::down(W->path_to_web, I"Woven");
    if (Pathnames::create_in_file_system(P) == FALSE)
        Errors::fatal_with_path("unable to create Woven subdirectory", P);
    return P;
}
pathname *WebStructure::tangled_folder(ls_web *W) {
    pathname *P = Pathnames::down(W->path_to_web, I"Tangled");
    if (Pathnames::create_in_file_system(P) == FALSE)
        Errors::fatal_with_path("unable to create Tangled subdirectory", P);
    return P;
}

§8. Contents page.The contents page for a large web is usually at a fixed leafname, so:

int WebStructure::directory_looks_like_a_web(pathname *P) {
    return TextFiles::exists(Filenames::in(P, I"Contents.w"));
}

§9. But mid-sized webs can consist more or less of an arbitrary file itself serving as contents page, so we won't assume it's always "Contents.w":

filename *WebStructure::contents_filename(ls_web *W) {
    return W->contents_filename;
}

§10. Reading from the file system.Webs can be stored in two ways: as a directory containing a multitude of files, in which case the pathname P is supplied; or as a single file with everything in one (and thus, implicitly, a single chapter and a single section), in which case a filename alt_F is supplied.

ls_web *WebStructure::get_without_modules(pathname *P, filename *alt_F) {
    return WebStructure::get(P, alt_F, NULL, NULL, FALSE, FALSE, NULL);
}

ls_web *WebStructure::get_without_targets(pathname *P, filename *alt_F, programming_language *pl) {
    return WebStructure::get(P, alt_F, NULL, NULL, FALSE, TRUE, pl);
}

ls_web *WebStructure::get(pathname *P, filename *alt_F, ls_syntax *syntax_version,
    module_search *I, int verbosely, int including_modules, programming_language *pl) {
    if ((including_modules) && (I == NULL)) I = WebModules::make_search_path(NULL);
    filename *contents_file = NULL;
    if (alt_F) {
        TEMPORARY_TEXT(extension)
        Filenames::write_extension(extension, alt_F);
        if ((Str::eq_insensitive(extension, I".inwebc")) ||
            (Str::eq_insensitive(Filenames::get_leafname(alt_F), I"Contents.w"))) {
            P = Filenames::up(alt_F);
            contents_file = alt_F;
            alt_F = NULL;
        }
    } else if (P) {
        contents_file = Filenames::in(P, I"Contents.w");
    } else internal_error("no location for web");
    ls_web *W = WebStructure::new_ls_web(P, contents_file, alt_F, syntax_version);
    if (W->single_file)
        SingleFileWebs::reconnoiter(W, verbosely);
    else
        WebContents::read_contents_page(W, W->main_module, I, verbosely, including_modules, NULL);
    if (W->web_syntax == NULL) internal_error("no LS syntax for web");

    Bibliographic::check_required_data(W);
    BuildFiles::set_bibliographic_data_for(W);
    BuildFiles::deduce_semver(W);

    TangleTargets::set_languages_and_targets(W, pl);
    return W;
}

§11. Web reading.All of that ran very quickly, but now things will slow down. The next function is where the actual contents of a web are read — which means opening each section and reading it line by line. We read the complete literate source of the web into memory, which is profligate, but saves time.

void WebStructure::read_web_source(ls_web *W, int verbosely, int with_internals) {
    ls_chapter *C;
    ls_section *S;
    LOOP_OVER_LINKED_LIST(C, ls_chapter, W->chapters)
        LOOP_OVER_LINKED_LIST(S, ls_section, C->sections) {
            filename *F = S->source_file_for_section;
            Read one section from a file11.1;
        }
}

§11.1. Read one section from a file11.1 =

    pathname *P = W->path_to_web;
    ls_module *M = S->owning_chapter->owning_module;
    if ((M) && (M->module_location))
        P = M->module_location;  references are relative to module
    pathname *DP = Pathnames::down(W->path_to_web, I"Dialects");  dialects not

    S->literate_source = LiterateSource::begin_unit(S, W->web_syntax, S->sect_language, P, DP);
    if (Str::eq(Bibliographic::get_datum(W, I"Paragraph Numbers Visibility"), I"Off"))
        S->paragraph_numbers_visible = FALSE;

    if (WebSyntax::supports(W->web_syntax, EXPLICIT_SECTION_HEADINGS_WSF)) {
        if (W->single_file)
            Insert an implied purpose, for a single-file web11.1.1;
    }

    int cl = TextFiles::read(F, FALSE, "can't open section file", TRUE,
            WebStructure::scan_source_line, NULL, (void *) S);
    LiterateSource::complete_unit(S->literate_source);
    if (Str::len(S->literate_source->heading.operand1) > 0) {
        S->sect_title = Str::duplicate(S->literate_source->heading.operand1);
        if (W->single_file) Bibliographic::set_datum(W, I"Title", S->sect_title);
    }
    if ((with_internals) &&
        (WebSyntax::supports(W->web_syntax, MARKDOWN_COMMENTARY_WSF)))
        LiterateSource::parse_markdown(S->literate_source);
    if (verbosely) PRINT("Read section: '%S' (%d lines)\n", S->sect_title, cl);

This code is used in §11.

§11.1.1. Insert an implied purpose, for a single-file web11.1.1 =

    text_stream *purpose = Bibliographic::get_datum(W, I"Purpose");
    if (Str::len(purpose) > 0) LiterateSource::add_purpose(S->literate_source, NULL, purpose);

This code is used in §11.1.

§12. Non-implied source lines come from here. Note that we assume here that trailing whitespace on a line is not significant in the language being tangled for.

void WebStructure::scan_source_line(text_stream *line, text_file_position *tfp, void *state) {
    ls_section *S = (ls_section *) state;
    S->sect_extent++;
    if ((S->skip_from > 0) && (S->skip_from <= tfp->line_count) && (tfp->line_count <= S->skip_to))
        return;
    int l = Str::len(line) - 1;
    while ((l>=0) && (Characters::is_space_or_tab(Str::get_at(line, l))))
        Str::truncate(line, l--);
    LiterateSource::feed_line(S->literate_source, tfp, line);
}

§13. Debugging.This is useful mainly for testing: it produces a verbose listing of everything in a web.

void WebStructure::write_web(OUTPUT_STREAM, ls_web *W, text_stream *range) {
    ls_chapter *C = WebRanges::to_chapter(W, range);
    if (C) {
        ls_section *S;
        LOOP_OVER_LINKED_LIST(S, ls_section, C->sections)
            LiterateSource::write_lsu(OUT, S->literate_source);
    } else {
        ls_section *S = WebRanges::to_section(W, range);
        if (S) {
            LiterateSource::write_lsu(OUT, S->literate_source);
        } else {
            LOOP_OVER_LINKED_LIST(C, ls_chapter, W->chapters)
                LOOP_OVER_LINKED_LIST(S, ls_section, C->sections)
                    LiterateSource::write_lsu(OUT, S->literate_source);
        }
    }
}