To read the structure of a literate programming web from a path in the file system.
- §1. Web objects
- §5. Chapter objects
- §6. Section objects
- §7. Woven and Tangled folders
- §8. Contents page
- §10. Reading from the file system
- §11. Web reading
- §13. Debugging
§1. Web objects. Each web loaded in produces a single instance of the following. If W is an ls_web, note that W->chapters is the full list of all chapters in its program, including those imported from other webs: this may be different from W->main_module->chapters, which contains just its own chapters.
In fact, the W->chapters list is arguably redundant, since it's just a concatenation of the chapter lists of the modules, but it's much more convenient to store this redundant copy than to have to keep traversing the module tree.
typedef struct ls_web { struct ls_module *main_module; the root of a small dependency graph struct linked_list *chapters; of ls_chapter struct pathname *path_to_web; relative to the current working directory struct filename *single_file; relative to the current working directory struct linked_list *bibliographic_data; of web_bibliographic_datum struct semantic_version_number version_number; as deduced from bibliographic data struct ls_syntax *web_syntax; which version syntax the sections will have int chaptered; has the author explicitly divided it into named chapters? struct programming_language *web_language; in which most of the sections are written struct linked_list *tangle_target_names; of text_stream struct linked_list *tangle_targets; of tangle_target struct filename *contents_filename; or NULL for a single-file web struct linked_list *header_filenames; of filename void *weaving_ref; void *tangling_ref; void *analysis_ref; CLASS_DEFINITION } ls_web; ls_web *WebStructure::new_ls_web(pathname *P, filename *c_F, filename *alt_F, ls_syntax *syntax_version) { ls_web *W = CREATE(ls_web); W->bibliographic_data = NEW_LINKED_LIST(web_bibliographic_datum); Bibliographic::initialise_data(W); if (P) { W->path_to_web = P; W->single_file = NULL; W->contents_filename = c_F; } else { W->path_to_web = Filenames::up(alt_F); W->single_file = alt_F; W->contents_filename = NULL; } W->version_number = VersionNumbers::null(); W->web_syntax = syntax_version; W->chaptered = FALSE; W->chapters = NEW_LINKED_LIST(ls_chapter); W->tangle_target_names = NEW_LINKED_LIST(text_stream); W->tangle_targets = NEW_LINKED_LIST(tangle_target); W->web_language = NULL; W->header_filenames = NEW_LINKED_LIST(filename); W->main_module = WebModules::create_main_module(W); W->weaving_ref = NULL; W->tangling_ref = NULL; W->analysis_ref = NULL; return W; }
- The structure ls_web is accessed in 8/bdfw, 8/wcp, 8/sw, 8/wm, 8/wr, 8/ls, 8/we, 8/ws2, 8/tt, 8/tt2, 8/bf, 9/ca, 9/taf, 9/cs, 9/lm, 9/as, 9/cl, 9/is, 10/wd, 10/ts, 10/pt, 10/tc, 10/tw, 10/twot, 10/tf, 10/hf, 11/mk, 11/cl and here.
/* Returns the number of chapters in the web's full chapter list. */
int WebStructure::chapter_count(ls_web *W) {
    int total = 0;
    ls_chapter *ch;
    LOOP_OVER_LINKED_LIST(ch, ls_chapter, W->chapters) {
        total++;
    }
    return total;
}

/* Returns the number of sections in the web, summed over all of its chapters. */
int WebStructure::section_count(ls_web *W) {
    int total = 0;
    ls_chapter *ch;
    ls_section *sec;
    LOOP_OVER_LINKED_LIST(ch, ls_chapter, W->chapters) {
        LOOP_OVER_LINKED_LIST(sec, ls_section, ch->sections) {
            total++;
        }
    }
    return total;
}

/* Returns |TRUE| if the web contains exactly one section, and |FALSE| otherwise. */
int WebStructure::has_only_one_section(ls_web *W) {
    return (WebStructure::section_count(W) == 1)?TRUE:FALSE;
}

/* Returns |TRUE| as soon as any section's literate source is found to have
   errors, and |FALSE| if none does. */
int WebStructure::has_errors(ls_web *W) {
    ls_chapter *ch;
    ls_section *sec;
    LOOP_OVER_LINKED_LIST(ch, ls_chapter, W->chapters) {
        LOOP_OVER_LINKED_LIST(sec, ls_section, ch->sections) {
            if (LiterateSource::unit_has_errors(sec->literate_source)) return TRUE;
        }
    }
    return FALSE;
}
§3. This really serves no purpose, but seems to boost morale.
/* Prints a one-line summary of the web: its title, syntax and language, then
   counts of chapters (only if the web is chaptered), sections, paragraphs
   and lines read. */
void WebStructure::print_statistics(ls_web *W) {
    int n_chapters = 0, n_sections = 0, n_paragraphs = 0, n_lines = 0;

    /* Tally everything in a single pass over the web */
    ls_chapter *ch;
    LOOP_OVER_LINKED_LIST(ch, ls_chapter, W->chapters) {
        ls_section *sec;
        n_chapters++;
        LOOP_OVER_LINKED_LIST(sec, ls_section, ch->sections) {
            ls_paragraph *par = sec->literate_source->first_par;
            n_sections++;
            while (par) {
                n_paragraphs++;
                par = par->next_par;
            }
            n_lines += sec->literate_source->lines_read;
        }
    }

    /* Title, then syntax and language in brackets */
    PRINT("web \"%S\"", Bibliographic::get_datum(W, I"Title"));
    if (W->web_syntax) {
        PRINT(" (%S", W->web_syntax->name);
    } else {
        PRINT(" (no syntax");
    }
    if (W->web_language) {
        PRINT(", %S)", W->web_language->language_name);
    } else {
        PRINT(", no language)");
    }
    PRINT(": ");

    /* The counts, each pluralised unless it is exactly 1 */
    if (W->chaptered) {
        PRINT("%d chapter%s : ", n_chapters, (n_chapters == 1)?"":"s");
    }
    PRINT("%d section%s : %d paragraph%s : %d line%s\n",
        n_sections, (n_sections == 1)?"":"s",
        n_paragraphs, (n_paragraphs == 1)?"":"s",
        n_lines, (n_lines == 1)?"":"s");
}
§4. This is really for debugging:
/* Writes the literate source unit of every section in the web to |OUT|,
   chapter by chapter, in list order. */
void WebStructure::write_literate_source(OUTPUT_STREAM, ls_web *W) {
    ls_chapter *ch;
    LOOP_OVER_LINKED_LIST(ch, ls_chapter, W->chapters) {
        ls_section *sec;
        LOOP_OVER_LINKED_LIST(sec, ls_section, ch->sections) {
            LiterateSource::write_lsu(OUT, sec->literate_source);
        }
    }
}
§5. Chapter objects. The chapters list in an ls_web contains these as its entries. Instances of ls_chapter are never created for any other purpose, so they can exist only as part of an ls_web; and once added they are never removed.
typedef struct ls_chapter { struct ls_web *owning_web; struct ls_module *owning_module; int imported; did this originate in a different web? struct linked_list *sections; of ls_section struct text_stream *ch_range; e.g., P for Preliminaries, 7 for Chapter 7, C for Appendix C struct text_stream *ch_title; e.g., "Chapter 3: Fresh Water Fish" struct text_stream *ch_basic_title; e.g., "Chapter 3" struct text_stream *ch_decorated_title; e.g., "Fresh Water Fish" struct text_stream *rubric; optional; without double-quotation marks struct text_stream *ch_language_name; in which most of the sections are written struct programming_language *ch_language; in which this chapter is written void *weaving_ref; void *tangling_ref; void *analysis_ref; CLASS_DEFINITION } ls_chapter; ls_chapter *WebStructure::new_ls_chapter(ls_web *W, text_stream *range, text_stream *titling) { if (W == NULL) internal_error("no web for chapter"); ls_chapter *C = CREATE(ls_chapter); C->ch_range = Str::duplicate(range); C->ch_title = Str::duplicate(titling); match_results mr = Regexp::create_mr(); if (Regexp::match(&mr, C->ch_title, U"(%c*?): *(%c*)")) { C->ch_basic_title = Str::duplicate(mr.exp[0]); C->ch_decorated_title = Str::duplicate(mr.exp[1]); } else { C->ch_basic_title = Str::duplicate(C->ch_title); C->ch_decorated_title = Str::new(); } Regexp::dispose_of(&mr); C->rubric = Str::new(); C->ch_language_name = NULL; C->ch_language = NULL; C->imported = FALSE; C->sections = NEW_LINKED_LIST(ls_section); C->owning_web = W; C->owning_module = NULL; C->weaving_ref = NULL; C->tangling_ref = NULL; C->analysis_ref = NULL; ADD_TO_LINKED_LIST(C, ls_chapter, W->chapters); return C; }
- The structure ls_chapter is accessed in 8/wcp, 8/sw, 8/wm, 8/wr, 8/ls, 8/we, 8/tt, 8/tt2, 9/ca, 9/taf, 9/lm, 9/cl, 9/is, 10/wd, 10/ts, 10/tc, 10/tw, 10/ptf, 10/tf, 10/hf, 10/df, 11/mk, 11/cl and here.
§6. Section objects. The sections list in an ls_chapter contains these as its entries. Instances of ls_section are never created for any other purpose, so they can exist only as part of an ls_chapter; and once added they are never removed.
typedef struct ls_section { struct ls_chapter *owning_chapter; struct text_stream *sect_title; e.g., "Program Control" struct text_stream *sect_range; e.g., "2/ct" struct text_stream *titling_line_to_insert; struct ls_unit *literate_source; struct filename *source_file_for_section; int skip_from; ignore lines numbered in this inclusive range int skip_to; int sect_extent; total number of lines read from a file (including skipped ones) struct text_stream *tag_name; struct programming_language *sect_language; in which this section is written struct text_stream *sect_language_name; struct text_stream *sect_independent_language; struct tangle_target *sect_target; NULL unless this section produces a tangle of its own int paragraph_numbers_visible; int scratch_flag; temporary workspace void *weaving_ref; void *tangling_ref; void *analysis_ref; CLASS_DEFINITION } ls_section; ls_section *WebStructure::new_ls_section(ls_chapter *C, text_stream *titling) { if (C == NULL) internal_error("no chapter for section"); ls_section *S = CREATE(ls_section); S->source_file_for_section = NULL; S->skip_from = 0; S->skip_to = 0; S->titling_line_to_insert = NULL; S->sect_range = Str::new(); S->literate_source = NULL; S->sect_target = NULL; S->paragraph_numbers_visible = TRUE; match_results mr = Regexp::create_mr(); if (Regexp::match(&mr, titling, U"(%c+) %^\"(%c+)\" *")) { S->sect_title = Str::duplicate(mr.exp[0]); S->tag_name = Str::duplicate(mr.exp[1]); } else { S->sect_title = Str::duplicate(titling); S->tag_name = NULL; } Regexp::dispose_of(&mr); S->owning_chapter = C; S->scratch_flag = FALSE; S->sect_extent = 0; S->weaving_ref = NULL; S->tangling_ref = NULL; S->analysis_ref = NULL; ADD_TO_LINKED_LIST(S, ls_section, C->sections); return S; } int WebStructure::paragraph_count_within_section(ls_section *S) { int n = 0; for (ls_paragraph *par = S->literate_source->first_par; par; par = par->next_par) n++; return n; }
- The structure ls_section is accessed in 5/htm, 8/wcp, 8/sw, 8/wm, 8/wr, 8/ls, 8/we, 8/tt, 8/tt2, 9/ca, 9/cc, 9/taf, 9/cs, 9/lm, 9/as, 9/cl, 9/is, 10/wd, 10/ts, 10/tc, 10/tw, 10/ptf, 10/tf, 10/hf, 10/df, 11/mk, 11/cl and here.
§7. Woven and Tangled folders. We abstract these in order to be able to respond well to their not existing:
/* Returns the "Woven" subdirectory of the web's path, first asking for it to
   be created in the file system; a fatal error is issued if that fails. */
pathname *WebStructure::woven_folder(ls_web *W) {
    pathname *woven = Pathnames::down(W->path_to_web, I"Woven");
    int made = Pathnames::create_in_file_system(woven);
    if (made == FALSE) {
        Errors::fatal_with_path("unable to create Woven subdirectory", woven);
    }
    return woven;
}

/* Returns the "Tangled" subdirectory of the web's path, first asking for it
   to be created in the file system; a fatal error is issued if that fails. */
pathname *WebStructure::tangled_folder(ls_web *W) {
    pathname *tangled = Pathnames::down(W->path_to_web, I"Tangled");
    int made = Pathnames::create_in_file_system(tangled);
    if (made == FALSE) {
        Errors::fatal_with_path("unable to create Tangled subdirectory", tangled);
    }
    return tangled;
}
§8. Contents page. The contents page for a large web is usually at a fixed leafname, so:
/* Tests whether the directory |P| contains a file called "Contents.w",
   returning the result of the file-existence check. */
int WebStructure::directory_looks_like_a_web(pathname *P) {
    filename *contents_page = Filenames::in(P, I"Contents.w");
    return TextFiles::exists(contents_page);
}
§9. But mid-sized webs can consist more or less of an arbitrary file itself serving as contents page, so we won't assume it's always "Contents.w":
/* Returns the contents file recorded for the web when it was created; this
   is |NULL| for a single-file web. */
filename *WebStructure::contents_filename(ls_web *W) {
    filename *contents_page = W->contents_filename;
    return contents_page;
}
§10. Reading from the file system. Webs can be stored in two ways: as a directory containing a multitude of files, in which case the pathname P is supplied; or as a single file with everything in one (and thus, implicitly, a single chapter and a single section), in which case a filename alt_F is supplied.
/* Convenience form of |WebStructure::get|: no syntax, no module search, not
   verbose, modules not included, and no language supplied. */
ls_web *WebStructure::get_without_modules(pathname *P, filename *alt_F) {
    return WebStructure::get(P, alt_F, NULL, NULL, FALSE, FALSE, NULL);
}

/* Convenience form of |WebStructure::get|: no syntax, a default module search
   path, not verbose, modules included, and the language |pl| passed through. */
ls_web *WebStructure::get_without_targets(pathname *P, filename *alt_F,
    programming_language *pl) {
    return WebStructure::get(P, alt_F, NULL, NULL, FALSE, TRUE, pl);
}

/* Locates a web and reads its structure, though not yet its source text.
   Either a directory |P| or a file |alt_F| must be given, or an internal
   error is thrown. If |alt_F| has the extension ".inwebc" or the leafname
   "Contents.w" (compared case-insensitively), it is treated as the contents
   page of a directory web in the folder holding it; any other |alt_F| is a
   single-file web. If only |P| is given, the contents page is "Contents.w"
   inside it.

   |syntax_version| is the syntax to give the new web; |I| is the module
   search path, and a default one is made if modules are to be included but
   |I| is NULL; |verbosely| and |including_modules| are passed on to the
   readers; |pl| is passed on when languages and tangle targets are set.
   Returns the new web. An internal error is thrown if the web still has no
   syntax once its contents have been read. */
ls_web *WebStructure::get(pathname *P, filename *alt_F, ls_syntax *syntax_version,
    module_search *I, int verbosely, int including_modules, programming_language *pl) {
    if ((including_modules) && (I == NULL)) I = WebModules::make_search_path(NULL);

    filename *contents_file = NULL;
    if (alt_F) {
        TEMPORARY_TEXT(extension)
        Filenames::write_extension(extension, alt_F);
        if ((Str::eq_insensitive(extension, I".inwebc")) ||
            (Str::eq_insensitive(Filenames::get_leafname(alt_F), I"Contents.w"))) {
            /* this file is a contents page, so the web is really its directory */
            P = Filenames::up(alt_F);
            contents_file = alt_F;
            alt_F = NULL;
        }
        /* every |TEMPORARY_TEXT| must be paired with a |DISCARD_TEXT|, or the
           temporary stream is never closed */
        DISCARD_TEXT(extension)
    } else if (P) {
        contents_file = Filenames::in(P, I"Contents.w");
    } else internal_error("no location for web");

    ls_web *W = WebStructure::new_ls_web(P, contents_file, alt_F, syntax_version);
    if (W->single_file)
        SingleFileWebs::reconnoiter(W, verbosely);
    else
        WebContents::read_contents_page(W, W->main_module, I, verbosely,
            including_modules, NULL);
    if (W->web_syntax == NULL) internal_error("no LS syntax for web");

    Bibliographic::check_required_data(W);
    BuildFiles::set_bibliographic_data_for(W);
    BuildFiles::deduce_semver(W);
    TangleTargets::set_languages_and_targets(W, pl);
    return W;
}
§11. Web reading. All of that ran very quickly, but now things will slow down. The next function is where the actual contents of a web are read — which means opening each section and reading it line by line. We read the complete literate source of the web into memory, which is profligate, but saves time.
/* Reads the source of every section of every chapter in |W->chapters|. For
   each section |S|, the file |F| is taken from |S->source_file_for_section|,
   and the named paragraph "Read one section from a file" (§11.1) then does
   the work, using |W|, |S|, |F|, |verbosely| and |with_internals|. */
void WebStructure::read_web_source(ls_web *W, int verbosely, int with_internals) { ls_chapter *C; ls_section *S; LOOP_OVER_LINKED_LIST(C, ls_chapter, W->chapters) LOOP_OVER_LINKED_LIST(S, ls_section, C->sections) { filename *F = S->source_file_for_section; Read one section from a file11.1; } }
§11.1. Read one section from a file11.1 =
/* Body of the named paragraph §11.1, in which |W|, |S|, |F|, |verbosely| and
   |with_internals| come from the enclosing function. In order:
   (1) |P| is the web's own path, unless the section's chapter has an owning
       module with a |module_location|, which is then used instead; |DP| is
       always the "Dialects" subdirectory of the web's own path.
   (2) A new literate source unit is begun for |S| and stored in
       |S->literate_source|.
   (3) Paragraph numbers are made invisible if the bibliographic datum
       "Paragraph Numbers Visibility" is exactly "Off".
   (4) For a single-file web whose syntax supports explicit section headings,
       an implied purpose is inserted (§11.1.1) before any line is read.
   (5) |F| is read, each line being passed to |WebStructure::scan_source_line|
       with |S| as its state; the value returned is kept in |cl| and is what
       the verbose message reports as the line count.
   (6) The unit is completed; if its heading's |operand1| is non-empty, that
       text replaces the section title and, for a single-file web, also
       becomes the "Title" datum.
   (7) Markdown is parsed only if |with_internals| is set and the syntax
       supports Markdown commentary. */
pathname *P = W->path_to_web; ls_module *M = S->owning_chapter->owning_module; if ((M) && (M->module_location)) P = M->module_location; references are relative to module pathname *DP = Pathnames::down(W->path_to_web, I"Dialects"); dialects not S->literate_source = LiterateSource::begin_unit(S, W->web_syntax, S->sect_language, P, DP); if (Str::eq(Bibliographic::get_datum(W, I"Paragraph Numbers Visibility"), I"Off")) S->paragraph_numbers_visible = FALSE; if (WebSyntax::supports(W->web_syntax, EXPLICIT_SECTION_HEADINGS_WSF)) { if (W->single_file) Insert an implied purpose, for a single-file web11.1.1; } int cl = TextFiles::read(F, FALSE, "can't open section file", TRUE, WebStructure::scan_source_line, NULL, (void *) S); LiterateSource::complete_unit(S->literate_source); if (Str::len(S->literate_source->heading.operand1) > 0) { S->sect_title = Str::duplicate(S->literate_source->heading.operand1); if (W->single_file) Bibliographic::set_datum(W, I"Title", S->sect_title); } if ((with_internals) && (WebSyntax::supports(W->web_syntax, MARKDOWN_COMMENTARY_WSF))) LiterateSource::parse_markdown(S->literate_source); if (verbosely) PRINT("Read section: '%S' (%d lines)\n", S->sect_title, cl);
- This code is used in §11.
§11.1.1. Insert an implied purpose, for a single-file web11.1.1 =
/* Body of the named paragraph §11.1.1: looks up the web's "Purpose" datum and,
   if it is non-empty, adds it as a purpose to the section's literate source. */
text_stream *purpose = Bibliographic::get_datum(W, I"Purpose"); if (Str::len(purpose) > 0) LiterateSource::add_purpose(S->literate_source, NULL, purpose);
- This code is used in §11.1.
§12. Non-implied source lines come from here. Note that we assume here that trailing whitespace on a line is not significant in the language being tangled for.
/* Callback receiving each line read from a section's file, where |state| is
   the |ls_section| being read. Every line counts towards the section's
   extent, but lines numbered within the inclusive range |skip_from| to
   |skip_to| are otherwise ignored (a |skip_from| of 0 or less means nothing
   is skipped). Trailing spaces and tabs are removed from all other lines
   before they are fed to the section's literate source. */
void WebStructure::scan_source_line(text_stream *line, text_file_position *tfp, void *state) {
    ls_section *sect = (ls_section *) state;
    sect->sect_extent++;

    /* Is this line inside the range to be skipped? */
    if (sect->skip_from > 0) {
        if ((sect->skip_from <= tfp->line_count) && (tfp->line_count <= sect->skip_to))
            return;
    }

    /* Strip trailing whitespace, working backwards from the end of the line */
    for (int at = Str::len(line) - 1; at >= 0; at--) {
        if (Characters::is_space_or_tab(Str::get_at(line, at)) == FALSE) break;
        Str::truncate(line, at);
    }

    LiterateSource::feed_line(sect->literate_source, tfp, line);
}
§13. Debugging. This is useful mainly for testing: it produces a verbose listing of everything in a web.
/* Writes literate source to |OUT| for part or all of the web. If |range|
   identifies a chapter, every section in that chapter is written; failing
   that, if it identifies a section, just that section is written; and if it
   identifies neither, every section of every chapter is written. */
void WebStructure::write_web(OUTPUT_STREAM, ls_web *W, text_stream *range) {
    /* First choice: the range is a chapter */
    ls_chapter *chapter = WebRanges::to_chapter(W, range);
    if (chapter) {
        ls_section *sec;
        LOOP_OVER_LINKED_LIST(sec, ls_section, chapter->sections) {
            LiterateSource::write_lsu(OUT, sec->literate_source);
        }
        return;
    }

    /* Second choice: the range is a single section */
    ls_section *section = WebRanges::to_section(W, range);
    if (section) {
        LiterateSource::write_lsu(OUT, section->literate_source);
        return;
    }

    /* Otherwise: the whole web */
    WebStructure::write_literate_source(OUT, W);
}