Cross-referencing multiple webs gathered together.


§1. Colonies of webs. Social spiders are said to form "colonies" when their webs are shared,[1] and in that spirit, a colony to Inweb is a collection of coexisting webs — which share no code, and in that sense have no connection at run-time, but which need to be cross-referenced in their woven form, so that readers can easily turn from one to another.

§2. So, then, a colony is really just a membership list:

typedef struct colony {
    struct linked_list *members;  of colony_member
    struct text_stream *home;  path of home repository
    struct pathname *assets_path;  where assets shared between weaves live
    struct pathname *patterns_path;  where additional patterns live
    CLASS_DEFINITION
} colony;

§3. Each member is represented by an instance of the following. Note the loaded field: this holds metadata on the web/module in question. (Recall that a module is really just a web that doesn't tangle to an independent program but to a library of code: for almost all purposes, it's a web.) But for efficiency's sake, we read this metadata only on demand.

Note that the path might be either the name of a single-file web, or of a directory holding a multi-section web.

typedef struct colony_member {
    int web_rather_than_module;  TRUE for a web, FALSE for a module
    struct text_stream *name;  the N in N at P in W
    struct text_stream *path;  the P in N at P in W
    struct pathname *weave_path;  the W in N at P in W
    struct text_stream *home_leaf;  usually index.html, but not for single-file webs
    struct text_stream *default_weave_pattern;  for use when weaving

    struct web_md *loaded;  metadata on its sections, lazily evaluated
    struct filename *navigation;  navigation sidebar HTML
    struct linked_list *breadcrumb_tail;  of breadcrumb_request
    CLASS_DEFINITION
} colony_member;

§4. And the following reads a colony file F and produces a suitable colony object from it. This, for example, is the colony file for the Inweb repository at GitHub:

home: inweb/docs
assets: inweb/docs/docs-assets

navigation: inweb/docs-src/nav.html
pattern: GitHubPages

breadcrumbs: none
web: "overview" at "inweb/docs-src/index.inweb" in "inweb/docs"

breadcrumbs: "Home: //overview//"
web: "inweb" at "inweb" in "inweb/docs/inweb"
module: "foundation" at "inweb/foundation-module" in "inweb/docs/foundation-module"
web: "foundation-test" at "inweb/foundation-test" in "inweb/docs/foundation-test"
web: "goldbach" at "inweb/Examples/goldbach" in "inweb/docs/goldbach"
web: "twinprimes" at "inweb/Examples/twinprimes.inweb" in "inweb/docs/twinprimes"
web: "eastertide" at "inweb/Examples/eastertide" in "inweb/docs/eastertide"

web: "intest" at "intest" in "intest/docs"
web: "inform" at "inform/docs-src/index.inweb" in "inform/docs"
/* State carried from line to line while a colony file is being read. The
   navigation, breadcrumb and pattern settings apply to every member declared
   after them, until a later line changes them. */
typedef struct colony_reader_state {
    struct colony *province; /* the colony being filled in */
    struct filename *nav; /* current navigation file, or NULL for none */
    struct linked_list *crumbs; /* of breadcrumb_request */
    struct text_stream *pattern; /* current default weave pattern, or NULL for none */
} colony_reader_state;

/* Read the colony file F. A new colony is created with its home set to "docs"
   and no assets or patterns path; each line of the file is then handed to
   Colonies::read_line, which fills in the members and overrides the settings. */
void Colonies::load(filename *F) {
    colony *new_colony = CREATE(colony);
    new_colony->members = NEW_LINKED_LIST(colony_member);
    new_colony->patterns_path = NULL;
    new_colony->assets_path = NULL;
    new_colony->home = I"docs";

    /* the reader state is passed by address to the line-reading callback */
    colony_reader_state state;
    state.province = new_colony;
    state.crumbs = NEW_LINKED_LIST(breadcrumb_request);
    state.nav = NULL;
    state.pattern = NULL;

    TextFiles::read(F, FALSE, "can't open colony file",
        TRUE, Colonies::read_line, NULL, (void *) &state);
}

§5. Lines from the colony file are fed, one by one, into:

/* Callback receiving one line of the colony file at a time.
   line:  the text of the line (trimmed in place here);
   tfp:   position in the file, used for error reporting;
   v_crs: the colony_reader_state, passed as an opaque pointer.
   A member declaration creates a colony_member and adds it to the colony; any
   other recognised line changes a colony-wide setting or one of the settings
   applied to subsequently declared members. Unrecognised lines are reported
   as errors. */
void Colonies::read_line(text_stream *line, text_file_position *tfp, void *v_crs) {
    colony_reader_state *crs = (colony_reader_state *) v_crs;
    colony *C = crs->province;

    Str::trim_white_space(line); /* ignore trailing space */
    if (Str::len(line) == 0) return; /* ignore blank lines */
    if (Str::get_first_char(line) == '#') return; /* lines opening with # are comments */

    match_results mr = Regexp::create_mr();
    if (Regexp::match(&mr, line, U"(%c*?): \"*(%C+)\" at \"(%c*)\" in \"(%c*)\"")) {
        colony_member *CM = CREATE(colony_member);
        if (Str::eq(mr.exp[0], I"web")) CM->web_rather_than_module = TRUE;
        else if (Str::eq(mr.exp[0], I"module")) CM->web_rather_than_module = FALSE;
        else {
            /* report the error, but still create the member, as a module */
            CM->web_rather_than_module = FALSE;
            Errors::in_text_file("text before ':' must be 'web' or 'module'", tfp);
        }
        CM->name = Str::duplicate(mr.exp[1]);
        CM->path = Str::duplicate(mr.exp[2]);
        CM->home_leaf = Str::new();
        if (Str::suffix_eq(CM->path, I".inweb", 6)) {
            /* a single-file web weaves to a page named after the file */
            filename *F = Filenames::from_text(CM->path);
            Filenames::write_unextended_leafname(CM->home_leaf, F);
            WRITE_TO(CM->home_leaf, ".html");
        } else {
            WRITE_TO(CM->home_leaf, "index.html");
        }
        CM->weave_path = Pathnames::from_text(mr.exp[3]);
        CM->loaded = NULL; /* metadata is read only on demand */
        CM->navigation = crs->nav;
        CM->breadcrumb_tail = crs->crumbs;
        CM->default_weave_pattern = Str::duplicate(crs->pattern);
        ADD_TO_LINKED_LIST(CM, colony_member, C->members);
    } else if (Regexp::match(&mr, line, U"home: *(%c*)")) {
        C->home = Str::duplicate(mr.exp[0]);
    } else if (Regexp::match(&mr, line, U"assets: *(%c*)")) {
        C->assets_path = Pathnames::from_text(mr.exp[0]);
    } else if (Regexp::match(&mr, line, U"patterns: *(%c*)")) {
        C->patterns_path = Pathnames::from_text(mr.exp[0]);
    } else if (Regexp::match(&mr, line, U"pattern: none")) {
        crs->pattern = NULL;
    } else if (Regexp::match(&mr, line, U"pattern: *(%c*)")) {
        crs->pattern = Str::duplicate(mr.exp[0]);
    } else if (Regexp::match(&mr, line, U"navigation: none")) {
        crs->nav = NULL;
    } else if (Regexp::match(&mr, line, U"navigation: *(%c*)")) {
        crs->nav = Filenames::from_text(mr.exp[0]);
    } else if (Regexp::match(&mr, line, U"breadcrumbs: none")) {
        crs->crumbs = NEW_LINKED_LIST(breadcrumb_request);
    } else if (Regexp::match(&mr, line, U"breadcrumbs: *(%c*)")) {
        /* a fresh list, so that members declared earlier keep their own trail */
        crs->crumbs = NEW_LINKED_LIST(breadcrumb_request);
        match_results mr2 = Regexp::create_mr();
        /* peel off one quoted crumb at a time from the front of a "A" > "B" > ... chain */
        while (Regexp::match(&mr2, mr.exp[0], U"(\"%c*?\") > (%c*)")) {
            Colonies::add_crumb(crs->crumbs, mr2.exp[0], tfp);
            Str::clear(mr.exp[0]); Str::copy(mr.exp[0], mr2.exp[1]);
        }
        Colonies::add_crumb(crs->crumbs, mr.exp[0], tfp);
        /* the remainder was copied into mr above, so mr2 is no longer needed */
        Regexp::dispose_of(&mr2);
    } else {
        Errors::in_text_file("unable to read colony member", tfp);
    }
    Regexp::dispose_of(&mr);
}

§6. "Breadcrumbs" are the chain of links in a horizontal list at the top of the page, and this requests one.

/* Add one breadcrumb to the list L. spec must be a crumb wrapped in double
   quotes; the text between the quotes is turned into a breadcrumb_request.
   If spec is not quoted, an error is reported at tfp and nothing is added.
   The match results are disposed of on both paths. */
void Colonies::add_crumb(linked_list *L, text_stream *spec, text_file_position *tfp) {
    match_results mr = Regexp::create_mr();
    if (Regexp::match(&mr, spec, U"\"(%c*?)\"")) {
        breadcrumb_request *br = Colonies::request_breadcrumb(mr.exp[0]);
        ADD_TO_LINKED_LIST(br, breadcrumb_request, L);
    } else {
        Errors::in_text_file("each crumb must be in double-quotes", tfp);
    }
    Regexp::dispose_of(&mr);
}

/* A request for one breadcrumb: the text to display, and the link it should
   lead to. Both are set by Colonies::request_breadcrumb. */
typedef struct breadcrumb_request {
    struct text_stream *breadcrumb_text;
    struct text_stream *breadcrumb_link;
    CLASS_DEFINITION
} breadcrumb_request;

/* Create a breadcrumb request from arg. If arg has the form "Text: Link", the
   two halves become the text and the link; otherwise arg is used for both,
   with ".html" appended to the link. */
breadcrumb_request *Colonies::request_breadcrumb(text_stream *arg) {
    breadcrumb_request *BR = CREATE(breadcrumb_request);
    match_results mr = Regexp::create_mr();
    if (Regexp::match(&mr, arg, U"(%c*?): *(%c*)") == FALSE) {
        /* no explicit link: derive one from the text itself */
        BR->breadcrumb_text = Str::duplicate(arg);
        BR->breadcrumb_link = Str::duplicate(arg);
        WRITE_TO(BR->breadcrumb_link, ".html");
    } else {
        BR->breadcrumb_text = Str::duplicate(mr.exp[0]);
        BR->breadcrumb_link = Str::duplicate(mr.exp[1]);
    }
    Regexp::dispose_of(&mr);
    return BR;
}

/* Write out each breadcrumb in the list crumbs, in order. Each link is first
   converted to a URL by Colonies::link_URL, relative to the file F. */
void Colonies::drop_initial_breadcrumbs(OUTPUT_STREAM, filename *F, linked_list *crumbs) {
    breadcrumb_request *crumb;
    LOOP_OVER_LINKED_LIST(crumb, breadcrumb_request, crumbs) {
        TEMPORARY_TEXT(crumb_URL)
        Colonies::link_URL(crumb_URL, crumb->breadcrumb_link, F);
        Colonies::write_breadcrumb(OUT, crumb->breadcrumb_text, crumb_URL);
        DISCARD_TEXT(crumb_URL)
    }
}

/* Write a single breadcrumb as an HTML list item. With a link, the text is
   wrapped in that link; with link NULL, the text is set in bold instead. */
void Colonies::write_breadcrumb(OUTPUT_STREAM, text_stream *text, text_stream *link) {
    HTML_OPEN("li");
    if (link == NULL) {
        HTML_OPEN("b");
        WRITE("%S", text);
        HTML_CLOSE("b");
    } else {
        HTML::begin_link(OUT, link);
        WRITE("%S", text);
        HTML::end_link(OUT);
    }
    HTML_CLOSE("li");
}

§7. Searching. Given a name T, we try to find a colony member of that name, returning the first we find.

/* Search every colony for a member whose name matches T, ignoring case.
   Returns the first match found, or NULL if there is none. */
colony_member *Colonies::find(text_stream *T) {
    colony *each_colony;
    LOOP_OVER(each_colony, colony) {
        colony_member *candidate;
        LOOP_OVER_LINKED_LIST(candidate, colony_member, each_colony->members) {
            if (Str::eq_insensitive(T, candidate->name)) {
                return candidate;
            }
        }
    }
    return NULL;
}

§8. And this is where we find the web metadata for a colony member. It's a more subtle business than first appears, because maybe the colony member is already in Inweb's memory (because it is the web being woven, or is a module imported by that web even if not now being woven). If it is, we want to use the data we already have; but if not, we read it in.

/* Return the module for colony member CM, loading its metadata on first use
   and caching it in CM->loaded. Wm is the web being woven (may be NULL) and L
   the line to report errors against. Each step below runs only if the steps
   before it found nothing. Returns NULL, after reporting an error, if the
   metadata cannot be found. */
module *Colonies::as_module(colony_member *CM, source_line *L, web_md *Wm) {
    if (CM->loaded == NULL) Perhaps the web being woven8.1;
    if (CM->loaded == NULL) Perhaps a module imported by the web being woven8.2;
    if (CM->loaded == NULL) Perhaps a module not yet seen8.3;
    if (CM->loaded == NULL) Failing that, throw an error8.4;
    if (CM->loaded == NULL) return NULL; /* error already reported: do not dereference NULL */
    return CM->loaded->as_module;
}

§8.1. Perhaps the web being woven8.1 =

    /* if the member's name matches the web being woven, reuse its metadata */
    if (Wm) {
        if (Str::eq_insensitive(Wm->as_module->module_name, CM->name)) {
            CM->loaded = Wm;
        }
    }

§8.2. Perhaps a module imported by the web being woven8.2 =

    /* look for the member among the dependencies of the web being woven */
    /* NOTE(review): on a match this stores Wm, the metadata of the web being
       woven, rather than anything taken from the matching dependency. Confirm
       that this is intended. */
    if (Wm) {
        module *dependency;
        LOOP_OVER_LINKED_LIST(dependency, module, Wm->as_module->dependencies) {
            if (Str::eq_insensitive(dependency->module_name, CM->name)) {
                CM->loaded = Wm;
            }
        }
    }

§8.3. Perhaps a module not yet seen8.3 =

    filename *F = NULL;
    pathname *P = NULL;
    if (Str::suffix_eq(CM->path, I".inweb", 6))
        F = Filenames::from_text(CM->path);
    else
        P = Pathnames::from_text(CM->path);
    CM->loaded = WebMetadata::get_without_modules(P, F);

§8.4. Failing that, throw an error8.4 =

    TEMPORARY_TEXT(err)
    WRITE_TO(err, "unable to load '%S'", CM->name);
    Main::error_in_web(err, L);

§9. Finally:

/* The home path of the first colony, or "docs" if no colony exists. */
text_stream *Colonies::home(void) {
    colony *first;
    LOOP_OVER(first, colony) {
        return first->home; /* leave on the first iteration */
    }
    return I"docs";
}

/* The assets path of the first colony, or NULL if no colony exists. */
pathname *Colonies::assets_path(void) {
    colony *first;
    LOOP_OVER(first, colony) {
        return first->assets_path; /* leave on the first iteration */
    }
    return NULL;
}

/* The patterns path of the first colony, or NULL if no colony exists. */
pathname *Colonies::patterns_path(void) {
    colony *first;
    LOOP_OVER(first, colony) {
        return first->patterns_path; /* leave on the first iteration */
    }
    return NULL;
}

§10. Cross-references. The function below must decide what references such as these should refer to:

    Chapter 3
    Manual
    Enumerated Constants
    Reader::get_section_for_range
    weave_order
    foundation: Text Streams
    goldbach

The reference text is in text; we return TRUE if we can make unambiguous sense of it, or throw an error and return FALSE if not. If all is well, we must write a title and URL for the link.

The web metadata Wm is for the web currently being woven, and the line L is where the reference is made from.

/* Resolve the cross-reference text, writing its URL and title. A reference of
   the form "Title -> Target" resolves Target for the URL but uses Title as
   the title; otherwise the title is whatever the resolution chooses. If ext
   is non-NULL, *ext is first set to FALSE. Returns what the inner resolver
   returns. */
int Colonies::resolve_reference_in_weave(text_stream *url, text_stream *title,
    filename *for_HTML_file, text_stream *text, web_md *Wm, source_line *L, int *ext) {
    if (ext) *ext = FALSE;
    int outcome = 0;
    match_results mr = Regexp::create_mr();
    if (Regexp::match(&mr, text, U"(%c+?) -> (%c+)") == FALSE) {
        outcome = Colonies::resolve_reference_in_weave_inner(url, title,
            for_HTML_file, text, Wm, L, ext);
    } else {
        /* the inner resolver gets no title stream: the explicit title is used */
        outcome = Colonies::resolve_reference_in_weave_inner(url, NULL,
            for_HTML_file, mr.exp[1], Wm, L, ext);
        WRITE_TO(title, "%S", mr.exp[0]);
    }
    Regexp::dispose_of(&mr);
    return outcome;
}

/* The work of resolving a reference once any "Title -> " prefix is removed.
   url:            stream to which the URL is written;
   title:          stream to which the title is written (callers may pass NULL);
   for_HTML_file:  the file the link is made from, for relative URLs;
   text:           the reference itself;
   Wm:             metadata for the web being woven, or NULL;
   L:              line the reference is made from, or NULL;
   ext:            if non-NULL, *ext is set to TRUE for an explicit URL.
   Returns TRUE on success. On failure an error is reported and FALSE is
   returned. Several of the named paragraphs below return from this function
   themselves. */
int Colonies::resolve_reference_in_weave_inner(text_stream *url, text_stream *title,
    filename *for_HTML_file, text_stream *text, web_md *Wm, source_line *L, int *ext) {
    module *from_M = (Wm)?(Wm->as_module):NULL;
    module *search_M = from_M;
    colony_member *search_CM = NULL;
    int external = FALSE;

    Is it an explicit URL?10.1;
    Is it the name of a member of our colony?10.2;
    If it contains a colon, does this indicate a section in a colony member?10.3;

    module *found_M = NULL;
    section_md *found_Sm = NULL;
    int bare_module_name = FALSE;

    /* find how many hits (N), and how many which are sections (NS) */
    int N = WebModules::named_reference(&found_M, &found_Sm, &bare_module_name,
        NULL, search_M, text, FALSE, FALSE);
    found_M = NULL; found_Sm = NULL; bare_module_name = FALSE;
    int NS = WebModules::named_reference(&found_M, &found_Sm, &bare_module_name,
        NULL, search_M, text, FALSE, TRUE);
    /* several hits of which exactly one is a section: prefer the section */
    int sections_only = FALSE;
    if ((N > 1) && (NS == 1)) sections_only = TRUE;

    /* now perform the definitive search */
    found_M = NULL; found_Sm = NULL; bare_module_name = FALSE;
    N = WebModules::named_reference(&found_M, &found_Sm, &bare_module_name,
        title, search_M, text, FALSE, sections_only);

    if (N == 0) {
        /* function and type names are tried only for references made from a
           line of the web, and not for those into another colony member */
        if ((L) && (external == FALSE)) {
            Is it the name of a function in the current web?10.4;
            Is it the name of a type in the current web?10.5;
        }
        TEMPORARY_TEXT(err)
        WRITE_TO(err, "Can't find the cross-reference '%S'", text);
        Main::error_in_web(err, L);
        DISCARD_TEXT(err)
        return FALSE;
    }
    if (N > 1) {
        Main::error_in_web(I"Multiple cross-references might be meant here", L);
        /* NOTE(review): this repeat call differs only in its seventh argument,
           TRUE; presumably that lists the candidates. Confirm. */
        WebModules::named_reference(&found_M, &found_Sm, &bare_module_name,
            title, search_M, text, TRUE, FALSE);
        return FALSE;
    }
    It refers unambiguously to a single section10.6;
    return TRUE;
}

§10.1. Is it an explicit URL?10.1 =

    /* text beginning "http:" or "https:" is used verbatim as both URL and
       title, and is flagged as external if the caller asked to be told */
    match_results mr = Regexp::create_mr();
    int looks_like_URL = Regexp::match(&mr, text, U"https*:%c*");
    Regexp::dispose_of(&mr); /* the pattern captures nothing, so mr is not needed below */
    if (looks_like_URL) {
        WRITE_TO(url, "%S", text);
        WRITE_TO(title, "%S", text);
        if (ext) *ext = TRUE;
        return TRUE;
    }

§10.2. Is it the name of a member of our colony?10.2 =

    /* a reference which is exactly a colony member's name links to that
       member's home page, with the member's name as the title */
    search_CM = Colonies::find(text);
    if (search_CM) {
        module *found_M = Colonies::as_module(search_CM, L, Wm);
        /* the other use of Colonies::as_module in this function tests its
           result for NULL, so do so here too rather than dereference it;
           as_module reports an error itself when the load fails */
        if (found_M == NULL) return FALSE;
        section_md *found_Sm = FIRST_IN_LINKED_LIST(section_md, found_M->sections_md);
        int bare_module_name = TRUE;
        WRITE_TO(title, "%S", search_CM->name);
        It refers unambiguously to a single section10.6;
    }

§10.3. If it contains a colon, does this indicate a section in a colony member?10.3 =

    /* "member: rest" narrows the search to the named colony member, provided
       that the member exists and its module can be obtained */
    match_results mr = Regexp::create_mr();
    if (Regexp::match(&mr, text, U"(%c*?): (%c*)")) {
        search_CM = Colonies::find(mr.exp[0]);
        module *member_M = (search_CM)?(Colonies::as_module(search_CM, L, Wm)):NULL;
        if (member_M) {
            search_M = member_M;
            /* the prefix is stripped only when the member has no dependencies */
            if (LinkedLists::len(member_M->dependencies) == 0)
                text = Str::duplicate(mr.exp[1]);
            external = TRUE;
        }
    }
    Regexp::dispose_of(&mr);

§10.4. Is it the name of a function in the current web?10.4 =

    language_function *fn;
    LOOP_OVER(fn, language_function) {
        if (Str::eq_insensitive(fn->function_name, text)) {
            Colonies::paragraph_URL(url, fn->function_header_at->owning_paragraph,
                for_HTML_file);
            WRITE_TO(title, "%S", fn->function_name);
            return TRUE;
        }
    }

§10.5. Is it the name of a type in the current web?10.5 =

    language_type *str;
    LOOP_OVER(str, language_type) {
        if (Str::eq_insensitive(str->structure_name, text)) {
            Colonies::paragraph_URL(url, str->structure_header_at->owning_paragraph,
                for_HTML_file);
            WRITE_TO(title, "%S", str->structure_name);
            return TRUE;
        }
    }

§10.6. It refers unambiguously to a single section10.6 =

    /* Shared tail of a successful resolution. It reads found_M, found_Sm,
       bare_module_name and search_CM from the scope in which it is used, and
       returns TRUE from the enclosing function. */
    if (found_M == NULL) internal_error("could not locate M");
    /* with a colony member we know where it is woven; without one we guess */
    if (search_CM) The section is a known colony member10.6.1
    else The section is not in a known colony member10.6.2;
    return TRUE;

§10.6.1. The section is a known colony member10.6.1 =

    /* begin with the relative path from the linking file's directory to the
       member's weave directory */
    Pathnames::relative_URL(url, Filenames::up(for_HTML_file), search_CM->weave_path);
    if (bare_module_name) {
        /* a bare member name leads to the member's home page */
        WRITE_TO(url, "%S", search_CM->home_leaf);
    } else {
        if (found_Sm) Colonies::section_URL(url, found_Sm);
        WRITE_TO(title, " (in %S)", search_CM->name);
    }

§10.6.2. In the absence of a colony file, Inweb can really only guess, and the guess it makes is that modules of the current web will be woven alongside the main one.

The section is not in a known colony member10.6.2 =

    /* a section of another module is assumed to be woven in a sibling
       directory called "<module>-module" */
    if (found_M != from_M) {
        WRITE_TO(url, "../%S-module/", found_M->module_name);
        Colonies::section_URL(url, found_Sm);
        if (bare_module_name == FALSE) {
            WRITE_TO(title, " (in %S)", found_M->module_name);
        }
    } else {
        Colonies::section_URL(url, found_Sm);
    }

§11. URL management.

/* Write the URL for link_text, relative to the file F. Link text in the
   cross-reference notation, i.e. wrapped in a double slash on each side as in
   the colony file example "Home: //overview//", is resolved as a reference;
   anything else is written out unchanged. */
void Colonies::link_URL(OUTPUT_STREAM, text_stream *link_text, filename *F) {
    match_results mr = Regexp::create_mr();
    /* the opening slashes must be matched outside the bracketed part, or they
       would be captured as part of the reference name */
    if (Regexp::match(&mr, link_text, U" *//(%c+)// *"))
        Colonies::reference_URL(OUT, mr.exp[0], F);
    else
        WRITE("%S", link_text);
    Regexp::dispose_of(&mr);
}

/* Resolve link_text as a cross-reference made from file F, with no web or
   source line as context, and write the resulting URL. If it cannot be
   resolved, print a warning and write nothing. The title is discarded. */
void Colonies::reference_URL(OUTPUT_STREAM, text_stream *link_text, filename *F) {
    TEMPORARY_TEXT(title)
    TEMPORARY_TEXT(url)
    int resolved = Colonies::resolve_reference_in_weave(url, title, F, link_text,
        NULL, NULL, NULL);
    if (resolved) {
        WRITE("%S", url);
    } else {
        PRINT("Warning: unable to resolve reference '%S' in navigation\n", link_text);
    }
    DISCARD_TEXT(title)
    DISCARD_TEXT(url)
}

/* Write the leafname of the woven page for section Sm: its range, with each
   slash or space replaced by a hyphen, followed by ".html". A NULL section is
   an internal error. */
void Colonies::section_URL(OUTPUT_STREAM, section_md *Sm) {
    if (Sm == NULL) internal_error("unwoven section");
    LOOP_THROUGH_TEXT(pos, Sm->sect_range) {
        switch (Str::get(pos)) {
            case '/':
            case ' ':
                PUT('-');
                break;
            default:
                PUT(Str::get(pos));
                break;
        }
    }
    WRITE(".html");
}

/* Write a URL leading to paragraph P, for a link made from the file from.
   When P belongs to a module other than "(main)", the URL begins with the
   relative path to that module's weave directory, if the module is a colony
   member; if it is not, a warning is printed and no path is written. Then
   come the section page, a "#", and the paragraph's anchor. */
void Colonies::paragraph_URL(OUTPUT_STREAM, paragraph *P, filename *from) {
    if (from == NULL) internal_error("no from file");
    if (P == NULL) internal_error("no para");
    section *to_S = P->under_section;
    module *to_M = to_S->md->owning_module;
    if (Str::ne(to_M->module_name, I"(main)")) {
        colony_member *to_C = Colonies::find(to_M->module_name);
        if (to_C == NULL) {
            PRINT("Warning: a link in the weave will work only if '%S' appears in the colony file\n",
                to_M->module_name);
        } else {
            Pathnames::relative_URL(OUT, Filenames::up(from), to_C->weave_path);
        }
    }
    Colonies::section_URL(OUT, to_S->md);
    WRITE("#");
    Colonies::paragraph_anchor(OUT, P);
}

/* Write the anchor name for paragraph P: its ornament, then "P", then its
   paragraph number with each dot replaced by an underscore. */
void Colonies::paragraph_anchor(OUTPUT_STREAM, paragraph *P) {
    if (P == NULL) internal_error("no para");
    WRITE("%S", P->ornament);
    WRITE("P");
    text_stream *number = P->paragraph_number;
    LOOP_THROUGH_TEXT(pos, number) {
        switch (Str::get(pos)) {
            case '.':
                WRITE("_");
                break;
            default:
                PUT(Str::get(pos));
                break;
        }
    }
}