Gathering indexing marks for a web, and sorting them into a woven index.
enumerate LITERAL_CHARACTER_FSMEVENT
/* Build the finite state machine used to recognise indexing notation.

   The machine starts with a single state, "unindexed". For each of the seven
   conventions below, the opening and closing texts are read from |conventions|
   and handed to the paragraph "Add indexing transition pair to fsm", which
   (when both texts are non-empty) adds one further state, entered by the
   opening text and left by the closing text.

   That paragraph reads the current values of |base_state|, |on|, |off|,
   |on_event|, |off_event| and |mnemonic|, so each group of assignments must
   come immediately before the use of the paragraph it belongs to.

   Returns the machine created here. */
finite_state_machine *WebIndexing::make_indexing_machine(linked_list *conventions) {
	fsm_state *base_state = FSM::new_state(I"unindexed");
	finite_state_machine *machine = FSM::new_machine(base_state);

	/* Web comments */
	text_stream *on = Conventions::get_textual_from(conventions, COMMENTS_LSCONVENTION);
	text_stream *off = Conventions::get_textual2_from(conventions, COMMENTS_LSCONVENTION);
	int on_event = WEB_COMMENT_START_FSMEVENT, off_event = WEB_COMMENT_END_FSMEVENT;
	text_stream *mnemonic = I"web-comment";
	Add indexing transition pair to fsm1.1;

	/* Index entries */
	on = Conventions::get_textual_from(conventions, INDEX_LSCONVENTION);
	off = Conventions::get_textual2_from(conventions, INDEX_LSCONVENTION);
	on_event = INDEX_START_FSMEVENT;
	off_event = INDEX_END_FSMEVENT;
	mnemonic = I"index-entry";
	Add indexing transition pair to fsm1.1;

	/* Important index entries */
	on = Conventions::get_textual_from(conventions, IMPORTANT_INDEX_LSCONVENTION);
	off = Conventions::get_textual2_from(conventions, IMPORTANT_INDEX_LSCONVENTION);
	on_event = IMPORTANT_INDEX_START_FSMEVENT;
	off_event = IMPORTANT_INDEX_END_FSMEVENT;
	mnemonic = I"important-index-entry";
	Add indexing transition pair to fsm1.1;

	/* TT index entries */
	on = Conventions::get_textual_from(conventions, TT_INDEX_LSCONVENTION);
	off = Conventions::get_textual2_from(conventions, TT_INDEX_LSCONVENTION);
	on_event = TT_INDEX_START_FSMEVENT;
	off_event = TT_INDEX_END_FSMEVENT;
	mnemonic = I"tt-index-entry";
	Add indexing transition pair to fsm1.1;

	/* Important TT index entries: these raise their own start event, matching
	   the end event, rather than borrowing the plain "important" one */
	on = Conventions::get_textual_from(conventions, IMPORTANT_TT_INDEX_LSCONVENTION);
	off = Conventions::get_textual2_from(conventions, IMPORTANT_TT_INDEX_LSCONVENTION);
	on_event = IMPORTANT_TT_INDEX_START_FSMEVENT;
	off_event = IMPORTANT_TT_INDEX_END_FSMEVENT;
	mnemonic = I"important-tt-index-entry";
	Add indexing transition pair to fsm1.1;

	/* NS index entries */
	on = Conventions::get_textual_from(conventions, NS_INDEX_LSCONVENTION);
	off = Conventions::get_textual2_from(conventions, NS_INDEX_LSCONVENTION);
	on_event = NS_INDEX_START_FSMEVENT;
	off_event = NS_INDEX_END_FSMEVENT;
	mnemonic = I"ns-index-entry";
	Add indexing transition pair to fsm1.1;

	/* Important NS index entries: likewise with their own start event */
	on = Conventions::get_textual_from(conventions, IMPORTANT_NS_INDEX_LSCONVENTION);
	off = Conventions::get_textual2_from(conventions, IMPORTANT_NS_INDEX_LSCONVENTION);
	on_event = IMPORTANT_NS_INDEX_START_FSMEVENT;
	off_event = IMPORTANT_NS_INDEX_END_FSMEVENT;
	mnemonic = I"important-ns-index-entry";
	Add indexing transition pair to fsm1.1;

	return machine;
}
enumerate WEB_COMMENT_START_FSMEVENT
enumerate WEB_COMMENT_END_FSMEVENT
enumerate INDEX_START_FSMEVENT
enumerate IMPORTANT_INDEX_START_FSMEVENT
enumerate TT_INDEX_START_FSMEVENT
enumerate IMPORTANT_TT_INDEX_START_FSMEVENT
enumerate NS_INDEX_START_FSMEVENT
enumerate IMPORTANT_NS_INDEX_START_FSMEVENT
enumerate INDEX_END_FSMEVENT
enumerate IMPORTANT_INDEX_END_FSMEVENT
enumerate TT_INDEX_END_FSMEVENT
enumerate IMPORTANT_TT_INDEX_END_FSMEVENT
enumerate NS_INDEX_END_FSMEVENT
enumerate IMPORTANT_NS_INDEX_END_FSMEVENT
§1.1. Add indexing transition pair to fsm1.1 =
/* A convention contributes to the machine only if it supplies both an opening
   text |on| and a closing text |off|. In that case a new state, named by
   |mnemonic|, is created: |on| leads from the base state into it, with
   |on_event| as the final event of that transition, and |off| leads back to
   the base state, with |off_event| as its final event. */
if (Str::len(on) > 0) {
	if (Str::len(off) > 0) {
		fsm_state *inside_state = FSM::new_state(mnemonic);
		FSM::add_transition_spelling_out_with_events(base_state, on, inside_state,
			NO_FSMEVENT, on_event);
		FSM::add_transition_spelling_out_with_events(inside_state, off, base_state,
			NO_FSMEVENT, off_event);
	}
}
- This code is used in §1 (7 times).
/* A single indexing mark: a request for |text| to appear in the index.
   |style| is 1, 2 or 3, as chosen in "Run indexing machine" (1 for the plain
   index events, 2 for the TT ones, 3 for the NS ones); |important| is |TRUE|
   or |FALSE|; |at| is the paragraph the mark belongs to, and is |NULL| when
   the mark is first created. */
typedef struct ls_index_mark {
	struct text_stream *text;
	int style;
	int important;
	struct ls_paragraph *at;
	CLASS_DEFINITION
} ls_index_mark;

/* Create a mark holding its own duplicate of |text|, with the given |style|
   and |important| values, and not yet attached to any paragraph. */
ls_index_mark *WebIndexing::new_mark(text_stream *text, int style, int important) {
	ls_index_mark *mark = CREATE(ls_index_mark);
	mark->at = NULL;
	mark->important = important;
	mark->style = style;
	mark->text = Str::duplicate(text);
	return mark;
}

/* Run one |line| through the indexing machine of the notation |ntn|.

   If the notation has no indexing machine, |line| is simply copied to |OUT|.
   Otherwise each character is put to the current destination |to| (at first
   |OUT|) and then shown to the machine; the paragraph "Run indexing machine"
   reacts to any event by trimming text already written, switching |to|
   between |OUT| and the scratch text |control_text|, and adding marks to |L|.
   Once the line is exhausted, "Check final state of indexing machine" may
   write a message to |*error|.

   Returns the list of marks found, or |NULL| if there were none. */
linked_list *WebIndexing::index_from_line(OUTPUT_STREAM, text_stream *line,
	ls_notation *ntn, text_stream **error) {
	linked_list *L = NULL;
	TEMPORARY_TEXT(control_text)
	finite_state_machine *machine = ntn->indexing_machine;
	if (machine == NULL) {
		Str::copy(OUT, line);
	} else {
		FSM::reset_machine(machine);
		text_stream *to = OUT;
		int pos = 0;
		while (pos < Str::len(line)) {
			inchar32_t c = Str::get_at(line, pos);
			PUT_TO(to, c);
			Run indexing machine2.1;
			pos++;
		}
		Check final state of indexing machine2.2;
	}
	DISCARD_TEXT(control_text)
	return L;
}
- The structure ls_index_mark is accessed in 1/wcp, 2/ls, 2/we, 3/ca, 3/cl, 3/is, 4/tt2, 4/cs, 5/apacs, 5/wt, 5/ptf, 5/tf, 5/hf, 5/df and here.
§2.1. Run indexing machine2.1 =
/* Show the character |c|, which has just been put to |to|, to the machine and
   act on whatever event comes back. |len| is filled in by the machine and is
   used below as the number of trailing characters to strip from text already
   written (presumably the length of the notation just recognised -- to be
   confirmed against the FSM code). */
int len = 0;
int event = FSM::cycle_machine(machine, c, &len);
switch (event) {
	case LITERAL_CHARACTER_FSMEVENT:
		/* swap the matched characters in the output for a placeholder */
		Str::truncate(OUT, Str::len(OUT) - len);
		WRITE("___inweb_protected___");
		break;

	case WEB_COMMENT_START_FSMEVENT:
	case INDEX_START_FSMEVENT:
	case IMPORTANT_INDEX_START_FSMEVENT:
	case TT_INDEX_START_FSMEVENT:
	case IMPORTANT_TT_INDEX_START_FSMEVENT:
	case NS_INDEX_START_FSMEVENT:
	case IMPORTANT_NS_INDEX_START_FSMEVENT:
		/* strip the opening notation from the output, and divert what
		   follows into the (emptied) scratch text */
		Str::clear(control_text);
		Str::truncate(OUT, Str::len(OUT) - len);
		to = control_text;
		break;

	case WEB_COMMENT_END_FSMEVENT:
		/* nothing is made of the diverted text: just resume normal output */
		to = OUT;
		break;

	case INDEX_END_FSMEVENT:
	case IMPORTANT_INDEX_END_FSMEVENT:
	case TT_INDEX_END_FSMEVENT:
	case IMPORTANT_TT_INDEX_END_FSMEVENT:
	case NS_INDEX_END_FSMEVENT:
	case IMPORTANT_NS_INDEX_END_FSMEVENT: {
		/* strip the closing notation from the diverted text, leaving the
		   text of the entry itself */
		Str::truncate(control_text, Str::len(control_text) - len);

		/* the end event decides the style and importance of the mark */
		int mark_style = 0, is_important = FALSE;
		switch (event) {
			case INDEX_END_FSMEVENT:
				mark_style = 1; break;
			case IMPORTANT_INDEX_END_FSMEVENT:
				mark_style = 1; is_important = TRUE; break;
			case TT_INDEX_END_FSMEVENT:
				mark_style = 2; break;
			case IMPORTANT_TT_INDEX_END_FSMEVENT:
				mark_style = 2; is_important = TRUE; break;
			case NS_INDEX_END_FSMEVENT:
				mark_style = 3; break;
			case IMPORTANT_NS_INDEX_END_FSMEVENT:
				mark_style = 3; is_important = TRUE; break;
			default:
				internal_error("unknown index event");
		}
		ls_index_mark *new_entry =
			WebIndexing::new_mark(control_text, mark_style, is_important);

		/* the list is only created once there is something to put in it */
		if (L == NULL) L = NEW_LINKED_LIST(ls_index_mark);
		ADD_TO_LINKED_LIST(new_entry, ls_index_mark, L);
		to = OUT;
		break;
	}
}
- This code is used in §2.
§2.2. Check final state of indexing machine2.2 =
fsm_state *final = FSM::last_nonintermediate_state(machine); if (Str::ne(final->mnemonic, I"unindexed")) *error = I"line contains incomplete index entry";
- This code is used in §2.
/* The index of a web. |all_marks| accumulates every mark filed by the
   |index_*| functions below. |lemmas|, |lemmas_sorted| and |no_lemmas_sorted|
   are filled in by |WebIndexing::sort|: a dictionary from lemma keys to
   lemmas, and an array of |no_lemmas_sorted| lemmas in sorted order
   (|NULL| with a count of 0 when there are none). */
typedef struct ls_index {
	struct linked_list *all_marks; /* of |ls_index_mark| */
	struct dictionary *lemmas;
	int no_lemmas_sorted;
	struct ls_index_lemma **lemmas_sorted;
	CLASS_DEFINITION
} ls_index;

/* One heading in the sorted index. |sort_key| is the full text of the entry
   followed by a space and the style number; |text| is only the part after
   the last " > "; |marks| are the places where it is indexed; |parent| is
   the lemma for the text before the last " > ", or |NULL| if there is none. */
typedef struct ls_index_lemma {
	struct text_stream *sort_key;
	struct text_stream *text;
	int style;
	struct linked_list *marks; /* of |ls_index_mark| */
	struct ls_index_lemma *parent;
	CLASS_DEFINITION
} ls_index_lemma;

/* Create an index with an empty list of marks and nothing yet sorted. */
ls_index *WebIndexing::new_index(void) {
	ls_index *index = CREATE(ls_index);
	index->all_marks = NEW_LINKED_LIST(ls_index_mark);
	index->lemmas = NULL;
	index->no_lemmas_sorted = 0;
	index->lemmas_sorted = NULL;
	return index;
}

/* Attach the mark |ie| to the paragraph |at| and file it in the index of the
   web reached through |at->owning_unit->context|. If |at|, its owning unit or
   that context is |NULL|, nothing happens. */
void WebIndexing::index_at(ls_index_mark *ie, ls_paragraph *at) {
	ls_web *W = ((at)?(at->owning_unit):NULL)?(at->owning_unit->context):NULL;
	if (W) {
		ie->at = at;
		ADD_TO_LINKED_LIST(ie, ls_index_mark, W->index->all_marks);
	}
}

/* File an important, style-2 mark for the function name |fn| at paragraph
   |at|, as a subentry of "functions". Nothing happens if no web can be
   reached from |at|. */
void WebIndexing::index_function_at(text_stream *fn, ls_paragraph *at) {
	TEMPORARY_TEXT(lemma)
	WRITE_TO(lemma, "functions > %S", fn);
	ls_web *W = ((at)?(at->owning_unit):NULL)?(at->owning_unit->context):NULL;
	if (W) {
		ls_index_mark *ie = WebIndexing::new_mark(lemma, 2, TRUE);
		ie->at = at;
		ADD_TO_LINKED_LIST(ie, ls_index_mark, W->index->all_marks);
	}
	DISCARD_TEXT(lemma)
}

/* File an important, style-2 mark for the structure name |str| at paragraph
   |at|, as a subentry of "structures". Nothing happens if no web can be
   reached from |at|. */
void WebIndexing::index_structure_at(text_stream *str, ls_paragraph *at) {
	TEMPORARY_TEXT(lemma)
	WRITE_TO(lemma, "structures > %S", str);
	ls_web *W = ((at)?(at->owning_unit):NULL)?(at->owning_unit->context):NULL;
	if (W) {
		ls_index_mark *ie = WebIndexing::new_mark(lemma, 2, TRUE);
		ie->at = at;
		ADD_TO_LINKED_LIST(ie, ls_index_mark, W->index->all_marks);
	}
	DISCARD_TEXT(lemma)
}

/* Group the marks of |index| whose section lies within |range| into lemmas,
   and store those lemmas, sorted, in |index->lemmas_sorted|.

   A lemma lists each paragraph at most once: a repeated mark is not added
   again, but if it is important then the mark already listed is made
   important too. If no lemmas result, |lemmas_sorted| is |NULL| and
   |no_lemmas_sorted| is 0. */
void WebIndexing::sort(ls_index *index, text_stream *range) {
	index->lemmas = Dictionaries::new(1024, FALSE);
	/* start from a clean slate, so that the count can never be left over
	   from an earlier sort when this one finds nothing */
	index->lemmas_sorted = NULL;
	index->no_lemmas_sorted = 0;

	linked_list *lemmas = NEW_LINKED_LIST(ls_index_lemma);
	ls_index_mark *mark;
	LOOP_OVER_LINKED_LIST(mark, ls_index_mark, index->all_marks) {
		ls_section *S = LiterateSource::section_of_par(mark->at);
		if ((S) && (WebRanges::is_within(WebRanges::of(S), range))) {
			ls_index_lemma *lemma =
				WebIndexing::obtain(index, lemmas, mark->style, mark->text);
			ls_index_mark *seen;
			int found = FALSE;
			LOOP_OVER_LINKED_LIST(seen, ls_index_mark, lemma->marks)
				if (seen->at == mark->at) {
					found = TRUE;
					if (mark->important) seen->important = TRUE;
					break;
				}
			if (found == FALSE) ADD_TO_LINKED_LIST(mark, ls_index_mark, lemma->marks);
		}
	}

	int N = LinkedLists::len(lemmas);
	if (N == 0) return;

	index->lemmas_sorted = (ls_index_lemma **)
		(Memory::calloc(N, sizeof(ls_index_lemma *), ARRAY_SORTING_MREASON));
	int i=0;
	ls_index_lemma *lemma;
	LOOP_OVER_LINKED_LIST(lemma, ls_index_lemma, lemmas)
		index->lemmas_sorted[i++] = lemma;
	index->no_lemmas_sorted = N;
	qsort(index->lemmas_sorted, (size_t) N, sizeof(ls_index_lemma *),
		WebIndexing::compare_lemmas);
}

/* Find the lemma for |text| in the given |style|, creating it (and adding it
   to the list |lemmas| and the dictionary |index->lemmas|) if it does not yet
   exist.

   If |text| contains " > ", whatever precedes the last occurrence names a
   parent, which is itself obtained, recursively, in style 1; the new lemma's
   own |text| is then only what follows that last " > ". The parent is
   recorded only when the lemma is newly created. */
ls_index_lemma *WebIndexing::obtain(ls_index *index, linked_list *lemmas, int style,
	text_stream *text) {
	ls_index_lemma *parent_lemma = NULL;

	/* |from| ends up just past the last " > ", or 0 if there is none */
	int from = 0;
	for (int i=0; i<Str::len(text); i++)
		if (Str::includes_at(text, i, I" > "))
			from = i+3;
	if (from > 0) {
		TEMPORARY_TEXT(prefix)
		for (int j=0; j<from-3; j++) PUT_TO(prefix, Str::get_at(text, j));
		parent_lemma = WebIndexing::obtain(index, lemmas, 1, prefix);
		DISCARD_TEXT(prefix)
	}

	ls_index_lemma *lemma = NULL;
	TEMPORARY_TEXT(key)
	WRITE_TO(key, "%S %d", text, style);
	if (Dictionaries::find(index->lemmas, key)) {
		lemma = Dictionaries::read_value(index->lemmas, key);
	} else {
		lemma = CREATE(ls_index_lemma);
		lemma->sort_key = Str::duplicate(key);
		lemma->text = Str::new();
		for (int j=from; j<Str::len(text); j++)
			PUT_TO(lemma->text, Str::get_at(text, j));
		lemma->style = style;
		lemma->marks = NEW_LINKED_LIST(ls_index_mark);
		lemma->parent = parent_lemma;
		Dictionaries::create(index->lemmas, key);
		Dictionaries::write_value(index->lemmas, key, lemma);
		ADD_TO_LINKED_LIST(lemma, ls_index_lemma, lemmas);
	}
	DISCARD_TEXT(key)
	return lemma;
}

/* Comparison function for |qsort| over an array of lemma pointers. Lemmas are
   ordered by sort key, ignoring case. Two different keys can still be equal
   when case is ignored, and |qsort| does not define the order of elements
   which compare equal, so such ties are broken by an exact comparison to
   keep the index order reproducible. */
int WebIndexing::compare_lemmas(const void *ent1, const void *ent2) {
	text_stream *tx1 = (*((const ls_index_lemma **) ent1))->sort_key;
	text_stream *tx2 = (*((const ls_index_lemma **) ent2))->sort_key;
	int d = Str::cmp_insensitive(tx1, tx2);
	if (d == 0) d = Str::cmp(tx1, tx2);
	return d;
}

/* Write a plain-text listing of the index of |W| to |OUT|, one lemma per
   line. The index is sorted first, for the given |range|, if no sorted array
   is currently held.

   Each line is indented by one space per ancestor; the lemma text is wrapped
   in backticks for style 2 and in slashes for style 3; then come the
   paragraph numbers of its marks, separated by commas, with important ones
   wrapped in underscores. */
void WebIndexing::inspect_index(OUTPUT_STREAM, ls_web *W, text_stream *range) {
	ls_index *index = W->index;
	if (index->lemmas_sorted == NULL) WebIndexing::sort(index, range);
	if (index->lemmas_sorted)
		for (int i=0; i<(int) (index->no_lemmas_sorted); i++) {
			ls_index_lemma *lemma = index->lemmas_sorted[i];
			for (ls_index_lemma *l2 = lemma->parent; l2; l2 = l2->parent) WRITE(" ");
			if (lemma->style == 2) WRITE("`");
			if (lemma->style == 3) WRITE("/");
			WRITE("%S", lemma->text);
			if (lemma->style == 2) WRITE("`");
			if (lemma->style == 3) WRITE("/");
			ls_index_mark *mark;
			int c = 0;
			LOOP_OVER_LINKED_LIST(mark, ls_index_mark, lemma->marks) {
				if (c++ > 0) WRITE(", "); else WRITE(" ");
				if (mark->important) WRITE("_");
				WRITE("%S", mark->at->paragraph_number);
				if (mark->important) WRITE("_");
			}
			WRITE("\n");
		}
}
- The structure ls_index is accessed in 5/hf and here.
- The structure ls_index_lemma is accessed in 2/ls, 2/we, 3/pl, 3/ca, 3/cl, 3/is, 4/tt2, 4/cs, 5/wt, 5/ptf, 5/tf, 5/hf, 5/df and here.