To provide for wrapping up sets of HTML files into ePub ebooks.

§1. Ebooks. Constructing an ePub file (essentially a zipped folder of HTML with some metadata attached) is simple enough, but the details are finicky. The HTML pages need to be fully XHTML compliant, which is quite a strict requirement, and we can't help with that here. But we can at least sort out the directory structure and the rather complicated indexing and contents files also required.

See Liza Daly's invaluable tutorial "Build a digital book with EPUB" to explicate all of this.

While under construction, any single ebook is represented by an instance of the following structure. Conceptually, we will organise it as a series of "volumes" (possibly only one), each of which is a series of "chapters" (possibly only one). The actual content is a series of "pages", which are essentially individual HTML files, plus some images.

typedef struct ebook {
    struct linked_list *metadata_list;  of ebook_datum: DCMI-standard bibliographic data
    char *prefix;  to apply to the page leafnames
    struct filename *CSS_file_throughout;  where to find a CSS file to be used for all volumes

    struct filename *eventual_epub;  filename of the final *.epub to be made
    struct pathname *holder;  directory to put the ingredients into
    struct pathname *OEBPS_path;  subdirectory which mysteriously has to be called OEBPS

    struct linked_list *ebook_volume_list;  of ebook_volume
    struct ebook_volume *current_volume;  the one to which chapters are now being added

    struct linked_list *ebook_chapter_list;  of ebook_chapter
    struct ebook_chapter *current_chapter;  the one to which pages are now being added

    struct linked_list *ebook_page_list;  of book_page
    struct linked_list *ebook_image_list;  of ebook_image
} ebook;

§2. DCMI, or "Dublin Core", metadata is a standard set of key-value pairs used to identify ebooks; we need to maintain a small dictionary, and so small that a list is entirely sufficient.

typedef struct ebook_datum {
    struct text_stream *key;
    struct text_stream *value;
} ebook_datum;

§3. As noted above, we use the following to stratify the book:

typedef struct ebook_volume {
    struct text_stream *volume_title;
    struct ebook_page *volume_starts;  on which page the volume starts
    struct filename *CSS_file;  where to find the CSS file to be included
} ebook_volume;

typedef struct ebook_chapter {
    struct text_stream *chapter_title;
    struct ebook_volume *in_volume;  to which volume this chapter belongs
    struct ebook_page *chapter_starts;  on which page the chapter starts
    struct linked_list *ebook_mark_list;  of ebook_mark: for when multiple navigable points exist within this
    struct text_stream *start_URL;
} ebook_chapter;

§4. Now for the actual resources which will end up in the EPUB. Here are the pages:

typedef struct ebook_page {
    struct text_stream *page_title;
    struct text_stream *page_type;
    struct text_stream *page_ID;

    struct filename *relative_URL;  eventual URL of this page within the ebook

    struct ebook_volume *in_volume;  to which volume this page belongs
    struct ebook_chapter *in_chapter;  to which chapter this page belongs

    int nav_entry_written;  keep track of what we've written to the navigation tree
} ebook_page;

typedef struct ebook_mark {
    struct text_stream *mark_text;
    struct text_stream *mark_URL;
} ebook_mark;

typedef struct ebook_image {
    struct text_stream *image_ID;
    struct filename *relative_URL;  eventual URL of this image within the ebook
} ebook_image;

§5. Creation.

ebook *Epub::new(text_stream *title, char *prefix) {
    ebook *B = CREATE(ebook);
    B->metadata_list = NEW_LINKED_LIST(ebook_datum);
    B->OEBPS_path = NULL;
    B->ebook_page_list = NEW_LINKED_LIST(ebook_page);
    B->ebook_image_list = NEW_LINKED_LIST(ebook_image);
    B->ebook_volume_list = NEW_LINKED_LIST(ebook_volume);
    B->current_volume = NULL;
    B->ebook_chapter_list = NEW_LINKED_LIST(ebook_chapter);
    B->current_chapter = NULL;
    B->eventual_epub = NULL;
    B->prefix = prefix;
    Epub::attach_metadata(B, U"title", title);
    return B;

void Epub::use_CSS_throughout(ebook *B, filename *F) {
    B->CSS_file_throughout = F;

void Epub::use_CSS(ebook_volume *V, filename *F) {
    V->CSS_file = F;

text_stream *Epub::attach_metadata(ebook *B, inchar32_t *K, text_stream *V) {
    ebook_datum *D = NULL;
    LOOP_OVER_LINKED_LIST(D, ebook_datum, B->metadata_list)
        if (Str::eq_wide_string(D->key, K)) {
            Str::copy(D->value, V);
            return D->value;
    D = CREATE(ebook_datum);
    D->key = Str::new_from_wide_string(K);
    D->value = Str::duplicate(V);
    ADD_TO_LINKED_LIST(D, ebook_datum, B->metadata_list);
    return D->value;

text_stream *Epub::get_metadata(ebook *B, inchar32_t *K) {
    ebook_datum *D = NULL;
    LOOP_OVER_LINKED_LIST(D, ebook_datum, B->metadata_list)
        if (Str::eq_wide_string(D->key, K))
            return D->value;
    return NULL;

text_stream *Epub::ensure_metadata(ebook *B, inchar32_t *K) {
    text_stream *S = Epub::get_metadata(B, K);
    if (S == NULL) S = Epub::attach_metadata(B, K, NULL);
    return S;

ebook_page *Epub::note_page(ebook *B, filename *F, text_stream *title, text_stream *type) {
    ebook_page *P = CREATE(ebook_page);
    P->relative_URL = F;
    P->nav_entry_written = FALSE;
    P->in_volume = B->current_volume;
    P->in_chapter = B->current_chapter;
    P->page_title = Str::duplicate(title);
    P->page_type = Str::duplicate(type);

    P->page_ID = Str::new();
    WRITE_TO(P->page_ID, B->prefix);
    Filenames::write_unextended_leafname(P->page_ID, F);
    LOOP_THROUGH_TEXT(pos, P->page_ID) {
        inchar32_t c = Str::get(pos);
        if ((c == '-') || (c == ' ')) Str::put(pos, '_');
    ADD_TO_LINKED_LIST(P, ebook_page, B->ebook_page_list);
    return P;

void Epub::note_image(ebook *B, filename *F) {
    ebook_image *I = CREATE(ebook_image);
    I->relative_URL = F;
    I->image_ID = Str::new();
    Filenames::write_unextended_leafname(I->image_ID, F);
    ADD_TO_LINKED_LIST(I, ebook_image, B->ebook_image_list);

ebook_volume *Epub::starts_volume(ebook *B, ebook_page *P, text_stream *title) {
    ebook_volume *V = CREATE(ebook_volume);
    V->volume_starts = P;
    P->in_volume = V;
    V->volume_title = Str::duplicate(title);
    B->current_volume = V;
    V->CSS_file = NULL;
    ADD_TO_LINKED_LIST(V, ebook_volume, B->ebook_volume_list);
    return V;

ebook_chapter *Epub::starts_chapter(ebook *B, ebook_page *P, text_stream *title, text_stream *URL) {
    ebook_chapter *C = CREATE(ebook_chapter);
    C->chapter_starts = P;
    C->in_volume = B->current_volume;
    C->chapter_title = Str::duplicate(title);
    C->start_URL = Str::duplicate(URL);
    C->ebook_mark_list = NEW_LINKED_LIST(ebook_mark);
    ADD_TO_LINKED_LIST(C, ebook_chapter, B->ebook_chapter_list);
    B->current_chapter = C;
    P->in_chapter = C;
    return C;

void Epub::set_mark_in_chapter(ebook_chapter *C, text_stream *text, text_stream *URL) {
    ebook_mark *M = CREATE(ebook_mark);
    M->mark_text = Str::duplicate(text);
    M->mark_URL = Str::duplicate(URL);
    ADD_TO_LINKED_LIST(M, ebook_mark, C->ebook_mark_list);

§6. Construction. Note that if the client wants to use a cover image, it must also "note" this image separately. (This is a little inconvenient, but indoc wants to do it that way.)

pathname *Epub::begin_construction(ebook *B, pathname *P, filename *cover_image) {
    if (Pathnames::create_in_file_system(P) == FALSE) return NULL;

    WRITE_TO(TEMP, "%S.epub", Epub::get_metadata(B, U"title"));
    B->eventual_epub = Filenames::in(P, TEMP);

    pathname *Holder = Pathnames::down(P, I"ePub");
    if (Pathnames::create_in_file_system(Holder) == FALSE) return NULL;
    B->holder = Holder;

    Write the EPUB mimetype file6.1;
    Write the EPUB meta-inf directory6.2;
    pathname *OEBPS = Pathnames::down(Holder, I"OEBPS");
    if (Pathnames::create_in_file_system(OEBPS) == FALSE) return NULL;
    if (cover_image) Make the cover image page6.3;
    B->OEBPS_path = OEBPS;
    return OEBPS;

§6.1. Write the EPUB mimetype file6.1 =

    filename *Mimetype = Filenames::in(Holder, I"mimetype");
    text_stream EM_struct; text_stream *OUT = &EM_struct;
        Errors::fatal_with_file("unable to open mimetype file for output: %f",
    WRITE("application/epub+zip");  EPUB requires there be no newline here

§6.2. Write the EPUB meta-inf directory6.2 =

    pathname *META_INF = Pathnames::down(Holder, I"META-INF");
    if (Pathnames::create_in_file_system(META_INF) == FALSE) return NULL;
    filename *container = Filenames::in(META_INF, I"container.xml");
    text_stream C_struct; text_stream *OUT = &C_struct;
    if (STREAM_OPEN_TO_FILE(OUT, container, ISO_ENC) == FALSE)
        Errors::fatal_with_file("unable to open container file for output: %f",
    WRITE("<?xml version=\"1.0\"?>\n");
    WRITE("<container version=\"1.0\" xmlns=\"urn:oasis:names:tc:opendocument:xmlns:container\">\n");
    WRITE("<rootfile full-path=\"OEBPS/content.opf\" media-type=\"application/oebps-package+xml\" />\n");

§6.3. It's a much-lamented fact that EPUB 2, at any rate, has no standard way to define cover images, and different readers behave slightly differently. But the following seems to work with iTunes 9.1 and later, and therefore on Apple devices. (See Keith Fahlgren's post "Best practices in ePub cover images" at the ThreePress Consulting blog.)

Make the cover image page6.3 =

    filename *cover = Filenames::in(OEBPS, I"cover.html");
    text_stream C_struct; text_stream *OUT = &C_struct;
        Errors::fatal_with_file("unable to open cover file for output: %f",

    Epub::note_page(B, cover, I"Cover", I"cover");

    HTML::declare_as_HTML(OUT, TRUE);
    HTML::begin_head(OUT, NULL);
    HTML_OPEN_WITH("style", "type=\"text/css\"");
    WRITE("img { max-width: 100%%; }\n");
    HTML::begin_body(OUT, NULL);
    HTML_OPEN_WITH("div", "id=\"cover-image\"");
    HTML_TAG_WITH("img", "src=\"%/f\" alt=\"%S\"", cover_image, Epub::get_metadata(B, U"title"));


void Epub::end_construction(ebook *B) {
    Attach default metadata7.1;
    Write the EPUB OPF file7.2;
    Write the EPUB NCX file7.3;
    Zip the EPUB7.4;

§7.1. Attach default metadata7.1 =

    text_stream *datestamp = Epub::ensure_metadata(B, U"date");
    if (Str::len(datestamp) == 0) {
        WRITE_TO(datestamp, "%04d-%02d-%02d", the_present->tm_year + 1900,
            (the_present->tm_mon)+1, the_present->tm_mday);

    WRITE_TO(TEMP, "");
    text_stream *identifier = Epub::ensure_metadata(B, U"identifier");
    if (Str::len(identifier) == 0)
        WRITE_TO(TEMP, "%S", Epub::get_metadata(B, U"title"));
        WRITE_TO(TEMP, "%S", identifier);
    Str::copy(identifier, TEMP);

    text_stream *lang = Epub::ensure_metadata(B, U"language");
    if (Str::len(lang) == 0) WRITE_TO(lang, "en-UK");

§7.2. Write the EPUB OPF file7.2 =

    filename *content = Filenames::in(B->OEBPS_path, I"content.opf");
    text_stream C_struct; text_stream *OUT = &C_struct;
    if (STREAM_OPEN_TO_FILE(OUT, content, UTF8_ENC) == FALSE)
        Errors::fatal_with_file("unable to open content file for output: %f",

    WRITE("<?xml version='1.0' encoding='utf-8'?>\n");
    WRITE("<package xmlns=\"\"\n");
    WRITE("unique-identifier=\"bookid\" version=\"2.0\">\n"); INDENT;
    Write the OPF metadata7.2.1;
    Write the OPF manifest7.2.2;
    Write the OPF spine7.2.3;
    Write the OPF guide7.2.4;
    OUTDENT; WRITE("</package>\n");


§7.2.1. The metadata here conforms to the Dublin Core Metadata Initiative (ebook_datum). (Other default values are set in the configuration file.)

Write the OPF metadata7.2.1 =

    WRITE("<metadata>\n"); INDENT;
    ebook_datum *D = NULL;
    LOOP_OVER_LINKED_LIST(D, ebook_datum, B->metadata_list) {
        WRITE("<dc:%S", D->key);
        if (Str::eq_wide_string(D->key, U"identifier")) WRITE(" id=\"bookid\"");
        WRITE("%S</dc:%S>\n", D->value, D->key);
    WRITE("<meta name=\"cover\" content=\"cover-image\" />\n");
    OUTDENT; WRITE("</metadata>\n");

§7.2.2. Write the OPF manifest7.2.2 =

    WRITE("<manifest>\n"); INDENT;
    WRITE("<item id=\"ncx\" href=\"toc.ncx\" media-type=\"application/x-dtbncx+xml\"/>\n");
    Manifest the CSS files7.2.2.1;
    Manifest the XHTML files7.2.2.2;
    Manifest the images7.2.2.3;
    OUTDENT; WRITE("</manifest>\n");

§ Manifest the CSS files7.2.2.1 =

    int cssc = 1;
    if (B->CSS_file_throughout)
        WRITE("<item id=\"css%d\" href=\"%S\" media-type=\"text/css\"/>\n",
                cssc++, Filenames::get_leafname(B->CSS_file_throughout));
    ebook_volume *V;
    LOOP_OVER_LINKED_LIST(V, ebook_volume, B->ebook_volume_list)
        if (V->CSS_file)
            WRITE("<item id=\"css%d\" href=\"%S\" media-type=\"text/css\"/>\n",
                cssc++, Filenames::get_leafname(V->CSS_file));

§ Manifest the XHTML files7.2.2.2 =

    ebook_page *P;
    LOOP_OVER_LINKED_LIST(P, ebook_page, B->ebook_page_list)
        WRITE("<item id=\"%S\" href=\"%S\" media-type=\"application/xhtml+xml\"/>\n",
            P->page_ID, Filenames::get_leafname(P->relative_URL));

§ Manifest the images7.2.2.3 =

    ebook_image *I;
    LOOP_OVER_LINKED_LIST(I, ebook_image, B->ebook_image_list) {
        char *image_type = "";
        switch (Filenames::guess_format(I->relative_URL)) {
            case FORMAT_PERHAPS_PNG: image_type = "png"; break;
            case FORMAT_PERHAPS_JPEG: image_type = "jpeg"; break;
            case FORMAT_PERHAPS_SVG: image_type = "svg"; break;
            case FORMAT_PERHAPS_GIF: image_type = "gif"; break;
            default: Errors::nowhere("image not .gif, .png, .jpg or .svg"); break;
        WRITE("<item id=\"%S\" href=\"%/f\" media-type=\"image/%s\"/>\n",
            I->image_ID, I->relative_URL, image_type);

§7.2.3. Write the OPF spine7.2.3 =

    WRITE("<spine toc=\"ncx\">\n"); INDENT;
    ebook_page *P;
    LOOP_OVER_LINKED_LIST(P, ebook_page, B->ebook_page_list) {
        WRITE("<itemref idref=\"%S\"", P->page_ID);
        if (Str::len(P->page_type) > 0) WRITE(" linear=\"no\"");
    OUTDENT; WRITE("</spine>\n");

§7.2.4. Write the OPF guide7.2.4 =

    WRITE("<guide>\n"); INDENT;
    ebook_page *P;
    LOOP_OVER_LINKED_LIST(P, ebook_page, B->ebook_page_list) {
        if (Str::len(P->page_type) > 0) {
            WRITE("<reference href=\"%S\" type=\"%S\" title=\"%S\"/>\n",
                Filenames::get_leafname(P->relative_URL), P->page_type, P->page_title);
    OUTDENT; WRITE("</guide>\n");

§7.3. The NCX duplicates some of what's in the OPF file, for historical reasons; it's left over from an earlier standard used by book-readers for visually impaired people.

Write the EPUB NCX file7.3 =

    filename *toc = Filenames::in(B->OEBPS_path, I"toc.ncx");
    text_stream C_struct; text_stream *OUT = &C_struct;
        Errors::fatal_with_file("unable to open ncx file for output: %f",

    WRITE("<?xml version='1.0' encoding='utf-8'?>\n");
    WRITE("<!DOCTYPE ncx PUBLIC \"-NISO//DTD ncx 2005-1//EN\"\n");
    WRITE(" \"\">\n");
    WRITE("<ncx xmlns=\"\" version=\"2005-1\">\n");

    int depth = 1;  there are surely at least sections
    if (LinkedLists::len(B->ebook_chapter_list) > 0) depth = 2;
    if (LinkedLists::len(B->ebook_volume_list) > 0) depth = 3;

    Write the NCX metadata7.3.1;
    Write the NCX navigation map7.3.2;


§7.3.1. Write the NCX metadata7.3.1 =

    WRITE("<head>\n"); INDENT;
    WRITE("<meta name=\"dtb:uid\" content=\"%S\"/>\n", Epub::get_metadata(B, U"identifier"));
    WRITE("<meta name=\"dtb:depth\" content=\"%d\"/>\n", depth);
    WRITE("<meta name=\"dtb:totalPageCount\" content=\"0\"/>\n");
    WRITE("<meta name=\"dtb:maxPageNumber\" content=\"0\"/>\n");
    OUTDENT; WRITE("</head>\n");
    WRITE("<docTitle>\n"); INDENT;
    WRITE("<text>%S</text>\n", Epub::get_metadata(B, U"title"));
    OUTDENT; WRITE("</docTitle>\n");

§7.3.2. Write the NCX navigation map7.3.2 =

    WRITE("<navMap>\n"); INDENT;
    int navpoint_count = 1;
    int navmap_depth = 1;
    int phase = 0;
    Include the non-section pages in this phase7.3.2.1;
    ebook_volume *V = NULL;
    LOOP_OVER_LINKED_LIST(V, ebook_volume, B->ebook_volume_list) {
        Begin navPoint7.3.2.3;
        WRITE("<navLabel><text>%S</text></navLabel>", V->volume_title);
        WRITE("<content src=\"%S\"/>\n", Filenames::get_leafname(V->volume_starts->relative_URL));
        Include the chapters and sections in this volume7.3.2.2;
        End navPoint7.3.2.4;
    phase = 1;
    Include the non-section pages in this phase7.3.2.1;
    OUTDENT; WRITE("</navMap>\n");
    if (navmap_depth != 1) internal_error("navMap numbering unbalanced");

§ Include the non-section pages in this phase7.3.2.1 =

    ebook_page *P;
    LOOP_OVER_LINKED_LIST(P, ebook_page, B->ebook_page_list) {
        int in_phase = 1;
        if ((Str::eq_wide_string(P->page_ID, U"cover")) ||
            (Str::eq_wide_string(P->page_ID, U"index")))
            in_phase = 0;
        if ((in_phase == phase) && (P->nav_entry_written == FALSE)) {
            Begin navPoint7.3.2.3;
            WRITE("<navLabel><text>%S</text></navLabel> <content src=\"%S\"/>\n",
                P->page_title, Filenames::get_leafname(P->relative_URL));
            End navPoint7.3.2.4;

§ Include the chapters and sections in this volume7.3.2.2 =

    ebook_chapter *C = NULL;
    LOOP_OVER_LINKED_LIST(C, ebook_chapter, B->ebook_chapter_list)
        if (C->in_volume == V) {
            Begin navPoint7.3.2.3;
            WRITE("<navLabel><text>%S</text></navLabel>", C->chapter_title);
            WRITE("<content src=\"%S\"/>\n", C->start_URL);
            if (C->ebook_mark_list)
                Include the marks in this chapter7.
                Include the sections in this chapter7.;
            ebook_page *P;
            LOOP_OVER_LINKED_LIST(P, ebook_page, B->ebook_page_list)
                if (P->in_chapter == C)
                    P->nav_entry_written = TRUE;
            End navPoint7.3.2.4;

§ Include the sections in this chapter7. =

    ebook_page *P;
    LOOP_OVER_LINKED_LIST(P, ebook_page, B->ebook_page_list) {
        if ((P->in_chapter == C) && (P->nav_entry_written == FALSE)) {
            Begin navPoint7.3.2.3;
            WRITE("<navLabel><text>%S</text></navLabel>", P->page_title);
            WRITE("<content src=\"%S\"/>\n", Filenames::get_leafname(P->relative_URL));
            End navPoint7.3.2.4;
            P->nav_entry_written = TRUE;

§ Include the marks in this chapter7. =

    ebook_mark *M;
    LOOP_OVER_LINKED_LIST(M, ebook_mark, C->ebook_mark_list) {
        Begin navPoint7.3.2.3;
        WRITE("<navLabel><text>%S</text></navLabel>", M->mark_text);
        WRITE("<content src=\"%S\"/>\n", M->mark_URL);
        End navPoint7.3.2.4;

§ Begin navPoint7.3.2.3 =

    WRITE("<navPoint id=\"navpoint-%d\" playOrder=\"%d\">\n",
        navpoint_count, navpoint_count);
    navmap_depth++; INDENT;

§ End navPoint7.3.2.4 =

    if (navmap_depth < 1) internal_error("navMap numbering awry");
    OUTDENT; WRITE("</navPoint>\n");

§7.4. Zip the EPUB7.4 =

    pathname *up = Pathnames::from_text(I"..");
    filename *ePub_relative =
        Filenames::in(up, Filenames::get_leafname(B->eventual_epub));
    Issue first zip instruction7.4.1;
    Issue second zip instruction7.4.2;

§7.4.1. Issue first zip instruction7.4.1 =

    Shell::plain(COMMAND, "cd ");
    Shell::quote_path(COMMAND, B->holder);
    Shell::plain(COMMAND, "; zip -0Xq ");
    Shell::quote_file(COMMAND, ePub_relative);
    Shell::plain(COMMAND, " mimetype");

§7.4.2. Issue second zip instruction7.4.2 =

    Shell::plain(COMMAND, "cd ");
    Shell::quote_path(COMMAND, B->holder);
    Shell::plain(COMMAND, "; zip -Xr9Dq ");
    Shell::quote_file(COMMAND, ePub_relative);
    Shell::plain(COMMAND, " *");