-*- mode: org -*- #+TITLE: sisudoc spine (doc_reform) object-centric document abstraction #+DESCRIPTION: documents - structuring, publishing in multiple formats & search #+FILETAGS: :spine:abstraction: #+AUTHOR: Ralph Amissah #+EMAIL: [[mailto:ralph.amissah@gmail.com][ralph.amissah@gmail.com]] #+COPYRIGHT: Copyright (C) 2015 - 2025 Ralph Amissah #+LANGUAGE: en #+STARTUP: content hideblocks hidestars noindent entitiespretty #+PROPERTY: header-args :noweb yes #+PROPERTY: header-args+ :exports code #+PROPERTY: header-args+ :results no #+PROPERTY: header-args+ :cache no #+PROPERTY: header-args+ :padline no #+PROPERTY: header-args+ :mkdirp yes #+OPTIONS: H:3 num:nil toc:t \n:t ::t |:t ^:nil -:t f:t *:t - [[./doc-reform.org][doc-reform.org]] [[./][org/]] * (Object-Centric) Document Abstraction Process markup document, create document abstraction. ** _module template_ :module:metadoc_from_src: #+HEADER: :tangle "../src/sisudoc/meta/metadoc_from_src.d" #+HEADER: :noweb yes #+BEGIN_SRC d <> // document abstraction: // abstraction of sisu markup for downstream processing // metadoc_from_src.d module sisudoc.meta.metadoc_from_src; @safe: template docAbstraction() { <> @system auto docAbstraction(CMM,Opt,Mf) ( char[][] markup_sourcefile_content, CMM conf_make_meta, Opt opt_action, Mf manifested, bool _new_doc ) { <> } // ← closed: abstract doc source } #+END_SRC ** docInitialize #+NAME: docInitialize #+HEADER: :noweb yes #+BEGIN_SRC d // ↓ abstraction imports import std.algorithm, std.container, std.digest.sha, std.file, std.json, std.path; import sisudoc.meta, sisudoc.meta.defaults, sisudoc.meta.rgx, sisudoc.meta.metadoc_object_setter, sisudoc.meta.rgx; public import sisudoc.meta.metadoc_from_src_functions; mixin docAbstractionFunctions; #+END_SRC ** docAbstraction *** abstraction summary #+NAME: docAbstraction #+HEADER: :noweb yes #+BEGIN_SRC d <> <> <> <> <> #+END_SRC *** abstraction init #+NAME: docAbstractionInit #+HEADER: :noweb yes #+BEGIN_SRC d static auto rgx = 
RgxI(); // ↓ abstraction init scope(success) { } scope(failure) { } scope(exit) { destroy(the_document_toc_section); destroy(the_document_head_section); destroy(the_document_body_section); destroy(the_document_bibliography_section); destroy(the_document_glossary_section); destroy(the_document_blurb_section); destroy(the_document_xml_dom_tail_section); destroy(an_object); destroy(processing); destroy(biblio_arr_json); previous_length = 0; reset_note_numbers = true; lev_anchor_tag = ""; anchor_tag = ""; } mixin spineNode; auto node_para_int_ = node_metadata_para_int; auto node_para_str_ = node_metadata_para_str; ObjGenericComposite comp_obj_; line_occur = [ "heading" : 0, "para" : 0, "glossary" : 0, "blurb" : 0, ]; uint[string] dochas = [ "inline_links" : 0, "inline_notes" : 0, "inline_notes_star" : 0, "codeblock" : 0, "table" : 0, "block" : 0, "group" : 0, "poem" : 0, "quote" : 0, "images" : 0, ]; uint[string] pith = [ "ocn" : 1, "section" : 0, "txt_is" : 0, "block_is" : 0, "block_state" : 0, "block_delim" : 0, "make_headings" : 0, "dummy_heading_status" : 0, "dummy_heading_multiple_objects" : 0, "no_ocn_multiple_objects" : 0, "verse_new" : 0, ]; string[string] object_number_poem = [ "start" : "", "end" : "" ]; string[] lv_ancestors_txt = [ "", "", "", "", "", "", "", "", ]; int[string] lv = [ "lv" : eN.bi.off, "h0" : eN.bi.off, "h1" : eN.bi.off, "h2" : eN.bi.off, "h3" : eN.bi.off, "h4" : eN.bi.off, "h5" : eN.bi.off, "h6" : eN.bi.off, "h7" : eN.bi.off, "lev_int_collapsed" : 0, ]; int[string] collapsed_lev = [ "h0" : eN.bi.off, "h1" : eN.bi.off, "h2" : eN.bi.off, "h3" : eN.bi.off, "h4" : eN.bi.off, "h5" : eN.bi.off, "h6" : eN.bi.off, "h7" : eN.bi.off ]; string[string] heading_match_str = [ "h_A": "^(none)", "h_B": "^(none)", "h_C": "^(none)", "h_D": "^(none)", "h_1": "^(none)", "h_2": "^(none)", "h_3": "^(none)", "h_4": "^(none)" ]; Regex!char[string] heading_match_rgx = [ "h_A": regex(r"^(none)"), "h_B": regex(r"^(none)"), "h_C": regex(r"^(none)"), "h_D": 
regex(r"^(none)"), "h_1": regex(r"^(none)"), "h_2": regex(r"^(none)"), "h_3": regex(r"^(none)"), "h_4": regex(r"^(none)") ]; string _anchor_tag; string toc_txt_; an_object["glossary_nugget"] = ""; an_object["blurb_nugget"] = ""; comp_obj_ = set_object_heading("lev4", "frontmatter", "toc", "Table of Contents"); comp_obj_.metainfo.identifier = ""; comp_obj_.metainfo.dummy_heading = false; comp_obj_.metainfo.object_number_off = true; comp_obj_.metainfo.object_number_type = 0; comp_obj_.tags.segment_anchor_tag_epub = "toc"; comp_obj_.tags.anchor_tag_html = comp_obj_.tags.segment_anchor_tag_epub; comp_obj_.tags.in_segment_html = comp_obj_.tags.anchor_tag_html; comp_obj_.ptr.html_segnames = html_segnames_ptr; comp_obj_.tags.anchor_tags = ["toc"]; tag_assoc[comp_obj_.tags.anchor_tag_html]["seg_lv4"] = comp_obj_.tags.in_segment_html; tag_assoc[comp_obj_.tags.segment_anchor_tag_epub]["seg_lv1to4"] = comp_obj_.tags.segment_anchor_tag_epub; auto toc_head = comp_obj_; html_segnames_ptr_cntr++; the_document_toc_section = [toc_head]; static auto mkup = InlineMarkup(); static auto munge = ObjInlineMarkupMunge(); auto note_section = NotesSection(); auto bookindex_extract_hash = BookIndexNuggetHash(); string[][string] lev4_subtoc; string[][string] segnames = ["html": ["toc"], "epub": ["toc"]]; int cnt1 = 1; int cnt2 = 1; int cnt3 = 1; // abstraction init ↑ #+END_SRC *** abstraction init substitutions #+NAME: docAbstractionInitSubstitutions #+HEADER: :noweb yes #+BEGIN_SRC d debug (substitutions) { writeln(__LINE__, ":", __FILE__, ": DEBUG substitutions:"); if (!(conf_make_meta.make.headings.empty)) { writeln(conf_make_meta.make.headings); } if (conf_make_meta.make.substitute) { foreach(substitution_pair; conf_make_meta.make.substitute) { writeln("regex to match: ", substitution_pair[Substitute.match]); writeln("substitution to make: ", substitution_pair[Substitute.markup]); } } if (conf_make_meta.make.bold) { writeln("regex to match: ", conf_make_meta.make.bold[Substitute.match]); 
writeln("substitution to make: ", conf_make_meta.make.bold[Substitute.markup]); } if (conf_make_meta.make.emphasis) { writeln("regex to match: ", conf_make_meta.make.emphasis[Substitute.match]); writeln("substitution to make: ", conf_make_meta.make.emphasis[Substitute.markup]); } if (conf_make_meta.make.italics) { writeln("regex to match: ", conf_make_meta.make.italics[Substitute.match]); writeln("substitution to make: ", conf_make_meta.make.italics[Substitute.markup]); } } #+END_SRC *** loop source by line (large block) #+NAME: docAbstractionMainLoopSrcByLine #+HEADER: :noweb yes #+BEGIN_SRC d auto loopMarkupSrcByLine( char[][] markup_sourcefile_content, string[string] an_object, uint[string] pith, ) { _loopMarkupSrcByLineStruct ret; srcDocLoopLineByLine_: foreach (line; markup_sourcefile_content) { // ↓ markup document/text line by line // "line" variable can be empty but should never be null // scope scope(exit) { } scope(failure) { stderr.writefln( "\n%s\n%s\n\n%s:%s\nFAILED while processing the file: ❮❮ %s ❯❯ on line with text:\n%s\n", __MODULE__, __FUNCTION__, __FILE__, __LINE__, manifested.src.filename, line, ); } debug(source) { writeln(line); } debug(srclines) { if (!line.empty) { writefln("* %s", line); } } if (!line.empty) { pith = line._check_ocn_status_(pith); } if ( pith["block_is"] == eN.blk_is.code && pith["block_state"] == eN.blk_state.on ) { // block object: code { ST_txt_by_line_block_generic _get = line.txt_by_line_block_code(an_object, pith); { an_object = _get.this_object; pith = _get.pith; } } continue; } else if (!matchFirst(line, rgx.skip_from_regular_parse)) { // object other than "code block" object // (includes regular text paragraph, headings & blocks other than code) // heading, glossary, blurb, poem, group, block, quote, table line = line.inline_markup_faces; // by text line (rather than by text object), linebreaks in para problematic if (line.matchFirst(rgx.heading_biblio) || (pith["section"] == eN.sect.bibliography && 
((!(line.matchFirst(rgx.heading_glossary))) && (!(line.matchFirst(rgx.heading_blurb))) && (!(line.matchFirst(rgx.heading))) && (!(line.matchFirst(rgx.comment))))) ) { pith["section"] = eN.sect.bibliography; if (opt_action.backmatter && opt_action.section_biblio) { { ST_txt_by_line_block_biblio _get = line.txt_by_line_block_biblio(pith, bib_entry, biblio_entry_str_json, biblio_arr_json); { pith = _get.pith; bib_entry = _get.bib_entry; biblio_entry_str_json = _get.biblio_entry_str_json; biblio_arr_json = _get.biblio_arr_json; } } debug(bibliobuild) { writeln("- ", biblio_entry_str_json); writeln("-> ", biblio_arr_json.length); } } continue; } else if (line.matchFirst(rgx.heading_glossary) || (pith["section"] == eN.sect.glossary && ((!(line.matchFirst(rgx.heading_biblio))) && (!(line.matchFirst(rgx.heading_blurb))) && (!(line.matchFirst(rgx.heading))) && (!(line.matchFirst(rgx.comment))))) ) { // within section (block object): glossary debug(glossary) { writeln(__LINE__); writeln(line); } pith["section"] = eN.sect.glossary; if (opt_action.backmatter && opt_action.section_glossary) { ST_the_section add_to_glossary_sect = line.build_the_glossary_section(pith, tag_assoc); // double check, should not be necessary to pass pith the_document_glossary_section ~= add_to_glossary_sect.comp_section_obj[0]; if (add_to_glossary_sect.comp_section_obj.length > 1) { // heading the_document_glossary_section ~= add_to_glossary_sect.comp_section_obj[1]; } pith = add_to_glossary_sect.pith; tag_assoc = add_to_glossary_sect.tag_assoc; } continue; } else if (line.matchFirst(rgx.heading_blurb) || (pith["section"] == eN.sect.blurb && ((!(line.matchFirst(rgx.heading_glossary))) && (!(line.matchFirst(rgx.heading_biblio))) && (!(line.matchFirst(rgx.heading))) && (!(line.matchFirst(rgx.comment))))) ) { pith["section"] = eN.sect.blurb; debug(blurb) { writeln(__LINE__); writeln(line); } if ((opt_action.backmatter && opt_action.section_blurb) && !(line.empty)) { ST_the_section add_to_blurb_sect = 
line.build_the_blurb_section(pith, tag_assoc, opt_action); // double check, should not be necessary to pass pith the_document_blurb_section ~= add_to_blurb_sect.comp_section_obj[0]; if (add_to_blurb_sect.comp_section_obj.length > 1) { // heading the_document_blurb_section ~= add_to_blurb_sect.comp_section_obj[1]; } pith = add_to_blurb_sect.pith; tag_assoc = add_to_blurb_sect.tag_assoc; } continue; } else if (pith["block_state"] == eN.blk_state.on) { if (pith["block_is"] == eN.blk_is.quote) { line = line ._doc_header_and_make_substitutions_(conf_make_meta) ._doc_header_and_make_substitutions_fontface_(conf_make_meta); { auto _get = line.txt_by_line_block_quote(an_object, pith); { an_object = _get.this_object; pith = _get.pith; } } continue; } else if (pith["block_is"] == eN.blk_is.group) { line = line ._doc_header_and_make_substitutions_(conf_make_meta) ._doc_header_and_make_substitutions_fontface_(conf_make_meta) .replaceAll(rgx.para_delimiter, mkup.br_line_spaced ~ "$1"); { auto _get = line.txt_by_line_block_group(an_object, pith); { an_object = _get.this_object; pith = _get.pith; } } continue; } else if (pith["block_is"] == eN.blk_is.block) { line = line ._doc_header_and_make_substitutions_(conf_make_meta) ._doc_header_and_make_substitutions_fontface_(conf_make_meta); if (auto m = line.match(rgx.spaces_keep)) { line = line .replaceAll(rgx.spaces_keep, (m.captures[1]).translate([ ' ' : mkup.nbsp ])); } { auto _get = line.txt_by_line_block_block(an_object, pith); { an_object = _get.this_object; pith = _get.pith; } } continue; } else if (pith["block_is"] == eN.blk_is.poem) { { auto _get = line.txt_by_line_block_poem(an_object, pith, cntr, object_number_poem, conf_make_meta, tag_in_seg); { an_object = _get.this_object; pith = _get.pith; cntr = _get.cntr; } } continue; } else if (pith["block_is"] == eN.blk_is.table) { { auto _get = line.txt_by_line_block_table(an_object, pith, conf_make_meta); { an_object = _get.this_object; pith = _get.pith; conf_make_meta = 
_get.conf_make_meta; } } continue; } } else { // not within a block group assert( (pith["block_state"] == eN.blk_state.off) || (pith["block_state"] == eN.blk_state.closing), "block status: none or closed" ); if (line.matchFirst(rgx.block_open)) { if (line.matchFirst(rgx.block_poem_open)) { // poem to verse exceptions! object_reset(an_object); processing.remove("verse"); object_number_poem["start"] = obj_cite_digits.object_number.to!string; } { auto _get = line.txt_by_line_block_start(pith, dochas, object_number_poem); { pith = _get.pith; dochas = _get.dochas; object_number_poem = _get.object_number_poem; } } continue; } else if (!line.empty) { // line not empty - non blocks (headings, paragraphs) & closed blocks assert(!line.empty, "line tested, line not empty surely:\n \"" ~ line ~ "\""); assert( (pith["block_state"] == eN.blk_state.off) || (pith["block_state"] == eN.blk_state.closing), "code block status: none or closed" ); if (pith["block_state"] == eN.blk_state.closing) { debug(check) { writeln(__LINE__); writeln(line); } assert( line.matchFirst(rgx.book_index_item) || line.matchFirst(rgx.book_index_item_open) || pith["section"] == eN.sect.book_index, "\nblocks closed, unless followed by book index, non-matching line:\n \"" ~ line ~ "\"" ); } if (line.matchFirst(rgx.book_index_item) || line.matchFirst(rgx.book_index_item_open) || pith["section"] == eN.sect.book_index) { { // book_index auto _get = line.flow_book_index_(an_object, book_idx_tmp, pith, opt_action); { an_object = _get.this_object; pith = _get.pith; book_idx_tmp = _get.book_idx_tmp; } } } else { // not book_index an_object_key = "body_nugget"; if (auto m = line.matchFirst(rgx.comment)) { // matched comment debug(comment) { writeln(line); } an_object[an_object_key] ~= line ~= "\n"; comp_obj_comment = comp_obj_comment.init; comp_obj_comment.metainfo.is_of_part = "comment"; // breaks flow comp_obj_comment.metainfo.is_of_section = "comment"; // breaks flow comp_obj_comment.metainfo.is_of_type = 
"comment"; comp_obj_comment.metainfo.is_a = "comment"; comp_obj_comment.text = an_object[an_object_key].strip; the_document_body_section ~= comp_obj_comment; { auto _get = txt_by_line_common_reset_(line_occur, an_object, pith); { line_occur = _get.line_occur; an_object = _get.this_object; pith = _get.pith; } } processing.remove("verse"); ++cntr; } else if ((line_occur["para"] == eN.bi.off && line_occur["heading"] == eN.bi.off) && pith["txt_is"] == eN.txt_is.off ) { // heading or para but neither flag nor line exists if ((conf_make_meta.make.headings.length > 2) && (pith["make_headings"] == eN.bi.off)) { // heading found { auto _get = line.flow_heading_found_(heading_match_str, conf_make_meta.make.headings, heading_match_rgx, pith); { heading_match_str = _get.heading_match_str; heading_match_rgx = _get.heading_match_rgx; pith = _get.pith; } } } if (pith["make_headings"] == eN.bi.on && (line_occur["para"] == eN.bi.off && line_occur["heading"] == eN.bi.off) && pith["txt_is"] == eN.txt_is.off ) { // heading make set { auto _get = line.flow_heading_make_set_(line_occur, heading_match_rgx, pith); { line = _get.line; an_object = _get.this_object; pith = _get.pith; } } } // TODO node info: all headings identified at this point, // - extract node info here?? // - how long can it wait? // - should be incorporated in composite objects // - should happen before endnote links set (they need to be moved down?) 
if (line.matchFirst(rgx.headings)) { // heading match line = line._doc_header_and_make_substitutions_(conf_make_meta); { auto _get = line.flow_heading_matched_( an_object, line_occur, an_object_key, lv, collapsed_lev, pith, conf_make_meta, ); { an_object = _get.this_object; pith = _get.pith; } } } else if (line_occur["para"] == eN.bi.off) { // para match an_object_key = "body_nugget"; line = line ._doc_header_and_make_substitutions_(conf_make_meta) ._doc_header_and_make_substitutions_fontface_(conf_make_meta); { auto _get = line.flow_para_match_(an_object, an_object_key, indent, bullet, pith, line_occur); { an_object = _get.this_object; an_object_key = _get.this_object_key; pith = _get.pith; indent = _get.indent; bullet = _get.bullet; line_occur = _get.line_occur; } } } } else if (line_occur["heading"] > eN.bi.off) { // heading debug(heading) { writeln(line); } an_object[an_object_key] ~= line ~= "\n"; ++line_occur["heading"]; } else if (line_occur["para"] > eN.bi.off) { // paragraph debug(para) { writeln(an_object_key, "-> ", line); } line = line ._doc_header_and_make_substitutions_(conf_make_meta) ._doc_header_and_make_substitutions_fontface_(conf_make_meta); an_object[an_object_key] ~= " " ~ line; ++line_occur["para"]; } } } else if (pith["block_state"] == eN.blk_state.closing) { // line empty, with blocks flag { auto _get = line.flow_block_flag_line_empty_( an_object, bookindex_extract_hash, the_document_body_section, bookindex_unordered_hashes, obj_cite_digits, comp_obj_, cntr, pith, object_number_poem, conf_make_meta, tag_in_seg, ); { an_object = _get.this_object; the_document_body_section = _get.the_document_body_section; bookindex_unordered_hashes = _get.bookindex_unordered_hashes; obj_cite_digits = _get.obj_cite_digits; comp_obj_ = _get.comp_obj_; cntr = _get.cntr; pith = _get.pith; } } } else { // line.empty, post contents, empty variables: assert( line.empty, "\nline should be empty:\n \"" ~ line ~ "\"" ); assert( (pith["block_state"] == 
eN.blk_state.off), "code block status: none" ); if (_new_doc) { tag_assoc = tag_assoc.init; lv0to3_tags = lv0to3_tags.init; tag_in_seg = tag_in_seg.init; } if (pith["txt_is"] == eN.txt_is.heading && line_occur["heading"] > eN.bi.off ) { // heading object (current line empty) obj_cite_digits = (an_object["lev_markup_number"].to!int == 0) ? ocn_emit(eN.ocn.reset) : ocn_emit(pith["ocn"]); an_object["is"] = "heading"; an_object_key = "body_nugget"; ST_txtAndAnchorTagPlusHasFootnotesUrlsImages substantive_object_and_anchor_tags_struct = obj_im.obj_inline_markup_and_anchor_tags_and_misc(an_object, an_object_key, conf_make_meta, ((_new_doc) ? Yes._new_doc : No._new_doc)); an_object["substantive"] = substantive_object_and_anchor_tags_struct.obj_txt; anchor_tag = substantive_object_and_anchor_tags_struct.anchor_tag; if (_new_doc) { cnt1 = 1; cnt2 = 1; cnt3 = 1; _new_doc = false; } if ( an_object["lev_markup_number"].to!int == 4 && (!(anchor_tag.empty) || (lv0to3_tags.length > 0)) ) { tag_in_seg["seg_lv4"] = anchor_tag; tag_in_seg["seg_lv1to4"] = anchor_tag; lev_anchor_tag = anchor_tag; tag_assoc[anchor_tag]["seg_lv4"] = tag_in_seg["seg_lv4"]; tag_assoc[anchor_tag]["seg_lv1to4"] = tag_in_seg["seg_lv1to4"]; if (lv0to3_tags.length > 0) { // names used for html markup segments 1 to 4 (rather than epub which has separate segments for A to D) foreach (lv0_to_lv3_html_tag; lv0to3_tags) { tag_assoc[lv0_to_lv3_html_tag]["seg_lv4"] = anchor_tag; } } anchor_tag_ = anchor_tag; lv0to3_tags = lv0to3_tags.init; } else if (an_object["lev_markup_number"].to!int > 4) { tag_in_seg["seg_lv4"] = anchor_tag_; tag_in_seg["seg_lv1to4"] = anchor_tag_; lev_anchor_tag = anchor_tag; tag_assoc[anchor_tag]["seg_lv4"] = tag_in_seg["seg_lv4"]; tag_assoc[anchor_tag]["seg_lv1to4"] = tag_in_seg["seg_lv1to4"]; } else if (an_object["lev_markup_number"].to!int < 4) { string segn; switch (an_object["lev_markup_number"].to!int) { // names used for epub markup segments A to D case 0: segn = "_the_title"; goto 
default; case 1: segn = "_part_" ~ cnt1.to!string; ++cnt1; goto default; case 2: segn = "_part_" ~ cnt1.to!string ~ "_" ~ cnt2.to!string; ++cnt2; goto default; case 3: segn = "_part_" ~ cnt1.to!string ~ "_" ~ cnt2.to!string ~ "_" ~ cnt3.to!string; ++cnt3; goto default; default: lv0to3_tags ~= obj_cite_digits.object_number.to!string; lv0to3_tags ~= segn; tag_in_seg["seg_lv4"] = segn; // for html segname need following lv4 not yet known tag_in_seg["seg_lv1to4"] = segn; break; } } an_object["bookindex_nugget"] = ("bookindex_nugget" in an_object) ? an_object["bookindex_nugget"] : ""; bookindex_unordered_hashes = bookindex_extract_hash.bookindex_nugget_hash(an_object["bookindex_nugget"], obj_cite_digits, tag_in_seg); _anchor_tag = obj_cite_digits.identifier; // (incrementally build toc) table of contents here! { auto _get = obj_im.flow_table_of_contents_gather_headings( an_object, conf_make_meta, tag_in_seg, _anchor_tag, lev4_subtoc, the_document_toc_section, ); { the_document_toc_section = _get.the_document_toc_section; lev4_subtoc = _get.lev4_subtoc; } } if (an_object["lev_markup_number"] == "4") { segnames["html"] ~= tag_in_seg["seg_lv4"]; html_segnames_ptr = html_segnames_ptr_cntr; html_segnames_ptr_cntr++; } if (an_object["lev_markup_number"].to!int <= 4) { segnames["epub"] ~= tag_in_seg["seg_lv1to4"]; } auto comp_obj_ = node_construct.node_emitter_heading( an_object, tag_in_seg, lev_anchor_tag, tag_assoc, obj_cite_digits, // OCNset cntr, // int heading_ptr, // int lv_ancestors_txt, // string[] html_segnames_ptr, // int substantive_object_and_anchor_tags_struct, ); ++heading_ptr; debug(segments) { writeln(an_object["lev_markup_number"]); writeln(tag_in_seg["seg_lv4"]); writeln(tag_in_seg["seg_lv1to4"]); } the_document_body_section ~= comp_obj_; debug(objectrelated1) { writeln(line); } // check { auto _get = txt_by_line_common_reset_(line_occur, an_object, pith); { line_occur = _get.line_occur; an_object = _get.this_object; pith = _get.pith; } } 
an_object.remove("lev"); an_object.remove("lev_markup_number"); processing.remove("verse"); ++cntr; } else if (pith["txt_is"] == eN.txt_is.para && line_occur["para"] > eN.bi.off ) { // paragraph object (current line empty) - repeated character paragraph separator if ((an_object[an_object_key].to!string).matchFirst(rgx.repeated_character_line_separator)) { pith["ocn"] = eN.ocn.off; } obj_cite_digits = ocn_emit(pith["ocn"]); an_object["bookindex_nugget"] = ("bookindex_nugget" in an_object) ? an_object["bookindex_nugget"] : ""; bookindex_unordered_hashes = bookindex_extract_hash.bookindex_nugget_hash(an_object["bookindex_nugget"], obj_cite_digits, tag_in_seg); an_object["is"] = "para"; auto comp_obj_ = node_construct.node_location_emitter( content_non_header, tag_in_seg, lev_anchor_tag, tag_assoc, obj_cite_digits, cntr, heading_ptr-1, an_object["is"], ); ST_txtAndAnchorTagPlusHasFootnotesUrlsImages substantive_obj_misc_struct = obj_im.obj_inline_markup_and_anchor_tags_and_misc(an_object, an_object_key, conf_make_meta, No._new_doc); an_object["substantive"] = substantive_obj_misc_struct.obj_txt; anchor_tag = substantive_obj_misc_struct.anchor_tag; comp_obj_ = set_object_generic("body", "body", "para", "para", an_object["substantive"].to!string.strip, obj_cite_digits.object_number); comp_obj_.tags.html_segment_anchor_tag_is = tag_in_seg["seg_lv4"]; comp_obj_.tags.epub_segment_anchor_tag_is = tag_in_seg["seg_lv1to4"]; comp_obj_.metainfo.identifier = obj_cite_digits.identifier; comp_obj_.metainfo.object_number_off = (obj_cite_digits.off == 0) ? 
true : false; // TODO comp_obj_.metainfo.o_n_book_index = obj_cite_digits.bkidx; comp_obj_.metainfo.object_number_type = obj_cite_digits.type; comp_obj_.attrib.indent_hang = indent["hang_position"]; comp_obj_.attrib.indent_base = indent["base_position"]; comp_obj_.attrib.bullet = bullet; comp_obj_.tags.anchor_tags = [anchor_tag]; anchor_tag=""; comp_obj_.has.inline_notes_reg = substantive_obj_misc_struct.has_notes_reg; comp_obj_.has.inline_notes_star = substantive_obj_misc_struct.has_notes_star; comp_obj_.has.inline_links = substantive_obj_misc_struct.has_links; comp_obj_.has.image_without_dimensions = substantive_obj_misc_struct.has_images_without_dimensions; the_document_body_section ~= comp_obj_; tag_assoc = an_object.inline_para_link_anchor(tag_in_seg, tag_assoc); { auto _get = txt_by_line_common_reset_(line_occur, an_object, pith); { line_occur = _get.line_occur; an_object = _get.this_object; pith = _get.pith; } } indent = [ "hang_position" : 0, "base_position" : 0, ]; bullet = false; processing.remove("verse"); ++cntr; // } else { // could be useful to test line variable should be empty and never null } } // close else for line empty } // close else for not the above } // close after non code, other blocks or regular text // unless (the_document_body_section.length == 0) ? 
if (the_document_body_section.length > 0) { if (((the_document_body_section[$-1].metainfo.is_a == "para") || (the_document_body_section[$-1].metainfo.is_a == "heading") || (the_document_body_section[$-1].metainfo.is_a == "quote") || (the_document_body_section[$-1].metainfo.is_a == "group") || (the_document_body_section[$-1].metainfo.is_a == "block") || (the_document_body_section[$-1].metainfo.is_a == "verse")) && (the_document_body_section.length > previous_length)) { if ((the_document_body_section[$-1].metainfo.is_a == "heading") && (the_document_body_section[$-1].metainfo.heading_lev_markup < 5)) { pith["section"] = eN.sect.unset; } if (the_document_body_section[$-1].metainfo.is_a == "verse") { // scan for endnotes for whole poem (each verse in poem) foreach (i; previous_length .. the_document_body_section.length) { if (the_document_body_section[i].metainfo.is_a == "verse") { if ((the_document_body_section[i].text).match( rgx.inline_notes_al_all_note )) { object_notes = note_section.gather_notes_for_endnote_section( the_document_body_section, tag_in_seg, (i).to!int, ); } } } } else { // scan object for endnotes previous_length = the_document_body_section.length.to!int; if ((the_document_body_section[$-1].text).match( rgx.inline_notes_al_all_note )) { previous_count = (the_document_body_section.length -1).to!int; object_notes = note_section.gather_notes_for_endnote_section( the_document_body_section, tag_in_seg, (the_document_body_section.length-1).to!int, ); } } previous_length = the_document_body_section.length.to!int; } } } ret.toc = the_document_toc_section; ret.body = the_document_body_section; ret.glossary = the_document_glossary_section; ret.blurb = the_document_blurb_section; ret.object_notes = object_notes; ret.segnames = segnames; return ret; } { // loopMarkupSrcByLine auto _doc_by_line = loopMarkupSrcByLine(markup_sourcefile_content, an_object, pith); the_document_toc_section = _doc_by_line.toc; the_document_body_section = _doc_by_line.body; 
the_document_glossary_section = _doc_by_line.glossary; the_document_blurb_section = _doc_by_line.blurb; segnames = _doc_by_line.segnames; object_notes = _doc_by_line.object_notes; // endnotes, compare, not sure is used destroy(_doc_by_line); } #+END_SRC *** post loop (consider and segment/break up further in code & org) #+NAME: docAbstractionPostMainLoop #+HEADER: :noweb yes #+BEGIN_SRC d { // EOF backMatter comp_obj_ = set_object_heading("lev1", "backmatter", "tail", ""); comp_obj_.metainfo.identifier = ""; comp_obj_.metainfo.dummy_heading = false; comp_obj_.metainfo.object_number_off = false; comp_obj_.metainfo.object_number_type = 0; comp_obj_.tags.segment_anchor_tag_epub = "_part_eof"; comp_obj_.tags.anchor_tag_html = comp_obj_.tags.segment_anchor_tag_epub; comp_obj_.tags.in_segment_html = "tail"; comp_obj_.tags.anchor_tags = ["section_eof"]; comp_obj_.metainfo.dom_structure_markedup_tags_status = [ 0, 0, 0, 0, 0, 0, 0, 0]; comp_obj_.metainfo.dom_structure_collapsed_tags_status = [ 0, 0, 0, 0, 0, 0, 0, 0]; the_document_xml_dom_tail_section ~= comp_obj_; tag_assoc[comp_obj_.tags.anchor_tag_html]["seg_lv4"] = comp_obj_.tags.in_segment_html; tag_assoc[comp_obj_.tags.segment_anchor_tag_epub]["seg_lv1to4"] = comp_obj_.tags.segment_anchor_tag_epub; } // endNotes ST_endnotes en_st = note_section.backmatter_endnote_objects(obj_cite_digits, opt_action); { // endnotes the_document_endnotes_section = en_st.endnotes; obj_cite_digits = en_st.ocn; debug(endnotes) { writefln("%s %s", __LINE__, the_document_endnotes_section.length); foreach (o; the_document_endnotes_section) { writeln(o); } } } { // glossary if (an_object["glossary_nugget"].length == 0) { comp_obj_ = set_object_heading("lev1", "empty", "empty", "(skip) there is no Glossary section"); comp_obj_.metainfo.identifier = ""; comp_obj_.metainfo.dummy_heading = true; comp_obj_.metainfo.object_number_off = true; comp_obj_.metainfo.object_number_type = 0; the_document_glossary_section ~= comp_obj_; } debug(glossary) { 
foreach (gloss; the_document_glossary_section) {
      writeln(gloss.text);
    }
  }
}
{ // bibliography
  // hand a copy of the gathered bibliography JSON strings to the section builder
  string[] biblio_unsorted_incomplete = biblio_arr_json.dup;
  ST_biblio_section biblio_section = backmatter_make_the_bibliography_section(biblio_unsorted_incomplete, bib_arr_json);
  the_document_bibliography_section = biblio_section.bibliography_section;
  tag_assoc = biblio_section.tag_assoc;
}
{ // bookindex
  BookIndexReportSection bi = BookIndexReportSection();
  ST_bookindex bi_st = bi.backmatter_bookindex_build_abstraction_section(bookindex_unordered_hashes, obj_cite_digits, opt_action);
  destroy(bookindex_unordered_hashes);
  the_document_bookindex_section = bi_st.bookindex;
  // continue from the object-number state returned by the book index build
  obj_cite_digits = bi_st.ocn;
  debug(bookindex) {
    foreach (bi_entry; the_document_bookindex_section) {
      writeln(bi_entry);
    }
  }
}
{ // blurb
  if (an_object["blurb_nugget"].length == 0) {
    // no blurb text held: append a placeholder heading with object numbering
    // switched off and empty segment/anchor tags
    comp_obj_ = set_object_heading("lev1", "empty", "empty", "(skip) there is no Blurb section");
    comp_obj_.metainfo.identifier = "";
    comp_obj_.metainfo.object_number_off = true;
    comp_obj_.metainfo.object_number_type = 0;
    comp_obj_.tags.segment_anchor_tag_epub = "";
    comp_obj_.tags.anchor_tag_html = "";
    comp_obj_.tags.in_segment_html = "";
    the_document_blurb_section ~= comp_obj_;
  }
  debug(blurb) {
    foreach (blurb; the_document_blurb_section) {
      writeln(blurb.text);
    }
  }
}
{ // toc gather backmatter
  the_document_toc_section ~= backmatter_gather_table_of_contents(the_document_endnotes_section, the_document_glossary_section, the_document_bibliography_section, the_document_bookindex_section, the_document_blurb_section); //
}
// document head and body
// the first body object is moved into the head section and dropped from the body;
// guarded so that a source producing no body objects does not fail with a
// RangeError on [0] / [1..$]
if (the_document_body_section.length > 0) {
  the_document_head_section ~= the_document_body_section[0];
  the_document_body_section = the_document_body_section[1..$];
}
{ // document ancestors
  ST_ancestors get_ancestors;
  get_ancestors = the_document_body_section.after_doc_determine_ancestors(the_document_endnotes_section, the_document_glossary_section, the_document_bibliography_section, the_document_bookindex_section,
the_document_blurb_section); the_document_body_section = get_ancestors.the_document_body_section; the_document_endnotes_section = get_ancestors.the_document_endnotes_section; the_document_glossary_section = get_ancestors.the_document_glossary_section; the_document_bibliography_section = get_ancestors.the_document_bibliography_section; the_document_bookindex_section = get_ancestors.the_document_bookindex_section; the_document_blurb_section = get_ancestors.the_document_blurb_section; } { // document segnames ST_segnames get_segnames; get_segnames = the_document_body_section.after_doc_determine_segnames(the_document_endnotes_section, the_document_glossary_section, the_document_bibliography_section, the_document_bookindex_section, the_document_blurb_section, segnames, html_segnames_ptr_cntr, html_segnames_ptr); // segnames = get_segnames.segnames; html_segnames_ptr_cntr = get_segnames.html_segnames_ptr_cntr; html_segnames_ptr = get_segnames.html_segnames_ptr; } // document head string[] segnames_0_to_4; foreach (ref obj; the_document_head_section) { if (obj.metainfo.is_a == "heading") { debug(dom) { writeln(obj.text); } if (obj.metainfo.heading_lev_markup <= 4) { segnames_0_to_4 ~= obj.tags.segment_anchor_tag_epub; } if (obj.metainfo.heading_lev_markup == 0) { // TODO second hit (of two) with same assertion failure, check, fix and reinstate // assert( obj.metainfo.ocn == 1, // "Title OCN should be 1 not: " ~ obj.metainfo.ocn.to!string); // bug introduced 0.18.1 obj.metainfo.ocn = 1; obj.metainfo.identifier = "1"; obj.metainfo.object_number_type = OCNtype.ocn; } // dom structure (marked up & collapsed) if (opt_action.meta_processing_xml_dom) { obj = obj.obj_dom_structure_set_markup_tags(dom_structure_markedup_tags_status, obj.metainfo.heading_lev_markup); obj = obj.obj_dom_set_collapsed_tags(dom_structure_collapsed_tags_status, obj.metainfo.heading_lev_collapsed); } obj = obj.obj_heading_ancestors(lv_ancestors_txt); } obj = _links(obj); } if 
(the_document_toc_section.length > 1) { // writeln("toc"); // scroll dom_structure_markedup_tags_status_buffer = dom_structure_markedup_tags_status.dup; dom_structure_collapsed_tags_status_buffer = dom_structure_collapsed_tags_status.dup; foreach (ref obj; the_document_toc_section) { if (obj.metainfo.is_a == "heading") { if (obj.metainfo.heading_lev_markup <= 4) { segnames_0_to_4 ~= obj.tags.segment_anchor_tag_epub; if (obj.metainfo.heading_lev_markup == 4) { obj.tags.segname_next = segnames["html"][obj.ptr.html_segnames + 1]; assert(obj.tags.anchor_tag_html == segnames["html"][obj.ptr.html_segnames], obj.tags.anchor_tag_html ~ "!=" ~ segnames["html"][obj.ptr.html_segnames]); } } // dom structure (marked up & collapsed) if (opt_action.meta_processing_xml_dom) { obj = obj.obj_dom_structure_set_markup_tags(dom_structure_markedup_tags_status, obj.metainfo.heading_lev_markup); obj = obj.obj_dom_set_collapsed_tags(dom_structure_collapsed_tags_status, obj.metainfo.heading_lev_collapsed); } obj = obj.obj_heading_ancestors(lv_ancestors_txt); } obj = _links(obj); } } // images string[] _images; // multiple 1~ levels, loop through document body if (the_document_body_section.length > 1) { // writeln("body"); foreach (ref obj; the_document_body_section) { if (!(obj.metainfo.identifier.empty)) { if (!(((obj.metainfo.identifier) in tag_assoc) && ("seg_lv4" in tag_assoc[(obj.metainfo.identifier)])) ) { tag_assoc[(obj.metainfo.identifier)]["seg_lv4"] = obj.tags.html_segment_anchor_tag_is; } tag_assoc[(obj.metainfo.identifier)]["seg_lv1to4"] = obj.tags.epub_segment_anchor_tag_is; } if (obj.metainfo.is_a == "heading") { debug(dom) { writeln(obj.text); } if (obj.metainfo.heading_lev_markup <= 4) { segnames_0_to_4 ~= obj.tags.segment_anchor_tag_epub; if (obj.metainfo.heading_lev_markup == 4) { obj.tags.lev4_subtoc = lev4_subtoc[obj.tags.anchor_tag_html]; obj.tags.segname_prev = segnames["html"][obj.ptr.html_segnames - 1]; if (segnames["html"].length > obj.ptr.html_segnames + 1) { 
obj.tags.segname_next = segnames["html"][obj.ptr.html_segnames + 1]; } assert(obj.tags.anchor_tag_html == segnames["html"][obj.ptr.html_segnames], obj.tags.anchor_tag_html ~ "!=" ~ segnames["html"][obj.ptr.html_segnames]); } } // dom structure (marked up & collapsed) if (opt_action.meta_processing_xml_dom) { obj = obj.obj_dom_structure_set_markup_tags(dom_structure_markedup_tags_status, obj.metainfo.heading_lev_markup); obj = obj.obj_dom_set_collapsed_tags(dom_structure_collapsed_tags_status, obj.metainfo.heading_lev_collapsed); } obj = obj.obj_heading_ancestors(lv_ancestors_txt); } else if (obj.metainfo.is_a == "para") { _images ~= extract_images(obj.text); obj = _image_dimensions(obj, manifested); } obj.metainfo.sha256 = obj.obj_digest; obj = _links(obj); } } auto image_list = (_images.sort()).uniq; // endnotes optional only one 1~ level if (the_document_endnotes_section.length > 1) { // writeln("endnotes"); dom_structure_markedup_tags_status_buffer = dom_structure_markedup_tags_status.dup; dom_structure_collapsed_tags_status_buffer = dom_structure_collapsed_tags_status.dup; dom_structure_markedup_tags_status = dom_structure_markedup_tags_status_buffer.dup; dom_structure_collapsed_tags_status = dom_structure_collapsed_tags_status_buffer.dup; foreach (ref obj; the_document_endnotes_section) { if (obj.metainfo.is_a == "heading") { debug(dom) { writeln(obj.text); } if (obj.metainfo.heading_lev_markup == 1) { obj_cite_digits = ocn_emit(eN.ocn.on); obj.metainfo.ocn = obj_cite_digits.object_number; obj.metainfo.identifier = obj_cite_digits.identifier; } if (obj.metainfo.heading_lev_markup <= 4) { segnames_0_to_4 ~= obj.tags.segment_anchor_tag_epub; if (obj.metainfo.heading_lev_markup == 4) { obj.tags.segname_prev = segnames["html"][obj.ptr.html_segnames - 1]; if (segnames["html"].length > obj.ptr.html_segnames + 1) { obj.tags.segname_next = segnames["html"][obj.ptr.html_segnames + 1]; } assert(obj.tags.anchor_tag_html == segnames["html"][obj.ptr.html_segnames], 
obj.tags.anchor_tag_html ~ "!=" ~ segnames["html"][obj.ptr.html_segnames]); } } // dom structure (marked up & collapsed) if (opt_action.meta_processing_xml_dom) { obj = obj.obj_dom_structure_set_markup_tags(dom_structure_markedup_tags_status, obj.metainfo.heading_lev_markup); obj = obj.obj_dom_set_collapsed_tags(dom_structure_collapsed_tags_status, obj.metainfo.heading_lev_collapsed); } obj = obj.obj_heading_ancestors(lv_ancestors_txt); } obj = _links(obj); } } // glossary optional only one 1~ level if (the_document_glossary_section.length > 1) { // writeln("glossary"); foreach (ref obj; the_document_glossary_section) { if (obj.metainfo.is_a == "heading") { debug(dom) { writeln(obj.text); } if (obj.metainfo.heading_lev_markup == 1) { obj_cite_digits = ocn_emit(eN.ocn.on); obj.metainfo.ocn = obj_cite_digits.object_number; obj.metainfo.identifier = obj_cite_digits.identifier; } if (obj.metainfo.heading_lev_markup <= 4) { segnames_0_to_4 ~= obj.tags.segment_anchor_tag_epub; if (obj.metainfo.heading_lev_markup == 4) { obj.tags.segname_prev = segnames["html"][obj.ptr.html_segnames - 1]; if (segnames["html"].length > obj.ptr.html_segnames + 1) { obj.tags.segname_next = segnames["html"][obj.ptr.html_segnames + 1]; } assert(obj.tags.anchor_tag_html == segnames["html"][obj.ptr.html_segnames], obj.tags.anchor_tag_html ~ "!=" ~ segnames["html"][obj.ptr.html_segnames]); } } // dom structure (marked up & collapsed) if (opt_action.meta_processing_xml_dom) { obj = obj.obj_dom_structure_set_markup_tags(dom_structure_markedup_tags_status, obj.metainfo.heading_lev_markup); obj = obj.obj_dom_set_collapsed_tags(dom_structure_collapsed_tags_status, obj.metainfo.heading_lev_collapsed); } obj = obj.obj_heading_ancestors(lv_ancestors_txt); } else if (obj.metainfo.is_a == "glossary" && !(obj.text.empty)) { obj_cite_digits = ocn_emit(eN.ocn.on); obj.metainfo.ocn = obj_cite_digits.object_number; obj.metainfo.identifier = obj_cite_digits.identifier; } obj.metainfo.sha256 = obj.obj_digest; obj 
= _links(obj); } } // bibliography optional only one 1~ level if (the_document_bibliography_section.length > 1) { // writeln("bibliography"); foreach (ref obj; the_document_bibliography_section) { if (obj.metainfo.is_a == "heading") { debug(dom) { writeln(obj.text); } if (obj.metainfo.heading_lev_markup == 1) { obj_cite_digits = ocn_emit(eN.ocn.on); obj.metainfo.ocn = obj_cite_digits.object_number; obj.metainfo.identifier = obj_cite_digits.identifier; } if (obj.metainfo.heading_lev_markup <= 4) { segnames_0_to_4 ~= obj.tags.segment_anchor_tag_epub; if (obj.metainfo.heading_lev_markup == 4) { obj.tags.segname_prev = segnames["html"][obj.ptr.html_segnames - 1]; if (segnames["html"].length > obj.ptr.html_segnames + 1) { obj.tags.segname_next = segnames["html"][obj.ptr.html_segnames + 1]; } assert(obj.tags.anchor_tag_html == segnames["html"][obj.ptr.html_segnames], obj.tags.anchor_tag_html ~ "!=" ~ segnames["html"][obj.ptr.html_segnames]); } } // dom structure (marked up & collapsed) if (opt_action.meta_processing_xml_dom) { obj = obj.obj_dom_structure_set_markup_tags(dom_structure_markedup_tags_status, obj.metainfo.heading_lev_markup); obj = obj.obj_dom_set_collapsed_tags(dom_structure_collapsed_tags_status, obj.metainfo.heading_lev_collapsed); } obj = obj.obj_heading_ancestors(lv_ancestors_txt); } else if (obj.metainfo.is_a == "bibliography") { obj_cite_digits = ocn_emit(eN.ocn.on); obj.metainfo.ocn = obj_cite_digits.object_number; obj.metainfo.identifier = obj_cite_digits.identifier; } obj.metainfo.sha256 = obj.obj_digest; obj = _links(obj); } } // book index, optional only one 1~ level int ocn_ = obj_cite_digits.object_number; int ocn_bkidx_ = 0; int ocn_bidx_; if (the_document_bookindex_section.length > 1) { // writeln("book index"); // scroll dom_structure_markedup_tags_status_buffer = dom_structure_markedup_tags_status.dup; dom_structure_collapsed_tags_status_buffer = dom_structure_collapsed_tags_status.dup; foreach (ref obj; the_document_bookindex_section) { if 
(obj.metainfo.is_a == "heading") { // debug(dom) { } if (obj.metainfo.heading_lev_markup <= 4) { segnames_0_to_4 ~= obj.tags.segment_anchor_tag_epub; } if (obj.metainfo.heading_lev_markup == 1) { obj_cite_digits = ocn_emit(eN.ocn.on); obj.metainfo.ocn = obj_cite_digits.object_number; obj.metainfo.identifier = obj_cite_digits.identifier; } if (obj.metainfo.heading_lev_markup <= 4) { if (obj.metainfo.heading_lev_markup == 4) { obj.tags.segname_prev = segnames["html"][obj.ptr.html_segnames - 1]; if (segnames["html"].length > obj.ptr.html_segnames + 1) { obj.tags.segname_next = segnames["html"][obj.ptr.html_segnames + 1]; } assert(obj.tags.anchor_tag_html == segnames["html"][obj.ptr.html_segnames], obj.tags.anchor_tag_html ~ "!=" ~ segnames["html"][obj.ptr.html_segnames]); } } // dom structure (marked up & collapsed) if (opt_action.meta_processing_xml_dom) { obj = obj.obj_dom_structure_set_markup_tags(dom_structure_markedup_tags_status, obj.metainfo.heading_lev_markup); obj = obj.obj_dom_set_collapsed_tags(dom_structure_collapsed_tags_status, obj.metainfo.heading_lev_collapsed); } obj = obj.obj_heading_ancestors(lv_ancestors_txt); } else if (obj.metainfo.is_a == "bookindex") { obj_cite_digits = ocn_emit(eN.ocn.bkidx); obj.metainfo.ocn = obj_cite_digits.object_number; obj.metainfo.identifier = obj_cite_digits.identifier; obj.metainfo.o_n_book_index = obj_cite_digits.bkidx; obj.metainfo.object_number_type = OCNtype.bkidx; } obj.metainfo.sha256 = obj.obj_digest; obj = _links(obj); } // TODO assert failure, reinstate // assert(obj_cite_digit_bkidx == ocn_bidx_ obj_cite_digit_bkidx ~ " == ocn_" ~ ocn_ ~ "?"); } // blurb optional only one 1~ level if (the_document_blurb_section.length > 1) { // writeln("blurb"); foreach (ref obj; the_document_blurb_section) { if (obj.metainfo.is_a == "heading") { debug(dom) { writeln(obj.text); } if (obj.metainfo.heading_lev_markup == 1) { obj_cite_digits = ocn_emit(eN.ocn.on); obj.metainfo.ocn = obj_cite_digits.object_number; 
obj.metainfo.identifier = obj_cite_digits.identifier; } if (obj.metainfo.heading_lev_markup <= 4) { segnames_0_to_4 ~= obj.tags.segment_anchor_tag_epub; if (obj.metainfo.heading_lev_markup == 4) { obj.tags.segname_prev = segnames["html"][obj.ptr.html_segnames - 1]; if (segnames["html"].length > obj.ptr.html_segnames + 1) { obj.tags.segname_next = segnames["html"][obj.ptr.html_segnames + 1]; } assert(obj.tags.anchor_tag_html == segnames["html"][obj.ptr.html_segnames], obj.tags.anchor_tag_html ~ "!=" ~ segnames["html"][obj.ptr.html_segnames]); } } // dom structure (marked up & collapsed) if (opt_action.meta_processing_xml_dom) { obj = obj.obj_dom_structure_set_markup_tags(dom_structure_markedup_tags_status, obj.metainfo.heading_lev_markup); obj = obj.obj_dom_set_collapsed_tags(dom_structure_collapsed_tags_status, obj.metainfo.heading_lev_collapsed); } obj = obj.obj_heading_ancestors(lv_ancestors_txt); } else if (obj.metainfo.is_a == "blurb") { obj_cite_digits = ocn_emit(eN.ocn.off); obj.metainfo.object_number_off = obj_cite_digits.off; obj.metainfo.object_number_type = OCNtype.non; } obj.metainfo.sha256 = obj.obj_digest; obj = _links(obj); } } // get descendants if (the_document_body_section.length > 1) { auto pairs = after_doc_get_descendants( the_document_head_section ~ the_document_body_section ~ the_document_endnotes_section ~ the_document_glossary_section ~ the_document_bibliography_section ~ the_document_bookindex_section ~ the_document_blurb_section ~ the_document_xml_dom_tail_section ); debug(descendants_tuple) { pairs = pairs.sort(); foreach (pair; pairs) { // (pair; pairs.sort()) writeln(pair[0], "..", pair[1]); } } foreach (ref obj; the_document_head_section) { if (obj.metainfo.is_a == "heading") { foreach (pair; pairs) { if (obj.metainfo.ocn == pair[0]) { obj.metainfo.last_descendant_ocn = pair[1]; } } } } if (the_document_body_section.length > 1) { foreach (ref obj; the_document_body_section) { if (obj.metainfo.is_a == "heading") { foreach (pair; pairs) 
{ if (obj.metainfo.ocn == pair[0]) { obj.metainfo.last_descendant_ocn = pair[1]; } } } } } if (the_document_endnotes_section.length > 1) { foreach (ref obj; the_document_endnotes_section) { if (obj.metainfo.is_a == "heading") { foreach (pair; pairs) { if (obj.metainfo.ocn == pair[0]) { obj.metainfo.last_descendant_ocn = pair[1]; } } } } } if (the_document_glossary_section.length > 1) { foreach (ref obj; the_document_glossary_section) { if (obj.metainfo.is_a == "heading") { foreach (pair; pairs) { if (obj.metainfo.ocn == pair[0]) { obj.metainfo.last_descendant_ocn = pair[1]; } } } } } if (the_document_bibliography_section.length > 1) { foreach (ref obj; the_document_bibliography_section) { if (obj.metainfo.is_a == "heading") { foreach (pair; pairs) { if (obj.metainfo.ocn == pair[0]) { obj.metainfo.last_descendant_ocn = pair[1]; } } } } } if (the_document_bookindex_section.length > 1) { foreach (ref obj; the_document_bookindex_section) { if (obj.metainfo.is_a == "heading") { foreach (pair; pairs) { if (obj.metainfo.ocn == pair[0]) { obj.metainfo.last_descendant_ocn = pair[1]; } } } } } if (the_document_blurb_section.length > 1) { foreach (ref obj; the_document_blurb_section) { if (obj.metainfo.is_a == "heading") { foreach (pair; pairs) { if (obj.metainfo.ocn == pair[0]) { obj.metainfo.last_descendant_ocn = pair[1]; } } } } } if (the_document_xml_dom_tail_section.length > 1) { foreach (ref obj; the_document_xml_dom_tail_section) { if (obj.metainfo.is_a == "heading") { foreach (pair; pairs) { if (obj.metainfo.ocn == pair[0]) { obj.metainfo.last_descendant_ocn = pair[1]; } } } } } } // TODO // - note create/insert heading object sole purpose eof close all open tags // sort out: // - obj.metainfo.dom_structure_markedup_tags_status = dom_structure_markedup_tags_status; // - obj.metainfo.dom_structure_collapsed_tags_status = dom_structure_collapsed_tags_status; comp_obj_ = set_object_heading("lev1", "empty", "empty", ""); comp_obj_.metainfo.identifier = ""; 
comp_obj_.metainfo.dummy_heading = true;
comp_obj_.metainfo.object_number_off = true;
comp_obj_.metainfo.object_number_type = 0;
// the placeholder carries no anchors or segment names
comp_obj_.tags.segment_anchor_tag_epub = "";
comp_obj_.tags.anchor_tag_html = "";
comp_obj_.tags.in_segment_html = "";
comp_obj_.tags.html_segment_anchor_tag_is = "";
comp_obj_.tags.epub_segment_anchor_tag_is = "";
comp_obj_.metainfo.heading_lev_markup = 9;
comp_obj_.metainfo.heading_lev_collapsed = 9;
comp_obj_.metainfo.parent_ocn = 0;
comp_obj_.metainfo.parent_lev_markup = 0;
// snapshot the current dom tag status, then run the tag setters with level 0
comp_obj_.metainfo.dom_structure_markedup_tags_status = dom_structure_markedup_tags_status.dup;
comp_obj_.metainfo.dom_structure_collapsed_tags_status = dom_structure_collapsed_tags_status.dup;
comp_obj_ = comp_obj_.obj_dom_structure_set_markup_tags(dom_structure_markedup_tags_status, 0);
comp_obj_ = comp_obj_.obj_dom_set_collapsed_tags(dom_structure_collapsed_tags_status, 0);
comp_obj_ = comp_obj_.obj_heading_ancestors(lv_ancestors_txt);
// the_dom_tail_section ~= comp_obj_; // remove tail for now, decide on later
// the doc
// every document section, addressable by its section key
ObjGenericComposite[][string] document_the = [
  "head": the_document_head_section,
  "toc": the_document_toc_section,
  // substantive/body:
  "body": the_document_body_section,
  // backmatter:
  "endnotes": the_document_endnotes_section,
  "glossary": the_document_glossary_section,
  "bibliography": the_document_bibliography_section,
  "bookindex": the_document_bookindex_section,
  "blurb": the_document_blurb_section,
  // dom tail only
  "tail": the_document_xml_dom_tail_section,
];
// document parts keys as needed
// per output kind, the ordered list of section keys to be processed
string[][string] document_section_keys_sequenced = [
  "scroll": ["head", "toc", "body",],
  "seg": ["head", "toc", "body",],
  "sql": ["head", "body",],
  "latex": ["head", "toc", "body",]
];
// a backmatter section is sequenced only when it holds more than one object;
// endnotes go to scroll, seg and latex, the remaining sections additionally to sql
foreach (section_key; ["endnotes", "glossary", "bibliography", "bookindex", "blurb"]) {
  if (document_the[section_key].length <= 1) {
    continue;
  }
  foreach (output_key; ["scroll", "seg", "sql", "latex"]) {
    if (output_key == "sql" && section_key == "endnotes") {
      continue;
    }
    document_section_keys_sequenced[output_key] ~= section_key;
  }
}
// the tail key is added for scroll and seg when any html or epub output is requested
if (opt_action.html || opt_action.html_scroll || opt_action.html_seg || opt_action.epub) {
  foreach (output_key; ["scroll", "seg"]) {
    document_section_keys_sequenced[output_key] ~= "tail";
  }
}
// segnames
// copies are taken here because segnames itself is destroyed in the restart step
string[] segnames_4 = segnames["html"].dup;
string[] segnames_lv1to4 = segnames["epub"].dup;
debug(segnames) {
  writeln("segnames_lv4: ", segnames_4);
  writeln("segnames_lv1to4: ", segnames_lv1to4);
}
#+END_SRC

*** return structure

#+NAME: docAbstractionReturnsStruct
#+HEADER: :noweb yes
#+BEGIN_SRC d
// restart
// clear the section arrays and working state held in this scope
destroy(the_document_head_section);
destroy(the_document_toc_section);
destroy(the_document_body_section);
destroy(the_document_endnotes_section);
destroy(the_document_glossary_section);
destroy(the_document_bibliography_section);
destroy(the_document_bookindex_section);
destroy(the_document_blurb_section);
destroy(the_document_xml_dom_tail_section);
destroy(segnames);
destroy(bookindex_unordered_hashes);
destroy(an_object);
// reset the remaining counters, pointers and status arrays
obj_cite_digits = ocn_emit(eN.ocn.reset);
biblio_arr_json = [];
obj_cite_digit_ = 0;
html_segnames_ptr = 0;
html_segnames_ptr_cntr = 0;
content_non_header = "8";
dom_structure_markedup_tags_status = [ 0, 0, 0, 0, 0, 0, 0, 0,];
dom_structure_markedup_tags_status_buffer = [ 0, 0, 0, 0, 0, 0, 0, 0,];
dom_structure_collapsed_tags_status = [ 0, 0, 0, 0, 0, 0, 0, 0,];
dom_structure_collapsed_tags_status_buffer = [ 0, 0, 0, 0, 0, 0, 0, 0,];
lev_anchor_tag = "";
anchor_tag = "";
// identify parts
// DocHas_ exposes the per-document counters (dochas), the image list, the sequenced
// section keys, the segment names and the tag associations gathered in this scope
struct DocHas_ {
  uint inline_links() {
    return dochas["inline_links"];
  }
  uint inline_notes_reg() {
    return dochas["inline_notes"];
  }
  uint inline_notes_star() {
    return dochas["inline_notes_star"];
  }
  uint codeblocks() {
    return dochas["codeblock"];
  }
  uint tables() {
    return dochas["table"];
  }
  uint blocks() {
    return dochas["block"];
  }
  uint groups() {
    return dochas["group"];
  }
  uint poems() {
    return dochas["poem"];
  }
  uint quotes() {
    return dochas["quote"];
  }
  // number of entries in image_list, counted by walking the range directly
  // (the range is passed by value, so image_list itself is not consumed);
  // counting the elements avoids miscounting image names that contain a comma
  ulong images() {
    import std.range.primitives : walkLength;
    return walkLength(image_list);
  }
  auto imagelist() {
    return image_list;
  }
  auto keys_seq() {
    return docSectKeysSeq!()(document_section_keys_sequenced);
  }
  string[] segnames_lv4() {
    return segnames_4;
  }
  string[] segnames_lv_0_to_4() {
    return segnames_0_to_4;
  }
  string[string][string] tag_associations() {
    return tag_assoc;
  }
}
auto doc_has() {
  return DocHas_();
}
// the doc to be returned
// pairs the sections (by key) with the DocHas_ accessor
struct ST_docAbstraction {
  ObjGenericComposite[][string] document_the;
  DocHas_ doc_has;
}
ST_docAbstraction ret;
{
  ret.document_the = document_the;
  ret.doc_has = doc_has;
}
return ret;
#+END_SRC

* document header including copyright & license

#+NAME: doc_header_including_copyright_and_license
#+HEADER: :noweb yes
#+BEGIN_SRC emacs-lisp
<<./sisudoc_spine_version_info_and_doc_header_including_copyright_and_license.org:spine_doc_header_including_copyright_and_license()>>
#+END_SRC

* __END__