From 860b894a10f3526e6bd73d53850764c0ad95ab99 Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Sat, 29 Jun 2024 13:54:28 -0400 Subject: document digests and reduction in use of tuples --- src/sisudoc/meta/metadoc.d | 10 +++----- src/sisudoc/meta/metadoc_from_src.d | 36 ++++++++++++++++++++------- src/sisudoc/meta/metadoc_from_src_functions.d | 25 ++++++++++--------- src/sisudoc/meta/metadoc_object_setter.d | 1 + 4 files changed, 45 insertions(+), 27 deletions(-) (limited to 'src/sisudoc/meta') diff --git a/src/sisudoc/meta/metadoc.d b/src/sisudoc/meta/metadoc.d index a1899da..ed9a5b1 100644 --- a/src/sisudoc/meta/metadoc.d +++ b/src/sisudoc/meta/metadoc.d @@ -64,7 +64,6 @@ template spineAbstraction() { sisudoc.io_out.hub; mixin spineBiblio; mixin outputHub; - enum headBody { header, body_content, insert_file_list, image_list } enum makeMeta { make, meta } enum docAbst { doc_abstract_obj, doc_has } @system auto spineAbstraction(E,P,O,Cfg,M,S)( @@ -89,7 +88,6 @@ template spineAbstraction() { } auto _header_body_insertfilelist_imagelist = spineRawMarkupContent!()(_opt_action, _manifest.src.path_and_fn); - static assert(_header_body_insertfilelist_imagelist.length==4); if ((_opt_action.debug_do) || (_opt_action.debug_do_stages) ) { @@ -98,7 +96,7 @@ template spineAbstraction() { debug(header_and_body) { writeln(header); writeln(_header_body_insertfilelist_imagelist.length); - writeln(_header_body_insertfilelist_imagelist.length[headBody.body_content][0]); + // writeln(_header_body_insertfilelist_imagelist.length.body_content[0]); } /+ ↓ split header into make and meta +/ if ((_opt_action.debug_do) @@ -109,7 +107,7 @@ template spineAbstraction() { import sisudoc.meta.conf_make_meta_yaml; _make_and_meta_struct = docHeaderMakeAndMetaTupYamlExtractAndConvertToStruct!()( - _header_body_insertfilelist_imagelist[headBody.header], + _header_body_insertfilelist_imagelist.header_raw, _make_and_meta_struct, _manifest, _opt_action, @@ -127,7 +125,7 @@ template spineAbstraction() { writeln("step3 commence → (document abstraction (da); da keys; segnames; doc_matters) [", _manifest.src.filename, "]"); } auto da = docAbstraction!()( - _header_body_insertfilelist_imagelist[headBody.body_content], + _header_body_insertfilelist_imagelist.sourcefile_body_content, _make_and_meta_struct, _opt_action, _manifest, @@ -275,7 +273,7 @@ template spineAbstraction() { auto srcs() { struct SRC_ { auto file_insert_list() { - return _header_body_insertfilelist_imagelist[headBody.insert_file_list]; + return _header_body_insertfilelist_imagelist.insert_file_list; } auto image_list() { return _doc_has_struct.imagelist; diff --git a/src/sisudoc/meta/metadoc_from_src.d b/src/sisudoc/meta/metadoc_from_src.d index 32954f1..4bd747d 100644 --- a/src/sisudoc/meta/metadoc_from_src.d +++ b/src/sisudoc/meta/metadoc_from_src.d @@ -57,6 +57,7 @@ template docAbstraction() { import std.algorithm, std.container, + std.digest.sha, std.file, std.json, std.path; @@ -970,8 +971,7 @@ template docAbstraction() { } obj = _links(obj); } - if (the_document_toc_section.length > 1) { - // scroll + if (the_document_toc_section.length > 1) { // writeln("toc"); // scroll dom_structure_markedup_tags_status_buffer = dom_structure_markedup_tags_status.dup; dom_structure_collapsed_tags_status_buffer = dom_structure_collapsed_tags_status.dup; foreach (ref obj; the_document_toc_section) { @@ -997,7 +997,7 @@ template docAbstraction() { // images string[] _images; // multiple 1~ levels, loop through document body - if (the_document_body_section.length > 1) { + if (the_document_body_section.length > 1) { // writeln("body"); foreach (ref obj; the_document_body_section) { if (!(obj.metainfo.identifier.empty)) { if (!(((obj.metainfo.identifier) in tag_assoc) @@ -1033,12 +1033,26 @@ template docAbstraction() { _images ~= extract_images(obj.text); obj = _image_dimensions(obj, manifested); } + obj.metainfo.sha256 = obj.obj_digest; obj = _links(obj); } } - auto image_list = (_images.sort()).uniq; + auto image_list = (_images.sort()).uniq; // also get digest on each image here? // workon + if (_images.length > 0) { + foreach (img; image_list) { + try { // also get sha digest on image file + // read_image + auto data = (cast(byte[]) (manifested.src.image_dir_path ~ "/" ~ img).read); + // calculate, digest, hash + writefln("%s\n%-(%02x%)::%s ⋅ %s", img, data.sha256Of, data.length, img); + writefln("%-(%02x%) ⋅ %s ⋅ %s", data.sha256Of, img, data.length); + } catch (Exception ex) { + writeln("WARNING, image not found: ", img, "\n ", manifested.src.image_dir_path ~ "/" ~ img); + } + } + } // endnotes optional only one 1~ level - if (the_document_endnotes_section.length > 1) { + if (the_document_endnotes_section.length > 1) { // writeln("endnotes"); dom_structure_markedup_tags_status_buffer = dom_structure_markedup_tags_status.dup; dom_structure_collapsed_tags_status_buffer = dom_structure_collapsed_tags_status.dup; dom_structure_markedup_tags_status = dom_structure_markedup_tags_status_buffer.dup; @@ -1073,7 +1087,7 @@ template docAbstraction() { } } // glossary optional only one 1~ level - if (the_document_glossary_section.length > 1) { + if (the_document_glossary_section.length > 1) { // writeln("glossary"); foreach (ref obj; the_document_glossary_section) { if (obj.metainfo.is_a == "heading") { debug(dom) { writeln(obj.text); } @@ -1104,11 +1118,12 @@ template docAbstraction() { obj.metainfo.ocn = obj_cite_digits.object_number; obj.metainfo.identifier = obj_cite_digits.identifier; } + obj.metainfo.sha256 = obj.obj_digest; obj = _links(obj); } } // bibliography optional only one 1~ level - if (the_document_bibliography_section.length > 1) { + if (the_document_bibliography_section.length > 1) { // writeln("bibliography"); foreach (ref obj; the_document_bibliography_section) { if (obj.metainfo.is_a == "heading") { debug(dom) { writeln(obj.text); } @@ -1139,6 +1154,7 @@ template docAbstraction() { obj.metainfo.ocn = obj_cite_digits.object_number; obj.metainfo.identifier = obj_cite_digits.identifier; } + obj.metainfo.sha256 = obj.obj_digest; obj = _links(obj); } } @@ -1146,7 +1162,7 @@ template docAbstraction() { int ocn_ = obj_cite_digits.object_number; int ocn_bkidx_ = 0; int ocn_bidx_; - if (the_document_bookindex_section.length > 1) { // scroll + if (the_document_bookindex_section.length > 1) { // writeln("book index"); // scroll dom_structure_markedup_tags_status_buffer = dom_structure_markedup_tags_status.dup; dom_structure_collapsed_tags_status_buffer = dom_structure_collapsed_tags_status.dup; foreach (ref obj; the_document_bookindex_section) { @@ -1183,13 +1199,14 @@ template docAbstraction() { obj.metainfo.o_n_book_index = obj_cite_digits.bkidx; obj.metainfo.object_number_type = OCNtype.bkidx; } + obj.metainfo.sha256 = obj.obj_digest; obj = _links(obj); } // TODO assert failure, reinstate // assert(obj_cite_digit_bkidx == ocn_bidx_ obj_cite_digit_bkidx ~ " == ocn_" ~ ocn_ ~ "?"); } // blurb optional only one 1~ level - if (the_document_blurb_section.length > 1) { + if (the_document_blurb_section.length > 1) { // writeln("blurb"); foreach (ref obj; the_document_blurb_section) { if (obj.metainfo.is_a == "heading") { debug(dom) { writeln(obj.text); } @@ -1220,6 +1237,7 @@ template docAbstraction() { obj.metainfo.object_number_off = obj_cite_digits.off; obj.metainfo.object_number_type = OCNtype.non; } + obj.metainfo.sha256 = obj.obj_digest; obj = _links(obj); } } diff --git a/src/sisudoc/meta/metadoc_from_src_functions.d b/src/sisudoc/meta/metadoc_from_src_functions.d index 29e675c..b5956c0 100644 --- a/src/sisudoc/meta/metadoc_from_src_functions.d +++ b/src/sisudoc/meta/metadoc_from_src_functions.d @@ -54,18 +54,6 @@ module sisudoc.meta.metadoc_from_src_functions; @safe: template docAbstractionFunctions() { // ↓ abstraction imports - import - std.algorithm, - std.container, - std.file, - std.json, - std.path; - import - sisudoc.meta, - sisudoc.meta.defaults, - sisudoc.meta.rgx, - sisudoc.meta.metadoc_object_setter, - sisudoc.meta.rgx; // ↓ abstraction mixins mixin ObjectSetter; mixin InternalMarkup; @@ -2998,6 +2986,19 @@ template docAbstractionFunctions() { return obj; } // ↑ - object tags + // ↓ - object digest + pure ubyte[32] obj_digest()( + ObjGenericComposite obj, + ) { + obj.metainfo.sha256 = obj.text.sha256Of; + // if (obj.metainfo.is_a == "heading") { + // writeln(obj.metainfo.sha256.toHexString, " ", obj.metainfo.ocn, " ", obj.metainfo.is_a, " ", obj.metainfo.heading_lev_markup); + // } else { + // writeln(obj.metainfo.sha256.toHexString, " ", obj.metainfo.ocn, " ", obj.metainfo.is_a); + // } + return obj.metainfo.sha256; + } + // ↑ - object digest // ↓ - table of contents @system ObjGenericComposite[] backmatter_gather_table_of_contents( ObjGenericComposite[] the_document_endnotes_section, diff --git a/src/sisudoc/meta/metadoc_object_setter.d b/src/sisudoc/meta/metadoc_object_setter.d index a2ceff6..8b2daf0 100644 --- a/src/sisudoc/meta/metadoc_object_setter.d +++ b/src/sisudoc/meta/metadoc_object_setter.d @@ -173,6 +173,7 @@ template ObjectSetter() { int parent_lev_markup = 0; int parent_ocn = 0; int last_descendant_ocn = 0; + ubyte[32] sha256; } struct ObjGenericComposite { string text = ""; -- cgit v1.2.3