-*- mode: org -*-
#+TITLE:       spine (doc_reform) information files
#+DESCRIPTION: documents - structuring, various output representations & search
#+FILETAGS:    :spine:info:
#+AUTHOR:      Ralph Amissah
#+EMAIL:       [[mailto:ralph.amissah@gmail.com][ralph.amissah@gmail.com]]
#+COPYRIGHT:   Copyright (C) 2015 - 2022 Ralph Amissah
#+LANGUAGE:    en
#+STARTUP:     content hideblocks hidestars noindent entitiespretty
#+OPTIONS:     H:3 num:nil toc:t \n:nil @:t ::t |:t ^:nil _:nil -:t f:t *:t <:t
#+PROPERTY:    header-args  :exports code
#+PROPERTY:    header-args+ :noweb yes
#+PROPERTY:    header-args+ :eval no
#+PROPERTY:    header-args+ :results no
#+PROPERTY:    header-args+ :cache no
#+PROPERTY:    header-args+ :padline no
#+PROPERTY:    header-args+ :mkdirp yes

* Markup conversion tools

** README

#+HEADER: :tangle "../sundry/misc/util/d/tools/markup_conversion/README"
#+BEGIN_SRC text
#+END_SRC

** endnotes, inline from binary
*** tangle

#+HEADER: :tangle "../sundry/misc/util/d/tools/markup_conversion/endnotes_inline_from_binary.d"
#+HEADER: :tangle-mode (identity #o755)
#+HEADER: :shebang #!/usr/bin/env rdmd
#+BEGIN_SRC d
<<inline_notes_head>>
<<inline_notes_imports>>
/+
  Entry point of the endnote inlining tool.
  Every command line argument is examined in turn; the noweb fragments below
  supply the regex setup, the per-file processing and the error handling.
+/
void main(string[] args) {
  <<inline_notes_init>>
  foreach(arg; args[1..$]) {
    if (
      !(arg.match(regex(r"--\w+")))         // ignore arguments containing a --option
      && arg.match(regex(r"\w+?\.ss[itm]")) // process names containing .ssi .sst or .ssm
    ) {
      <<inline_notes_pre_loops>>
        <<inline_notes_loop_doc_body>>
        <<inline_notes_loop_adjust_and_output>>
      <<inline_notes_exceptions>>
    }
  }
}
#+END_SRC

*** head

#+NAME: inline_notes_head
#+BEGIN_SRC d
/+
  - read in file .sst .ssi .ssm
  - loop twice
    - first
      - check for and skip code blocks
      - use unique code marker for endnote markers in text and give an endnote
        number ★1, increment
      - extract all endnotes in array
    - second
      - check that the footnote marker number count matches the number of notes
        in the array
        - if they match either:
            - substitute each endnote marker with the array footnote[number-1]
            - substitute each endnote marker with footnote
              as inlined footnote markup (footnote number not needed)
        - if they do not match exit
  - check whether changes have been made
    - if so write file with inline footnotes in sub-directory converted_output_/
      using the same name as the original file
    - else, exit
+/
#+END_SRC

*** import

#+NAME: inline_notes_imports
#+BEGIN_SRC d
import std.stdio;
import std.file;
import std.array : split;
import std.exception;
import core.stdc.errno;
import std.regex;
import std.format;
import std.conv;
#+END_SRC

*** init

#+NAME: inline_notes_init
#+BEGIN_SRC d
/+ compile-time regexes used to classify paragraphs +/
// text starting with one or more '%' followed by a space
static comment                 = ctRegex!(`^%+ `);
// opening of a tic code block: ``` code, optionally .syntax and (attributes)
static block_tic_code_open     = ctRegex!("^`{3} code(?:[.](?P<syntax>[a-z][0-9a-z#+_]+))?(?:[(](?P<attrib>[ a-zA-Z0-9;:,]*)[)])?");
// a line consisting only of ``` (multi-line mode)
static block_tic_close         = ctRegex!("^(`{3})$","m");
// opening of a curly code block: code{ optionally with .syntax and (attributes)
static block_curly_code_open   = ctRegex!(`^(?:code(?:[.](?P<syntax>[a-z][0-9a-z_]+))?(?:[(](?P<attrib>[ a-zA-Z0-9;:,]*)[)])?[{][ ]*$)`);
// text starting with }code
static block_curly_code_close  = ctRegex!(`^([}]code)`);
// endnote reference marker ~^ ; "tail" captures an optional ) or ] plus a space, or the end of a line
auto rgx_endnote_ref           = ctRegex!(`([~]\^)(?P<tail>[)\]]? |$)`, "gm");
// endnote line: starts with ^~ and whitespace; capture 1 is what follows
auto rgx_endnote               = ctRegex!(`^\^~\s+(.+|\n)`, "gm");
#+END_SRC

*** pre-loops

#+NAME: inline_notes_pre_loops
#+BEGIN_SRC d
writeln(arg); // report the argument being processed
string filename                  = arg;
try { // closed by the "exceptions" fragment
  /+ contents: paragraphs kept for output;
     endnotes: note texts gathered from endnote paragraphs;
     endnote_refs: declared here, not used in the visible fragments +/
  string[] contents, endnotes, endnote_refs;
  string text                    = filename.readText;
  string[] paragraphs            = text.split("\n\n"); // paragraphs are separated by a blank line
  int endnote_ref_count          = 0;                  // number of ~^ markers seen in the body
  int code_block_status          = 0;                  // holds a codeBlock value; 0 == codeBlock.off
  enum codeBlock { off, curly, tic, }
#+END_SRC

*** loop doc body

#+NAME: inline_notes_loop_doc_body
#+BEGIN_SRC d
foreach (paragraph; paragraphs) { /+ loop to gather binary endnotes +/
  if (code_block_status == codeBlock.off
    && paragraph.match(rgx_endnote)
  ) {
    /+ endnote paragraph: store the note text only, it is not kept in the body +/
    endnotes ~= replaceAll!(m => m[1])
      (paragraph, rgx_endnote);
  } else {
    if ((code_block_status == codeBlock.curly
        && paragraph.matchFirst(block_curly_code_close))
      || (code_block_status == codeBlock.tic
        && paragraph.matchFirst(block_tic_close))
    ) {
      /+ closing marker of the code block type currently open +/
      code_block_status = codeBlock.off;
    } else if (code_block_status != codeBlock.off) {
      // inside a code block: skip, prevent search for endnotes
    } else if (paragraph.matchFirst(block_curly_code_open)) {
      code_block_status = codeBlock.curly;
    } else if (paragraph.matchFirst(block_tic_code_open)) {
      code_block_status = codeBlock.tic;
    } else if (auto m = paragraph.matchAll(rgx_endnote_ref)) {
      /+ regular text: count every endnote reference marker it contains +/
      foreach (n; m) {
        endnote_ref_count++; // endnote_refs ~= (n.captures[1]);
      }
    }
    /+ every non-endnote paragraph (code or text) is kept for output +/
    contents ~= paragraph;
  }
}
#+END_SRC

*** (loop to) adjustment & output

#+NAME: inline_notes_loop_adjust_and_output
#+BEGIN_SRC d
/+
  Inline the gathered endnotes and write the result, but only when the number
  of endnotes equals the number of endnote references; otherwise report the
  mismatch and write nothing.
+/
if (endnotes.length == endnote_ref_count) {
  import std.outbuffer;
  import std.path : dirName;
  writeln("endnote ref count:         ", endnote_ref_count);
  writeln("number of binary endnotes: ", endnotes.length);
  int endnote_count = -1;
  auto buffer = new OutBuffer();
  foreach (content; contents) { /+ loop to inline endnotes +/
    /+ each marker is replaced, in document order, by the next gathered endnote;
       NOTE(review): markers inside code blocks are not counted in the first
       loop but are matched here, so they would exhaust the endnotes array +/
    content = replaceAll!(m => "~{ " ~ endnotes[++endnote_count] ~ " }~" ~ m["tail"] )
      (content, rgx_endnote_ref);
    buffer.write(content ~ "\n\n");
  }
  try {
    string dir_out = "converted_output_";
    string path_and_file_out = dir_out ~ "/" ~ filename;
    /+ create the full parent directory of the output file, so that input
       given with a directory part (e.g. docs/file.sst) can be written too +/
    path_and_file_out.dirName.mkdirRecurse;
    auto f = File(path_and_file_out, "w");
    f.write(buffer);
    writeln("wrote: ", path_and_file_out);
  } catch (FileException ex) {  // raised by mkdirRecurse
    writeln("did not write file: ", ex.msg);
  } catch (ErrnoException ex) { // raised by File open / write
    writeln("did not write file: ", ex.msg);
  }
} else {
  writeln("ERROR binary endnote mismatch, check markup,\nmismatch in the number of endnotes & endnote references!");
  writeln("  number of endnotes:     ", endnotes.length);
  writeln("  number of endnote refs: ", endnote_ref_count); // endnote_refs.length,
}
// assert(endnotes.length == endnote_ref_count);
#+END_SRC

*** exceptions

#+NAME: inline_notes_exceptions
#+BEGIN_SRC d
} catch (ErrnoException ex) {
  /+ std.stdio failures: report them and carry on with the next argument +/
  switch(ex.errno) {
    case EPERM:
    case EACCES: // Permission denied
      stderr.writeln("ERROR permission denied (", filename, "): ", ex.msg);
      break;
    case ENOENT: // File does not exist
      stderr.writeln("ERROR no such file or directory (", filename, "): ", ex.msg);
      break;
    default:     // Handle other errors
      stderr.writeln("ERROR (", filename, "): ", ex.msg);
      break;
  }
} catch (FileException ex) {
  /+ std.file failures (e.g. readText on a missing or unreadable file) are
     FileException, not ErrnoException +/
  stderr.writeln("ERROR (", filename, "): ", ex.msg);
}
#+END_SRC

** conversion from sisu (sisu bespoke headers) any binary to inline notes TODO
*** tangle

#+HEADER: :tangle "../sundry/misc/util/d/tools/markup_conversion/markup_conversion_from_sisu_ruby_to_sisu_spine.d"
#+HEADER: :tangle-mode (identity #o755)
#+HEADER: :shebang #!/usr/bin/env rdmd
#+BEGIN_SRC d
<<from_sisu_rb_head>>
<<from_sisu_rb_imports>>
/+
  Entry point of the sisu (ruby) to sisu spine markup conversion tool.
  The regexes and the nested formatting helpers are set up once, then every
  command line argument is examined in turn.
+/
void main(string[] args) {
  <<from_sisu_rb_init>>
  <<from_sisu_rb_body_format>>
  <<from_sisu_rb_header_format_0>>
  <<from_sisu_rb_header_format_1>>
  <<from_sisu_rb_header_format_2>>
  foreach(arg; args[1..$]) {
    if (
      !(arg.match(regex(r"--\w+")))         // ignore arguments containing a --option
      && arg.match(regex(r"\w+?\.ss[itm]")) // process names containing .ssi .sst or .ssm
    ) {
      <<from_sisu_rb_pre_loops>>
        <<from_sisu_rb_loop_doc_header>>
        <<from_sisu_rb_loop_doc_body>>
        <<from_sisu_rb_loop_adjust_and_output>>
      <<from_sisu_rb_exceptions>>
    }
  }
}
#+END_SRC

*** head

#+NAME: from_sisu_rb_head
#+BEGIN_SRC d
/+
  - read in file .sst .ssi .ssm
  - loop twice
    - first
      - check for and skip code blocks
      - use unique code marker for endnote markers in text and give an endnote
        number ★1, increment
      - extract all endnotes in array
    - second
      - check that the footnote marker number count matches the number of notes
        in the array
        - if they match either:
            - substitute each endnote marker with the array footnote[number-1]
            - substitute each endnote marker with footnote
              as inlined footnote markup (footnote number not needed)
        - if they do not match exit
  - check whether changes have been made
    - if so write file with inline footnotes in sub-directory converted_output_/
      using the same name as the original file
    - else, exit
+/
#+END_SRC

*** import

#+NAME: from_sisu_rb_imports
#+BEGIN_SRC d
import std.stdio;
import std.file;
import std.array : split, join;
import std.exception;
// import std.range;
import core.stdc.errno;
import std.regex;
import std.format;
import std.conv;
#+END_SRC

*** init

#+NAME: from_sisu_rb_init
#+BEGIN_SRC d
/+ compile-time regexes used to split the document and classify paragraphs +/
// level A heading at the start of a line: "A~ " or ":A~ "
static heading_a               = ctRegex!(`^:?[A][~] `, "m");
// text starting with one or more '%' followed by a space
static comment                 = ctRegex!(`^%+ `);
// opening of a tic code block: ``` code, optionally .syntax and (attributes)
static block_tic_code_open     = ctRegex!("^`{3} code(?:[.](?P<syntax>[a-z][0-9a-z#+_]+))?(?:[(](?P<attrib>[ a-zA-Z0-9;:,]*)[)])?");
// a line consisting only of ``` (multi-line mode)
static block_tic_close         = ctRegex!("^(`{3})$","m");
// opening of a curly code block: code{ optionally with .syntax and (attributes)
static block_curly_code_open   = ctRegex!(`^(?:code(?:[.](?P<syntax>[a-z][0-9a-z_]+))?(?:[(](?P<attrib>[ a-zA-Z0-9;:,]*)[)])?[{][ ]*$)`);
// text starting with }code
static block_curly_code_close  = ctRegex!(`^([}]code)`);
// endnote reference marker ~^ ; "tail" captures an optional ) or ] plus a space, or the end of a line
auto rgx_endnote_ref           = ctRegex!(`([~]\^)(?P<tail>[)\]]? |$)`, "gm");
// endnote line: starts with ^~ and whitespace; capture 1 is what follows
auto rgx_endnote               = ctRegex!(`^\^~\s+(.+|\n)`, "gm");
char[][] header0Content1(in string src_text) {
  /+
    Split the document on the _first_ match of "^:?A~ " into a two element
    array: [0] the header (everything before the match), [1] the content
    (the level A~ heading and everything after it).
    Throws an Exception when the text has no level A~ heading, as the
    document cannot then be split into header and body.
  +/
  auto m = src_text.dup.matchFirst(heading_a); // work on a mutable copy, no cast from immutable
  enforce(!m.empty,
    "document markup is broken, no level A~ heading found;"
    ~ " (header / body array split should == 2 (split is on level A~))"
  );
  char[][] header_and_content;
  header_and_content ~= m.pre;
  header_and_content ~= m.hit ~ m.post;
  return header_and_content;
}
#+END_SRC

*** body format
**** format body string

#+NAME: from_sisu_rb_body_format
#+BEGIN_SRC d
string format_body_string(string s) {
  /+
    Rewrite <br> (also </br>, </ br>) line break tags in body text as the
    " \\ " line break markup. Three passes, applied in this order:
    a tag at the very start of the text, a tag at the very end of the text,
    then every remaining tag together with the spaces around it.
  +/
  auto leading_break  = regex("^<(?:/[ ]*)?br>[ ]*");
  auto trailing_break = regex("[ ]*<(?:/[ ]*)?br>$");
  auto inline_break   = regex("[ ]*<(?:/[ ]*)?br>[ ]*");
  string result = replaceAll(s, leading_break, " \\\\ ");
  result = replaceAll(result, trailing_break, " \\\\");
  result = replaceAll(result, inline_break, " \\\\ ");
  return result;
}
#+END_SRC

*** header format
**** format header string

#+NAME: from_sisu_rb_header_format_0
#+BEGIN_SRC d
string format_header_string(string s) {
  /+
    Prepare header text for use inside a double quoted value:
    every double quote is preceded by a backslash, then <br> (also </br>)
    tags are rewritten as line break markup, first a tag at the very end of
    the text, then every remaining tag together with the spaces around it.
  +/
  auto double_quote   = regex("\"");
  auto trailing_break = regex("[ ]*<(?:/[ ]*)?br>$");
  auto inline_break   = regex("[ ]*<(?:/[ ]*)?br>[ ]*");
  string result = replaceAll(s, double_quote, "\\\"");
  result = replaceAll(result, trailing_break, " \\\\");
  result = replaceAll(result, inline_break, " \\\\ ");
  return result;
}
#+END_SRC

**** format main header

#+NAME: from_sisu_rb_header_format_1
#+BEGIN_SRC d
string format_main_header(string hm, string hs = "", string c = "") {
  /+
    Build a top level header entry.
    hm: main header name, always emitted as "hm:" on its own line;
    hs: name of the sub header that carries the content;
    c:  content; when empty only the "hm:" line is returned, otherwise an
        indented, double quoted `hs: "c"` line follows.
  +/
  string heading = hm ~ ":\n";
  if (c.length == 0) {
    return heading;
  }
  return heading
    ~ "  " ~ hs ~ ": "
    ~ "\"" ~ format_header_string(c) ~ "\"\n";
}
#+END_SRC

**** format sub header

#+NAME: from_sisu_rb_header_format_2
#+BEGIN_SRC d
string format_sub_header(string hs, string c) {
  /+
    Build a sub header line: two spaces of indent, the name hs, and the
    content c escaped by format_header_string inside double quotes.
  +/
  string quoted = "\"" ~ format_header_string(c) ~ "\"";
  return "  " ~ hs ~ ": " ~ quoted ~ "\n";
}
 #+END_SRC

*** pre-loops

#+NAME: from_sisu_rb_pre_loops
#+BEGIN_SRC d
writeln(arg); // report the argument being processed
string filename                  = arg;
try { // closed by the "exceptions" fragment
  /+ munged_header / munged_contents: converted header and body paragraphs;
     munged_endnotes: note texts gathered from endnote paragraphs;
     endnote_refs: declared here, not used in the visible fragments +/
  string[] munged_header, munged_contents, munged_endnotes, endnote_refs;
  char[][] hc;
  char[] src_header;
  string[] headers;
  char[] src_txt;
  string[] paragraphs;
  enum codeBlock { off, curly, tic, }
  string _tmp_header;                                   // scratch space for one converted header block
  int endnote_ref_count          = 0;                   // number of ~^ markers seen in the body
  int code_block_status          = codeBlock.off;
  string text                    = filename.readText;
  if (arg.match(regex(r"\w+?\.ss[tm]"))) {
    /+ .sst / .ssm: split into header and body on the level A~ heading;
       both parts are then split into paragraphs on blank lines +/
    hc                           = header0Content1(text);
    src_header                   = hc[0];
    headers                      = src_header.to!string.split("\n\n");
    src_txt                      = hc[1];
    paragraphs                   = src_txt.to!string.split("\n\n");
  } else if (arg.match(regex(r"\w+?\.ssi"))) {
    /+ .ssi: the whole text is treated as body, there is no header to convert +/
    headers                      = [];
    paragraphs                   = text.split("\n\n");
  }
#+END_SRC

*** loop doc header

#+NAME: from_sisu_rb_loop_doc_header
#+BEGIN_SRC d
if (headers.length > 0) {
  /+
    Convert each SiSU bespoke header block (@name: with indented :sub: lines)
    into "name:" followed by indented, double quoted `sub: "value"` lines.
    Blocks that are not recognised are kept as '#' comments, or passed through
    with a "munging required" notice.
  +/
  /+ append a sub header line for each listed name that occurs in block as an
     indented ":name:" line; names are tried (and so emitted) in list order +/
  void append_sub_headers(string block, string[] names) {
    foreach (name; names) {
      if (auto m = block.match(regex(r"^\s+:" ~ name ~ r":(?:[ ]+(?P<c>.+)|$)", "m"))) {
        _tmp_header ~= format_sub_header(name, m.captures["c"]);
      }
    }
  }
  headers[0] = headers[0].replaceFirst(regex(r"^%\s+SiSU.+", "i"), "# SiSU 8.0 spine (auto-conversion)");
  foreach (h_; headers) {
    _tmp_header = "";
    if (auto m = h_.match(regex(r"^%\s*", "m"))) {
      /+ '%' comment lines become '#' comment lines +/
      h_ = h_.replaceAll(regex(r"^%\s*", "m"), "# ") ~ "\n";
    }
    if (h_.match(regex(r"^@title:|@subtitle"))) {
      if (auto m = h_.match(regex(r"^@(?P<h>title):(?:[ ]+(?P<c>.+)|\n)"))) {
        _tmp_header ~= format_main_header(m.captures["h"], "main", m.captures["c"]);
      }
      if (auto m = h_.match(regex(r"^@(?P<h>subtitle):(?:[ ]+(?P<c>.+)|$)"))) {
        if (m.captures["c"].length > 0) { // an empty @subtitle: is dropped
          _tmp_header ~= format_sub_header(m.captures["h"], m.captures["c"]);
        }
      }
      append_sub_headers(h_, ["main"]);
      if (auto m = h_.match(regex(r"^\s+:sub(?:title)?:(?:[ ]+(?P<c>.+)|$)", "m"))) {
        _tmp_header ~= format_sub_header("subtitle", m.captures["c"]);
      }
    } else if (h_.match(regex(r"^@creator:|@author:"))) {
      if (auto m = h_.match(regex(r"^(?:@creator:|@author:)(?:[ ]+(?P<c>.+)|\n)"))) {
        _tmp_header ~= format_main_header("creator", "author", m.captures["c"]);
      }
      append_sub_headers(h_, ["author"]);
    } else if (h_.match(regex(r"^@rights:"))) {
      if (auto m = h_.match(regex(r"^@(?P<h>rights):(?:[ ]+(?P<c>.+)|\n)"))) {
        _tmp_header ~= format_main_header(m.captures["h"], "copyright", m.captures["c"]);
      }
      append_sub_headers(h_, ["copyright"]);
      if (auto m = h_.match(regex(r"^\s+:licen[cs]e:(?:[ ]+(?P<c>.+)|$)", "m"))) {
        /+ both spellings are accepted, "license" is written +/
        _tmp_header ~= format_sub_header("license", m.captures["c"]);
      }
    } else if (h_.match(regex(r"^@date:|@date\."))) {
      if (auto m = h_.match(regex(r"^@(?P<h>date):(?:[ ]+(?P<c>.+)|\n)"))) {
        _tmp_header ~= format_main_header(m.captures["h"], "published", m.captures["c"]);
      }
      append_sub_headers(h_, ["published", "available", "modified", "created", "issued", "valid"]);
      /+ dotted form: @date.available: etc. at the start of the block +/
      foreach (name; ["available", "modified", "created", "issued", "valid"]) {
        if (auto m = h_.match(regex(r"^@date\." ~ name ~ r":[ ]+(?P<c>.+)$"))) {
          _tmp_header ~= format_sub_header(name, m.captures["c"]);
        }
      }
    } else if (h_.match(regex(r"^@classify:"))) {
      if (auto m = h_.match(regex(r"^@classify:"))) {
        _tmp_header ~= "classify:\n";
      }
      append_sub_headers(h_, ["topic_register"]);
      if (auto m = h_.match(regex(r"^\s+:type:(?:[ ]+(?P<c>.+)|$)", "m"))) {
        /+ :type: is carried over as a comment only +/
        _tmp_header ~= "#  type: " ~ "\"" ~ m.captures["c"] ~ "\"\n";
      }
    } else if (h_.match(regex(r"^(?:@identifier:|@identify:)"))) {
      /+ same pattern as the branch condition, so that @identify: blocks also
         get their "identify:" parent key +/
      if (auto m = h_.match(regex(r"^(?:@identifier:|@identify:)"))) {
        _tmp_header ~= "identify:\n";
      }
      append_sub_headers(h_, ["oclc", "isbn", "dewey"]);
    } else if (h_.match(regex(r"^@publisher:"))) {
      if (auto m = h_.match(regex(r"^@publisher:[ ]+(?P<c>.+)$"))) {
        _tmp_header ~= "publisher: " ~  "\"" ~ m.captures["c"] ~ "\"\n";
      }
    } else if (h_.match(regex(r"^@make:"))) {
      // writeln(h_);
      if (auto m = h_.match(regex(r"^@make:"))) {
        _tmp_header ~= "make:\n";
      }
      append_sub_headers(h_, [
        "breaks", "num_top", "headings", "italics", "bold", "emphasis",
        "substitute", "texpdf_font", "home_button_text", "home_button_image",
        "cover_image", "footer",
      ]);
      // writeln(_tmp_header);
    } else if (h_.match(regex(r"^@\w+:"))) {
      /+ any other @header: block is kept, commented out line by line +/
      _tmp_header ~= "# " ~ h_.split("\n").join("\n# ") ~ "\n";
    } else if (h_.match(regex(r"^\s+:\w+:", "m"))) {
      if (auto m = h_.match(regex(r"^(?P<g>\s+:\w+:.*)"))) {
        _tmp_header ~= "# " ~ m.captures["g"] ~ "\n";
      }
    }
    if (h_.match(regex(r"^#", "m"))) {
      /+ carry over the first '#' comment line of the block +/
      if (auto m = h_.match(regex(r"^(?P<g>#.*)", "m"))) {
        _tmp_header ~= m.captures["g"] ~ "\n";
      }
    }
    if (_tmp_header.length > 0) {
      munged_header ~= _tmp_header.split("\n\n");
    } else if (h_.length > 0) {
      /+ nothing recognised: pass the block through and tell the user +/
      writeln("munging required: ", h_);
      h_ = h_.replaceAll((regex(r"\n\n\n+", "m")), "\n\n");
      munged_header ~= h_;
    }
  }
  // writeln(munged_header.join("\n"));
}
#+END_SRC

*** loop doc body (identify & ignore code blocks)

#+NAME: from_sisu_rb_loop_doc_body
#+BEGIN_SRC d
/+
  First pass over the body paragraphs: endnote paragraphs are moved into
  munged_endnotes, code block paragraphs are kept untouched, and in all other
  paragraphs the endnote reference markers are counted and <br> tags converted.
+/
foreach (paragraph; paragraphs) {                                                                                  /+ loop to gather binary endnotes +/
  if (code_block_status == codeBlock.off
    && paragraph.match(rgx_endnote)
  ) {
    /+ endnote paragraph outside a code block: store the note text only +/
    munged_endnotes ~= replaceAll!(m => m[1])
      (paragraph, rgx_endnote);
  } else {
    if ( code_block_status != codeBlock.off
      || paragraph.matchFirst(block_curly_code_open)
      || paragraph.matchFirst(block_tic_code_open)
    ) { /+ code blocks identified, no munging +/
      /+ update the state: a close marker only counts for the block type that
         is open; the close test comes first, so an open block stays open
         until its own close marker is seen +/
      if ((code_block_status == codeBlock.curly
          && paragraph.matchFirst(block_curly_code_close))
        || (code_block_status == codeBlock.tic
          && paragraph.matchFirst(block_tic_close))
      ) {
        code_block_status = codeBlock.off;
      } else if (paragraph.matchFirst(block_curly_code_open)) {
        code_block_status = codeBlock.curly;
      } else if (paragraph.matchFirst(block_tic_code_open)) {
        code_block_status = codeBlock.tic;
      }
      munged_contents ~= paragraph;
    } else { /+ regular content, not a code block +/
      if (auto m = paragraph.matchAll(rgx_endnote_ref)) {
        /+ count every endnote reference marker in the paragraph +/
        foreach (n; m) {
          endnote_ref_count++; // endnote_refs ~= (n.captures[1]);
        }
      }
      paragraph = format_body_string(paragraph);
      // paragraph = replaceAll!(m => " \\\\ " )
      //   (paragraph, regex(r"\s*<(?:/\s*|:)?br>\s*")); // (paragraph, regex(r"(<br>)"));
      munged_contents ~= paragraph;
    }
  }
}
#+END_SRC

*** (loop to) adjustment & output

#+NAME: from_sisu_rb_loop_adjust_and_output
#+BEGIN_SRC d
{
  /+
    Write the converted header followed by the body with endnotes inlined,
    but only when the number of endnotes equals the number of endnote
    references; otherwise report the mismatch and write nothing.
  +/
  import std.outbuffer;
  import std.path : dirName;
  if (munged_endnotes.length == endnote_ref_count) {
    auto buffer = new OutBuffer();
    foreach (header; munged_header) { /+ converted header first +/
      buffer.write(header ~ "\n");
    }
    int endnote_count = -1;
    foreach (k, content; munged_contents) { /+ loop to inline endnotes +/
      /+ each marker is replaced, in document order, by the next gathered endnote +/
      content = replaceAll!(m => "~{ " ~ munged_endnotes[++endnote_count] ~ " }~" ~ m["tail"] )
        (content, rgx_endnote_ref); // endnote_ref cannot occur in a code block or else fail
      /+ paragraphs are separated by a blank line, none after the last one +/
      buffer.write(content ~ ((k == munged_contents.length - 1) ? "" : "\n\n"));
    }
    try {
      string dir_out = "converted_output_";
      string path_and_file_out = dir_out ~ "/" ~ filename;
      /+ create the full parent directory of the output file, so that input
         given with a directory part (e.g. docs/file.sst) can be written too +/
      path_and_file_out.dirName.mkdirRecurse;
      auto f = File(path_and_file_out, "w");
      f.write(buffer);
      // writeln("wrote: ", path_and_file_out);
    } catch (FileException ex) {  // raised by mkdirRecurse
      writeln("did not write file: ", ex.msg);
    } catch (ErrnoException ex) { // raised by File open / write
      writeln("did not write file: ", ex.msg);
    }
  } else {
    /+ previously the content was collected and then discarded without a word +/
    writeln("ERROR binary endnote mismatch, check markup,\nmismatch in the number of endnotes & endnote references!");
    writeln("  number of endnotes:     ", munged_endnotes.length);
    writeln("  number of endnote refs: ", endnote_ref_count);
  }
}
#+END_SRC

*** exceptions

#+NAME: from_sisu_rb_exceptions
#+BEGIN_SRC d
} catch (ErrnoException ex) {
  /+ std.stdio failures: report them and carry on with the next argument +/
  switch(ex.errno) {
    case EPERM:
    case EACCES: // Permission denied
      stderr.writeln("ERROR permission denied (", filename, "): ", ex.msg);
      break;
    case ENOENT: // File does not exist
      stderr.writeln("ERROR no such file or directory (", filename, "): ", ex.msg);
      break;
    default:     // Handle other errors
      stderr.writeln("ERROR (", filename, "): ", ex.msg);
      break;
  }
} catch (FileException ex) {
  /+ std.file failures (e.g. readText on a missing or unreadable file) are
     FileException, not ErrnoException +/
  stderr.writeln("ERROR (", filename, "): ", ex.msg);
}
#+END_SRC

** conversion from sisu and multiple headers (sisu bespoke, sdlang, toml) incomplete
*** tangle

#+HEADER: :tangle "../sundry/misc/util/d/tools/markup_conversion/markup_changes_header_and_content.d"
#+HEADER: :tangle-mode (identity #o755)
#+HEADER: :shebang #!/usr/bin/env rdmd
#+BEGIN_SRC d
<<from_previous_markups_head>>
<<from_previous_markups_imports>>
/+
  Entry point of the header and content markup conversion tool.
  The regexes are set up once, then every command line argument is examined
  in turn.
+/
void main(string[] args) {
  <<from_previous_markups_init>>
  foreach(arg; args[1..$]) {
    if (
      !(arg.match(regex(r"--\w+")))         // ignore arguments containing a --option
      && arg.match(regex(r"\w+?\.ss[itm]")) // process names containing .ssi .sst or .ssm
    ) {
      <<from_previous_markups_pre_loops>>
        <<from_previous_markups_loop_doc_header>>
        <<from_previous_markups_loop_doc_body>>
        <<from_previous_markups_loop_adjust_and_output>>
      <<from_previous_markups_exceptions>>
    }
  }
}
#+END_SRC

*** head

#+NAME: from_previous_markups_head
#+BEGIN_SRC d
/+
  - read in file .sst .ssi .ssm
  - loop twice
    - first
      - check for and skip code blocks
      - use unique code marker for endnote markers in text and give an endnote
        number ★1, increment
      - extract all endnotes in array
    - second
      - check that the footnote marker number count matches the number of notes
        in the array
        - if they match either:
            - substitute each endnote marker with the array footnote[number-1]
            - substitute each endnote marker with footnote
              as inlined footnote markup (footnote number not needed)
        - if they do not match exit
  - check whether changes have been made
    - if so write file with inline footnotes in sub-directory converted_output_/
      using the same name as the original file
    - else, exit
+/
#+END_SRC

*** imports

#+NAME: from_previous_markups_imports
#+BEGIN_SRC d
import std.stdio;
import std.file;
import std.array : split;
import std.exception;
// import std.range;
import core.stdc.errno;
import std.regex;
import std.format;
import std.conv;
#+END_SRC

*** init

#+NAME: from_previous_markups_init
#+BEGIN_SRC d
/+ compile-time regexes used to split the document and classify paragraphs +/
// level A heading at the start of a line: "A~ " or ":A~ "
static heading_a               = ctRegex!(`^:?[A][~] `, "m");
// text starting with one or more '%' followed by a space
static comment                 = ctRegex!(`^%+ `);
// opening of a tic code block: ``` code, optionally .syntax and (attributes)
static block_tic_code_open     = ctRegex!("^`{3} code(?:[.](?P<syntax>[a-z][0-9a-z#+_]+))?(?:[(](?P<attrib>[ a-zA-Z0-9;:,]*)[)])?");
// a line consisting only of ``` (multi-line mode)
static block_tic_close         = ctRegex!("^(`{3})$","m");
// opening of a curly code block: code{ optionally with .syntax and (attributes)
static block_curly_code_open   = ctRegex!(`^(?:code(?:[.](?P<syntax>[a-z][0-9a-z_]+))?(?:[(](?P<attrib>[ a-zA-Z0-9;:,]*)[)])?[{][ ]*$)`);
// text starting with }code
static block_curly_code_close  = ctRegex!(`^([}]code)`);
// endnote reference marker ~^ ; "tail" captures an optional ) or ] plus a space, or the end of a line
auto rgx_endnote_ref           = ctRegex!(`([~]\^)(?P<tail>[)\]]? |$)`, "gm");
// endnote line: starts with ^~ and whitespace; capture 1 is what follows
auto rgx_endnote               = ctRegex!(`^\^~\s+(.+|\n)`, "gm");
/+
  split src_text on the _first_ match of heading_a ("^:?A~ ") into a two
  element [header, content] array:
    [0] header:  everything before the level A~ heading
    [1] content: the level A~ heading and everything after it
  throws an Exception (via enforce) if the text has no level A~ heading, as
  there is then nothing to split on
+/
char[][] header0Content1(in string src_text) {
  /+ work on a mutable copy rather than casting away the immutability of src_text +/
  auto m = src_text.dup.matchFirst(heading_a);
  enforce(!m.empty,
    "document markup is broken, no level A~ heading found"
    ~ "; (header / body split is on level A~)"
  );
  char[][] header_and_content = [m.pre, m.hit ~ m.post];
  return header_and_content;
}
#+END_SRC

*** pre-loops

#+NAME: from_previous_markups_pre_loops
#+BEGIN_SRC d
/+ per-file setup: echo the file name, read the file and split it into header
   and body paragraphs; the try opened here is closed by the exceptions fragment +/
writeln(arg);
string filename                  = arg;
try {
  // accumulators filled by the header and body loops
  string[] munged_header, munged_contents, munged_endnotes, endnote_refs;
  string text                    = filename.readText;
  // hc[0] is the header (text before the first A~ heading), hc[1] the document body
  char[][] hc                    = header0Content1(text);
  char[] src_header              = hc[0];
  // header and body are each split into blocks on blank lines
  string[] headers               = src_header.to!string.split("\n\n");
  char[] src_txt                 = hc[1];
  string[] paragraphs            = src_txt.to!string.split("\n\n");
  // number of endnote reference markers counted in the body
  int endnote_ref_count          = 0;
  // code block state flags: 1 while inside a block of that kind, otherwise 0
  int[string] type = [
    "curly_code"                 : 0,
    "tic_code"                   : 0,
  ];
  // scratch string for the header block currently being converted
  string _tmp_header;
#+END_SRC

*** loop doc header

#+NAME: from_previous_markups_loop_doc_header
#+BEGIN_SRC d
/+ append one yaml sub-field line to the header block under construction,
   starting a new line first when the block already has content, so that
   each field ends up on a line of its own +/
void append_header_field(string field) {
  if (_tmp_header.length > 0) {
    _tmp_header ~= "\n";
  }
  _tmp_header ~= field;
}
/+ loop to convert title & rights header blocks (sisu bespoke, toml or sdlang
   markup) to yaml; header blocks that are not recognised are kept as they are +/
foreach (h_; headers) {
  _tmp_header = "";
  if (h_.match(regex(r"^[@\[]?title[:\]]?"))) {                                                                  // title
    /+ [ \t]+ rather than \s+ so the value is taken from the @title: line only
       and not from the sub-field line that follows it +/
    if (auto m = h_.match(regex(r"^@title:(?:[ \t]+(?P<c>\S.*)|[ \t]*$)", "m"))) {                               // sisu bespoke markup
      if (m.captures["c"].length == 0) {
        _tmp_header ~= "title:";
      } else {
        _tmp_header ~= "title:\n  main: " ~ "\"" ~ m.captures["c"] ~ "\"";
      }
    } else if (auto m = h_.match(regex(r"^title\s*=\s*(?P<c>.+)"))) {                                            // toml?
      if (m.captures["c"].length == 0) {
        _tmp_header ~= "title:";
      } else {
        _tmp_header ~= "title:\n  main: " ~ "\"" ~ m.captures["c"] ~ "\"";
      }
    } else if (auto m = h_.match(regex(r"^\[title\]"))) {                                                        // toml markup
      _tmp_header ~= "title:";
    } else if (auto m = h_.match(regex(r"^title(?:\s+(?P<c>.+)|\s+\\$)"))) {                                     // sdlang markup
      if (m.captures["c"].length == 0) {
        _tmp_header ~= "title:";
      } else {
        _tmp_header ~= "title:\n  main: " ~ "\"" ~ m.captures["c"] ~ "\"";
      }
    }
    if (h_.match(regex(r"^\s*[:]?(?:main)[:= ]?", "m"))) {
      if (auto m = h_.match(regex(r"^\s+(?P<h>:main):(?:\s+(?P<c>.+)|$)", "m"))) {                                // sisu bespoke markup
        append_header_field("  main: " ~ "\"" ~ m.captures["c"] ~ "\"");
      } else if (auto m = h_.match(regex(r"^\s*(?P<h>main)\s*=\s*(?P<c>.+)", "m"))) {                            // toml?
        append_header_field("  main: " ~ m.captures["c"]);
      } else if (auto m = h_.match(regex(r"^\s+(?P<h>main)(?:\s*\s*(?P<c>.+)|$)", "m"))) {                       // toml markup
        append_header_field("  main: " ~ "\"" ~ m.captures["c"] ~ "\"");
      } else if (auto m = h_.match(regex(r"^\s+(?P<h>main)(?:\s+(?P<c>.+)|\s+\\$)", "m"))) {                     // sdlang markup
        append_header_field("  main: " ~ "\"" ~ m.captures["c"] ~ "\"");
      }
    }
    if (h_.match(regex(r"^\s*[:]?(?:sub(title)?)[:= ]?", "m"))) {
      if (auto m = h_.match(regex(r"^\s+:sub(?:title)?:(?:\s+(?P<c>.+)|$)", "m"))) {                             // sisu bespoke markup
        append_header_field("  subtitle: " ~ "\"" ~ m.captures["c"] ~ "\"");
      } else if (auto m = h_.match(regex(r"^\s*sub(?:title)?\s*=\s*(?P<c>.+)$", "m"))) {                         // toml?
        append_header_field("  subtitle: " ~ m.captures["c"]);
      } else if (auto m = h_.match(regex(r"^\s+sub(?:title)?(?:\s*\s*(?P<c>.+)|$)", "m"))) {                     // toml markup
        append_header_field("  subtitle: " ~ "\"" ~ m.captures["c"] ~ "\"");
      } else if (auto m = h_.match(regex(r"^\s+sub(?:title)?(?:\s+(?P<c>.+)|\s+\\$)", "m"))) {                   // sdlang markup
        append_header_field("  subtitle: " ~ "\"" ~ m.captures["c"] ~ "\"");
      }
    }
  }
  if (h_.match(regex(r"^[@\[]?rights[:\]]?"))) {                                                                 // rights
    if (auto m = h_.match(regex(r"^@rights:[ ]+(?P<c>.+)$"))) {                                                  // sisu bespoke markup
      _tmp_header ~= "rights: \n  copyright: \"" ~ m.captures["c"] ~ "\"";
    } else if (auto m = h_.match(regex(r"^@rights:"))) {                                                         // sisu bespoke markup
      _tmp_header ~= "rights:";
    } else if (auto m = h_.match(regex(r"^\[rights\]", "m"))) {                                                  // toml markup
      _tmp_header ~= "rights:";
    } else if (auto m = h_.match(regex(r"^rights:"))) {                                                          // sdlang markup
      _tmp_header ~= "rights:";
    }
    if (h_.match(regex(r"^\s*[:]?copyright[:= ]?", "m"))) {
      if (auto m = h_.match(regex(r"^\s+:copyright:(?:\s+(?P<c>.+)|$)", "m"))) {                                 // sisu bespoke markup
        append_header_field("  copyright: " ~ "\"" ~ m.captures["c"] ~ "\"");
      } else if (auto m = h_.match(regex(r"^\s*copyright\s*=\s*(?P<c>.+)", "m"))) {                              // toml?
        append_header_field("  copyright: " ~ m.captures["c"]);
      } else if (auto m = h_.match(regex(r"^\s+copyright(?:\s*\s*(?P<c>.+)|$)", "m"))) {                         // toml markup
        append_header_field("  copyright: " ~ "\"" ~ m.captures["c"] ~ "\"");
      } else if (auto m = h_.match(regex(r"^\s+copyright(?:\s+(?P<c>.+)|\s+\\$)", "m"))) {                       // sdlang markup
        append_header_field("  copyright: " ~ "\"" ~ m.captures["c"] ~ "\"");
      }
    }
    if (h_.match(regex(r"^\s*[:]?licen[cs]e[:= ]?", "m"))) {
      if (auto m = h_.match(regex(r"^\s+:licen[cs]e:(?:\s+(?P<c>.+)|$)", "m"))) {                                // sisu bespoke markup
        append_header_field("  license: " ~ "\"" ~ m.captures["c"] ~ "\"");
      } else if (auto m = h_.match(regex(r"^\s*licen[cs]e\s*=\s*(?P<c>.+)$", "m"))) {                            // toml?
        append_header_field("  license: " ~ m.captures["c"]);
      } else if (auto m = h_.match(regex(r"^\s+licen[cs]e(?:\s*\s*(?P<c>.+)|$)", "m"))) {                        // toml markup
        append_header_field("  license: " ~ "\"" ~ m.captures["c"] ~ "\"");
      } else if (auto m = h_.match(regex(r"^\s+licen[cs]e(?:\s+(?P<c>.+)|\s+\\$)", "m"))) {                      // sdlang markup
        append_header_field("  license: " ~ "\"" ~ m.captures["c"] ~ "\"");
      }
    }
  }
  /+ keep the original header block when nothing was recognised in it +/
  if (_tmp_header.length > 0) {
    munged_header ~= _tmp_header;
  } else {
    munged_header ~= h_;
  }
}
writeln(munged_header);
#+END_SRC

*** loop doc body

#+NAME: from_previous_markups_loop_doc_body
#+BEGIN_SRC d
/+ "}code" at the start of any line of a paragraph; block_curly_code_close only
   matches at the very start of a paragraph, which misses a closing "}code"
   that follows the last lines of code within the same paragraph +/
auto block_curly_code_close_any_line = regex(r"^[}]code", "m");
/+ loop to gather endnotes, count endnote markers and pass code blocks through
   untouched; works on paragraphs (text separated by blank lines) +/
foreach (paragraph; paragraphs) {
  bool in_code_block = (type["curly_code"] == 1 || type["tic_code"] == 1);
  if (!in_code_block && paragraph.match(rgx_endnote)) {
    /+ endnote paragraph: keep the note text only (group 1), drop the ^~ marker +/
    munged_endnotes ~= replaceAll!(m => m[1])
      (paragraph, rgx_endnote);
  } else if (in_code_block
    || paragraph.matchFirst(block_curly_code_open)
    || paragraph.matchFirst(block_tic_code_open)
  ) { /+ code blocks identified, no munging +/
    if (type["curly_code"] == 1
      && paragraph.matchFirst(block_curly_code_close_any_line)
    ) {
      type["curly_code"] = 0;
    } else if (type["tic_code"] == 1
      && paragraph.matchFirst(block_tic_close)
    ) {
      type["tic_code"] = 0;
    } else if (paragraph.matchFirst(block_curly_code_open)) {
      type["curly_code"] = 1;
    } else if (paragraph.matchFirst(block_tic_code_open)) {
      /+ a tic block with no blank lines opens and closes within this one
         paragraph; only enter the code state when the closing tics are not
         here, otherwise the rest of the document would be treated as code +/
      if (!paragraph.matchFirst(block_tic_close)) {
        type["tic_code"] = 1;
      }
    }
    munged_contents ~= paragraph;
  } else { /+ regular content, not a code block +/
    foreach (n; paragraph.matchAll(rgx_endnote_ref)) {
      endnote_ref_count++;
    }
    /+ <br> and its </br> <:br> variants become the " \\ " line-break markup +/
    paragraph = replaceAll!(m => " \\\\ " )
      (paragraph, regex(r"\s*<(?:/\s*|:)?br>\s*"));
    munged_contents ~= paragraph;
  }
}
#+END_SRC

*** (loop to) adjust & output

#+NAME: from_previous_markups_loop_adjust_and_output
#+BEGIN_SRC d
{
  /+ inline the gathered endnotes and write the converted document to
     converted_output_/<filename>; nothing is written when the number of
     endnote markers in the text differs from the number of endnotes found +/
  import std.outbuffer;
  import std.path : dirName;
  if (munged_endnotes.length == endnote_ref_count) {
    auto buffer = new OutBuffer();
    foreach (header; munged_header) { /+ loop to output (converted) header blocks +/
      buffer.write(header ~ "\n\n");
    }
    int endnote_count = -1;
    foreach (content; munged_contents) { /+ loop to inline endnotes +/
      /+ each marker takes the next endnote in document order +/
      content = replaceAll!(m => "~{ " ~ munged_endnotes[++endnote_count] ~ " }~" ~ m["tail"] )
        (content, rgx_endnote_ref); // endnote_ref cannot occur in a code block or else fail
      buffer.write(content ~ "\n\n");
    }
    string dir_out = "converted_output_";
    string path_and_file_out = dir_out ~ "/" ~ filename;
    try {
      /+ filename may include directories, create the whole output path not just dir_out +/
      path_and_file_out.dirName.mkdirRecurse;
      auto f = File(path_and_file_out, "w");
      f.write(buffer.toString);
      // writeln("wrote: ", path_and_file_out);
    } catch (FileException ex) {   /+ thrown by mkdirRecurse +/
      stderr.writeln("did not write file: ", path_and_file_out, " (", ex.msg, ")");
    } catch (ErrnoException ex) {  /+ thrown by File open / write +/
      stderr.writeln("did not write file: ", path_and_file_out, " (", ex.msg, ")");
    }
  } else {
    stderr.writeln(
      "did not write file: ", filename,
      "; endnote markers in text (", endnote_ref_count,
      ") do not match number of endnotes found (", munged_endnotes.length, ")"
    );
  }
}
#+END_SRC

*** exceptions

#+NAME: from_previous_markups_exceptions
#+BEGIN_SRC d
} catch (ErrnoException ex) {
  /+ stdio level failures, report and carry on with the next file +/
  switch(ex.errno) {
    case EPERM:
    case EACCES: // Permission denied
      stderr.writeln("permission denied: ", filename);
      break;
    case ENOENT: // File does not exist
      stderr.writeln("file does not exist: ", filename);
      break;
    default:     // Handle other errors
      stderr.writeln("error processing file: ", filename, " (", ex.msg, ")");
      break;
  }
} catch (FileException ex) {
  /+ std.file failures (e.g. readText on a missing or unreadable file) are
     FileException not ErrnoException, report and carry on with the next file +/
  stderr.writeln("could not process file: ", filename, " (", ex.msg, ")");
}
#+END_SRC