#+TITLE: sdp output sqlite discrete #+AUTHOR: Ralph Amissah #+EMAIL: [[mailto:ralph.amissah@gmail.com][ralph.amissah@gmail.com]] #+DESCRIPTION: documents - structuring, publishing in multiple formats & search #+KEYWORDS #+LANGUAGE: en #+STARTUP: indent content #+OPTIONS: H:3 num:nil toc:t \n:nil @:t ::t |:t ^:nil _:nil -:t f:t *:t <:t #+OPTIONS: TeX:t LaTeX:t skip:nil d:nil todo:t pri:nil tags:not-in-toc #+OPTIONS: author:nil email:nil creator:nil timestamp:nil #+PROPERTY: header-args :padline no :exports code :noweb yes #+EXPORT_SELECT_TAGS: export #+EXPORT_EXCLUDE_TAGS: noexport #+FILETAGS: :sdp:niu:output: #+TAGS: assert(a) class(c) debug(d) mixin(m) sdp(s) tangle(T) template(t) WEB(W) noexport(n) * sql ** 0. module, templates #+BEGIN_SRC d :tangle ../src/sdp/output/sqlite_discrete.d module sdp.output.sqlite_discrete; <<output_imports>> template SQLiteDiscreteBuildTablesAndPopulate() { void SQLiteDiscreteBuildTablesAndPopulate(D,I)( auto ref const D doc_abstraction, auto ref I doc_matters, ) { import d2sqlite3; import std.typecons : Nullable; mixin SiSUoutputRgxInit; static auto rgx = Rgx(); template SQLiteFormatAndLoadObject() { auto SQLiteFormatAndLoadObject(I)( auto ref I doc_matters, ) { mixin SiSUoutputRgxInit; struct sqlite_format_and_load_objects { <<sanitize_text_for_search>> <<sanitize_and_mmunge_inline_html>> <<html_objects>> <<sqlite_load_object>> <<hub_format_and_sqlite_load_objects>> } return sqlite_format_and_load_objects(); } } template SQLiteInstruct() { Statement SQLiteInstruct(I)( auto ref I doc_matters, ) { <<sqlite_db_initialize>> <<sqlite_table_initialize>> <<sqlite_create_table_metadata_and_src_txt>> /* */<<sqlite_create_table_urls>> <<sqlite_create_table_objects>> <<sqlite_close_initialize>> <<sqlite_insert_metadata_and_src_text>> //<<sqlite_insert_urls>> <<sqlite_insert_doc_objects>> } } template SQLiteObjectsLoop() { void SQLiteObjectsLoop(P)( auto ref P doc_parts, ) { <<sqlite_objects_loop>> } } SQLiteObjectsLoop!()(doc_matters.keys_seq.sql); } } template SQLiteDiscreteTablesCreate() { void SQLiteDiscreteTablesCreate()() { writeln("table create"); } } template SQLiteDiscreteTablesDrop() { void SQLiteDiscreteTablesDrop()() { writeln("table drop"); } } #+END_SRC ** 1. imports #+name: output_imports #+BEGIN_SRC d import sdp.output; import std.file, std.conv : to; #+END_SRC ** 2. format and load template *** 1. prepare objects (munge, sanitize, markup) **** 1. _text_ generic munge (sanitize text for search) - [3/4] (search text, applies to all but code blocks) - [ ] remove whitespace, paragraph on single line (formatting kept so far) - [X] remove font face attributes - [X] move embedded endnotes - [X] place after text object - [X] remove embedded endnote numbers (rely on html output to represent) - [X] urls - [X] clean url markers, leave plain link text - [X] place urls after text object and its endnotes #+name: sanitize_text_for_search #+BEGIN_SRC d auto generic_munge_sanitize_text_for_search( string _txt, ) { string _notes; string _urls; _txt = _txt.replaceAll(rgx.inline_fontface_clean, ""); if (_txt.matchFirst(rgx.inline_notes_al_gen)) { foreach (m; _txt.matchAll(rgx.inline_notes_al_gen_text)) { _notes ~= "\n" ~ m["text"]; } _txt = _txt.replaceAll(rgx.inline_notes_al_gen, ""); } if (_txt.matchFirst(rgx.inline_link)) { foreach (m; _txt.matchAll(rgx.inline_link)) { if (m["link"].match(rgx.url)) { _urls ~= "\n" ~ m["link"]; } } _txt = _txt.replaceAll(rgx.inline_link_clean, ""); } if (_notes.length > 0) { _txt ~= _notes; } if (_urls.length > 0) { _txt ~= _urls; } if (doc_matters.opt_action.debug_do) { writeln(_txt, "\n"); } debug(sql_text_clean) { writeln(_txt); } return _txt; } #+END_SRC **** 2. _html_ light html objects - place endnotes after text object - light inline html markup ***** munge ****** general munge (special characters, inline markup, move notes) #+name: sanitize_and_mmunge_inline_html #+BEGIN_SRC d auto munge_html(O)( auto return ref const O obj, ) { string _html_special_characters(string _txt){ _txt = (_txt) .replaceAll(rgx.xhtml_ampersand, "&") .replaceAll(rgx.xhtml_quotation, """) .replaceAll(rgx.xhtml_less_than, "<") .replaceAll(rgx.xhtml_greater_than, ">") .replaceAll(rgx.nbsp_char, " ") .replaceAll(rgx.xhtml_line_break, "<br />"); return _txt; } string _html_font_face(string _txt){ _txt = (_txt) .replaceAll(rgx.inline_emphasis, ("<em>$1</em>")) .replaceAll(rgx.inline_bold, ("<b>$1</b>")) .replaceAll(rgx.inline_underscore, ("<u>$1</u>")) .replaceAll(rgx.inline_italics, ("<i>$1</i>")) .replaceAll(rgx.inline_superscript, ("<sup>$1</sup>")) .replaceAll(rgx.inline_subscript, ("<sub>$1</sub>")) .replaceAll(rgx.inline_strike, ("<del>$1</del>")) .replaceAll(rgx.inline_insert, ("<ins>$1</ins>")) .replaceAll(rgx.inline_mono, ("<tt>$1</tt>")) .replaceAll(rgx.inline_cite, ("<cite>$1</cite>")); return _txt; } string _notes; string _urls; string _txt = _html_font_face(_html_special_characters(obj.text)); if (_txt.matchFirst(rgx.inline_notes_al_gen)) { foreach (m; _txt.matchAll(rgx.inline_notes_al_gen_text)) { _notes ~= "\n" ~ m["text"]; } _txt = _txt.replaceAll(rgx.inline_notes_al_gen_ref, "<sup>$1</sup> "); } if (_txt.matchFirst(rgx.inline_link)) { foreach (m; _txt.matchAll(rgx.inline_link)) { } _txt = _txt.replaceAll(rgx.inline_link_clean, ""); } if (_notes.length > 0) { _txt ~= _notes; } if (doc_matters.opt_action.debug_do) { writeln(_txt, "\n"); } return _txt; } #+END_SRC ****** special characters #+name: sanitize_and_mmunge_inline_html #+BEGIN_SRC d string html_special_characters(string _txt){ _txt = (_txt) .replaceAll(rgx.xhtml_ampersand, "&") .replaceAll(rgx.xhtml_quotation, """) .replaceAll(rgx.xhtml_less_than, "<") .replaceAll(rgx.xhtml_greater_than, ">") .replaceAll(rgx.nbsp_char, " ") .replaceAll(rgx.xhtml_line_break, "<br />"); return _txt; } #+END_SRC ****** special characters for code #+name: sanitize_and_mmunge_inline_html #+BEGIN_SRC d string html_special_characters_code(string _txt){ _txt = (_txt) .replaceAll(rgx.xhtml_ampersand, "&") .replaceAll(rgx.xhtml_quotation, """) .replaceAll(rgx.xhtml_less_than, "<") .replaceAll(rgx.xhtml_greater_than, ">") .replaceAll(rgx.nbsp_char, " "); return _txt; } #+END_SRC ****** font_face #+name: sanitize_and_mmunge_inline_html #+BEGIN_SRC d string html_font_face(string _txt){ _txt = (_txt) .replaceAll(rgx.inline_emphasis, ("<em>$1</em>")) .replaceAll(rgx.inline_bold, ("<b>$1</b>")) .replaceAll(rgx.inline_underscore, ("<u>$1</u>")) .replaceAll(rgx.inline_italics, ("<i>$1</i>")) .replaceAll(rgx.inline_superscript, ("<sup>$1</sup>")) .replaceAll(rgx.inline_subscript, ("<sub>$1</sub>")) .replaceAll(rgx.inline_strike, ("<del>$1</del>")) .replaceAll(rgx.inline_insert, ("<ins>$1</ins>")) .replaceAll(rgx.inline_mono, ("<tt>$1</tt>")) .replaceAll(rgx.inline_cite, ("<cite>$1</cite>")); return _txt; } #+END_SRC ***** objects ****** heading #+name: html_objects #+BEGIN_SRC d auto html_heading(O)( auto return ref const O obj, ) { string _txt = munge_html(obj); string o = format(q"¶<p class="%s"><b> %s </b></p>¶", obj.is_a, _txt, ); return o; } #+END_SRC ******* +fancy+ ##+name: prepare_objects_html #+BEGIN_SRC d auto html_heading(O)( auto return ref const O obj, ) { string o; string _txt = munge_html(obj); o = format(q"¶<h%s class="%s"> %s </h%s>¶", obj.heading_lev_markup, obj.is_a, _txt, obj.heading_lev_markup, ); return o; } #+END_SRC ****** para #+name: html_objects #+BEGIN_SRC d auto html_para(O)( auto return ref const O obj, ) { string _txt = munge_html(obj); _txt = (obj.bullet) ? ("●  " ~ _txt) : _txt; string o = format(q"¶<p class="%s" indent="h%si%s"> %s </p>¶", obj.is_a, obj.indent_hang, obj.indent_base, _txt ); return o; } #+END_SRC ****** quote #+name: html_objects #+BEGIN_SRC d auto html_quote(O)( auto return ref const O obj, ) { string _txt = munge_html(obj); string o = format(q"¶<p class="%s"> %s </p>¶", obj.is_a, _txt ); return o; } #+END_SRC ****** group #+name: html_objects #+BEGIN_SRC d auto html_group(O)( auto return ref const O obj, ) { string _txt = munge_html(obj); string o = format(q"¶<p class="%s"> %s </p>¶", obj.is_a, _txt ); return o; } #+END_SRC ****** block #+name: html_objects #+BEGIN_SRC d auto html_block(O)( auto return ref const O obj, ) { string _txt = munge_html(obj); string o = format(q"¶ <p class="%s">%s</p>¶", obj.is_a, _txt.stripRight ); return o; } #+END_SRC ****** verse #+name: html_objects #+BEGIN_SRC d auto html_verse(O)( auto return ref const O obj, ) { string _txt = munge_html(obj); string o = format(q"¶<p class="%s">%s</p>¶", obj.is_a, _txt ); return o; } #+END_SRC ****** code #+name: html_objects #+BEGIN_SRC d auto html_code(O)( auto return ref const O obj, ) { string _txt = html_special_characters_code(obj.text); string o = format(q"¶<p class="%s">%s</p>¶", obj.is_a, _txt ); return o; } #+END_SRC ****** table #+name: html_objects #+BEGIN_SRC d auto html_table(O)( auto return ref const O obj, ) { auto _tablarize(O)( auto return ref const O obj, string _txt, ) { string[] _table_rows = (_txt).split(rgx.table_delimiter_row); string[] _table_cols; string _table; string _tablenote; foreach(row_idx, row; _table_rows) { _table_cols = row.split(rgx.table_delimiter_col); _table ~= "<tr>"; foreach(col_idx, cell; _table_cols) { if ((_table_cols.length == 1) && (_table_rows.length <= row_idx+2)) { // check row_idx+2 (rather than == ++row_idx) _tablenote ~= cell; } else { string _col_is = (row_idx == 0 && obj.table_heading) ? "th" : "td"; string _align = ("style=\"text-align:" ~ ((obj.table_column_aligns[col_idx] == "l") ? "left\"" : "right\"")); _table ~= "<" ~ _col_is ~ " width=\"" ~ obj.table_column_widths[col_idx].to!string ~ "%\" " ~ _align ~ ">"; _table ~= cell; _table ~= "</" ~ _col_is ~ ">"; } } _table ~= "</tr>"; } auto t = tuple( _table, _tablenote, ); return t; } string _txt = munge_html(obj); auto t = _tablarize(obj, _txt); _txt = t[0]; string _note = t[1]; string o = format(q"¶<p class="%s"> <table summary="normal text css" width="95%%" border="0" bgcolor="white" cellpadding="2" align="center"> %s </table> %s </p>¶", obj.is_a, _txt, _note ); return o; } #+END_SRC *** 2. _sqlite_ (loop preparation & pre-loop action) ***** initialize: prepare statements drop & create db & tables anew ****** d sqlite statements (initialize database) { #+name: sqlite_db_initialize #+BEGIN_SRC d auto pth_sqlite = SiSUpathsSQLiteDiscrete!()(doc_matters.output_path, doc_matters.language); auto db = Database(pth_sqlite.sqlite_file(doc_matters.source_filename)); // auto db = Database(":memory:"); // open database in memory db.run(" #+END_SRC ******* 0. drop table if exists #+name: sqlite_table_initialize #+BEGIN_SRC sql DROP TABLE IF EXISTS metadata_and_text; DROP TABLE IF EXISTS doc_objects; DROP TABLE IF EXISTS urls; #+END_SRC ******* 1. create tables ******** metadata & src text #+name: sqlite_create_table_metadata_and_src_txt #+BEGIN_SRC sql CREATE TABLE metadata_and_text ( tid BIGINT PRIMARY KEY, title VARCHAR(800) NOT NULL, title_main VARCHAR(400) NULL, /*NOT*/ title_sub VARCHAR(400) NULL, title_short VARCHAR(400) NULL, title_edition VARCHAR(10) NULL, title_note VARCHAR(2500) NULL, title_language VARCHAR(100) NULL, title_language_char VARCHAR(6) NULL, creator_author VARCHAR(600) NULL, creator_author_email VARCHAR(100) NULL, creator_author_hon VARCHAR(100) NULL, creator_author_nationality VARCHAR(100) NULL, creator_editor VARCHAR(600) NULL, creator_contributor VARCHAR(600) NULL, creator_illustrator VARCHAR(600) NULL, creator_photographer VARCHAR(600) NULL, creator_translator VARCHAR(600) NULL, creator_prepared_by VARCHAR(600) NULL, creator_digitized_by VARCHAR(600) NULL, creator_audio VARCHAR(600) NULL, creator_video VARCHAR(600) NULL, language_document VARCHAR(100) NULL, language_document_char VARCHAR(6) NULL, /*NOT*/ language_original VARCHAR(100) NULL, language_original_char VARCHAR(6) NULL, date_added_to_site VARCHAR(10) NULL, date_available VARCHAR(10) NULL, date_created VARCHAR(10) NULL, date_issued VARCHAR(10) NULL, date_modified VARCHAR(10) NULL, date_published VARCHAR(10) NULL, date_valid VARCHAR(10) NULL, date_translated VARCHAR(10) NULL, date_original_publication VARCHAR(10) NULL, date_generated VARCHAR(10) NULL, publisher VARCHAR(600) NULL, original_publisher VARCHAR(600) NULL, original_language VARCHAR(100) NULL, original_language_char VARCHAR(6) NULL, original_source VARCHAR(600) NULL, original_institution VARCHAR(600) NULL, original_nationality VARCHAR(100) NULL, original_title VARCHAR(800) NULL, rights_copyright VARCHAR(2500) NULL, rights_copyright_audio VARCHAR(2500) NULL, rights_copyright_cover VARCHAR(2500) NULL, rights_copyright_illustrations VARCHAR(2500) NULL, rights_copyright_photographs VARCHAR(2500) NULL, rights_copyright_text VARCHAR(2500) NULL, rights_copyright_translation VARCHAR(2500) NULL, rights_copyright_video VARCHAR(2500) NULL, rights_license VARCHAR(2500) NULL, identifier_oclc VARCHAR(30) NULL, identifier_isbn VARCHAR(16) NULL, classify_topic_register VARCHAR(2500) NULL, classify_subject VARCHAR(600) NULL, classify_loc VARCHAR(30) NULL, classify_dewey VARCHAR(30) NULL, classify_keywords VARCHAR(600) NULL, notes_abstract TEXT NULL, notes_description TEXT NULL, notes_comment TEXT NULL, notes_coverage VARCHAR(200) NULL, notes_relation VARCHAR(200) NULL, notes_history VARCHAR(600) NULL, notes_type VARCHAR(600) NULL, notes_format VARCHAR(600) NULL, notes_prefix TEXT NULL, notes_prefix_a TEXT NULL, notes_prefix_b TEXT NULL, notes_suffix TEXT NULL, src_filename VARCHAR(256) NULL, src_fingerprint VARCHAR(256) NULL, src_filesize VARCHAR(10) NULL, src_wordcount VARCHAR(10) NULL, src_text TEXT NULL, fulltext TEXT NULL, links TEXT NULL ); #+END_SRC ******** doc_objects #+name: sqlite_create_table_objects #+BEGIN_SRC sql CREATE TABLE doc_objects ( lid BIGINT PRIMARY KEY, metadata_tid BIGINT REFERENCES metadata_and_text, ocn SMALLINT, ocnd VARCHAR(6), ocns VARCHAR(6), clean TEXT NULL, body TEXT NULL, book_idx TEXT NULL, seg VARCHAR(256) NULL, lev_an VARCHAR(1), lev SMALLINT NULL, lev0 SMALLINT, lev1 SMALLINT, lev2 SMALLINT, lev3 SMALLINT, lev4 SMALLINT, lev5 SMALLINT, lev6 SMALLINT, lev7 SMALLINT, en_a SMALLINT NULL, en_z SMALLINT NULL, en_a_asterisk SMALLINT NULL, en_z_asterisk SMALLINT NULL, en_a_plus SMALLINT NULL, en_z_plus SMALLINT NULL, t_of VARCHAR(16), t_is VARCHAR(16), node VARCHAR(16) NULL, parent VARCHAR(16) NULL, digest_clean CHAR(256), digest_all CHAR(256), types CHAR(1) NULL ) #+END_SRC ****** d } #+name: sqlite_close_initialize #+BEGIN_SRC d "); #+END_SRC ***** prepare insert statements and do pre-loop inserts ****** 1. _metadata & src_ text ******* sql statement ******** d { sql statement #+name: sqlite_insert_metadata_and_src_text #+BEGIN_SRC d Statement insert_metadata = db.prepare(" #+END_SRC tid document number unique either: - increment by adding 1 for each document, - make hash of document filename or url and use? ********* sql insert into #+name: sqlite_insert_metadata_and_src_text #+BEGIN_SRC sql INSERT INTO metadata_and_text ( tid, title, title_main, title_sub, title_short, title_edition, title_note, title_language, title_language_char, creator_author, creator_author_email, creator_author_hon, creator_author_nationality, creator_editor, creator_contributor, creator_illustrator, creator_photographer, creator_translator, creator_prepared_by, creator_digitized_by, creator_audio, creator_video, language_document, language_document_char, language_original, language_original_char, date_added_to_site, date_available, date_created, date_issued, date_modified, date_published, date_valid, date_translated, date_original_publication, date_generated, publisher, original_publisher, original_language, original_language_char, original_source, original_institution, original_nationality, original_title, rights_copyright, rights_copyright_audio, rights_copyright_cover, rights_copyright_illustrations, rights_copyright_photographs, rights_copyright_text, rights_copyright_translation, rights_copyright_video, rights_license, identifier_oclc, identifier_isbn, classify_topic_register, classify_subject, classify_loc, classify_dewey, classify_keywords, notes_abstract, notes_description, notes_comment, notes_coverage, notes_relation, notes_history, notes_type, notes_format, notes_prefix, notes_prefix_a, notes_prefix_b, notes_suffix, src_filename, src_fingerprint, src_filesize, src_wordcount, src_text, fulltext, links ) #+END_SRC ********* sql values #+name: sqlite_insert_metadata_and_src_text #+BEGIN_SRC sql VALUES ( :tid, :title, :title_main, :title_sub, :title_short, :title_edition, :title_note, :title_language, :title_language_char, :creator_author, :creator_author_email, :creator_author_hon, :creator_author_nationality, :creator_editor, :creator_contributor, :creator_illustrator, :creator_photographer, :creator_translator, :creator_prepared_by, :creator_digitized_by, :creator_audio, :creator_video, :language_document, :language_document_char, :language_original, :language_original_char, :date_added_to_site, :date_available, :date_created, :date_issued, :date_modified, :date_published, :date_valid, :date_translated, :date_original_publication, :date_generated, :publisher, :original_publisher, :original_language, :original_language_char, :original_source, :original_institution, :original_nationality, :original_title, :rights_copyright, :rights_copyright_audio, :rights_copyright_cover, :rights_copyright_illustrations, :rights_copyright_photographs, :rights_copyright_text, :rights_copyright_translation, :rights_copyright_video, :rights_license, :identifier_oclc, :identifier_isbn, :classify_topic_register, :classify_subject, :classify_loc, :classify_dewey, :classify_keywords, :notes_abstract, :notes_description, :notes_comment, :notes_coverage, :notes_relation, :notes_history, :notes_type, :notes_format, :notes_prefix, :notes_prefix_a, :notes_prefix_b, :notes_suffix, :src_filename, :src_fingerprint, :src_filesize, :src_wordcount, :src_text, :fulltext, :links ) #+END_SRC ******** d } #+name: sqlite_insert_metadata_and_src_text #+BEGIN_SRC d "); #+END_SRC ******* TODO insert document_metadata #+name: sqlite_insert_metadata_and_src_text #+BEGIN_SRC d // insert_metadata.bind(":tid", ); // unique identifier you could try use sha of title author & language? or auto increment?? insert_metadata.bind(":title", doc_matters.conf_make_meta.meta.title_full); insert_metadata.bind(":title_main", doc_matters.conf_make_meta.meta.title_main); insert_metadata.bind(":title_sub", doc_matters.conf_make_meta.meta.title_subtitle); insert_metadata.bind(":title_short", doc_matters.conf_make_meta.meta.title_short); // insert_metadata.bind(":title_edition", doc_matters.conf_make_meta.meta.title_edition); insert_metadata.bind(":title_language", doc_matters.conf_make_meta.meta.title_language); // insert_metadata.bind(":title_language_char", doc_matters.conf_make_meta.meta.title_language_char); insert_metadata.bind(":classify_dewey", doc_matters.conf_make_meta.meta.classify_dewey); insert_metadata.bind(":classify_keywords", doc_matters.conf_make_meta.meta.classify_keywords); insert_metadata.bind(":classify_loc", doc_matters.conf_make_meta.meta.classify_loc); insert_metadata.bind(":classify_subject", doc_matters.conf_make_meta.meta.classify_subject); insert_metadata.bind(":classify_topic_register", doc_matters.conf_make_meta.meta.classify_topic_register); insert_metadata.bind(":creator_author", doc_matters.conf_make_meta.meta.creator_author); insert_metadata.bind(":creator_author_email", doc_matters.conf_make_meta.meta.creator_author_email); // insert_metadata.bind(":creator_editor", doc_matters.conf_make_meta.meta.creator_editor); // insert_metadata.bind(":creator_contributor", doc_matters.conf_make_meta.meta.creator_contributor); insert_metadata.bind(":creator_illustrator", doc_matters.conf_make_meta.meta.creator_illustrator); // insert_metadata.bind(":creator_photographer", doc_matters.conf_make_meta.meta.creator_photographer); insert_metadata.bind(":creator_translator", doc_matters.conf_make_meta.meta.creator_translator); // insert_metadata.bind(":creator_audio", doc_matters.conf_make_meta.meta.creator_audio); // insert_metadata.bind(":creator_video", doc_matters.conf_make_meta.meta.creator_video); insert_metadata.bind(":date_added_to_site", doc_matters.conf_make_meta.meta.date_added_to_site); insert_metadata.bind(":date_available", doc_matters.conf_make_meta.meta.date_available); insert_metadata.bind(":date_created", doc_matters.conf_make_meta.meta.date_created); insert_metadata.bind(":date_issued", doc_matters.conf_make_meta.meta.date_issued); insert_metadata.bind(":date_modified", doc_matters.conf_make_meta.meta.date_modified); insert_metadata.bind(":date_published", doc_matters.conf_make_meta.meta.date_published); insert_metadata.bind(":date_valid", doc_matters.conf_make_meta.meta.date_valid); // insert_metadata.bind(":date_translated", doc_matters.conf_make_meta.meta.date_translated); // insert_metadata.bind(":date_original_publication", doc_matters.conf_make_meta.meta.date_original_publication); // insert_metadata.bind(":date_generated", doc_matters.conf_make_meta.meta.date_generated); insert_metadata.bind(":identifier_isbn", doc_matters.conf_make_meta.meta.identifier_isbn); insert_metadata.bind(":identifier_oclc", doc_matters.conf_make_meta.meta.identifier_oclc); insert_metadata.bind(":language_document", doc_matters.conf_make_meta.meta.language_document); insert_metadata.bind(":language_document_char", doc_matters.conf_make_meta.meta.language_document_char); // insert_metadata.bind(":language_original", doc_matters.conf_make_meta.meta.language_original); // insert_metadata.bind(":language_original_char", doc_matters.conf_make_meta.meta.language_original_char); insert_metadata.bind(":notes_abstract", doc_matters.conf_make_meta.meta.notes_abstract); insert_metadata.bind(":notes_description", doc_matters.conf_make_meta.meta.notes_description); insert_metadata.bind(":original_publisher", doc_matters.conf_make_meta.meta.original_publisher); insert_metadata.bind(":original_language", doc_matters.conf_make_meta.meta.original_language); insert_metadata.bind(":original_language_char", doc_matters.conf_make_meta.meta.original_language_char); insert_metadata.bind(":original_source", doc_matters.conf_make_meta.meta.original_source); insert_metadata.bind(":original_title", doc_matters.conf_make_meta.meta.original_title); insert_metadata.bind(":publisher", doc_matters.conf_make_meta.meta.publisher); // insert_metadata.bind(":rights", doc_matters.conf_make_meta.meta.rights); insert_metadata.bind(":rights_copyright", doc_matters.conf_make_meta.meta.rights_copyright); insert_metadata.bind(":rights_copyright_audio", doc_matters.conf_make_meta.meta.rights_copyright_audio); insert_metadata.bind(":rights_copyright_cover", doc_matters.conf_make_meta.meta.rights_copyright_cover); insert_metadata.bind(":rights_copyright_illustrations", doc_matters.conf_make_meta.meta.rights_copyright_illustrations); insert_metadata.bind(":rights_copyright_photographs", doc_matters.conf_make_meta.meta.rights_copyright_photographs); // insert_metadata.bind(":rights_copyright_preparation", doc_matters.conf_make_meta.meta.rights_preparation); insert_metadata.bind(":rights_copyright_text", doc_matters.conf_make_meta.meta.rights_copyright_text); insert_metadata.bind(":rights_copyright_translation", doc_matters.conf_make_meta.meta.rights_copyright_translation); insert_metadata.bind(":rights_copyright_video", doc_matters.conf_make_meta.meta.rights_copyright_video); // insert_metadata.bind(":rights_copyright_digitization", doc_matters.conf_make_meta.meta.rights_digitization); // insert_metadata.bind(":rights_copyright_audio", doc_matters.conf_make_meta.meta.rights_audio); // insert_metadata.bind(":rights_copyright_video", doc_matters.conf_make_meta.meta.rights_video); insert_metadata.bind(":rights_license", doc_matters.conf_make_meta.meta.rights_license); // insert_metadata.bind(":src_filename", doc_matters.conf_make_meta.meta.src_filename); // insert_metadata.bind(":src_fingerprint", doc_matters.conf_make_meta.meta.src_fingerprint); // insert_metadata.bind(":src_filesize", doc_matters.conf_make_meta.meta.src_filesize); // insert_metadata.bind(":src_wordcount", doc_matters.conf_make_meta.meta.src_wordcount); // insert_metadata.bind(":src_text", doc_matters.conf_make_meta.meta.src_text); // insert_metadata.bind(":fulltext", doc_matters.conf_make_meta.meta.fulltext); // insert_metadata.bind(":links", doc_matters.conf_make_meta.meta.links); insert_metadata.execute(); insert_metadata.reset(); /+ watch +/ writeln("sql statement executed"); assert(db.totalChanges == 1); #+END_SRC ###+name: sqlite_insert_metadata_and_src_text #+BEGIN_SRC d insert_metadata.inject( 1, doc_matters.dochead_meta["title"]["full"], "", "", "", "", "", "", "", doc_matters.dochead_meta["creator"]["author"] ); #+END_SRC ****** 2. _doc objects_ (used with doc_objects in document loop) ******* prepare sql statement ******** d { sql statement #+name: sqlite_insert_doc_objects #+BEGIN_SRC d Statement insert_doc_objects = db.prepare(" #+END_SRC ********* sql insert into lid unique, increment by 1 per object, not ocn metadata tid document number unique either: - increment by adding 1 for each document, - make hash of document filename or url and use? #+name: sqlite_insert_doc_objects #+BEGIN_SRC sql INSERT INTO doc_objects ( lid, metadata_tid, ocn, ocnd, ocns, clean, body, book_idx, seg, lev_an, lev, lev0, lev1, lev2, lev3, lev4, lev5, lev6, lev7, en_a, en_z, en_a_asterisk, en_z_asterisk, en_a_plus, en_z_plus, t_of, t_is, node, parent, digest_clean, digest_all, types ) #+END_SRC ********* sql values #+name: sqlite_insert_doc_objects #+BEGIN_SRC sql VALUES ( :lid, :metadata_tid, :ocn, :ocnd, :ocns, :clean, :body, :book_idx, :seg, :lev_an, :lev, :lev0, :lev1, :lev2, :lev3, :lev4, :lev5, :lev6, :lev7, :en_a, :en_z, :en_a_asterisk, :en_z_asterisk, :en_a_plus, :en_z_plus, :t_of, :t_is, :node, :parent, :digest_clean, :digest_all, :types ) #+END_SRC ******** d } #+name: sqlite_insert_doc_objects #+BEGIN_SRC d "); return insert_doc_objects; #+END_SRC ******* TODO (within loop not here - insert doc objects work out *** 3. hub (sqlite_format_and_load_objects) ***** sql related #+name: sqlite_load_object #+BEGIN_SRC d auto sqlite_load_string(O,Dm)( auto return ref const O obj, auto return ref Dm doc_matters, ) { string o; return o; } #+END_SRC #+name: sqlite_load_object #+BEGIN_SRC d auto postgresql_load_string(O,Dm)( auto return ref const O obj, auto return ref Dm doc_matters, ) { string o; return o; } #+END_SRC #+name: sqlite_load_object #+BEGIN_SRC d string sqlite_statement(O)( auto return ref const O obj, string _txt, string _html, ) { void _sql_exe(O)( string _sql, ) { writeln(_html); writeln(_sql); } string _sql; return _sql; } #+END_SRC ***** heading #+name: hub_format_and_sqlite_load_objects #+BEGIN_SRC d auto heading(O)( auto return ref const O obj, ) { string[string] obj_txt = [ "text": generic_munge_sanitize_text_for_search(obj.text), "html": html_heading(obj) ]; if (doc_matters.opt_action.debug_do) { debug(sql_txt) { writeln(obj_txt["text"]); } debug(sql_html) { writeln(obj_txt["html"]); } } else { // load sql } return obj_txt; } #+END_SRC ***** para #+name: hub_format_and_sqlite_load_objects #+BEGIN_SRC d auto para(O)( auto return ref const O obj, ) { string[string] obj_txt = [ "text": generic_munge_sanitize_text_for_search(obj.text), "html": html_para(obj) ]; if (doc_matters.opt_action.debug_do) { debug(sql_txt) { writeln(obj_txt["text"]); } debug(sql_html) { writeln(obj_txt["html"]); } } else { // load sql } return obj_txt; } #+END_SRC ***** quote #+name: hub_format_and_sqlite_load_objects #+BEGIN_SRC d auto quote(O)( auto return ref const O obj, ) { string[string] obj_txt = [ "text": generic_munge_sanitize_text_for_search(obj.text), "html": html_quote(obj) ]; if (doc_matters.opt_action.debug_do) { debug(sql_txt) { writeln(obj_txt["text"]); } debug(sql_html) { writeln(obj_txt["html"]); } } else { // load sql } return obj_txt; } #+END_SRC ***** group #+name: hub_format_and_sqlite_load_objects #+BEGIN_SRC d auto group(O)( auto return ref const O obj, ) { string[string] obj_txt = [ "text": generic_munge_sanitize_text_for_search(obj.text), "html": html_group(obj) ]; if (doc_matters.opt_action.debug_do) { debug(sql_txt) { writeln(obj_txt["text"]); } debug(sql_html) { writeln(obj_txt["html"]); } } else { // load sql } return obj_txt; } #+END_SRC ***** block #+name: hub_format_and_sqlite_load_objects #+BEGIN_SRC d auto block(O)( auto return ref const O obj, ) { string[string] obj_txt = [ "text": generic_munge_sanitize_text_for_search(obj.text), "html": html_block(obj) ]; if (doc_matters.opt_action.debug_do) { debug(sql_txt) { writeln(obj_txt["text"]); } debug(sql_html) { writeln(obj_txt["html"]); } } else { // load sql } return obj_txt; } #+END_SRC ***** verse #+name: hub_format_and_sqlite_load_objects #+BEGIN_SRC d auto verse(O)( auto return ref const O obj, ) { string[string] obj_txt = [ "text": generic_munge_sanitize_text_for_search(obj.text), "html": html_verse(obj) ]; if (doc_matters.opt_action.debug_do) { debug(sql_txt) { writeln(obj_txt["text"]); } debug(sql_html) { writeln(obj_txt["html"]); } } else { // load sql } return obj_txt; } #+END_SRC ***** code #+name: hub_format_and_sqlite_load_objects #+BEGIN_SRC d auto code(O)( auto return ref const O obj, ) { string[string] obj_txt = [ "text": generic_munge_sanitize_text_for_search(obj.text), "html": html_code(obj) ]; if (doc_matters.opt_action.debug_do) { debug(sql_txt) { writeln(obj_txt["text"]); } debug(sql_html) { writeln(obj_txt["html"]); } } else { // load sql } return obj_txt; } #+END_SRC ***** table #+name: hub_format_and_sqlite_load_objects #+BEGIN_SRC d auto table(O)( auto return ref const O obj, ) { string[string] obj_txt = [ "text": generic_munge_sanitize_text_for_search(obj.text), "html": html_table(obj) ]; if (doc_matters.opt_action.debug_do) { debug(sql_txt) { writeln(obj_txt["text"]); } debug(sql_html) { writeln(obj_txt["html"]); } } else { // load sql } return obj_txt; } #+END_SRC ** 3. loop, identify, load - loop template #+name: sqlite_objects_loop #+BEGIN_SRC d Statement insert_doc_objects = SQLiteInstruct!()(doc_matters); auto format_and_sqlite_load = SQLiteFormatAndLoadObject!()(doc_matters); string[string] obj_txt; string doc_text; foreach (part; doc_parts) { foreach (obj; doc_abstraction[part]) { switch (obj.of_part) { case "frontmatter": assert(part == "head", part); switch (obj.is_of) { case "para": switch (obj.is_a) { case "heading": obj_txt = format_and_sqlite_load.heading(obj); break; default: if ((doc_matters.opt_action.debug_do)) { writeln(__FILE__, ":", __LINE__, ": ", obj.is_a); } break; } break; default: if ((doc_matters.opt_action.debug_do)) { writeln(__FILE__, ":", __LINE__, ": ", obj.is_of); } break; } break; case "body": // assert(part == "body", part); // TODO broken switch (obj.is_of) { case "para": switch (obj.is_a) { case "heading": debug (asserts) { // TODO consider and fix or remove if (part != "body") { writeln(__LINE__, ": ", obj.text); } } obj_txt = format_and_sqlite_load.heading(obj); break; case "para": obj_txt = format_and_sqlite_load.para(obj); break; default: if ((doc_matters.opt_action.debug_do)) { writeln(__FILE__, ":", __LINE__, ": ", obj.is_a); } break; } break; case "block": switch (obj.is_a) { case "quote": obj_txt = format_and_sqlite_load.quote(obj); break; case "group": obj_txt = format_and_sqlite_load.group(obj); break; case "block": obj_txt = format_and_sqlite_load.block(obj); break; case "poem": // double check on keeping both poem & verse break; case "verse": obj_txt = format_and_sqlite_load.verse(obj); break; case "code": obj_txt = format_and_sqlite_load.code(obj); break; case "table": obj_txt = format_and_sqlite_load.table(obj); break; default: if ((doc_matters.opt_action.debug_do)) { writeln(__FILE__, ":", __LINE__, ": ", obj.is_a); } break; } break; default: if ((doc_matters.opt_action.debug_do)) { writeln(__FILE__, ":", __LINE__, ": ", obj.is_of); } break; } break; case "backmatter": assert(part == "endnotes" || "glossary" || "bibliography" || "bookindex_seg" || "blurb" || "tail", part); switch (obj.is_of) { case "para": switch (obj.is_a) { case "heading": obj_txt = format_and_sqlite_load.heading(obj); break; case "endnote": assert(part == "endnotes", part); obj_txt = format_and_sqlite_load.para(obj); break; case "glossary": assert(part == "glossary", part); obj_txt = format_and_sqlite_load.para(obj); break; case "bibliography": assert(part == "bibliography", part); obj_txt = format_and_sqlite_load.para(obj); break; case "bookindex": assert(part == "bookindex_seg", part); obj_txt = format_and_sqlite_load.para(obj); break; case "blurb": assert(part == "blurb", part); obj_txt = format_and_sqlite_load.para(obj); break; default: if ((doc_matters.opt_action.debug_do)) { writeln(__FILE__, ":", __LINE__, ": ", obj.is_a); } break; } break; default: if ((doc_matters.opt_action.debug_do)) { writeln(__FILE__, ":", __LINE__, ": ", obj.is_of); } break; } break; case "comment": break; default: if ((doc_matters.opt_action.debug_do)) { writeln(__FILE__, ":", __LINE__, ": ", obj.of_part); // check where empty value could come from writeln(__FILE__, ":", __LINE__, ": ", obj.is_a); writeln(__FILE__, ":", __LINE__, ": ", obj.text); // check where empty value could come from } break; } if (obj.is_a == "heading") { writeln( "markup: ", obj.heading_lev_markup, "> ", obj.dom_markedup, "; collapsed: ", obj.heading_lev_collapsed, "> ", obj.dom_collapsed, "; ocn: ", obj.ocn, " node: ", obj.node, "; parent: ", obj.parent_lev_markup, "; ocn: ", obj.parent_ocn, "; ", ); } insert_doc_objects.bind(":t_of", obj.is_of); insert_doc_objects.bind(":t_is", obj.is_a); insert_doc_objects.bind(":ocn", obj.ocn); insert_doc_objects.bind(":clean", obj_txt["text"]); // consider whether book index info should be made available within clear text for search insert_doc_objects.bind(":body", obj_txt["html"]); // insert_doc_objects.bind(":book_idx", ""); // not needed, but, consider whether should be made available within object for clear text search insert_doc_objects.bind(":lev", obj.heading_lev_markup); // // insert_doc_objects.bind(":dom_markedup", ""); // should make lev sequence below obsolete // // insert_doc_objects.bind(":dom_collapsed", ""); // should add info // insert_doc_objects.bind(":lev0", ""); // insert_doc_objects.bind(":lev1", ""); // insert_doc_objects.bind(":lev2", ""); // insert_doc_objects.bind(":lev3", ""); // insert_doc_objects.bind(":lev4", ""); // insert_doc_objects.bind(":lev5", ""); // insert_doc_objects.bind(":lev6", ""); // insert_doc_objects.bind(":lev7", ""); // insert_doc_objects.bind(":node", ""); // insert_doc_objects.bind(":type", ""); // insert_doc_objects.bind(":parent_ocn", ""); // insert_doc_objects.bind(":ancestors", ""); // insert_doc_objects.bind(":heading_lev_markup", ""); // insert_doc_objects.bind(":heading_lev_collapsed", ""); // insert_doc_objects.bind(":parent_lev_markup", ""); // insert_doc_objects.bind(":heading_ancestors", ""); // insert_doc_objects.bind(":node", ""); insert_doc_objects.execute(); insert_doc_objects.reset(); } } #+END_SRC