From 41ea8e7bcdca22a4fefb1ed85030f7da448461dd Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Mon, 3 Jul 2023 14:19:41 -0400 Subject: homepage updates, re-read --- .gitignore | 4 +- flake.lock | 13 +- markup/.gitignore | 2 + .../spine-bespoke-output/html/homepage.index.html | 671 ++++++++++++++++++++ org/config_git.org | 4 +- org/spine-bespoke-output-homepage-html.org | 691 +++++++++++++++++++++ spine-bespoke-output/html/homepage.index.html | 539 ---------------- 7 files changed, 1373 insertions(+), 551 deletions(-) create mode 100644 markup/spine-bespoke-output/html/homepage.index.html create mode 100644 org/spine-bespoke-output-homepage-html.org delete mode 100644 spine-bespoke-output/html/homepage.index.html diff --git a/.gitignore b/.gitignore index b23a5b0..0751d71 100644 --- a/.gitignore +++ b/.gitignore @@ -57,8 +57,8 @@ !markup/pod/*/tools/po4a/po/*/*.sst.po !markup/pod/*/tools/po4a/po/*/*.ssm.po !markup/pod/*/tools/po4a/po/*/*.ssi.po -!spine-bespoke-output -!spine-bespoke-output/** +!markup/spine-bespoke-output/ +!markup/spine-bespoke-output/** .dub/** **/.dub/** tmp/** diff --git a/flake.lock b/flake.lock index 2170922..d580353 100644 --- a/flake.lock +++ b/flake.lock @@ -74,11 +74,11 @@ }, "nixpkgs": { "locked": { - "lastModified": 1687701825, - "narHash": "sha256-aMC9hqsf+4tJL7aJWSdEUurW2TsjxtDcJBwM9Y4FIYM=", + "lastModified": 1688465017, + "narHash": "sha256-xzFcCnzPOgQaX7Acprfqo+tqHJ2UKWC38pXrcqvdXHU=", "owner": "NixOS", "repo": "nixpkgs", - "rev": "07059ee2fa34f1598758839b9af87eae7f7ae6ea", + "rev": "0d5682acc109add20f70440269587a1b169cc2fe", "type": "github" }, "original": { @@ -104,11 +104,8 @@ ] }, "locked": { - "lastModified": 1687825841, - "narHash": "sha256-xkoFpg6/xTV6/oCtAICCYs6ve7aLkDvOKIiatO+EXVM=", - "ref": "refs/heads/main", - "rev": "7a652e5b73fe5af8a01d14fd282fa2e23781a0a8", - "revCount": 591, + "lastModified": 1687983830, + "narHash": "sha256-uhZwX8E3ZxtT95POA25Hj21hSYUXik1fXSEffOa2+Ts=", "type": "git", "url": 
"file:///home/ralph/grotto/repo/git.repo/projects/doc-reform/code/software/spine" }, diff --git a/markup/.gitignore b/markup/.gitignore index c42c00f..ef74b64 100644 --- a/markup/.gitignore +++ b/markup/.gitignore @@ -60,6 +60,8 @@ !pod/*/media/text/po/*/*.ssm.po !pod/*/media/text/po/*/*.ssi.po !pod/*/media/text/po/*/index.html.in.po +!spine-bespoke-output/ +!spine-bespoke-output/** # skip .dub/** tmp/** diff --git a/markup/spine-bespoke-output/html/homepage.index.html b/markup/spine-bespoke-output/html/homepage.index.html new file mode 100644 index 0000000..017a083 --- /dev/null +++ b/markup/spine-bespoke-output/html/homepage.index.html @@ -0,0 +1,671 @@ + + + + + ≅ SiSU project sisudoc.org + + + + + +

≅ - SiSU for documents - structuring, publishing in multiple +formats & search

+ +

ℹ - A short description

+ +

+ +SiSU is an object-centric, lightweight markup based, document structuring, +parser, publishing and search tool for document collections. It is command line +oriented and generates static content that is currently made searchable at an +object level through an SQL database. +Markup helps define (delineate) objects (primarily various types of text block) +which are tracked in sequence, substantive objects being numbered sequentially +by the program for object citation. + +

+ +

Δ - SiSU project source

+ +

+ + Δ SiSU projects repo (git) +
+ - + https://git.sisudoc.org +
+

+ +

+ + Δ SiSU (scribe): document publishing (multiple formats + search) +
+ - + https://git.sisudoc.org/sisu +
+

+ +

+ + Δ SiSU markup samples in document pods for sisu (scribe) +
+ - + https://git.sisudoc.org/sisu-markup +
+

+ +

⌘ - SiSU Spine markup sample output

+ +

+To give an idea of how this works here is a small collection of documents marked +up for and generated by the software. The curation of topics for a collection of +specialized related documents would benefit from a consistently applied bespoke +ontology or thesaurus.
The documents presented are documents that have been +released under various creative commons licences, in the public domain, or the +author's work, with the exception of one that is under GPL and the old abandoned +Debian live-manual +

+ +

+ + ⌘ Authors + + (software curated from provided document header metadata)
+ - + https://sisudoc.org/spine/authors.html + +

+ +

+ + ⌘ Topics + + (software curated from provided document header metadata)
+ - + https://sisudoc.org/spine/topics.html + +

+ +

፨ - SiSU Spine search

+

+ + ፨ Search + + (granular search of text objects)
+ - + https://sisudoc.org/spine_search + +

+ +
+ + + +
+ +

ℹ - SiSU description

+ +

+ +SiSU is an object-centric, lightweight markup based, document structuring, +parser, publishing and search tool for document collections. It is command line +oriented and generates static content that is currently made searchable at an +object level through an SQL database. +Markup helps define (delineate) objects (primarily various types of text block) +which are tracked in sequence, substantive objects being numbered sequentially +by the program for object citation. + +

+

+ +Summary. An object is a unit of text within a document the most common +being a paragraph. Objects include individual headings, paragraphs, tables, +grouped text of various types such as code blocks and within poems, verse. +Objects have properties and attributes, of particular significance are headings +and their levels which provide document structure. A heading is an object with a +hierarchical value, that conceptually contains other objects (such as paragraphs +and possibly sub-headings etc.). Objects are tracked sequentially as they relate +to each other object within a document and substantive objects are numbered +sequentially, for citation purposes. Notably footnotes are not objects in +themselves, rather belonging to the object from which they are referenced, and +following their own numbering sequence. From heading objects (linked) tables of +content may be generated, and if additional metadata is provided book type +indexes can be generated that link back to the objects to which they relate. + +

+

+ +Unpacking this a bit further. SiSU as a concept independent of its markup +language and the parsers that have been implemented, is based on the following +ideas: + +

+

+ +Object-Centricity. On objects: In SiSU objects are the fundamental unit +from which larger constructs within a document and document itself is built. +Breaking the document into objects provides interesting possibilities. + +

+

+ +Objects are fundamental building blocks: Conceptually within SiSU, +objects are the building blocks or individual units of construction of a +document. Objects are usually blocks of text, the most common of which is the +paragraph, other examples include: individual headings, tables, grouped text of +various types which include code blocks and verse within poems, ... and as +mentioned an object could also, for example, be an image. Objects can be +formatted and placed as needed, providing flexibility and enabling multiple +types of representation across disparate formats and text receptacles, examples +including html, epub, latex (in the past mind-maps) and sql (populated at an +object level, and thereby providing search with that degree of granularity). + +

+

+ +Sequential. Objects have sequence: That objects have sequence, goes +largely without saying, this follows authorship, it is part of the definition of +a document and how a document is written to convey meaning. + +

+

+ +Object Numbers & Citation. Substantive objects are numbered for citation +purposes: Most objects within a document are meant by the author to be a +substantive part of the document. All such objects are numbered sequentially and +can be referenced thereby for citation purposes. +Object numbers provide the possibility of citing/locating text precisely across +different document formats and different languages (assuming the document has +been translated). For search it also makes it possible to identify precisely +where search criteria are met within each document in the form of an index or +to view those precise text objects before deciding which documents are of +interest. Additionally the use of objects (and that objects are numbered) frees +the possibility to represent the document in the manner considered most suitable +to a specific document format whilst retaining its structural (and citation) +integrity. + +

+

+Characteristics. Objects have properties and attributes: Objects have +properties (and may have attributes). By properties I here refer to the +fundamental type of object, be it a heading, a paragraph, table, verse etc. +Attributes extend further and may include other things that one might wish to +associate with the object (examples not necessarily currently available/ +implemented in SiSU might include, formatting whether it is indented, or +metadata e.g. the associated language, or programming language for a code block) + +

+

+ +Document structure. Heading objects hold document structure: Heading +objects hold document structure through their heading level property. The types +of document of interest to SiSU have structure that is captured by the heading +level property. Headings are individual objects like any other with the +additional properties that (i) they may be regarded as containing the other +objects following them sequentially (until the next heading of a similar or +higher level), heading objects may include other headings (sub-headings), and +(ii) that they have a hierarchy, the root "heading" being the document +title.
A complication was introduced to provide greater flexibility across +document output formats. Headings have two sets of levels, the level under which +substantive text occurs, this would be a chapter or segment level, and above +that in the hierarchy if needed are document section separators, book, section, +part. + +

+

+ +Non-objects Most but not all parts of a document are treated as objects. +Notably footnotes are not objects in themselves, rather belonging to the object +from which they are referenced, and following their own numbering sequence. From +heading objects (linked) tables of content may be generated, and if additional +metadata is provided book type indexes can be generated that link back to the +objects to which they relate. + +

+

+ +The Document Header. SiSU documents have headers which contain document +metadata, at a minimum the document title and author. In addition the document +header may contain markup instruction (e.g. how to identify headings within the +document, in which case those headings need not be found and treated +accordingly) + +

+

+ +SiSU parsers have now been implemented in different programming paradigms and +languages a couple of times, the chosen markup has been left unchanged though +the document headers have been modified. + +This is the core of sisu, beyond which there is more but largely in the form of +choices based on ... existing output formats and of implementation detail, +deciding what attributes of objects, or within objects should be supported, +extending markup to allow for the generation of book indexes from if tagging +provided. + +

+ +

ℹ - SiSU Historical Descriptions

+ +

+Here is a description that has been used for the original sisu (scribe): +

+ +

+With minimal preparation of a plain-text (UTF-8) file, using sisu markup syntax +in your text editor of choice, SiSU can generate various document formats, most +of which share a common object numbering system for locating content, including +plain text, HTML, XHTML, XML, EPUB, OpenDocument text (ODF:ODT), LaTeX, PDF +files, and populate an SQL database with objects (roughly paragraph-sized +chunks) so searches may be performed and matches returned with that degree of +granularity. Think of being able to finely match text in documents, using common +object numbers, across different output formats (same object identifier for pdf, +epub or html) and across languages if you have translations of the same document +(same object identifier across languages). For search, your criteria is met by +these documents at these locations within each document (equally relevant across +different output formats and languages). To be clear (if obvious) page numbers +provide none of this functionality. Object numbering is particularly suitable +for "published" works (finalized texts as opposed to works that are frequently +changed or updated) for which it provides a fixed means of reference of content. +Document outputs can also share provided semantic meta-data. +

+ +

...

+ +

+SiSU is less about document layout than it is about finding a way using little +markup to construct an abstract representation of a document that makes it +possible to produce multiple representations of it which may be rather different +from each other and used for different purposes, whether layout and publishing, +scrollworthy online viewing/ reading, or content search. To be able to take +advantage from its minimal preparation starting point of some of the strengths +of rather different established ways of representing documents for different +purposes, whether for search (relational database, or indexed flat files +generated for that purpose whether of complete documents, or say of files made +up of objects), online or other electronic viewing (e.g. html, xml, epub), or +paper publication (e.g. pdf via latex)... +

+ +

+The solution arrived at is to extract structural information about the document +(document sections and headings within the document, available through pattern +matching or markup) and tracking objects (which primarily are defined units of +text such as paragraphs, headings, tables, verse, etc. but also images) which +can be reconstituted as the same documents with relevant object identification +numbers so text (objects) can be referenced across different output formats and +presentations. +

+ +

+SiSU generates tables of content, and through its markup the means for metadata +to be provided for the generation of book style indexes for a document (that +again due to document object numbers are the same and equally relevant across +all document formats). Per document classifying/organizing metadata can also be +provided for automated document curation. +

+ +

+... there have also been working experiments with sisu markup source, two way +conversion/representation of sisu document markup source in mind-mapping +(software kdissert was used for its strong focus on producing documents (now +apparently called semantik)); also po4a software for translators has been used +successfully in its regular text mode for sisu markup in translation, (which is +more an attribute of po4a than of sisu, but) which is of interest due to +sisu/spine's object citation numbering being available across translations. Open +Document Format text (odf:odt), has been an output, but much more interesting +(and requested by potential users of sisu/spine) would be the ability of a word +processor to save text/a document in sisu markup, making alternative document +processing and presentations with sisu possible. +

+ +

+also worth mention, in the relatively long history of this project, there has +been work done on extracting hash representations of each object, that could +hypothetically be shared to prove the content of a document without sharing its +content, or of identifying which objects change; these hashes can also be used +as unique identifiers in a database or as identifying filenames if individual +objects are saved. +

+ +

+SiSU has evolved, the current implementation focuses on one primary use-case, +books and literary writings. However the concept on which it is based has wider +application. Here is a previously posted souvenir from my encounter with an IBM +software evaluator in London June 2004 that came about through a chance +encounter with an IBM manager at a Linux Expo, who was curious about my interest +in Gnu/Linux with my legal background... on hearing that I also wrote software, +he suggested, maybe IBM should have a look at it. I was interested, the meeting +was set up... with an IBM, Software Innovations evaluator
His response after +the meeting: +

+ +

+"Ralph
Good to meet with you today, I was very impressed with your +software.
[colleague's name (also posted to an IBM colleague)] - in +summary - Ralph has built an application that runs on linux and takes ASCII +documents and pulls them apart in to the smallest constituent parts, storing +them as XML, PDF and HTML, the HTML are hyperlinked up so the document can be +browsed in its full form. the format and text data created is stored in a +database.
This has potential in any place that needs the power of full text +search whilst holding the structural concepts of the document i.e. legal, +pharma, education, research.. which ones we need to figure out, ..." +

+ +

+Special interest was expressed in the search implications of SiSU. To +paraphrase, the company has document management systems dealing with hundreds of +thousands of texts, these tell you which documents match your search criteria, +but cannot inform you where within a text these matches were found without +opening the documents. This is achieved through defining document objects and +making them the building block of the document, trackable document objects (that +can be placed back in the context of the document or corpus of documents if part +of a collection). SiSU's early design was to - abstract documents to their +structure, and identified objects, numbered in a citable way (as pointed out +document object hashes can be of use for the purpose). +

+ +

ℹ - SiSU Spine

+ +

+SiSU Spine is the new generator for documents prepared in sisu markup, written +in D as opposed to the original sisu which was first shared in Ruby. +

+ +

+Spine code has not as yet been made publicly available. +

+ +

+As compared with the original sisu generator sisu spine: +

+ +

+- Spine uses the same document markup for the document body, but uses yaml for +document headers (which contains document metadata and configuration details), +the original sisu has a bespoke markup for headers. +

+ +

+- Spine (written in D) is considerably faster at generating native output than +sisu (written in Ruby), on last test at least 60 times faster (what took 1 +minute takes 1 second; 1 hour a minute :-) (admittedly some time ago, ruby has +been getting faster, hopefully this is not over-promising). +

+ +

+- Spine produces fewer document output types than sisu (html, epub, (odt, +latex) and populates sql db for search) +

+ +

+- As regards non-native output, so far Spine has greater separation of what it +does and largely leaves calling the external program to the user, e.g.: latex +output is a native output in the sense that it is generated directly by spine, +but the pdfs that can be produced from these are produced through use of an +external program xelatex, which produces fine output but is a very much slower +process. +

+ +

+- (where both produce the same output type, generally) Spine generally produces +more up to date output format representations. +

+ +
+

+ralph.amissah www since 1993 ;-) +

+ +
+

Some external links of interest

+ +

Development

+

Programming

+

+ [ + D - (dlang) general purpose, multi-paradigm, fast C like programming language + ] + [ + dub - package registry + ] + [ + community discussion (mail list frontend) + ]
+

+

+ [ + Ruby + ] + [ + Gems + ]
+ [ + Crystal + ]
+

+

SQL DB

+

+ [ + Sqlite - an sql database engine + ]
+ [ + PostgreSQL + ]
+

+

Markup

+

+ [ + HTML + ] + [ + multipage current spec + ] + [ + dom current spec + ]
+ [ + Epub + ]
+ [ + css - cascading style sheets + ]
+

+

+ [ + OpenDocument Format + ]
+

+

+ [ + LaTeX + ]
+

+

+ [ + po4a - maintain translations + ]
+

+

Operating System Distributions

+

+ [ + NixOS - linux based operating system built on the Nix declarative, reproducible and reliable, build system + ] + [ + nixpkgs (packages @ github) + ] + [ + package search + ] + [ + community discussion (discourse) + ]
+ Gnu [ + Guix + ] + [ + packages + ] +
+

+

+ [ + Debian - the universal operating system distribution + ]
+ [ + Devuan + ]
+

+

+ [ + Arch Linux + ] + [ + Arch Wiki + ]
+

+ +
+ +

Extraneous (external) links of personal interest

+ +

Workspace

+ +
Shell
+

+ [ + zsh + ]
+ [ + starship - customizable cross-shell prompt + ]
+

+
Terminal
+

+ [ + tilix + ] + [ + alacritty + ]
+

+
Terminal Multiplexer
+

+ [ + tmux (github) + ] + [ + screen + ]
+

+
Window Manager
+

+ [ + i3wm + ] + [ + sway + ]
+

+
Text Editors
+

+ Gnu Emacs + [ + Doom Emacs (github) + ] + [ + Org-Mode - your life in plain text & literate programming + ] + [ + Evil-Mode + ]
+

+

+ [ + Vim + ] + [ + NeoVim + ]
+

+
Source Control Manager
+

+ [ + Git + ]
+

+
Browsers
+

+ [ + vieb + ] + [ + vimb + ]
+ [ + brave + ]
+

+ +

Search

+

+ [ + DuckDuckGo + ] + [ + YubNub + ]
+

+ +

eMail

+

+ [ + Migadu + ]
+

+

+ [ + NotmuchMail + ]
+

+ +

Forges

+

+ [ + Sourcehut + ]
+

+

+ [ + CodeBerg + ]
+

+

+ [ + GitHub + ] + [ + GitLab + ]
+

+ +

Software Archives

+

+ [ + Software Heritage - the universal software archive + ]
+

+ +
+

+ralph.amissah www since 1993 ;-) +

+ + + diff --git a/org/config_git.org b/org/config_git.org index a76f0e8..1aa5744 100644 --- a/org/config_git.org +++ b/org/config_git.org @@ -79,8 +79,8 @@ !markup/pod/*/tools/po4a/po/*/*.sst.po !markup/pod/*/tools/po4a/po/*/*.ssm.po !markup/pod/*/tools/po4a/po/*/*.ssi.po -!spine-bespoke-output -!spine-bespoke-output/** +!markup/spine-bespoke-output/ +!markup/spine-bespoke-output/** .dub/** ,**/.dub/** tmp/** diff --git a/org/spine-bespoke-output-homepage-html.org b/org/spine-bespoke-output-homepage-html.org new file mode 100644 index 0000000..c980a7a --- /dev/null +++ b/org/spine-bespoke-output-homepage-html.org @@ -0,0 +1,691 @@ +-*- mode: org -*- +#+TITLE: configuration git +#+DESCRIPTION: documents - structuring, various output representations & search +#+FILETAGS: :spine:hub: +#+AUTHOR: Ralph Amissah +#+EMAIL: [[mailto:ralph.amissah@gmail.com][ralph.amissah@gmail.com]] +#+COPYRIGHT: Copyright (C) 2015 - 2023 Ralph Amissah +#+LANGUAGE: en +#+STARTUP: content hideblocks hidestars noindent entitiespretty +#+PROPERTY: header-args :exports code +#+PROPERTY: header-args+ :noweb yes +#+PROPERTY: header-args+ :results no +#+PROPERTY: header-args+ :cache no +#+PROPERTY: header-args+ :padline no +#+PROPERTY: header-args+ :mkdirp yes +#+OPTIONS: H:3 num:nil toc:t \n:t ::t |:t ^:nil -:t f:t *:t + +#+HEADER: :tangle "../markup/spine-bespoke-output/html/homepage.index.html" +#+BEGIN_SRC html + + + + + ≅ SiSU project sisudoc.org + + + + + +

≅ - SiSU for documents - structuring, publishing in multiple +formats & search

+ +

ℹ - A short description

+ +

+ +SiSU is an object-centric, lightweight markup based, document structuring, +parser, publishing and search tool for document collections. It is command line +oriented and generates static content that is currently made searchable at an +object level through an SQL database. +Markup helps define (delineate) objects (primarily various types of text block) +which are tracked in sequence, substantive objects being numbered sequentially +by the program for object citation. + +

+ +

Δ - SiSU project source

+ +

+ + Δ SiSU projects repo (git) +
+ - + https://git.sisudoc.org +
+

+ +

+ + Δ SiSU (scribe): document publishing (multiple formats + search) +
+ - + https://git.sisudoc.org/sisu +
+

+ +

+ + Δ SiSU markup samples in document pods for sisu (scribe) +
+ - + https://git.sisudoc.org/sisu-markup +
+

+ +

⌘ - SiSU Spine markup sample output

+ +

+To give an idea of how this works here is a small collection of documents marked +up for and generated by the software. The curation of topics for a collection of +specialized related documents would benefit from a consistently applied bespoke +ontology or thesaurus.
The documents presented are documents that have been +released under various creative commons licences, in the public domain, or the +author's work, with the exception of one that is under GPL and the old abandoned +Debian live-manual +

+ +

+ + ⌘ Authors + + (software curated from provided document header metadata)
+ - + https://sisudoc.org/spine/authors.html + +

+ +

+ + ⌘ Topics + + (software curated from provided document header metadata)
+ - + https://sisudoc.org/spine/topics.html + +

+ +

፨ - SiSU Spine search

+

+ + ፨ Search + + (granular search of text objects)
+ - + https://sisudoc.org/spine_search + +

+ +
+ + + +
+ +

ℹ - SiSU description

+ +

+ +SiSU is an object-centric, lightweight markup based, document structuring, +parser, publishing and search tool for document collections. It is command line +oriented and generates static content that is currently made searchable at an +object level through an SQL database. +Markup helps define (delineate) objects (primarily various types of text block) +which are tracked in sequence, substantive objects being numbered sequentially +by the program for object citation. + +

+

+ +Summary. An object is a unit of text within a document the most common +being a paragraph. Objects include individual headings, paragraphs, tables, +grouped text of various types such as code blocks and within poems, verse. +Objects have properties and attributes, of particular significance are headings +and their levels which provide document structure. A heading is an object with a +hierarchical value, that conceptually contains other objects (such as paragraphs +and possibly sub-headings etc.). Objects are tracked sequentially as they relate +to each other object within a document and substantive objects are numbered +sequentially, for citation purposes. Notably footnotes are not objects in +themselves, rather belonging to the object from which they are referenced, and +following their own numbering sequence. From heading objects (linked) tables of +content may be generated, and if additional metadata is provided book type +indexes can be generated that link back to the objects to which they relate. + +

+

+ +Unpacking this a bit further. SiSU as a concept independent of its markup +language and the parsers that have been implemented, is based on the following +ideas: + +

+

+ +Object-Centricity. On objects: In SiSU objects are the fundamental unit +from which larger constructs within a document and document itself is built. +Breaking the document into objects provides interesting possibilities. + +

+

+ +Objects are fundamental building blocks: Conceptually within SiSU, +objects are the building blocks or individual units of construction of a +document. Objects are usually blocks of text, the most common of which is the +paragraph, other examples include: individual headings, tables, grouped text of +various types which include code blocks and verse within poems, ... and as +mentioned an object could also, for example, be an image. Objects can be +formatted and placed as needed, providing flexibility and enabling multiple +types of representation across disparate formats and text receptacles, examples +including html, epub, latex (in the past mind-maps) and sql (populated at an +object level, and thereby providing search with that degree of granularity). + +

+

+ +Sequential. Objects have sequence: That objects have sequence, goes +largely without saying, this follows authorship, it is part of the definition of +a document and how a document is written to convey meaning. + +

+

+ +Object Numbers & Citation. Substantive objects are numbered for citation +purposes: Most objects within a document are meant by the author to be a +substantive part of the document. All such objects are numbered sequentially and +can be referenced thereby for citation purposes. +Object numbers provide the possibility of citing/locating text precisely across +different document formats and different languages (assuming the document has +been translated). For search it also makes it possible to identify precisely +where search criteria are met within each document in the form of an index or +to view those precise text objects before deciding which documents are of +interest. Additionally the use of objects (and that objects are numbered) frees +the possibility to represent the document in the manner considered most suitable +to a specific document format whilst retaining its structural (and citation) +integrity. + +

+

+Characteristics. Objects have properties and attributes: Objects have +properties (and may have attributes). By properties I here refer to the +fundamental type of object, be it a heading, a paragraph, table, verse etc. +Attributes extend further and may include other things that one might wish to +associate with the object (examples not necessarily currently available/ +implemented in SiSU might include, formatting whether it is indented, or +metadata e.g. the associated language, or programming language for a code block) + +

+

+ +Document structure. Heading objects hold document structure: Heading +objects hold document structure through their heading level property. The types +of document of interest to SiSU have structure that is captured by the heading +level property. Headings are individual objects like any other with the +additional properties that (i) they may be regarded as containing the other +objects following them sequentially (until the next heading of a similar or +higher level), heading objects may include other headings (sub-headings), and +(ii) that they have a hierarchy, the root "heading" being the document +title.
A complication was introduced to provide greater flexibility across +document output formats. Headings have two sets of levels, the level under which +substantive text occurs, this would be a chapter or segment level, and above +that in the hierarchy if needed are document section separators, book, section, +part. + +

+

+ +Non-objects Most but not all parts of a document are treated as objects. +Notably footnotes are not objects in themselves, rather belonging to the object +from which they are referenced, and following their own numbering sequence. From +heading objects (linked) tables of content may be generated, and if additional +metadata is provided book type indexes can be generated that link back to the +objects to which they relate. + +

+

+ +The Document Header. SiSU documents have headers which contain document +metadata, at a minimum the document title and author. In addition the document +header may contain markup instruction (e.g. how to identify headings within the +document, in which case those headings need not be found and treated +accordingly) + +

+

+ +SiSU parsers have now been implemented in different programming paradigms and +languages a couple of times, the chosen markup has been left unchanged though +the document headers have been modified. + +This is the core of sisu, beyond which there is more but largely in the form of +choices based on ... existing output formats and of implementation detail, +deciding what attributes of objects, or within objects should be supported, +extending markup to allow for the generation of book indexes from if tagging +provided. + +

+ +

ℹ - SiSU Historical Descriptions

+ +

+Here is a description that has been used for the original sisu (scribe): +

+ +

+With minimal preparation of a plain-text (UTF-8) file, using sisu markup syntax +in your text editor of choice, SiSU can generate various document formats, most +of which share a common object numbering system for locating content, including +plain text, HTML, XHTML, XML, EPUB, OpenDocument text (ODF:ODT), LaTeX, PDF +files, and populate an SQL database with objects (roughly paragraph-sized +chunks) so searches may be performed and matches returned with that degree of +granularity. Think of being able to finely match text in documents, using common +object numbers, across different output formats (same object identifier for pdf, +epub or html) and across languages if you have translations of the same document +(same object identifier across languages). For search, your criteria is met by +these documents at these locations within each document (equally relevant across +different output formats and languages). To be clear (if obvious) page numbers +provide none of this functionality. Object numbering is particularly suitable +for "published" works (finalized texts as opposed to works that are frequently +changed or updated) for which it provides a fixed means of reference of content. +Document outputs can also share provided semantic meta-data. +

+ +

...

+ +

+SiSU is less about document layout than it is about finding a way using little +markup to construct an abstract representation of a document that makes it +possible to produce multiple representations of it which may be rather different +from each other and used for different purposes, whether layout and publishing, +scrollworthy online viewing/ reading, or content search. To be able to take +advantage from its minimal preparation starting point of some of the strengths +of rather different established ways of representing documents for different +purposes, whether for search (relational database, or indexed flat files +generated for that purpose whether of complete documents, or say of files made +up of objects), online or other electronic viewing (e.g. html, xml, epub), or +paper publication (e.g. pdf via latex)... +

+ +

+The solution arrived at is to extract structural information about the document +(document sections and headings within the document, available through pattern +matching or markup) and tracking objects (which primarily are defined units of +text such as paragraphs, headings, tables, verse, etc. but also images) which +can be reconstituted as the same documents with relevant object identification +numbers so text (objects) can be referenced across different output formats and +presentations. +

+ +

+SiSU generates tables of content, and through its markup the means for metadata +to be provided for the generation of book style indexes for a document (that +again due to document object numbers are the same and equally relevant across +all document formats). Per document classifying/organizing metadata can also be +provided for automated document curation. +

+ +

+... there have also been working experiments with sisu markup source, two way +conversion/representation of sisu document markup source in mind-mapping +(software kdissert was used for its strong focus on producing documents (now +apparently called semantik)); also po4a software for translators has been used +successfully in its regular text mode for sisu markup in translation, (which is +more an attribute of po4a than of sisu, but) which is of interest due to +sisu/spine's object citation numbering being available across translations. Open +Document Format text (odf:odt), has been an output, but much more interesting +(and requested by potential users of sisu/spine) would be the ability of a word +processor to save text/a document in sisu markup, making alternative document +processing and presentations with sisu possible. +

+ +

+also worth mention, in the relatively long history of this project, there has +been work done on extracting hash representations of each object, that could +hypothetically be shared to prove the content of a document without sharing its +content, or of identifying which objects change; these hashes can also be used +as unique identifiers in a database or as identifying filenames if individual +objects are saved. +

+ +

+SiSU has evolved, the current implementation focuses on one primary use-case, +books and literary writings. However the concept on which it is based has wider +application. Here is a previously posted souvenir from my encounter with an IBM +software evaluator in London June 2004 that came about through a chance +encounter with an IBM manager at a Linux Expo, who was curious about my interest +in Gnu/Linux with my legal background... on hearing that I also wrote software, +he suggested, maybe IBM should have a look at it. I was interested, the meeting +was set up... with an IBM, Software Innovations evaluator
His response after +the meeting: +

+ +

+"Ralph
Good to meet with you today, I was very impressed with your +software.
[colleague's name (also posted to an IBM colleague)] - in +summary - Ralph has built an application that runs on linux and takes ASCII +documents and pulls them apart in to the smallest constituent parts, storing +them as XML, PDF and HTML, the HTML are hyperlinked up so the document can be +browsed in its full form. the format and text data created is stored in a +database.
This has potential in any place that needs the power of full text +search whilst holding the structural concepts of the document i.e. legal, +pharma, education, research.. which ones we need to figure out, ..." +

+ +

+Special interest was expressed in the search implications of SiSU. To +paraphrase, the company has document management systems dealing with hundreds of +thousands of texts, these tell you which documents match your search criteria, +but cannot inform you where within a text these matches were found without +opening the documents. This is achieved through defining document objects and +making them the building block of the document, trackable document objects (that +can be placed back in the context of the document or corpus of documents if part +of a collection). SiSU's early design was to - abstract documents to their +structure, and identified objects, numbered in a citable way (as pointed out +document object hashes can be of use for the purpose). +

+ +

ℹ - SiSU Spine

+ +

+SiSU Spine is the new generator for documents prepared in sisu markup, written +in D as opposed to the original sisu which was first shared in Ruby. +

+ +

+Spine code has not as yet been made publicly available. +

+ +

+As compared with the original sisu generator sisu spine: +

+ +

+- Spine uses the same document markup for the document body, but uses yaml for +document headers (which contain document metadata and configuration details), +the original sisu has a bespoke markup for headers. +

+ +

+- Spine (written in D) is considerably faster at generating native output than +sisu (written in Ruby), on last test at least 60 times faster (what took 1 +minute takes 1 second; 1 hour a minute :-) (admittedly some time ago, ruby has +been getting faster, hopefully this is not over promising). +

+ +

+- Spine produces fewer document output types than sisu (html, epub, (odt, +latex) and populates sql db for search) +

+ +

+- As regards non-native output, so far Spine has greater separation of what it +does and largely leaves calling the external program to the user, e.g.: latex +output is a native output in the sense that it is generated directly by spine, +but the pdfs that can be produced from these are produced through use of an +external program xelatex, which produces fine output but is a very much slower +process. +

+ +

+- (where both produce the same output type, generally) Spine generally produces +more up to date output format representations. +

+ +
+

+ralph.amissah www since 1993 ;-) +

+ +
+

Some external links of interest

+ +

Development

+

Programming

+

+ [ + D - (dlang) general purpose, multi-paradigm, fast C like programming language + ] + [ + dub - package registry + ] + [ + community discussion (mail list frontend) + ]
+

+

+ [ + Ruby + ] + [ + Gems + ]
+ [ + Crystal + ]
+

+

SQL DB

+

+ [ + Sqlite - an sql database engine + ]
+ [ + PostgreSQL + ]
+

+

Markup

+

+ [ + HTML + ] + [ + multipage current spec + ] + [ + dom current spec + ]
+ [ + Epub + ]
+ [ + css - cascading style sheets + ]
+

+

+ [ + OpenDocument Format + ]
+

+

+ [ + LaTeX + ]
+

+

+ [ + po4a - maintain translations + ]
+

+

Operating System Distributions

+

+ [ + NixOS - linux based operating system built on the Nix declarative, reproducible and reliable, build system + ] + [ + nixpkgs (packages @ github) + ] + [ + package search + ] + [ + community discussion (discourse) + ]
+ Gnu [ + Guix + ] + [ + packages + ] +
+

+

+ [ + Debian - the universal operating system distribution + ]
+ [ + Devuan + ]
+

+

+ [ + Arch Linux + ] + [ + Arch Wiki + ]
+

+ +
+ +

Extraneous (external) links of personal interest

+ +

Workspace

+ +
Shell
+

+ [ + zsh + ]
+ [ + starship - customizable cross-shell prompt + ]
+

+
Terminal
+

+ [ + tilix + ] + [ + alacritty + ]
+

+
Terminal Multiplexer
+

+ [ + tmux (github) + ] + [ + screen + ]
+

+
Window Manager
+

+ [ + i3wm + ] + [ + sway + ]
+

+
Text Editors
+

+ Gnu Emacs + [ + Doom Emacs (github) + ] + [ + Org-Mode - your life in plain text & literate programming + ] + [ + Evil-Mode + ]
+

+

+ [ + Vim + ] + [ + NeoVim + ]
+

+
Source Control Manager
+

+ [ + Git + ]
+

+
Browsers
+

+ [ + vieb + ] + [ + vimb + ]
+ [ + brave + ]
+

+ +

Search

+

+ [ + DuckDuckGo + ] + [ + YubNub + ]
+

+ +

eMail

+

+ [ + Migadu + ]
+

+

+ [ + NotmuchMail + ]
+

+ +

Forges

+

+ [ + Sourcehut + ]
+

+

+ [ + CodeBerg + ]
+

+

+ [ + GitHub + ] + [ + GitLab + ]
+

+ +

Software Archives

+

+ [ + Software Heritage - the universal software archive + ]
+

+ +
+

+ralph.amissah www since 1993 ;-) +

+ + + +#+END_SRC diff --git a/spine-bespoke-output/html/homepage.index.html b/spine-bespoke-output/html/homepage.index.html deleted file mode 100644 index abf0a68..0000000 --- a/spine-bespoke-output/html/homepage.index.html +++ /dev/null @@ -1,539 +0,0 @@ - - - - - ≅ SiSU project sisudoc.org - - - - - -

≅ - SiSU for documents - structuring, publishing in multiple -formats & search

- -

ℹ - A short description

- -

-SiSU is an object-centric, lightweight markup based, document structuring, -parser, publishing and search tool for document collections. It is command line -oriented and generates static content that is also made searchable at an object -level through an SQL database. -

- -

- -SiSU markup helps define (delineate) text objects which are numbered -sequentially by the program for object citation. Breaking the document into -objects provides interesting possibilities. These object numbers provide the -possibility of citing/locating text precisely across different document formats -and different languages (assuming the document has been translated). For search -it also makes it possible to identify precisely where within in each document -search criteria is met in the form of an index. Additionally the use of objects -(and that objects are numbered) frees the possibility to represent the document -in the manner considered most suitable to a specific document format (whilst -retaining its structural (and citation) integrity). - -

- -

Δ - SiSU project source

- -

- - Δ SiSU projects repo (git) -
- - - https://git.sisudoc.org -
-

- -

- - Δ SiSU (scribe): document publishing (multiple formats + search) -
- - - https://git.sisudoc.org/sisu -
-

- -

- - Δ SiSU markup samples in document pods for sisu (scribe) -
- - - https://git.sisudoc.org/sisu-markup -
-

- -

⌘ - SiSU Spine markup sample output

- -

-To give an idea of how this works here is a small collection of documents marked -up for and generated by the software. The curation of topics for a collection of -specialized related documents would benefit from a consistently applied bespoke -ontology or thesaurus.
The documents presented are documents that have been -released under various creative commons licences, in the public domain, or the -author's work, with the exception of one that is under GPL and the old abandoned -Debian live-manual -

- -

- - ⌘ Authors - - (software curated from provided document header metadata)
- - - https://sisudoc.org/spine/authors.html - -

- -

- - ⌘ Topics - - (software curated from provided document header metadata)
- - - https://sisudoc.org/spine/topics.html - -

- -

፨ - SiSU Spine search

-

- - ፨ Search - - (granular search of text objects)
- - - https://sisudoc.org/spine_search - -

- -
- - - -
- -

ℹ - SiSU description

- -

-Here is a description that has been used for the original sisu (scribe): -

- -

-With minimal preparation of a plain-text (UTF-8) file, using sisu markup syntax -in your text editor of choice, SiSU can generate various document formats, most -of which share a common object numbering system for locating content, including -plain text, HTML, XHTML, XML, EPUB, OpenDocument text (ODF:ODT), LaTeX, PDF -files, and populate an SQL database with objects (roughly paragraph-sized -chunks) so searches may be performed and matches returned with that degree of -granularity. Think of being able to finely match text in documents, using common -object numbers, across different output formats (same object identifier for pdf, -epub or html) and across languages if you have translations of the same document -(same object identifier across languages). For search, your criteria is met by -these documents at these locations within each document (equally relevant across -different output formats and languages). To be clear (if obvious) page numbers -provide none of this functionality. Object numbering is particularly suitable -for "published" works (finalized texts as opposed to works that are frequently -changed or updated) for which it provides a fixed means of reference of content. -Document outputs can also share provided semantic meta-data. -

- -

...

- -

-SiSU is less about document layout than it is about finding a way using little -markup to construct an abstract representation of a document that makes it -possible to produce multiple representations of it which may be rather different -from each other and used for different purposes, whether layout and publishing, -scrollworthy online viewing/ reading, or content search. To be able to take -advantage from its minimal preparation starting point of some of the strengths -of rather different established ways of representing documents for different -purposes, whether for search (relational database, or indexed flat files -generated for that purpose whether of complete documents, or say of files made -up of objects), online or other electronic viewing (e.g. html, xml, epub), or -paper publication (e.g. pdf via latex)... -

- -

-The solution arrived at is to extract structural information about the document -(document sections and headings within the document, available through pattern -matching or markup) and tracking objects (which primarily are defined units of -text such as paragraphs, headings, tables, verse, etc. but also images) which -can be reconstituted as the same documents with relevant object identification -numbers so text (objects) can be referenced across different output formats and -presentations. -

- -

-SiSU generates tables of content, and through its markup the means for metadata -to be provided for the generation of book style indexes for a document (that -again due to document object numbers are the same and equally relevant across -all document formats). Per document classifying/organizing metadata can also be -provided for automated document curation. -

- -

-... there have also been working experiments with sisu markup source, two way -conversion/representation of sisu document markup source in mind-mapping -(software kdissert was used for its strong focus on producing documents (now -apparently called semantik)); also po4a software for translators has been used -successfuly in its regular text mode for sisu markup in translation, (which is -more an attribute of po4a than of sisu, but) which is of interest due to -sisu/spine's object citation numbering being available across translations. Open -Document Format text (odf:odt), has been an output, but much more interesting -(and requested by potential users of sisu/spine) would be the ability of a word -processor to save text/a document in sisu markup, making alternative document -processing and presentations with sisu possible. -

- -

-also worth mention, in the relatively long history of this project, there has -been work done on extracting hash representations of each object, that could -hypothetically be shared to prove the content of a document without sharing its -content, or of identifying which objects change; these hashes can also be used -as unique identifiers in a database or as identifying filenames if individual -objects are saved. -

- -

-SiSU has evolved, the current implementation focuses on one primary use-case, -books and literary writings. However the concept on which it is based has wider -application. Here is a prevously posted souvenir from my encounter with an IBM -software evaluator in London June 2004 that came about through a chance -encounter with an IBM manager at a Linux Expo, who was curious about my interest -in Gnu/Linux with my legal background... on hearing that I also wrote software, -he suggested, maybe IBM should have a look at it. I was interested, the meeting -was set up... with an IBM, Software Innovations evaluator
His response after -the meeting: -

- -

-"Ralph
Good to meet with you today, I was very impressed with your -software.
[colleague's name (also posted to an IBM colleague)] - in -summary - Ralph has built an application that runs on linux and takes ASCII -documents and pulls them apart in to the smallest constituent parts, storing -them as XML, PDF and HTML, the HTML are hyperlinked up so the document can be -browsed in its full form. the format and text data created is stored in a -database.
This has potential in any place that needs the power of full text -search whilst holding the structural concepts of the document i.e. legal, -pharma, education, research.. which ones we need to figure out, ..." -

- -

-Special interest was expressed in the search implications of SiSU. To -paraphrase, the company has document management systems dealing with hundreds of -thousands of texts, these tell you which documents match your search criteria, -but cannot inform you where within a text these matches were found without -opening the documents. This is achieved through defining document objects and -making them the building block of the document, trackable document objects (that -can be placed back in the context of the document or corpus of documents if part -of a collection). SiSU's early design was to - abstract documents to their -structure, and identified objects, numbered in a citable way (as pointed out -document object hashes can be of use for the purpose). -

- -

ℹ - SiSU Spine

- -

-SiSU Spine is the new generator for documents prepared in sisu markup, written -in D as opposed to the original sisu which was first shared in Ruby. -

- -

-Spine code has not as yet been made publicly available. -

- -

-As compared with the original sisu generator sisu spine: -

- -

-- Spine uses the same document markup for the document body, but uses yaml for -document headers (which contains document metadata and configuration details), -the original sisu has a bespoke markup for headers. -

- -

-- Spine (written in D) is considerably faster at generating native output than -sisu (written in Ruby), on last test at least 60 times faster (what took 1 -minute takes 1 second; 1 hour a minute :-) (admittedly some time ago, ruby has -been getting faster, hopefully this is not over over promising). -

- -

-- Spine produces fewer document outputs types than sisu (html, epub, (odt, -latex) and populates sql db for search) -

- -

-- As regards non-native output, so far Spine has greater separation of what it -does and largely leaves calling the external program to the user, e.g.: latex -output is a native output in the sense that it is generated directly by spine, -but the pdfs that can be produced from these are produced through use of an -external program xelatex, which produces fine output but is a very much slower -process. -

- -

-- (where both produce the same output type, generally) Spine generally produces -more up to date output format representations. -

- -
-

-ralph.amissah www since 1993 ;-) -

- -
-

Some external links of interest

- -

Development

-

Programming

-

- [ - D - (dlang) general purpose, multi-paradigm, fast C like programming language - ] - [ - dub - package registry - ] - [ - community discussion (mail list frontend) - ]
-

-

- [ - Ruby - ] - [ - Gems - ]
- [ - Crystal - ]
-

-

SQL DB

-

- [ - Sqlite - an sql database engine - ]
- [ - PostgreSQL - ]
-

-

Markup

-

- [ - HTML - ] - [ - multipage current spec - ] - [ - dom current spec - ]
- [ - Epub - ]
- [ - css - cascading style sheets - ]
-

-

- [ - OpenDocument Format - ]
-

-

- [ - LaTeX - ]
-

-

- [ - po4a - maintain translations - ]
-

-

Operating System Distributions

-

- [ - NixOS - linux based operating system built on the Nix declarative, reproducible and reliable, build system - ] - [ - nixpkgs (packages @ github) - ] - [ - package search - ] - [ - community discussion (discourse) - ]
- Gnu [ - Guix - ] - [ - packages - ] -
-

-

- [ - Debian - the universal operating system distribution - ]
- [ - Devuan - ]
-

-

- [ - Arch Linux - ] - [ - Arch Wiki - ]
-

- -
- -

Extraneous (external) links of personal interest

- -

Workspace

- -
Shell
-

- [ - zsh - ]
- [ - starship - customizable cross-shell prompt - ]
-

-
Terminal
-

- [ - tilix - ] - [ - alacritty - ]
-

-
Terminal Multiplexer
-

- [ - tmux (github) - ] - [ - screen - ]
-

-
Window Manager
-

- [ - i3wm - ] - [ - sway - ]
-

-
Text Editors
-

- Gnu Emacs - [ - Doom Emacs (github) - ] - [ - Org-Mode - your life in plain text & literate programming - ] - [ - Evil-Mode - ]
-

-

- [ - Vim - ] - [ - NeoVim - ]
-

-
Source Control Manager
-

- [ - Git - ]
-

-
Browsers
-

- [ - vieb - ] - [ - vimb - ]
- [ - brave - ]
-

- -

Search

-

- [ - DuckDuckGo - ] - [ - YubNub - ]
-

- -

eMail

-

- [ - Migadu - ]
-

-

- [ - NotmuchMail - ]
-

- -

Forges

-

- [ - Sourcehut - ]
-

-

- [ - CodeBerg - ]
-

-

- [ - GitHub - ] - [ - GitLab - ]
-

- -

Software Archives

-

- [ - Software Heritage - the universal software archive - ]
-

- -
-

-ralph.amissah www since 1993 ;-) -

- - - -- cgit v1.2.3