From aa47d1db8596aa65746db05d369441d1def62aa4 Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Fri, 16 Apr 2010 10:12:46 -0400 Subject: db sql table and column structure changes, name prefix "sisu_v2a_" resulting sisu version bump 2.1.0, plus other lesser fixes [Note: it is necessary to create new database and tables and to populate them] * db (sql) database table name and column structure changes, new pgsql db name prefix "sisu_v2a_" (version bump), continue to review (db_columns, db_create, db_import, db_sqltxt) * db remove and update fix, match filename for removal with = (not LIKE or ~) * db sqlite, issue with --recreate, bugfix (db_drop) --- lib/sisu/v2/constants.rb | 18 +- lib/sisu/v2/db_columns.rb | 2072 +++++++++++++++++++++++++++++++++++++++--- lib/sisu/v2/db_create.rb | 463 +++++----- lib/sisu/v2/db_drop.rb | 80 +- lib/sisu/v2/db_import.rb | 321 +------ lib/sisu/v2/db_indexes.rb | 24 +- lib/sisu/v2/db_load_tuple.rb | 176 +++- lib/sisu/v2/db_remove.rb | 10 +- lib/sisu/v2/db_select.rb | 18 +- lib/sisu/v2/db_sqltxt.rb | 115 +++ 10 files changed, 2581 insertions(+), 716 deletions(-) create mode 100644 lib/sisu/v2/db_sqltxt.rb (limited to 'lib') diff --git a/lib/sisu/v2/constants.rb b/lib/sisu/v2/constants.rb index 9abe9c0b..3fcb1e3a 100644 --- a/lib/sisu/v2/constants.rb +++ b/lib/sisu/v2/constants.rb @@ -111,7 +111,6 @@ Rx[:meta]=/#{Mx[:meta_o]}(\S+?)#{Mx[:meta_c]}/ Dx[:url_o]='‹'; Dx[:url_c]='›' Dx[:url_o_xml]='<'; Dx[:url_c_xml]='>' Dx[:rel_o]='‹'; Dx[:rel_c]='›' -Db[:name_prefix]="SiSU#{SiSU_version_dir}_" Tex[:backslash]="\\\\" Tex[:backslash]="\\\\" Tex[:tilde]='\\\\\\~' @@ -132,6 +131,23 @@ Px[:lv4]= '-' Px[:lv5]= '.' Px[:lv6]= '.' #Px[:lv5_6]= '.' 
+Db[:name_prefix]="SiSU#{SiSU_version_dir}a_" +Db[:name_prefix_db]="sisu_#{SiSU_version_dir}a_" +Db[:col_title]=800 +Db[:col_title_part]=400 +Db[:col_title_edition]=10 +Db[:col_name]=600 +Db[:col_creator_misc_short]=100 +Db[:col_language]=100 +Db[:col_language_char]=3 +Db[:col_date_text]=10 +Db[:col_classify_txt_long]=600 +Db[:col_classify_txt_short]=600 +Db[:col_classify_short]=200 +Db[:col_classify_identify]=256 +Db[:col_classify_library]=30 +Db[:col_classify_small]=16 +Db[:col_filename]=256 __END__ consider: 〔comment〕 diff --git a/lib/sisu/v2/db_columns.rb b/lib/sisu/v2/db_columns.rb index 1849a442..ee66c59e 100644 --- a/lib/sisu/v2/db_columns.rb +++ b/lib/sisu/v2/db_columns.rb @@ -58,154 +58,1934 @@ =end module SiSU_DB_columns - class Column_size - def lt_title - 600 - end - def lt_subtitle - 600 - end - def lt_author - 600 - end - def lt_author_title - 100 - end - def lt_author_nationality - 100 - end - def lt_illustrator - 600 - end - def lt_translator - 600 - end - def lt_prepared_by - 600 - end - def lt_digitized_by - 600 - end - def lt_subject - 600 - end - def lt_date - 10 - end - def lt_type - 600 - end - def lt_description - 2000 - end - def lt_publisher - 600 - end - def lt_contributor - 600 - end - def lt_format - 600 - end - def lt_identifier - 256 - end - def lt_source - 200 - end - def lt_language - 30 - end - def lt_language_char - 3 - end - def lt_language_original - 30 - end - def lt_language_original_char - 3 - end - def lt_relation - 100 - end - def lt_coverage - 100 - end - def lt_rights - 2000 - end - def lt_copyright - 2000 - end - def lt_owner - 600 - end - def lt_keywords - 600 - end - def lt_comment - 600 - end - def lt_loc - 30 - end - def lt_dewey - 30 - end - def lt_isbn - 16 - end - def lt_pg - 16 - end - def lt_abstract - 600 - end - def lt_skin - 100 + require "#{SiSU_lib}/sysenv" # sysenv.rb + require "#{SiSU_lib}/db_sqltxt" # db_sqltxt.rb + class Columns < SiSU_DB_text::Prepare + def initialize(md=nil) + @md=md + 
@db=SiSU_Env::Info_db.new #watch + if defined? md.mod \ + and md.mod.inspect=~/import|update/ \ + and FileTest.exist?(md.fns) + txt_arr=IO.readlines(md.fns,'') + src=txt_arr.join("\n") + if @db.share_source? + @sisutxt=special_character_escape(src) + else @sisutxt='' + end + @fulltext=clean_searchable_text(txt_arr) + else @sisutxt,@fulltext='','' + end end - def lt_markup - 100 - end - def lt_links - 100 - end - def lt_information - 100 - end - def lt_contact - 100 - end - def lt_suffix - 600 - end - def lt_filename - 256 - end - def lt_types - 1 - end - def lt_subj - 64 - end - def lt_orig_pub - 400 - end - def lt_orig_pub_date - 400 - end - def lt_orig_pub_institution - 200 - end - def lt_orig_pub_nationality - 200 - end - def lt_writing_focus_nationality - 100 - end - def lt_topic_register - 2000 +#% structures + #def column_define + # def varchar(name,size) + # "#{name} VARCHAR(#{size}) NULL," + # end + #end +=begin +#% title +@title: + :subtitle: + :short: + :edition: + :language: + :note: +=end + def column + def title # DublinCore 1 - title + def name + 'title' + end + def create_column + "#{name} VARCHAR(#{Db[:col_title]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata full document title [DC1]';} + end + def tuple + t=if defined? @md.title.full \ + and @md.title.full=~/\S+/ + txt=@md.title.full + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def title_main + def name + 'title_main' + end + def create_column + "#{name} VARCHAR(#{Db[:col_title_part]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata main document title';} + end + def tuple + t=if defined? 
@md.title.main \ + and @md.title.main=~/\S+/ + txt=@md.title.main + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def title_sub + def name + 'title_sub' + end + def create_column + "#{name} VARCHAR(#{Db[:col_title_part]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document subtitle';} + end + def tuple + t=if defined? @md.title.sub \ + and @md.title.sub=~/\S+/ + txt=@md.title.sub + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def title_short + def name + 'title_short' + end + def create_column + "#{name} VARCHAR(#{Db[:col_title_part]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document short title if any';} + end + def tuple + t=if defined? @md.title.short \ + and @md.title.short=~/\S+/ + txt=@md.title.short + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def title_edition + def name + 'title_edition' + end + def create_column + "#{name} VARCHAR(#{Db[:col_title_edition]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document edition (version)';} + end + def tuple + t=if defined? @md.title.edition \ + and @md.title.edition=~/\S+/ + txt=@md.title.edition + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def title_note + def name + 'title_note' + end + def create_column + "#{name} TEXT NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document notes associated with title';} + end + def tuple + t=if defined? 
@md.title.note \ + and @md.title.note=~/\S+/ + txt=@md.title.note + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def title_language + def name + 'title_language' + end + def create_column + "#{name} VARCHAR(#{Db[:col_language]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document language [DC12]';} + end + def tuple + t=if defined? @md.title.language \ + and @md.title.language=~/\S+/ + txt=@md.title.language + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def title_language_char # consider + def name + 'title_language_char' + end + def create_column + "#{name} VARCHAR(#{Db[:col_language_char]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document language iso code';} + end + def tuple + t=if defined? @md.title.language_char \ + and @md.title.language_char=~/\S+/ + txt=@md.title.language_char + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end +=begin +#% creator +@creator: + :author: + :contributor: + :illustrator: + :photographer: + :translator: + :prepared_by: + :digitized_by: + :audio: + :video: +=end + def creator_author # DublinCore 2 - creator/author (author) + def name + 'creator_author' + end + def create_column + "#{name} VARCHAR(#{Db[:col_name]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document author (creator) [DC2]';} + end + def tuple + t=if defined? 
@md.creator.author_detail \ + and @md.creator.author_detail.class==Array \ + and @md.creator.author_detail.length > 0 + txt='' + @md.creator.author_detail.each do |h| + txt=txt + %{#{h[:the]}, #{h[:others]}; } + end + txt.gsub!(/[;, ]+\s*$/,'') + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def creator_author_honorific # consider + def name + 'creator_author_hon' + end + def create_column + "#{name} VARCHAR(#{Db[:col_creator_misc_short]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document author honorific (title e.g, Ms. Dr. Prof.)';} + end + def tuple + t=if defined? @md.creator.author_hon \ + and @md.creator.author_hon=~/\S+/ + txt=@md.creator.author_hon + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def creator_author_nationality # consider + def name + 'creator_author_nationality' + end + def create_column + "#{name} VARCHAR(#{Db[:col_creator_misc_short]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata nationality of document author (creator)';} + end + def tuple + t=if defined? @md.creator.author_nationality_detail \ + and @md.creator.author_nationality=~/\S+/ + txt=@md.creator.author_nationality_detail + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def creator_contributor # DublinCore 6 - contributor + def name + 'creator_contributor' + end + def create_column + "#{name} VARCHAR(#{Db[:col_name]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document contributor name(s) [DC6]';} + end + def tuple + t=if defined? 
@md.creator.contributor_detail \ + and @md.creator.contributor_detail.class==Array \ + and @md.creator.contributor_detail.length > 0 + txt=@md.creator.contributor_detail #dc + txt='' + @md.creator.contributor_detail.each do |h| + txt=txt + %{#{h[:the]}, #{h[:others]}; } + end + txt.gsub!(/[;, ]+\s*$/,'') + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def creator_illustrator + def name + 'creator_illustrator' + end + def create_column + "#{name} VARCHAR(#{Db[:col_name]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document illustrator name(s)';} + end + def tuple + t=if defined? @md.creator.illustrator_detail \ + and @md.creator.illustrator_detail.class==Array \ + and @md.creator.illustrator_detail.length > 0 + txt=@md.creator.illustrator_detail + txt='' + @md.creator.illustrator_detail.each do |h| + txt=txt + %{#{h[:the]}, #{h[:others]}; } + end + txt.gsub!(/[;, ]+\s*$/,'') + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def creator_photographer + def name + 'creator_photographer' + end + def create_column + "#{name} VARCHAR(#{Db[:col_name]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document photographer name(s)';} + end + def tuple + t=if defined? 
@md.creator.photographer_detail \ + and @md.creator.photographer_detail.class==Array \ + and @md.creator.photographer_detail.length > 0 + txt=@md.creator.photographer_detail + txt='' + @md.creator.photographer_detail.each do |h| + txt=txt + %{#{h[:the]}, #{h[:others]}; } + end + txt.gsub!(/[;, ]+\s*$/,'') + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def creator_translator + def name + 'creator_translator' + end + def create_column + "#{name} VARCHAR(#{Db[:col_name]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document translator name(s)';} + end + def tuple + t=if defined? @md.creator.translator_detail \ + and @md.creator.translator_detail.class==Array \ + and @md.creator.translator_detail.length > 0 + txt='' + @md.creator.translator_detail.each do |h| + txt=txt + %{#{h[:the]}, #{h[:others]}; } + end + txt.gsub!(/[;, ]+\s*$/,'') + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def creator_prepared_by + def name + 'creator_prepared_by' + end + def create_column + "#{name} VARCHAR(#{Db[:col_name]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document prepared by name(s)';} + end + def tuple + t=if defined? 
@md.creator.prepared_by_detail \ + and @md.creator.prepared_by_detail.class==Array \ + and @md.creator.prepared_by_detail.length > 0 + txt=@md.creator.prepared_by_detail + txt='' + @md.creator.prepared_by_detail.each do |h| + txt=txt + %{#{h[:the]}, #{h[:others]}; } + end + txt.gsub!(/[;, ]+\s*$/,'') + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def creator_digitized_by + def name + 'creator_digitized_by' + end + def create_column + "#{name} VARCHAR(#{Db[:col_name]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document digitized by name(s)';} + end + def tuple + t=if defined? @md.creator.digitized_by_detail \ + and @md.creator.digitized_by_detail.class==Array \ + and @md.creator.digitized_by_detail.length > 0 + txt=@md.creator.digitized_by_detail + txt='' + @md.creator.digitized_by_detail.each do |h| + txt=txt + %{#{h[:the]}, #{h[:others]}; } + end + txt.gsub!(/[;, ]+\s*$/,'') + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def creator_audio + def name + 'creator_audio' + end + def create_column + "#{name} VARCHAR(#{Db[:col_name]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document audio by name(s)';} + end + def tuple + t=if defined? 
@md.creator.audio_detail \ + and @md.creator.audio_detail.class==Array \ + and @md.creator.audio_detail.length > 0 + txt=@md.creator.audio_detail + txt='' + @md.creator.audio_detail.each do |h| + txt=txt + %{#{h[:the]}, #{h[:others]}; } + end + txt.gsub!(/[;, ]+\s*$/,'') + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def creator_video + def name + 'creator_video' + end + def create_column + "#{name} VARCHAR(#{Db[:col_name]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document video by name(s)';} + end + def tuple + t=if defined? @md.creator.video_detail \ + and @md.creator.video_detail.class==Array \ + and @md.creator.video_detail.length > 0 + txt='' + @md.creator.video_detail.each do |h| + txt=txt + %{#{h[:the]}, #{h[:others]}; } + end + txt.gsub!(/[;, ]+\s*$/,'') + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end +=begin +#% language +#taken from other fields +@title: + :language: +@original: + :language: +#not available --> +#@language: +# :document: +# :original: +=end + def language_document + def name + 'language_document' + end + def create_column + "#{name} VARCHAR(#{Db[:col_language]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document language';} + end + def tuple + t=if defined? @md.language.document \ + and @md.language.document=~/\S+/ + txt=@md.language.document + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def language_document_char + def name + 'language_document_char' + end + def create_column + "#{name} VARCHAR(#{Db[:col_language_char]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document language';} + end + def tuple + t=if defined? 
@md.language.document_char \ + and @md.language.document_char=~/\S+/ + txt=@md.language.document_char + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def language_original + def name + 'language_original' + end + def create_column + "#{name} VARCHAR(#{Db[:col_language]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata original document/text language';} + end + def tuple + t=if defined? @md.language.original \ + and @md.language.original=~/\S+/ + txt=@md.language.original + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def language_original_char + def name + 'language_original_char' + end + def create_column + "#{name} VARCHAR(#{Db[:col_language_char]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document language';} + end + def tuple + t=if defined? @md.language.original_char \ + and @md.language.original_char=~/\S+/ + txt=@md.language.original_char + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end +=begin +#% date +@date: + :added_to_site: + :available: + :created: + :issued: + :modified: + :published: + :valid: + :translated: + :original_publication: +=end + def date_added_to_site + def name + 'date_added_to_site' + end + def create_column + "#{name} VARCHAR(#{Db[:col_date_text]}) NULL," + #"#{name} DATE," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata date added to site';} + end + def tuple + t=if defined? 
@md.date.added_to_site \ + and @md.date.added_to_site=~/\S+/ + txt=@md.date.added_to_site + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def date_available + def name + 'date_available' + end + def create_column + "#{name} VARCHAR(#{Db[:col_date_text]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata date added to site [DC]';} + end + def tuple + t=if defined? @md.date.available \ + and @md.date.available=~/\S+/ + txt=@md.date.available + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def date_created + def name + 'date_created' + end + def create_column + "#{name} VARCHAR(#{Db[:col_date_text]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata date created [DC]';} + end + def tuple + t=if defined? @md.date.created \ + and @md.date.created=~/\S+/ + txt=@md.date.created + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def date_issued + def name + 'date_issued' + end + def create_column + "#{name} VARCHAR(#{Db[:col_date_text]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata date issued [DC]';} + end + def tuple + t=if defined? @md.date.issued \ + and @md.date.issued=~/\S+/ + txt=@md.date.issued + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def date_modified + def name + 'date_modified' + end + def create_column + "#{name} VARCHAR(#{Db[:col_date_text]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata date modified [DC]';} + end + def tuple + t=if defined? 
@md.date.modified \ + and @md.date.modified=~/\S+/ + txt=@md.date.modified + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def date_published + def name + 'date_published' + end + def create_column + "#{name} VARCHAR(#{Db[:col_date_text]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata date published [DC7]';} + end + def tuple + t=if defined? @md.date.published \ + and @md.date.published=~/\S+/ + txt=@md.date.published + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def date_valid + def name + 'date_valid' + end + def create_column + "#{name} VARCHAR(#{Db[:col_date_text]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata date valid [DC]';} + end + def tuple + t=if defined? @md.date.valid \ + and @md.date.valid=~/\S+/ + txt=@md.date.valid + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def date_translated + def name + 'date_translated' + end + def create_column + "#{name} VARCHAR(#{Db[:col_date_text]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata date translated';} + end + def tuple + t=if defined? @md.date.translated \ + and @md.date.translated=~/\S+/ + txt=@md.date.translated + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def date_original_publication + def name + 'date_original_publication' + end + def create_column + "#{name} VARCHAR(#{Db[:col_date_text]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata date of original publication';} + end + def tuple + t=if defined? 
@md.date.original_publication \ + and @md.date.original_publication=~/\S+/ + txt=@md.date.original_publication + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def date_generated + def name + 'date_generated' + end + def create_column #choose other representation of time + "#{name} VARCHAR(30) NULL," + #"#{name} VARCHAR(10) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata date of sisu generation of document, automatically populated';} + end + def tuple #choose other representation of time + t=if defined? @md.generated \ + and @md.generated.to_s=~/\S+/ + txt=@md.generated.to_s + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end +=begin +#% publisher +@publisher: +=end + def publisher + def name + 'publisher' + end + def create_column + "#{name} VARCHAR(#{Db[:col_name]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document publisher [DC5]';} + end + def tuple + t=if defined? @md.publisher \ + and @md.publisher=~/\S+/ + txt=@md.publisher + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end +##% current +# def current_publisher +# def name +# 'current_publisher' +# end +# def size +# 10 +# end +# def create_column +# "#{name} VARCHAR(#{current_publisher.size}) NULL," +# end +# def tuple +# t=if defined? 
@md.current.publisher \ +# and @md.current.publisher=~/\S+/ +# txt=@md.current.publisher +# special_character_escape(txt) +# "'#{txt}', " +# end +# end +# self +# end +=begin +#% original +@original: + :publisher: + #:date: #repeated under date + :language: + :institution: + :nationality: + :source: +=end + def original_publisher + def name + 'original_publisher' + end + def create_column + "#{name} VARCHAR(#{Db[:col_name]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document original publisher [DC5]';} + end + def tuple + t=if defined? @md.original.publisher \ + and @md.original.publisher=~/\S+/ + txt=@md.original.publisher + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def original_language + def name + 'original_language' + end + def create_column + "#{name} VARCHAR(#{Db[:col_language]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document original language';} + end + def tuple + t=if defined? @md.original.language \ + and @md.original.language=~/\S+/ + txt=@md.original.language + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def original_language_char # consider + def name + 'original_language_char' + end + def create_column + "#{name} VARCHAR(#{Db[:col_language_char]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document original language iso character';} + end + def tuple + t=if defined? 
@md.original.language_char \ + and @md.original.language_char=~/\S+/ + txt=@md.original.language_char + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def original_source + def name + 'original_source' + end + def create_column + "#{name} VARCHAR(#{Db[:col_name]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document original source [DC11]';} + end + def tuple + t=if defined? @md.original.source \ + and @md.original.source=~/\S+/ + txt=@md.original.source + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def original_institution + def name + 'original_institution' + end + def create_column + "#{name} VARCHAR(#{Db[:col_name]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document original institution';} + end + def tuple + t=if defined? @md.original.institution \ + and @md.original.institution=~/\S+/ + txt=@md.original.institution + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def original_nationality + def name + 'original_nationality' + end + def create_column + "#{name} VARCHAR(#{Db[:col_language]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document original nationality';} + end + def tuple + t=if defined? 
@md.original.nationality \ + and @md.original.nationality=~/\S+/ + txt=@md.original.nationality + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end +=begin +#% rights +@rights: + #:copyright: #mapped to :text: used where no other copyrights and included in :all: + :text: + :translation: + :illustrations: + :photographs: + :preparation: + :digitization: + :audio: + :video: + :license: + :all: +=end + def rights_all + def name + 'rights' + end + def create_column + "#{name} TEXT NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata rights associated with document (composite) [DC15]';} + end + def tuple + t=if defined? @md.rights.all \ + and @md.rights.all=~/\S+/ + txt=@md.rights.all + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def rights_copyright_text + def name + 'rights_copyright_text' + end + def create_column + "#{name} TEXT NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata copyright associated for document text';} + end + def tuple + t=if defined? @md.rights.copyright_text \ + and @md.rights.copyright_text=~/\S+/ + txt=@md.rights.copyright_text + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def rights_copyright_translation + def name + 'rights_copyright_translation' + end + def create_column + "#{name} TEXT NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata copyright associated for document text translation (if any)';} + end + def tuple + t=if defined? 
@md.rights.copyright_translation \ + and @md.rights.copyright_translation=~/\S+/ + txt=@md.rights.copyright_translation + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def rights_copyright_illustrations + def name + 'rights_copyright_illustrations' + end + def create_column + "#{name} TEXT NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata copyright associated for document text illustrations (if any)';} + end + def tuple + t=if defined? @md.rights.copyright_illustrations \ + and @md.rights.copyright_illustrations=~/\S+/ + txt=@md.rights.copyright_illustrations + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def rights_copyright_photographs + def name + 'rights_copyright_photographs' + end + def create_column + "#{name} TEXT NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata copyright associated for document text photographs (if any)';} + end + def tuple + t=if defined? @md.rights.copyright_photographs \ + and @md.rights.copyright_photographs=~/\S+/ + txt=@md.rights.copyright_photographs + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def rights_copyright_preparation + def name + 'rights_copyright_preparation' + end + def create_column + "#{name} TEXT NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata copyright associated for document text preparation (if any)';} + end + def tuple + t=if defined? 
@md.rights.copyright_preparation \ + and @md.rights.copyright_preparation=~/\S+/ + txt=@md.rights.copyright_preparation + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def rights_copyright_digitization + def name + 'rights_copyright_digitization' + end + def create_column + "#{name} TEXT NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata copyright associated for document text digitization (if any)';} + end + def tuple + t=if defined? @md.rights.copyright_digitization \ + and @md.rights.copyright_digitization=~/\S+/ + txt=@md.rights.copyright_digitization + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def rights_copyright_audio + def name + 'rights_copyright_audio' + end + def create_column + "#{name} TEXT NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata copyright associated for document text audio (if any)';} + end + def tuple + t=if defined? @md.rights.copyright_audio \ + and @md.rights.copyright_audio=~/\S+/ + txt=@md.rights.copyright_audio + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def rights_copyright_video + def name + 'rights_copyright_video' + end + def create_column + "#{name} TEXT NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata copyright associated for document text video (if any)';} + end + def tuple + t=if defined? 
@md.rights.copyright_video \ + and @md.rights.copyright_video=~/\S+/ + txt=@md.rights.copyright_video + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def rights_license + def name + 'rights_license' + end + def create_column + "#{name} TEXT NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata license granted for use of document if any)';} + end + def tuple + t=if defined? @md.rights.license \ + and @md.rights.license=~/\S+/ + txt=@md.rights.license + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end +=begin +#% classify +@classify: + :topic_register: + :coverage: + :format: + :identifier: + :keywords: + :relation: + :subject: + :type: + :loc: + :dewey: + :pg: + :isbn: +=end + def classify_topic_register + def name + 'classify_topic_register' + end + def create_column + "#{name} VARCHAR(#{Db[:col_classify_txt_long]}) NULL," + #"#{name} TEXT NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata classify document topic register (semi-structured document subject information)';} + end + def tuple + t=if defined? @md.classify.topic_register \ + and @md.classify.topic_register=~/\S+/ + txt=@md.classify.topic_register + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def classify_subject + def name + 'classify_subject' + end + def create_column + "#{name} VARCHAR(#{Db[:col_classify_txt_short]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata classify document subject matter [DC3]';} + end + def tuple + t=if defined? 
@md.classify.subject \ + and @md.classify.subject=~/\S+/ + txt=@md.classify.subject + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def classify_type #check + def name + 'classify_type' + end + def create_column + "#{name} VARCHAR(#{Db[:col_classify_txt_short]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata classify document type [DC8]';} + end + def tuple + t=if defined? @md.classify.type \ + and @md.classify.type=~/\S+/ + txt=@md.classify.type + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def classify_loc + def name + 'classify_loc' + end + def create_column + "#{name} VARCHAR(#{Db[:col_classify_library]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata classify document library of congress (if available)';} + end + def tuple + t=if defined? @md.classify.loc \ + and @md.classify.loc=~/\S+/ + txt=@md.classify.loc + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def classify_dewey + def name + 'classify_dewey' + end + def create_column + "#{name} VARCHAR(#{Db[:col_classify_library]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata classify document dewey (if available)';} + end + def tuple + t=if defined? @md.classify.dewey \ + and @md.classify.dewey=~/\S+/ + txt=@md.classify.dewey + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def classify_pg + def name + 'classify_pg' + end + def create_column + "#{name} VARCHAR(#{Db[:col_classify_small]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata classify document project gutenberg (if any)';} + end + def tuple + t=if defined? 
@md.classify.pg \ + and @md.classify.pg=~/\S+/ + txt=@md.classify.pg + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def classify_isbn + def name + 'classify_isbn' + end + def create_column + "#{name} VARCHAR(#{Db[:col_classify_small]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata classify document isbn (if any)';} + end + def tuple + t=if defined? @md.classify.isbn \ + and @md.classify.isbn=~/\S+/ + txt=@md.classify.isbn + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def classify_format + def name + 'classify_format' + end + def create_column + "#{name} VARCHAR(#{Db[:col_classify_txt_short]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata classify document format [DC9]';} + end + def tuple + t=if defined? @md.classify.format \ + and @md.classify.format=~/\S+/ + txt=@md.classify.format + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def classify_identifier + def name + 'classify_identifier' + end + def create_column + "#{name} VARCHAR(#{Db[:col_classify_identify]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata classify document identifier [DC10]';} + end + def tuple + t=if defined? @md.classify.identifier \ + and @md.classify.identifier=~/\S+/ + txt=@md.classify.identifier + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def classify_relation + def name + 'classify_relation' + end + def create_column + "#{name} VARCHAR(#{Db[:col_classify_short]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata classify document relation [DC13]';} + end + def tuple + t=if defined? 
@md.classify.relation \ + and @md.classify.relation=~/\S+/ + txt=@md.classify.relation + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def classify_coverage + def name + 'classify_coverage' + end + def create_column + "#{name} VARCHAR(#{Db[:col_classify_short]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata classify document coverage [DC14]';} + end + def tuple + t=if defined? @md.classify.coverage \ + and @md.classify.coverage=~/\S+/ + txt=@md.classify.coverage + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def classify_keywords + def name + 'classify_keywords' + end + def create_column + "#{name} VARCHAR(#{Db[:col_classify_txt_short]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata classify document keywords';} + end + def tuple + t=if defined? @md.classify.keywords \ + and @md.classify.keywords=~/\S+/ + txt=@md.classify.keywords + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end +=begin +#% notes +@notes: + :abstract: + :comment: + :description: + :history: + :prefix: + :prefix_a: + :prefix_b: + :suffix: +=end + def notes_abstract + def name + 'notes_abstract' + end + def create_column + "#{name} TEXT NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document notes abstract';} + end + def tuple + t=if defined? @md.notes.abstract \ + and @md.notes.abstract=~/\S+/ + txt=@md.notes.abstract + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def notes_comment + def name + 'notes_comment' + end + def create_column + "#{name} TEXT NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document notes comment';} + end + def tuple + t=if defined? 
@md.notes.comment \ + and @md.notes.comment=~/\S+/ + txt=@md.notes.comment + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def notes_description + def name + 'notes_description' + end + def create_column + "#{name} TEXT NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document notes description [DC4]';} + end + def tuple + t=if defined? @md.notes.description \ + and @md.notes.description=~/\S+/ + txt=@md.notes.description + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def notes_history #check, consider removal + def name + 'notes_history' + end + def create_column + "#{name} VARCHAR(#{Db[:col_classify_txt_short]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document notes history';} + end + def tuple + t=if defined? @md.notes.history \ + and @md.notes.history=~/\S+/ + txt=@md.notes.history + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def notes_prefix + def name + 'notes_prefix' + end + def create_column + "#{name} TEXT NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document notes prefix';} + end + def tuple + t=if defined? @md.notes.prefix \ + and @md.notes.prefix=~/\S+/ + txt=@md.notes.prefix + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def notes_prefix_a + def name + 'notes_prefix_a' + end + def create_column + "#{name} TEXT NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document notes prefix_a';} + end + def tuple + t=if defined? 
@md.notes.prefix_a \ + and @md.notes.prefix_a=~/\S+/ + txt=@md.notes.prefix_a + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def notes_prefix_b + def name + 'notes_prefix_b' + end + def create_column + "#{name} TEXT NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document notes prefix_b';} + end + def tuple + t=if defined? @md.notes.prefix_b \ + and @md.notes.prefix_b=~/\S+/ + txt=@md.notes.prefix_b + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def notes_suffix + def name + 'notes_suffix' + end + def create_column # keep text + "#{name} TEXT NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document notes suffix';} + end + def tuple + t=if defined? @md.notes.suffix \ + and @md.notes.suffix=~/\S+/ + txt=@md.notes.suffix + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end +=begin +#% misc +@make: + :skin: +@links: +=end + def filename + def name + 'filename' + end + def create_column + "#{name} VARCHAR(#{Db[:col_filename]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document filename';} + end + def tuple + t=if defined? 
@md.fns \ + and @md.fns=~/\S+/ + txt=@md.fns + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def sisutxt # consider naming sisusrc + def name + 'sisutxt' + end + def create_column + "#{name} TEXT NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'sisu markup text (if shared)';} + end + def tuple + t=if @md.mod.inspect=~/import|update/ \ + and FileTest.exist?(@md.fns) + ["#{name}, ","'#{@sisutxt}', "] + else ['',''] + end + end + self + end + def fulltext + def name + 'fulltext' + end + def create_column + "#{name} TEXT NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'document full text clean, searchable';} + end + def tuple + t=if @md.mod.inspect=~/import|update/ \ + and FileTest.exist?(@md.fns) + ["#{name}, ","'#{@fulltext}', "] + else ['',''] + end + end + self + end + def word_count + def name + 'word_count' + end + def create_column + "#{name} TEXT NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'document word count';} + end + def tuple + t=if defined? @md.wc_words \ + and @md.wc_words=~/\S+/ + txt=@md.wc_words + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def digest + def name + 'dgst' + end + def create_column + "#{name} TEXT NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'document hash digest sha256 (or md5)';} + end + def tuple + t=if defined? @md.dgst \ + and @md.dgst=~/\S+/ + txt=@md.dgst + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def skin_name #check + def name + 'skin_name' + end + def create_column + "#{name} VARCHAR(#{Db[:col_filename]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document skin name';} + end + def tuple + t=if defined? 
@md.notes.skin_name \ + and @md.notes.skin_name=~/\S+/ + txt=@md.notes.skin_name + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def skin # you likely want a separate table for skins + def name + 'skin' + end + def create_column + "#{name} TEXT NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document skin';} + end + def tuple + t=if defined? @md.skin \ + and @md.skin=~/\S+/ + txt=@md.skin + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def links + def name + 'links' + end + def create_column + "#{name} TEXT NULL," + #"#{name} VARCHAR(#{links.size}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document links';} + end + def tuple + t=if defined? @md.notes.links \ + and @md.notes.links=~/\S+/ + txt=@md.notes.links + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + self end + end + class Column_size def document_clean # restriction not necessary 60000 end diff --git a/lib/sisu/v2/db_create.rb b/lib/sisu/v2/db_create.rb index feba670c..8ed638dc 100644 --- a/lib/sisu/v2/db_create.rb +++ b/lib/sisu/v2/db_create.rb @@ -59,7 +59,7 @@ =end module SiSU_DB_create require "#{SiSU_lib}/db_columns" # db_columns.rb - class Create < SiSU_DB_columns::Column_size + class Create < SiSU_DB_columns::Columns require "#{SiSU_lib}/sysenv" # sysenv.rb @@dl=nil def initialize(opt,conn,file,sql_type='pg') @@ -82,7 +82,7 @@ module SiSU_DB_create @env=SiSU_Env::Info_env.new(@opt.fns) tell=SiSU_Screen::Ansi.new(@opt.cmd,'invert','Create PG db:',%{"SiSU_#{@env.path.stub_pwd}"}) tell.colorize unless @opt.cmd =~/q/ - SiSU_Env::System_call.new.create_pg_db(@env.path.stub_pwd) #watch use of path.stub_pwd instead of stub + SiSU_Env::System_call.new.create_pg_db(@env.path.stub_pwd) if @sql_type=='pg' #watch use of path.stub_pwd instead of stub 
end def output_dir? dir=SiSU_Env::Info_env.new('') @@ -91,102 +91,129 @@ module SiSU_DB_create end end def create_table - def metadata + def metadata_and_text print %{ currently using sisu dbi module - to be populated from documents files - create tables metadata + to be populated from document files + create tables metadata_and_text data import through ruby transfer } unless @opt.cmd =~/q/ @conn.execute(%{ - CREATE TABLE metadata ( + CREATE TABLE metadata_and_text ( tid BIGINT PRIMARY KEY, - title VARCHAR(#{lt_title}) NULL, - subtitle VARCHAR(#{lt_subtitle}) NULL, - author VARCHAR(#{lt_author}) NULL, -/* plan to replace creator field, currently used, with author field */ - creator VARCHAR(#{lt_author}) NULL, - author_title VARCHAR(#{lt_author_title}) NULL, - author_nationality VARCHAR(#{lt_author_nationality}) NULL, - illustrator VARCHAR(#{lt_illustrator}) NULL, - translator VARCHAR(#{lt_translator}) NULL, - subject VARCHAR(#{lt_subject}) NULL, - date VARCHAR(#{lt_date}) NULL, - date_added_to_site VARCHAR(#{lt_date}) NULL, - date_created VARCHAR(#{lt_date}) NULL, - date_issued VARCHAR(#{lt_date}) NULL, - date_available VARCHAR(#{lt_date}) NULL, - date_valid VARCHAR(#{lt_date}) NULL, - date_modified VARCHAR(#{lt_date}) NULL, - date_translated VARCHAR(#{lt_date}) NULL, -/* date DATE, */ -/* date_added_to_site DATE, */ -/* date_created DATE, */ -/* date_issued DATE, */ -/* date_available DATE, */ -/* date_valid DATE, */ -/* date_modified DATE, */ -/* date_translated DATE, */ - type VARCHAR(#{lt_type}) NULL, - description VARCHAR(#{lt_description}) NULL, - publisher VARCHAR(#{lt_publisher}) NULL, - contributor VARCHAR(#{lt_contributor}) NULL, - prepared_by VARCHAR(#{lt_prepared_by}) NULL, - digitized_by VARCHAR(#{lt_digitized_by}) NULL, - format VARCHAR(#{lt_format}) NULL, - identifier VARCHAR(#{lt_identifier}) NULL, - source VARCHAR(#{lt_source}) NULL, - language VARCHAR(#{lt_language}) NULL, - language_original VARCHAR(#{lt_language_original}) NULL, - relation 
VARCHAR(#{lt_relation}) NULL, - coverage VARCHAR(#{lt_coverage}) NULL, - rights VARCHAR(#{lt_rights}) NULL, - copyright VARCHAR(#{lt_copyright}) NULL, - owner VARCHAR(#{lt_owner}) NULL, - keywords VARCHAR(#{lt_keywords}) NULL, - comment VARCHAR(#{lt_comment}) NULL, - loc VARCHAR(#{lt_loc}) NULL, - dewey VARCHAR(#{lt_dewey}) NULL, - isbn VARCHAR(#{lt_isbn}) NULL, - pg VARCHAR(#{lt_pg}) NULL, - abstract VARCHAR(#{lt_abstract}) NULL, - prefix_a TEXT NULL, - prefix_b TEXT NULL, - skin VARCHAR(#{lt_skin}) NULL, - markup VARCHAR(#{lt_markup}) NULL, - links VARCHAR(#{lt_links}) NULL, - information VARCHAR(#{lt_information}) NULL, - contact VARCHAR(#{lt_contact}) NULL, - suffix VARCHAR(#{lt_suffix}) NULL, - filename VARCHAR(#{lt_filename}) NULL UNIQUE, - types CHAR(#{lt_types}) NULL, - subj VARCHAR(#{lt_subj}) NULL, - original_publication VARCHAR(#{lt_orig_pub}) NULL, - original_publication_date VARCHAR(#{lt_orig_pub_date}) NULL, - original_publication_institution VARCHAR(#{lt_orig_pub_institution}) NULL, - original_publication_nationality VARCHAR(#{lt_orig_pub_nationality}) NULL, - writing_focus_nationality VARCHAR(#{lt_writing_focus_nationality}) NULL, - topic_register VARCHAR(#{lt_topic_register}) NULL + /* title */ + #{column.title.create_column} + #{column.title_main.create_column} + #{column.title_sub.create_column} + #{column.title_short.create_column} + #{column.title_edition.create_column} + #{column.title_note.create_column} + #{column.title_language.create_column} + #{column.title_language_char.create_column} + /* creator */ + #{column.creator_author.create_column} + #{column.creator_author_honorific.create_column} + #{column.creator_author_nationality.create_column} + #{column.creator_contributor.create_column} + #{column.creator_illustrator.create_column} + #{column.creator_photographer.create_column} + #{column.creator_translator.create_column} + #{column.creator_prepared_by.create_column} + #{column.creator_digitized_by.create_column} + 
#{column.creator_audio.create_column} + #{column.creator_video.create_column} + /* language */ + #{column.language_document.create_column} + #{column.language_document_char.create_column} + #{column.language_original.create_column} + #{column.language_original_char.create_column} + /* date */ + #{column.date_added_to_site.create_column} + #{column.date_available.create_column} + #{column.date_created.create_column} + #{column.date_issued.create_column} + #{column.date_modified.create_column} + #{column.date_published.create_column} + #{column.date_valid.create_column} + #{column.date_translated.create_column} + #{column.date_original_publication.create_column} + #{column.date_generated.create_column} + /* publisher */ + #{column.publisher.create_column} + /* original */ + #{column.original_publisher.create_column} + #{column.original_language.create_column} + #{column.original_language_char.create_column} + #{column.original_source.create_column} + #{column.original_institution.create_column} + #{column.original_nationality.create_column} + /* rights */ + #{column.rights_all.create_column} + #{column.rights_copyright_text.create_column} + #{column.rights_copyright_translation.create_column} + #{column.rights_copyright_illustrations.create_column} + #{column.rights_copyright_photographs.create_column} + #{column.rights_copyright_preparation.create_column} + #{column.rights_copyright_digitization.create_column} + #{column.rights_copyright_audio.create_column} + #{column.rights_copyright_video.create_column} + #{column.rights_license.create_column} + /* classify */ + #{column.classify_topic_register.create_column} + #{column.classify_subject.create_column} + #{column.classify_type.create_column} + #{column.classify_loc.create_column} + #{column.classify_dewey.create_column} + #{column.classify_pg.create_column} + #{column.classify_isbn.create_column} + #{column.classify_format.create_column} + #{column.classify_identifier.create_column} + 
#{column.classify_relation.create_column} + #{column.classify_coverage.create_column} + #{column.classify_keywords.create_column} + /* notes */ + #{column.notes_abstract.create_column} + #{column.notes_comment.create_column} + #{column.notes_description.create_column} + #{column.notes_history.create_column} + #{column.notes_prefix.create_column} + #{column.notes_prefix_a.create_column} + #{column.notes_prefix_b.create_column} + #{column.notes_suffix.create_column} + /* misc */ + #{column.filename.create_column} + #{column.sisutxt.create_column} + #{column.fulltext.create_column} + #{column.word_count.create_column} + #{column.digest.create_column} + #{column.skin_name.create_column} + #{column.skin.create_column} + #{column.links.create_column.gsub(/,$/,'')} +/* subj VARCHAR(64) NULL, */ +/* contact VARCHAR(100) NULL, */ +/* information VARCHAR(100) NULL, */ +/* types CHAR(1) NULL, */ +/* writing_focus_nationality VARCHAR(100) NULL, */ ); }) - @comment.psql.metadata if @comment + @comment.psql.metadata_and_text if @comment end - def documents # create documents base + def doc_objects # create doc_objects base print %{ to be populated from documents files - create tables documents document_trade document_env + create tables doc_objects data import through ruby transfer } unless @opt.cmd =~/q/ @conn.execute(%{ - CREATE TABLE documents ( + CREATE TABLE doc_objects ( lid BIGINT PRIMARY KEY, - metadata_tid BIGINT REFERENCES metadata, + metadata_tid BIGINT REFERENCES metadata_and_text, ocn SMALLINT, ocnd VARCHAR(6), ocns VARCHAR(6), clean TEXT NULL, body TEXT NULL, - seg VARCHAR(#{document_seg}) NULL, + seg VARCHAR(120) NULL, lev_an VARCHAR(1), lev SMALLINT NULL, lev1 SMALLINT, @@ -210,7 +237,7 @@ module SiSU_DB_create types CHAR(1) NULL ); }) - @comment.psql.documents if @comment + @comment.psql.doc_objects if @comment end def endnotes print %{ @@ -221,7 +248,7 @@ module SiSU_DB_create @conn.execute(%{ CREATE TABLE endnotes ( nid BIGINT PRIMARY KEY, - document_lid 
BIGINT REFERENCES documents, + document_lid BIGINT REFERENCES doc_objects, nr SMALLINT, clean TEXT NULL, body TEXT NULL, @@ -229,7 +256,7 @@ module SiSU_DB_create ocnd VARCHAR(6), ocns VARCHAR(6), digest_clean CHAR(#{@@dl}), - metadata_tid BIGINT REFERENCES metadata + metadata_tid BIGINT REFERENCES metadata_and_text ); }) @comment.psql.endnotes if @comment @@ -243,7 +270,7 @@ module SiSU_DB_create @conn.execute(%{ CREATE TABLE endnotes_asterisk ( nid BIGINT PRIMARY KEY, - document_lid BIGINT REFERENCES documents, + document_lid BIGINT REFERENCES doc_objects, nr SMALLINT, clean TEXT NULL, body TEXT NULL, @@ -251,7 +278,7 @@ module SiSU_DB_create ocnd VARCHAR(6), ocns VARCHAR(6), digest_clean CHAR(#{@@dl}), - metadata_tid BIGINT REFERENCES metadata + metadata_tid BIGINT REFERENCES metadata_and_text ); }) @comment.psql.endnotes_asterisk if @comment @@ -265,7 +292,7 @@ module SiSU_DB_create @conn.execute(%{ CREATE TABLE endnotes_plus ( nid BIGINT PRIMARY KEY, - document_lid BIGINT REFERENCES documents, + document_lid BIGINT REFERENCES doc_objects, nr SMALLINT, clean TEXT NULL, body TEXT NULL, @@ -273,21 +300,21 @@ module SiSU_DB_create ocnd VARCHAR(6), ocns VARCHAR(6), digest_clean CHAR(#{@@dl}), - metadata_tid BIGINT REFERENCES metadata + metadata_tid BIGINT REFERENCES metadata_and_text ); }) @comment.psql.endnotes_plus if @comment end - def urls # create documents file links mapping + def urls # create doc_objects file links mapping print %{ currently using sisu dbi module - to be populated from documents files + to be populated from doc_objects files create tables urls data import through ruby transfer } unless @opt.cmd =~/q/ @conn.execute(%{ CREATE TABLE urls ( - metadata_tid BIGINT REFERENCES metadata, + metadata_tid BIGINT REFERENCES metadata_and_text, plaintext varchar(512), html_toc varchar(512), html_doc varchar(512), @@ -311,7 +338,7 @@ module SiSU_DB_create self end end - class Comment + class Comment < SiSU_DB_columns::Columns def 
initialize(conn,sql_type='pg') @conn=conn if sql_type =~ /pg/; psql @@ -325,162 +352,146 @@ module SiSU_DB_create end end end - def metadata + def metadata_and_text sql_arr=[ - %{COMMENT ON Table metadata - IS 'contains SiSU documents metadata with metadata';}, - %{COMMENT ON COLUMN metadata.tid + %{COMMENT ON Table metadata_and_text + IS 'contains SiSU metadata and fulltext for search (including source .sst if shared)';}, + %{COMMENT ON COLUMN metadata_and_text.tid IS 'unique';}, - %{COMMENT ON COLUMN metadata.filename - IS 'document filename';}, - %{COMMENT ON COLUMN metadata.title - IS 'metadata title (dublin core element 1)';}, - %{COMMENT ON COLUMN metadata.subtitle - IS 'document subtitle';}, - %{COMMENT ON COLUMN metadata.creator - IS 'metadata creator (dublin core element 2)';}, - %{COMMENT ON COLUMN metadata.author - IS 'metadata author (dublin core element 2)';}, - %{COMMENT ON COLUMN metadata.illustrator - IS 'metadata illustrator';}, - %{COMMENT ON COLUMN metadata.translator - IS 'metadata translator';}, - %{COMMENT ON COLUMN metadata.subject - IS 'metadata subject (dublin core element 3)';}, - %{COMMENT ON COLUMN metadata.date - IS 'metadata date (dublin core element 7)';}, - %{COMMENT ON COLUMN metadata.date_created - IS 'metadata date created (dublin core)';}, - %{COMMENT ON COLUMN metadata.date_issued - IS 'metadata date of issue (dublin core)';}, - %{COMMENT ON COLUMN metadata.date_available - IS 'metadata date available (dublin core)';}, - %{COMMENT ON COLUMN metadata.date_valid - IS 'metadata date valid (dublin core)';}, - %{COMMENT ON COLUMN metadata.date_modified - IS 'metadata date modified (dublin core)';}, - %{COMMENT ON COLUMN metadata.type - IS 'metadata type (dublin core element 8)';}, - %{COMMENT ON COLUMN metadata.description - IS 'metadata description (dublin core element 4)';}, - %{COMMENT ON COLUMN metadata.publisher - IS 'metadata publisher (dublin core element 5)';}, - %{COMMENT ON COLUMN metadata.contributor - IS 'metadata 
contributor (dublin core element 6)';}, - %{COMMENT ON COLUMN metadata.prepared_by - IS 'metadata markup prepared by';}, - %{COMMENT ON COLUMN metadata.digitized_by - IS 'metadata digitized by';}, - %{COMMENT ON COLUMN metadata.format - IS 'metadata format (dublin core element 9)';}, - %{COMMENT ON COLUMN metadata.identifier - IS 'metadata identifier (dublin core element 10)';}, - %{COMMENT ON COLUMN metadata.source - IS 'metadata source (dublin core element 11)';}, - %{COMMENT ON COLUMN metadata.language - IS 'metadata language (dublin core element 12)';}, - %{COMMENT ON COLUMN metadata.language_original - IS 'metadata original language';}, - %{COMMENT ON COLUMN metadata.relation - IS 'metadata (dublin core element 13)';}, - %{COMMENT ON COLUMN metadata.coverage - IS 'metadata coverage (dublin core element 14)';}, - %{COMMENT ON COLUMN metadata.rights - IS 'metadata rights / copyright / license (dublin core element 15)';}, - %{COMMENT ON COLUMN metadata.owner - IS 'metadata owner';}, - %{COMMENT ON COLUMN metadata.keywords - IS 'metadata keywords';}, - %{COMMENT ON COLUMN metadata.comment - IS 'metadata comment';}, - %{COMMENT ON COLUMN metadata.abstract - IS 'metadata abstract';}, - %{COMMENT ON COLUMN metadata.loc - IS 'metadata library of congress';}, - %{COMMENT ON COLUMN metadata.dewey - IS 'metadata dewey';}, - %{COMMENT ON COLUMN metadata.isbn - IS 'metadata isbn';}, - %{COMMENT ON COLUMN metadata.pg - IS 'metadata project gutenberg number';}, - %{COMMENT ON COLUMN metadata.prefix_a - IS 'metadata prefix';}, - %{COMMENT ON COLUMN metadata.prefix_b - IS 'metadata prefix';}, - %{COMMENT ON COLUMN metadata.skin - IS 'metadata sisu skin';}, - %{COMMENT ON COLUMN metadata.markup - IS 'metadata markup source';}, - %{COMMENT ON COLUMN metadata.links - IS 'metadata links';}, - %{COMMENT ON COLUMN metadata.information - IS 'metadata information';}, - %{COMMENT ON COLUMN metadata.contact - IS 'metadata contact';}, - %{COMMENT ON COLUMN metadata.suffix - IS 'metadata 
sisu suffix (output related)';}, - %{COMMENT ON COLUMN metadata.filename - IS 'metadata source filename';}, - %{COMMENT ON COLUMN metadata.types - IS 'document types scroll 1, seg 2, both 3';}, - %{COMMENT ON COLUMN metadata.subj - IS 'subject areas - no way to populate at present as not mapped';}, + %{#{column.title.column_comment}}, + %{#{column.title_main.column_comment}}, + %{#{column.title_sub.column_comment}}, + %{#{column.title_short.column_comment}}, + %{#{column.title_edition.column_comment}}, + %{#{column.title_note.column_comment}}, + %{#{column.title_language.column_comment}}, + %{#{column.title_language_char.column_comment}}, + %{#{column.creator_author.column_comment}}, + %{#{column.creator_author_honorific.column_comment}}, + %{#{column.creator_author_nationality.column_comment}}, + %{#{column.creator_contributor.column_comment}}, + %{#{column.creator_illustrator.column_comment}}, + %{#{column.creator_photographer.column_comment}}, + %{#{column.creator_translator.column_comment}}, + %{#{column.creator_prepared_by.column_comment}}, + %{#{column.creator_digitized_by.column_comment}}, + %{#{column.creator_audio.column_comment}}, + %{#{column.creator_video.column_comment}}, + %{#{column.language_document.column_comment}}, + %{#{column.language_document_char.column_comment}}, + %{#{column.language_original.column_comment}}, + %{#{column.language_original_char.column_comment}}, + %{#{column.date_added_to_site.column_comment}}, + %{#{column.date_available.column_comment}}, + %{#{column.date_created.column_comment}}, + %{#{column.date_issued.column_comment}}, + %{#{column.date_modified.column_comment}}, + %{#{column.date_published.column_comment}}, + %{#{column.date_valid.column_comment}}, + %{#{column.date_translated.column_comment}}, + %{#{column.date_original_publication.column_comment}}, + %{#{column.date_generated.column_comment}}, + %{#{column.publisher.column_comment}}, + %{#{column.original_publisher.column_comment}}, + 
%{#{column.original_language.column_comment}}, + %{#{column.original_language_char.column_comment}}, + %{#{column.original_source.column_comment}}, + %{#{column.original_institution.column_comment}}, + %{#{column.original_nationality.column_comment}}, + %{#{column.rights_all.column_comment}}, + %{#{column.rights_copyright_text.column_comment}}, + %{#{column.rights_copyright_translation.column_comment}}, + %{#{column.rights_copyright_illustrations.column_comment}}, + %{#{column.rights_copyright_photographs.column_comment}}, + %{#{column.rights_copyright_preparation.column_comment}}, + %{#{column.rights_copyright_digitization.column_comment}}, + %{#{column.rights_copyright_audio.column_comment}}, + %{#{column.rights_copyright_video.column_comment}}, + %{#{column.rights_license.column_comment}}, + %{#{column.classify_topic_register.column_comment}}, + %{#{column.classify_subject.column_comment}}, + %{#{column.classify_type.column_comment}}, + %{#{column.classify_loc.column_comment}}, + %{#{column.classify_dewey.column_comment}}, + %{#{column.classify_pg.column_comment}}, + %{#{column.classify_isbn.column_comment}}, + %{#{column.classify_format.column_comment}}, + %{#{column.classify_identifier.column_comment}}, + %{#{column.classify_relation.column_comment}}, + %{#{column.classify_coverage.column_comment}}, + %{#{column.classify_keywords.column_comment}}, + %{#{column.notes_abstract.column_comment}}, + %{#{column.notes_comment.column_comment}}, + %{#{column.notes_description.column_comment}}, + %{#{column.notes_history.column_comment}}, + %{#{column.notes_prefix.column_comment}}, + %{#{column.notes_prefix_a.column_comment}}, + %{#{column.notes_prefix_b.column_comment}}, + %{#{column.notes_suffix.column_comment}}, + %{#{column.filename.column_comment}}, + %{#{column.sisutxt.column_comment}}, + %{#{column.fulltext.column_comment}}, + %{#{column.word_count.column_comment}}, + %{#{column.digest.column_comment}}, + %{#{column.skin_name.column_comment}}, + 
%{#{column.skin.column_comment}}, + %{#{column.links.column_comment}}, ] conn_execute_array(sql_arr) end - def documents + def doc_objects sql_arr=[ - %{COMMENT ON Table documents - IS 'contains searchable text of SiSU documents';}, - %{COMMENT ON COLUMN documents.lid + %{COMMENT ON Table doc_objects + IS 'contains searchable text of SiSU document objects';}, + %{COMMENT ON COLUMN doc_objects.lid IS 'unique';}, - %{COMMENT ON COLUMN documents.metadata_tid - IS 'tie to title in metadata';}, - %{COMMENT ON COLUMN documents.lev_an + %{COMMENT ON COLUMN doc_objects.metadata_tid + IS 'tie to title in metadata_and_text';}, + %{COMMENT ON COLUMN doc_objects.lev_an IS 'doc level A-C 1-6';}, - %{COMMENT ON COLUMN documents.lev + %{COMMENT ON COLUMN doc_objects.lev IS 'doc level 1-6 \d\~';}, - %{COMMENT ON COLUMN documents.seg + %{COMMENT ON COLUMN doc_objects.seg IS 'segment name from level number 4 (lv 1)';}, - %{COMMENT ON COLUMN documents.ocn + %{COMMENT ON COLUMN doc_objects.ocn IS 'object citation number';}, - %{COMMENT ON COLUMN documents.en_a + %{COMMENT ON COLUMN doc_objects.en_a IS 'first endnote number in text object (eg. NULL or 34) (used with en_z to create range)';}, - %{COMMENT ON COLUMN documents.en_z + %{COMMENT ON COLUMN doc_objects.en_z IS 'last endnote number within text object (eg. NULL, 34 or say 47) (used with en_a to create range)';}, - %{COMMENT ON COLUMN documents.en_a_asterisk + %{COMMENT ON COLUMN doc_objects.en_a_asterisk IS 'first endnote number in text object (eg. NULL or 34) (used with en_z_asterisk to create range)';}, - %{COMMENT ON COLUMN documents.en_z_asterisk + %{COMMENT ON COLUMN doc_objects.en_z_asterisk IS 'last endnote number within text object (eg. NULL, 34 or say 47) (used with en_a_asterisk to create range)';}, - %{COMMENT ON COLUMN documents.en_a_plus + %{COMMENT ON COLUMN doc_objects.en_a_plus IS 'first endnote number in text object (eg. 
NULL or 34) (used with en_z_plus to create range)';}, - %{COMMENT ON COLUMN documents.en_z_plus + %{COMMENT ON COLUMN doc_objects.en_z_plus IS 'last endnote number within text object (eg. NULL, 34 or say 47) (used with en_a_plus to create range)';}, - %{COMMENT ON COLUMN documents.types + %{COMMENT ON COLUMN doc_objects.types IS 'document types seg scroll';}, - %{COMMENT ON COLUMN documents.clean + %{COMMENT ON COLUMN doc_objects.clean IS 'text object - substantive text: clean, stripped of markup';}, - %{COMMENT ON COLUMN documents.body + %{COMMENT ON COLUMN doc_objects.body IS 'text object - substantive text: light html markup';}, - %{COMMENT ON COLUMN documents.lev1 + %{COMMENT ON COLUMN doc_objects.lev1 IS 'document structure, level number 1';}, - %{COMMENT ON COLUMN documents.lev2 + %{COMMENT ON COLUMN doc_objects.lev2 IS 'document structure, level number 2';}, - %{COMMENT ON COLUMN documents.lev3 + %{COMMENT ON COLUMN doc_objects.lev3 IS 'document structure, level number 3';}, - %{COMMENT ON COLUMN documents.lev4 + %{COMMENT ON COLUMN doc_objects.lev4 IS 'document structure, level number 4';}, - %{COMMENT ON COLUMN documents.lev5 + %{COMMENT ON COLUMN doc_objects.lev5 IS 'document structure, level number 5';}, - %{COMMENT ON COLUMN documents.lev6 + %{COMMENT ON COLUMN doc_objects.lev6 IS 'document structure, level number 6';}, - %{COMMENT ON COLUMN documents.t_of + %{COMMENT ON COLUMN doc_objects.t_of IS 'document structure, type of object (object is of)';}, - %{COMMENT ON COLUMN documents.t_is + %{COMMENT ON COLUMN doc_objects.t_is IS 'document structure, object is';}, - %{COMMENT ON COLUMN documents.node + %{COMMENT ON COLUMN doc_objects.node IS 'document structure, object node if heading';}, - %{COMMENT ON COLUMN documents.parent + %{COMMENT ON COLUMN doc_objects.parent IS 'document structure, object parent (is a heading)';} ] conn_execute_array(sql_arr) @@ -501,15 +512,15 @@ module SiSU_DB_create IS 'endnote substantive content';}, %{COMMENT ON COLUMN 
endnotes.ocn IS 'object citation no# <\~(\d+)> from which endnote is referenced';}, - %{COMMENT ON COLUMN documents.metadata_tid - IS 'tie to title in metadata - unique for each document';} + %{COMMENT ON COLUMN doc_objects.metadata_tid + IS 'tie to title in metadata_and_text - unique for each document';} ] conn_execute_array(sql_arr) end def endnotes_asterisk sql_arr=[ %{COMMENT ON Table endnotes_asterisk - IS 'contains searchable text of SiSU documents endnotes asterisk';}, + IS 'contains searchable text of SiSU documents endnotes marked with asterisk';}, %{COMMENT ON COLUMN endnotes_asterisk.nid IS 'unique';}, %{COMMENT ON COLUMN endnotes_asterisk.document_lid @@ -522,15 +533,15 @@ module SiSU_DB_create IS 'endnote substantive content';}, %{COMMENT ON COLUMN endnotes_asterisk.ocn IS 'object citation no# <\~(\d+)> from which endnote is referenced';}, - %{COMMENT ON COLUMN documents.metadata_tid - IS 'tie to title in metadata - unique for each document';} + %{COMMENT ON COLUMN doc_objects.metadata_tid + IS 'tie to title in metadata_and_text - unique for each document';} ] conn_execute_array(sql_arr) end def endnotes_plus sql_arr=[ %{COMMENT ON Table endnotes_plus - IS 'contains searchable text of SiSU documents endnotes';}, + IS 'contains searchable text of SiSU documents endnotes marked with plus';}, %{COMMENT ON COLUMN endnotes_plus.nid IS 'unique';}, %{COMMENT ON COLUMN endnotes_plus.document_lid @@ -543,8 +554,8 @@ module SiSU_DB_create IS 'endnote substantive content';}, %{COMMENT ON COLUMN endnotes_plus.ocn IS 'object citation no# <\~(\d+)> from which endnote is referenced';}, - %{COMMENT ON COLUMN documents.metadata_tid - IS 'tie to title in metadata - unique for each document';}, + %{COMMENT ON COLUMN doc_objects.metadata_tid + IS 'tie to title in metadata_and_text - unique for each document';}, ] conn_execute_array(sql_arr) end @@ -552,8 +563,8 @@ module SiSU_DB_create sql_arr=[ %{COMMENT ON Table urls IS 'contains base url links to different SiSU 
output';}, - %{COMMENT ON COLUMN documents.metadata_tid - IS 'tie to title in metadata - unique for each document, the mapping of rows is one to one';}, + %{COMMENT ON COLUMN doc_objects.metadata_tid + IS 'tie to title in metadata_and_text - unique for each document, the mapping of rows is one to one';}, %{COMMENT ON COLUMN urls.plaintext IS 'plaintext utf-8';}, %{COMMENT ON COLUMN urls.html_toc diff --git a/lib/sisu/v2/db_drop.rb b/lib/sisu/v2/db_drop.rb index 673c5f8f..7189da56 100644 --- a/lib/sisu/v2/db_drop.rb +++ b/lib/sisu/v2/db_drop.rb @@ -70,8 +70,8 @@ module SiSU_DB_drop cascade='CASCADE' end @drop_table=[ - "DROP TABLE metadata #{cascade};", - "DROP TABLE documents #{cascade};", + "DROP TABLE metadata_and_text #{cascade};", + "DROP TABLE doc_objects #{cascade};", "DROP TABLE urls #{cascade};", "DROP TABLE endnotes #{cascade};", "DROP TABLE endnotes_asterisk #{cascade};", @@ -84,15 +84,35 @@ module SiSU_DB_drop msg_sqlite="as not all disk space is recovered after dropping the database << #{@db_info.sqlite.db} >>, you may be better off deleting the file, and recreating it as necessary" case @sql_type when /sqlite/ - @conn.transaction - @drop_table.each do |d| - @conn.execute(d) - end - @conn.commit puts msg_sqlite ans=@ans.response?('remove sql database?') - if ans and File.exist?(@db_info.sqlite.db) + if ans \ + and File.exist?(@db_info.sqlite.db) + @conn.close File.unlink(@db_info.sqlite.db) + db=SiSU_Env::Info_db.new + conn=db.sqlite.conn_sqlite3 + sdb=SiSU_DB_DBI::Create.new(@opt,conn,@db_info,@sql_type) + sdb_index=SiSU_DB_DBI::Index.new(@opt,conn,@db_info,@sql_type) + sdb.output_dir? + begin + sdb.create_db + sdb.create_table.metadata_and_text + sdb.create_table.doc_objects + sdb.create_table.endnotes + sdb.create_table.endnotes_asterisk + sdb.create_table.endnotes_plus + sdb.create_table.urls + sdb_index.create_indexes + rescue; SiSU_Errors::Info_error.new($!,$@,'-D').error; sdb.output_dir?
+ end + exit + else + @conn.transaction + @drop_table.each do |d| + @conn.execute(d) + end + @conn.commit end else @drop_table.each do |d| @@ -115,33 +135,33 @@ module SiSU_DB_drop end def indexes #% drop all indexes #@conn.do(%{ - # DROP INDEX object_nr ON documents(ocn); - # DROP INDEX body ON documents(body); - # DROP INDEX clean ON documents(clean); - # DROP INDEX lev1 ON documents(lev1); - # DROP INDEX lev2 ON documents(lev2); - # DROP INDEX lev3 ON documents(lev3); - # DROP INDEX lev4 ON documents(lev4); - # DROP INDEX lev5 ON documents(lev5); - # DROP INDEX lev6 ON documents(lev6); + # DROP INDEX object_nr ON doc_objects(ocn); + # DROP INDEX body ON doc_objects(body); + # DROP INDEX clean ON doc_objects(clean); + # DROP INDEX lev1 ON doc_objects(lev1); + # DROP INDEX lev2 ON doc_objects(lev2); + # DROP INDEX lev3 ON doc_objects(lev3); + # DROP INDEX lev4 ON doc_objects(lev4); + # DROP INDEX lev5 ON doc_objects(lev5); + # DROP INDEX lev6 ON doc_objects(lev6); # DROP INDEX endnote_nr ON endnotes(nr); # DROP INDEX endnote ON endnotes(body); - # DROP INDEX title ON metadata(title); - # DROP INDEX filename ON metadata(filename) + # DROP INDEX title ON metadata_and_text(title); + # DROP INDEX filename ON metadata_and_text(filename) # /* - # DROP INDEX object_nr ON documents(ocn) CASCADE; - # DROP INDEX body ON documents(body) CASCADE; - # DROP INDEX clean ON documents(clean) CASCADE; - # DROP INDEX lev1 ON documents(lev1) CASCADE; - # DROP INDEX lev2 ON documents(lev2) CASCADE; - # DROP INDEX lev3 ON documents(lev3) CASCADE; - # DROP INDEX lev4 ON documents(lev4) CASCADE; - # DROP INDEX lev5 ON documents(lev5) CASCADE; - # DROP INDEX lev6 ON documents(lev6) CASCADE; + # DROP INDEX object_nr ON doc_objects(ocn) CASCADE; + # DROP INDEX body ON doc_objects(body) CASCADE; + # DROP INDEX clean ON doc_objects(clean) CASCADE; + # DROP INDEX lev1 ON doc_objects(lev1) CASCADE; + # DROP INDEX lev2 ON doc_objects(lev2) CASCADE; + # DROP INDEX lev3 ON doc_objects(lev3) 
CASCADE; + # DROP INDEX lev4 ON doc_objects(lev4) CASCADE; + # DROP INDEX lev5 ON doc_objects(lev5) CASCADE; + # DROP INDEX lev6 ON doc_objects(lev6) CASCADE; # DROP INDEX endnote_nr ON endnotes(nr) CASCADE; # DROP INDEX endnote ON endnotes(body) CASCADE; - # DROP INDEX title ON metadata(title) CASCADE; - # DROP INDEX filename ON metadata(filename) CASCADE + # DROP INDEX title ON metadata_and_text(title) CASCADE; + # DROP INDEX filename ON metadata_and_text(filename) CASCADE # */ #}) end diff --git a/lib/sisu/v2/db_import.rb b/lib/sisu/v2/db_import.rb index 1f795e68..5610a1d0 100644 --- a/lib/sisu/v2/db_import.rb +++ b/lib/sisu/v2/db_import.rb @@ -60,9 +60,10 @@ module SiSU_DB_import require "#{SiSU_lib}/db_columns" # db_columns.rb require "#{SiSU_lib}/db_load_tuple" # db_load_tuple.rb + require "#{SiSU_lib}/db_sqltxt" # db_sqltxt.rb require "#{SiSU_lib}/shared_html_lite" # shared_html_lite.rb require 'sqlite3' - class Import < SiSU_DB_columns::Column_size + class Import < SiSU_DB_text::Prepare include SiSU_Param include SiSU_Screen @@dl=nil @@ -86,7 +87,7 @@ module SiSU_DB_import @counter={} @db=SiSU_Env::Info_db.new @driver_sqlite3=(@conn.inspect.match(/^(.{10})/)[1]==@db.sqlite.conn_sqlite3.inspect.match(/^(.{10})/)[1]) ? 
true : false - sql='SELECT MAX(lid) FROM documents' + sql='SELECT MAX(lid) FROM doc_objects' begin @col[:lid] ||=0 @col[:lid]=if @driver_sqlite3 @@ -122,7 +123,7 @@ module SiSU_DB_import tell.print_grey if @opt.cmd =~/v/ file_exist=if @sql_type=~/sqlite/; nil else - @conn.select_one(%{ SELECT metadata.tid FROM metadata WHERE metadata.filename ~ '#{@opt.fns}'; }) + @conn.select_one(%{ SELECT metadata_and_text.tid FROM metadata_and_text WHERE metadata_and_text.filename = '#{@opt.fns}'; }) end if (@sql_type!~/sqlite/ and not file_exist) \ or @sql_type=~/sqlite/ @@ -192,28 +193,6 @@ module SiSU_DB_import end end end - def special_character_escape(str) - str.gsub!(/'/,"''") #string.gsub!(/'/,"\047") #string.gsub!(/'/,"\\'") - str.gsub!(/#{Mx[:br_line]}|#{Mx[:br_nl]}/,"
\n") - str.gsub!(/#{Mx[:tag_o]}\S+?#{Mx[:tag_c]}/,'') #check - str.gsub!(/#{Mx[:lnk_o]}\s*(\S+?\.(?:png|jpg))(?:\s+\d+x\d+)?(.+?)#{Mx[:lnk_c]}\S+/,'[image: \1] \2') - str.gsub!(/#{Mx[:lnk_o]}\s*(.+?)\s*#{Mx[:lnk_c]}(?:file|ftp):\/\/\S+?([.,!?]?(?:\s|$))/,'\1\2') - str.gsub!(/#{Mx[:lnk_o]}\s*(.+?)\s*#{Mx[:lnk_c]}#{Mx[:url_o]}\S+?#{Mx[:url_c]}/,'\1') - str - end - def strip_markup(str) #define rules, make same as in dal clean - str.gsub!(/#{Mx[:fa_superscript_o]}(\d+)#{Mx[:fa_superscript_c]}/,'[\1]') - str.gsub!(/(?: \\;|#{Mx[:nbsp]})+/,' ') - str.gsub!(/#{Mx[:tc_o]}#{Mx[:tc_p]}#{Mx[:tc_p]}\d+(.+)#{Mx[:tc_c]}/u,'\1') #tables - str.gsub!(/#{Mx[:tc_p]}#{Mx[:tc_p]}\d+#{Mx[:tc_p]}/u,' ') #tables - str.gsub!(/#{Mx[:tc_p]}/u,' ') #tables tidy later - str.gsub!(/<.+?>/,'') - str.gsub!(/#{Mx[:lnk_o]}.+?\.(?:png|jpg|gif).+?#{Mx[:lnk_c]}(?:file|ftp)\/\/:\S+ /,' [image] ') # else image names found in search - str.gsub!(/#{Mx[:lnk_o]}.+?\.(?:png|jpg|gif).+?#{Mx[:lnk_c]}#{Mx[:url_o]}\S+?#{Mx[:url_c]}/,' [image]') # else image names found in search - str.gsub!(/\s\s+/,' ') - str.strip! - str - end def pf_db_import_transaction_open end def pf_db_import_transaction_close @@ -222,12 +201,23 @@ module SiSU_DB_import print %{ #{@cX.grey}import documents dbi_unit #{@cX.off} } unless @opt.cmd =~/q/ @tp={} @md=SiSU_Param::Parameters.new(@opt).get +#% sisutxt & fulltxt + if FileTest.exist?(@md.fns) + txt_arr=IO.readlines(@md.fns,'') + src=txt_arr.join("\n") + src=special_character_escape(src) + @tp[:sisutxt_f],@tp[:sisutxt_i]='sisutxt, ',"'#{src}', " + txt=clean_searchable_text(txt_arr) + #special_character_escape(txt) + @tp[:fulltxt_f],@tp[:fulltxt_i]='fulltxt, ',"'#{txt}', " + end +#% title if defined? 
@md.title.full \ and @md.title.full=~/\S+/ # DublinCore 1 - title - @tp[:title]=@md.title.full - special_character_escape(@tp[:title]) - @tp[:title_f],@tp[:title_i]='title, ',"'#{@tp[:title]}', " - sql='SELECT MAX(tid) FROM metadata' + #@tp[:title]=@md.title.full + #special_character_escape(@tp[:title]) + #@tp[:title_f],@tp[:title_i]='title, ',"'#{@tp[:title]}', " + sql='SELECT MAX(tid) FROM metadata_and_text' begin @@id_t ||=0 id_t=if @driver_sqlite3 @@ -242,220 +232,9 @@ module SiSU_DB_import @@id_t+=1 #bug related, needs to be performed once at start of file, but consider moving, as, placed here it means program will fail if document header lacks @title: puts %{\n#{@cX.grey}Processing file number#{@cX.off}: #{@cX.green}#{@@id_t}#{@@cX.off}} unless @opt.cmd =~/q/ end - if defined? @md.creator.author \ - and @md.creator.author=~/\S+/ # DublinCore 2 - creator/author (author) - txt=@md.creator.author #dc - special_character_escape(txt) - @tp[:creator_f],@tp[:creator_i]='creator, ',"'#{txt}', " - end - if defined? @md.creator.contributor \ - and @md.creator.contributor=~/\S+/ # DublinCore 6 - contributor - txt=@md.creator.contributor #dc - special_character_escape(txt) - @tp[:contributor_f],@tp[:contributor_i]='contributor, ',"'#{txt}', " - end - if defined? @md.creator.translator \ - and @md.creator.translator=~/\S+/ - txt=@md.creator.translator - special_character_escape(txt) - @tp[:translator_f],@tp[:translator_i]='translator, ',"'#{txt}', " - end - if defined? @md.creator.illustrator \ - and @md.creator.illustrator=~/\S+/ - txt=@md.creator.illustrator - special_character_escape(txt) - @tp[:illustrator_f],@tp[:illustrator_i]='illustrator, ',"'#{txt}', " - end - if defined? @md.publisher \ - and @md.publisher - txt=@md.publisher #dc - special_character_escape(txt) - @tp[:publisher_f],@tp[:publisher_i]='publisher, ',"'#{txt}', " - end - if defined? 
@md.creator.prepared_by \ - and @md.creator.prepared_by=~/\S+/ - txt=@md.creator.prepared_by - special_character_escape(txt) - @tp[:prepared_by_f],@tp[:prepared_by_i]='prepared_by, ',"'#{txt}', " - end - if defined? @md.creator.digitized_by \ - and @md.creator.digitized_by=~/\S+/ - txt=@md.creator.digitized_by - special_character_escape(txt) - @tp[:digitized_by_f],@tp[:digitized_by_i]='digitized_by, ',"'#{txt}', " - end - if defined? @md.classify.subject \ - and @md.classify.subject=~/\S+/ # DublinCore 3 - subject (us library of congress, eric or udc, or schema???) - txt=@md.classify.subject #dc - special_character_escape(txt) - @tp[:subject_f],@tp[:subject_i]='subject, ',"'#{txt}', " - end - if defined? @md.notes.description \ - and @md.notes.description=~/\S+/ # DublinCore 4 - description - txt=@md.notes.description #dc - special_character_escape(txt) - @tp[:description_f],@tp[:description_i]='description, ',"'#{txt}', " - end - if defined? @md.classify.subject \ - and @md.classify.subject=~/\S+/ # DublinCore 8 - type (genre eg. report, convention etc) - txt=@md.classify.abstract - special_character_escape(txt) - @tp[:abstract_f],@tp[:abstract_i]='abstract, ',"'#{txt}', " - end - if defined? @md.rights.all \ - and @md.rights.all=~/\S+/ # DublinCore 15 - rights - txt=@md.rights.all #dc - special_character_escape(txt) - @tp[:rights_f],@tp[:rights_i]='rights, ',"'#{txt}', " - end - if defined? @md.date.published \ - and @md.date.published=~/\S+/ # DublinCore 7 - date year-mm-dd - txt=@md.date.published #dc - special_character_escape(txt) - @tp[:date_f],@tp[:date_i]='date, ',"'#{txt}', " - end - if defined? @md.date.created \ - and @md.date.created=~/\S+/ - txt=@md.date.created #dc - special_character_escape(txt) - @tp[:date_created_f],@tp[:date_created_i]='date_created, ',"'#{txt}', " - end - if defined? 
@md.date.issued \ - and @md.date.issued=~/\S+/ - txt=@md.date.issued #dc - special_character_escape(txt) - @tp[:date_issued_f],@tp[:date_issued_i]='date_issued, ',"'#{txt}', " - end - if defined? @md.date.available \ - and @md.date.available=~/\S+/ - txt=@md.date.available #dc - special_character_escape(txt) - @tp[:date_available_f],@tp[:date_available_i]='date_available, ',"'#{txt}', " - end - if defined? @md.date.modified \ - and @md.date.modified=~/\S+/ - txt=@md.date.modified #dc - special_character_escape(txt) - @tp[:date_modified_f],@tp[:date_modified_i]='date_modified, ',"'#{txt}', " - end - if defined? @md.date.valid \ - and @md.date.valid=~/\S+/ - txt=@md.date.valid #dc - special_character_escape(txt) - @tp[:date_valid_f],@tp[:date_valid_i]='date_valid, ',"'#{txt}', " - end - if defined? @md.title.language \ - and @md.title.language=~/\S+/ - txt=@md.title.language - special_character_escape(txt) - @tp[:language_f],@tp[:language_i]='language, ',"'#{txt}', " - end - if defined? @md.original.language \ - and @md.original.language=~/\S+/ - txt=@md.original.language - special_character_escape(txt) - @tp[:language_original_f],@tp[:language_original_i]='language_original, ',"'#{txt}', " - end - if defined? @md.classify.format \ - and @md.classify.format=~/\S+/ # DublinCore 9 - format (use your mime type) - txt=@md.classify.format #dc - special_character_escape(txt) - @tp[:format_f],@tp[:format_i]='format, ',"'#{txt}', " - end - if defined? @md.classify.identifier \ - and @md.classify.identifier=~/\S+/ # DublinCore 10 - identifier (your identifier, could use urn which is free) - txt=@md.classify.identifier #dc - special_character_escape(txt) - @tp[:identifier_f],@tp[:identifier_i]='identifier, ',"'#{txt}', " - end - if defined? @md.original.source \ - and @md.original.source=~/\S+/ # DublinCore 11 - source (document source) - txt=@md.original.source #dc - special_character_escape(txt) - @tp[:source_f],@tp[:source_i]='source, ',"'#{txt}', " - end - if defined? 
@md.classify.relation \ - and @md.classify.relation=~/\S+/ # DublinCore 13 - relation - txt=@md.classify.relation #dc - special_character_escape(txt) - @tp[:relation_f],@tp[:relation_i]='relation, ',"'#{txt}', " - end - if defined? @md.classify.coverage \ - and @md.classify.coverage=~/\S+/ # DublinCore 14 - coverage - txt=@md.classify.coverage #dc - special_character_escape(txt) - @tp[:coverage_f],@tp[:coverage_i]='coverage, ',"'#{txt}', " - end - if defined? @md.classify.keywords \ - and @md.classify.keywords=~/\S+/ - txt=@md.classify.keywords - special_character_escape(txt) - @tp[:keywords_f],@tp[:keywords_i]='keywords, ',"'#{txt}', " - end - if defined? @md.notes.comment \ - and @md.notes.comment=~/\S+/ - txt=@md.notes.comments - special_character_escape(txt) - @tp[:comments_f],@tp[:comments_i]='comments, ',"'#{txt}', " - end - if defined? @md.classify.loc \ - and @md.classify.loc=~/\S+/ - txt=@md.classify.loc - special_character_escape(txt) - @tp[:cls_loc_f],@tp[:cls_loc_i]='cls_loc, ',"'#{txt}', " - end - if defined? @md.classify.dewey \ - and @md.classify.dewey=~/\S+/ - txt=@md.classify.dewey - special_character_escape(txt) - @tp[:cls_dewey_f],@tp[:cls_dewey_i]='cls_dewey, ',"'#{txt}', " - end - if defined? @md.classify.pg \ - and @md.classify.pg=~/\S+/ - txt=@md.classify.pg - special_character_escape(txt) - @tp[:cls_pg_f],@tp[:cls_pg_i]='cls_pg, ',"'#{txt}', " - end - if defined? @md.classify.isbn \ - and @md.classify.isbn=~/\S+/ - txt=@md.classify.isbn - special_character_escape(txt) - @tp[:cls_isbn_f],@tp[:cls_isbn_i]='cls_isbn, ',"'#{txt}', " - end - if defined? @md.notes.prefix_a \ - and @md.notes.prefix_a=~/\S+/ - txt=@md.notes.prefix_a - special_character_escape(txt) - @tp[:prefix_a_f],@tp[:prefix_a_i]='prefix_a, ',"'#{txt}', " - end - if defined? @md.notes.prefix_b \ - and @md.notes.prefix_b=~/\S+/ - txt=@md.notes.prefix_b - special_character_escape(txt) - @tp[:prefix_b_f],@tp[:prefix_b_i]='prefix_b, ',"'#{txt}', " - end - if defined? 
@md.fns \ - and @md.fns=~/\S+/ - txt=@md.fns - special_character_escape(txt) - @tp[:fns_f],@tp[:fns_i]="filename, ","'#{txt}', " - end - if @md.wc_words; txt=@md.wc_words - @tp[:wc_words_f],@tp[:wc_words_i]='wc_words, ',"'#{txt}', " - end - if defined? @md.dgst \ - and @md.dgst.class==Array - txt=@md.dgst[1] - @tp[:dgst_f],@tp[:dgst_i]='dgst, ',"'#{txt}', " - end - if @md.sc_date; txt=@md.sc_date - @tp[:sc_date_f],@tp[:sc_date_i]='sc_date, ',"'#{txt}', " - end - if @md.generated; txt=@md.generated - @tp[:generated_f],@tp[:generated_i]='generated, ',"'#{@txt}', " - end + ################ CLEAR ############## SiSU_DB_DBI::Test.new(self,@opt).verify #% import title names, filenames (tuple) - t=SiSU_DB_tuple::Load_metadata.new(@conn,@tp,@@id_t,@opt,@file) + t=SiSU_DB_tuple::Load_metadata.new(@conn,@@id_t,@md,@file) tuple=t.tuple tuple end @@ -482,13 +261,7 @@ module SiSU_DB_import and data.ln.inspect=~/[123]/ @col[:lev],txt,@col[:ocn],@col[:lev_an],@col[:ocnd],@col[:ocns],@col[:t_of],@col[:t_is],@col[:node],@col[:parent],@col[:digest_clean],@col[:digest_all]=data.ln,data.obj,data.ocn,data.lv,data.odv,data.osp,data.of,data.is,data.node,data.parent,'','' @col[:lid]+=1 - if txt =~/(?:#{Mx[:en_a_o]}|#{Mx[:en_b_o]})[*+]?(\d+)\s+.+?(?:#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/ - endnotes(txt).range - @en << endnotes(txt).standard if txt =~/#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}/ - @en_ast << endnotes(txt).asterisk if txt =~/#{Mx[:en_b_o]}\*.+?#{Mx[:en_b_c]}/ - @en_pls << endnotes(txt).plus if txt =~/#{Mx[:en_b_o]}\+.+?#{Mx[:en_b_o]}/ - txt=endnotes(txt).clean_text - end + txt=endnotes(txt).extract_any @col[:body]=SiSU_Format_Shared::CSS_Format.new(@md,data).lev4_minus special_character_escape(@col[:body]) @col[:plaintext]=@col[:body].dup @@ -521,13 +294,7 @@ module SiSU_DB_import end @env=SiSU_Env::Info_env.new(@md.fns) @base_url="#{@env.url.root}/#{@md.fnb}/#{@hname}.html" - if txt =~/(?:#{Mx[:en_a_o]}|#{Mx[:en_b_o]})[*+]?(\d+)\s+.+?(?:#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/ - endnotes(txt).range - 
@en << endnotes(txt).standard if txt =~ /#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}/ - @en_ast << endnotes(txt).asterisk if txt =~/#{Mx[:en_b_o]}\*.+?#{Mx[:en_b_c]}/ - @en_pls << endnotes(txt).plus if txt =~/#{Mx[:en_b_o]}\+.+?#{Mx[:en_b_c]}/ - txt=endnotes(txt).clean_text(@base_url) - end + txt=endnotes(txt).extract_any @col[:body]=SiSU_Format_Shared::CSS_Format.new(@md,data).lev4_plus special_character_escape(@col[:body]) @col[:plaintext]=@col[:body].dup @@ -553,13 +320,7 @@ module SiSU_DB_import end @env=SiSU_Env::Info_env.new(@md.fns) @base_url="#{@env.url.root}/#{@md.fnb}/#{@hname}.html" - if txt =~/(?:#{Mx[:en_a_o]}|#{Mx[:en_b_o]})[*+]?(\d+)\s+.+?(?:#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/ - endnotes(txt).range - @en << endnotes(txt).standard if txt =~ /#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}/ - @en_ast << endnotes(txt).asterisk if txt =~/#{Mx[:en_b_o]}\*.+?#{Mx[:en_b_c]}/ - @en_pls << endnotes(txt).plus if txt =~/#{Mx[:en_b_o]}\+.+?#{Mx[:en_b_c]}/ - txt=endnotes(txt).clean_text(@base_url) - end + txt=endnotes(txt).extract_any @col[:body]=SiSU_Format_Shared::CSS_Format.new(@md,data).lev4_plus special_character_escape(@col[:body]) @col[:plaintext]=@col[:body].dup @@ -585,13 +346,7 @@ module SiSU_DB_import end @env=SiSU_Env::Info_env.new(@md.fns) @base_url="#{@env.url.root}/#{@md.fnb}/#{@hname}.html" - if txt =~/(?:#{Mx[:en_a_o]}|#{Mx[:en_b_o]})[*+]?(\d+)\s+.+?(?:#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/ - endnotes(txt).range - @en << endnotes(txt).standard if txt =~ /#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}/ - @en_ast << endnotes(txt).asterisk if txt =~/#{Mx[:en_b_o]}\*.+?#{Mx[:en_b_c]}/ - @en_pls << endnotes(txt).plus if txt =~/#{Mx[:en_b_o]}\+.+?#{Mx[:en_b_c]}/ - txt=endnotes(txt).clean_text(@base_url) - end + txt=endnotes(txt).extract_any @col[:body]=SiSU_Format_Shared::CSS_Format.new(@md,data).lev4_plus special_character_escape(@col[:body]) @col[:plaintext]=@col[:body].dup @@ -613,15 +368,9 @@ module SiSU_DB_import end @env=SiSU_Env::Info_env.new(@md.fns) 
@base_url="#{@env.url.root}/#{@md.fnb}/#{@hname}.html" - if txt =~/(?:#{Mx[:en_a_o]}|#{Mx[:en_b_o]})[*+]?(\d+)\s+.+?(?:#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/ - endnotes(txt).range - @en << endnotes(txt).standard if txt =~ /#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}/ - @en_ast << endnotes(txt).asterisk if txt =~/#{Mx[:en_b_o]}\*.+?#{Mx[:en_b_c]}/ - @en_pls << endnotes(txt).plus if txt =~/#{Mx[:en_b_o]}\+.+?#{Mx[:en_b_c]}/ - txt=endnotes(txt).clean_text(@base_url) - end + txt=endnotes(txt).extract_any if @sql_type=~/pg/ \ - and txt.size > (document_clean - 1) #% examine pg build & remove limitation + and txt.size > (SiSU_DB_columns::Column_size.new.document_clean - 1) #% examine pg build & remove limitation puts "\n\nTOO LARGE (TXT - see error log)\n\n" open("#{Dir.pwd}/pg_documents_error_log",'a') do |error| error.puts("\n#{@opt.fns}\nTEXT BODY\n#{@col[:body].size} object #{@col[:ocn]} -> #{@col[:body].slice(0..500)}") @@ -660,7 +409,7 @@ module SiSU_DB_import #special_character_escape(body) #special_character_escape(txt) strip_markup(txt) - if txt.size > (endnote_clean - 1) + if txt.size > (SiSU_DB_columns::Column_size.new.endnote_clean - 1) puts "\n\nTOO LARGE (ENDNOTE - see error log)\n\n" open("#{Dir.pwd}/pg_documents_error_log",'a') do |error| error.puts("\n#{@opt.fns}\nENDNOTE\n#{txt.size} object #{@col[:ocn]},#{@col[:ocnd]},#{@col[:ocns]} -> #{txt.slice(0..500)}") @@ -698,7 +447,7 @@ module SiSU_DB_import special_character_escape(txt) body=SiSU_Format_Shared::CSS_Format.new(@md,data).endnote(nr,txt) strip_markup(txt) - if txt.size > (endnote_clean - 1) + if txt.size > (SiSU_DB_columns::Column_size.new.endnote_clean - 1) puts "\n\nTOO LARGE (ENDNOTE - see error log)\n\n" open("#{Dir.pwd}/pg_documents_error_log",'a') do |error| error.puts("\n#{@opt.fns}\nENDNOTE\n#{txt.size} object #{@col[:ocn]},#{@col[:ocnd]},#{@col[:ocns]} -> #{txt.slice(0..500)}") @@ -736,7 +485,7 @@ module SiSU_DB_import special_character_escape(txt) 
body=SiSU_Format_Shared::CSS_Format.new(@md,data).endnote(nr,txt) strip_markup(txt) - if txt.size > (endnote_clean - 1) + if txt.size > (SiSU_DB_columns::Column_size.new.endnote_clean - 1) puts "\n\nTOO LARGE (ENDNOTE - see error log)\n\n" open("#{Dir.pwd}/pg_documents_error_log",'a') do |error| error.puts("\n#{@opt.fns}\nENDNOTE\n#{txt.size} object #{@col[:ocn]},#{@col[:ocnd]},#{@col[:ocns]} -> #{txt.slice(0..500)}") @@ -772,6 +521,16 @@ module SiSU_DB_import end def endnotes(txt) @txt=txt + def extract_any + if @txt =~/(?:#{Mx[:en_a_o]}|#{Mx[:en_b_o]})[*+]?(\d+)\s+.+?(?:#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/ + endnotes(@txt).range + @en << endnotes(@txt).standard if @txt =~/#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}/ + @en_ast << endnotes(@txt).asterisk if @txt =~/#{Mx[:en_b_o]}\*.+?#{Mx[:en_b_c]}/ + @en_pls << endnotes(@txt).plus if @txt =~/#{Mx[:en_b_o]}\+.+?#{Mx[:en_b_c]}/ + @txt=endnotes(@txt).clean_text + end + @txt + end def standard x=if @txt =~/#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}/; @txt.scan(/#{Mx[:en_a_o]}(\d+).+?#{Mx[:en_a_c]}/) else nil diff --git a/lib/sisu/v2/db_indexes.rb b/lib/sisu/v2/db_indexes.rb index a64fb362..3cbcc20c 100644 --- a/lib/sisu/v2/db_indexes.rb +++ b/lib/sisu/v2/db_indexes.rb @@ -73,15 +73,15 @@ module SiSU_DB_index def base print "\n create documents common indexes\n" unless @opt.cmd =~/q/ sql_arr=[ - %{CREATE INDEX object_nr ON documents(ocn);}, - %{CREATE INDEX digest_clean ON documents(digest_clean);}, - %{CREATE INDEX digest_all ON documents(digest_all);}, - %{CREATE INDEX lev1 ON documents(lev1);}, - %{CREATE INDEX lev2 ON documents(lev2);}, - %{CREATE INDEX lev3 ON documents(lev3);}, - %{CREATE INDEX lev4 ON documents(lev4);}, - %{CREATE INDEX lev5 ON documents(lev5);}, - %{CREATE INDEX lev6 ON documents(lev6);}, + %{CREATE INDEX object_nr ON doc_objects(ocn);}, + %{CREATE INDEX digest_clean ON doc_objects(digest_clean);}, + %{CREATE INDEX digest_all ON doc_objects(digest_all);}, + %{CREATE INDEX lev1 ON doc_objects(lev1);}, + %{CREATE INDEX lev2
ON doc_objects(lev2);}, + %{CREATE INDEX lev3 ON doc_objects(lev3);}, + %{CREATE INDEX lev4 ON doc_objects(lev4);}, + %{CREATE INDEX lev5 ON doc_objects(lev5);}, + %{CREATE INDEX lev6 ON doc_objects(lev6);}, %{CREATE INDEX endnote_nr ON endnotes(nr);}, %{CREATE INDEX digest_en ON endnotes(digest_clean);}, %{CREATE INDEX endnote_nr_asterisk ON endnotes_asterisk(nr);}, @@ -90,15 +90,15 @@ module SiSU_DB_index %{CREATE INDEX endnote_nr_plus ON endnotes_plus(nr);}, %{CREATE INDEX endnote_plus ON endnotes_plus(clean);}, %{CREATE INDEX digest_en_plus ON endnotes_plus(digest_clean);}, - %{CREATE INDEX title ON metadata(title);}, - %{CREATE INDEX filename ON metadata(filename)}, + %{CREATE INDEX title ON metadata_and_text(title);}, + %{CREATE INDEX filename ON metadata_and_text(filename)}, ] conn_execute_array(sql_arr) end def text print "\n create documents text indexes\n" unless @opt.cmd =~/q/ sql_arr=[ - %{CREATE INDEX clean ON documents(clean);}, + %{CREATE INDEX clean ON doc_objects(clean);}, %{CREATE INDEX endnote ON endnotes(clean);} ] conn_execute_array(sql_arr) diff --git a/lib/sisu/v2/db_load_tuple.rb b/lib/sisu/v2/db_load_tuple.rb index 2fc3a455..cc00b74a 100644 --- a/lib/sisu/v2/db_load_tuple.rb +++ b/lib/sisu/v2/db_load_tuple.rb @@ -58,6 +58,7 @@ =end module SiSU_DB_tuple + require "#{SiSU_lib}/db_columns" # db_columns.rb class Load_documents require "#{SiSU_lib}/param" # param.rb include SiSU_Param @@ -78,10 +79,10 @@ module SiSU_DB_tuple end def tuple #% import line sql_entry=if @col[:en_a] - "INSERT INTO documents (lid, metadata_tid, lev, lev_an, clean, body, ocn, ocnd, ocns, seg, lev1, lev2, lev3, lev4, lev5, lev6, en_a, en_z, t_of, t_is, node, parent, digest_clean, digest_all) " + + "INSERT INTO doc_objects (lid, metadata_tid, lev, lev_an, clean, body, ocn, ocnd, ocns, seg, lev1, lev2, lev3, lev4, lev5, lev6, en_a, en_z, t_of, t_is, node, parent, digest_clean, digest_all) " + "VALUES (#{@col[:lid]}, #{@col[:tid]}, #{@col[:lev]}, '#{@col[:lev_an]}', 
'#{@col[:plaintext]}', '#{@col[:body]}', '#{@col[:ocn]}', '#{@col[:ocnd]}', '#{@col[:ocns]}', '#{@col[:seg]}', '#{@col[:lv1]}', '#{@col[:lv2]}', '#{@col[:lv3]}', '#{@col[:lv4]}', '#{@col[:lv5]}', '#{@col[:lv6]}', '#{@col[:en_a]}', '#{@col[:en_z]}', '#{@col[:t_of]}', '#{@col[:t_is]}', '#{@col[:node]}', '#{@col[:parent]}', '#{@col[:digest_clean]}', '#{@col[:digest_all]}');" else - "INSERT INTO documents (lid, metadata_tid, lev, lev_an, clean, body, ocn, ocnd, ocns, seg, lev1, lev2, lev3, lev4, lev5, lev6, t_of, t_is, node, parent, digest_clean, digest_all) " + + "INSERT INTO doc_objects (lid, metadata_tid, lev, lev_an, clean, body, ocn, ocnd, ocns, seg, lev1, lev2, lev3, lev4, lev5, lev6, t_of, t_is, node, parent, digest_clean, digest_all) " + "VALUES (#{@col[:lid]}, #{@col[:tid]}, #{@col[:lev]}, '#{@col[:lev_an]}', '#{@col[:plaintext]}', '#{@col[:body]}', '#{@col[:ocn]}', '#{@col[:ocnd]}', '#{@col[:ocns]}', '#{@col[:seg]}', '#{@col[:lv1]}', '#{@col[:lv2]}', '#{@col[:lv3]}', '#{@col[:lv4]}', '#{@col[:lv5]}', '#{@col[:lv6]}', '#{@col[:t_of]}', '#{@col[:t_is]}', '#{@col[:node]}', '#{@col[:parent]}', '#{@col[:digest_clean]}', '#{@col[:digest_all]}');" end if @opt.cmd =~/M/ @@ -113,13 +114,174 @@ module SiSU_DB_tuple sql_entry end end - class Load_metadata - def initialize(conn,tp,id,opt,file) - @conn,@tp,@id,@opt,@file=conn,tp,id,opt,file + class Load_metadata #< SiSU_DB_columns::Columns + def initialize(conn,id,md,file) + @conn,@id,@opt,@file=conn,id,md,file + @tp=SiSU_DB_columns::Columns.new(md) end def tuple - sql_entry="INSERT INTO metadata (#{@tp[:fns_f]} #{@tp[:suffix_f]} #{@tp[:title_f]} #{@tp[:subtitle_f]} #{@tp[:creator_f]} #{@tp[:illustrator_f]} #{@tp[:translator_f]} #{@tp[:subject_f]} #{@tp[:description_f]} #{@tp[:publisher_f]} #{@tp[:contributor_f]} #{@tp[:prepared_by_f]} #{@tp[:digitized_by_f]} #{@tp[:date_f]} #{@tp[:date_created_f]} #{@tp[:date_issued_f]} #{@tp[:date_valid_f]} #{@tp[:date_available_f]} #{@tp[:date_modified_f]} #{@tp[:type_f]} 
#{@tp[:format_f]} #{@tp[:identifier_f]} #{@tp[:source_f]} #{@tp[:language_f]} #{@tp[:language_original_f]} #{@tp[:relation_f]} #{@tp[:coverage_f]} #{@tp[:rights_f]} #{@tp[:copyright_f]} #{@tp[:owner_f]} #{@tp[:keywords_f]} #{@tp[:abstract_f]} #{@tp[:comment_f]} #{@tp[:loc_f]} #{@tp[:dewey_f]} #{@tp[:isbn_f]} #{@tp[:pg_f]} #{@tp[:prefix_a_f]} #{@tp[:prefix_b_f]} tid) " + - "VALUES (#{@tp[:fns_i]} #{@tp[:suffix_i]} #{@tp[:title_i]} #{@tp[:subtitle_i]} #{@tp[:creator_i]} #{@tp[:illustrator_i]} #{@tp[:translator_i]} #{@tp[:subject_i]} #{@tp[:description_i]} #{@tp[:publisher_i]} #{@tp[:contributor_i]} #{@tp[:prepared_by_i]} #{@tp[:digitized_by_i]} #{@tp[:date_i]} #{@tp[:date_created_i]} #{@tp[:date_issued_i]} #{@tp[:date_valid_i]} #{@tp[:date_available_i]} #{@tp[:date_modified_i]} #{@tp[:type_i]} #{@tp[:format_i]} #{@tp[:identifier_i]} #{@tp[:source_i]} #{@tp[:language_i]} #{@tp[:language_original_i]} #{@tp[:relation_i]} #{@tp[:coverage_i]} #{@tp[:rights_i]} #{@tp[:copyright_i]} #{@tp[:owner_i]} #{@tp[:keywords_i]} #{@tp[:abstract_i]} #{@tp[:comment_i]} #{@tp[:loc_i]} #{@tp[:dewey_i]} #{@tp[:isbn_i]} #{@tp[:pg_i]} #{@tp[:prefix_a_i]} #{@tp[:prefix_b_i]} #{@id});" + sql_entry="INSERT INTO metadata_and_text ( +#{@tp.column.title.tuple[0]} +#{@tp.column.title_main.tuple[0]} +#{@tp.column.title_sub.tuple[0]} +#{@tp.column.title_short.tuple[0]} +#{@tp.column.title_edition.tuple[0]} +#{@tp.column.title_note.tuple[0]} +#{@tp.column.title_language.tuple[0]} +#{@tp.column.title_language_char.tuple[0]} +#{@tp.column.creator_author.tuple[0]} +#{@tp.column.creator_author_honorific.tuple[0]} +#{@tp.column.creator_author_nationality.tuple[0]} +#{@tp.column.creator_contributor.tuple[0]} +#{@tp.column.creator_illustrator.tuple[0]} +#{@tp.column.creator_photographer.tuple[0]} +#{@tp.column.creator_translator.tuple[0]} +#{@tp.column.creator_prepared_by.tuple[0]} +#{@tp.column.creator_digitized_by.tuple[0]} +#{@tp.column.creator_audio.tuple[0]} +#{@tp.column.creator_video.tuple[0]} 
+#{@tp.column.language_document.tuple[0]} +#{@tp.column.language_document_char.tuple[0]} +#{@tp.column.language_original.tuple[0]} +#{@tp.column.language_original_char.tuple[0]} +#{@tp.column.date_added_to_site.tuple[0]} +#{@tp.column.date_available.tuple[0]} +#{@tp.column.date_created.tuple[0]} +#{@tp.column.date_issued.tuple[0]} +#{@tp.column.date_modified.tuple[0]} +#{@tp.column.date_published.tuple[0]} +#{@tp.column.date_valid.tuple[0]} +#{@tp.column.date_translated.tuple[0]} +#{@tp.column.date_original_publication.tuple[0]} +#{@tp.column.date_generated.tuple[0]} +#{@tp.column.publisher.tuple[0]} +#{@tp.column.original_publisher.tuple[0]} +#{@tp.column.original_language.tuple[0]} +#{@tp.column.original_language_char.tuple[0]} +#{@tp.column.original_source.tuple[0]} +#{@tp.column.original_institution.tuple[0]} +#{@tp.column.original_nationality.tuple[0]} +#{@tp.column.rights_all.tuple[0]} +#{@tp.column.rights_copyright_text.tuple[0]} +#{@tp.column.rights_copyright_translation.tuple[0]} +#{@tp.column.rights_copyright_illustrations.tuple[0]} +#{@tp.column.rights_copyright_photographs.tuple[0]} +#{@tp.column.rights_copyright_preparation.tuple[0]} +#{@tp.column.rights_copyright_digitization.tuple[0]} +#{@tp.column.rights_copyright_audio.tuple[0]} +#{@tp.column.rights_copyright_video.tuple[0]} +#{@tp.column.rights_license.tuple[0]} +#{@tp.column.classify_topic_register.tuple[0]} +#{@tp.column.classify_subject.tuple[0]} +#{@tp.column.classify_type.tuple[0]} +#{@tp.column.classify_loc.tuple[0]} +#{@tp.column.classify_dewey.tuple[0]} +#{@tp.column.classify_pg.tuple[0]} +#{@tp.column.classify_isbn.tuple[0]} +#{@tp.column.classify_format.tuple[0]} +#{@tp.column.classify_identifier.tuple[0]} +#{@tp.column.classify_relation.tuple[0]} +#{@tp.column.classify_coverage.tuple[0]} +#{@tp.column.classify_keywords.tuple[0]} +#{@tp.column.notes_abstract.tuple[0]} +#{@tp.column.notes_comment.tuple[0]} +#{@tp.column.notes_description.tuple[0]} +#{@tp.column.notes_history.tuple[0]} 
+#{@tp.column.notes_prefix.tuple[0]} +#{@tp.column.notes_prefix_a.tuple[0]} +#{@tp.column.notes_prefix_b.tuple[0]} +#{@tp.column.notes_suffix.tuple[0]} +#{@tp.column.filename.tuple[0]} +#{@tp.column.sisutxt.tuple[0]} +#{@tp.column.fulltext.tuple[0]} +#{@tp.column.word_count.tuple[0]} +#{@tp.column.digest.tuple[0]} +#{@tp.column.skin_name.tuple[0]} +#{@tp.column.skin.tuple[0]} +#{@tp.column.links.tuple[0]} +tid) +" + + "VALUES ( +#{@tp.column.title.tuple[1]} +#{@tp.column.title_main.tuple[1]} +#{@tp.column.title_sub.tuple[1]} +#{@tp.column.title_short.tuple[1]} +#{@tp.column.title_edition.tuple[1]} +#{@tp.column.title_note.tuple[1]} +#{@tp.column.title_language.tuple[1]} +#{@tp.column.title_language_char.tuple[1]} +#{@tp.column.creator_author.tuple[1]} +#{@tp.column.creator_author_honorific.tuple[1]} +#{@tp.column.creator_author_nationality.tuple[1]} +#{@tp.column.creator_contributor.tuple[1]} +#{@tp.column.creator_illustrator.tuple[1]} +#{@tp.column.creator_photographer.tuple[1]} +#{@tp.column.creator_translator.tuple[1]} +#{@tp.column.creator_prepared_by.tuple[1]} +#{@tp.column.creator_digitized_by.tuple[1]} +#{@tp.column.creator_audio.tuple[1]} +#{@tp.column.creator_video.tuple[1]} +#{@tp.column.language_document.tuple[1]} +#{@tp.column.language_document_char.tuple[1]} +#{@tp.column.language_original.tuple[1]} +#{@tp.column.language_original_char.tuple[1]} +#{@tp.column.date_added_to_site.tuple[1]} +#{@tp.column.date_available.tuple[1]} +#{@tp.column.date_created.tuple[1]} +#{@tp.column.date_issued.tuple[1]} +#{@tp.column.date_modified.tuple[1]} +#{@tp.column.date_published.tuple[1]} +#{@tp.column.date_valid.tuple[1]} +#{@tp.column.date_translated.tuple[1]} +#{@tp.column.date_original_publication.tuple[1]} +#{@tp.column.date_generated.tuple[1]} +#{@tp.column.publisher.tuple[1]} +#{@tp.column.original_publisher.tuple[1]} +#{@tp.column.original_language.tuple[1]} +#{@tp.column.original_language_char.tuple[1]} +#{@tp.column.original_source.tuple[1]} 
+#{@tp.column.original_institution.tuple[1]} +#{@tp.column.original_nationality.tuple[1]} +#{@tp.column.rights_all.tuple[1]} +#{@tp.column.rights_copyright_text.tuple[1]} +#{@tp.column.rights_copyright_translation.tuple[1]} +#{@tp.column.rights_copyright_illustrations.tuple[1]} +#{@tp.column.rights_copyright_photographs.tuple[1]} +#{@tp.column.rights_copyright_preparation.tuple[1]} +#{@tp.column.rights_copyright_digitization.tuple[1]} +#{@tp.column.rights_copyright_audio.tuple[1]} +#{@tp.column.rights_copyright_video.tuple[1]} +#{@tp.column.rights_license.tuple[1]} +#{@tp.column.classify_topic_register.tuple[1]} +#{@tp.column.classify_subject.tuple[1]} +#{@tp.column.classify_type.tuple[1]} +#{@tp.column.classify_loc.tuple[1]} +#{@tp.column.classify_dewey.tuple[1]} +#{@tp.column.classify_pg.tuple[1]} +#{@tp.column.classify_isbn.tuple[1]} +#{@tp.column.classify_format.tuple[1]} +#{@tp.column.classify_identifier.tuple[1]} +#{@tp.column.classify_relation.tuple[1]} +#{@tp.column.classify_coverage.tuple[1]} +#{@tp.column.classify_keywords.tuple[1]} +#{@tp.column.notes_abstract.tuple[1]} +#{@tp.column.notes_comment.tuple[1]} +#{@tp.column.notes_description.tuple[1]} +#{@tp.column.notes_history.tuple[1]} +#{@tp.column.notes_prefix.tuple[1]} +#{@tp.column.notes_prefix_a.tuple[1]} +#{@tp.column.notes_prefix_b.tuple[1]} +#{@tp.column.notes_suffix.tuple[1]} +#{@tp.column.filename.tuple[1]} +#{@tp.column.sisutxt.tuple[1]} +#{@tp.column.fulltext.tuple[1]} +#{@tp.column.word_count.tuple[1]} +#{@tp.column.digest.tuple[1]} +#{@tp.column.skin_name.tuple[1]} +#{@tp.column.skin.tuple[1]} +#{@tp.column.links.tuple[1]} +#{@id} +);" if @opt.cmd =~/M/ puts "maintenance mode on: creating sql transaction file (for last transaction set (document) only):\n\t#{@file.inspect}" @file.puts sql_entry diff --git a/lib/sisu/v2/db_remove.rb b/lib/sisu/v2/db_remove.rb index 99640cdf..0a51b892 100644 --- a/lib/sisu/v2/db_remove.rb +++ b/lib/sisu/v2/db_remove.rb @@ -68,19 +68,19 @@ module SiSU_DB_remove 
def remove driver_sqlite3=(@conn.inspect.match(/^(.{10})/)[1]==@db.sqlite.conn_sqlite3.inspect.match(/^(.{10})/)[1]) ? true : false del_id=if driver_sqlite3 - @conn.get_first_value(%{ SELECT tid FROM metadata WHERE filename LIKE '#{@opt.fns}'; }).to_i + @conn.get_first_value(%{ SELECT tid FROM metadata_and_text WHERE filename = '#{@opt.fns}'; }).to_i else - x=@conn.select_one(%{ SELECT tid FROM metadata WHERE filename LIKE '#{@opt.fns}'; }) - del=x ? (x.join.to_i) : nil + x=@conn.select_one(%{ SELECT metadata_and_text.tid FROM metadata_and_text WHERE metadata_and_text.filename = '#{@opt.fns}'; }) + x ? (x.join.to_i) : nil end if del_id sql_entry=[ "DELETE FROM endnotes WHERE metadata_tid = '#{del_id}';", "DELETE FROM endnotes_asterisk WHERE metadata_tid = '#{del_id}';", "DELETE FROM endnotes_plus WHERE metadata_tid = '#{del_id}';", - "DELETE FROM documents WHERE metadata_tid = '#{del_id}';", + "DELETE FROM doc_objects WHERE metadata_tid = '#{del_id}';", "DELETE FROM urls WHERE metadata_tid = '#{del_id}';", - "DELETE FROM metadata WHERE tid = '#{del_id}';", + "DELETE FROM metadata_and_text WHERE metadata_and_text.tid = '#{del_id}';", ] if driver_sqlite3 @conn.transaction diff --git a/lib/sisu/v2/db_select.rb b/lib/sisu/v2/db_select.rb index 33441b45..1ac9195f 100644 --- a/lib/sisu/v2/db_select.rb +++ b/lib/sisu/v2/db_select.rb @@ -69,6 +69,8 @@ module SiSU_DB_select if @opt.mod.inspect =~/update|import/ @sdb_import=SiSU_DB_DBI::Import.new(@opt,@conn,@file,@sql_type) @sdb_remove_doc=SiSU_DB_DBI::Remove.new(@opt,@conn,@file) + elsif @opt.mod.inspect =~/remove/ + @sdb_remove_doc=SiSU_DB_DBI::Remove.new(@opt,@conn,@file) end end def sql_maintenance_file @@ -98,8 +100,8 @@ module SiSU_DB_select when /^--(?:init(?:ialize)?|create(?:all)?)$/ @sdb.output_dir? 
begin - @sdb.create_table.metadata - @sdb.create_table.documents + @sdb.create_table.metadata_and_text + @sdb.create_table.doc_objects @sdb.create_table.endnotes @sdb.create_table.endnotes_asterisk @sdb.create_table.endnotes_plus @@ -110,8 +112,8 @@ module SiSU_DB_select when /^--createtable(s)?$/ @sdb.output_dir? begin - @sdb.create_table.metadata - @sdb.create_table.documents + @sdb.create_table.metadata_and_text + @sdb.create_table.doc_objects @sdb.create_table.endnotes @sdb.create_table.endnotes_asterisk @sdb.create_table.endnotes_plus @@ -123,8 +125,8 @@ module SiSU_DB_select @sdb.output_dir? begin @sdb_no.drop.tables - @sdb.create_table.metadata - @sdb.create_table.documents + @sdb.create_table.metadata_and_text + @sdb.create_table.doc_objects @sdb.create_table.endnotes @sdb.create_table.endnotes_asterisk @sdb.create_table.endnotes_plus @@ -135,13 +137,13 @@ module SiSU_DB_select when /^--cr(eate)?lex$/ @sdb.output_dir? begin - @sdb.create_table.documents + @sdb.create_table.doc_objects rescue; @sdb.output_dir? end when /^--cr(eate)?metadata$/ @sdb.output_dir? begin - @sdb.create_table.metadata + @sdb.create_table.metadata_and_text rescue; @sdb.output_dir? end when /^--import$/ diff --git a/lib/sisu/v2/db_sqltxt.rb b/lib/sisu/v2/db_sqltxt.rb new file mode 100644 index 00000000..f120b95f --- /dev/null +++ b/lib/sisu/v2/db_sqltxt.rb @@ -0,0 +1,115 @@ +# coding: utf-8 +=begin + + * Name: SiSU + + * Description: a framework for document structuring, publishing and search + #___# + + * Author: Ralph Amissah + + * Copyright: (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, + 2007, 2008, 2009, 2010 Ralph Amissah All Rights Reserved. 
#  * License: GPL 3 or later:
#
#    SiSU, a framework for document structuring, publishing and search
#
#    Copyright (C) Ralph Amissah
#
#    This program is free software: you can redistribute it and/or modify it
#    under the terms of the GNU General Public License as published by the Free
#    Software Foundation, either version 3 of the License, or (at your option)
#    any later version.
#
#    This program is distributed in the hope that it will be useful, but WITHOUT
#    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
#    FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
#    more details.
#
#    You should have received a copy of the GNU General Public License along with
#    this program. If not, see <http://www.gnu.org/licenses/>.
#
#    If you have Internet connection, the latest version of the GPL should be
#    available at these locations:
#    [location URLs are not present in this copy of the header]
#
#  * SiSU uses:
#    * Standard SiSU markup syntax,
#    * Standard SiSU meta-markup syntax, and the
#    * Standard SiSU object citation numbering and system
#
#  * Homepages:
#    [URLs are not present in this copy of the header]
#
#  * Download:
#    [URL is not present in this copy of the header]
#
#  * Ralph Amissah
#    [contact addresses are not present in this copy of the header]
#
#  ** Description: text preparation for the sql database modules: quote
#     escaping, markup stripping and production of clean searchable plaintext

module SiSU_DB_text
  # Helpers that turn marked-up document text into strings that can be placed
  # inside single-quoted SQL literals.
  #
  # Every method edits the String(s) it is given IN PLACE (gsub!/strip!) and
  # also returns the result, so callers may use either the return value or the
  # mutated argument. Frozen strings are therefore not accepted.
  #
  # The marker table Mx is defined outside this file; its entries are
  # interpolated into the regular expressions below as pattern source.
  # clean_searchable_text additionally reads @md, which this class never
  # assigns itself; it has to be set by a subclass before ':A~' heading lines
  # are processed.
  class Prepare
    # Escapes and simplifies +str+ for storage as SQL text.
    #
    # str - String to convert (modified in place).
    #
    # Returns the same String object.
    def special_character_escape(str)
      # SQL escapes a single quote inside a quoted literal by doubling it
      str.gsub!(/'/,"''") #string.gsub!(/'/,"\047") #string.gsub!(/'/,"\\'")
      # internal line-break markers become an html break plus a real newline
      str.gsub!(/#{Mx[:br_line]}|#{Mx[:br_nl]}/,"<br>\n")
      # drop internal tags altogether
      str.gsub!(/#{Mx[:tag_o]}\S+?#{Mx[:tag_c]}/,'') #check
      # image link: keep the file name and whatever follows the optional WxH size
      str.gsub!(/#{Mx[:lnk_o]}\s*(\S+?\.(?:png|jpg))(?:\s+\d+x\d+)?(.+?)#{Mx[:lnk_c]}\S+/,'[image: \1] \2')
      # file:// or ftp:// link: keep the link text and any trailing punctuation/space
      str.gsub!(/#{Mx[:lnk_o]}\s*(.+?)\s*#{Mx[:lnk_c]}(?:file|ftp):\/\/\S+?([.,!?]?(?:\s|$))/,'\1\2')
      # link with a delimited url: keep the link text only
      str.gsub!(/#{Mx[:lnk_o]}\s*(.+?)\s*#{Mx[:lnk_c]}#{Mx[:url_o]}\S+?#{Mx[:url_c]}/,'\1')
      str
    end

    # Produces clean, searchable plaintext from document source.
    #
    # arr - Array of String source lines/objects; each element is modified in
    #       place as markup is removed.
    #
    # Returns one String: the cleaned elements of +arr+, followed by the text
    # of every inline endnote (~{ ... }~) found in them, joined with newlines
    # and passed through special_character_escape.
    def clean_searchable_text(arr)
      txt_arr,en=[],[]
      arr.each do |s|
        s.gsub!(/([*\/_-])\{(.+?)\}\1/,'\2')             # *{..}* /{..}/ _{..}_ -{..}- font markup
        s.gsub!(/^(?:group|poem|code)\{/,'')             # block openers
        s.gsub!(/^\}(?:group|poem|code)/,'')             # block closers
        s.gsub!(/\A(?:@\S+:\s+.+)\Z/m,'')                # element that is an '@name: value' header
        if s =~/^:A~/
          # substitute the header placeholders from the document metadata
          s.gsub!(/@author/,@md.creator.author)
          s.gsub!(/@title/,@md.title.full)
        end
        s.gsub!(/^(?:_[1-9]\*?|_\*)\s+/,'')              # indent / bullet markers
        s.gsub!(/^(?:[1-9]\~(\S+)?)\s+/,'')              # numbered heading markers
        s.gsub!(/^(?::?[A-C]\~(\S+)?)\s+/,'')            # lettered heading markers
        s.gsub!(/^%{1,3} .+/,'') #removed even if contained in code block
        s.gsub!(/<br>/,' ')
        # collect endnote text first, then remove the endnotes from the line
        en << s.scan(/~\{\s*(.+?)\s*\}~/)
        s.gsub!(/~\{.+?\}~/,'')
        s.gsub!(/ \s+/,' ')
        s
      end
      # body text first, endnote text after it
      txt_arr << arr << en
      special_character_escape(txt_arr.flatten.join("\n"))
    end

    # Removes presentation markup from +str+ so that only searchable text is
    # left.
    #
    # str - String to clean (modified in place).
    #
    # Returns the same String object, whitespace-squeezed and stripped.
    def strip_markup(str) #define rules, make same as in dal clean
      str.gsub!(/#{Mx[:fa_superscript_o]}(\d+)#{Mx[:fa_superscript_c]}/,'[\1]')
      str.gsub!(/(?:&nbsp\\;|#{Mx[:nbsp]})+/,' ')
      str.gsub!(/#{Mx[:tc_o]}#{Mx[:tc_p]}#{Mx[:tc_p]}\d+(.+)#{Mx[:tc_c]}/u,'\1') #tables
      str.gsub!(/#{Mx[:tc_p]}#{Mx[:tc_p]}\d+#{Mx[:tc_p]}/u,' ') #tables
      str.gsub!(/#{Mx[:tc_p]}/u,' ') #tables tidy later
      str.gsub!(/<.+?>/,'')
      # The scheme separator is "://" (the same form special_character_escape
      # matches); written as "//:" this rule can never match a real link.
      str.gsub!(/#{Mx[:lnk_o]}.+?\.(?:png|jpg|gif).+?#{Mx[:lnk_c]}(?:file|ftp):\/\/\S+ /,' [image] ') # else image names found in search
      str.gsub!(/#{Mx[:lnk_o]}.+?\.(?:png|jpg|gif).+?#{Mx[:lnk_c]}#{Mx[:url_o]}\S+?#{Mx[:url_c]}/,' [image]') # else image names found in search
      str.gsub!(/\s\s+/,' ')
      str.strip!
      str
    end
  end
end