class SiSU_DbImport::Import

Attributes

tp[RW]

Public Class Methods

new(opt,conn,file_maint,sql_type=:pg) click to toggle source
# File lib/sisu/db_import.rb, line 67
# Sets up an importer bound to one database connection.
#
# opt::        options object; this method reads opt.act, opt.fns and
#              opt.selections.str
# conn::       database connection; queried with #execute when the sqlite3
#              driver is detected, otherwise with #exec
# file_maint:: stored as-is in @file_maint
# sql_type::   :pg (default) or :sqlite
#
# The row counters @col[:lid] and @id_n are seeded from the current MAX()
# values of doc_objects.lid and endnotes.nid; both fall back to 0 when the
# query fails or yields nothing.
def initialize(opt,conn,file_maint,sql_type=:pg)
  @opt,@conn,@file_maint,@sql_type=opt,conn,file_maint,sql_type
  @cX=SiSU_Screen::Ansi.new(@opt.act[:color_state][:set]).cX
  @env=SiSU_Env::InfoEnv.new(@opt.fns)
  @dal="#{@env.processing_path.ao}"
  # Document parameters (@md) are only loaded when a filename and a
  # selection string are present; otherwise @md stays unset.
  @fnb=if @opt.fns.empty? \
  or @opt.selections.str.empty?
    ''
  else
    @md=SiSU_Param::Parameters.new(@opt).get
    @md.fnb
  end
  @fnc="#{@dal}/#{@opt.fns}.content.rbm"
  # create? consider placing field just before clean text as opposed to seg
  # which contains seg(.html) name info; seg_full would contain seg info for
  # levels 5 & 6 where available, eg seg_full may be 7.3 (level 5) and 7.3.1
  # (level 6) where seg is 7
  @@seg,@@seg_full='',''
  @col=Hash.new('')
  @col[:ocn]=''
  @counter={}
  @db=SiSU_Env::InfoDb.new
  if @sql_type==:sqlite
    # The sqlite3 driver is recognised by comparing the first ten characters
    # of the two connections' #inspect output.
    @driver_sqlite3=
      (@conn.inspect.match(/^(.{10})/)[1] \
      == @db.sqlite.conn_sqlite3.inspect.match(/^(.{10})/)[1])
  end
  sql='SELECT MAX(lid) FROM doc_objects'
  begin
    @col[:lid] ||=0
    @col[:lid]=@driver_sqlite3 \
    ? @conn.execute( sql ).join.to_i
    : @conn.exec( sql ).getvalue(0,0).to_i
  rescue
    # Best effort: a failed lookup is only reported in maintenance mode.
    if @opt.act[:maintenance][:set]==:on
      puts "#{__FILE__}:#{__LINE__}"
    end
  end
  @col[:lid]=0 if @col[:lid].nil? or @col[:lid].to_s.empty?
  sql='SELECT MAX(nid) FROM endnotes'
  begin
    @id_n=@driver_sqlite3 \
    ? @conn.execute( sql ).join.to_i
    : @conn.exec( sql ).getvalue(0,0).to_i
    @id_n ||=0
  rescue
    if @opt.act[:maintenance][:set]==:on
      puts "#{__FILE__}:#{__LINE__}"
    end
  end
  # Fix: the fallback previously tested @col[:lid] (already normalised
  # above), so @id_n stayed nil whenever the endnotes query raised.
  @id_n=0 if @id_n.nil? or @id_n.to_s.empty?
  @col[:lv0]=@col[:lv1]=@col[:lv2]=@col[:lv3]=@col[:lv4]=@col[:lv5]=@col[:lv6]=@col[:lv7]=0
  @db=SiSU_Env::InfoDb.new
  @pdf_fn=SiSU_Env::FileOp.new(@md).base_filename
  @@dl ||=SiSU_Env::InfoEnv.new.digest.length
end

Public Instance Methods

asterisk() click to toggle source
# File lib/sisu/db_import.rb, line 757
# Lists the numbers of the asterisk-marked endnotes present in @txt.
#
# Returns the result of String#scan (one single-element Array of digits per
# note), or nil when @txt contains no asterisk endnote marker.
def asterisk
  return nil unless @txt =~/#{Mx[:en_b_o]}\*.+?#{Mx[:en_b_c]}/
  @txt.scan(/#{Mx[:en_b_o]}[*](\d+).+?#{Mx[:en_b_c]}/)
end
book_idx_hash_to_str(book_idx) click to toggle source
# File lib/sisu/db_import.rb, line 256
# Flattens a book-index hash into a single string.
#
# book_idx:: nil/false, or a Hash mapping each main term to a Hash with
#            :plus and :sub entries, where :sub is a list of Hashes mapping
#            sub-terms to Hashes with a :plus entry.
#
# Each main term is written as "term+plus". When it has sub-terms, a ':' and
# one "\n  subterm+plus | " fragment per sub-term follow. Main terms are
# concatenated without a separator. Returns '' for nil or empty input.
def book_idx_hash_to_str(book_idx)
  terms=book_idx || ''
  return '' if terms.empty?
  terms.each_pair.inject('') do |text,(term,entry)|
    text=text + "#{term}+#{entry[:plus]}"
    groups=entry[:sub]
    next text unless groups.length > 0
    fragments=groups.flat_map do |group|
      group.each_pair.map do |sub_term,sub_entry|
        "\n  #{sub_term}+#{sub_entry[:plus]} | "
      end
    end
    "#{text}:#{fragments.join}"
  end
end
clean_text(base_url=nil) click to toggle source
# File lib/sisu/db_import.rb, line 767
# Replaces every endnote marker in @txt with a superscript reference.
#
# base_url:: when given, each superscript is wrapped in an anchor that links
#            to "<base_url>#_<nr>"; otherwise a bare <sup> is emitted.
#
# The three marker kinds (plain, asterisk, plus) are handled in that order.
# Stores the result back in @txt and returns it.
def clean_text(base_url=nil)
  replacement=if base_url
    %{<sup><a href="#{base_url}#_\\1" name="-\\1">\\1</a></sup>}
  else
    '<sup>\1</sup>'
  end
  markers=[
    /#{Mx[:en_a_o]}(\d+).+?#{Mx[:en_a_c]}/,
    /#{Mx[:en_b_o]}([*]\d+).+?#{Mx[:en_b_c]}/,
    /#{Mx[:en_b_o]}([+]\d+).+?#{Mx[:en_b_c]}/,
  ]
  @txt=markers.inject(@txt) { |text,marker| text.gsub(marker,replacement) }
  @txt
end
db_import_documents(ao_array) click to toggle source
# File lib/sisu/db_import.rb, line 325
# Builds the database tuples for every document object in ao_array.
#
# ao_array:: enumerable of document objects; each is read through
#            #obj, #of, #is, #ln, #lv, #ocn, #odv, #osp, #node, #parent,
#            #name, #idx (and #indent / #hang for regular text), and has its
#            #obj rewritten with inline formatting markers removed.
#
# For each object whose #of is :para, :heading, :heading_insert, :block or
# :group, @col is filled and a SiSU_DbTuple::LoadDocuments tuple is appended
# to @tuple_array (except for :structure/:layout/:comment objects). The
# object's text is then scanned for the three endnote marker kinds and one
# SiSU_DbTuple::LoadEndnotes tuple is appended per numbered note.
#
# Any StandardError is handed to SiSU_Errors::Rescued rather than re-raised.
# Returns @tuple_array.
def db_import_documents(ao_array)                                     #% import documents - populate main database table, import into substantive database tables (tuple)
  begin
    # @@id_t is the current document's tid; it is assigned in
    # db_import_metadata.
    @col[:tid]=@@id_t
    @en,@en_ast,@en_pls,@tuple_array=[],[],[],[]
    @col[:en_a],@col[:en_z]=nil,nil
    ao_array.each do |data|
      # Strip inline formatting markers, keeping only the enclosed text.
      data.obj=data.obj.gsub(/#{Mx[:fa_bold_o]}(.+?)#{Mx[:fa_bold_c]}/,'\1').
        gsub(/#{Mx[:fa_italics_o]}(.+?)#{Mx[:fa_italics_c]}/,'\1').
        gsub(/#{Mx[:fa_underscore_o]}(.+?)#{Mx[:fa_underscore_c]}/,'\1').
        gsub(/#{Mx[:fa_superscript_o]}(.+?)#{Mx[:fa_superscript_c]}/,'\1').
        gsub(/#{Mx[:fa_subscript_o]}(.+?)#{Mx[:fa_subscript_c]}/,'\1').
        gsub(/#{Mx[:fa_insert_o]}(.+?)#{Mx[:fa_insert_c]}/,'\1').
        gsub(/#{Mx[:fa_cite_o]}(.+?)#{Mx[:fa_cite_c]}/,'\1').
        gsub(/#{Mx[:fa_strike_o]}(.+?)#{Mx[:fa_strike_c]}/,'\1').
        gsub(/#{Mx[:fa_monospace_o]}(.+?)#{Mx[:fa_monospace_c]}/,'\1').
        gsub(/#{Mx[:gl_o]}(●)#{Mx[:gl_c]}\s*/,'\1 ').
        gsub(/#{Mx[:tag_o]}\S+?#{Mx[:tag_c]}/,'') #check
      @col[:seg]=@@seg
      if data.of ==:para \
      || data.of ==:heading \
      || data.of ==:heading_insert \
      || data.of ==:block \
      || data.of ==:group      # regular text what of code-blocks grouped text etc.
        # Copy of the formatting-stripped text; scanned for endnotes after
        # the document tuple for this object has been built.
        notedata=data.obj.dup
                                                                           #% :headings
        # --- headings whose level number contains a digit 0-3
        if data.is==:heading \
        && (data.ln.inspect=~/[0-3]/)
          (
            @col[:lev],
            txt,@col[:ocn],
            @col[:lev_an],
            @col[:ocnd],@col[:ocns],
            @col[:t_of],@col[:t_is],
            @col[:node],@col[:parent],
            @col[:digest_clean],@col[:digest_all]=
            data.ln,
            data.obj,data.ocn,
            data.lv,
            data.odv,data.osp,
            data.of,data.is,
            data.node,data.parent,
            '',''
          )
          @col[:lid]+=1
          txt=endnotes(txt).extract_any
          body=SiSU_FormatShared::CSS_Format.new(@md,data).lev4_minus
          @col[:body]=clean_document_objects_body(body)
          plaintext=@col[:body].dup
          plaintext=strip_markup(plaintext)
          @col[:plaintext]=clean_searchable_text_from_document_objects(plaintext)
          book_idx=book_idx_hash_to_str(data.idx)
          @col[:book_idx]=clean_searchable_text_from_document_objects(book_idx)
          if @en[0] then @en_a,@en_z=@en[0].first,@en[0].last
          end
          if @en_ast[0] then @en_a_asterisk,@en_z_asterisk=@en_ast[0].first,@en_ast[0].last
          end
          if @en_pls[0] then @en_a_plus,@en_z_plus=@en_pls[0].first,@en_pls[0].last
          end
          t=SiSU_DbTuple::LoadDocuments.new(@conn,@col,@opt,@file_maint)
          @tuple_array << t.tuple
          # NOTE(review): `when /n/` uses Regexp#===, which only matches
          # String/Symbol operands. The `data.ln==4` comparisons below suggest
          # data.ln is an Integer, in which case none of these counters is
          # ever incremented here - confirm.
          case @col[:lev]
          when /0/ then @col[:lv0]+=1
          when /1/ then @col[:lv1]+=1
          when /2/ then @col[:lv2]+=1
          when /3/ then @col[:lv3]+=1
          when /4/ then @col[:lv4]+=1
          end
          @col[:lev]=@col[:plaintext]=@col[:body]=''
        # --- level 4 heading: its name becomes the current segment (@@seg)
        elsif data.is==:heading \
        && data.ln==4
          (
            @@seg,txt,
            @col[:ocn],@col[:lev_an],
            @col[:ocnd],@col[:ocns],
            @col[:t_of],@col[:t_is],
            @col[:node],@col[:parent],
            @col[:digest_clean],@col[:digest_all]=
            data.name,data.obj,
            data.ocn,data.lv,
            data.odv,data.osp,
            data.of,data.is,
            data.node,data.parent,
            '',''
          )
          @col[:seg]=@@seg
          @col[:lv4]+=1
          @col[:lid]+=1
          @col[:lev]=4
          # Remember the last non-empty segment name in @@hname and reuse it
          # when the current segment is blank.
          @hname=if @col[:seg] \
          and not @col[:seg].to_s.empty?
            @@hname=@col[:seg].to_s
          else @@hname
          end
          @env=SiSU_Env::InfoEnv.new(@md.fns)
          @base_url="#{@env.url.root}/#{@md.fnb}/#{@hname}.html"
          txt=endnotes(txt).extract_any
          body=SiSU_FormatShared::CSS_Format.new(@md,data).lev4_plus
          @col[:body]=clean_document_objects_body(body)
          plaintext=@col[:body].dup
          plaintext=strip_markup(plaintext)
          @col[:plaintext]=clean_searchable_text_from_document_objects(plaintext)
          book_idx=book_idx_hash_to_str(data.idx)
          @col[:book_idx]=clean_searchable_text_from_document_objects(book_idx)
          @en_a,@en_z=@en[0].first,@en[0].last if @en[0]
          @en_a_asterisk,@en_z_asterisk=@en_ast[0].first,@en_ast[0].last if @en_ast[0]
          @en_a_plus,@en_z_plus=@en_pls[0].first,@en_pls[0].last if @en_pls[0]
          t=SiSU_DbTuple::LoadDocuments.new(@conn,@col,@opt,@file_maint)
          @tuple_array << t.tuple
          @col[:lev]=@col[:plaintext]=@col[:body]=''
        # --- level 5 heading: keeps the current @@seg, records data.name in
        # @@seg_full when present
        elsif data.is==:heading \
        && data.ln==5
          (
            txt,
            @col[:ocn],@col[:lev_an],
            @col[:ocnd],@col[:ocns],
            @col[:t_of],@col[:t_is],
            @col[:node],@col[:parent],
            @col[:digest_clean],@col[:digest_all]=
            data.obj,
            data.ocn,data.lv,
            data.odv,data.osp,
            data.of,data.is,
            data.node,data.parent,
            '',''
          )
          @@seg_full=data.name if data.is==:heading \
          && data.ln==5 \
          && data.name #check data.name
          @@seg ||='' #nil # watch
          @col[:seg]=@@seg
          @col[:lv5]+=1
          @col[:lid]+=1
          @col[:lev]=5
          @hname=if @col[:seg] \
          and not @col[:seg].to_s.empty?
            @@hname=@col[:seg].to_s
          else @@hname
          end
          @env=SiSU_Env::InfoEnv.new(@md.fns)
          @base_url="#{@env.url.root}/#{@md.fnb}/#{@hname}.html"
          txt=endnotes(txt).extract_any
          body=SiSU_FormatShared::CSS_Format.new(@md,data).lev4_plus
          @col[:body]=clean_document_objects_body(body)
          plaintext=@col[:body].dup
          plaintext=strip_markup(plaintext)
          @col[:plaintext]=clean_searchable_text_from_document_objects(plaintext)
          book_idx=book_idx_hash_to_str(data.idx)
          @col[:book_idx]=clean_searchable_text_from_document_objects(book_idx)
          @en_a,@en_z=@en[0].first,@en[0].last if @en[0]
          @en_a_asterisk,@en_z_asterisk=@en_ast[0].first,@en_ast[0].last if @en_ast[0]
          @en_a_plus,@en_z_plus=@en_pls[0].first,@en_pls[0].last if @en_pls[0]
          t=SiSU_DbTuple::LoadDocuments.new(@conn,@col,@opt,@file_maint)
          @tuple_array << t.tuple
          @col[:lev]=@col[:plaintext]=@col[:body]=''
        # --- level 6 heading: same handling as level 5
        elsif data.is==:heading \
        && data.ln==6
          txt,       @col[:ocn],@col[:lev_an],@col[:ocnd],@col[:ocns],@col[:t_of],@col[:t_is],@col[:node],@col[:parent],@col[:digest_clean],@col[:digest_all]=
            data.obj,data.ocn,  data.lv,      data.odv,   data.osp,   data.of,    data.is,    data.node,  data.parent,  '',                 ''
          @@seg_full=data.name if data.is==:heading && data.ln==6 && data.name #check data.name
          @@seg ||='' #nil # watch
          @col[:seg]=@@seg
          @col[:lv6]+=1
          @col[:lid]+=1
          @col[:lev]=6
          @hname=if @col[:seg] \
          and not @col[:seg].to_s.empty?
            @@hname=@col[:seg].to_s
          else @@hname
          end
          @env=SiSU_Env::InfoEnv.new(@md.fns)
          @base_url="#{@env.url.root}/#{@md.fnb}/#{@hname}.html"
          txt=endnotes(txt).extract_any
          body=SiSU_FormatShared::CSS_Format.new(@md,data).lev4_plus
          @col[:body]=clean_document_objects_body(body)
          plaintext=@col[:body].dup
          plaintext=strip_markup(plaintext)
          @col[:plaintext]=clean_searchable_text_from_document_objects(plaintext)
          book_idx=book_idx_hash_to_str(data.idx)
          @col[:book_idx]=clean_searchable_text_from_document_objects(book_idx)
          @en_a,@en_z=@en[0].first,@en[0].last if @en[0]
          @en_a_asterisk,@en_z_asterisk=@en_ast[0].first,@en_ast[0].last if @en_ast[0]
          @en_a_plus,@en_z_plus=@en_pls[0].first,@en_pls[0].last if @en_pls[0]
          t=SiSU_DbTuple::LoadDocuments.new(@conn,@col,@opt,@file_maint)
          @tuple_array << t.tuple
          @col[:lev]=@col[:plaintext]=@col[:body]=''
        # --- level 7 heading: same handling as level 5
        elsif data.is==:heading \
        && data.ln==7
          txt,       @col[:ocn],@col[:lev_an],@col[:ocnd],@col[:ocns],@col[:t_of],@col[:t_is],@col[:node],@col[:parent],@col[:digest_clean],@col[:digest_all]=
            data.obj,data.ocn,  data.lv,      data.odv,   data.osp,   data.of,    data.is,    data.node,  data.parent,  '',                 ''
          @@seg_full=data.name if data.is==:heading && data.ln==7 && data.name #check data.name
          @@seg ||='' #nil # watch
          @col[:seg]=@@seg
          @col[:lv7]+=1
          @col[:lid]+=1
          @col[:lev]=7
          @hname=if @col[:seg] \
          and not @col[:seg].to_s.empty?
            @@hname=@col[:seg].to_s
          else @@hname
          end
          @env=SiSU_Env::InfoEnv.new(@md.fns)
          @base_url="#{@env.url.root}/#{@md.fnb}/#{@hname}.html"
          txt=endnotes(txt).extract_any
          body=SiSU_FormatShared::CSS_Format.new(@md,data).lev4_plus
          @col[:body]=clean_document_objects_body(body)
          plaintext=@col[:body].dup
          plaintext=strip_markup(plaintext)
          @col[:plaintext]=clean_searchable_text_from_document_objects(plaintext)
          book_idx=book_idx_hash_to_str(data.idx)
          @col[:book_idx]=clean_searchable_text_from_document_objects(book_idx)
          @en_a,@en_z=@en[0].first,@en[0].last if @en[0]
          @en_a_asterisk,@en_z_asterisk=@en_ast[0].first,@en_ast[0].last if @en_ast[0]
          @en_a_plus,@en_z_plus=@en_pls[0].first,@en_pls[0].last if @en_pls[0]
          t=SiSU_DbTuple::LoadDocuments.new(@conn,@col,@opt,@file_maint)
          @tuple_array << t.tuple
          @col[:lev]=@col[:plaintext]=@col[:body]=''
                                                                           #% :structure :layout :comment
        # NOTE(review): the enclosing `if` only admits :para, :heading,
        # :heading_insert, :block and :group, so this branch looks
        # unreachable - confirm.
        elsif data.of==:structure \
        || data.of==:layout \
        || data.of==:comment
          #added watch
                                                                           #% :
        else                                                               #% regular text
          @col[:lid]+=1
          # Regular text is stored with the fixed level 9 and an empty node.
          (
            txt=''
            txt,@col[:ocn],
            @col[:ocnd],@col[:ocns],
            @col[:t_of],@col[:t_is],
            @col[:node],@col[:parent],
            @col[:digest_clean],@col[:digest_all],
            @col[:lev]=
            data.obj,data.ocn,
            data.odv,data.osp,
            data.of,data.is,
            '',data.parent,
            '','',
            9
          )
          @hname=if @col[:seg] \
          and not @col[:seg].to_s.empty?
            @@hname=@col[:seg].to_s
          else @@hname
          end
          @env=SiSU_Env::InfoEnv.new(@md.fns)
          @base_url="#{@env.url.root}/#{@md.fnb}/#{@hname}.html"
          txt=endnotes(txt).extract_any
          # NOTE(review): the log line below reports @col[:body], which was
          # reset to '' after the previous object and is not assigned for
          # this one until further down; also txt is not read again in this
          # branch, so the replacement text appears to have no effect on the
          # stored tuple - confirm.
          if @sql_type==:pg \
          and txt.size > (SiSU_DbColumns::ColumnSize.new.document_clean - 1)             # examine pg build & remove limitation
            puts "\n\nTOO LARGE (TXT - see error log)\n\n"
            open("#{Dir.pwd}/pg_documents_error_log",'a') do |error|
              error.puts("\n#{@opt.fns}\nTEXT BODY\n#{@col[:body].size} object #{@col[:ocn]} -> #{@col[:body].slice(0..500)}")
            end
            txt=%{\n\nLARGE TEXT BLOCK OMITTED\n\n}
          end
          @en_a,@en_z=@en[0].first,@en[0].last if @en[0]
          @en_a_asterisk,@en_z_asterisk=@en_ast[0].first,@en_ast[0].last if @en_ast[0]
          @en_a_plus,@en_z_plus=@en_pls[0].first,@en_pls[0].last if @en_pls[0]
          # Pick the CSS_Format renderer from the object type, then from its
          # indent/hang values.
          body=if data.is==:table
            SiSU_FormatShared::CSS_Format.new(@md,data).html_table
          elsif data.is==:code
            SiSU_FormatShared::CSS_Format.new(@md,data).code
          elsif defined? data.indent \
          and defined? data.hang \
          and data.indent =~/[1-9]/ \
          and data.indent == data.hang
            SiSU_FormatShared::CSS_Format.new(@md,data).indent(data.indent)
          elsif defined? data.indent \
          and defined? data.hang \
          and data.hang =~/[0-9]/ \
          and data.indent != data.hang
            SiSU_FormatShared::CSS_Format.new(@md,data).hang_indent(data.hang,data.indent)
          else
            SiSU_FormatShared::CSS_Format.new(@md,data).norm
          end
          @col[:body]=clean_document_objects_body(body)
          plaintext=@col[:body].dup
          plaintext=strip_markup(plaintext)
          @col[:plaintext]=clean_searchable_text_from_document_objects(plaintext)
          book_idx=book_idx_hash_to_str(data.idx)
          @col[:book_idx]=clean_searchable_text_from_document_objects(book_idx)
          t=SiSU_DbTuple::LoadDocuments.new(@conn,@col,@opt,@file_maint)
          @tuple_array << t.tuple
          # The collected endnote lists are only reset after regular text,
          # not after headings.
          @en,@en_ast,@en_pls=[],[],[]
          @col[:en_a]=@col[:en_z]=nil
          @col[:lev]=@col[:plaintext]=@col[:body]=@col[:words]=''
        end
        # --- endnotes delimited by en_a markers -> type 'endnotes'
        if notedata =~/#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}/                                         #% import into database endnotes tables
          endnote_array=notedata.scan(/#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}/)
          endnote_array.each do |inf|
            if inf[/#{Mx[:en_a_o]}\d+.+?#{Mx[:en_a_c]}/]
              if inf[/#{Mx[:en_a_o]}(\d+)(.+?)#{Mx[:en_a_c]}/]
                nr,txt,digest_clean=$1,$2.strip,0
              end
              @id_n ||=0
              @id_n+=1
              txt=special_character_escape(txt)
              body=SiSU_FormatShared::CSS_Format.new(@md,data).endnote(nr,txt)
              txt=strip_markup(txt)
              # Over-long notes are logged to pg_documents_error_log in the
              # working directory and replaced by a placeholder.
              if txt.size > (SiSU_DbColumns::ColumnSize.new.endnote_clean - 1)
                puts "\n\nTOO LARGE (ENDNOTE - see error log)\n\n"
                open("#{Dir.pwd}/pg_documents_error_log",'a') do |error|
                  error.puts("\n#{@opt.fns}\nENDNOTE\n#{txt.size} object #{@col[:ocn]},#{@col[:ocnd]},#{@col[:ocns]} -> #{txt.slice(0..500)}")
                end
                txt=%{\n\nLARGE TEXT BLOCK OMITTED\n\n}
              end
              if txt
                en={
                  type: 'endnotes',
                  id:      @id_n,
                  lid:     @col[:lid],
                  nr:      nr,
                  txt:     txt,
                  body:    body,
                  ocn:     @col[:ocn],
                  ocnd:    @col[:ocnd],
                  ocns:    @col[:ocns],
                  id_t:    @@id_t,
                  hash:    digest_clean
                }
                t=SiSU_DbTuple::LoadEndnotes.new(@conn,en,@opt,@file_maint)
                @tuple_array << t.tuple
              end
            end
          end
          # word_mode is assigned here and in the two blocks below but is not
          # read anywhere in this method.
          word_mode=notedata.scan(/\S+/)
        end
        # --- endnotes delimited by en_b markers with '*' -> 'endnotes_asterisk'
        # Unlike the block above there is no `@id_n ||=0` guard here; @id_n
        # must already be numeric.
        if notedata =~/#{Mx[:en_b_o]}\*.+?#{Mx[:en_b_c]}/                                      #% import into database endnotes tables
          endnote_array=notedata.scan(/#{Mx[:en_b_o]}\*.+?#{Mx[:en_b_c]}/)
          endnote_array.each do |inf|
            if inf[/#{Mx[:en_b_o]}\*\d+.+?#{Mx[:en_b_c]}/]                    # dal new endnotes 2003w31/1
              if inf[/#{Mx[:en_b_o]}[*](\d+)(.+?)#{Mx[:en_b_c]}/]           # dal new endnotes 2003w31/1
                nr,txt,digest_clean=$1,$2.strip,0
              end
              @id_n+=1
              txt=special_character_escape(txt)
              body=SiSU_FormatShared::CSS_Format.new(@md,data).endnote(nr,txt)
              txt=strip_markup(txt)
              if txt.size > (SiSU_DbColumns::ColumnSize.new.endnote_clean - 1)
                puts "\n\nTOO LARGE (ENDNOTE - see error log)\n\n"
                open("#{Dir.pwd}/pg_documents_error_log",'a') do |error|
                  error.puts("\n#{@opt.fns}\nENDNOTE\n#{txt.size} object #{@col[:ocn]},#{@col[:ocnd]},#{@col[:ocns]} -> #{txt.slice(0..500)}")
                end
                txt=%{\n\nLARGE TEXT BLOCK OMITTED\n\n}
              end
              if txt
                en={
                  type: 'endnotes_asterisk',
                  id:      @id_n,
                  lid:     @col[:lid],
                  nr:      nr,
                  txt:     txt,
                  body:    body,
                  ocn:     @col[:ocn],
                  ocnd:    @col[:ocnd],
                  ocns:    @col[:ocns],
                  id_t:    @@id_t,
                  hash:    digest_clean
                }
                t=SiSU_DbTuple::LoadEndnotes.new(@conn,en,@opt,@file_maint)
                @tuple_array << t.tuple
              end
            end
          end
          word_mode=notedata.scan(/\S+/)
        end
        # --- endnotes delimited by en_b markers with '+' -> 'endnotes_plus'
        if notedata =~/#{Mx[:en_b_o]}\+.+?#{Mx[:en_b_c]}/                                           #% import into database endnotes tables
          endnote_array=notedata.scan(/#{Mx[:en_b_o]}\+.+?#{Mx[:en_b_c]}/)
          endnote_array.each do |inf|
            if inf[/#{Mx[:en_b_o]}\+\d+.+?#{Mx[:en_b_c]}/]                        # dal new endnotes 2003w31/1
              if inf[/#{Mx[:en_b_o]}[+](\d+)(.+?)#{Mx[:en_b_c]}/]               # dal new endnotes 2003w31/1
                nr,txt,digest_clean=$1,$2.strip,0
              end
              @id_n+=1
              txt=special_character_escape(txt)
              body=SiSU_FormatShared::CSS_Format.new(@md,data).endnote(nr,txt)
              txt=strip_markup(txt)
              if txt.size > (SiSU_DbColumns::ColumnSize.new.endnote_clean - 1)
                puts "\n\nTOO LARGE (ENDNOTE - see error log)\n\n"
                open("#{Dir.pwd}/pg_documents_error_log",'a') do |error|
                  error.puts("\n#{@opt.fns}\nENDNOTE\n#{txt.size} object #{@col[:ocn]},#{@col[:ocnd]},#{@col[:ocns]} -> #{txt.slice(0..500)}")
                end
                txt=%{\n\nLARGE TEXT BLOCK OMITTED\n\n}
              end
              if txt
                en={
                  type: 'endnotes_plus',
                  id:      @id_n,
                  lid:     @col[:lid],
                  nr:      nr,
                  txt:     txt,
                  body:    body,
                  ocn:     @col[:ocn],
                  ocnd:    @col[:ocnd],
                  ocns:    @col[:ocns],
                  id_t:    @@id_t,
                  hash:    digest_clean
                }
                t=SiSU_DbTuple::LoadEndnotes.new(@conn,en,@opt,@file_maint)
                @tuple_array << t.tuple
              end
            end
          end
          word_mode=notedata.scan(/\S+/)
        end
      end
    end
  rescue
    # Failures are reported through SiSU_Errors::Rescued; whatever tuples
    # were collected so far are still returned below.
    SiSU_Errors::Rescued.new($!,$@,@opt.selections.str,@opt.fns).location do
      __LINE__.to_s + ':' + __FILE__
    end
  ensure
  end
  @tuple_array
end
db_import_metadata() click to toggle source
# File lib/sisu/db_import.rb, line 276
    # Prepares the metadata row for the current document.
    #
    # Side effects visible here:
    # * @tp is reset and, when the source file exists, receives the escaped
    #   source text (:sisutxt_*) and the searchable full text (:fulltxt_*);
    # * @md is (re)loaded from SiSU_Param::Parameters;
    # * when the document has a title, @@id_t is set to MAX(tid)+1 from
    #   metadata_and_text (or the previous @@id_t + 1 if the query fails).
    #
    # Returns the tuple built by SiSU_DbTuple::LoadMetadata.
    def db_import_metadata                                                       #% import documents - populate database
      verbose=(@opt.act[:verbose][:set]==:on \
      || @opt.act[:verbose_plus][:set]==:on \
      || @opt.act[:maintenance][:set]==:on)
      if verbose
        print %{ #{@cX.grey}import documents dbi_unit #{@cX.off} }
      end
      @tp={}
      @md=SiSU_Param::Parameters.new(@opt).get
      #% sisutxt & fulltxt
      if FileTest.exist?(@md.fns)
        # An empty separator makes IO.readlines split on blank lines
        # (paragraph mode).
        txt_arr=IO.readlines(@md.fns,'')
        src=txt_arr.join("\n")
        src=special_character_escape(src)
        @tp[:sisutxt_f],@tp[:sisutxt_i]='sisutxt, ',"'#{src}', "
        txt=clean_searchable_text_from_document_source(txt_arr)
        #txt=special_character_escape(txt)
        @tp[:fulltxt_f],@tp[:fulltxt_i]='fulltxt, ',"'#{txt}', "
      end
      #% title
      if defined? @md.title.full \
      and @md.title.full=~/\S+/                                              # DublinCore 1 - title
        sql='SELECT MAX(tid) FROM metadata_and_text;'
        begin
          @@id_t ||=0
          id_t=@driver_sqlite3 \
          ? @conn.execute( sql ).join.to_i
          : @conn.exec( sql ).getvalue(0,0).to_i
          @@id_t=id_t if id_t
        rescue
          # Best effort: a failed lookup is only reported in maintenance mode.
          if @opt.act[:maintenance][:set]==:on
            puts "#{__FILE__} #{__LINE__}"
          end
        end
        # bug related: needs to be performed once at start of file, but
        # consider moving; placed here it means the program will fail if the
        # document header lacks @title:
        @@id_t+=1
        if verbose
          # Fix: the closing colour reset used @@cX, a class variable that is
          # not assigned anywhere in the visible code; the instance variable
          # @cX is used for every other colour code.
          puts %{\n#{@cX.grey}Processing file number#{@cX.off}: #{@cX.green}#{@@id_t}#{@cX.off}}
        end
      end
      ################ CLEAR ##############
      SiSU_DbDBI::Test.new(self,@opt).verify                          #% import title names, filenames (tuple)
      SiSU_DbTuple::LoadMetadata.new(@conn,@@id_t,@md,@file_maint).tuple
    end
db_import_urls(dbi_unit,content) click to toggle source
# File lib/sisu/db_import.rb, line 795
# Builds the urls tuple listing every output file that exists on disk for
# the current document.
#
# dbi_unit:: not used by this method
# content::  stored in @fnc
#
# For each known output format the file is looked up under its output
# directory; when present, its column name is recorded in one hash and its
# quoted URL in another, both of which are handed to SiSU_DbTuple::LoadUrls.
#
# Any StandardError is handed to SiSU_Errors::Rescued rather than re-raised.
# Returns the LoadUrls tuple, or nil when an error was rescued.
def db_import_urls(dbi_unit,content)                                           #% import documents OID - populate database
  tuple=nil
  begin
    @fnc=content
    @env=SiSU_Env::InfoEnv.new(@opt.fns)
    f,u={},{}
    # Fix: test for nil first; calling empty? on a nil @fnb raised before
    # the message could be printed.
    if @fnb.nil? \
    or @fnb.empty?
      p 'file output path error' #remove
    end
    out=@md.file.output_path
    fn=@md.file.base_filename
    # Records one output format if its file exists. The column name defaults
    # to the hash key; the url is only looked up for existing files.
    register=lambda do |key,location,filename,column=key|
      if FileTest.file?("#{location.dir}/#{filename}")
        f[key],u[key]="#{column},","'#{location.url}/#{filename}',"
      end
    end
    register.call(:txt,out.txt,fn.txt,'plaintext')
    register.call(:html_toc,out.html_seg,fn.html_segtoc)
    register.call(:html_doc,out.html_scroll,fn.html_scroll)
    register.call(:xhtml,out.xhtml,fn.xhtml)
    register.call(:xml_sax,out.xml_sax,fn.xml_sax)
    register.call(:xml_dom,out.xml_dom,fn.xml_dom)
    register.call(:epub,out.epub,fn.epub)
    register.call(:odf,out.odt,fn.odt)
    # Only the A4 pdf variants are checked; the letter variants and the
    # latex_p / latex_l outputs were already disabled.
    register.call(:pdf_p,out.pdf,@pdf_fn.pdf_p_a4)
    register.call(:pdf_l,out.pdf,@pdf_fn.pdf_l_a4)
    register.call(:concordance,out.html_concordance,fn.html_concordance)
    register.call(:digest,out.digest,fn.digest)
    register.call(:manifest,out.manifest,fn.manifest) #revisit, was to be text, this is html
    register.call(:markup,out.src,fn.src)
    register.call(:sisupod,out.sisupod,fn.sisupod)
    tuple=SiSU_DbTuple::LoadUrls.new(@conn,f,u,@@id_t,@opt,@file_maint).tuple
  rescue
    SiSU_Errors::Rescued.new($!,$@,@opt.selections.str,@opt.fns).location do
      __LINE__.to_s + ':' + __FILE__
    end
  end
  tuple
end
endnotes(txt) click to toggle source
# File lib/sisu/db_import.rb, line 740
# Stores txt in @txt and returns self, so the helpers below can be chained
# as `endnotes(txt).extract_any`, `endnotes(txt).standard`, and so on.
#
# The helper methods are (re)defined each time this method runs.
def endnotes(txt)
  @txt=txt
  # If @txt holds a numbered endnote: refreshes the range columns, appends
  # the scanned note numbers to @en / @en_ast / @en_pls and replaces the
  # markers in @txt with superscripts. Returns @txt.
  def extract_any
    numbered=/(?:#{Mx[:en_a_o]}|#{Mx[:en_b_o]})[*+]?(\d+)\s+.+?(?:#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/
    return @txt unless @txt =~ numbered
    endnotes(@txt).range
    if @txt =~/#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}/
      @en << endnotes(@txt).standard
    end
    if @txt =~/#{Mx[:en_b_o]}\*.+?#{Mx[:en_b_c]}/
      @en_ast << endnotes(@txt).asterisk
    end
    if @txt =~/#{Mx[:en_b_o]}\+.+?#{Mx[:en_b_c]}/
      @en_pls << endnotes(@txt).plus
    end
    @txt=endnotes(@txt).clean_text
    @txt
  end
  # Numbers of the plain endnotes in @txt (String#scan result), or nil.
  def standard
    return nil unless @txt =~/#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}/
    @txt.scan(/#{Mx[:en_a_o]}(\d+).+?#{Mx[:en_a_c]}/)
  end
  # Numbers of the asterisk endnotes in @txt (String#scan result), or nil.
  def asterisk
    return nil unless @txt =~/#{Mx[:en_b_o]}\*.+?#{Mx[:en_b_c]}/
    @txt.scan(/#{Mx[:en_b_o]}[*](\d+).+?#{Mx[:en_b_c]}/)
  end
  # Numbers of the plus endnotes in @txt (String#scan result), or nil.
  def plus
    return nil unless @txt =~/#{Mx[:en_b_o]}\+.+?#{Mx[:en_b_c]}/
    @txt.scan(/#{Mx[:en_b_o]}[+](\d+).+?#{Mx[:en_b_c]}/)
  end
  # Replaces the endnote markers in @txt with <sup> references, linked to
  # base_url when one is given. Stores and returns the new text.
  def clean_text(base_url=nil)
    replacement=if base_url
      %{<sup><a href="#{base_url}#_\\1" name="-\\1">\\1</a></sup>}
    else
      '<sup>\1</sup>'
    end
    markers=[
      /#{Mx[:en_a_o]}(\d+).+?#{Mx[:en_a_c]}/,
      /#{Mx[:en_b_o]}([*]\d+).+?#{Mx[:en_b_c]}/,
      /#{Mx[:en_b_o]}([+]\d+).+?#{Mx[:en_b_c]}/,
    ]
    @txt=markers.inject(@txt) { |text,marker| text.gsub(marker,replacement) }
    @txt
  end
  # Resets @col[:en_a] / @col[:en_z] and, word by word, records the first
  # and last endnote number found. Returns @col.
  def range
    @col[:en_a]=@col[:en_z]=nil
    any_marker=/#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}|#{Mx[:en_b_o]}([*]\d+).+?#{Mx[:en_b_c]}|#{Mx[:en_b_o]}([+]\d+).+?#{Mx[:en_b_c]}/
    return @col unless @txt =~ any_marker
    # NOTE(review): words come from scan(/\S+/) and so contain no
    # whitespace, while this pattern requires \s+ - the body of the loop
    # looks unreachable (the original carries a "not tested since change
    # 2003w31" remark) - confirm before relying on en_a / en_z.
    in_word=/(?:#{Mx[:en_a_o]}|#{Mx[:en_b_o]})[*+]?(\d+)\s+.+?(?:#{Mx[:en_a_c]}|#{Mx[:en_b_c]})(?:#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/
    @txt.scan(/\S+/).each do |word|
      next unless word[in_word]
      @col[:en_a] ||= $1
      @col[:en_z]=$1
    end
    @col
  end
  self
end
extract_any() click to toggle source
# File lib/sisu/db_import.rb, line 742
# Process the inline notes of @txt, if it has any.
#
# When @txt contains at least one note of the form
# "<open>[*+]?<number> <text><close>": calls endnotes(@txt).range, appends the
# results of standard/asterisk/plus to @en/@en_ast/@en_pls for whichever
# delimiter kinds are present, and replaces @txt with
# endnotes(@txt).clean_text.
#
# Returns @txt (unchanged when no note was found).
def extract_any
  any_note=/(?:#{Mx[:en_a_o]}|#{Mx[:en_b_o]})[*+]?(\d+)\s+.+?(?:#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/
  return @txt unless @txt =~ any_note
  endnotes(@txt).range
  if @txt =~/#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}/
    @en << endnotes(@txt).standard
  end
  if @txt =~/#{Mx[:en_b_o]}\*.+?#{Mx[:en_b_c]}/
    @en_ast << endnotes(@txt).asterisk
  end
  if @txt =~/#{Mx[:en_b_o]}\+.+?#{Mx[:en_b_c]}/
    @en_pls << endnotes(@txt).plus
  end
  @txt=endnotes(@txt).clean_text
  @txt
end
marshal_load() click to toggle source
# File lib/sisu/db_import.rb, line 121
# Load the abstract-object (ao) representation of the current document and
# import it into the database unless a matching record is already there.
#
# Steps:
# 1. Fetch the ao array via SiSU_AO::Source and, depending on the verbosity
#    flags in @opt.act, report what is being loaded.
# 2. Query metadata_and_text for a row with this source filename and language.
#    If the query raises, try to create the db tables (for PostgreSQL only
#    when the error reports a missing relation, after which the query is
#    re-run once).
# 3. When no row is found, collect the metadata, document and url statements
#    and execute them in a single transaction. On failure the error is
#    reported and the statements are written to
#    "<processing sql path>/<fnb>.sql".
# 4. When a row is found, print a hint to use --update instead.
def marshal_load
  require_relative 'ao'                               # ao.rb
  @ao_array=SiSU_AO::Source.new(@opt).get             # ao file drawn here
  if (@opt.act[:verbose][:set]==:on \
  || @opt.act[:verbose_plus][:set]==:on \
  || @opt.act[:maintenance][:set]==:on)
    SiSU_Screen::Ansi.new(
      @opt.act[:color_state][:set],
      "#{@db.psql.db}::#{@opt.fns}"
    ).puts_blue
  end
  SiSU_Screen::Ansi.new(
    @opt.act[:color_state][:set],
    'Marshal Load',
    @fnc
  ).puts_grey if @opt.act[:verbose][:set]==:on
  # NOTE(review): the filename and language code are interpolated straight
  # into the SQL text; a value containing a single quote would break the
  # query - consider quoting or bound parameters.
  select_first_match=%{
    SELECT metadata_and_text.tid
    FROM metadata_and_text
    WHERE metadata_and_text.src_filename = '#{@md.fns}'
    AND metadata_and_text.language_document_char = '#{@opt.lng}'
  ;} # note, for .ssm: @md.fns (is set during runtime & is) != @opt.fns @md.opt.fns
  file_exist=if @sql_type==:sqlite
    begin
      @conn.get_first_value(select_first_match)
    rescue SQLite3::Exception => e
      # not tested
      puts "Exception occurred"
      puts e
      SiSU_Utils::CodeMarker.new(__LINE__,__FILE__,:yellow).mark(
        "\n" \
        + 'Attempting to initialize db' + "\n" \
        + 'Creating db tables'
      )
      sdb={
        create: SiSU_DbDBI::Create.new(@opt,@conn,@file_maint,@sql_type),
        index: SiSU_DbDBI::Index.new(@opt,@conn,@file_maint,@sql_type),
      }
      db_action(sdb).create
      nil                                               # tables were just (re)created: the document cannot be present; do not let create's return value pose as a tid
    end
  else
    db_init_attempted=false                             # limits the retry below to a single attempt
    begin
      @conn.exec(select_first_match).field_values("tid")[0]
    rescue PG::Error => e
      err=[
        e.result.error_field( PG::Result::PG_DIAG_SEVERITY ),
        e.result.error_field( PG::Result::PG_DIAG_SQLSTATE ),
        e.result.error_field( PG::Result::PG_DIAG_MESSAGE_PRIMARY ),
        e.result.error_field( PG::Result::PG_DIAG_MESSAGE_DETAIL ),
        e.result.error_field( PG::Result::PG_DIAG_MESSAGE_HINT ),
        e.result.error_field( PG::Result::PG_DIAG_STATEMENT_POSITION ),
        e.result.error_field( PG::Result::PG_DIAG_INTERNAL_POSITION ),
        e.result.error_field( PG::Result::PG_DIAG_INTERNAL_QUERY ),
        e.result.error_field( PG::Result::PG_DIAG_CONTEXT ),
        e.result.error_field( PG::Result::PG_DIAG_SOURCE_FILE ),
        e.result.error_field( PG::Result::PG_DIAG_SOURCE_LINE ),
        e.result.error_field( PG::Result::PG_DIAG_SOURCE_FUNCTION ),
      ]
      p err
      relation_missing=(err[2] =~/relation "\S+?" does not exist/ \
      || err.inspect =~/relation "\S+?" does not exist/)
      if relation_missing && !db_init_attempted
        db_init_attempted=true
        SiSU_Utils::CodeMarker.new(__LINE__,__FILE__,:yellow).mark(
          "\n" \
          + err[2].to_s + "\n" \
          + 'Attempting to initialize db' + "\n" \
          + 'Creating db tables'
        )
        sdb={
          create: SiSU_DbDBI::Create.new(@opt,@conn,@file_maint,@sql_type),
          index: SiSU_DbDBI::Index.new(@opt,@conn,@file_maint,@sql_type),
        }
        db_action(sdb).create
        retry                                           # re-run the lookup once, now that the tables should exist
      end
    end
  end
  if not file_exist
    t_d=[]                                                              # transaction_data
    t_d << db_import_metadata
    t_d << db_import_documents(@ao_array)
    t_d << db_import_urls(@ao_array,@fnc)                              #import OID on/off
    t_d=t_d.flatten
    if (@opt.act[:verbose_plus][:set]==:on \
    || @opt.act[:maintenance][:set]==:on)
      puts @conn.class if defined? @conn.class
      puts @conn.driver_name if defined? @conn.driver_name
      puts @conn.driver if defined? @conn.driver
    end
    begin                                                               #% sql
      if @sql_type==:sqlite
        # the block form rolls back by itself when a statement raises
        @conn.transaction do |conn|
          t_d.each do |statement|
            conn.execute(statement)
          end
        end
      else
        @conn.exec("BEGIN")
        t_d.each do |statement|
          @conn.exec(statement)
        end
        @conn.exec("COMMIT")
      end
    rescue
      SiSU_Errors::Rescued.new($!,$@,@opt.selections.str,@opt.fns).location do
        __LINE__.to_s + ':' + __FILE__
      end
      unless @sql_type==:sqlite
        # close the explicit transaction opened above so the connection is
        # not left in an aborted-transaction state
        begin
          @conn.exec("ROLLBACK")
        rescue PG::Error
          # connection unusable; nothing further to undo here
        end
      end
      # keep the statements that failed to load, for inspection
      sqlfn="#{@env.processing_path.sql}/#{@md.fnb}.sql"
      sql=File.new(sqlfn,'w')
      begin
        t_d.each {|i| sql.puts i}
      ensure
        sql.close                                       # flush the dump and release the handle
      end
      p sqlfn
      if @opt.act[:maintenance][:set]==:on
        puts sql
        p @conn.methods.sort
        puts "#{__FILE__}:#{__LINE__}"
      end
    end
  else
    @db=SiSU_Env::InfoDb.new
    puts "\nfile #{@opt.fns} in language code #{@opt.lng} already exists in database #{@db.psql.db}, use --update instead?"
  end
end
pf_db_import_transaction_close() click to toggle source
# File lib/sisu/db_import.rb, line 254
# Empty method: performs no work and returns nil.
def pf_db_import_transaction_close
  nil
end
pf_db_import_transaction_open() click to toggle source
# File lib/sisu/db_import.rb, line 252
# Empty method: performs no work and returns nil.
def pf_db_import_transaction_open
  nil
end
plus() click to toggle source
# File lib/sisu/db_import.rb, line 762
# Numbers of the plus-marked notes in @txt (spans opening with Mx[:en_b_o]
# followed by "+" and closing with Mx[:en_b_c]).
#
# Returns the String#scan result, e.g. [["3"]], when @txt contains such a
# span; nil otherwise.
def plus
  if @txt =~/#{Mx[:en_b_o]}\+.+?#{Mx[:en_b_c]}/
    @txt.scan(/#{Mx[:en_b_o]}[+](\d+).+?#{Mx[:en_b_c]}/)
  else
    nil
  end
end
range() click to toggle source
# File lib/sisu/db_import.rb, line 779
# Record the first and last note number found in @txt.
#
# Resets @col[:en_a] and @col[:en_z] to nil, then, if @txt contains notes of
# the form "<open>[*+]?<number> <text><close>" (open/close being the
# Mx[:en_a_*] or Mx[:en_b_*] delimiters), sets @col[:en_a] to the first number
# and @col[:en_z] to the last, both as strings.
#
# Returns @col.
def range
  @col[:en_a]=@col[:en_z]=nil
  # Match against the whole text, not whitespace-separated words: every note
  # has whitespace between its number and its body, so a whitespace-free
  # token can never contain a complete note.
  note=/(?:#{Mx[:en_a_o]}|#{Mx[:en_b_o]})[*+]?(\d+)\s+.+?(?:#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/
  numbers=@txt.to_s.scan(note).flatten
  unless numbers.empty?
    @col[:en_a]=numbers.first
    @col[:en_z]=numbers.last.dup                        # separate object even when only one note
  end
  @col
end
standard() click to toggle source
# File lib/sisu/db_import.rb, line 752
# Numbers of the notes in @txt delimited by Mx[:en_a_o] … Mx[:en_a_c].
#
# Returns the String#scan result, e.g. [["1"], ["2"]], when @txt contains such
# a delimited span; nil otherwise.
def standard
  if @txt =~/#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}/
    @txt.scan(/#{Mx[:en_a_o]}(\d+).+?#{Mx[:en_a_c]}/)
  else
    nil
  end
end