class SiSU_DbText::Prepare

Public Instance Methods

clean_document_objects_body(arr) click to toggle source
# File lib/sisu/db_sqltxt.rb, line 84
# Builds the body text for one or more document objects.
#
# For every object:
# * the contents of each span delimited by Mx[:en_a_o] .. Mx[:en_a_c] are
#   collected (presumably these are endnotes -- confirm against the Mx
#   definitions);
# * in the body, each such span that begins with a number is replaced by that
#   number wrapped in <sup> tags;
# * spans delimited by Mx[:en_b_o] .. Mx[:en_b_c] are removed;
# * a space followed by further whitespace is collapsed to a single space.
#
# The collected span contents are appended after all bodies, each with its
# leading number wrapped in <sup> tags.
#
# arr:: a String (treated as a one-element list) or a collection of Strings.
#
# Returns all pieces joined with "\n<br>" and passed through
# special_character_escape.
def clean_document_objects_body(arr)
  objects = arr.is_a?(String) ? [arr] : arr
  collected = []
  bodies = objects.map do |obj|
    # Scan before substituting: the substitution below discards the span text.
    collected << obj.scan(/#{Mx[:en_a_o]}\s*(.+?)\s*#{Mx[:en_a_c]}/m)
    with_refs = obj.gsub(/#{Mx[:en_a_o]}\s*(\d+).+?#{Mx[:en_a_c]}/m, '<sup>\1</sup>')
    without_b = with_refs.gsub(/#{Mx[:en_b_o]}.+?#{Mx[:en_b_c]}/m, '')
    without_b.gsub(/ \s+/m, ' ')
  end
  notes = collected.flatten.map do |note|
    note.sub(/^(\d+)\s*/, '<sup>\1</sup> ')
  end
  pieces = (bodies << notes).flatten
  special_character_escape(pieces.join("\n<br>"))
end
clean_searchable_text_from_document_objects(arr) click to toggle source
# File lib/sisu/db_sqltxt.rb, line 66
# Builds plain searchable text from one or more document objects.
#
# For every object, in this order:
# * markers of the form Mx[:fa_o] + 1-4 lowercase letters + Mx[:fa_o_c], and
#   Mx[:fa_c_o] + 1-4 lowercase letters + Mx[:fa_c], are removed;
# * "<br>" becomes a space;
# * the contents of spans delimited by Mx[:en_a_o] .. Mx[:en_a_c] are
#   collected, then those spans and the Mx[:en_b_o] .. Mx[:en_b_c] spans are
#   removed from the body;
# * a space followed by further whitespace is collapsed to a single space.
#
# The collected span contents are appended, unmodified, after all bodies.
#
# arr:: a String (treated as a one-element list) or a collection of Strings.
#
# Returns all pieces joined with "\n" and passed through
# special_character_escape.
def clean_searchable_text_from_document_objects(arr)
  objects = arr.is_a?(String) ? [arr] : arr
  collected = []
  bodies = objects.map do |obj|
    plain = obj.gsub(/#{Mx[:fa_o]}[a-z]{1,4}#{Mx[:fa_o_c]}/m, '')
    plain = plain.gsub(/#{Mx[:fa_c_o]}[a-z]{1,4}#{Mx[:fa_c]}/m, '')
    plain = plain.gsub(/<br>/m, ' ')
    # Collect span contents before the spans are stripped from the body.
    collected << plain.scan(/#{Mx[:en_a_o]}\s*(.+?)\s*#{Mx[:en_a_c]}/m)
    plain = plain.gsub(/#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}/m, '')
    plain = plain.gsub(/#{Mx[:en_b_o]}.+?#{Mx[:en_b_c]}/m, '')
    plain.gsub(/ \s+/m, ' ')
  end
  pieces = (bodies << collected).flatten
  special_character_escape(pieces.join("\n"))
end
clean_searchable_text_from_document_source(arr) click to toggle source
# File lib/sisu/db_sqltxt.rb, line 103
# Strips markup from document source text to produce plain searchable text.
#
# Each source string has the following applied, in order:
# * X{text}X wrappers, where X is one of * / _ -, are reduced to "text";
# * "block{", "group{", "poem{", "code{" openers and the matching
#   "}block" ... "}code" closers at the start of a line are removed;
# * a string consisting solely of an "@name: value" header is emptied;
# * on strings containing a line starting with ":A~", "@author" and "@title"
#   are replaced by @md.creator.author and @md.title.full; if either value is
#   unavailable a warning is printed unless @md.opt.act[:quiet][:set] is :on;
# * leading "_1".."_9" (optionally followed by "*") and "_*" markers, and
#   leading "1~".."9~", "A~".."C~" / ":A~"..":C~" markers (with any attached
#   name), are removed;
# * lines starting with one to three "%" followed by a space are removed;
# * "<br>" becomes a space and inline "~{ ... }~" spans are removed;
# * a space followed by further whitespace is collapsed to a single space.
#
# arr:: a String (split on runs of newlines) or a collection of Strings.
#
# Returns the cleaned strings joined with "\n" and passed through
# special_character_escape.
def clean_searchable_text_from_document_source(arr)
  lines = arr.is_a?(String) ? arr.split(/\n+/m) : arr
  # map, not each: the block's result must be kept. With each, every
  # substitution below was thrown away and the raw source was returned.
  cleaned = lines.map do |s|
    s=s.gsub(/([*\/_-])\{(.+?)\}\1/m,'\2').
      gsub(/^(?:block|group|poem|code)\{/m,'').
      gsub(/^\}(?:block|group|poem|code)/m,'').
      gsub(/\A(?:@\S+:\s+.+)\Z/m,'')
    if s =~/^:A~/
      if defined?(@md.creator) \
      && defined?(@md.creator.author) \
      && !@md.creator.author.empty?
        s=s.gsub(/@author/,@md.creator.author)
      else
        SiSU_Screen::Ansi.new(
          'v',
          'WARNING Document Author information missing; provide @creator: :author:',
          @md.fnb
        ).warn unless @md.opt.act[:quiet][:set]==:on
      end
      if defined?(@md.title) \
      && defined?(@md.title.full) \
      && !@md.title.full.empty?
        s=s.gsub(/@title/,@md.title.full)
      else
        SiSU_Screen::Ansi.new(
          'v',
          'WARNING Document Title missing; provide @title:',
          @md.fnb
        ).warn unless @md.opt.act[:quiet][:set]==:on
      end
    end
    s=s.gsub(/^(?:_[1-9]\*?|_\*)\s+/m,'').
      gsub(/^(?:[1-9]\~(\S+)?)\s+/m,'').
      gsub(/^(?::?[A-C]\~(\S+)?)\s+/m,'').
      gsub(/^%{1,3} .+/m,''). #removed even if contained in code block
      gsub(/<br>/m,' ')
    # Inline ~{ ... }~ spans are dropped, not collected.
    s.gsub(/~\{.+?\}~/m,'').
      gsub(/ \s+/m,' ')
  end
  special_character_escape(cleaned.flatten.join("\n"))
end
special_character_escape(str) click to toggle source
# File lib/sisu/db_sqltxt.rb, line 57
# Escapes and tidies text, applying the substitutions below strictly in order
# (later rules see the output of earlier ones):
# 1. every single quote is doubled;
# 2. every backslash is doubled (per the original notes: works, with
#    warnings; yields a double backslash on sqlite);
# 3. Mx[:br_line] and Mx[:br_nl] become "<br>\n";
# 4. Mx[:tag_o] + non-whitespace + Mx[:tag_c] spans are removed;
# 5. a link span whose first token ends in .png or .jpg becomes
#    "[image: <file>] <remaining span text>";
# 6. a link span followed by a file:// or ftp:// URL is reduced to the span
#    text, keeping any punctuation/whitespace that terminated the URL;
# 7. a link span followed by an Mx[:url_o] .. Mx[:url_c] URL is reduced to
#    the span text.
#
# str:: the String to process.
#
# Returns a new String; str itself is not modified.
def special_character_escape(str)
  rules = [
    [/'/m, "''"],
    [/(\\)/m, '\1\1'],
    [/#{Mx[:br_line]}|#{Mx[:br_nl]}/m, "<br>\n"],
    [/#{Mx[:tag_o]}\S+?#{Mx[:tag_c]}/m, ''],
    [/#{Mx[:lnk_o]}\s*(\S+?\.(?:png|jpg))(?:\s+\d+x\d+)?(.+?)#{Mx[:lnk_c]}\S+/m, '[image: \1] \2'],
    [/#{Mx[:lnk_o]}\s*(.+?)\s*#{Mx[:lnk_c]}(?:file|ftp):\/\/\S+?([.,!?]?(?:\s|$))/m, '\1\2'],
    [/#{Mx[:lnk_o]}\s*(.+?)\s*#{Mx[:lnk_c]}#{Mx[:url_o]}\S+?#{Mx[:url_c]}/m, '\1']
  ]
  rules.inject(str) do |text, (pattern, replacement)|
    text.gsub(pattern, replacement)
  end
end
strip_markup(str) click to toggle source
# File lib/sisu/db_sqltxt.rb, line 152
# Strips markup from str, applying these substitutions in order:
# * a number between Mx[:fa_superscript_o] and Mx[:fa_superscript_c]
#   becomes "[number]";
# * runs of "&nbsp\;" or Mx[:nbsp] become a single space;
# * table cell markers built from Mx[:tc_o], Mx[:tc_p] and Mx[:tc_c] are
#   removed or turned into spaces;
# * anything of the form <...> is removed;
# * a link span naming a .png/.jpg/.gif file, followed by either a file://
#   or ftp:// URL or an Mx[:url_o] .. Mx[:url_c] URL, becomes "[image]" so
#   that image file names do not end up in search results;
# * runs of whitespace are collapsed and the result is stripped.
#
# str:: the String to process.
#
# Returns a new String; str itself is not modified.
def strip_markup(str) #define rules, make same as in dal clean
  str.gsub(/#{Mx[:fa_superscript_o]}(\d+)#{Mx[:fa_superscript_c]}/,'[\1]').
    gsub(/(?:&nbsp\\;|#{Mx[:nbsp]})+/,' ').
    gsub(/#{Mx[:tc_o]}#{Mx[:tc_p]}#{Mx[:tc_p]}\d+(.+)#{Mx[:tc_c]}/u,'\1').         #tables
    gsub(/#{Mx[:tc_p]}#{Mx[:tc_p]}\d+#{Mx[:tc_p]}/u,' ').                          #tables
    gsub(/#{Mx[:tc_p]}/u,' ').                                                     #tables tidy later
    gsub(/<.+?>/,'').
    # The scheme separator is "://"; the previous pattern had it transposed
    # as "//:" and so never matched a real file:// or ftp:// image link.
    gsub(/#{Mx[:lnk_o]}.+?\.(?:png|jpg|gif).+?#{Mx[:lnk_c]}(?:file|ftp):\/\/\S+ /,' [image] '). # else image names found in search
    gsub(/#{Mx[:lnk_o]}.+?\.(?:png|jpg|gif).+?#{Mx[:lnk_c]}#{Mx[:url_o]}\S+?#{Mx[:url_c]}/,' [image]'). # else image names found in search
    gsub(/\s\s+/,' ').
    strip
end
unique_words(str) click to toggle source
# File lib/sisu/db_sqltxt.rb, line 164
# Extracts the distinct "words" found in str.
#
# A word is a run of two or more characters drawn from ASCII letters,
# digits, backslash, forward slash, underscore and hyphen.
#
# str:: the String to scan.
#
# Returns the distinct words, sorted, joined by single spaces.
def unique_words(str)
  str.scan(/[a-zA-Z0-9\\\/_-]{2,}/).uniq.sort.join(' ')
end