class SiSU_TeX_Pdf::SpecialCharacters

Public Class Methods

new(md,str,is=:default) click to toggle source
# File lib/sisu/texpdf_format.rb, line 1233
# Stores the document metadata, the text to process and the processing mode.
#
# md  - metadata object, kept in @md.
# str - text the instance methods operate on, kept in @txt.
# is  - mode symbol, kept in @is (:default unless given).
#
# The TeX-to-PDF engine is obtained from SiSU_Env::SystemCall and cached in the
# class variable @@tex3pdf; once that holds a truthy value it is reused as-is.
def initialize(md,str,is=:default)
  @md=md
  @txt=str
  @is=is
  @@tex3pdf ||=SiSU_Env::SystemCall.new.tex2pdf_engine
  @tex2pdf=@@tex3pdf
end

Public Instance Methods

characters_code_listings() click to toggle source
# File lib/sisu/texpdf_format.rb, line 1471
# Runs @txt through xetex_code_listings (passing the @is mode) and stores the
# result back in @txt. A nil @txt is left as nil and the helper is not called.
#
# Returns the new value of @txt.
def characters_code_listings
  @txt=(@txt.nil? ? @txt : xetex_code_listings(@txt,@is))
end
special_characters() click to toggle source
# File lib/sisu/texpdf_format.rb, line 1438
# Full special-character pipeline for @txt; the result is stored back in @txt.
#
# The passes run strictly in the order listed (some substitutions are sequence
# sensitive, rearrange with care). A pass is skipped whenever the text is nil.
#
# Returns the new value of @txt.
def special_characters
  mode=@is
  passes=[
    lambda { |s| xetex_special_characters_1(s,mode) },
    lambda { |s| special_characters_unsafe_1(s) },
    lambda { |s| xetex_special_characters_2(s,mode) },                   # issues with xetex
    lambda { |s| xetex_special_characters_3(s) }
  ]
  @txt=passes.inject(@txt) { |s,pass| s.nil? ? s : pass.call(s) }
end
special_characters_code() click to toggle source
# File lib/sisu/texpdf_format.rb, line 1476
# Rewrites a space followed by two backslashes (when followed by spaces/Mx[:br_nl]
# characters or the end of a line) as
# ' \textbackslash\textbackslash\hardspace' plus the matched trailer.
#
# Reads @txt but does not assign it; the converted String is only returned.
def special_characters_code
  @txt.gsub(/ \\\\([ #{Mx[:br_nl]}]+|$)/,' \textbackslash\textbackslash\hardspace\1')
end
special_characters_code_fix(str) click to toggle source
# File lib/sisu/texpdf_format.rb, line 1429
# Replaces every <=tilde> placeholder in str with the LaTeX sequence {$\tilde$}.
#
# str - String to convert.
#
# Returns a new String; str itself is not modified.
def special_characters_code_fix(str)
  str.gsub(/<=tilde>/,'{$\tilde$}')
end
special_characters_safe() click to toggle source
# File lib/sisu/texpdf_format.rb, line 1457
# "Safe" special-character pipeline for @txt; the result is stored back in @txt.
#
# Runs xetex_special_characters_1, xetex_special_characters_2 and then
# special_characters_safe_close, in that order (some substitutions are sequence
# sensitive, rearrange with care). A pass is skipped whenever the text is nil.
#
# Returns the new value of @txt.
def special_characters_safe
  mode=@is
  passes=[
    lambda { |s| xetex_special_characters_1(s,mode) },
    lambda { |s| xetex_special_characters_2(s,mode) },                   # remove this to start with, causes issues
    lambda { |s| special_characters_safe_close(s) }
  ]
  @txt=passes.inject(@txt) { |s,pass| s.nil? ? s : pass.call(s) }
end
special_characters_safe_close(str) click to toggle source
# File lib/sisu/texpdf_format.rb, line 1422
# Resolves the <=...> placeholder tokens into their final LaTeX form:
#
#   <=tilde>          -> {$\tilde$}
#   <=hash>           -> {\#}
#   <=amp>            -> {\&}
#   <=copymark> text  -> ^\copyright \textnormal{text} (text runs to the end of the line)
#
# str - String to convert (nil is not handled here; callers in this class guard for it).
#
# Returns the converted String.
def special_characters_safe_close(str)
  str.gsub(/<=tilde>/,'{$\tilde$}').
    gsub(/<=hash>/,'{\#}').
    gsub(/<=amp>/,'{\\\&}'). #changed ... 2005
    # The pattern has a single capture group; the replacement used to end in a
    # reference to a non-existent second group, which always expanded to nothing.
    # The trailing space is kept so the output is unchanged.
    gsub(/<=copymark>\s*(.+)/,
      '^\copyright \textnormal{\1} ') # watch likely to be problematic
end
special_characters_safe_no_urls() click to toggle source
# File lib/sisu/texpdf_format.rb, line 1464
# Same pipeline as special_characters_safe, but always run in :no_urls mode
# (the instance's @is is ignored). The result is stored back in @txt.
#
# A pass is skipped whenever the text is nil.
#
# Returns the new value of @txt.
def special_characters_safe_no_urls
  mode=:no_urls
  passes=[
    lambda { |s| xetex_special_characters_1(s,mode) },
    lambda { |s| xetex_special_characters_2(s,mode) },                   # remove this to start with, causes issues
    lambda { |s| special_characters_safe_close(s) }
  ]
  @txt=passes.inject(@txt) { |s,pass| s.nil? ? s : pass.call(s) }
end
special_characters_unsafe_1(str) click to toggle source
# File lib/sisu/texpdf_format.rb, line 1433
# Deprecated kludge (to be made obsolete): turns an escaped
# "\textbackslash copyright|clearpage|newpage" back into the real LaTeX macro
# (\copyright, \clearpage, \newpage).
#
# str - String to convert.
#
# Returns a new String.
def special_characters_unsafe_1(str)
  escaped_macro=/\\textbackslash (copyright|clearpage|newpage)/
  str.gsub(escaped_macro,"\\\\\\1")  # bad solution, find out where tail is sent through specChar !
end
special_number_break_points() click to toggle source
# File lib/sisu/texpdf_format.rb, line 1452
# Appends the LaTeX discretionary hyphen "\-" after every run of eight
# hexadecimal digits (case-insensitive) in @txt, so long digests can break.
#
# Stores the result in @txt and returns it.
def special_number_break_points
  @txt=@txt.gsub(/([0-9a-f]{8})/i,'\1\-')
end
special_word_break_points() click to toggle source
# File lib/sisu/texpdf_format.rb, line 1446
# Inserts LaTeX discretionary hyphens ("\-") into @txt:
# first after each of _ , . ; : / | =, then between a "--" and a following
# run of four or more non-space characters.
#
# Stores the result in @txt and returns it.
def special_word_break_points
  after_punctuation=@txt.gsub(/([_,.;:\/|=])/,'\1\-')
  @txt=after_punctuation.gsub(/(--)(\S{4,})/,'\1\-\2')
end
xetex_code_listings(str,is=:default) click to toggle source
# File lib/sisu/texpdf_format.rb, line 1237
# Prepares text for a code listing: SiSU glyph codes (Mx[:gl_o]#NNN Mx[:gl_c]) and
# a few entities are restored to their literal characters rather than LaTeX macros.
#
# The text is first split into whitespace-separated words (newlines are kept as
# tokens of their own), each word is converted, and the words are re-joined with
# single spaces; the remaining substitutions run on the joined text.
#
# str - text to convert; must respond to #scan (nil is not handled here).
# is  - mode symbol; when it is :code the joined text is not stripped.
#
# Returns the converted String.
#
# NOTE(review): the last substitution turns "\copy", "\copyright" and "\copymark"
# into the <=copymark> placeholder; characters_code_listings does not resolve it,
# confirm it is handled downstream.
def xetex_code_listings(str,is=:default)                                 # ~ ^ $ & % _ { }  #LaTeX special characters - KEEP list
  word=str.scan(/\S+|\n/) #unless line =~/^(?:@\S|%+\s)/
  para_array=[]
  # String#scan returns an Array, which is always truthy, so the else branch below is not reached.
  str=if word
    word.each do |w| # _ - / # | : ! ^ ~
      w=w.gsub(/#{Mx[:gl_o]}#lt#{Mx[:gl_c]}/,'<').gsub(/#{Mx[:gl_o]}#gt#{Mx[:gl_c]}/,'>').
        gsub(/[\\]?~/,'~').
        gsub(/[#{Mx[:br_line]}#{Mx[:br_paragraph]}]/,"\n").              #watch
        gsub(/#{Mx[:gl_o]}#(?:126|152)#{Mx[:gl_c]}/,'~').                #126 usual
        gsub(/\\?\||#{Mx[:gl_o]}#124#{Mx[:gl_c]}/,'|')                   #unless is=='code' #unless w=~/<~\d+;(?:[ohmu]|[0-6]:)\d+;\w\d+>/ # | SiSU not really special sisu character but done, also LaTeX
      para_array << w
    end
    str=para_array.join(' ')
    str=str.strip unless is==:code
    str
  else ''
  end
  # Whole-text pass: drop name markers, anything up to a <-#> marker, and the
  # end-of-file / endnotes markers, then restore the remaining glyph codes.
  str=str.gsub(/\s*#{Mx[:mk_o]}:name#\S+?#{Mx[:mk_c]}\s*/,' ').
    gsub(/.+?<-#>/,'').
    gsub(/#{Mx[:br_eof]}/,'').
    gsub(/#{Mx[:br_endnotes]}/,'').
  #problem sequence ->
    gsub(/&(?:lt|#060);/,'<').                                           # < SiSU special character also LaTeX
    gsub(/#{Mx[:gl_o]}#(?:gt|062)#{Mx[:gl_c]}/,'>').                     # > SiSU special character also LaTeX
    gsub(/#{Mx[:gl_o]}#123#{Mx[:gl_c]}/,'{').                            # { SiSU special character also LaTeX
    gsub(/#{Mx[:gl_o]}#125#{Mx[:gl_c]}/,'}').                            # } SiSU special character also LaTeX
    gsub(/#{Mx[:gl_o]}#(?:126|152)#{Mx[:gl_c]}/,'~').                    # ~ SiSU special character also LaTeX
    gsub(/#{Mx[:gl_o]}#035#{Mx[:gl_c]}/,'#').                            # SiSU special character also LaTeX
    gsub(/#{Mx[:gl_o]}#033#{Mx[:gl_c]}/,'!').                            # ! SiSU not really special sisu character but done, also LaTeX
   #gsub(/(^|\s)\*\s/,'\1\asterisk ').                                   # * should you wish to escape asterisk e.g. describing \*{bold}*
    gsub(/#{Mx[:gl_o]}#042#{Mx[:gl_c]}/,'*').                            # * should you wish to escape asterisk e.g. describing \*{bold}*
    gsub(/#{Mx[:gl_o]}#045#{Mx[:gl_c]}/,'-').                            # - SiSU special character also LaTeX
    gsub(/#{Mx[:gl_o]}#043#{Mx[:gl_c]}/,'+').                            # + SiSU special character also LaTeX
    gsub(/#{Mx[:gl_o]}#044#{Mx[:gl_c]}/,',').                            # , SiSU special character also LaTeX
    gsub(/#{Mx[:gl_o]}#038#{Mx[:gl_c]}/,'&').                            #unless @txt=~/<:code>/  # & SiSU special character also LaTeX
    gsub(/#{Mx[:gl_o]}#047#{Mx[:gl_c]}/,'/').                            # / SiSU special character also LaTeX
    gsub(/#{Mx[:gl_o]}#092#{Mx[:gl_c]}/,'\\').                           # \ SiSU special character also LaTeX
    gsub(/#{Mx[:gl_o]}#095#{Mx[:gl_c]}/,'_').                            # _ SiSU special character also LaTeX
    gsub(/#{Mx[:gl_o]}#124#{Mx[:gl_c]}/,'|').                            # | SiSU not really special sisu character but done, also LaTeX
    gsub(/#{Mx[:gl_o]}#058#{Mx[:gl_c]}/,':').                            # : SiSU not really special sisu character but done, also LaTeX
    gsub(/#{Mx[:gl_o]}#094#{Mx[:gl_c]}|\^/,'^').                         # ^ SiSU not really special sisu character but done, also LaTeX
  ##watch placement, problem sequence ^
    gsub(/<sup><font face=symbol>&atild;<\/font><\/sup>/,' ').
    gsub(/\\copy(right|mark)?/,'<=copymark>')                            # ok problem with superscript
end
xetex_special_characters_1(str,is=:default) click to toggle source
# File lib/sisu/texpdf_format.rb, line 1282
# First escaping pass. Works word by word, then on the re-joined text, replacing
# SiSU glyph codes (Mx[:gl_o]#NNN Mx[:gl_c]) and LaTeX-sensitive characters with
# either LaTeX macros (\pipe, \caret, \slash, \lt, \gt, ...) or <=...> placeholder
# tokens (<=tilde>, <=hash>, <=amp>, <=underscore>, <=copymark>).
#
# The text is split into whitespace-separated words (newlines are tokens of their
# own) and re-joined with single spaces, so runs of whitespace collapse.
#
# str - text to convert; must respond to #scan (nil is not handled here).
# is  - mode symbol; when it is :code the joined text is not stripped and
#       non-breaking spaces are not turned into \hardspace.
#
# Returns the converted String.
#
# NOTE(review): special_characters_safe_close and xetex_special_characters_3 resolve
# the tilde, hash, amp and copymark placeholders; nothing in this section resolves
# <=underscore> - confirm it is handled elsewhere.
def xetex_special_characters_1(str,is=:default)                          # ~ ^ $ & % _ { }  #LaTeX special characters - KEEP list
  word=str.scan(/\S+|\n/) #unless line =~/^(?:@\S|%+\s)/
  para_array=[]
  # String#scan returns an Array, which is always truthy, so the else branch below is not reached.
  str=if word
    word.each do |w| # _ - / # | : ! ^ ~
      # Path-like words (not http/https URLs, containing /.../ and longer than 6
      # characters) get a discretionary hyphen after every _ . and /
      if w !~/https?:/ \
      and w=~/\/\S+?\// \
      and w.length > 6
        w=w.gsub(/([_.\/])/,'\1\-')
      end
      w=w.gsub(/#{Mx[:gl_o]}#lt#{Mx[:gl_c]}/,'<').gsub(/#{Mx[:gl_o]}#gt#{Mx[:gl_c]}/,'>').
        gsub(/[\\]?~/,'<=tilde>').
        gsub(/[#{Mx[:br_line]}#{Mx[:br_paragraph]}]/,' \newline ').      #watch
        gsub(/#{Mx[:gl_o]}#(?:126|152)#{Mx[:gl_c]}/,'<=tilde>').         #126 usual
        gsub(/\\?\||#{Mx[:gl_o]}#124#{Mx[:gl_c]}/,'\pipe')               #unless is=='code' #unless w=~/<~\d+;(?:[ohmu]|[0-6]:)\d+;\w\d+>/ # | SiSU not really special sisu character but done, also LaTeX
      # A literal # becomes <=hash> only in words that contain neither Mx[:rel_o]
      # nor a glyph code opener (Mx[:gl_o] followed by #).
      if w !~/#{Mx[:rel_o]}/ \
      and w !~/#{Mx[:gl_o]}#/
        w=w.gsub(/\#/,'<=hash>')
      end
      para_array << w
    end
    str=para_array.join(' ')
    str=str.strip unless is==:code
    str
  else ''
  end
  # Whole-text pass: drop name markers, anything up to a <-#> marker, and the
  # end-of-file / endnotes markers.
  str=str.gsub(/\s*#{Mx[:mk_o]}:name#\S+?#{Mx[:mk_c]}\s*/,' ').
    gsub(/.+?<-#>/,'').
    gsub(/#{Mx[:br_eof]}/,'').
    gsub(/#{Mx[:br_endnotes]}/,'')
  #problem sequence ->
  str=str.gsub(/&(?:nbsp);|#{Mx[:nbsp]}/,'\hardspace') unless is==:code  # non-breaking space -> \hardspace
  str=str.gsub(/&(?:lt|#060);/,'\lt').                                   # < SiSU special character also LaTeX
    gsub(/#{Mx[:gl_o]}#(?:gt|062)#{Mx[:gl_c]}/,'\gt').                   # > SiSU special character also LaTeX
    gsub(/#{Mx[:gl_o]}#123#{Mx[:gl_c]}/,'\curlyopen').                   # { SiSU special character also LaTeX
    gsub(/#{Mx[:gl_o]}#125#{Mx[:gl_c]}/,'\curlyclose').                  # } SiSU special character also LaTeX
    gsub(/#{Mx[:gl_o]}#(?:126|152)#{Mx[:gl_c]}/,'<=tilde>').             # ~ SiSU special character also LaTeX
    gsub(/#{Mx[:gl_o]}#035#{Mx[:gl_c]}/,'\#').                           # # SiSU special character also LaTeX
    gsub(/#{Mx[:gl_o]}#033#{Mx[:gl_c]}/,'!').                            # ! SiSU not really special sisu character but done, also LaTeX
    gsub(/(^|\s)\*\s/,'\1\asterisk ').                                   # * should you wish to escape asterisk e.g. describing \*{bold}*
    gsub(/#{Mx[:gl_o]}#042#{Mx[:gl_c]}/,'\*').                           # * should you wish to escape asterisk e.g. describing \*{bold}*
    gsub(/#{Mx[:gl_o]}#045#{Mx[:gl_c]}/,'-').                            # - SiSU special character also LaTeX
    gsub(/#{Mx[:gl_o]}#043#{Mx[:gl_c]}/,'+').                            # + SiSU special character also LaTeX
    gsub(/#{Mx[:gl_o]}#044#{Mx[:gl_c]}/,',').                            # , SiSU special character also LaTeX
    gsub(/#{Mx[:gl_o]}#038#{Mx[:gl_c]}/,'<=amp>'). #unless @txt=~/<:code>/  # & SiSU special character also LaTeX
    gsub(/#{Mx[:gl_o]}#047#{Mx[:gl_c]}/,'\slash').                       # / SiSU special character also LaTeX
    gsub(/#{Mx[:gl_o]}#092#{Mx[:gl_c]}/,'\textbackslash').               # \ SiSU special character also LaTeX
    gsub(/#{Mx[:gl_o]}#095#{Mx[:gl_c]}/,'<=underscore>').                # _ SiSU special character also LaTeX
    gsub(/#{Mx[:gl_o]}#124#{Mx[:gl_c]}/,'|').                            # | SiSU not really special sisu character but done, also LaTeX
    gsub(/#{Mx[:gl_o]}#058#{Mx[:gl_c]}/,':').                            # : SiSU not really special sisu character but done, also LaTeX
    gsub(/#{Mx[:gl_o]}#094#{Mx[:gl_c]}|\^/,'\caret').                    # ^ SiSU not really special sisu character but done, also LaTeX
  ##watch placement, problem sequence ^
    gsub(/<sup><font face=symbol>&atild;<\/font><\/sup>/,' ').
    gsub(/\\copy(right|mark)?/,'<=copymark>') # ok problem with superscript
end
xetex_special_characters_2(str,is=:default) click to toggle source
# File lib/sisu/texpdf_format.rb, line 1337
# Second escaping pass: backslash-escapes LaTeX-special characters ($ # % ~ _ { }),
# then converts SiSU inline markup (Mx[...] delimiters), HTML remnants, URLs and
# straight quotes to LaTeX.
#
# str - text to convert (String; nil is not handled here).
# is  - mode symbol:
#         :code    - '&' becomes {\&} and the <=tilde> / <=hash> placeholders are
#                    resolved here; bare-URL decoration and quote conversion are skipped.
#         :no_urls - neither Mx[:url_o]...Mx[:url_c] spans nor bare URLs are wrapped in \url{}.
#         other    - &nbsp; / Mx[:nbsp] become '~' and every remaining '&' becomes
#                    the <=amp> placeholder.
#
# Relies on Mx, Tex and url_decoration, which are defined outside this section.
#
# Returns the converted String.
def xetex_special_characters_2(str,is=:default)
  str=str.gsub(/#{Mx[:gl_o]}#156#{Mx[:gl_c]}/,'\oe ').
    gsub(/\$/,'\$').
    gsub(/\#/,'\#').
    gsub(/\%/,'\%').
    gsub(/\~/,'\~') #revisit, should not be necessary to mark remaining tildes
  # Underscores are left alone when the text starts with a link followed by "image".
  if str !~/^\s*#{Mx[:lnk_o]}.+?#{Mx[:lnk_c]}image\s/
    str=str.gsub(/_/,'\_')
  end
  str=str.gsub(/\{/,'\{').
    gsub(/\}/,'\}')
  str=if is==:code
    str.gsub(/&/,'{\\\&}').
      gsub(/\\~(\\\{)/,'{$\tilde$}\1').
      gsub(/(\\\})\\~/,'\1{$\tilde$}').
      gsub(/\\~(\[)/,'{$\tilde$}\1').
      gsub(/(\])\\~/,'\1{$\tilde$}').
      gsub(/<=tilde>/,'{$\tilde$}').
      gsub(/<=hash>/,'{\#}')
  else
    str.gsub(/&nbsp;|#{Mx[:nbsp]}/,'~'). # ~ character for hardspace
      gsub(/&/,'<=amp>')
  end
  # NOTE(review): by this point every '&' has been rewritten above, so the entity
  # pattern below can only match inside a {\&} produced in :code mode - confirm intent.
  str=str.gsub(/&\S+?;/,' ').
    gsub(/§/u,'\S'). #latex: space between next character not preserved? #str.gsub(/§ /,'\S ')
    gsub(/£/u,'\pounds').
    gsub(/<a href=".+?">/,' ').
    gsub(/<\/a>/,' ')
  # NOTE(review): the first pattern below has two capture groups, so the \3 in its
  # replacement expands to an empty string.
  unless is==:no_urls
    str=str.gsub(/((?:^|\s)#{Mx[:lnk_c]})#{Mx[:url_o]}(\S+?)#{Mx[:url_c]}/,
        '\1\begin{scriptsize}\url{\2}\end{scriptsize}\3'). #special case \{ e.g. \}http://url
      gsub(/#{Mx[:url_o]}\\_(\S+?)#{Mx[:url_c]}/,
        '\begin{scriptsize}\url{\1}\end{scriptsize}'). #special case \{ e.g. \}http://url
      gsub(/#{Mx[:url_o]}_(\S+?)#{Mx[:url_c]}/,
        '\begin{scriptsize}\\url{\1}\end{scriptsize}') #specially escaped url no decoration
  end
  # Bare http/https/file/ftp URLs are wrapped in \url{} with the url_decoration
  # open/close strings, except in :code and :no_urls modes.
  if is !=:code \
  and is !=:no_urls
    str=str.gsub(/(^|#{Mx[:gl_c]}|\s)((?:https?|file|ftp):\/\/\S+?\.[^'"\s]+?)([;.,]?(?=\s|$))/,
      "\\1#{url_decoration.tex_open}\\begin{scriptsize}\\url{\\2}\\end{scriptsize}#{url_decoration.tex_close}\\3") #url matching with decoration <url> positive lookahead, sequence issue with { linked }http://url cannot use \b at start
  end
  # Inline font attributes: SiSU markers and a few HTML tags -> LaTeX commands.
  str=str.gsub(/<:ee>/,'').
    gsub(/<!>/,' ').  #proposed change, insert, but may be redundant
    gsub(/<(br|p)>|<\/\s*(br|p)>|<(br|p)\s*\/>/," #{Tex[:backslash]*2} "). # Work Area
    gsub(/#{Mx[:fa_bold_o]}(.+?)#{Mx[:fa_bold_c]}/,'\begin{bfseries}\1 \end{bfseries}').
    gsub(/<h\d+>(.+?)<\/h\d+>/,'\begin{bfseries}\1 \end{bfseries}').
    gsub(/#{Mx[:fa_italics_o]}(.+?)#{Mx[:fa_italics_c]}/,'\emph{\1}').
    gsub(/#{Mx[:fa_underscore_o]}(.+?)#{Mx[:fa_underscore_c]}/,'\uline{\1}'). # ulem
    gsub(/#{Mx[:fa_cite_o]}(.+?)#{Mx[:fa_cite_c]}/,"``\\1''"). # quote #CHECK
    gsub(/#{Mx[:fa_insert_o]}(.+?)#{Mx[:fa_insert_c]}/,'\uline{\1}'). # ulem
    gsub(/#{Mx[:fa_strike_o]}(.+?)#{Mx[:fa_strike_c]}/,'\sout{\1}'). # ulem
    gsub(/#{Mx[:fa_superscript_o]}(.+?)#{Mx[:fa_superscript_c]}/,"\$^{\\textrm{\\1}}\$").
    gsub(/#{Mx[:fa_subscript_o]}(.+?)#{Mx[:fa_subscript_c]}/,"\$_{\\textrm{\\1}}\$").
    gsub(/#{Mx[:fa_monospace_o]}(.+?)#{Mx[:fa_monospace_c]}/,'\begin{monosp}\1\end{monosp}')
  # Straight quotes -> typographic quotes (skipped for code).
  # NOTE(review): the '"(\.|,)' substitution has no \1 in its replacement, but the
  # earlier '"(\s|\.|,|:|;)' substitution already consumes those cases - looks redundant, confirm.
  unless is==:code
    str=str.gsub(/"(.+?)"/,'“\1”').  # quote marks / quotations open & close " need condition exclude for code
      gsub(/\s+"/,' “').                                # open "
      gsub(/^(#{Mx[:lv_o]}[1-6-]:\S*?#{Mx[:lv_c]}|<.+?>)?\s*"/,'\1“'). #fix Mx[:lv_o] # open "
      gsub(/"(\s|\.|,|:|;)/,'”\1').                     # close "
      gsub(/"(#{Mx[:lv_o]}[1-6-]:\S*?#{Mx[:lv_c]}|<.+?>)?\s*$/,'”\1'). #fix Mx[:lv_o] # close "
      gsub(/"(\.|,)/,'”').                              # close "
      gsub(/\s+'/,' `').                                # open '
      gsub(/^(#{Mx[:lv_o]}[1-6-]:\S*?#{Mx[:lv_c]}|<.+?>)?\s*'/,'\1`') #fix Mx[:lv_o] # open '
  end
  # Strip leftover <font> tags; any superscript marker still present becomes ^text.
  str=str.gsub(/(<font.*?>|<\/font>)/,'').
    gsub(/\s*#{Mx[:fa_superscript_o]}(\S+?)#{Mx[:fa_superscript_c]}/,'^\1')
  str
end
xetex_special_characters_3(str) click to toggle source
# File lib/sisu/texpdf_format.rb, line 1405
# Final pass: cleans up <br .../> remnants and resolves the <=...> placeholders.
#
# Inside http/https URLs the tilde, hash and amp placeholders are restored to the
# bare characters (~ # &), because URLs are treated differently from running text;
# everywhere else they become {$\tilde$}, {\#} and {\&}. A <=copymark> placeholder
# becomes ^\copyright \textnormal{...} wrapping the rest of its line.
#
# str - String to convert (nil is not handled here; callers in this class guard for it).
#
# Returns the converted String.
def xetex_special_characters_3(str)
  # clean up, incredibly messy :-( footnote indents, problems if match exists in ordinary paragraphs? check!
  # Work Area 200501 a bit tricky as must be able to match multiple times, and to clean remainder
  str=str.gsub(/<br(\s*[^\/][^>])/,'\1').
    gsub(/([^<][^b][^r]\s+)\/>/,'\1')
  # Each gsub restores only the first placeholder of a URL (the greedy \S+ swallows
  # the rest of the word), so repeat until no URL contains one.
  while str =~/(https?:\/\/\S+?)(?:<=tilde>\S+)+/ #tilde in urls \href treated differently from text #FIX
    str=str.gsub(/(https?:\/\/\S+?)(?:<=tilde>(\S+))+/,'\1~\2')
  end
  str=str.gsub(/<=tilde>/,'{$\tilde$}')
  # Hashes are now restored in a loop like tildes and ampersands; previously only
  # the first hash of a URL was restored and later ones were escaped as {\#}.
  while str =~/(https?:\/\/\S+?)(?:<=hash>\S+)+/ #hash in urls \href treated differently from text #FIX
    str=str.gsub(/(https?:\/\/\S+?)(?:<=hash>(\S+))+/,'\1#\2')
  end
  str=str.gsub(/<=hash>/,'{\#}')
  while str =~/(https?:\/\/\S+?)(?:<=amp>\S+)+/ #amp in urls \href treated differently from text #FIX
    str=str.gsub(/(https?:\/\/\S+?)(?:<=amp>(\S+))+/,'\1&\2')
  end
  # The copymark pattern has a single capture group; the replacement used to end in
  # a reference to a non-existent second group, which always expanded to nothing.
  # The trailing space is kept so the output is unchanged.
  str.gsub(/<=amp>/,'{\\\&}'). #changed ... 2005
    gsub(/<=copymark>\s*(.+)/,
      '^\copyright \textnormal{\1} ') # watch likely to be problematic
end