class SiSU_TeX_Pdf::SpecialCharacters
Public Class Methods
new(md,str,is=:default)
click to toggle source
# File lib/sisu/texpdf_format.rb, line 1233
# Keeps the three constructor arguments on the instance and records the
# tex2pdf engine reported by SiSU_Env::SystemCall.
#
# md  - stored in @md (not read by the methods visible in this section)
# str - stored in @txt; the text the special_* methods operate on
# is  - stored in @is; mode symbol, the visible code tests for :code and :no_urls
def initialize(md,str,is=:default)
  @md=md
  @txt=str
  @is=is
  # the engine lookup is memoised in a class variable: once a truthy value
  # has been stored, SystemCall is not consulted again
  @@tex3pdf ||=SiSU_Env::SystemCall.new.tex2pdf_engine
  @tex2pdf=@@tex3pdf
end
Public Instance Methods
characters_code_listings()
click to toggle source
# File lib/sisu/texpdf_format.rb, line 1471
# Runs @txt through xetex_code_listings (using the mode in @is) and stores
# the result back in @txt. A nil @txt is left as nil.
# Returns the new value of @txt.
def characters_code_listings
  @txt=@txt.nil? ? @txt : xetex_code_listings(@txt,@is)
end
special_characters()
click to toggle source
# File lib/sisu/texpdf_format.rb, line 1438
# Applies the full special-character conversion to @txt and stores the result
# back in @txt. Returns the new value of @txt.
#
# The passes are order sensitive: later passes resolve placeholders
# (e.g. <=tilde>, <=hash>, <=amp>) that earlier passes insert. Each pass is
# skipped once the text is nil.
def special_characters
  mode=@is
  passes=[
    lambda { |text| xetex_special_characters_1(text,mode) },
    lambda { |text| special_characters_unsafe_1(text) },
    lambda { |text| xetex_special_characters_2(text,mode) },
    lambda { |text| xetex_special_characters_3(text) }
  ]
  @txt=passes.inject(@txt) do |text,pass|
    text.nil? ? text : pass.call(text)
  end
end
special_characters_code()
click to toggle source
# File lib/sisu/texpdf_format.rb, line 1476
# Returns a copy of @txt in which a double backslash that is preceded by a
# space and followed by spaces/Mx[:br_nl] characters (or the end of a line)
# is spelled out as two \textbackslash commands plus \hardspace.
# @txt itself is not modified.
def special_characters_code
  trailing_double_backslash=/ \\\\([ #{Mx[:br_nl]}]+|$)/
  @txt.gsub(trailing_double_backslash,' \textbackslash\textbackslash\hardspace\1')
end
special_characters_code_fix(str)
click to toggle source
# File lib/sisu/texpdf_format.rb, line 1429
# Returns a copy of str with every <=tilde> placeholder replaced by the
# LaTeX math-mode tilde {$\tilde$}.
def special_characters_code_fix(str)
  tilde_placeholder=/<=tilde>/
  str.gsub(tilde_placeholder,'{$\tilde$}')
end
special_characters_safe()
click to toggle source
# File lib/sisu/texpdf_format.rb, line 1457
# Reduced conversion of @txt: xetex_special_characters_1, then
# xetex_special_characters_2 (both with the mode in @is), then
# special_characters_safe_close to resolve the remaining placeholders.
# The order matters; each pass is skipped once the text is nil.
# Stores the result in @txt and returns it.
def special_characters_safe
  mode=@is
  passes=[
    lambda { |text| xetex_special_characters_1(text,mode) },
    lambda { |text| xetex_special_characters_2(text,mode) },
    lambda { |text| special_characters_safe_close(text) }
  ]
  @txt=passes.inject(@txt) do |text,pass|
    text.nil? ? text : pass.call(text)
  end
end
special_characters_safe_close(str)
click to toggle source
# File lib/sisu/texpdf_format.rb, line 1422
# Resolves the placeholders left by the earlier passes into LaTeX:
#   <=tilde>    -> {$\tilde$}
#   <=hash>     -> {\#}
#   <=amp>      -> {\&}
#   <=copymark> -> ^\copyright \textnormal{...} wrapping the rest of the line
#
# str - text containing placeholders (must not be nil)
# Returns the converted copy of str.
def special_characters_safe_close(str)
  str.gsub(/<=tilde>/,'{$\tilde$}').
    gsub(/<=hash>/,'{\#}').
    gsub(/<=amp>/,'{\\\&}').
    # the pattern has a single capture group; the former trailing \2 always
    # expanded to nothing, so it is dropped (the trailing space is kept)
    gsub(/<=copymark>\s*(.+)/,'^\copyright \textnormal{\1} ')
end
special_characters_safe_no_urls()
click to toggle source
# File lib/sisu/texpdf_format.rb, line 1464
# Same sequence as the "safe" conversion, but always run in :no_urls mode
# (the value of @is is ignored), which makes xetex_special_characters_2 skip
# its url handling. Each pass is skipped once the text is nil.
# Stores the result in @txt and returns it.
def special_characters_safe_no_urls
  mode=:no_urls
  passes=[
    lambda { |text| xetex_special_characters_1(text,mode) },
    lambda { |text| xetex_special_characters_2(text,mode) },
    lambda { |text| special_characters_safe_close(text) }
  ]
  @txt=passes.inject(@txt) do |text,pass|
    text.nil? ? text : pass.call(text)
  end
end
special_characters_unsafe_1(str)
click to toggle source
# File lib/sisu/texpdf_format.rb, line 1433
# Deprecated kludge (marked for removal by the original author): turns
# "\textbackslash copyright", "\textbackslash clearpage" and
# "\textbackslash newpage" back into the LaTeX commands \copyright,
# \clearpage and \newpage.
#
# str - text to repair (must not be nil)
# Returns the converted copy of str.
def special_characters_unsafe_1(str)
  escaped_command=/\\textbackslash (copyright|clearpage|newpage)/
  str.gsub(escaped_command,"\\\\\\1")
end
special_number_break_points()
click to toggle source
# File lib/sisu/texpdf_format.rb, line 1452
# Inserts a LaTeX discretionary hyphen (\-) after every run of eight
# hexadecimal digits in @txt, so long hex strings can be broken across lines.
# A nil @txt is left as nil, matching the nil handling of the
# special_characters* methods, instead of raising NoMethodError.
# Stores the result in @txt and returns it.
def special_number_break_points
  text=@txt
  text=text.gsub(/([0-9a-f]{8})/i,'\1\-') unless text.nil?
  @txt=text
end
special_word_break_points()
click to toggle source
# File lib/sisu/texpdf_format.rb, line 1446
# Adds LaTeX discretionary hyphens (\-) to @txt:
#   * after each of the characters _ , . ; : / | =
#   * between a leading "--" and a following run of four or more non-space
#     characters
# A nil @txt is left as nil, matching the nil handling of the
# special_characters* methods, instead of raising NoMethodError.
# Stores the result in @txt and returns it.
def special_word_break_points
  text=@txt
  unless text.nil?
    text=text.gsub(/([_,.;:\/|=])/,'\1\-').
      gsub(/(--)(\S{4,})/,'\1\-\2')
  end
  @txt=text
end
xetex_code_listings(str,is=:default)
click to toggle source
# File lib/sisu/texpdf_format.rb, line 1237
# Converts SiSU-internal character markers in a code listing back to the
# literal characters they stand for. LaTeX special characters to keep in
# mind: ~ ^ $ & % _ { }
#
# str - text to convert (must not be nil)
# is  - mode symbol; with :code the joined text is not stripped
#
# The text is split into non-space tokens (and newlines), each token is
# converted, the tokens are re-joined with single spaces, and a second chain
# of substitutions runs over the whole string. The substitutions are order
# sensitive - rearrange with care. Mx is the marker table defined elsewhere
# in the project.
# Returns the converted string.
def xetex_code_listings(str,is=:default)
  tokens=str.scan(/\S+|\n/)
  joined=if tokens
    converted=tokens.map do |w| # per token: < > ~ line breaks |
      w.gsub(/#{Mx[:gl_o]}#lt#{Mx[:gl_c]}/,'<').
        gsub(/#{Mx[:gl_o]}#gt#{Mx[:gl_c]}/,'>').
        gsub(/[\\]?~/,'~').
        gsub(/[#{Mx[:br_line]}#{Mx[:br_paragraph]}]/,"\n"). # watch
        gsub(/#{Mx[:gl_o]}#(?:126|152)#{Mx[:gl_c]}/,'~'). # 126 usual
        gsub(/\\?\||#{Mx[:gl_o]}#124#{Mx[:gl_c]}/,'|')
    end
    line=converted.join(' ')
    is==:code ? line : line.strip
  else
    ''
  end
  joined.gsub(/\s*#{Mx[:mk_o]}:name#\S+?#{Mx[:mk_c]}\s*/,' ').
    gsub(/.+?<-#>/,'').
    gsub(/#{Mx[:br_eof]}/,'').
    gsub(/#{Mx[:br_endnotes]}/,''). # problem sequence from here on
    gsub(/&(?:lt|#060);/,'<'). # <
    gsub(/#{Mx[:gl_o]}#(?:gt|062)#{Mx[:gl_c]}/,'>'). # >
    gsub(/#{Mx[:gl_o]}#123#{Mx[:gl_c]}/,'{'). # {
    gsub(/#{Mx[:gl_o]}#125#{Mx[:gl_c]}/,'}'). # }
    gsub(/#{Mx[:gl_o]}#(?:126|152)#{Mx[:gl_c]}/,'~'). # ~
    gsub(/#{Mx[:gl_o]}#035#{Mx[:gl_c]}/,'#'). # hash
    gsub(/#{Mx[:gl_o]}#033#{Mx[:gl_c]}/,'!'). # !
    gsub(/#{Mx[:gl_o]}#042#{Mx[:gl_c]}/,'*'). # * (escaped asterisk, e.g. when describing \*{bold}*)
    gsub(/#{Mx[:gl_o]}#045#{Mx[:gl_c]}/,'-'). # -
    gsub(/#{Mx[:gl_o]}#043#{Mx[:gl_c]}/,'+'). # +
    gsub(/#{Mx[:gl_o]}#044#{Mx[:gl_c]}/,','). # ,
    gsub(/#{Mx[:gl_o]}#038#{Mx[:gl_c]}/,'&'). # &
    gsub(/#{Mx[:gl_o]}#047#{Mx[:gl_c]}/,'/'). # /
    gsub(/#{Mx[:gl_o]}#092#{Mx[:gl_c]}/,'\\'). # backslash
    gsub(/#{Mx[:gl_o]}#095#{Mx[:gl_c]}/,'_'). # _
    gsub(/#{Mx[:gl_o]}#124#{Mx[:gl_c]}/,'|'). # |
    gsub(/#{Mx[:gl_o]}#058#{Mx[:gl_c]}/,':'). # :
    gsub(/#{Mx[:gl_o]}#094#{Mx[:gl_c]}|\^/,'^'). # ^ watch placement, problem sequence
    gsub(/<sup><font face=symbol>&atild;<\/font><\/sup>/,' ').
    gsub(/\\copy(right|mark)?/,'<=copymark>') # ok, problem with superscript
end
xetex_special_characters_1(str,is=:default)
click to toggle source
# File lib/sisu/texpdf_format.rb, line 1282
# First conversion pass for running text: replaces SiSU-internal character
# markers with LaTeX commands or with intermediate placeholders (<=tilde>,
# <=hash>, <=amp>, <=underscore>, <=copymark>) that later passes resolve.
# LaTeX special characters to keep in mind: ~ ^ $ & % _ { }
#
# str - text to convert (must not be nil)
# is  - mode symbol; with :code the joined text is not stripped and
#       non-breaking spaces are not turned into \hardspace
#
# The text is split into non-space tokens (and newlines), each token is
# converted, the tokens are re-joined with single spaces, and further
# substitutions run over the whole string. The substitutions are order
# sensitive - rearrange with care. Mx is the marker table defined elsewhere
# in the project.
# Returns the converted string.
def xetex_special_characters_1(str,is=:default)
  tokens=str.scan(/\S+|\n/)
  joined=if tokens
    converted=tokens.map do |w|
      # path-like tokens (contain /.../, longer than 6 characters, not an
      # http(s) url) get break points after _ . /
      if w !~/https?:/ && w=~/\/\S+?\// && w.length > 6
        w=w.gsub(/([_.\/])/,'\1\-')
      end
      w=w.gsub(/#{Mx[:gl_o]}#lt#{Mx[:gl_c]}/,'<').
        gsub(/#{Mx[:gl_o]}#gt#{Mx[:gl_c]}/,'>').
        gsub(/[\\]?~/,'<=tilde>').
        gsub(/[#{Mx[:br_line]}#{Mx[:br_paragraph]}]/,' \newline '). # watch
        gsub(/#{Mx[:gl_o]}#(?:126|152)#{Mx[:gl_c]}/,'<=tilde>'). # 126 usual
        gsub(/\\?\||#{Mx[:gl_o]}#124#{Mx[:gl_c]}/,'\pipe')
      # a literal hash is only protected in tokens that carry neither a
      # Mx[:rel_o] marker nor a Mx[:gl_o]# character marker
      if w !~/#{Mx[:rel_o]}/ && w !~/#{Mx[:gl_o]}#/
        w=w.gsub(/\#/,'<=hash>')
      end
      w
    end
    line=converted.join(' ')
    is==:code ? line : line.strip
  else
    ''
  end
  cleaned=joined.gsub(/\s*#{Mx[:mk_o]}:name#\S+?#{Mx[:mk_c]}\s*/,' ').
    gsub(/.+?<-#>/,'').
    gsub(/#{Mx[:br_eof]}/,'').
    gsub(/#{Mx[:br_endnotes]}/,'') # problem sequence from here on
  unless is==:code
    cleaned=cleaned.gsub(/&(?:nbsp);|#{Mx[:nbsp]}/,'\hardspace')
  end
  cleaned.gsub(/&(?:lt|#060);/,'\lt'). # <
    gsub(/#{Mx[:gl_o]}#(?:gt|062)#{Mx[:gl_c]}/,'\gt'). # >
    gsub(/#{Mx[:gl_o]}#123#{Mx[:gl_c]}/,'\curlyopen'). # {
    gsub(/#{Mx[:gl_o]}#125#{Mx[:gl_c]}/,'\curlyclose'). # }
    gsub(/#{Mx[:gl_o]}#(?:126|152)#{Mx[:gl_c]}/,'<=tilde>'). # ~
    gsub(/#{Mx[:gl_o]}#035#{Mx[:gl_c]}/,'\#'). # hash
    gsub(/#{Mx[:gl_o]}#033#{Mx[:gl_c]}/,'!'). # !
    gsub(/(^|\s)\*\s/,'\1\asterisk '). # free-standing *
    gsub(/#{Mx[:gl_o]}#042#{Mx[:gl_c]}/,'\*'). # * (escaped asterisk, e.g. when describing \*{bold}*)
    gsub(/#{Mx[:gl_o]}#045#{Mx[:gl_c]}/,'-'). # -
    gsub(/#{Mx[:gl_o]}#043#{Mx[:gl_c]}/,'+'). # +
    gsub(/#{Mx[:gl_o]}#044#{Mx[:gl_c]}/,','). # ,
    gsub(/#{Mx[:gl_o]}#038#{Mx[:gl_c]}/,'<=amp>'). # &
    gsub(/#{Mx[:gl_o]}#047#{Mx[:gl_c]}/,'\slash'). # /
    gsub(/#{Mx[:gl_o]}#092#{Mx[:gl_c]}/,'\textbackslash'). # backslash
    gsub(/#{Mx[:gl_o]}#095#{Mx[:gl_c]}/,'<=underscore>'). # _
    gsub(/#{Mx[:gl_o]}#124#{Mx[:gl_c]}/,'|'). # |
    gsub(/#{Mx[:gl_o]}#058#{Mx[:gl_c]}/,':'). # :
    gsub(/#{Mx[:gl_o]}#094#{Mx[:gl_c]}|\^/,'\caret'). # ^ watch placement, problem sequence
    gsub(/<sup><font face=symbol>&atild;<\/font><\/sup>/,' ').
    gsub(/\\copy(right|mark)?/,'<=copymark>') # ok, problem with superscript
end
xetex_special_characters_2(str,is=:default)
click to toggle source
# File lib/sisu/texpdf_format.rb, line 1337
# Second conversion pass: escapes the LaTeX special characters that are still
# literal in the text, converts urls and SiSU/HTML inline markup to LaTeX and
# (outside code) turns straight quotes into typographic ones.
#
# str - text to convert (must not be nil)
# is  - mode symbol:
#       :code    - & ~ # are resolved for code; no url decoration, no quote
#                  conversion
#       :no_urls - the url substitutions are skipped
#       other    - hard spaces become ~ and & becomes the <=amp> placeholder
#
# The substitutions are order sensitive - rearrange with care. Mx and Tex are
# tables and url_decoration a helper defined elsewhere in the project.
# Returns the converted string.
def xetex_special_characters_2(str,is=:default)
  str=str.gsub(/#{Mx[:gl_o]}#156#{Mx[:gl_c]}/,'\oe ').
    gsub(/\$/,'\$').
    gsub(/\#/,'\#').
    gsub(/\%/,'\%').
    gsub(/\~/,'\~') # revisit, should not be necessary to mark remaining tildes
  # underscores are left alone on lines that start with an image link
  if str !~/^\s*#{Mx[:lnk_o]}.+?#{Mx[:lnk_c]}image\s/
    str=str.gsub(/_/,'\_')
  end
  str=str.gsub(/\{/,'\{').
    gsub(/\}/,'\}')
  str=if is==:code
    str.gsub(/&/,'{\\\&}').
      gsub(/\\~(\\\{)/,'{$\tilde$}\1').
      gsub(/(\\\})\\~/,'\1{$\tilde$}').
      gsub(/\\~(\[)/,'{$\tilde$}\1').
      gsub(/(\])\\~/,'\1{$\tilde$}').
      gsub(/<=tilde>/,'{$\tilde$}').
      gsub(/<=hash>/,'{\#}')
  else
    # hard space -> ~ ; the entity and the U+00A0 character are spelled out
    # explicitly (as xetex_special_characters_1 does for the entity) rather
    # than relying on an invisible literal whitespace character in the pattern
    str.gsub(/&nbsp;|\u00A0|#{Mx[:nbsp]}/,'~').
      gsub(/&/,'<=amp>')
  end
  str=str.gsub(/&\S+?;/,' '). # any remaining character entity
    gsub(/§/u,'\S'). # latex: space between next character not preserved?
    gsub(/£/u,'\pounds').
    gsub(/<a href=".+?">/,' ').
    gsub(/<\/a>/,' ')
  unless is==:no_urls
    # the first pattern has two capture groups only; the former trailing \3
    # always expanded to nothing and is dropped
    str=str.gsub(/((?:^|\s)#{Mx[:lnk_c]})#{Mx[:url_o]}(\S+?)#{Mx[:url_c]}/,
        '\1\begin{scriptsize}\url{\2}\end{scriptsize}'). # special case \{ e.g. \}http://url
      gsub(/#{Mx[:url_o]}\\_(\S+?)#{Mx[:url_c]}/,
        '\begin{scriptsize}\url{\1}\end{scriptsize}'). # special case \{ e.g. \}http://url
      gsub(/#{Mx[:url_o]}_(\S+?)#{Mx[:url_c]}/,
        '\begin{scriptsize}\\url{\1}\end{scriptsize}') # specially escaped url, no decoration
  end
  if is !=:code && is !=:no_urls
    # bare url with decoration; the positive lookahead keeps trailing
    # punctuation out of the url; \b cannot be used at the start because of
    # the "{ linked }http://url" sequence
    str=str.gsub(/(^|#{Mx[:gl_c]}|\s)((?:https?|file|ftp):\/\/\S+?\.[^'"\s]+?)([;.,]?(?=\s|$))/,
      "\\1#{url_decoration.tex_open}\\begin{scriptsize}\\url{\\2}\\end{scriptsize}#{url_decoration.tex_close}\\3")
  end
  str=str.gsub(/<:ee>/,'').
    gsub(/<!>/,' '). # proposed change, insert, but may be redundant
    gsub(/<(br|p)>|<\/\s*(br|p)>|<(br|p)\s*\/>/," #{Tex[:backslash]*2} "). # work area
    gsub(/#{Mx[:fa_bold_o]}(.+?)#{Mx[:fa_bold_c]}/,'\begin{bfseries}\1 \end{bfseries}').
    gsub(/<h\d+>(.+?)<\/h\d+>/,'\begin{bfseries}\1 \end{bfseries}').
    gsub(/#{Mx[:fa_italics_o]}(.+?)#{Mx[:fa_italics_c]}/,'\emph{\1}').
    gsub(/#{Mx[:fa_underscore_o]}(.+?)#{Mx[:fa_underscore_c]}/,'\uline{\1}'). # ulem
    gsub(/#{Mx[:fa_cite_o]}(.+?)#{Mx[:fa_cite_c]}/,"``\\1''"). # quote, CHECK
    gsub(/#{Mx[:fa_insert_o]}(.+?)#{Mx[:fa_insert_c]}/,'\uline{\1}'). # ulem
    gsub(/#{Mx[:fa_strike_o]}(.+?)#{Mx[:fa_strike_c]}/,'\sout{\1}'). # ulem
    gsub(/#{Mx[:fa_superscript_o]}(.+?)#{Mx[:fa_superscript_c]}/,"\$^{\\textrm{\\1}}\$").
    gsub(/#{Mx[:fa_subscript_o]}(.+?)#{Mx[:fa_subscript_c]}/,"\$_{\\textrm{\\1}}\$").
    gsub(/#{Mx[:fa_monospace_o]}(.+?)#{Mx[:fa_monospace_c]}/,'\begin{monosp}\1\end{monosp}')
  unless is==:code
    str=str.gsub(/"(.+?)"/,'“\1”'). # paired quotation marks
      gsub(/\s+"/,' “'). # open "
      gsub(/^(#{Mx[:lv_o]}[1-6-]:\S*?#{Mx[:lv_c]}|<.+?>)?\s*"/,'\1“'). # open " (fix Mx[:lv_o])
      gsub(/"(\s|\.|,|:|;)/,'”\1'). # close "
      gsub(/"(#{Mx[:lv_o]}[1-6-]:\S*?#{Mx[:lv_c]}|<.+?>)?\s*$/,'”\1'). # close " (fix Mx[:lv_o])
      # NOTE(review): this replacement would drop the matched . or , but the
      # "close" substitution two lines up already consumes those cases
      gsub(/"(\.|,)/,'”'). # close "
      gsub(/\s+'/,' `'). # open '
      gsub(/^(#{Mx[:lv_o]}[1-6-]:\S*?#{Mx[:lv_c]}|<.+?>)?\s*'/,'\1`') # open ' (fix Mx[:lv_o])
  end
  str.gsub(/(<font.*?>|<\/font>)/,'').
    gsub(/\s*#{Mx[:fa_superscript_o]}(\S+?)#{Mx[:fa_superscript_c]}/,'^\1')
end
xetex_special_characters_3(str)
click to toggle source
# File lib/sisu/texpdf_format.rb, line 1405
# Final conversion pass: tidies left-over <br .../> fragments and resolves
# the <=tilde>, <=hash>, <=amp> and <=copymark> placeholders. Inside
# http(s) urls the placeholders become the plain characters ~ # &, elsewhere
# they become their LaTeX forms.
#
# str - text containing placeholders (must not be nil)
# Returns the converted string.
def xetex_special_characters_3(str)
  # clean up of footnote indents; messy, has to be able to match several
  # times and to clean the remainder (author's "work area" note)
  str=str.gsub(/<br(\s*[^\/][^>])/,'\1').
    gsub(/([^<][^b][^r]\s+)\/>/,'\1')
  # tilde in urls is treated differently from text; each round resolves one
  # placeholder per url because the trailing \S+ swallows any later ones
  while str =~/(https?:\/\/\S+?)(?:<=tilde>\S+)+/
    str=str.gsub(/(https?:\/\/\S+?)(?:<=tilde>(\S+))+/,'\1~\2')
  end
  str=str.gsub(/<=tilde>/,'{$\tilde$}').
    gsub(/(https?:\/\/\S+?)(?:(?:<=hash>)(\S+))+/,'\1#\2'). # hash in urls treated differently from text
    gsub(/<=hash>/,'{\#}')
  # ampersand in urls, same looping scheme as for the tilde
  while str =~/(https?:\/\/\S+?)(?:<=amp>\S+)+/
    str=str.gsub(/(https?:\/\/\S+?)(?:<=amp>(\S+))+/,'\1&\2')
  end
  # the copymark pattern has a single capture group; the former trailing \2
  # always expanded to nothing, so it is dropped (the trailing space is kept)
  str.gsub(/<=amp>/,'{\\\&}').
    gsub(/<=copymark>\s*(.+)/,'^\copyright \textnormal{\1} ') # watch, likely to be problematic
end