class HwpScriptToLatex::Converter
한글 수식스크립트를 LaTeX 문법으로 변환
Constants
- LEFT_TERM_REGEX
명령어에 사용되는 좌, 우항 정규표현식
- RIGHT_TERM_REGEX
Public Class Methods
new()
click to toggle source
# File lib/hwp_script_to_latex/converter.rb, line 14
# Builds a converter: creates the pre/post processor and loads the rule tables.
#
# META, SYMBOL and RESERVED_WORD are reordered so that the rule whose regex has
# the longest +inspect+ text comes first.
def initialize
  longest_regex_first = lambda do |rules|
    rules.sort_by { |rule| -rule[:regex].inspect.length }
  end

  @processor = Processor.new
  @keyword_commands = KEYWORD_COMMANDS
  @default_commands = DEFAULT_COMMANDS
  @block_commands = BLOCK_COMMANDS
  @meta = longest_regex_first.call(META)
  @symbols = longest_regex_first.call(SYMBOL)
  @reserved_words = longest_regex_first.call(RESERVED_WORD)
end
Public Instance Methods
convert(script, math_mode: false, display_mode: false)
click to toggle source
# File lib/hwp_script_to_latex/converter.rb, line 25
# Converts an HWP equation script into LaTeX.
#
# @param script the equation script; it is handed to the processor's
#   +pre_process+ first
# @param math_mode [Boolean] when true, the result is wrapped in <tt>$...$</tt>
# @param display_mode [Boolean] when true, +decorate_displaystyle+ is applied
#   to the whole formula
# @return the converted script
def convert(script, math_mode: false, display_mode: false)
  stages = [
    # Commands that take no parameters
    :replace_keyword_commands,
    # Commands that take a single right-hand term
    :replace_default_commands,
    # Block commands such as matrices and cases
    #   cases {...}   => \begin{cases}...\end{cases}
    #   dmatrix {...} => \begin{vmatrix}...\end{vmatrix}
    #   bmatrix {...} => \begin{Bmatrix}...\end{Bmatrix}
    #   pmatrix {...} => \begin{pmatrix}...\end{pmatrix}
    #   matrix {...}  => \begin{matrix}...\end{matrix}
    :replace_block_commands,
    # Special-form commands, case 1: roots
    #   sqrt A      => \sqrt {A}
    #   sqrt A of B => \sqrt [A]{B}
    :replace_sqrt,
    # Special-form commands, case 2: fractions
    #   A over B => \dfrac {A}{B}
    :replace_fractions,
    # Plain keyword substitution
    :replace_keywords
  ]

  # Data pre-processing, then every stage in order.
  latex = stages.reduce(@processor.pre_process(script)) do |text, stage|
    send(stage, text)
  end

  # Apply display style to the whole formula.
  latex = decorate_displaystyle(latex) if display_mode

  # Data post-processing.
  latex = @processor.post_process(latex)

  # Math mode.
  math_mode ? "$#{latex}$" : latex
end
Private Instance Methods
decorate_displaystyle(script)
click to toggle source
# File lib/hwp_script_to_latex/converter.rb, line 211
# Prefixes every \sum, \int and \oint in +script+ with \displaystyle.
#
# @param script [String] LaTeX text
# @return [String] a new string with the prefixes inserted
def decorate_displaystyle(script)
  decorations = [
    [/\\sum/, "\\displaystyle \\sum"],
    [/\\int/, "\\displaystyle \\int"],
    [/\\oint/, "\\displaystyle \\oint"]
  ]

  decorations.reduce(script) do |text, (pattern, replacement)|
    text.gsub(pattern, replacement)
  end
end
remove_curly_brackets(term)
click to toggle source
# File lib/hwp_script_to_latex/converter.rb, line 219
# Strips one leading "{" and one trailing "}" from +term+.
#
# The anchors are string anchors (\A, \Z) rather than line anchors (^, $):
# with line anchors a multi-line term lost a brace at the start/end of every
# line, which unbalanced nested groups. Each brace is removed independently,
# so a term that has only one of the two still loses it.
#
# @param term [String] a captured term, possibly wrapped in curly brackets
# @return [String] the term without its outermost brackets
def remove_curly_brackets(term)
  # \Z (not \z) keeps the old behaviour of ignoring a single trailing newline.
  term.sub(/\A\{/, '').sub(/\}\Z/, '')
end
replace_block_commands(script)
click to toggle source
# File lib/hwp_script_to_latex/converter.rb, line 114
# Rewrites every block command (<tt>name {...}</tt>) as a LaTeX environment:
#   name {...} => \begin{latex_name} ... \end{latex_name}
#
# Each rule in @block_commands is applied repeatedly, always to the leftmost
# remaining match, until it no longer matches.
#
# @param script [String] text to convert
# @return [String] the converted text
def replace_block_commands(script)
  @block_commands.reduce(script) do |text, command|
    pattern = %r((?<![a-zA-Z])(?i:#{command[:regex]})\s*(?<block_content>{(?>[^{}]+|(?:\g<block_content>))*}))

    while (found = pattern.match(text))
      # Drop the opening and closing curly brackets of the block.
      inner = remove_curly_brackets(found['block_content'])
      environment = format(" \\begin{#{command[:latex]}} %s \\end{#{command[:latex]}} ", inner)
      # Splice by hand so backslashes in the replacement are taken literally.
      text = found.pre_match + environment + found.post_match
    end

    text
  end
end
replace_default_commands(script)
click to toggle source
# File lib/hwp_script_to_latex/converter.rb, line 94
# Rewrites commands that take a single right-hand term:
#   command term => latex_command {term}
#
# Each rule in @default_commands is applied repeatedly until it no longer
# matches. The replacement is supplied through a block: a replacement *string*
# given to String#sub has its backslash sequences interpreted (\\ collapses to
# \, \1 and \& become back-references), which would corrupt LaTeX already
# present in the captured term.
#
# @param script [String] text to convert
# @return [String] the converted text
def replace_default_commands(script)
  right_term_group_name = "rt"
  # Same for every command, so build it once.
  right_term_regex = RIGHT_TERM_REGEX % [right_term_group_name, right_term_group_name]

  @default_commands.each do |command|
    command_regex = %r((?<![a-zA-Z\\])(?i:#{command[:regex]})\s*#{right_term_regex})

    while (match_data = script.match(command_regex))
      right_term = remove_curly_brackets(match_data[right_term_group_name])
      script = script.sub(command_regex) { " #{command[:latex]} {#{right_term}} " }
    end
  end

  script
end
replace_fractions(script)
click to toggle source
# File lib/hwp_script_to_latex/converter.rb, line 160
# Rewrites fractions:
#   A over B => \dfrac {A}{B}
#   A atop B => \dfrac {A}{B}
#
# The leftmost match is rewritten repeatedly until none remains. The
# replacement is supplied through a block: a replacement *string* given to
# String#sub has its backslash sequences interpreted (\\ collapses to \, \1 and
# \& become back-references), which would corrupt LaTeX already present in the
# captured terms.
#
# @param script [String] text to convert
# @return [String] the converted text
def replace_fractions(script)
  left_term_group_name = "lt"
  right_term_group_name = "rt"

  left_term_regex = LEFT_TERM_REGEX % [left_term_group_name, left_term_group_name]
  right_term_regex = RIGHT_TERM_REGEX % [right_term_group_name, right_term_group_name]
  # "overline" is a different command, and an escaped \over is already LaTeX.
  fraction_regex = %r(#{left_term_regex}\s*(?<!\\)(?i:over(?!line)|atop)\s*#{right_term_regex})

  while (match_data = script.match(fraction_regex))
    # Read the two terms by name so unrelated captures are never touched.
    numerator = remove_curly_brackets(match_data[left_term_group_name])
    denominator = remove_curly_brackets(match_data[right_term_group_name])
    script = script.sub(fraction_regex) { " \\dfrac {#{numerator}}{#{denominator}} " }
  end

  script
end
replace_keyword_commands(script)
click to toggle source
# File lib/hwp_script_to_latex/converter.rb, line 66
# Applies every rule in @keyword_commands until the text stops changing.
#
# Example input: 1star(2star(3starcdotsstar(99star100))cdots)
# If "cdots" happens to be matched first, the adjacent "star" keeps the text
# from being converted properly in a single sweep. So each rule is repeated
# until it has no effect (inner loop), and the sweep over all rules is repeated
# until a whole sweep changes nothing (outer loop). This is slower, but is kept
# until a better approach is found.
#
# @param script [String] text to convert
# @return [String] the converted text
def replace_keyword_commands(script)
  rules = @keyword_commands

  loop do # outer: one sweep over every rule
    changed = false

    rules.each do |rule|
      pattern = rule_regex(rule)

      loop do # inner: repeat this rule until it has no effect
        previous = script
        script = script.gsub(pattern, rule[:latex])
        break if script == previous

        changed = true
      end
    end

    break unless changed
  end

  script
end
replace_keywords(script)
click to toggle source
# File lib/hwp_script_to_latex/converter.rb, line 132
# Applies the plain substitution rules (@meta, then @symbols, then
# @reserved_words) until the text stops changing.
#
# Example input: 1star(2star(3starcdotsstar(99star100))cdots)
# If "cdots" happens to be matched first, the adjacent "star" keeps the text
# from being converted properly in a single sweep. So each rule is repeated
# until it has no effect (inner loop), and the sweep over all rules is repeated
# until a whole sweep changes nothing (outer loop). This is slower, but is kept
# until a better approach is found.
#
# @param script [String] text to convert
# @return [String] the converted text
def replace_keywords(script)
  rules = @meta + @symbols + @reserved_words

  loop do # outer: one sweep over every rule
    changed = false

    rules.each do |rule|
      pattern = rule_regex(rule)

      loop do # inner: repeat this rule until it has no effect
        previous = script
        script = script.gsub(pattern, rule[:latex])
        break if script == previous

        changed = true
      end
    end

    break unless changed
  end

  script
end
replace_sqrt(script)
click to toggle source
# File lib/hwp_script_to_latex/converter.rb, line 179
# Rewrites roots:
#   sqrt A      => \sqrt {A}
#   sqrt A of B => \sqrt [A]{B}
# ("root" is accepted as well as "sqrt".)
#
# The leftmost match is rewritten repeatedly until none remains.
#
# @param script [String] text to convert
# @return [String] the converted text
def replace_sqrt(script)
  first_group = 'rt1'
  second_group = 'rt2'

  right_term_regex1 = RIGHT_TERM_REGEX % [first_group, first_group]
  right_term_regex2 = RIGHT_TERM_REGEX % [second_group, second_group]
  sqrt_regex = %r((?<!\\)(?i:sqrt|root)\s*#{right_term_regex1}(\s*(?i:of)\s*#{right_term_regex2})?)

  while (found = sqrt_regex.match(script))
    # Strip the curly brackets of the first term.
    first_term = remove_curly_brackets(found[first_group])

    latex =
      if found[second_group]
        # "of" form: also strip the curly brackets of the second term.
        format(" \\sqrt [%s]{%s} ", first_term, remove_curly_brackets(found[second_group]))
      else
        format(" \\sqrt {%s} ", first_term)
      end

    # Splice by hand so backslashes in the replacement are taken literally.
    script = found.pre_match + latex + found.post_match
  end

  script
end