module Tonkachi
Constants
- VERSION
Public Class Methods
free_rowspan_colspan(table_node)
click to toggle source
# File lib/tonkachi.rb, line 9 def self.free_rowspan_colspan(table_node) # free colspan table_node.css('tr').each{|tr| tr.css('th, td').each{|td| if td.has_attribute?('colspan') then colspan_count = td.attr('colspan').to_i td.delete('colspan') (colspan_count - 1).times{ td.next = td.dup } end } } # fill td col_count_max = 0 rowspan_count_array = Array.new table_node.css('tr').each{|tr| col_count = tr.css('th, td').length + rowspan_count_array.length if col_count_max < col_count then col_count_max = col_count end rowspan_count_array.map!{|rowspan_count| rowspan_count - 1 } rowspan_count_array.delete(0) tr.css('th, td').each{|td| if td.has_attribute?('rowspan') then rowspan_count = td.attr('rowspan').to_i - 1 unless rowspan_count == 0 then rowspan_count_array.push(rowspan_count) end end } } rowspan_count_array = Array.new table_node.css('tr').each{|tr| col_count = tr.css('th, td').length + rowspan_count_array.length lack_td_count = col_count_max - col_count lack_td_count.times{ tr.add_child('<td> </td>') } rowspan_count_array.map!{|rowspan_count| rowspan_count - 1 } rowspan_count_array.delete(0) tr.css('th, td').each{|td| if td.has_attribute?('rowspan') then rowspan_count = td.attr('rowspan').to_i - 1 unless rowspan_count == 0 then rowspan_count_array.push(rowspan_count) end end } } # free rowspan col_max_idx = table_node.at_css('tr').css('th, td').length - 1 (0..col_max_idx).each{|col_idx| rowspan = Hash.new if col_idx == col_max_idx then table_node.css('tr').each{|tr| if rowspan.has_key?(:count) then tr.css('th, td')[col_idx - 1].next = rowspan[:td].dup rowspan[:count] -= 1 if rowspan[:count] == 0 then rowspan = Hash.new end elsif tr.css('th, td')[col_idx]&.has_attribute?('rowspan') then rowspan_count = tr.css('th, td')[col_idx].attr('rowspan').to_i - 1 unless rowspan_count == 0 then rowspan[:count] = rowspan_count rowspan[:td] = tr.css('th, td')[col_idx] end tr.css('th, td')[col_idx].delete('rowspan') end } else table_node.css('tr').each{|tr| if rowspan.has_key?(:count) then if tr.css('th, td')[col_idx] then tr.css('th, td')[col_idx].previous = rowspan[:td].dup else tr.add_child(rowspan[:td].dup) end rowspan[:count] -= 1 if rowspan[:count] == 0 then rowspan = Hash.new end elsif tr.css('th, td')[col_idx]&.has_attribute?('rowspan') then rowspan_count = tr.css('th, td')[col_idx].attr('rowspan').to_i - 1 unless rowspan_count == 0 then rowspan[:count] = rowspan_count rowspan[:td] = tr.css('th, td')[col_idx] end tr.css('th, td')[col_idx].delete('rowspan') end } end } # return result return table_node end
get_css_path(node)
click to toggle source
# File lib/tonkachi.rb, line 141 def self.get_css_path(node) parent_nodes = [get_class_id(node)] parent_node = node.parent parent_nodes.push(get_class_id(parent_node)) while parent_node.name != 'html' do parent_node = parent_node.parent parent_nodes.push(get_class_id(parent_node)) end return insert_class_id(parent_nodes.reverse, node.css_path) end
get_nodes(node)
click to toggle source
# File lib/tonkachi.rb, line 134 def self.get_nodes(node) @nodes = Array.new node_with_flag = [node, false] dfs(node_with_flag) return @nodes end
transpose_nokogiri_table(table_node)
click to toggle source
# File lib/tonkachi.rb, line 113 def self.transpose_nokogiri_table(table_node) # init transpose table node transpose_table_node = Nokogiri::XML::Node.new('table', table_node) # prepare tr num_of_org_col = table_node.at_css('tr').css('th, td').length num_of_org_col.times{ transpose_table_node.add_child('<tr></tr>') } # pick up th and td from original table and put them in transpose table table_node.css('tr').each{|tr| tr.css('th, td').each_with_index{|td, col_idx| transpose_table_node.css('tr')[col_idx].add_child(td.dup) } } # return result return transpose_table_node end
Private Class Methods
dfs(node_with_flag)
click to toggle source
# File lib/tonkachi.rb, line 153 def self.dfs(node_with_flag) node_with_flag[1] = true @nodes.push(node_with_flag[0]) children = node_with_flag[0].children children_with_flag = children.map{|child| [child, false] } children_with_flag.each{|child_with_flag| unless child_with_flag[1] then dfs(child_with_flag) end } return nil end
get_class_id(node)
click to toggle source
# File lib/tonkachi.rb, line 168 def self.get_class_id(node) node_attributes = [node.name, {}] if node.attributes.has_key?('class') then node_attributes[1]['class'] = node.attributes['class'].value end if node.attributes.has_key?('id') then node_attributes[1]['id'] = node.attributes['id'].value end return node_attributes end
insert_class_id(parent_nodes, node_css_path)
click to toggle source
# File lib/tonkachi.rb, line 179 def self.insert_class_id(parent_nodes, node_css_path) return_node_css_path = Array.new node_css_path.split(' > ').each_with_index{|tag, idx| unless parent_nodes[idx][1].empty? then if parent_nodes[idx][1].has_key?('class') then tag += '.' + parent_nodes[idx][1]['class'] end if parent_nodes[idx][1].has_key?('id') then tag += '#' + parent_nodes[idx][1]['id'] end end return_node_css_path.push(tag) } return return_node_css_path.join(' > ') end