class JMDict
Constants
- VERSION
Public Class Methods
new(filename)
click to toggle source
# File lib/jmdict.rb, line 7
# Opens the dictionary file and wraps it in a streaming XML reader.
#
# filename - path of the JMdict XML file, passed to File.open.
#
# The reader is stored in @jmdict_file. The file handle opened here is
# handed to the reader and is not closed by this method.
def initialize(filename)
  xml_io = File.open(filename)
  @jmdict_file = Nokogiri::XML::Reader(xml_io)
end
Public Instance Methods
each_entry() { |parse_entry(XML(outer_xml))| ... }
click to toggle source
# File lib/jmdict.rb, line 11
# Iterates over the reader stored in @jmdict_file and yields one parsed
# entry (the result of parse_entry) per <entry> start element.
#
# Nodes whose name is not 'entry', or that are not of type TYPE_ELEMENT
# (closing tags, for example), are skipped.
#
# Returns an Enumerator over the parsed entries when no block is given.
# NOTE(review): the underlying reader is presumably forward-only, so a
# second full iteration on the same instance may yield nothing - confirm.
def each_entry
  return to_enum(:each_entry) unless block_given?

  @jmdict_file.each do |node|
    next unless node.name == 'entry' &&
                node.node_type == Nokogiri::XML::Reader::TYPE_ELEMENT

    # Only the current entry is materialised as a document, so the whole
    # dictionary never has to be held in memory as a DOM.
    yield(parse_entry(Nokogiri::XML(node.outer_xml)))
  end
end
fill_elems(tag, element)
click to toggle source
Many elements consist of text-only subelements
# File lib/jmdict.rb, line 20
# Collects the text of every subelement of +element+ matched by +tag+.
# Many JMdict elements consist of text-only subelements, so this is the
# common way to read them.
#
# tag     - selector string passed to element.css (a tag name in practice).
# element - node-like object responding to css.
#
# Returns an Array with the text of each match, in the order css yields
# them; the Array is empty when nothing matches.
def fill_elems(tag, element)
  element.css(tag).map(&:text)
end
parse_entry(entry)
click to toggle source
For details on the fields, see the JMdict DTD, Rev 1.09 (www.edrdg.org/jmdict/dtd-jmdict.xml)
# File lib/jmdict.rb, line 29
# Converts one parsed <entry> document into a plain Hash.
#
# For the meaning of each field see the JMdict DTD, Rev 1.09
# (www.edrdg.org/jmdict/dtd-jmdict.xml).
#
# entry - node-like object for the entry; must respond to css.
#
# Returns a Hash with the keys "ent_seq" (Integer) and "k_ele", "r_ele",
# "sense" (each an Array of Hashes, one per matching element).
def parse_entry(entry)
  {
    'ent_seq' => entry.css('ent_seq').text.to_i,
    'k_ele' => entry.css('k_ele').map { |elem| parse_k_ele(elem) },
    'r_ele' => entry.css('r_ele').map { |elem| parse_r_ele(elem) },
    'sense' => entry.css('sense').map { |elem| parse_sense(elem) }
  }
end

# Builds the Hash for one kanji element (<k_ele>).
def parse_k_ele(elem)
  {
    'keb' => elem.css('keb').text,
    'ke_inf' => fill_elems('ke_inf', elem),
    'ke_pri' => fill_elems('ke_pri', elem)
  }
end

# Builds the Hash for one reading element (<r_ele>).
def parse_r_ele(elem)
  {
    'reb' => elem.css('reb').text,
    're_nokanji' => elem.css('re_nokanji').text,
    're_restr' => fill_elems('re_restr', elem),
    're_inf' => fill_elems('re_inf', elem),
    're_pri' => fill_elems('re_pri', elem)
  }
end

# Builds the Hash for one <sense>, including its language sources and glosses.
def parse_sense(sense_elem)
  sense = {}
  %w[stagk stagr pos xref ant field misc s_inf].each do |tag|
    sense[tag] = fill_elems(tag, sense_elem)
  end
  # The DTD names this element "lsource"; the output key stays "l_source"
  # so existing consumers of the Hash are unaffected.
  sense['l_source'] = sense_elem.css('lsource').map { |ls| parse_lsource(ls) }
  sense['dial'] = fill_elems('dial', sense_elem)
  sense['gloss'] = sense_elem.css('gloss').map { |g| parse_gloss(g) }
  sense
end

# Builds the Hash for one language-source element (<lsource>).
# ls_type and ls_wasei are optional attributes and are nil when absent.
def parse_lsource(ls)
  {
    'lang' => ls.lang,
    'ls_type' => ls['ls_type'],
    'ls_wasei' => ls['ls_wasei'],
    'ls_source' => ls.text
  }
end

# Builds the Hash for one <gloss>. The optional keys ("g_gend", "g_type",
# "pri") are only present when the source element carries them.
def parse_gloss(g)
  gloss = { 'lang' => g.lang }
  # Optional attributes are read with [] and only when present; they
  # cannot be accessed the same way as the lang attribute.
  gloss['g_gend'] = g['g_gend'] if g.key?('g_gend')
  gloss['g_type'] = g['g_type'] if g.key?('g_type')
  # A gloss may hold plain text or a 'pri' element. In Rev 1.09 no 'pri'
  # has any content; it is handled only because the DTD allows it.
  pri = g.at_css('pri')
  gloss['pri'] = pri.text if pri
  gloss['gloss'] = g.text
  gloss
end

private :parse_k_ele, :parse_r_ele, :parse_sense, :parse_lsource, :parse_gloss