class CBETA::CharCount
Public Class Methods
new(xml_root)
click to toggle source
# File lib/cbeta/char_count.rb, line 2
# Builds a counter bound to one XML root folder.
#
# xml_root - path of the folder that stat_all / stat_canon scan with
#            Dir.entries and File.join.
#
# @result starts as an empty Hash; stat_file later fills it with
# work id => character count.
def initialize(xml_root)
  @xml_root = xml_root
  @result = {}
end
Public Instance Methods
char_count(canon=nil)
click to toggle source
# File lib/cbeta/char_count.rb, line 7
# Counts characters per work, either for one canon or for every canon.
#
# canon - name of a canon folder under the XML root; nil (the default)
#         means "walk every canon" via stat_all.
#
# Returns the Hash of work id => character count built by this call.
def char_count(canon=nil)
  # Start from a fresh table on every call. stat_file only zeroes a work the
  # first time its key is seen, so reusing the previous Hash would add a
  # second pass on top of the first and double every count.
  @result = {}
  if canon.nil?
    stat_all
  else
    stat_canon(canon)
  end
  @result
end
Private Instance Methods
handle_node(e)
click to toggle source
# File lib/cbeta/char_count.rb, line 15
# Dispatches one XML node to the handler that knows how to count it.
#
# e - a node responding to comment?, text? and name.
#
# Comments and the elements foreign / mulu / rdg / reg / sic contribute
# nothing. A <g> element counts as exactly one character. <note> and <t>
# have their own filters; any other element is simply descended into.
def handle_node(e)
  if e.comment?
    nil
  elsif e.text?
    handle_text(e)
  else
    case e.name
    when 'foreign', 'mulu', 'rdg', 'reg', 'sic' then nil
    when 'g'    then @result[@work] += 1
    when 'note' then handle_note(e)
    when 't'    then handle_t(e)
    else traverse(e)
    end
  end
end
handle_note(e)
click to toggle source
# File lib/cbeta/char_count.rb, line 28
# Counts the content of a <note> only when its 'place' attribute is
# 'inline' or 'interlinear'; any other note (including one without a
# 'place' attribute) is ignored.
#
# e - the note node; read through e['place'].
def handle_note(e)
  return unless %w(inline interlinear).include?(e['place'])

  traverse(e)
end
handle_t(e)
click to toggle source
# File lib/cbeta/char_count.rb, line 34
# Counts the content of a <t> element unless it carries a 'place'
# attribute whose value contains 'foot'.
#
# e - the node; queried with has_attribute?('place') and e['place'].
def handle_t(e)
  in_foot = e.has_attribute?('place') && e['place'].include?('foot')
  traverse(e) unless in_foot
end
handle_text(e)
click to toggle source
# File lib/cbeta/char_count.rb, line 41
# Adds the length of one text node to the running total of the current work.
#
# e - a text node; its content and its parent's element name are read.
#
# Nothing is counted when the text is empty after removing one trailing
# line break, or when the node sits directly under an <app> element.
def handle_text(e)
  text = e.content.chomp
  return if text.empty? || e.parent.name == 'app'

  # CBETA XML contains superfluous line breaks between characters;
  # they must not be counted.
  @result[@work] += text.delete("\n\r").size
end
stat_all()
click to toggle source
# File lib/cbeta/char_count.rb, line 52
# Runs stat_canon for every canon folder directly under the XML root,
# in sorted order.
#
# Dot entries and the 'schema' folder are not canons and are skipped.
def stat_all
  Dir.entries(@xml_root).sort.each do |canon|
    next if canon.start_with?('.') || canon == 'schema'
    # A regular file in the root (README, licence, ...) is not a canon;
    # handing it to stat_canon would make Dir.entries raise Errno::ENOTDIR.
    next unless File.directory?(File.join(@xml_root, canon))

    stat_canon(canon)
  end
end
stat_canon(canon)
click to toggle source
# File lib/cbeta/char_count.rb, line 60
# Runs stat_vol for every volume folder of one canon, in sorted order.
#
# canon - folder name under the XML root; nil makes this a no-op.
#
# Prints a progress line before scanning. Dot entries are skipped.
def stat_canon(canon)
  return if canon.nil?

  puts 'stat canon: ' + canon
  canon_dir = File.join(@xml_root, canon)
  Dir.entries(canon_dir).sort.each do |name|
    stat_vol(File.join(canon_dir, name)) unless name.start_with?('.')
  end
end
stat_file(fn)
click to toggle source
# File lib/cbeta/char_count.rb, line 71
# Counts the characters of one XML file and adds them to its work's total.
#
# fn - path of the XML file; the work id is derived from its basename.
#
# The basename has the form <canon><volume>n<number>, e.g. 'T01n0001'.
# Dropping the volume part ('01n') yields the work id 'T0001', so files of
# the same work in different volumes share one counter.
def stat_file(fn)
  # The canon prefix may be longer than one letter (e.g. 'GA001n0001' —
  # NOTE(review): confirm against the canon list of the data set in use);
  # a single-letter pattern would leave such names un-normalised.
  @work = File.basename(fn, '.xml').sub(/\A([A-Z]+)\d{2,3}n(.*)\z/, '\1\2')
  # Every id starting with 'T0220' (e.g. 'T0220a') is folded into one
  # counter keyed 'T0220'.
  @work = 'T0220' if @work.start_with?('T0220')

  unless @result.key?(@work)
    puts "stat work: #{@work}"
    @result[@work] = 0
  end

  doc = CBETA.open_xml(fn)
  traverse(doc.at_xpath('/TEI/text/body'))
end
stat_vol(vol_folder)
click to toggle source
# File lib/cbeta/char_count.rb, line 85
# Runs stat_file for every entry of one volume folder, in sorted order.
#
# vol_folder - path of the volume folder.
#
# Dot entries are skipped; every other entry is passed on as a full path.
def stat_vol(vol_folder)
  Dir.entries(vol_folder).sort.each do |name|
    stat_file(File.join(vol_folder, name)) unless name.start_with?('.')
  end
end
traverse(e)
click to toggle source
# File lib/cbeta/char_count.rb, line 93
# Passes each child of a node to handle_node, in document order.
#
# e - a node responding to children.
def traverse(e)
  e.children.each(&method(:handle_node))
end