class Datasets::Wikipedia::ArticlesListener
Public Class Methods
new(block)
click to toggle source
# File lib/datasets/wikipedia.rb, line 78
# Sets up an empty parsing state.
#
# block:: object stored in @block; on_page invokes it with +call+ for
#         every finished page.
def initialize(block)
  @block = block

  # Nothing has been opened yet, so no object is under construction.
  @page = @revision = @contributor = @current_tag = nil

  # Names of the currently open elements, outermost first.
  @tag_stack = []
  # One text buffer per open element, plus a base buffer that collects
  # any text seen outside of every element.
  @text_stack = [""]

  @first_page = true
end
Public Instance Methods
cdata(contnet)
click to toggle source
# File lib/datasets/wikipedia.rb, line 156
# Appends the content of a CDATA section to the text buffer of the
# innermost open element, exactly as #text does for character data.
#
# content:: the CDATA payload to append.
#
# Returns the buffer that was appended to.
def cdata(content)
  # The parameter used to be misspelled ("contnet") while the body
  # referenced "content", so every CDATA section raised NameError.
  @text_stack.last << content
end
tag_end(name)
click to toggle source
# File lib/datasets/wikipedia.rb, line 103
# Handles a closing tag: copies the text collected for the element into
# the page, revision or contributor being built, then pops the element
# off both stacks.
#
# name:: name of the element that is being closed.
#
# Integer() and Time.iso8601 raise on malformed text; in that case the
# stacks are not popped.
def tag_end(name)
  # Text accumulated between this element's start and end tags.
  text = @text_stack.last

  case name
  when "page"
    # The page is complete: hand it to the callback and start over.
    on_page(@page)
    @page = nil
  when "title" then @page.title = text
  when "ns" then @page.namespace = Integer(text)
  when "id"
    id = Integer(text)
    # <id> occurs under several parents. This element is still on top of
    # the tag stack, so the parent is the entry right below it.
    case @tag_stack[-2]
    when "page" then @page.id = id
    when "revision" then @revision.id = id
    when "contributor" then @contributor.id = id
    end
  when "restrictions" then @page.restrictions = text.split(":")
  when "revision"
    @page.revision = @revision
    @revision = nil
  when "parentid" then @revision.parent_id = Integer(text)
  when "timestamp" then @revision.timestamp = Time.iso8601(text)
  when "contributor"
    @revision.contributor = @contributor
    @contributor = nil
  when "username" then @contributor.user_name = text
  when "minor"
    # TODO
  when "comment" then @revision.comment = text
  when "model" then @revision.model = text
  when "format" then @revision.format = text
  when "text" then @revision.text = text
  when "sha1" then @revision.sha1 = text
  end

  pop_stacks
end
tag_start(name, attributes)
click to toggle source
# File lib/datasets/wikipedia.rb, line 89
# Handles an opening tag: records the element on the stacks and, for
# container elements, creates the object that the matching tag_end
# will fill in.
#
# name::       name of the element that is being opened.
# attributes:: attributes of the element, looked up by name with [];
#              only "title" of <redirect> is read.
def tag_start(name, attributes)
  push_stacks(name)

  if name == "page"
    @page = Page.new
  elsif name == "revision"
    @revision = Revision.new
  elsif name == "contributor"
    @contributor = Contributor.new
  elsif name == "redirect"
    # <redirect> carries its target in an attribute, not in its text.
    @page.redirect = attributes["title"]
  end
end
text(data)
click to toggle source
# File lib/datasets/wikipedia.rb, line 152
# Appends character data to the text buffer of the innermost open
# element. The buffer is mutated in place.
#
# data:: the text to append.
#
# Returns the buffer that was appended to.
def text(data)
  buffer = @text_stack.last
  buffer << data
end
Private Instance Methods
on_page(page)
click to toggle source
# File lib/datasets/wikipedia.rb, line 161
# Passes a page to the callable that was given to the constructor.
#
# page:: the value to forward to the callable.
#
# Returns whatever the callable returns.
def on_page(page)
  callback = @block
  callback.call(page)
end
pop_stacks()
click to toggle source
# File lib/datasets/wikipedia.rb, line 170
# Discards the state of the innermost open element: its text buffer and
# its name.
#
# Returns the tag name that was removed, or nil when the tag stack was
# already empty.
def pop_stacks
  @text_stack.pop
  closed_tag = @tag_stack.pop
  closed_tag
end
push_stacks(tag)
click to toggle source
# File lib/datasets/wikipedia.rb, line 165
# Registers a newly opened element: records its name and gives it a
# fresh, empty text buffer.
#
# tag:: name of the element that was opened.
#
# Returns the text stack.
def push_stacks(tag)
  @tag_stack.push(tag)
  @text_stack.push("")
end