class Datasets::Wikipedia::ArticlesListener

Public Class Methods

new(block) click to toggle source
# File lib/datasets/wikipedia.rb, line 78
# Prepares a listener with empty parsing state.
#
# @param block [#call] callable kept in @block; on_page later invokes it
#   with each completed page.
def initialize(block)
  @block = block
  # Records under construction; nothing is open before parsing starts.
  @page = @revision = @contributor = nil
  @current_tag = nil
  # Names of the tags that are currently open, innermost last.
  @tag_stack = []
  # One text buffer per open tag, plus this initial buffer that receives
  # any text arriving before the first tag is opened.
  @text_stack = [""]
  @first_page = true
end

Public Instance Methods

cdata(contnet) click to toggle source
# File lib/datasets/wikipedia.rb, line 156
# Appends the content of a CDATA section to the text buffer on top of
# @text_stack, i.e. the buffer of the most recently opened tag.
#
# The parameter used to be misspelled `contnet` while the body referenced
# `content`, so every call raised NameError.
#
# @param content [String] raw character data from the CDATA section
# @return [String] the buffer after appending
def cdata(content)
  @text_stack.last << content
end
tag_end(name) click to toggle source
# File lib/datasets/wikipedia.rb, line 103
# Handles a closing tag: copies the text collected for that tag into the
# page, revision or contributor record being built, then pops both stacks.
#
# Closing "page" hands the finished page to on_page and clears @page.
# Closing "revision" / "contributor" attaches the finished record to its
# parent and clears it. Tags not listed below only pop the stacks.
#
# @param name [String] name of the tag being closed
# @raise [ArgumentError] when "ns", "id" or "parentid" text is not an
#   integer, or "timestamp" text is not ISO 8601
def tag_end(name)
  # Text gathered between this tag's start and end.
  closed_text = @text_stack.last

  case name
  when "page"
    on_page(@page)
    @page = nil
  when "title" then @page.title = closed_text
  when "ns" then @page.namespace = Integer(closed_text)
  when "id"
    id = Integer(closed_text)
    # <id> appears under several elements; the enclosing tag decides which
    # record receives it.
    case @tag_stack[-2]
    when "page" then @page.id = id
    when "revision" then @revision.id = id
    when "contributor" then @contributor.id = id
    end
  when "restrictions" then @page.restrictions = closed_text.split(":")
  when "revision"
    @page.revision = @revision
    @revision = nil
  when "parentid" then @revision.parent_id = Integer(closed_text)
  when "timestamp" then @revision.timestamp = Time.iso8601(closed_text)
  when "contributor"
    @revision.contributor = @contributor
    @contributor = nil
  when "username" then @contributor.user_name = closed_text
  when "minor"
    # TODO
  when "comment" then @revision.comment = closed_text
  when "model" then @revision.model = closed_text
  when "format" then @revision.format = closed_text
  when "text" then @revision.text = closed_text
  when "sha1" then @revision.sha1 = closed_text
  end

  pop_stacks
end
tag_start(name, attributes) click to toggle source
# File lib/datasets/wikipedia.rb, line 89
# Handles an opening tag: pushes the tag onto the stacks and, for the
# container tags, starts a fresh record to be filled in as children close.
#
# @param name [String] name of the tag being opened
# @param attributes [Hash] attribute values keyed by name; only "title"
#   is read, and only for the "redirect" tag
def tag_start(name, attributes)
  push_stacks(name)

  case name
  when "page" then @page = Page.new
  when "revision" then @revision = Revision.new
  when "contributor" then @contributor = Contributor.new
  when "redirect" then @page.redirect = attributes["title"]
  end
end
text(data) click to toggle source
# File lib/datasets/wikipedia.rb, line 152
# Appends character data to the text buffer on top of @text_stack, i.e.
# the buffer of the most recently opened tag.
#
# @param data [String] text to append
# @return [String] the buffer after appending
def text(data)
  buffer = @text_stack.last
  buffer << data
end

Private Instance Methods

on_page(page) click to toggle source
# File lib/datasets/wikipedia.rb, line 161
# Passes a completed page to the callable given to the constructor.
#
# @param page [Object] the page record to hand over
# @return [Object] whatever the callable returns
def on_page(page)
  handler = @block
  handler.call(page)
end
pop_stacks() click to toggle source
# File lib/datasets/wikipedia.rb, line 170
# Drops the innermost entry from both stacks once a tag has been closed.
#
# @return [String, nil] the tag name removed from @tag_stack, or nil when
#   the tag stack was already empty
def pop_stacks
  closed_tag = @tag_stack.pop
  @text_stack.pop
  closed_tag
end
push_stacks(tag) click to toggle source
# File lib/datasets/wikipedia.rb, line 165
# Records a newly opened tag and gives it a fresh, empty text buffer.
#
# @param tag [String] name of the tag being opened
# @return [Array<String>] @text_stack after the push
def push_stacks(tag)
  @tag_stack.push(tag)
  @text_stack.push("")
end