class Datahen::Scraper::RubyParserExecutor

Attributes

limbo_self[RW]

Limbo self page flag. @return [Boolean]

refetch_self[RW]

Refetch self page flag. @return [Boolean] @note It is stronger than reparse_self flag.

reparse_self[RW]

Reparse self page flag. @return [Boolean] @note It is stronger than limbo_self flag.

save[RW]

Public Class Methods

exposed_methods() click to toggle source
# File lib/datahen/scraper/ruby_parser_executor.rb, line 26
# Names of the methods this executor exposes.
# NOTE(review): presumably these are the helpers made callable from the
# parser script's isolated binding -- confirm against the code that consumes
# this list.
#
# @return [Array<Symbol>] frozen list of method names.
def self.exposed_methods
  %i[
    content
    failed_content
    outputs
    pages
    page
    save_pages
    save_outputs
    find_output
    find_outputs
    refetch
    reparse
    limbo
  ].freeze
end
new(options={}) click to toggle source
# File lib/datahen/scraper/ruby_parser_executor.rb, line 17
# Builds the executor state from an options hash.
#
# @param options [Hash] executor settings:
#   * :filename     - required; raises "Filename is required" when absent.
#   * :page         - optional page; defaults to nil.
#   * :gid          - only consulted when the page has no 'gid' entry; raises
#                     when neither source provides a GID.
#   * :job_id       - required; a missing key surfaces as the KeyError raised
#                     by Hash#fetch.
#   * :vars         - optional page vars; defaults to an empty hash.
#   * :keep_outputs - optional; coerced to true/false, defaults to false.
# @raise [RuntimeError] when the filename or the GID cannot be determined.
# @raise [KeyError] when :job_id is missing.
def initialize(options={})
  @filename = options.fetch(:filename) { raise "Filename is required"}
  @page = options.fetch(:page, nil)

  # A GID carried by the page wins over an explicitly supplied :gid option.
  gid_from_page = (self.page || {})['gid']
  @gid = gid_from_page || options.fetch(:gid) { raise "GID or a page with a GID is required"}

  @job_id = options.fetch(:job_id)
  @page_vars = options.fetch(:vars) { {} }
  @keep_outputs = options.fetch(:keep_outputs, false) ? true : false
end

Public Instance Methods

content() click to toggle source
# File lib/datahen/scraper/ruby_parser_executor.rb, line 214
# Memoized result of get_content for this executor's job_id and gid.
# A nil/false result is not cached, so get_content is called again next time.
#
# @return [Object] whatever get_content returns (defined outside this view).
def content
  return @content if @content

  @content = get_content(job_id, gid)
end
eval_parser_script(save=false) click to toggle source
# File lib/datahen/scraper/ruby_parser_executor.rb, line 164
# Runs the parser script in an isolated binding and then persists the results.
#
# Steps, in order:
# 1. update_parsing_starting_status is called.
# 2. The page is built via init_page / init_page_vars and the
#    refetch/reparse/limbo self flags are reset to false.
# 3. The script named by `filename` is evaluated with `outputs`, `pages` and
#    `page` handed to isolated_binding.
# 4. Unless refetch_self was set, the collected pages and outputs are saved.
# 5. Exactly one follow-up runs, by priority: refetch, reparse, limbo, or
#    update_parsing_done_status when no flag is set.
#
# @param save [Boolean] when truthy, failures are reported through
#   handle_error before being re-raised.
# @return [Object] the result of the follow-up action chosen in step 5.
# @raise [SyntaxError, StandardError] anything raised by the script or by
#   saving, except Error::SafeTerminateError which is swallowed.
def eval_parser_script(save=false)
  update_parsing_starting_status

  page = init_page
  outputs = []
  pages = []
  page = init_page_vars(page)
  self.refetch_self = false
  self.reparse_self = false
  self.limbo_self = false

  begin
    context = isolated_binding({
      outputs: outputs,
      pages: pages,
      page: page
    })
    eval_with_context filename, context
  rescue Error::SafeTerminateError
    # Deliberate early exit requested by the script (see refetch/reparse/limbo
    # on the current page); carry on with whatever was collected so far.
  rescue SyntaxError, StandardError => e
    # SyntaxError is a ScriptError, not a StandardError, so it is listed
    # explicitly to get the same report-and-re-raise treatment.
    handle_error(e) if save
    raise e
  end

  puts "=========== Parsing Executed ==========="
  begin
    # A self-refetch discards the results of this run, so nothing is saved.
    save_pages_and_outputs(pages, outputs, :parsing) unless refetch_self
  rescue => e
    handle_error(e) if save
    raise e
  end

  if refetch_self
    refetch_page gid
  elsif reparse_self
    reparse_page gid
  elsif limbo_self
    limbo_page gid
  else
    update_parsing_done_status
  end
end
exec_parser(save=false) click to toggle source
# File lib/datahen/scraper/ruby_parser_executor.rb, line 43
# Entry point: records the save mode, announces it, and runs the parser.
#
# @param save [Boolean] stored in @save and forwarded to eval_parser_script;
#   it also selects which announcement is printed.
# @return [Object] the result of eval_parser_script.
def exec_parser(save=false)
  @save = save
  puts(save ? "Executing parser script" : "Trying parser script")

  eval_parser_script(save)
end
failed_content() click to toggle source
# File lib/datahen/scraper/ruby_parser_executor.rb, line 218
# Memoized result of get_failed_content for this executor's job_id and gid.
# A nil/false result is not cached, so the lookup is repeated on the next call.
#
# @return [Object] whatever get_failed_content returns (defined outside this view).
def failed_content
  return @failed_content if @failed_content

  @failed_content = get_failed_content(job_id, gid)
end
handle_error(e) click to toggle source
# File lib/datahen/scraper/ruby_parser_executor.rb, line 222
# Reports a parsing failure through parsing_update with status :failed.
#
# The logged text is a headline (exception class, message, job id and GID)
# followed by the output of clean_backtrace, joined with newlines.
#
# @param e [Exception] the error to report.
# @return [Object] the result of parsing_update.
def handle_error(e)
  headline = "Parsing #{e.class}: #{e.to_s} (Job:#{job_id} GID:#{gid})"
  trace = clean_backtrace(e.backtrace)
  error = [headline, trace].join("\n")

  parsing_update(job_id: job_id, gid: gid, parsing_status: :failed, log_error: error)
end
init_page_vars(page) click to toggle source
# File lib/datahen/scraper/ruby_parser_executor.rb, line 54
# Chooses the page handed to the parser script.
#
# When the executor already has a page (self.page is not nil) that page is
# returned untouched and the argument is ignored. Otherwise the given page
# receives the configured page vars under 'vars', but only when those vars
# are present and non-empty.
#
# @param page [Hash] freshly initialised page; mutated in place.
# @return [Hash] self.page when set, otherwise the given page.
def init_page_vars(page)
  existing = self.page
  return existing unless existing.nil?

  vars = @page_vars
  page['vars'] = vars unless vars.nil? || vars.empty?
  page
end
limbo(page_gid) click to toggle source
# File lib/datahen/scraper/ruby_parser_executor.rb, line 155
# Sends a page to limbo by GID.
#
# For any other page the request is delegated to limbo_page. For the page
# being parsed (page_gid equals gid) the limbo_self flag is set and
# Error::SafeTerminateError is raised, which eval_parser_script rescues.
#
# @param page_gid [String] GID of the page.
# @return [Object] the result of limbo_page for a foreign page.
# @raise [ArgumentError] when page_gid is not a String.
# @raise [Error::SafeTerminateError] when page_gid is the current page.
def limbo(page_gid)
  unless page_gid.is_a?(String)
    raise ArgumentError, "page_gid needs to be a String."
  end
  return limbo_page(page_gid) unless page_gid == gid

  self.limbo_self = true
  raise Error::SafeTerminateError
end
limbo_page(gid) click to toggle source
# File lib/datahen/scraper/ruby_parser_executor.rb, line 146
# Moves the page with the given GID to limbo, or only announces it in dry-run.
#
# When `save` is falsy nothing is sent; a "Would have" line is printed instead.
# Otherwise Client::JobPage#limbo is called with this executor's job_id.
#
# @param gid [String] GID of the page (shadows the executor's own gid here).
# @return [nil]
def limbo_page gid
  unless save
    puts "Would have limbo page #{gid}"
    return
  end

  Client::JobPage.new({gid: gid}).limbo(self.job_id)
  puts "Limbo page #{gid}"
end
refetch(page_gid) click to toggle source
# File lib/datahen/scraper/ruby_parser_executor.rb, line 119
# Requests a refetch of a page by GID.
#
# For any other page the request is delegated to refetch_page. For the page
# being parsed (page_gid equals gid) the refetch_self flag is set and
# Error::SafeTerminateError is raised, which eval_parser_script rescues.
#
# @param page_gid [String] GID of the page.
# @return [Object] the result of refetch_page for a foreign page.
# @raise [ArgumentError] when page_gid is not a String.
# @raise [Error::SafeTerminateError] when page_gid is the current page.
def refetch(page_gid)
  unless page_gid.is_a?(String)
    raise ArgumentError, "page_gid needs to be a String."
  end
  return refetch_page(page_gid) unless page_gid == gid

  self.refetch_self = true
  raise Error::SafeTerminateError
end
refetch_page(gid) click to toggle source
# File lib/datahen/scraper/ruby_parser_executor.rb, line 110
# Requests a refetch of the page with the given GID, or only announces it in dry-run.
#
# When `save` is falsy nothing is sent; a "Would have" line is printed instead.
# Otherwise Client::JobPage#refetch is called with this executor's job_id.
#
# @param gid [String] GID of the page (shadows the executor's own gid here).
# @return [nil]
def refetch_page gid
  unless save
    puts "Would have refetch page #{gid}"
    return
  end

  Client::JobPage.new({gid: gid}).refetch(self.job_id)
  puts "Refetch page #{gid}"
end
reparse(page_gid) click to toggle source
# File lib/datahen/scraper/ruby_parser_executor.rb, line 137
# Requests a reparse of a page by GID.
#
# For any other page the request is delegated to reparse_page. For the page
# being parsed (page_gid equals gid) the reparse_self flag is set and
# Error::SafeTerminateError is raised, which eval_parser_script rescues.
#
# @param page_gid [String] GID of the page.
# @return [Object] the result of reparse_page for a foreign page.
# @raise [ArgumentError] when page_gid is not a String.
# @raise [Error::SafeTerminateError] when page_gid is the current page.
def reparse(page_gid)
  unless page_gid.is_a?(String)
    raise ArgumentError, "page_gid needs to be a String."
  end
  return reparse_page(page_gid) unless page_gid == gid

  self.reparse_self = true
  raise Error::SafeTerminateError
end
reparse_page(gid) click to toggle source
# File lib/datahen/scraper/ruby_parser_executor.rb, line 128
# Requests a reparse of the page with the given GID, or only announces it in dry-run.
#
# When `save` is falsy nothing is sent; a "Would have" line is printed instead.
# Otherwise Client::JobPage#reparse is called with this executor's job_id.
#
# @param gid [String] GID of the page (shadows the executor's own gid here).
# @return [nil]
def reparse_page gid
  unless save
    puts "Would have reparse page #{gid}"
    return
  end

  Client::JobPage.new({gid: gid}).reparse(self.job_id)
  puts "Reparse page #{gid}"
end
save_type() click to toggle source
# File lib/datahen/scraper/ruby_parser_executor.rb, line 106
# Save type identifier for this executor.
# NOTE(review): presumably consumed by the shared save helpers to label the
# kind of save being performed -- confirm against the caller.
#
# @return [Symbol] always :parsing.
def save_type
  :parsing
end
update_parsing_done_status() click to toggle source
# File lib/datahen/scraper/ruby_parser_executor.rb, line 90
# Marks the page's parsing as done via parsing_update.
#
# Does nothing when `save` is falsy. Any response code other than 200 is
# printed as an error and then raised.
#
# @return [nil]
# @raise [RuntimeError] when the response code is not 200.
def update_parsing_done_status
  return unless save

  response = parsing_update(job_id: job_id, gid: gid, parsing_status: :done)

  unless response.code == 200
    puts "Error: Unable to save Page Parsing Done Status to server: #{response.body}"
    raise "Unable to save Page Parsing Done Status to server: #{response.body}"
  end

  puts "Page Parsing Done."
end
update_parsing_starting_status() click to toggle source
# File lib/datahen/scraper/ruby_parser_executor.rb, line 72
# Marks the page's parsing as starting via parsing_update, passing along the
# keep_outputs setting captured at construction time.
#
# Does nothing when `save` is falsy. Any response code other than 200 is
# printed as an error and then raised.
#
# @return [nil]
# @raise [RuntimeError] when the response code is not 200.
def update_parsing_starting_status
  return unless save

  response = parsing_update(
    job_id: job_id,
    gid: gid,
    parsing_status: :starting,
    keep_outputs: @keep_outputs
  )

  unless response.code == 200
    puts "Error: Unable to save Page Parsing Status to server: #{response.body}"
    raise "Unable to save Page Parsing Status to server: #{response.body}"
  end

  puts "Page Parsing Status Updated."
end
update_to_server(opts = {}) click to toggle source
# File lib/datahen/scraper/ruby_parser_executor.rb, line 63
# Forwards a save request to parsing_update, renaming :status to
# :parsing_status. Keys missing from opts are forwarded as nil; other keys are
# ignored.
#
# @param opts [Hash] reads :job_id, :gid, :pages, :outputs and :status.
# @return [Object] the result of parsing_update.
def update_to_server(opts = {})
  job, page_gid, new_pages, new_outputs, status =
    opts.values_at(:job_id, :gid, :pages, :outputs, :status)

  parsing_update(
    job_id: job,
    gid: page_gid,
    pages: new_pages,
    outputs: new_outputs,
    parsing_status: status
  )
end