module DhEasy::Core::Mock::FakeExecutor

Fake executor that emulates `Datahen` executor.

Constants

MAX_FIND_OUTPUTS_PER_PAGE

Max allowed page size when querying outputs (see find_outputs).

Attributes

content[RW]

Page content. @return [String,nil]

failed_content[RW]

Failed page content. @return [String,nil]

Public Class Methods

check_compatibility(origin, fragment) click to toggle source

Validate executor methods compatibility. @private

@param [Array] origin Datahen executor method collection. @param [Array] fragment Fake executor method collection.

@return [Hash] @raise [DhEasy::Core::Exception::OutdatedError] When missing methods.

# File lib/dh_easy/core/mock/fake_executor.rb, line 26
        # Analyze both method collections and react to the resulting report:
        # warn when the report lists new (unmapped) methods and raise when the
        # report is not flagged as compatible.
        #
        # @param [Array] origin Datahen executor method collection.
        # @param [Array] fragment Fake executor method collection.
        #
        # @return [Hash] Report built by `DhEasy::Core.analyze_compatibility`.
        # @raise [DhEasy::Core::Exception::OutdatedError] When the report is
        #   not compatible (missing methods).
        def self.check_compatibility(origin, fragment)
          report = DhEasy::Core.analyze_compatibility(origin, fragment)

          # Only a warning: new methods do not break the fake executor
          if report[:new].count >= 1
            notice = <<-LONGDESC.gsub(/^\s+/,'')
              It seems datahen has new unmapped methods, try updating
              dh_easy-core gem or contacting gem maintainer to update it.
              New methods: #{report[:new].join ', '}
            LONGDESC
            warn notice
          end

          # Missing methods are fatal
          unless report[:is_compatible]
            raise DhEasy::Core::Exception::OutdatedError, <<-LONGDESC.gsub(/^\s+/,'')
              There are missing methods! Check your datahen gem version.
              Missing methods: #{report[:missing].join ', '}
            LONGDESC
          end

          report
        end
new(opts = {}) click to toggle source

Initialize object.

@param [Hash] opts ({}) Configuration options. @option opts [Array] :pages (nil) Array to initialize pages, can be nil for empty. @option opts [Array] :outputs (nil) Array to initialize outputs, can be nil for empty. @option opts [Integer] :job_id (nil) A number to represent the job_id. @option opts [String] :scraper_name (nil) Fake scraper name. @option opts [Hash] :page (nil) Current page.

@raise [ArgumentError] When pages or outputs are not Array.

# File lib/dh_easy/core/mock/fake_executor.rb, line 88
# Build the executor from the given options.
#
# The draft collections are stored as given (nil is allowed; the `pages`
# and `outputs` readers create an empty Array lazily). Job id, scraper name
# and current page are assigned through their setters, in that order.
#
# @param [Hash] opts ({}) Configuration options (:pages, :outputs,
#   :job_id, :scraper_name, :page).
#
# @raise [ArgumentError] When pages or outputs are neither nil nor Array.
def initialize(opts = {})
  draft_pages = opts[:pages]
  raise ArgumentError, "Pages must be an array." unless draft_pages.nil? || draft_pages.is_a?(Array)
  @pages = draft_pages

  draft_outputs = opts[:outputs]
  raise ArgumentError, "Outputs must be an array." unless draft_outputs.nil? || draft_outputs.is_a?(Array)
  @outputs = draft_outputs

  self.job_id = opts[:job_id]
  self.scraper_name = opts[:scraper_name]
  self.page = opts[:page]
end

Public Instance Methods

clear_draft_outputs() click to toggle source

Remove all elements on outputs. @private

# File lib/dh_easy/core/mock/fake_executor.rb, line 70
# Remove all elements from the draft outputs collection.
#
# Uses the `outputs` reader instead of `@outputs` directly: the instance
# variable is nil until outputs are provided or first read, so clearing it
# directly would raise NoMethodError on a fresh executor.
#
# @private
# @return [Array] The (now empty) draft outputs collection.
def clear_draft_outputs
  outputs.clear
end
clear_draft_pages() click to toggle source

Remove all elements on pages. @private

# File lib/dh_easy/core/mock/fake_executor.rb, line 64
# Remove all elements from the draft pages collection.
#
# Uses the `pages` reader instead of `@pages` directly: the instance
# variable is nil until pages are provided or first read, so clearing it
# directly would raise NoMethodError on a fresh executor.
#
# @private
# @return [Array] The (now empty) draft pages collection.
def clear_draft_pages
  pages.clear
end
db() click to toggle source

Fake database that represents what has been saved.

# File lib/dh_easy/core/mock/fake_executor.rb, line 75
# Fake database backing this executor; created on first access and reused
# afterwards.
#
# @return [DhEasy::Core::Mock::FakeDb]
def db
  @db = DhEasy::Core::Mock::FakeDb.new unless @db
  @db
end
execute_script(file_path, vars = {}) click to toggle source

Execute a script file as an executor.

@param [String] file_path Script file path to execute.

# File lib/dh_easy/core/mock/fake_executor.rb, line 341
# Evaluate a script file inside the binding returned by
# `isolated_binding(vars)` and then flush the drafts.
#
# NOTE: the file content is passed to `eval` as-is, so only run scripts
# you trust.
#
# @param [String] file_path Script file path to execute.
# @param [Hash] vars ({}) Values handed to `isolated_binding`.
#
# @return Whatever `flush` returns.
def execute_script(file_path, vars = {})
  source = File.read(file_path)
  # file_path is given to eval so errors report the script's own path
  eval(source, isolated_binding(vars), file_path)
  flush
end
find_output(collection = 'default', query = {}) click to toggle source

Find one output by collection and query with pagination.

@param [String] collection ('default') Collection name. @param [Hash] query ({}) Filters to query. @param [Hash] opts ({}) Configuration options. @option opts [String,nil] :scraper_name (nil) Scraper name to query

from.

@option opts [Integer,nil] :job_id (nil) Job's id to query from.

@raise [ArgumentError] collection is not String. @raise [ArgumentError] query is not a Hash.

@return [Hash, nil]

@example

find_output

@example

find_output 'my_collection'

@example

find_output 'my_collection', {}

@example Find from another scraper by name

find_output 'my_collection', {}, scraper_name: 'my_scraper'

@example Find from another scraper by job_id

find_output 'my_collection', {}, job_id: 123

@note *opts `:job_id` option is prioritized over `:scraper_name` when

both exist. If neither is provided or both are nil, then the current job
will be used to query instead; this is the default behavior.
# File lib/dh_easy/core/mock/fake_executor.rb, line 333
# Find the first output matching a collection and query.
#
# Delegates to `find_outputs` asking for page 1 with a page size of 1.
#
# @param [String] collection ('default') Collection name.
# @param [Hash] query ({}) Filters to query.
# @param [Hash] opts ({}) Options forwarded to `find_outputs`.
#
# @return [Hash, nil] First match, or nil when `find_outputs` returns nil
#   or an empty list.
def find_output(collection = 'default', query = {}, opts = {})
  matches = find_outputs(collection, query, 1, 1, opts)
  return nil if matches.nil?

  matches.first
end
find_outputs(collection = 'default', query = {}) click to toggle source

Find outputs by collection and query with pagination.

@param [String] collection ('default') Collection name. @param [Hash] query ({}) Filters to query. @param [Integer] page (1) Page number. @param [Integer] per_page (30) Page size. @param [Hash] opts ({}) Configuration options. @option opts [String,nil] :scraper_name (nil) Scraper name to query

from.

@option opts [Integer,nil] :job_id (nil) Job's id to query from.

@raise [ArgumentError] collection is not String. @raise [ArgumentError] query is not a Hash. @raise [ArgumentError] page is not an Integer greater than 0. @raise [ArgumentError] per_page is not an Integer between 1 and 500.

@return [Array]

@example

find_outputs

@example

find_outputs 'my_collection'

@example

find_outputs 'my_collection', {}

@example

find_outputs 'my_collection', {}, 1

@example

find_outputs 'my_collection', {}, 1, 30

@example Find from another scraper by name

find_outputs 'my_collection', {}, 1, 30, scraper_name: 'my_scraper'

@example Find from another scraper by job_id

find_outputs 'my_collection', {}, 1, 30, job_id: 123

@note *opts `:job_id` option is prioritized over `:scraper_name` when

both exist. If neither is provided or both are nil, then the current job
will be used to query instead; this is the default behavior.
# File lib/dh_easy/core/mock/fake_executor.rb, line 286
# Query saved outputs by collection and filters, one page at a time.
#
# The job to query is resolved in this order: `opts[:job_id]`, then the
# latest job found for `opts[:scraper_name]`, then the current `job_id`.
#
# @param [String] collection ('default') Collection name.
# @param [Hash] query ({}) Filters to query.
# @param [Integer] page (1) Page number, starting at 1.
# @param [Integer] per_page (30) Page size.
# @param [Hash] opts ({}) Configuration options (:scraper_name, :job_id).
#
# @raise [ArgumentError] collection is not a String.
# @raise [ArgumentError] query is not a Hash.
# @raise [ArgumentError] page is not an Integer greater than 0.
# @raise [ArgumentError] per_page is not an Integer between 1 and
#   MAX_FIND_OUTPUTS_PER_PAGE.
#
# @return Result of `db.query` on the :outputs collection.
def find_outputs(collection = 'default', query = {}, page = 1, per_page = 30, opts = {})
  raise ArgumentError, "collection needs to be a String." unless collection.is_a?(String)
  raise ArgumentError, "query needs to be a Hash." unless query.is_a?(Hash)
  raise ArgumentError, "page needs to be an Integer greater than 0." unless page.is_a?(Integer) && page >= 1
  unless per_page.is_a?(Integer) && per_page >= 1 && per_page <= MAX_FIND_OUTPUTS_PER_PAGE
    raise ArgumentError, "per_page needs to be an Integer between 1 and #{MAX_FIND_OUTPUTS_PER_PAGE}."
  end

  # Pages are 1-based, so page 1 starts at offset 0
  offset = per_page * (page - 1)
  job = latest_job_by(opts[:scraper_name])

  # An explicit job id wins over the job resolved from the scraper name
  target_job_id = opts[:job_id]
  unless target_job_id
    target_job_id = job.nil? ? job_id : job['job_id']
  end

  fixed_query = query.merge('_collection' => collection, '_job_id' => target_job_id)
  db.query(:outputs, fixed_query, offset, per_page)
end
flush() click to toggle source

Save all drafts into db and clear draft queues.

# File lib/dh_easy/core/mock/fake_executor.rb, line 231
# Persist every draft and apply the pending actions on the current page.
#
# Steps run in a fixed order: pages, outputs, then self actions.
#
# @return Whatever `flush_self_actions` returns.
def flush
  # Draft queues first...
  flush_pages
  flush_outputs

  # ...then refetch/reparse requested for the current page
  flush_self_actions
end
flush_outputs() click to toggle source

Save draft outputs into db and clear draft queue.

# File lib/dh_easy/core/mock/fake_executor.rb, line 225
# Save the draft outputs into db, then empty the draft queue.
#
# @return Whatever `clear_draft_outputs` returns.
def flush_outputs
  drafts = outputs
  save_outputs(drafts)
  clear_draft_outputs
end
flush_pages() click to toggle source

Save draft pages into db and clear draft queue.

# File lib/dh_easy/core/mock/fake_executor.rb, line 219
# Save the draft pages into db, then empty the draft queue.
#
# @return Whatever `clear_draft_pages` returns.
def flush_pages
  drafts = pages
  save_pages(drafts)
  clear_draft_pages
end
flush_self_actions() click to toggle source

Execute any action applied to current page

# File lib/dh_easy/core/mock/fake_executor.rb, line 206
# Execute any action (refetch and/or reparse) requested for the current
# page.
#
# When either flag is set the current page is saved first. If the db does
# not already allow page gid override, it is enabled only for that save and
# disabled again afterwards, even when the save raises, so a failure cannot
# leave the db with the override switched on.
#
# @return Result of the last db call executed, or nil when no flag is set.
def flush_self_actions
  # Save the current page before refetch/reparse
  if refetch_self || reparse_self
    # Only toggle the override when it was not already enabled
    temp_page_gid_override = !db.allow_page_gid_override?
    db.enable_page_gid_override if temp_page_gid_override
    begin
      save_pages [page]
    ensure
      # Restore the previous setting no matter how the save ended
      db.disable_page_gid_override if temp_page_gid_override
    end
  end
  db.refetch(page['job_id'], page['gid']) if refetch_self
  db.reparse(page['job_id'], page['gid']) if reparse_self
end
job_id() click to toggle source

Fake job ID used by executor. @return [Integer,nil]

# File lib/dh_easy/core/mock/fake_executor.rb, line 115
# Fake job ID used by the executor; the value is stored on the fake db.
#
# @return [Integer,nil]
def job_id
  fake_db = db
  fake_db.job_id
end
job_id=(value) click to toggle source

Set fake job ID value.

# File lib/dh_easy/core/mock/fake_executor.rb, line 120
# Set the fake job ID on the db and mirror it on the current page.
#
# The db is updated first; the current page is touched afterwards.
#
# @param [Integer,nil] value New job ID.
def job_id=(value)
  db.job_id = value
  # Keep the current page's 'job_id' in sync with the db
  page['job_id'] = value
end
latest_job_by(scraper_name, filter = {}) click to toggle source

Get latest job by scraper_name.

@param [String] scraper_name Scraper name. @param [Hash] filter ({}) Additional_filters.

@return [Hash,nil] Return nil if no scraper_name or scraper_name is

nil.
# File lib/dh_easy/core/mock/fake_executor.rb, line 244
# Get the latest job for a scraper name, i.e. the queried job with the
# greatest 'created_at' value.
#
# @param [String] scraper_name Scraper name.
# @param [Hash] filter ({}) Additional filters merged into the query.
#
# @return [Hash,nil] nil when scraper_name is nil or no job matches.
def latest_job_by(scraper_name, filter = {})
  return if scraper_name.nil?

  jobs = db.query(:jobs, filter.merge('scraper_name' => scraper_name))
  jobs.max do |left, right|
    left['created_at'] <=> right['created_at']
  end
end
outputs() click to toggle source

Draft outputs, usually get saved after execution. @return [Array]

# File lib/dh_easy/core/mock/fake_executor.rb, line 58
# Draft outputs; an empty Array is created on first access when none were
# provided.
#
# @return [Array]
def outputs
  @outputs = [] unless @outputs
  @outputs
end
page() click to toggle source

Current page used by executor. @return [Hash,nil]

# File lib/dh_easy/core/mock/fake_executor.rb, line 127
# Current page used by the executor. When no page is set, a fake page is
# built with the current job_id and kept for later calls.
#
# @return [Hash]
def page
  @page = DhEasy::Core::Mock::FakeDb.build_fake_page(job_id: job_id) unless @page
  @page
end
page=(value) click to toggle source

Set current page.

# File lib/dh_easy/core/mock/fake_executor.rb, line 132
# Set the current page.
#
# A nil value just clears the current page. Otherwise the value goes
# through `FakeDb.build_page`; a page carrying a 'job_id' updates the
# executor job id, a page without one inherits the current job id, and a
# page carrying a 'gid' updates the db page gid.
#
# @param [Hash,nil] value Page data.
def page=(value)
  return @page = nil if value.nil?

  built = DhEasy::Core::Mock::FakeDb.build_page(value)
  self.job_id = built['job_id'] unless built['job_id'].nil?
  built['job_id'] ||= job_id

  gid = built['gid']
  db.page_gid = gid unless gid.nil?

  @page = built
end
pages() click to toggle source

Draft pages, usually get saved after execution. @return [Array]

# File lib/dh_easy/core/mock/fake_executor.rb, line 52
# Draft pages; an empty Array is created on first access when none were
# provided.
#
# @return [Array]
def pages
  @pages = [] unless @pages
  @pages
end
refetch(gid) click to toggle source

Refetch a page by gid.

@param [String] gid Page's gid to refetch.

# File lib/dh_easy/core/mock/fake_executor.rb, line 349
# Refetch a page by gid.
#
# When the gid belongs to the current page only the refetch-self flag is
# raised; any other gid is forwarded to the db right away using the current
# job id.
#
# @param [String] gid Page's gid to refetch.
#
# @raise [ArgumentError] gid is not a String.
#
# @return nil for the current page, otherwise the result of `db.refetch`.
def refetch(gid)
  raise ArgumentError, "gid needs to be a String." unless gid.is_a?(String)

  if page['gid'] == gid
    self.refetch_self = true
    nil
  else
    db.refetch(job_id, gid)
  end
end
refetch_self() click to toggle source

Refetch self page flag. @return [Boolean] @note It is stronger than reparse_self flag.

# File lib/dh_easy/core/mock/fake_executor.rb, line 145
# Refetch self page flag; false until a truthy value is set.
#
# @return [Boolean]
def refetch_self
  @refetch_self = false unless @refetch_self
  @refetch_self
end
refetch_self=(value) click to toggle source

Set refetch self page flag.

# File lib/dh_easy/core/mock/fake_executor.rb, line 150
# Set the refetch self page flag.
#
# @param [Boolean] value New flag value, stored as given.
def refetch_self=(value)
  @refetch_self = value
end
reparse(page_gid) click to toggle source

Reparse a page by gid.

@param [String] page_gid Page's gid to reparse.

# File lib/dh_easy/core/mock/fake_executor.rb, line 361
# Reparse a page by gid.
#
# When the gid belongs to the current page only the reparse-self flag is
# raised; any other gid is forwarded to the db right away using the current
# job id.
#
# @param [String] page_gid Page's gid to reparse.
#
# @raise [ArgumentError] page_gid is not a String.
#
# @return nil for the current page, otherwise the result of `db.reparse`.
def reparse(page_gid)
  raise ArgumentError, "page_gid needs to be a String." unless page_gid.is_a?(String)

  if page['gid'] == page_gid
    self.reparse_self = true
    nil
  else
    db.reparse(job_id, page_gid)
  end
end
reparse_self() click to toggle source

Reparse self page flag. @return [Boolean]

# File lib/dh_easy/core/mock/fake_executor.rb, line 156
# Reparse self page flag; false until a truthy value is set.
#
# @return [Boolean]
def reparse_self
  @reparse_self = false unless @reparse_self
  @reparse_self
end
reparse_self=(value) click to toggle source

Set reparse self page flag.

# File lib/dh_easy/core/mock/fake_executor.rb, line 161
# Set the reparse self page flag.
#
# @param [Boolean] value New flag value, stored as given.
def reparse_self=(value)
  @reparse_self = value
end
save_jobs(list) click to toggle source

Save a job collection on db and remove all elements from list.

@param [Array] list Collection of jobs to save.

# File lib/dh_easy/core/mock/fake_executor.rb, line 183
# Append every job from list to the db jobs, then empty list in place.
#
# @param [Array] list Collection of jobs to save.
#
# @return [Array] The same list, now empty.
def save_jobs(list)
  # Jobs are pushed one by one through the db collection's own `<<`
  list.each do |entry|
    db.jobs << entry
  end
  list.clear
end
save_outputs(list) click to toggle source

Save an output collection on db and remove all elements from

+list+.

@param [Array] list Collection of outputs to save.

# File lib/dh_easy/core/mock/fake_executor.rb, line 200
# Append every output from list to the db outputs, then empty list in
# place.
#
# @param [Array] list Collection of outputs to save.
#
# @return [Array] The same list, now empty.
def save_outputs(list)
  # Outputs are pushed one by one through the db collection's own `<<`
  list.each do |entry|
    db.outputs << entry
  end
  list.clear
end
save_pages(list) click to toggle source

Save a page collection on db and remove all elements from list.

@param [Array] list Collection of pages to save.

# File lib/dh_easy/core/mock/fake_executor.rb, line 191
# Append every page from list to the db pages, then empty list in place.
#
# @param [Array] list Collection of pages to save.
#
# @return [Array] The same list, now empty.
def save_pages(list)
  # Pages are pushed one by one through the db collection's own `<<`
  list.each do |entry|
    db.pages << entry
  end
  list.clear
end
saved_jobs() click to toggle source

Retrieve a list of saved jobs.

# File lib/dh_easy/core/mock/fake_executor.rb, line 166
# Jobs currently stored on the fake db.
#
# @return The db jobs collection itself (not a copy).
def saved_jobs
  fake_db = db
  fake_db.jobs
end
saved_outputs() click to toggle source

Retrieve a list of saved outputs.

# File lib/dh_easy/core/mock/fake_executor.rb, line 176
# Outputs currently stored on the fake db.
#
# @return The db outputs collection itself (not a copy).
def saved_outputs
  fake_db = db
  fake_db.outputs
end
saved_pages() click to toggle source

Retrieve a list of saved pages. Drafted pages can be included.

# File lib/dh_easy/core/mock/fake_executor.rb, line 171
# Pages currently stored on the fake db.
#
# @return The db pages collection itself (not a copy).
def saved_pages
  fake_db = db
  fake_db.pages
end
scraper_name() click to toggle source

Fake scraper name used by executor. @return [String,nil]

# File lib/dh_easy/core/mock/fake_executor.rb, line 104
# Fake scraper name used by the executor; the value is stored on the fake
# db.
#
# @return Whatever the db holds as scraper name.
def scraper_name
  fake_db = db
  fake_db.scraper_name
end
scraper_name=(value) click to toggle source

Set fake scraper name value.

# File lib/dh_easy/core/mock/fake_executor.rb, line 109
# Set the fake scraper name; the value is stored on the fake db.
#
# @param value New scraper name, passed to the db as given.
def scraper_name=(value)
  fake_db = db
  fake_db.scraper_name = value
end