module SpiderCore::Behavior
Protected Instance Methods
handle_element(element)
click to toggle source
# File lib/spider_core/behavior.rb, line 28
# Reduces a scanned element to the plain value it carries.
#
# @param element [String, #tag_name, nil] a raw string, an element object
#   answering +tag_name+, +value+ and +text+, or nil when a scan found nothing
# @return [Object, nil] the string itself, the +value+ of an 'input' element,
#   the +text+ of any other element, or nil for a nil element
def handle_element(element)
  # A scan that matches nothing can hand us nil (e.g. html[pattern, 1]);
  # pass it through instead of failing on nil.tag_name.
  return if element.nil?
  return element if element.is_a?(String)

  element.tag_name == 'input' ? element.value : element.text
end
handle_elements(elements) { |element| ... }
click to toggle source
# File lib/spider_core/behavior.rb, line 38
# Converts one element or a collection of elements into plain values.
#
# Collections (anything answering +map+) are mapped element by element and
# returned as an Array; a single object is converted directly. When a block
# is given it performs the conversion, otherwise +handle_element+ does.
#
# @param elements [#map, Object] a lazy or eager collection, or a single element
# @yield [element] optional custom conversion for each element
# @return [Array, Object] an Array for collections, otherwise the converted value
def handle_elements(elements, &block)
  if elements.respond_to?(:map)
    mapped =
      if block
        elements.map { |element| yield(element) }
      else
        elements.map { |element| handle_element(element) }
      end
    # to_a forces an Enumerator::Lazy and is a no-op on an Array, whereas
    # +force+ only exists on lazy enumerators and broke eager collections.
    mapped.to_a
  elsif block
    yield(elements)
  else
    handle_element(elements)
  end
end
handle_pattern(pattern)
click to toggle source
@example Handle pattern
handle_pattern('.a')       # => '.a'
set :id, 'a'
handle_pattern('.%{id}bc') # => '.abc'
# File lib/spider_core/behavior.rb, line 54
# Expands every %{name} placeholder in a pattern with the value stored
# under that name in @setted_variables.
#
# @example
#   handle_pattern('.a')       # => '.a'
#   # with @setted_variables == { 'id' => 'a' }
#   handle_pattern('.%{id}bc') # => '.abc'
#
# @param pattern [String, Object] pattern to expand; anything that is not a
#   String (e.g. a Regexp) is returned untouched
# @return [String, Object] the expanded string, or the original non-String pattern
# @raise [TypeError] when a placeholder names a variable that has no value
def handle_pattern(pattern)
  # Regexp patterns have no placeholders to expand and no String#scan/sub API.
  return pattern unless pattern.is_a?(String)

  # Block-form gsub inserts the value literally: no backslash sequences are
  # interpreted in the value and the name is never re-parsed as a regexp.
  pattern.gsub(/%\{([^}]*)\}/) do
    name = Regexp.last_match(1)
    value = @setted_variables[name]
    # Same error class the former String#sub call raised for a missing value.
    raise TypeError, "no value set for pattern variable #{name.inspect}" if value.nil?

    value.to_s
  end
end
put(display, value)
click to toggle source
# File lib/spider_core/behavior.rb, line 62
# Delegates to the current location's own +put+ and stores whatever it
# returns as the new @current_location.
#
# @param display [Object] forwarded unchanged as the first argument
# @param value [Object] forwarded unchanged as the second argument
# @return [Object] the object now held in @current_location
def put(display, value)
  updated_location = @current_location.put(display, value)
  @current_location = updated_location
end
scan_all(pattern, opts = {})
click to toggle source
# File lib/spider_core/behavior.rb, line 6
# Lazily finds every match for a pattern.
#
# A String is expanded with +handle_pattern+ and passed to
# +all(selector, pattern)+; a Regexp is run against +html+ with String#scan.
# NOTE(review): +all+, +selector+ and +html+ are not defined in this module's
# visible source; presumably the including class supplies them - confirm.
#
# @param pattern [String, Regexp] selector string (may contain %{name}
#   placeholders) or a regular expression
# @param opts [Hash] options
# @option opts [#to_i] :limit maximum number of results; ignored unless positive
# @return [Enumerator::Lazy, nil] lazy results, or nil for an unsupported pattern type
def scan_all(pattern, opts = {})
  # Only String patterns go through handle_pattern: placeholder expansion
  # relies on String#scan/sub, which a Regexp does not provide.
  matches =
    case pattern
    when String then all(selector, handle_pattern(pattern)).lazy
    when Regexp then html.scan(pattern).lazy
    end
  return matches if matches.nil?

  limit = opts[:limit]
  if limit && limit.to_i > 0
    matches.take(limit.to_i)
  else
    matches
  end
end
scan_first(pattern)
click to toggle source
# File lib/spider_core/behavior.rb, line 19
# Finds the first match for a pattern.
#
# A String is expanded with +handle_pattern+ and passed to
# +first(selector, pattern)+; a Regexp is applied to +html+ and the text of
# its first capture group is returned.
# NOTE(review): +first+, +selector+ and +html+ are not defined in this
# module's visible source; presumably the including class supplies them - confirm.
#
# @param pattern [String, Regexp] selector string (may contain %{name}
#   placeholders) or a regular expression with a capture group
# @return [Object, nil] the first match, or nil when the Regexp does not match
#   or the pattern type is unsupported
def scan_first(pattern)
  # Only String patterns go through handle_pattern: placeholder expansion
  # relies on String#scan/sub, which a Regexp does not provide.
  case pattern
  when String then first(selector, handle_pattern(pattern))
  when Regexp then html[pattern, 1]
  end
end