class Ruboty::YMCrawl::Element
セレクタにより抽出されたPageの一部を表すクラス
Public Class Methods
new(doc)
click to toggle source
# File lib/ruboty/ymcrawl/crawler.rb, line 81
# Keeps a reference to the node this element wraps.
#
# @param doc [Object] the node to wrap; stored unchanged in @doc
#   (presumably a Nokogiri node -- TODO confirm against the caller)
def initialize(doc)
  @doc = doc
end
Public Instance Methods
get_content(target)
click to toggle source
対象(:url, :image, :image_title, :title, :page_index_max)に応じた内容を返す
# File lib/ruboty/ymcrawl/crawler.rb, line 105
# Dispatches to the getter that corresponds to +target+.
#
# @param target [Symbol] one of :url, :image, :image_title, :title,
#   :page_index_max
# @return [Object, nil] the chosen getter's result; nil when +target+ is
#   none of the symbols above
def get_content(target)
  case target
  when :url            then get_url
  when :image          then get_image_url
  when :image_title    then get_image_title
  when :title          then get_title
  when :page_index_max then get_page_index_max
  end
end
get_image_title()
click to toggle source
画像のタイトルを返す
# File lib/ruboty/ymcrawl/crawler.rb, line 93
# Returns a title for the image this element refers to.
#
# For an "img" node the "title" attribute is used; for any other node the
# node's content is used instead.
#
# @return [Object] the title, or the string "noname" when the lookup
#   yields nil
def get_image_title
  title = if @doc.name == "img"
            @doc["title"]
          else
            @doc.content
          end
  return "noname" if title.nil?

  title
end
get_image_url()
click to toggle source
画像へのURLを返す
# File lib/ruboty/ymcrawl/crawler.rb, line 86
# Returns the URL of the image this element refers to.
#
# @return [Object] the "href" attribute of an "a" node, or the "src"
#   attribute of an "img" node
# @raise [ArgumentError] when the node is neither "a" nor "img"
def get_image_url
  case @doc.name
  when "a"   then @doc["href"]
  when "img" then @doc["src"]
  else raise ArgumentError, "in Element"
  end
end
get_page_index_max()
click to toggle source
記事が何ページまであるかを返す
# File lib/ruboty/ymcrawl/crawler.rb, line 102
# Returns the node's content converted to an integer with to_i.
# Per the accompanying description, this is the number of pages the
# article spans.
#
# @return [Integer] the converted content
def get_page_index_max
  text = @doc.content
  text.to_i
end
get_title()
click to toggle source
記事タイトルを返す
# File lib/ruboty/ymcrawl/crawler.rb, line 99
# Returns the node's content, used as the article title.
#
# @return [Object] whatever @doc.content yields
def get_title
  @doc.content
end
get_url()
click to toggle source
# File lib/ruboty/ymcrawl/crawler.rb, line 83
# Returns the node's "href" attribute.
#
# @return [Object] whatever @doc["href"] yields
def get_url
  @doc["href"]
end