class TaiwaneseNewsParser::Parser::Tvbs

Public Class Methods

applicable?(url) click to toggle source
# File lib/taiwanese_news_parser/parser/tvbs.rb, line 10
# Reports whether +url+ points at a TVBS news entry page.
#
# @param url [String] address of the candidate article
# @return [MatchData, nil] the match for "tvbs.com.tw/entry" inside +url+,
#   or nil when +url+ does not contain it
def self.applicable?(url)
  entry_pattern = %r{tvbs\.com\.tw/entry}
  url.match(entry_pattern)
end
domain() click to toggle source
# File lib/taiwanese_news_parser/parser/tvbs.rb, line 2
# Domain of the site whose articles this parser handles.
#
# @return [String] always 'tvbs.com.tw'
def self.domain
  host = 'tvbs.com.tw'
  host
end
names() click to toggle source
# File lib/taiwanese_news_parser/parser/tvbs.rb, line 6
# Names under which this news outlet is known.
#
# @return [Array<String>] a one-element list, ['TVBS']
def self.names
  %w[TVBS]
end
parse_url_id(url) click to toggle source
# File lib/taiwanese_news_parser/parser/tvbs.rb, line 43
# Extracts the numeric article id from a TVBS entry URL.
#
# @param url [String] article address containing "/entry/<digits>"
# @return [String, nil] the digits that follow "/entry/", or nil when the
#   URL has no such segment
def self.parse_url_id(url)
  found = url.match(%r{/entry/(\d+)})
  found && found[1]
end

Public Instance Methods

doc() click to toggle source
# File lib/taiwanese_news_parser/parser/tvbs.rb, line 14
# Fetches the article page and parses it into an HTML document.
#
# The parsed document is cached in @doc, so the page is downloaded and
# parsed only on the first call; the raw body is kept in @raw.
#
# NOTE(review): Kernel#open only understands URLs while open-uri has patched
# it (Ruby < 3.0; newer Rubies need URI.open), and it treats a leading "|"
# as a command to run -- confirm that +url+ is always trusted.
#
# @return [Object] whatever Nokogiri::HTML returns for the page body
def doc
  @doc ||= begin
    # Block form closes the handle as soon as the body has been read.
    @raw = open(url, &:read)
    Nokogiri::HTML(@raw)
  end
end
parse() click to toggle source

Example: url = 'news.tvbs.com.tw/entry/519673'

# File lib/taiwanese_news_parser/parser/tvbs.rb, line 20
# Scrapes the article page and fills @article with its fields.
#
# Sets :title, :company_name, :content, :published_at and :reporter_name,
# then runs clean_up.
#
# @return [Object] @article, after it has been populated
# @raise [ArgumentError] from Time.parse when the dateline holds no
#   "時間:YYYY/M/D HH:MM" timestamp
def parse
  # Resolve the document once instead of once per field.
  page = doc

  @article[:title] = page.at_css('article h1').text
  @article[:company_name] = parse_company_name
  @article[:content] = page.css('article .content').text

  dateline = page.at_css('article .meta-data .dateline').text
  # Capture only the timestamp so Time.parse never sees the "時間:" label.
  timestamp = dateline[%r{時間:(\d{4}/\d{1,2}/\d{1,2} \d{2}:\d{2})}, 1]
  # The page shows minutes only; append seconds for a complete clock time.
  @article[:published_at] = Time.parse("#{timestamp}:00")

  @article[:reporter_name] = parse_reporter_name

  clean_up

  @article
end
parse_company_name() click to toggle source
# File lib/taiwanese_news_parser/parser/tvbs.rb, line 39
# Company name recorded for every article handled by this parser.
#
# @return [String] the first entry of the class-level names list
def parse_company_name
  known_names = self.class.names
  known_names.first
end
parse_reporter_name() click to toggle source
# File lib/taiwanese_news_parser/parser/tvbs.rb, line 35
# Reads the reporter's name from the article's meta-data block.
#
# @return [String, nil] the text following "記者:" in the reporter element
#   (up to the end of that line), or nil when the label is not present
def parse_reporter_name
  byline = doc.at_css('article .meta-data .reporter').text
  credited = byline.match(%r{記者:(.+)})
  credited && credited[1]
end