class TaiwaneseNewsParser::Parser::Tvbs
Public Class Methods
applicable?(url)
click to toggle source
# File lib/taiwanese_news_parser/parser/tvbs.rb, line 10
#
# Tells whether +url+ contains the <tt>tvbs.com.tw/entry</tt> fragment that
# this parser recognises.
#
# @param url [#to_s, nil] address to test; nil is treated as "no match"
# @return [Boolean] true when the fragment is present, false otherwise
def self.applicable?(url)
  # A predicate should answer with a real boolean rather than leak the
  # MatchData, and a nil url should not raise. =~ plus nil? is used instead of
  # match? so the method keeps working on the older Rubies this file targets.
  !(url.to_s =~ %r{tvbs\.com\.tw/entry}).nil?
end
domain()
click to toggle source
# File lib/taiwanese_news_parser/parser/tvbs.rb, line 2
#
# Host name associated with this parser.
#
# @return [String] always <tt>'tvbs.com.tw'</tt>
def self.domain
  'tvbs.com.tw'
end
names()
click to toggle source
# File lib/taiwanese_news_parser/parser/tvbs.rb, line 6
#
# Names this parser goes by; the first entry is what
# +parse_company_name+ reports.
#
# @return [Array<String>] a fresh one-element array on every call
def self.names
  %w[TVBS]
end
parse_url_id(url)
click to toggle source
# File lib/taiwanese_news_parser/parser/tvbs.rb, line 43
#
# Pulls the run of digits that follows <tt>/entry/</tt> out of +url+.
#
# @param url [String] address to inspect
# @return [String, nil] the digits as a string, or nil when +url+ has no
#   <tt>/entry/<digits></tt> segment
def self.parse_url_id(url)
  entry = url.match(%r{/entry/(\d+)})
  entry && entry[1]
end
Public Instance Methods
doc()
click to toggle source
# File lib/taiwanese_news_parser/parser/tvbs.rb, line 14
#
# Fetches the page at +url+ and returns it parsed by Nokogiri::HTML.
#
# The parsed document is memoized in @doc and the raw body is kept in @raw.
# +parse+ asks for +doc+ several times, so without memoization every call
# would download and re-parse the same page.
#
# NOTE(review): +url+ is not defined in this block; presumably an accessor
# provided elsewhere in the class. Confirm.
# NOTE(review): Kernel#open only handles URLs when open-uri is loaded, and it
# will also run a command for input starting with "|". If +url+ can come from
# untrusted input, switch to URI.open (Ruby 2.5+) or Net::HTTP.
#
# @return [Object] whatever Nokogiri::HTML returns for the fetched body
def doc
  @doc ||= begin
    # Block form so the underlying IO is closed as soon as the body is read.
    @raw = open(url) { |io| io.read }
    Nokogiri::HTML(@raw)
  end
end
parse()
click to toggle source
Example: url = 'news.tvbs.com.tw/entry/519673'
# File lib/taiwanese_news_parser/parser/tvbs.rb, line 20
#
# Fills @article from the fetched page and returns it.
#
# Keys written: :title, :company_name, :content, :published_at and
# :reporter_name. +clean_up+ is invoked just before returning; it is defined
# outside this block, so its effect is not visible here.
#
# NOTE(review): the timestamp slice still carries the "時間:" label when it is
# handed to Time.parse, and no zone is given, so the result depends on the
# process's local time zone. Confirm that is intended.
#
# @return [Object] @article after the fields above have been assigned
def parse
  @article[:title] = doc.at_css('article h1').text
  @article[:company_name] = parse_company_name
  @article[:content] = doc.css('article .content').text

  dateline = doc.at_css('article .meta-data .dateline').text
  timestamp = dateline[%r{時間:\d{4}/\d{1,2}/\d{1,2} \d{2}:\d{2}}]
  # The page only gives minutes; pad the seconds before parsing.
  @article[:published_at] = Time.parse("#{timestamp}:00")

  @article[:reporter_name] = parse_reporter_name

  clean_up
  @article
end
parse_company_name()
click to toggle source
# File lib/taiwanese_news_parser/parser/tvbs.rb, line 39
#
# Company name for the article: the first entry of the class-level +names+.
#
# @return [Object, nil] first element of +self.class.names+
def parse_company_name
  known_names = self.class.names
  known_names.first
end
parse_reporter_name()
click to toggle source
# File lib/taiwanese_news_parser/parser/tvbs.rb, line 35
#
# Extracts the reporter's name from the article's
# <tt>article .meta-data .reporter</tt> node: everything after the "記者:"
# label up to the end of that line.
#
# @return [String, nil] the captured name, or nil when the node is missing
#   or its text does not contain the label
def parse_reporter_name
  byline = doc.at_css('article .meta-data .reporter')
  # A page without a reporter node is treated like a byline without the
  # label (nil) instead of raising NoMethodError on nil.
  return nil if byline.nil?

  byline.text[%r{記者:(.+)}, 1]
end