class TaiwaneseNewsParser::Parser::ChinaTimesMoney
Public Class Methods
applicable?(url)
click to toggle source
# File lib/taiwanese_news_parser/parser/china_times_money.rb, line 10
# Reports whether this parser handles the given URL.
#
# The check is a plain substring test, so the marker may appear anywhere
# in +url+.
#
# @param url [String] article URL to test
# @return [Boolean] true when +url+ contains "money.chinatimes.com"
def self.applicable?(url)
  money_host = 'money.chinatimes.com'
  url.include?(money_host)
end
domain()
click to toggle source
# File lib/taiwanese_news_parser/parser/china_times_money.rb, line 2
# Domain string associated with this parser.
#
# @return [String] always "chinatimes.com"
def self.domain
  'chinatimes.com'
end
names()
click to toggle source
# File lib/taiwanese_news_parser/parser/china_times_money.rb, line 6
# Publication names associated with this parser.
#
# @return [Array<String>] a new array of names on every call
def self.names
  [
    '中國時報',
    '中時電子報',
    '工商時報',
    '旺報',
    '時報週刊',
    '中天',
    '中視',
    '中廣',
    '中時即時'
  ]
end
parse_url_id(url)
click to toggle source
# File lib/taiwanese_news_parser/parser/china_times_money.rb, line 65
# Extracts the numeric article id from a URL.
#
# The URL is first passed through TaiwaneseNewsParser::UrlCleaner built with
# 'id' (presumably keeping only the +id+ query parameter; confirm against
# UrlCleaner). The result is the first run of digits that follows "id=" in
# the cleaned URL.
#
# @param url [String] article URL
# @return [String, nil] the digits after "id=", or nil when there is no match
def self.parse_url_id(url)
  normalized = TaiwaneseNewsParser::UrlCleaner.new('id').clean(url)
  normalized[%r{id=(\d+)}, 1]
end
Public Instance Methods
clean_url()
click to toggle source
# File lib/taiwanese_news_parser/parser/china_times_money.rb, line 60
# Replaces @article[:url] with its cleaned form.
#
# Cleaning is delegated to TaiwaneseNewsParser::UrlCleaner built with 'id';
# what exactly is stripped is decided by that class.
#
# @return [Object] the cleaned URL that was stored in @article[:url]
def clean_url
  current_url = @article[:url]
  @article[:url] = TaiwaneseNewsParser::UrlCleaner.new('id').clean(current_url)
end
doc()
click to toggle source
# File lib/taiwanese_news_parser/parser/china_times_money.rb, line 14
# Returns the article page parsed by Nokogiri, downloading it on first use.
#
# The page is fetched at most once: the raw body is kept in @raw, the parsed
# document in @doc, and later calls return the cached @doc. Without the
# cache every call would download and parse the page again.
#
# @return [Object] whatever Nokogiri::HTML returns for the fetched body
def doc
  @doc ||= begin
    # Block form so the stream is closed as soon as the body has been read.
    # NOTE(review): Kernel#open on a URL depends on open-uri being loaded, and
    # Kernel#open runs a subprocess for strings starting with "|". Confirm
    # +url+ is trusted, or move to URI.open where the supported Ruby versions
    # allow it.
    @raw = open(url) { |io| io.read }
    Nokogiri::HTML(@raw)
  end
end
parse()
click to toggle source
# File lib/taiwanese_news_parser/parser/china_times_money.rb, line 20
# Fills @article from the article page and returns it.
#
# Sets :title, :company_name, :content, :reporter_name and :published_at,
# then calls clean_up (defined outside this method).
#
# The document is taken from #doc once and reused for every lookup made
# here, so the publish time no longer depends on @doc having been set as a
# side effect of an earlier call.
#
# NOTE(review): a missing title or publish-time element makes the lookup
# return nil and the following +.text+ raise NoMethodError; Time.parse
# raises ArgumentError on unparseable text.
#
# @return [Hash] the populated @article
def parse
  page = doc

  @article[:title] = page.at_css('.articlebox h1.highlight').text
  @article[:company_name] = parse_company_name
  @article[:content] = page.css('#zoom-area p').text
  # Disabled in the original source:
  # @article[:web_published_at] = Time.parse(doc.at_css('#story_update').text)
  @article[:reporter_name] = parse_reporter_name

  # The first item of the info bar carries the publish time.
  published_item = page.css('.bar-align-left>ul.inline-list>li').first
  @article[:published_at] = Time.parse(published_item.text)

  clean_up
  @article
end
parse_company_name()
click to toggle source
# File lib/taiwanese_news_parser/parser/china_times_money.rb, line 50
# Reads the company label from the second item of the info bar and maps two
# section labels to publication names; any other label is returned as is.
#
# @return [String] '時報週刊' for '時週精選', '中時電子報' for '新聞速報',
#   otherwise the label text itself
def parse_company_name
  label = doc.css('.bar-align-left>ul.inline-list>li')[1].text

  case label
  when '時週精選' then '時報週刊'
  when '新聞速報' then '中時電子報'
  else label
  end
end
parse_reporter_name()
click to toggle source
# File lib/taiwanese_news_parser/parser/china_times_money.rb, line 38
# Extracts the reporter name from the last item of the info bar.
#
# The name is the text before the first slash-like separator. A bracketed
# byline ("…【name/place】") is tried first: the plain pattern is unanchored
# and matches every string the bracketed one does, so testing it first left
# the bracketed branch unreachable and returned the name with "【" (and any
# prefix) still attached.
#
# Separators accepted: "/", "╱" and the full-width "／".
# NOTE(review): "／" was added on the assumption that the duplicated "/" in
# the previous character class stood for slash variants; confirm against
# real bylines.
#
# @return [String] the captured name, or the whole item text when no
#   separator is found
def parse_reporter_name
  byline = doc.css('.bar-align-left>ul.inline-list>li.last').text

  found = byline.match(%r{【(.+?)[/／╱]}) || byline.match(%r{(.+?)[/／╱]})
  found ? found[1] : byline
end