class TaiwaneseNewsParser::Parser::Udn
Public Class Methods
domain()
click to toggle source
# File lib/taiwanese_news_parser/parser/udn.rb, line 2
# Host name of the news site this parser is written for.
#
# @return [String] always 'udn.com'
def self.domain
  'udn.com'
end
names()
click to toggle source
# File lib/taiwanese_news_parser/parser/udn.rb, line 6
# Names of the newspapers published under this parser's domain.
#
# The previous literal was `%{...}`, which Ruby treats as an ordinary
# double-quoted string, so the method returned the single string
# "聯合報 聯合晚報". A `%w[...]` word array yields one entry per name.
#
# @return [Array<String>] the newspaper names
def self.names
  %w[聯合報 聯合晚報]
end
parse_url_id(url)
click to toggle source
# File lib/taiwanese_news_parser/parser/udn.rb, line 37
# Pulls the article id out of an article URL: the first run of digits that
# directly follows two slash-separated word segments, e.g. "7807573" in
# 'udn.com/NEWS/NATIONAL/NATS5/7807573.shtml'.
#
# @param url [String] the article URL
# @return [String, nil] the digit run, or nil when the URL has no such part
def self.parse_url_id(url)
  id_pattern = %r{\w+/\w+/(\d+)}
  url.slice(id_pattern, 1)
end
Public Instance Methods
parse()
click to toggle source
url = 'udn.com/NEWS/NATIONAL/NATS5/7807573.shtml'
# File lib/taiwanese_news_parser/parser/udn.rb, line 11
# Populates @article from the page held in `doc` and returns it.
#
# Reads the text of the '#story_title', '#story' and '#story_update'
# elements, stores the company and reporter names produced by
# parse_company_name / parse_reporter_name, then calls clean_up.
# `doc`, `@article` and `clean_up` are defined outside this method
# (not visible here); `doc` looks like a Nokogiri document - confirm.
#
# @return [Object] @article after it has been filled in
def parse
  title_node = doc.at_css('#story_title')
  @article[:title] = title_node.text

  story_node = doc.at_css('#story')
  @article[:content] = story_node.text

  @article[:company_name] = parse_company_name
  @article[:reporter_name] = parse_reporter_name

  # The '#story_update' text is parsed as the publication timestamp.
  update_text = doc.at_css('#story_update').text
  @article[:published_at] = Time.parse(update_text)

  clean_up
  @article
end
parse_company_name()
click to toggle source
# File lib/taiwanese_news_parser/parser/udn.rb, line 27
# Extracts the company (newspaper) name from the byline returned by
# get_company_name_and_reporter_name: everything before the first
# slash-like separator.
#
# Returns nil instead of raising NoMethodError when the byline is missing
# or contains no separator, which matches how parse_reporter_name reports
# "not found".
#
# @return [String, nil] the text before the first separator, or nil
def parse_company_name
  byline = get_company_name_and_reporter_name
  return nil if byline.nil?

  # Separators accepted: ASCII '/', full-width '／' and box-drawing '╱'.
  # NOTE(review): the previous class listed '/' twice, which looks like a
  # full-width solidus lost in transcription - confirm against live bylines.
  byline[%r{^(.*?)[/／╱]}, 1]
end
parse_reporter_name()
click to toggle source
# File lib/taiwanese_news_parser/parser/udn.rb, line 30
# Extracts the reporter name from the byline returned by
# get_company_name_and_reporter_name.
#
# Two byline shapes are recognised:
#   1. "<company><sep>[本報]記者<name><sep><rest>" - the name sits between
#      separators;
#   2. "本報記者<name>[<sep><rest>]" - no leading company part.
#
# @return [String, nil] the reporter name, or nil when the byline is
#   missing or matches neither shape
def parse_reporter_name
  byline = get_company_name_and_reporter_name
  return nil if byline.nil?

  # Separators accepted: ASCII '/', full-width '／' and box-drawing '╱'.
  # NOTE(review): the previous classes listed '/' twice, which looks like a
  # full-width solidus lost in transcription - confirm against live bylines.
  between_separators = byline[%r{[/／╱](?:本報)?記者(.*)[/／╱]}, 1]
  return between_separators if between_separators

  # Fallback: stop at the first separator. The earlier greedy `(.*)` with an
  # optional trailing separator swallowed everything after the name as well.
  byline[%r{本報記者([^/／╱\n]*)}, 1]
end
Private Instance Methods
get_company_name_and_reporter_name()
click to toggle source
# File lib/taiwanese_news_parser/parser/udn.rb, line 43
# Returns the byline found inside the 【...】 brackets of the
# '#story_author' element's text.
# `doc` is defined outside this method (not visible here).
#
# @return [String, nil] the bracketed text, or nil when the element's text
#   has no 【...】 pair
def get_company_name_and_reporter_name
  author_text = doc.at_css('#story_author').text
  author_text[%r{【(.*)】}, 1]
end