class TaiwaneseNewsParser::Parser::Udn

Public Class Methods

domain() click to toggle source
# File lib/taiwanese_news_parser/parser/udn.rb, line 2
# Domain string associated with this parser.
#
# @return [String] always 'udn.com'
def self.domain
  "udn.com"
end
names() click to toggle source
# File lib/taiwanese_news_parser/parser/udn.rb, line 6
# Publication names associated with this parser.
#
# NOTE(review): the value is ONE space-separated String, not an Array.
# If callers expect a list of names, a `%w{...}` word array is probably
# what was meant — confirm against the call sites before changing it.
#
# @return [String] both names joined by a single space
def self.names
  '聯合報 聯合晚報'
end
parse_url_id(url) click to toggle source
# File lib/taiwanese_news_parser/parser/udn.rb, line 37
# Extracts the numeric article id from a URL.
#
# The id is the first run of digits that directly follows two
# slash-terminated word segments (e.g. ".../NATIONAL/NATS5/7807573.shtml"
# yields "7807573").
#
# @param url [String] the article URL
# @return [String, nil] the digits of the id, or nil when the URL does not
#   contain the expected "<word>/<word>/<digits>" shape
def self.parse_url_id(url)
  id_match = url.match(%r{\w+/\w+/(\d+)})
  id_match && id_match[1]
end

Public Instance Methods

parse() click to toggle source

url = 'udn.com/NEWS/NATIONAL/NATS5/7807573.shtml'

# File lib/taiwanese_news_parser/parser/udn.rb, line 11
# Fills @article from the parsed page and returns it.
#
# Reads the '#story_title', '#story' and '#story_update' nodes of `doc`,
# delegates the byline fields to parse_company_name / parse_reporter_name,
# then calls clean_up before returning.
#
# @return [Object] @article, after :title, :content, :company_name,
#   :reporter_name and :published_at have been assigned
def parse
  title_node = doc.at_css('#story_title')
  @article[:title] = title_node.text

  body_node = doc.at_css('#story')
  @article[:content] = body_node.text

  @article[:company_name] = parse_company_name
  @article[:reporter_name] = parse_reporter_name

  # The update stamp on the page is used as the publication time.
  timestamp_text = doc.at_css('#story_update').text
  @article[:published_at] = Time.parse(timestamp_text)

  clean_up

  @article
end
parse_company_name() click to toggle source
# File lib/taiwanese_news_parser/parser/udn.rb, line 27
# Extracts the company (publication) name from the byline text.
#
# The name is whatever precedes the first separator on a line of the
# byline. Accepted separators are ASCII '/', fullwidth '／' (U+FF0F) and
# '╱' (U+2571).
#
# @return [String, nil] the text before the first separator, or nil when
#   the byline is missing or contains no separator
def parse_company_name
  source = get_company_name_and_reporter_name
  return unless source

  # String#[] with a capture index yields nil on no match instead of
  # raising, matching how parse_reporter_name reports "not found".
  source[%r{^(.*?)[/\uFF0F╱]}, 1]
end
parse_reporter_name() click to toggle source
# File lib/taiwanese_news_parser/parser/udn.rb, line 30
# Extracts the reporter name from the byline text.
#
# Two byline shapes are recognised, tried in order:
#   1. "<company><sep>[本報]記者<name><sep><rest>"
#   2. "本報記者<name>[<sep><rest>]" (no leading separator)
# where <sep> is ASCII '/', fullwidth '／' (U+FF0F) or '╱' (U+2571).
#
# @return [String, nil] the reporter name, or nil when the byline is
#   missing or matches neither shape
def parse_reporter_name
  source = get_company_name_and_reporter_name
  return unless source

  # In the fallback the capture stops at the first separator (or line end):
  # a greedy `.*` followed by an optional separator would swallow the
  # trailing "<sep><rest>" part into the name.
  source[%r{[/\uFF0F╱](?:本報)?記者(.*)[/\uFF0F╱]}, 1] ||
    source[%r{本報記者([^/\uFF0F╱\n]*)}, 1]
end

Private Instance Methods

get_company_name_and_reporter_name() click to toggle source
# File lib/taiwanese_news_parser/parser/udn.rb, line 43
# Returns the byline text found between 【 and 】 in the '#story_author'
# node of `doc`.
#
# @return [String, nil] the text inside the brackets (greedy: from the
#   first 【 to the last 】 on a line), or nil when no bracketed text exists
def get_company_name_and_reporter_name
  author_text = doc.at_css('#story_author').text
  author_text[%r{【(.*)】}, 1]
end