class TaiwaneseNewsParser::Parser::Ettoday

Public Class Methods

domain() click to toggle source
# File lib/taiwanese_news_parser/parser/ettoday.rb, line 2
# Domain string identifying the site this parser is written for.
#
# @return [String] the literal 'ettoday.net'
def self.domain
  'ettoday.net'
end
names() click to toggle source
# File lib/taiwanese_news_parser/parser/ettoday.rb, line 6
# Names under which this news source is listed.
#
# @return [Array<String>] a one-element array containing '東森'
def self.names
  ['東森']
end
parse_url_id(url) click to toggle source
# File lib/taiwanese_news_parser/parser/ettoday.rb, line 46
# Extracts the "<digits>/<digits>" identifier that follows the section
# segment of a www.ettoday.net article URL.
#
# Both http:// and https:// links are accepted; anything else yields nil.
#
# @param url [String] article URL
# @return [String, nil] the captured identifier (e.g. "20130128/158005"),
#   or nil when the URL does not match
def self.parse_url_id(url)
  url[%r{https?://www\.ettoday\.net/\w+/(\d+/\d+)}, 1]
end

Public Instance Methods

clean_url() click to toggle source
# File lib/taiwanese_news_parser/parser/ettoday.rb, line 41
# Overwrites @article[:url] with the value returned by
# TaiwaneseNewsParser::UrlCleaner#clean for the current URL.
#
# @return [Object] whatever the cleaner returned (also stored in @article[:url])
def clean_url
  @article[:url] = TaiwaneseNewsParser::UrlCleaner.new.clean(@article[:url])
end
doc() click to toggle source
# File lib/taiwanese_news_parser/parser/ettoday.rb, line 10
# Fetches the article page and parses it, caching the result.
#
# The first call reads `url`, stores the body in @raw and the parsed
# document in @doc. Later calls return the cached @doc, so callers that
# invoke `doc` several times do not trigger repeated downloads and parses.
#
# NOTE(review): opening a URL through Kernel#open depends on open-uri and
# stopped working in Ruby 3.0; switching to URI.open may be needed —
# confirm against the Ruby versions this gem supports.
#
# @return [Object] the document produced by Nokogiri::HTML
def doc
  return @doc if @doc

  # Block form closes the underlying handle as soon as the body is read.
  @raw = open(url, &:read)
  @doc = Nokogiri::HTML(@raw)
end
parse() click to toggle source

url = 'www.ettoday.net/news/20130128/158005.htm'

# File lib/taiwanese_news_parser/parser/ettoday.rb, line 16
# Fills @article from the fetched page and returns it.
#
# Sets :title, :company_name, :content, :reporter_name and :published_at,
# then calls `clean_up`.
#
# :published_at is built from the ".news-time" text, expected to look like
# "2013年01月28日 10:05". When no such timestamp is present it is set to nil
# instead of raising NoMethodError on a nil match.
#
# @return [Hash] the populated @article
def parse
  @article[:title] = doc.css('[itemprop=headline]').text
  @article[:company_name] = '東森'
  @article[:content] = doc.css('[itemprop=articleBody]>p').text
  @article[:reporter_name] = parse_reporter_name

  # \d+ (not \d*) so every capture is a real number before it reaches Time.new.
  stamp = doc.css('.news-time').text.match(/(\d+)年(\d+)月(\d+)日 (\d+):(\d+)/)
  @article[:published_at] = stamp && Time.new(*stamp.captures.map(&:to_i))

  clean_up

  @article
end
parse_reporter_name() click to toggle source
# File lib/taiwanese_news_parser/parser/ettoday.rb, line 33
# Pulls the reporter's name out of the article body.
#
# Looks for the text between "記者" and the first following slash. The slash
# may be ASCII "/", box-drawing "╱" (U+2571) or fullwidth "／" (U+FF0F).
#
# @return [String, nil] the captured name, or nil when no byline is found
def parse_reporter_name
  body_text = doc.css('[itemprop=articleBody]').text
  # Each slash variant is listed once; \uFF0F is written as an escape so the
  # fullwidth form survives editors that normalize character width.
  body_text[%r{記者(.+?)[/╱\uFF0F]}, 1]
end
reproduced?() click to toggle source
# File lib/taiwanese_news_parser/parser/ettoday.rb, line 50
# Whether the article is flagged as reproduced; this parser always answers no.
#
# @return [Boolean] always false
def reproduced?
  false
end