class Hongkong::News::Scrapers::OrientalDailyScraper

Constants

LIST_URL

Public Instance Methods

name()
# File lib/hongkong/news/scrapers/oriental_daily_scraper.rb, line 12
# Identifier of this scraper.
#
# @return [String] always "orientaldaily"
def name
  'orientaldaily'
end
news(url)

Extracts the article from the page at the given URL and returns it as a Document.

# File lib/hongkong/news/scrapers/oriental_daily_scraper.rb, line 30
# Loads the article page at +url+ and builds a Document from it.
#
# The title is the text of the page's <h1> element(s); the content is the
# inner text of "#contentCTN-top" followed by that of "#contentCTN-right",
# separated by a newline; the image URL is the +src+ of the first
# "#contentCTN .photo img" element, resolved against +url+.
#
# @param url [String] address of the article page
# @return [Document] the populated document; +image_url+ is left unset when
#   the page has no photo or the photo has no usable +src+
def news(url)
  visit url

  # wait for content to be loaded
  first("#contentCTN-right")

  document = Document.new
  document.source = name
  document.title = doc.css("h1").text
  document.url = url
  document.html = html

  top_text = page.evaluate_script("HongKongNews.getInnerText('#contentCTN-top')")
  right_text = page.evaluate_script("HongKongNews.getInnerText('#contentCTN-right')")
  document.content = top_text + "\n" + right_text

  image = doc.search("#contentCTN .photo img").first
  src = image && image["src"]
  # An <img> without a src would make URI.join raise, and an empty src would
  # resolve to the page URL itself; in both cases there is no image to record.
  document.image_url = URI.join(url, src).to_s if src && !src.empty?
  document
end