class Hongkong::News::Scrapers::MingpaoScraper

Constants

LIST_URL

Public Instance Methods

name() click to toggle source
# File lib/hongkong/news/scrapers/mingpao_scraper.rb, line 12
# Identifier for this scraper; +news+ stores it as the document's source.
#
# Returns the String "mingpao".
def name
  'mingpao'
end
news(url) click to toggle source

Extracts an article from a Mingpao page.

# File lib/hongkong/news/scrapers/mingpao_scraper.rb, line 30
# Extract an article from a Mingpao page.
#
# Visits +url+ and copies the page's details into a new Document:
# source, title, url, raw html, article text and the og:image URL.
#
# url - address of the article page to visit.
#
# Returns the populated Document. Its image_url is nil when the page has
# no og:image meta tag.
def news(url)
  visit url

  # wait for content to be loaded
  first("article p")

  document = Document.new
  document.source = name
  document.title = doc.css("h1").text
  document.url = url
  document.html = html
  # HongKongNews.getInnerText is a script-side helper defined outside this
  # method; presumably it returns the visible text of the matched element.
  document.content = page.evaluate_script("HongKongNews.getInnerText('article')")

  # The page may have no og:image tag. Handle that case explicitly rather
  # than with a blanket `rescue nil`, which would also hide unrelated errors.
  image_node = doc.search("//meta[@property='og:image']/@content").first
  document.image_url = image_node ? image_node.text : nil
  document
end