class Hongkong::News::Scrapers::MingpaoScraper
Constants
- LIST_URL
Public Instance Methods
name()
click to toggle source
# File lib/hongkong/news/scrapers/mingpao_scraper.rb, line 12
#
# Short identifier for this scraper. `news` stores this value in the
# `source` attribute of every document it builds.
#
# @return [String] always "mingpao"
def name
  "mingpao"
end
news(url)
click to toggle source
Extract an article from a Mingpao page
# File lib/hongkong/news/scrapers/mingpao_scraper.rb, line 30
#
# Extract an article from a Mingpao page.
#
# Visits +url+, waits for the article body to appear, and copies the page's
# title, raw HTML, visible article text and Open Graph image into a new
# Document.
#
# @param url [String] address of the article page to scrape
# @return [Document] populated document; +image_url+ is nil when the page
#   has no <meta property="og:image"> tag
def news(url)
  visit url

  # Wait for content to be loaded before reading anything from the page.
  first("article p")

  document = Document.new
  document.source = name
  document.title = doc.css("h1").text
  document.url = url
  document.html = html
  # NOTE(review): HongKongNews.getInnerText is a browser-side helper that is
  # presumably injected elsewhere in the project -- confirm it is loaded.
  document.content = page.evaluate_script("HongKongNews.getInnerText('article')")

  # A missing og:image tag is the only expected failure here, so handle just
  # that case (nil first match) instead of rescuing every StandardError.
  og_image = doc.search("//meta[@property='og:image']/@content").first
  document.image_url = og_image&.text

  document
end
news_links()
click to toggle source
Extract all news links from Mingpao
# File lib/hongkong/news/scrapers/mingpao_scraper.rb, line 17
#
# Extract all news links from Mingpao.
#
# Visits LIST_URL and builds one Link per anchor matched by
# ".listing ul li a". Each href is resolved against LIST_URL, so relative
# hrefs become absolute URLs. Anchors without an href attribute are skipped,
# because they cannot be resolved and would otherwise abort the whole listing.
#
# @return [Array<Link>] links in page order, each with +title+ and +url+ set
def news_links
  visit LIST_URL

  anchors = doc.css(".listing ul li a").select { |anchor| anchor["href"] }

  anchors.map do |anchor|
    Link.new.tap do |link|
      link.title = anchor.text
      link.url = URI.join(LIST_URL, anchor["href"]).to_s
    end
  end
end