class Hongkong::News::Scrapers::AppleDailyScraper
Public Instance Methods
name()
click to toggle source
# File lib/hongkong/news/scrapers/apple_daily_scraper.rb, line 9
#
# Identifier of this scraper; +news+ stores it as the +source+ of every
# document it builds.
#
# @return [String] always "appledaily"
def name
  "appledaily"
end
news(url)
click to toggle source
Extract an article from an Apple Daily page
# File lib/hongkong/news/scrapers/apple_daily_scraper.rb, line 27
#
# Loads +url+ and builds a Document from the rendered page.
#
# Relies on +visit+, +doc+, +html+ and +page+, which are provided outside
# this method (presumably a Capybara-style session plus a parsed DOM --
# confirm against the base scraper).
#
# @param url [String] address of the article page
# @return [Document] document with source, title, url, html, content and
#   image_url populated; image_url is nil when the page has no og:image tag
def news(url)
  visit url

  document = Document.new
  document.source = name
  document.title = doc.search("#articleContent h1").text.strip
  document.url = url
  document.html = html
  # The text is extracted by a script running inside the page itself.
  document.content = page.evaluate_script("HongKongNews.getInnerText('#masterContent')")

  # A missing og:image tag is the only expected failure here, so test for it
  # explicitly rather than using a blanket `rescue nil`, which would also hide
  # unrelated errors.
  image_node = doc.search("//meta[@property='og:image']/@content").first
  document.image_url = image_node && image_node.text

  document
end
news_links()
click to toggle source
Extract all news links from Apple Daily
# File lib/hongkong/news/scrapers/apple_daily_scraper.rb, line 14
#
# Visits the Apple Daily front page and turns every <option> under
# "#article_ddl" into a Link (title = option text, url = option "value"
# attribute). Options that carry no "value" attribute are dropped.
#
# @return [Array<Link>] links whose url is not nil
def news_links
  visit "http://hk.apple.nextmedia.com/"

  candidates = doc.css("#article_ddl option").map do |node|
    entry = Link.new
    entry.title = node.text
    entry.url = node["value"]
    entry
  end

  candidates.reject { |entry| entry.url.nil? }
end