class WebPageParser::GuardianPageParserV3
Public Instance Methods
content()
click to toggle source
# File lib/web-page-parser/parsers/guardian_page_parser.rb, line 100
#
# Extracts the article body as a flat list of text fragments.
#
# Looks at every descendant of `div#article-body-blocks` and
# `div[itemprop=articleBody]`, keeps the `p`, `h2`, `h3` and `ul` elements in
# document order, and returns their text with surrounding whitespace removed.
# A `ul` contributes one entry per `li` it contains rather than a single entry.
# Elements whose text is empty or whitespace-only are omitted.
#
# Returns @content unchanged when it is already set.
# NOTE(review): this method never assigns @content itself; presumably it is
# populated elsewhere (e.g. by a parent class or the constructor) - confirm.
#
# @return [Array<String>] non-empty, stripped text fragments
def content
  return @content if @content

  # html_doc is expected to respond to #css (presumably a Nokogiri document).
  body_nodes = html_doc.css('div#article-body-blocks *, div[itemprop=articleBody] *')
  wanted = body_nodes.select { |node| %w[p h2 h3 ul].include?(node.name) }

  fragments = wanted.flat_map do |node|
    # Lists are expanded into their individual items.
    sources = node.name == 'ul' ? node.css('li') : [node]
    sources.map { |source| source.text.strip }
  end

  # Strip happens before the emptiness check so whitespace-only elements are
  # dropped instead of leaking through as "" entries.
  fragments.reject(&:empty?)
end