class WebPageParser::BbcNewsPageParserV4

Constants

CONTENT_RE
STRIP_CAPTIONS_RE2
STRIP_EMBEDDEDHYPER_RE
STRIP_HIDDEN_A
STRIP_HYPERPUFF_RE
STRIP_MARKETDATA_RE
STRIP_PAGE_BOOKMARKS
STRIP_STORY_DATE
STRIP_STORY_FEATURE
STRIP_STORY_LASTUPDATED
STRIP_STORY_TIME
STRIP_TWITTER_WIDGET2_RE
STRIP_TWITTER_WIDGET_RE
TITLE_RE

Public Instance Methods

content_processor()
# File lib/web-page-parser/parsers/bbc_news_page_parser.rb, line 122
def content_processor
  @content = STRIP_PAGE_BOOKMARKS.gsub(@content, '')
  @content = STRIP_STORY_DATE.gsub(@content, '')
  @content = STRIP_STORY_LASTUPDATED.gsub(@content, '')
  @content = STRIP_STORY_TIME.gsub(@content, '')
  @content = TITLE_RE.gsub(@content, '')
  @content = STRIP_CAPTIONS_RE2.gsub(@content, '')
  @content = STRIP_HIDDEN_A.gsub(@content, '')
  @content = STRIP_STORY_FEATURE.gsub(@content, '')
  @content = STRIP_HYPERPUFF_RE.gsub(@content, '')
  @content = STRIP_MARKETDATA_RE.gsub(@content, '')
  @content = STRIP_EMBEDDEDHYPER_RE.gsub(@content, '')
  @content = STRIP_TWITTER_WIDGET_RE.gsub(@content, '')
  @content = STRIP_TWITTER_WIDGET2_RE.gsub(@content, '')
  super
end