class Splam::Rules::BadWords

Attributes

bad_word_score[RW]
suspicious_word_score[RW]

Public Instance Methods

run()
# File lib/splam/rules/bad_words.rb, line 10
# Scores @body against lists of known spam vocabulary.
#
# Two kinds of vocabulary are checked:
#
# * "bad words", grouped by spam genre. Each word is interpolated into a
#   word-boundary regexp and matched case-sensitively against @body. A hit
#   scores points via add_score, plus extra points when the word shows up
#   in link text, in a bare "http://" URL at the start of a line, as the
#   whole text of a link, or inside an <a ...> tag. If more than half of a
#   genre's words are present, one additional 1000-point penalty is added
#   for that genre.
# * "suspicious words", matched as plain substrings of the downcased body,
#   scored once per occurrence and again per occurrence inside link text.
#
# Reads @body, self.class.bad_word_score and
# self.class.suspicious_word_score; reports everything through
# add_score(points, reason).
def run
  bad_words = {}
  bad_words[:pornspam] = %w( sex sexy porn gay erotica erotico topless naked viagra erotismo porno porn lesbian amateur tit\b)
  bad_words[:pornspam] |= %w( gratis erotismo porno torrent bittorrent adulto videochat  video 3dsex)
  bad_words[:pornspam] << /pel?cula/ << /pornogr?fica/ << "portal porno" # srsly, spamming in spanish?
  bad_words[:pornspam] |= %w( webcam  free-web-host rapidshare)

  bad_words[:viagraspam] = %w( cialis viagra pharmacy prescription levitra kamagra)
  bad_words[:benzospam]  = %w( ultram tramadol pharmacy prescription )
  bad_words[:cashspam]   = %w( payday loan jihad ) << "payday loan"
  bad_words[:pharmaspam] = %w( propecia finasteride viagra )

  bad_words[:nigerian]   = ["million pounds sterling", "dear sirs,", "any bank account", "winning notification", "western union", "diagnosed with cancer", "bank treasury", "unclaimed inheritance"]

  # linkspammers
  bad_words[:linkspam] = ["increase traffic", "discovered your blog", "backlinks", "sent me a link", "more visitors to my site", "targeted traffic", "increase traffic to your website", "estore"]

  bad_words[:beats] = %w( beats dre headphones sale cheap shipping ) << "monster beats" << "best online"
  bad_words[:rolex] = %w( rolex watch replica watches price )
  bad_words[:wtf] = %w( bilete avion )

  # counterfeit brand goods
  bad_words[:bagspam]  = %w(handbag louis louisvuitton vuitton chanel coach clearance outlet hermes bag scarf sale ralphlauren)
  bad_words[:handbags] = %w( karenmillen michaelkors kors millen bags purchase handbag chanel outlet tasche longchamp kaufen louboutin christianlouboutin)
  bad_words[:blingspam] = %w( tiffany jewellery tiffanyco clearance outlet)
  bad_words[:uggspam]  = %w(\buggs?\b \buggboots\b clearance outlet )
  bad_words[:wedding]  = ["wedding", "wedding dress", "weddingdress", "strapless"]

  bad_words[:webcamspam] = %w( live girls webcam adult singles) << "chat room"
  bad_words[:gamereview] = %w( games-review-it.com game-reviews-online.com )
  bad_words[:streaming]  = %w( watchmlbbaseball watchnhlhockey pspnsportstv.com )

  bad_words[:forum_spam] = ["IMG", "url="]

  suspicious_words =  %w( free buy galleries dating gallery hard hardcore video homemade celebrity ) << "credit card" << "my friend" << "friend sent me"
  # NOTE(review): "??????" looks like a non-ASCII word lost to an encoding
  # problem; as written it matches six literal question marks -- confirm.
  suspicious_words |= %w( adult overnight shipping free hot movie nylon arab ?????? seo)
  suspicious_words << "forums/member.php?u=" << "chat room" << "free chat" << "yahoo chat" << "page.php"

  bad_score        = self.class.bad_word_score
  suspicious_score = self.class.suspicious_word_score

  # These fragments of @body do not depend on the word being checked, so
  # they are extracted once instead of once per matching word.
  link_texts   = @body.scan(/<a[^>]+>(.*?)<\/a>/).map(&:first)
  anchor_texts = @body.scan(/<a.*?>(.*?)<\/a>/).map(&:first)
  anchor_tags  = @body.scan(/<a(.*?)>/).map(&:first)

  bad_words.each do |key, wordlist|
    counter = 0
    wordlist.each do |word|
      # Entries are regexp source (or Regexp objects), not literal text.
      results = Regexp.new("\\b(#{word})\\b").match(@body)
      next unless results

      counter += 1
      # MatchData#size is the whole match plus its capture groups, so for
      # entries without groups of their own this is bad_score ** 2.
      add_score((bad_score ** results.size), "nasty word: '#{word}'")

      # Add more points if the bad word is INSIDE a link
      link_texts.each do |text|
        add_score bad_score * 10 * text.scan(word).size, "nasty word inside a link: #{word}"
      end
      @body.scan(/\nhttp:\/\/(.*?#{word})\//).each do |match|
        add_score bad_score * 10 * match[0].scan(word).size, "nasty word inside a straight-up link: #{word}"
      end

      # Only links whose visible text is nothing but the word (ignoring
      # surrounding whitespace) earn the "entire link" bonus.
      entire_link = Regexp.new("\\A\\s*(?:#{word})\\s*\\z")
      anchor_texts.each do |text|
        next unless text =~ entire_link
        add_score bad_score * 50, "nasty word is the entire link: #{word}"
      end

      anchor_tags.each do |tag|
        add_score bad_score * 10 * tag.scan(word).size, "nasty word inside a URL: #{word}"
      end
    end

    # Judge the genre once, after every word in it has been counted.
    if counter > (wordlist.size / 2)
      add_score 1000, "Lots of bad words from one genre (#{key}): #{counter}"
    end
  end

  lowered_body = @body.downcase
  suspicious_words.each do |word|
    hits = lowered_body.scan(word).size
    next if hits.zero?

    add_score((suspicious_score * hits), "suspicious word: #{word}")
    # Add more points if the suspicious word is INSIDE a link; link text is
    # downcased so it is counted the same way the body was searched.
    link_texts.each do |text|
      add_score((suspicious_score * text.downcase.scan(word).size), "suspicious word inside a link: #{word}")
    end
  end
end