class NewsCrawler::Storage::RawData::MongoStorage

Raw data storage implemented using MongoDB

Constants

NAME

Public Class Methods

new(*opts) click to toggle source
# File lib/news_crawler/storage/raw_data/mongo_storage.rb, line 37
# Connect to MongoDB using the :application configuration and set up the
# raw data collection.
#
# The collection name is the configured prefix and the raw data suffix joined
# by an underscore. A unique ascending index on :url is ensured on it.
#
# @param opts [Array] accepted but not used by this constructor
def initialize(*opts)
  settings = SimpleConfig.for(:application)
  host     = settings.mongodb.host
  port     = settings.mongodb.port
  database = MongoClient.new(host, port)[settings.mongodb.db_name]

  collection_name = settings.prefix + '_' + settings.suffix.raw_data
  @coll = database[collection_name]
  @coll.ensure_index({:url => Mongo::ASCENDING}, {:unique => true})
end

Public Instance Methods

add(url, body) click to toggle source

Add an entry to the raw data collection, overwriting any existing data stored for the same URL. @param [ String ] url @param [ String ] body

# File lib/news_crawler/storage/raw_data/mongo_storage.rb, line 48
# Add an entry to the raw data collection, overwriting the stored body of an
# existing entry with the same URL (the update is issued as an upsert).
#
# The body is transcoded to UTF-8 with invalid and undefined byte sequences
# replaced. The conversion works on a copy, so the caller's string is left
# untouched and frozen strings are accepted.
#
# @param [ String ] url
# @param [ String ] body
# @return the result of the collection's +update+ call
def add(url, body)
  # Non-mutating encode: encode! would modify the caller's object in place
  # and raise on a frozen string.
  utf8_body = body.encode('utf-8', :invalid => :replace, :undef => :replace)
  @coll.update({:url   => url},
               {:$set  => {:body => utf8_body}},
               {:upsert => true})
end
clear() click to toggle source
# File lib/news_crawler/storage/raw_data/mongo_storage.rb, line 72
# Empty the raw data collection by calling +remove+ on it without a selector.
#
# @return the result of the collection's +remove+ call
def clear
  @coll.remove
end
count() click to toggle source

Get the number of raw data entries

# File lib/news_crawler/storage/raw_data/mongo_storage.rb, line 68
# Get the number of raw data entries.
#
# Delegates directly to the underlying collection's +count+.
#
# @return the value reported by the collection (presumably an Integer)
def count
  @coll.count
end
find_by_url(url) click to toggle source

Find the document with the corresponding URL and return its body. @param [ String ] url @return [ String, nil ]

# File lib/news_crawler/storage/raw_data/mongo_storage.rb, line 58
# Look up the stored body for a URL.
#
# @param [ String ] url
# @return [ String, nil ] the 'body' field of the matching document, or nil
#   when no document matches the URL
def find_by_url(url)
  entry = @coll.find_one({:url => url})
  return nil if entry.nil?

  entry['body']
end