module NewsCrawler::CrawlerModule
Include this to get basic module methods
Public Instance Methods
Find all visited urls with the given process state for the current module @param [ String ] state one of unprocessed, processing, processed @param [ Fixnum ] max_depth maximum url depth to return (inclusive) @return [ Array ] URL list
# File lib/news_crawler/crawler_module.rb, line 52
#
# List the visited URLs that are in the given processing state for this
# module. The work is delegated to URLQueue.find_all, keyed by the name of
# the including class.
#
# @param [ String ] state one of unprocessed, processing, processed
# @param [ Fixnum ] max_depth maximum URL depth to return (inclusive);
#   defaults to -1 (presumably "no limit" -- confirm against URLQueue)
# @return [ Array ] URL list
def find_all(state, max_depth = -1)
  module_name = self.class.name
  URLQueue.find_all(module_name, state, max_depth)
end
Find one visited url with the given process state for the current module @param [ String ] state @param [ Fixnum ] max_depth maximum url depth to return (inclusive) @return [ String, nil ] URL or nil if no such url exists
# File lib/news_crawler/crawler_module.rb, line 60
#
# Fetch a single visited URL that is in the given processing state for this
# module. The work is delegated to URLQueue.find_one, keyed by the name of
# the including class.
#
# @param [ String ] state
# @param [ Fixnum ] max_depth maximum URL depth to return (inclusive);
#   defaults to -1 (presumably "no limit" -- confirm against URLQueue)
# @return [ String, nil ] URL, or nil if no such URL exists
def find_one(state, max_depth = -1)
  module_name = self.class.name
  URLQueue.find_one(module_name, state, max_depth)
end
Find all visited urls that are still unprocessed @param [ Fixnum ] max_depth maximum url depth to return (inclusive) @return [ Array ] URL list
# File lib/news_crawler/crawler_module.rb, line 44
#
# List the visited URLs this module has not processed yet. Equivalent to
# asking URLQueue.find_all for the URLQueue::UNPROCESSED state under the
# including class's name.
#
# @param [ Fixnum ] max_depth maximum URL depth to return (inclusive);
#   defaults to -1 (presumably "no limit" -- confirm against URLQueue)
# @return [ Array ] URL list
def find_unprocessed(max_depth = -1)
  module_name = self.class.name
  URLQueue.find_all(module_name, URLQueue::UNPROCESSED, max_depth)
end
Load YAML object @param [ String ] key @return [ Object, nil ]
# File lib/news_crawler/crawler_module.rb, line 85
#
# Load the object stored under +key+ for this module by delegating to
# YAMLStor.get, keyed by the name of the including class.
#
# The documented contract takes only +key+, but the signature used to
# require a second positional argument, so the documented one-argument call
# raised ArgumentError. The extra argument is now optional: it is forwarded
# to YAMLStor.get only when the caller supplies it, so existing two-argument
# calls behave exactly as before.
#
# NOTE(review): the meaning of the extra argument is not visible from this
# module -- confirm against YAMLStor.get before relying on it.
#
# @param [ String ] key
# @param [ Object ] value optional extra argument, forwarded verbatim
# @return [ Object, nil ] whatever YAMLStor.get returns
def load_yaml(key, *value)
  YAMLStor.get(self.class.name, key, *value)
end
# File lib/news_crawler/crawler_module.rb, line 71
#
# Ask URLQueue.mark_all to set the URLQueue::UNPROCESSED state for this
# module, identified by the name of the including class.
#
# @return whatever URLQueue.mark_all returns
def mark_all_as_unprocessed
  module_name = self.class.name
  URLQueue.mark_all(module_name, URLQueue::UNPROCESSED)
end
Mark the given url as processed for the current module @param [ String ] url
# File lib/news_crawler/crawler_module.rb, line 31
#
# Record +url+ as processed for this module by delegating to URLQueue.mark
# with the including class's name and URLQueue::PROCESSED.
#
# @param [ String ] url
# @return whatever URLQueue.mark returns
def mark_processed(url)
  module_name = self.class.name
  URLQueue.mark(module_name, url, URLQueue::PROCESSED)
end
Mark the given url as unprocessed for the current module @param [ String ] url
# File lib/news_crawler/crawler_module.rb, line 37
#
# Record +url+ as unprocessed for this module by delegating to URLQueue.mark
# with the including class's name and URLQueue::UNPROCESSED.
#
# @param [ String ] url
# @return whatever URLQueue.mark returns
def mark_unprocessed(url)
  module_name = self.class.name
  URLQueue.mark(module_name, url, URLQueue::UNPROCESSED)
end
Atomically get the next unprocessed url and mark it as processing @param [ Fixnum ] max_depth maximum url depth to return (inclusive) @return [ String, nil ] URL or nil if no such url exists
# File lib/news_crawler/crawler_module.rb, line 67
#
# Take the next unprocessed URL for this module. The call is delegated to
# URLQueue.next_unprocessed, which is documented to fetch the URL and mark
# it as processing in one atomic step (not verifiable from this module).
#
# @param [ Fixnum ] max_depth maximum URL depth to return (inclusive);
#   defaults to -1 (presumably "no limit" -- confirm against URLQueue)
# @return [ String, nil ] URL, or nil if no such URL exists
def next_unprocessed(max_depth = -1)
  module_name = self.class.name
  URLQueue.next_unprocessed(module_name, max_depth)
end
Serialize object to YAML and save it (overwriting any existing value for the key) @param [ String ] key @param [ Object ] value
# File lib/news_crawler/crawler_module.rb, line 78
#
# Store +value+ under +key+ for this module by delegating to YAMLStor.add,
# keyed by the name of the including class. Per the module documentation the
# object is serialized to YAML and an existing key is overwritten; that
# behaviour lives in YAMLStor, not here.
#
# @param [ String ] key
# @param [ Object ] value
# @return whatever YAMLStor.add returns
def save_yaml(key, value)
  module_name = self.class.name
  YAMLStor.add(module_name, key, value)
end