Table of Contents - news_crawler-0.0.4 Documentation
Classes and Modules
- NewsCrawler
- NewsCrawler::CrawlerConfig
- NewsCrawler::CrawlerModule
- NewsCrawler::Downloader
- NewsCrawler::LinkSelector
- NewsCrawler::LinkSelector::SameDomainSelector
- NewsCrawler::NCLogger
- NewsCrawler::Processing
- NewsCrawler::Processing::StructureAnalysis
- NewsCrawler::Storage
- NewsCrawler::Storage::RawData
- NewsCrawler::Storage::RawData::MongoStorage
- NewsCrawler::Storage::RawData::RawDataEngine
- NewsCrawler::Storage::URLQueue
- NewsCrawler::Storage::URLQueue::DuplicateURLError
- NewsCrawler::Storage::URLQueue::MongoEngine
- NewsCrawler::Storage::URLQueue::URLQueueEngine
- NewsCrawler::Storage::YAMLStor
- NewsCrawler::Storage::YAMLStor::MongoStorage
- NewsCrawler::Storage::YAMLStor::YAMLStorEngine
- NewsCrawler::URLHelper
- Object
- Robots
Methods
- ::add — NewsCrawler::Storage::RawData
- ::add — NewsCrawler::Storage::URLQueue
- ::add — NewsCrawler::Storage::YAMLStor
- ::all — NewsCrawler::Storage::URLQueue
- ::clear — NewsCrawler::Storage::RawData
- ::clear — NewsCrawler::Storage::URLQueue
- ::clear — NewsCrawler::Storage::YAMLStor
- ::count — NewsCrawler::Storage::RawData
- ::count — NewsCrawler::Storage::YAMLStor
- ::exclude? — NewsCrawler::LinkSelector::SameDomainSelector
- ::find_all — NewsCrawler::Storage::URLQueue
- ::find_by_url — NewsCrawler::Storage::RawData
- ::find_one — NewsCrawler::Storage::URLQueue
- ::find_unvisited — NewsCrawler::Storage::URLQueue
- ::get — NewsCrawler::Storage::YAMLStor
- ::get_engines — NewsCrawler::Storage::RawData::RawDataEngine
- ::get_engines — NewsCrawler::Storage::URLQueue::URLQueueEngine
- ::get_engines — NewsCrawler::Storage::YAMLStor::YAMLStorEngine
- ::get_logger — NewsCrawler::NCLogger
- ::inherited — NewsCrawler::Storage::RawData::RawDataEngine
- ::inherited — NewsCrawler::Storage::URLQueue::URLQueueEngine
- ::inherited — NewsCrawler::Storage::YAMLStor::YAMLStorEngine
- ::load_application_config — NewsCrawler::CrawlerConfig
- ::load_samedomainselector_config — NewsCrawler::CrawlerConfig
- ::mark — NewsCrawler::Storage::URLQueue
- ::mark_all — NewsCrawler::Storage::URLQueue
- ::mark_all_unvisited — NewsCrawler::Storage::URLQueue
- ::mark_visited — NewsCrawler::Storage::URLQueue
- ::merge_config — NewsCrawler::CrawlerConfig
- ::new — NewsCrawler::Downloader
- ::new — NewsCrawler::LinkSelector::SameDomainSelector
- ::new — NewsCrawler::Processing::StructureAnalysis
- ::new — NewsCrawler::Storage::RawData::MongoStorage
- ::new — NewsCrawler::Storage::URLQueue::MongoEngine
- ::new — NewsCrawler::Storage::YAMLStor::MongoStorage
- ::new — Robots
- ::next_unprocessed — NewsCrawler::Storage::URLQueue
- ::normalize_url — NewsCrawler::Storage::URLQueue
- ::set_engine — NewsCrawler::Storage::RawData
- ::set_engine — NewsCrawler::Storage::URLQueue
- ::set_engine — NewsCrawler::Storage::YAMLStor
- ::set_level — NewsCrawler::NCLogger
- ::set_logdev — NewsCrawler::NCLogger
- #add — NewsCrawler::Storage::RawData::MongoStorage
- #add — NewsCrawler::Storage::RawData::RawDataEngine
- #add — NewsCrawler::Storage::URLQueue::MongoEngine
- #add — NewsCrawler::Storage::URLQueue::URLQueueEngine
- #add — NewsCrawler::Storage::YAMLStor::MongoStorage
- #add — NewsCrawler::Storage::YAMLStor::YAMLStorEngine
- #all — NewsCrawler::Storage::URLQueue::MongoEngine
- #all — NewsCrawler::Storage::URLQueue::URLQueueEngine
- #analyse — NewsCrawler::Processing::StructureAnalysis
- #classify_h2 — NewsCrawler::Processing::StructureAnalysis
- #clear — NewsCrawler::Storage::RawData::MongoStorage
- #clear — NewsCrawler::Storage::RawData::RawDataEngine
- #clear — NewsCrawler::Storage::URLQueue::MongoEngine
- #clear — NewsCrawler::Storage::URLQueue::URLQueueEngine
- #clear — NewsCrawler::Storage::YAMLStor::MongoStorage
- #clear — NewsCrawler::Storage::YAMLStor::YAMLStorEngine
- #count — NewsCrawler::Storage::RawData::MongoStorage
- #count — NewsCrawler::Storage::RawData::RawDataEngine
- #count — NewsCrawler::Storage::YAMLStor::MongoStorage
- #count — NewsCrawler::Storage::YAMLStor::YAMLStorEngine
- #count_a_and_non_a_tag — NewsCrawler::Processing::StructureAnalysis
- #extract_content — NewsCrawler::Processing::StructureAnalysis
- #extract_url — NewsCrawler::LinkSelector::SameDomainSelector
- #find_all — NewsCrawler::Storage::URLQueue::MongoEngine
- #find_all — NewsCrawler::Storage::URLQueue::URLQueueEngine
- #find_all — NewsCrawler::CrawlerModule
- #find_and_mark — NewsCrawler::Storage::URLQueue::MongoEngine
- #find_by_url — NewsCrawler::Storage::RawData::MongoStorage
- #find_by_url — NewsCrawler::Storage::RawData::RawDataEngine
- #find_longest_node — NewsCrawler::Processing::StructureAnalysis
- #find_lowest_ancestor_has_id — NewsCrawler::Processing::StructureAnalysis
- #find_one — NewsCrawler::Storage::URLQueue::MongoEngine
- #find_one — NewsCrawler::Storage::URLQueue::URLQueueEngine
- #find_one — NewsCrawler::CrawlerModule
- #find_unprocessed — NewsCrawler::CrawlerModule
- #find_unvisited — NewsCrawler::Storage::URLQueue::MongoEngine
- #find_unvisited — NewsCrawler::Storage::URLQueue::URLQueueEngine
- #get — NewsCrawler::Storage::YAMLStor::MongoStorage
- #get — NewsCrawler::Storage::YAMLStor::YAMLStorEngine
- #get_new_url — NewsCrawler::Downloader
- #get_result — NewsCrawler::Processing::StructureAnalysis
- #get_url_depth — NewsCrawler::Storage::URLQueue::MongoEngine
- #get_url_path — NewsCrawler::URLHelper
- #graceful_terminate — NewsCrawler::Downloader
- #graceful_terminate — NewsCrawler::LinkSelector::SameDomainSelector
- #hash_node — NewsCrawler::Processing::StructureAnalysis
- #is_url? — NewsCrawler::Processing::StructureAnalysis
- #load_yaml — NewsCrawler::CrawlerModule
- #mark — NewsCrawler::Storage::URLQueue::MongoEngine
- #mark — NewsCrawler::Storage::URLQueue::URLQueueEngine
- #mark_all — NewsCrawler::Storage::URLQueue::MongoEngine
- #mark_all — NewsCrawler::Storage::URLQueue::URLQueueEngine
- #mark_all_as_unprocessed — NewsCrawler::CrawlerModule
- #mark_all_unvisited — NewsCrawler::Storage::URLQueue::MongoEngine
- #mark_all_unvisited — NewsCrawler::Storage::URLQueue::URLQueueEngine
- #mark_processed — NewsCrawler::CrawlerModule
- #mark_unprocessed — NewsCrawler::CrawlerModule
- #mark_visited — NewsCrawler::Storage::URLQueue::MongoEngine
- #mark_visited — NewsCrawler::Storage::URLQueue::URLQueueEngine
- #next_unprocessed — NewsCrawler::Storage::URLQueue::MongoEngine
- #next_unprocessed — NewsCrawler::Storage::URLQueue::URLQueueEngine
- #next_unprocessed — NewsCrawler::CrawlerModule
- #node_info — NewsCrawler::Processing::StructureAnalysis
- #remove_tag — NewsCrawler::Processing::StructureAnalysis
- #run — NewsCrawler::Downloader
- #run — NewsCrawler::LinkSelector::SameDomainSelector
- #same_domain? — NewsCrawler::URLHelper
- #save_yaml — NewsCrawler::CrawlerModule
- #wait_for_url — NewsCrawler::Downloader
- #wait_for_url — NewsCrawler::LinkSelector::SameDomainSelector