class FreeSpider::Begin

Public Class Methods

new() click to toggle source
# File lib/free_spider/begin.rb, line 32
# Sets up the empty crawl state for a new spider.
def initialize
  # Temporary storage for scraped content.
  @news_teaching_content = {}
  # Links that have already been visited.
  @visited = []
  # Links that have been found (consumed by #crawl).
  @todo = []
  # Article titles, kept to detect duplicates.
  @title_saved = []
end

Public Instance Methods

crawl() click to toggle source

程序开始函数

# File lib/free_spider/begin.rb, line 127
# Entry point of the crawl: takes the next link from @todo that is not
# listed in @visited and passes it to find_link.
#
# Prints "结束" and returns nil once @todo runs out of unvisited links.
# Already-visited links taken from the queue are discarded.
#
# NOTE: the original comments mention dropping external and special links
# and printing the scraped content (post_title); none of that is implemented
# in this method.
def crawl
  candidate = @todo.shift
  # Keep dequeuing while the head of the queue has already been visited.
  while !candidate.nil? && @visited.include?(candidate)
    candidate = @todo.shift
  end

  return find_link(candidate) unless candidate.nil?

  puts "结束"
  nil
end
plan(&block) click to toggle source

程序制定函数,用户选择需要抓取的网页内容

# File lib/free_spider/begin.rb, line 44
# Lets the caller configure the crawl: the given block is evaluated in the
# context of this object (via instance_eval), so calls inside the block
# resolve against the spider itself.
#
# Prints "no plan" and returns nil when no block is supplied; otherwise
# returns the block's result.
def plan(&block)
  unless block
    puts "no plan"
    return
  end

  instance_eval(&block)
end
site(url) click to toggle source

需要爬取的网站首页

# File lib/free_spider/begin.rb, line 148
# Registers the home page of the site to crawl.
#
# Stores +url+ in @site and appends it to the @todo queue. A nil, empty or
# whitespace-only +url+ is rejected with a "URL is blank" message and
# nothing is queued.
#
# @param url [String, nil] home page URL
# @return [Array, nil] the @todo queue after appending, or nil when rejected
def site(url)
  puts "--------Ready---------"
  # to_s makes the check safe for nil (which has no #empty?); strip makes
  # whitespace-only input count as blank, matching the message below.
  if url.to_s.strip.empty?
    puts "URL is blank"
  else
    @site = url
    @todo << @site
  end
end
write_results_to_database() click to toggle source

写入mysql

# File lib/free_spider/begin.rb, line 159
# Saves the scraped content (the original description says: write to MySQL).
#
# Builds a FreeSpider::Downloader::NewsTeaching from @news_teaching_content,
# calls #save on it, and prints a success or error banner depending on
# whether #save returned a truthy value.
def write_results_to_database
  record = FreeSpider::Downloader::NewsTeaching.new(@news_teaching_content)
  banner = record.save ? "--------save success!--------" : "--------save error!--------"
  puts banner
end
write_results_to_file(file_name) click to toggle source

写入文件

# File lib/free_spider/begin.rb, line 173
# Writes the collected titles to a file, replacing any existing content.
#
# The titles in @titles are de-duplicated, nil entries are dropped, and the
# resulting array is written in its Array#to_s form (e.g. ["a", "b"]).
# The file is created if it does not exist.
#
# @param file_name [String] path of the file to write
# @return [Integer] number of bytes written
def write_results_to_file(file_name)
  # NOTE(review): @titles is not assigned by the visible initializer (which
  # sets @title_saved); Array() keeps an unset @titles from raising and
  # writes an empty list instead — confirm where @titles is populated.
  titles = Array(@titles).uniq.compact
  # File.write opens in "w" mode, creating or truncating the file, and closes
  # the handle itself, so no separate existence check or File.new is needed.
  File.write(file_name, titles.to_s)
end