module ZipGeoJp::RakeHelper

Constants

DOWNLOAD_URL_ENDPOINT
DOWNLOAD_URL_SUFFIX
GOOGLE_MAP_CRAWLING_CONCURRENCY
GOOGLE_MAP_CRAWLING_DURATION

Public Class Methods

reset() click to toggle source
# File lib/zip_geo_jp/rake_helper.rb, line 40
# Resets the record store by delegating to ZipGeoJp::Record.reset.
#
# @return the value returned by ZipGeoJp::Record.reset (its shape is not
#   visible from this file)
def reset
  ZipGeoJp::Record.reset
end
update() click to toggle source
# File lib/zip_geo_jp/rake_helper.rb, line 19
# Populates ZipGeoJp::Record with coordinates for every zip code found in the
# downloadable archives.
#
# For each entry of DOWNLOAD_URL_SUFFIX the archive at
# DOWNLOAD_URL_ENDPOINT + suffix is read via extract_zip_code, and its rows
# are processed on GOOGLE_MAP_CRAWLING_CONCURRENCY threads. Zip codes that
# already have a record are skipped; the others are looked up with
# coordinates_from_google_map and stored, followed by a pause of
# GOOGLE_MAP_CRAWLING_DURATION.
#
# @return [Array] one element per suffix: the value returned by Parallel.each
def update
  DOWNLOAD_URL_SUFFIX.map do |suffix|
    rows = extract_zip_code(DOWNLOAD_URL_ENDPOINT + suffix)

    Parallel.each(rows, in_threads: GOOGLE_MAP_CRAWLING_CONCURRENCY) do |zip_code, pref, city, block|
      puts "Item: #{zip_code} #{pref}#{city}#{block}, Worker: #{Parallel.worker_number}"
      # Already stored: skip the lookup and the pause that follows it.
      next if ZipGeoJp::Record[zip_code]

      lat_lng = coordinates_from_google_map(zip_code)
      ZipGeoJp::Record[zip_code] = {
        prefecture: pref,
        city: city,
        block: block,
        latitude: lat_lng[0],
        longitude: lat_lng[1],
      }
      # Pause after every lookup before this worker takes its next row.
      sleep(GOOGLE_MAP_CRAWLING_DURATION)
    end
  end
end

Private Class Methods

coordinates_from_google_map(zip_code) click to toggle source
# File lib/zip_geo_jp/rake_helper.rb, line 65
# Looks up the coordinates that Google Maps associates with a postal code.
#
# Fetches the Google Maps "place" page for the code, takes the URL found in
# the page's <meta itemprop="image"> tag and parses the "lat,lng" value of
# that URL's +center+ query parameter.
#
# @param zip_code [String] postal code; a bare 7-digit code is rewritten to
#   the hyphenated NNN-NNNN form, any other value is used unchanged
# @return [Array<Float>] the comma-separated +center+ values converted with to_f
#   (presumably [latitude, longitude] -- callers treat them in that order)
# @raise [NoMethodError] when the page has no matching meta tag or the image
#   URL carries no +center+ parameter
def coordinates_from_google_map(zip_code)
  # \A/\z anchor the whole string; ^/$ would also match a single line inside
  # a multi-line value.
  hyphenated     = zip_code.sub(/\A(\d{3})(\d{4})\z/, '\1-\2')
  # %E3%80%92 is the URL-encoded postal mark (U+3012) that prefixes the code.
  google_map_url = "https://www.google.com/maps/place/%E3%80%92#{hyphenated}/"

  # Kernel#open stopped opening URLs in Ruby 3.0; URI.open (open-uri) is the
  # supported entry point, and the block form closes the handle after reading.
  html      = URI.open(google_map_url, &:read)
  image_url = Nokogiri::HTML(html).at('meta[itemprop="image"]')['content']

  # decode_www_form splits and percent-decodes the query in one step. It
  # replaces the removed URI.decode and, unlike a manual split('=') + to_h,
  # tolerates parameters with an empty value (e.g. "language=").
  center = URI.decode_www_form(URI.parse(image_url).query).to_h['center']
  center.split(',').map(&:to_f)
end
extract_zip_code(download_url) click to toggle source
# File lib/zip_geo_jp/rake_helper.rb, line 46
# Reads address rows out of the zip archive published at +download_url+.
#
# The archive is fetched through ZipGeoJp::Downloader (into 'tmp') unless the
# downloader reports it is already present. The first entry matching *.CSV is
# converted from Shift_JIS to UTF-8 and parsed as CSV.
#
# @param download_url [String] location of the zip archive
# @return [Array<Array>] one [zip_code, prefecture, city, block] per CSV row,
#   taken from columns 2, 6, 7 and 8; the three address fields are
#   NFKC-normalized
def extract_zip_code(download_url)
  archive = ZipGeoJp::Downloader.new(download_url, 'tmp')
  archive.download unless archive.downloaded?

  records = []
  Zip::File.open(archive.local) do |zip_file|
    csv_entry = zip_file.glob('*.CSV').first
    # The entry's bytes are interpreted as Shift_JIS and transcoded to UTF-8.
    csv_text = csv_entry.get_input_stream.read.encode(Encoding::UTF_8, Encoding::Shift_JIS)

    parsed = CSV.parse(csv_text).map do |row|
      pref, city, block = row.slice(6, 3).map { |field| field.unicode_normalize(:nfkc) }
      [row[2], pref, city, block]
    end
    records.concat(parsed)
  end

  records
end