class Datasets::EStatJapan::StatsData
wrapper class for e-Stat API service
Attributes
Public Class Methods
Generates an accessor instance for the e-Stat API endpoint `getStatsData`.
For the detailed spec, see www.e-stat.go.jp/api/api-info/e-stat-manual
@param [String] id Statistical data id
@param [String] app_id e-Stat application id (if omitted, taken from `Datasets::EStatJapan.configure` or the environment variable `ESTATJAPAN_APP_ID`)
@param [Array<String>] areas Target areas (fetch all if omitted)
@param [Array<String>] categories Category IDs (fetch all if omitted)
@param [Array<String>] times Time axes (fetch all if omitted)
@param [Array<Number>] skip_levels Skip levels for parsing (defaults to `[1]`)
@param [String] hierarchy_selection Select target from 'child', 'parent', or 'both'. (Example: 札幌市○○区 -> 'child':札幌市○○区 only; 'parent':札幌市 only; 'both': Both selected) (defaults to `child`)
@param [Boolean] skip_nil_column Skip column if contains nil (defaults to `true`)
@param [Boolean] skip_nil_row Skip row if contains nil (defaults to `false`)
@param [Range] time_range Index range selecting which of the returned time axes to keep (keep all if omitted)
@example
stats_data = Datasets::EStatJapan::StatsData.new(
  "0000020201", # A Population and household (key name: A 人口・世帯)
  categories: ["A1101"], # Population (key name: A1101_人口総数)
  areas: ["01105", "01106"], # Toyohira-ku Sapporo-shi Hokkaido, Minami-ku Sapporo-shi Hokkaido
  times: ["1981100000", "1982100000"],
  hierarchy_selection: 'child',
  skip_nil_column: true,
  skip_nil_row: false,
)
Datasets::Dataset::new
# File lib/datasets/e-stat-japan.rb, line 57
# Builds an accessor for the e-Stat `getStatsData` endpoint.
#
# Nothing is downloaded here: the constructor only records the options,
# builds the request URL and derives the cache file path from an MD5 of
# that URL.
#
# @param id [String] statistical data id
# @param app_id [String, nil] application id; falls back to `fetch_app_id`
# @param areas [Array<String>, nil] area codes to request
# @param categories [Array<String>, nil] category codes to request
# @param times [Array<String>, nil] time codes to request
# @param skip_levels [Array<Integer>] area levels to drop while parsing
# @param hierarchy_selection [String] 'child', 'parent'; any other value
#   is treated as 'both'
# @param skip_nil_column [Boolean] drop columns that have missing cells
# @param skip_nil_row [Boolean] drop rows that have missing cells
# @param time_range [Range, nil] index range applied to the time tables
# @raise [ArgumentError] when no application id can be resolved
def initialize(id,
               app_id: nil,
               areas: nil, categories: nil, times: nil,
               skip_levels: [1],
               hierarchy_selection: 'child',
               skip_nil_column: true,
               skip_nil_row: false,
               time_range: nil)
  @app_id = app_id || fetch_app_id
  raise ArgumentError, 'Please set app_id via `Datasets::EStatJapan.configure` method, environment var `ESTATJAPAN_APP_ID` or keyword argument `:app_id`' if @app_id.nil? || @app_id.empty?

  super()

  @api_version = '3.0'
  @base_url = "https://api.e-stat.go.jp/rest/#{@api_version}/app/json/getStatsData"
  @metadata.id = "e-stat-japan-#{@api_version}"
  @metadata.name = "e-Stat API #{@api_version}"
  @metadata.url = @base_url
  @metadata.description = "e-Stat API #{@api_version}"

  @id = id
  @areas = areas
  @categories = categories
  @times = times
  @skip_levels = skip_levels

  # [skip_child_area, skip_parent_area] for each selection mode.
  @skip_child_area, @skip_parent_area =
    case hierarchy_selection
    when 'child' then [false, true]
    when 'parent' then [true, false]
    else [false, false] # 'both'
    end

  @skip_nil_column = skip_nil_column
  @skip_nil_row = skip_nil_row
  @time_range = time_range

  # The cache file name is keyed on the full request URL.
  @url = generate_url
  url_digest = Digest::MD5.hexdigest(@url.to_s)
  @data_path = cache_dir_path + "e-stat-japan-#{url_digest}.json"
  @loaded = false
end
Public Instance Methods
# File lib/datasets/e-stat-japan.rb, line 140
# Returns the area definitions held in `@areas`, calling `load_data`
# first so the dataset is fetched and indexed if that has not happened yet.
def areas
  load_data
  @areas
end
# File lib/datasets/e-stat-japan.rb, line 150
# Returns the column definitions held in `@columns`, calling `load_data`
# first so the dataset is fetched and indexed if that has not happened yet.
def columns
  load_data
  @columns
end
Fetches data records from the remote API and yields them one by one.
@example
indices = []
rows = []
map_id_name = {}
estat.each do |record|
  # Select Hokkaido prefecture only
  next unless record.id.to_s.start_with? '01'
  indices << record.id
  rows << record.values
  map_id_name[record.id] = record.name
end
# File lib/datasets/e-stat-japan.rb, line 120
# Yields one Record per area, built from the indexed data.
#
# A record's values are ordered time table by time table and, within each
# time table, column by column; entries flagged with `:skip` are left out.
# A cell with no indexed value is `nil`.
#
# @yield [Record] record built from the area code, the area's '@name'
#   and the flat list of values
# @return [Enumerator] when no block is given
def each
  return to_enum(__method__) unless block_given?

  load_data

  # The skip flags do not depend on the area, so resolve them once
  # instead of re-filtering both hashes for every area.
  time_keys = @time_tables.reject { |_key, x| x[:skip] }.keys
  column_keys = @columns.reject { |_key, x| x[:skip] }.keys

  @areas.each do |a_key, a_value|
    values = time_keys.flat_map do |st_key|
      column_keys.map { |c_key| @indexed_data.dig(st_key, a_key, c_key) }
    end
    next if @skip_nil_row && values.any?(&:nil?)

    yield Record.new(a_key, a_value['@name'], values)
  end
end
# File lib/datasets/e-stat-japan.rb, line 155
# Returns the header list held in `@schema`, calling `load_data` first so
# the dataset is fetched and indexed if that has not happened yet.
def schema
  load_data
  @schema
end
# File lib/datasets/e-stat-japan.rb, line 145
# Returns the time-table definitions held in `@time_tables`, calling
# `load_data` first so the dataset is fetched and indexed if that has not
# happened yet.
def time_tables
  load_data
  @time_tables
end
Private Instance Methods
# File lib/datasets/e-stat-japan.rb, line 303
# Builds the header labels for the value columns.
#
# Each label is "<time table '@name'>_<column '@name'>". Labels are ordered
# time table first, then column, and entries flagged with `:skip` are
# omitted.
#
# @return [Array<String>] one label per (time table, column) pair
def create_header
  time_names = @time_tables.reject { |_key, x| x[:skip] }.values.map { |x| x['@name'] }
  column_names = @columns.reject { |_key, x| x[:skip] }.values.map { |x| x['@name'] }

  time_names.product(column_names).map do |time_name, column_name|
    "#{time_name}_#{column_name}"
  end
end
# File lib/datasets/e-stat-japan.rb, line 177
# Picks the class definitions whose '@id' equals +id+ out of a parsed
# API response.
#
# @param data [Hash] parsed JSON response
# @param id [String] class id to look for (e.g. 'time', 'cat01', 'area')
# @return [Array<Hash>] matching CLASS_OBJ entries; empty when there are none
def extract_def(data, id)
  class_objs = data.dig('GET_STATS_DATA', 'STATISTICAL_DATA', 'CLASS_INF', 'CLASS_OBJ')
  # A lone entry may arrive as a bare object instead of a one-element
  # array (index_def handles the same shape for 'CLASS'), and the key may
  # be missing altogether.
  class_objs = [class_objs] if class_objs.is_a?(Hash)
  (class_objs || []).select { |x| x['@id'] == id }
end
# File lib/datasets/e-stat-japan.rb, line 200
# Resolves the application id used when none is passed to the constructor:
# `EStatJapan.app_id` wins, otherwise the `ESTATJAPAN_APP_ID` environment
# variable is used.
#
# @return [String, nil] nil when neither source is set
def fetch_app_id
  configured = EStatJapan.app_id
  return configured if configured

  ENV['ESTATJAPAN_APP_ID']
end
# File lib/datasets/e-stat-japan.rb, line 211
# Downloads the API response to the cache path unless a cached copy
# already exists there.
def fetch_data
  # MEMO:
  # The e-stat api always returns 200 (Ok)
  # even if an error happens, despite its error mapping.
  # So we can't avoid caching the response retrieved from the api.
  # ref: https://www.e-stat.go.jp/api/api-info/e-stat-manual3-0
  return if @data_path.exist?

  download(@data_path, @url.to_s)
end
# File lib/datasets/e-stat-japan.rb, line 162
# Builds the request URL for the configured query.
#
# The fixed parameters are always sent. `cdArea`, `cdCat01` and `cdTime`
# are appended, comma-joined, only when the corresponding option is an
# Array (Array subclasses included); any other value leaves the filter out.
#
# @return [URI] `@base_url` plus the encoded query string
def generate_url
  params = {
    appId: @app_id,
    lang: 'J',
    statsDataId: @id,
    metaGetFlg: 'Y',
    cntGetFlg: 'N',
    sectionHeaderFlg: '1'
  }

  # Optional filters, in the order they appear in the query string.
  { cdArea: @areas, cdCat01: @categories, cdTime: @times }.each do |name, codes|
    params[name] = codes.join(',') if codes.is_a?(Array)
  end

  URI.parse("#{@base_url}?#{URI.encode_www_form(params)}")
end
# File lib/datasets/e-stat-japan.rb, line 193
# Extracts the value rows from a parsed API response.
#
# @param data [Hash] parsed JSON response
# @return [Array<Hash>] the DATA_INF/VALUE rows; empty when the key is absent
def get_values(data)
  values = data.dig('GET_STATS_DATA', 'STATISTICAL_DATA', 'DATA_INF', 'VALUE')
  return [] if values.nil?

  # A lone row may arrive as a bare object rather than a one-element array
  # (index_def handles the same shape for 'CLASS'); callers iterate rows.
  values.is_a?(Hash) ? [values] : values
end
# File lib/datasets/e-stat-japan.rb, line 220
# Parses the cached API response and builds the lookup structures used
# by the public readers.
#
# Sets `@time_tables`, `@columns` and `@areas` (code => definition),
# `@indexed_data` (time code => area code => category code => Float),
# `@schema`, and finally marks the dataset as loaded.
#
# @raise [APIError] when the response status is not 0; the cached file is
#   removed first so the error response is not reused
def index_data
  raw_data = JSON.parse(File.read(@data_path))

  # check status
  api_status = raw_data.dig('GET_STATS_DATA', 'RESULT', 'STATUS')
  if api_status != 0
    # remove error response cache manually
    FileUtils.rm(@data_path)
    error_msg = raw_data.dig('GET_STATS_DATA', 'RESULT', 'ERROR_MSG')
    raise APIError, "code #{api_status} : #{error_msg}"
  end

  # index definitions by code
  @time_tables = index_def(extract_def(raw_data, 'time'))
  @columns = index_def(extract_def(raw_data, 'cat01'))
  @areas = index_def(extract_def(raw_data, 'area'))

  # apply time_range to time_tables
  if @time_range.is_a?(Range)
    # Resolve the wanted keys once, before filtering: slicing `keys` from
    # inside `select!` would look at a hash that is already being pruned.
    # A range that lies entirely outside the keys selects nothing.
    wanted = @time_tables.keys[@time_range] || []
    @time_tables.select! { |key, _value| wanted.include?(key) }
  end

  @indexed_data = @time_tables.keys.map { |key| [key, {}] }.to_h
  get_values(raw_data).each do |row|
    by_area = @indexed_data[row['@time']]
    next unless by_area # time table filtered out above

    cells = (by_area[row['@area']] ||= {})
    # NOTE(review): `to_f` turns any non-numeric cell text into 0.0 --
    # confirm that is intended for the API's placeholder values.
    cells[row['@cat01']] = row['$'].to_f
  end

  skip_areas
  skip_nil_column
  @schema = create_header
  @loaded = true
end
# File lib/datasets/e-stat-japan.rb, line 185
# Indexes the 'CLASS' entries of the first definition by their '@code'.
#
# The argument is left untouched.
#
# @param data_def [Array<Hash>] definitions as returned by extract_def
# @return [Hash{String => Hash}] '@code' => class entry; empty when
#   +data_def+ is empty
def index_def(data_def)
  definition = data_def.first
  return {} if definition.nil?

  classes = definition['CLASS']
  # convert to array when number of element is 1
  classes = [classes] unless classes.is_a?(Array)
  classes.map { |x| [x['@code'], x] }.to_h
end
# File lib/datasets/e-stat-japan.rb, line 204
# Fetches and indexes the dataset the first time it is needed; does
# nothing once `@loaded` is set.
def load_data
  unless @loaded
    fetch_data
    index_data
  end
end
# File lib/datasets/e-stat-japan.rb, line 263
# Prunes `@areas` in place according to the level and hierarchy options.
#
# 1. Areas whose '@level' is listed in `@skip_levels` are removed.
# 2. With `@skip_parent_area`, every area that is the '@parentCode' of a
#    remaining area is removed.
# 3. With `@skip_child_area`, every area whose '@parentCode' is itself a
#    remaining area is removed.
def skip_areas
  # skip levels
  @areas.reject! { |_code, area| @skip_levels.include?(area['@level'].to_i) }

  # skip area that has children
  if @skip_parent_area
    # inspect hierarchy of areas
    @areas.each_value do |area|
      parent = @areas[area['@parentCode']]
      parent[:has_children] = true if parent
    end
    # keep only areas without children
    @areas.reject! { |_code, area| area[:has_children] }
  end

  return unless @skip_child_area

  # skip child area
  # Look parents up in a snapshot taken before any deletion: checking the
  # hash being pruned would keep a grandchild once its parent is removed.
  known = @areas.dup
  @areas.reject! { |_code, area| known.key?(area['@parentCode']) }
end
# File lib/datasets/e-stat-japan.rb, line 283
# Flags time tables and columns that have missing data with `:skip`.
#
# Does nothing unless `@skip_nil_column` is set. For every remaining area:
# a time table with no data for that area is flagged, and otherwise every
# column with no value for that (time table, area) pair is flagged.
def skip_nil_column
  return unless @skip_nil_column

  # filter time_tables and columns
  @areas.each_key do |a_key|
    @time_tables.each do |st_key, st_value|
      by_area = @indexed_data[st_key]
      unless by_area.key?(a_key)
        st_value[:skip] = true
        next
      end

      # Looked up once per (time table, area) rather than once per column.
      cells = by_area[a_key]
      @columns.each do |c_key, c_value|
        c_value[:skip] = true unless cells.key?(c_key)
      end
    end
  end
end