class Geolocal::Provider::DB_IP
Constants
- START_URL
Public Instance Methods
check_country_codes(countries, row)
click to toggle source
A bit of debugging code that prints all non-matched country codes; it should be deleted one day. The Countries gem doesn’t know about these country codes from the CSV: CS, FX, UK, YU, TP, and blank.
# File lib/geolocal/provider/db_ip.rb, line 75
# Debugging aid: prints the country code and the two range bounds of a CSV
# row whose country code (row[2]) is not present in any of the code
# collections found in +countries+.
def check_country_codes(countries, row)
  # Collect every known code once and cache the resulting set.
  @known_codes ||= countries.each_with_object(Set.new) do |(_, codes), known|
    known.merge(codes)
  end

  code = row[2]
  return if @known_codes.include?(code)

  puts "#{code}: #{row[0]}..#{row[1]}"
end
csv_file()
click to toggle source
# File lib/geolocal/provider/db_ip.rb, line 32
# Path of the gzipped country CSV inside the directory named by
# config[:tmpdir].
def csv_file
  format('%s/dbip-country.csv.gz', config[:tmpdir])
end
download_files()
click to toggle source
# File lib/geolocal/provider/db_ip.rb, line 36
# Downloads the country CSV linked from START_URL into csv_file.
#
# Does nothing when up_to_date?(csv_file, 86400) is true. Otherwise fetches
# the START_URL page, takes the href of the first <a> tag whose class
# attribute contains "btn-primary", and streams that URL into csv_file while
# reporting progress through status and update_download_status.
#
# Raises RuntimeError when no usable link is found in the page or when the
# download request does not return a 2xx response.
def download_files
  # they update the file every month but no idea which day they upload it
  return if up_to_date?(csv_file, 86400)

  page = Net::HTTP.get(URI(START_URL))

  # if we used Nokogiri: (we don't since we don't want to force the dependency)
  #   doc = Nokogiri::HTML(page)
  #   href = URI doc.css('a.btn-primary').attr('href').to_s
  elem = page.match(/<a\b[^>]*class=['"][^'"]*btn-primary[^>]*>/)
  raise "no <a class='btn-primary'> element found in #{START_URL}" unless elem

  attr = elem.to_s.match(/href=['"]([^'"]+)['"]/)
  raise "no href found in #{elem}" unless attr

  href = URI(attr[1])

  FileUtils.mkdir_p(config[:tmpdir])
  status "downloading #{href} to #{csv_file}\n"

  # Start the counters fresh so the progress output and the summary line
  # below describe this download only (and are never nil for an empty body).
  @current_byte = 0
  @previous_print = 0

  elapsed = time_block do
    # use_ssl is required for https links; Net::HTTP.new(host, port) alone
    # speaks plain HTTP regardless of the port.
    Net::HTTP.start(href.host, href.port, use_ssl: href.scheme == 'https') do |http|
      # request_uri keeps any query string, which href.path would drop.
      http.request_get(href.request_uri) do |response|
        # Check the status before opening csv_file so that a failed request
        # neither truncates an existing file nor stores an error page in it.
        unless response.is_a?(Net::HTTPSuccess)
          raise "download of #{href} failed: #{response.code} #{response.message}"
        end

        total_length = response['Content-Length'].to_i
        status " reading #{(total_length/1024.0/1024).round(1)} MB: "
        # A missing Content-Length yields 0 above; hand nil to the progress
        # code so it is never asked for a percentage of zero.
        total_length = nil unless total_length > 0

        # stream result because it's large
        File.open(csv_file, 'wb') do |file|
          response.read_body do |chunk|
            file.write chunk
            update_download_status(chunk.length, total_length)
          end
        end
      end
    end
  end

  status "\n read #{@current_byte} bytes in #{elapsed.round(2)} seconds at " +
         "#{(@current_byte/1024/elapsed).round(1)} KB/sec\n"
end
read_ranges(countries) { |name, row, row| ... }
click to toggle source
# File lib/geolocal/provider/db_ip.rb, line 82
# Reads the gzipped CSV at csv_file and yields the rows that belong to the
# requested countries.
#
# countries - enumerable of [name, country_codes] pairs; each country_codes
#             must respond to include?.
#
# Yields name, row[0], row[1] once for every (name, row) combination where
# country_codes includes row[2]. Progress and totals are reported via status.
def read_ranges(countries)
  status "computing ranges\n"

  row_count = 0
  match_count = 0
  elapsed = time_block do
    # GzipReader.open reads the file in binary mode and closes the reader
    # (and the underlying file) when the block exits, even on an exception.
    Zlib::GzipReader.open(csv_file) do |gz|
      CSV.new(gz, headers: false).each do |row|
        row_count += 1
        countries.each do |name, country_codes|
          if country_codes.include?(row[2])
            match_count += 1
            yield name, row[0], row[1]
          end
          # check_country_codes(countries, row)
        end
      end
    end
  end

  status " matched #{match_count} of #{row_count} ranges in #{elapsed.round(2)} seconds\n"
end
update_download_status(size, length)
click to toggle source
TODO: refactor progress and download code into a mixin?
# File lib/geolocal/provider/db_ip.rb, line 14
# Adds +size+ to the running byte count and, when the total is known, emits
# the download percentage through status each time it reaches a new
# multiple of 5.
#
# size   - number of bytes just received.
# length - total number of bytes expected; nil or a non-positive value means
#          the total is unknown and no percentage is printed.
def update_download_status(size, length)
  @current_byte ||= 0
  @previous_print ||= 0

  @current_byte += size

  # 0 is truthy in Ruby, so a bare `if length` would go on to divide by zero
  # when the caller has no usable total (e.g. nil.to_i from a missing header).
  if length && length > 0
    pct = @current_byte * 100 / length
    pct = (pct / 5) * 5 # round down to a multiple of 5

    if pct != @previous_print
      @previous_print = pct
      status pct.to_s + '% '
    end
  else
    # server didn't supply a length, display running byte count?
  end
end