require 'fileutils'
require 'find'
require 'json'
require 'net/http'
require 'rsolr'
require 'geo_combine/geo_blacklight_harvester'
# Rake tasks for cloning, updating and indexing OpenGeoMetadata repositories.
#
# Settings are read from the environment once, when this file is loaded:
#   SOLR_COMMIT_WITHIN - commitWithin value sent with each Solr update (default 5000)
#   OGM_PATH           - directory holding the repository checkouts (default tmp/opengeometadata)
#   SOLR_URL           - Solr core to index into (default http://127.0.0.1:8983/solr/blacklight-core)
namespace :geocombine do
  commit_within = (ENV['SOLR_COMMIT_WITHIN'] || 5000).to_i
  ogm_path = ENV['OGM_PATH'] || 'tmp/opengeometadata'
  solr_url = ENV['SOLR_URL'] || 'http://127.0.0.1:8983/solr/blacklight-core'
  # Clone URLs that are accepted even though they do not match the
  # (edu|org|uk) naming pattern applied in the :clone task.
  whitelist = %w[
    https://github.com/OpenGeoMetadata/big-ten.git
  ]

  # With a repo argument, clones only that OpenGeoMetadata repository.
  # Without one, lists the organisation's repositories through the GitHub API,
  # drops empty ones, and clones those that are whitelisted or match the
  # naming pattern.
  desc 'Clone OpenGeoMetadata repositories'
  task :clone, [:repo] do |_t, args|
    if args.repo
      ogm_repos = ["https://github.com/OpenGeoMetadata/#{args.repo}.git"]
    else
      # per_page=100 is the largest page GitHub serves; the default of 30
      # silently truncates the organisation listing.
      ogm_api_uri = URI('https://api.github.com/orgs/opengeometadata/repos?per_page=100')
      listing = JSON.parse(Net::HTTP.get(ogm_api_uri))
      # API errors (e.g. rate limiting) come back as a JSON object, not a list.
      raise "Unexpected response from #{ogm_api_uri}: #{listing.inspect}" unless listing.is_a?(Array)

      ogm_repos = listing.map do |repo|
        repo['clone_url'] if repo['size'] > 0
      end.compact
      ogm_repos.select! { |repo| whitelist.include?(repo) || repo =~ /(edu|org|uk)\..*\.git$/ }
    end

    ogm_repos.each do |repo|
      puts repo
      FileUtils.mkdir_p(ogm_path)
      # Argument-vector form: repo and ogm_path never pass through a shell,
      # so spaces or metacharacters in them cannot alter the command.
      cloned = system('git', 'clone', '--depth', '1', repo, chdir: ogm_path)
      warn "git clone failed for #{repo}" unless cloned
    end
  end

  # With a repo argument, pulls only that checkout under OGM_PATH; otherwise
  # pulls every directory found directly under OGM_PATH.
  desc '"git pull" OpenGeoMetadata repositories'
  task :pull, [:repo] do |_t, args|
    paths = if args.repo
              [File.join(ogm_path, args.repo)]
            else
              Dir.glob("#{ogm_path}/*")
            end

    paths.each do |path|
      next unless File.directory?(path)

      puts path
      # Run git inside the checkout without building a shell command string.
      pulled = system('git', 'pull', 'origin', chdir: path)
      warn "git pull failed for #{path}" unless pulled
    end
  end

  # Walks OGM_PATH and sends every record found in files named
  # geoblacklight.json to Solr, then issues a commit.
  desc 'Index all of the GeoBlacklight JSON documents'
  task :index do
    puts "Indexing #{ogm_path} into #{solr_url}"
    solr = RSolr.connect url: solr_url, adapter: :net_http_persistent
    Find.find(ogm_path) do |path|
      next unless File.basename(path) == 'geoblacklight.json'

      doc = JSON.parse(File.read(path))
      # A file may hold a single record or an array of records.
      [doc].flatten.each do |record|
        begin
          puts "Indexing #{record['layer_slug_s']}: #{path}" if $DEBUG
          solr.update params: { commitWithin: commit_within, overwrite: true },
                      data: [record].to_json,
                      headers: { 'Content-Type' => 'application/json' }
        rescue RSolr::Error::Http => error
          # Best effort: report the failed record and continue with the rest.
          puts error
        end
      end
    end
    solr.commit
  end

  namespace :geoblacklight_harvester do
    # Requires the site argument; raises ArgumentError when it is missing.
    desc 'Harvest documents from a configured GeoBlacklight instance'
    task :index, [:site] => [:environment] do |_t, args|
      raise ArgumentError, 'A site argument is required' unless args.site

      GeoCombine::GeoBlacklightHarvester.new(args.site.to_sym).index
    end
  end
end