require 'uri'

# Helpers shared by the cc_retrieve_data rake tasks.
#
# Everything that does not need Mechanize or the network lives here so the
# URL building and index/template construction can be exercised in isolation.
# Methods that touch the network (deed_exists?) or Nokogiri (notice_templates)
# expect the calling task to have required 'open-uri' / 'mechanize' first.
module CcRetrieveData
  CHOOSER_URL = 'http://creativecommons.org/choose/'.freeze
  CHOOSER_API_URL = 'http://creativecommons.org/choose/xhr_api'.freeze

  # License versions probed for every jurisdiction.
  LICENSE_VERSIONS = %w[1.0 2.0 2.5 3.0 4.0].freeze

  # License Chooser form fields that select each license type.
  CHOOSER_FIELDS = {
    by:       { 'field_commercial' => 'y', 'field_derivatives' => 'y' },
    by_sa:    { 'field_commercial' => 'y', 'field_derivatives' => 'sa' },
    by_nd:    { 'field_commercial' => 'y', 'field_derivatives' => 'n' },
    by_nc:    { 'field_commercial' => 'n', 'field_derivatives' => 'y' },
    by_nc_sa: { 'field_commercial' => 'n', 'field_derivatives' => 'sa' },
    by_nc_nd: { 'field_commercial' => 'n', 'field_derivatives' => 'n' }
  }.freeze

  LICENSE_TYPES = CHOOSER_FIELDS.keys.freeze

  module_function

  # URL of the deed page for one type/version/jurisdiction combination.
  # A blank jurisdiction yields the URL with a trailing slash.
  def deed_url(type, version, jurisdiction)
    "http://creativecommons.org/licenses/#{type.to_s.tr('_', '-')}/#{version}/#{jurisdiction}"
  end

  # Builds the nested index { jurisdiction => { version => { type => bool } } }.
  #
  # Yields each deed URL to the caller's block; the block's truthiness says
  # whether that license exists. License types are always stored under String
  # keys, so available and unavailable licenses share one key per type. The
  # blank jurisdiction, when present, is re-keyed as 'unported'.
  def build_license_index(jurisdictions)
    index = jurisdictions.each_with_object({}) do |jurisdiction, by_jurisdiction|
      by_jurisdiction[jurisdiction] = LICENSE_VERSIONS.each_with_object({}) do |version, by_version|
        by_version[version] = LICENSE_TYPES.each_with_object({}) do |type, by_type|
          by_type[type.to_s] = yield(deed_url(type, version, jurisdiction)) ? true : false
        end
      end
    end
    index['unported'] = index.delete('') if index.key?('')
    index
  end

  # True when fetching +url+ answers with HTTP status 200. HTTP error
  # responses count as "license does not exist"; other failures propagate.
  # The block form closes the response handle once the status has been read.
  def deed_exists?(url)
    URI.parse(url).open { |response| response.status.first == '200' }
  rescue OpenURI::HTTPError
    false
  end

  # License Chooser API URI for the given license type and language code.
  def chooser_api_uri(type, lang)
    uri = URI.parse(CHOOSER_API_URL)
    uri.query = URI.encode_www_form(CHOOSER_FIELDS.fetch(type).merge('lang' => lang.to_s))
    uri
  end

  # Replaces the version and everything after it in a license title with
  # interpolation placeholders. Matches any "N.N" version rather than only
  # "3.0", so titles for other license versions are templated as well.
  def license_type_template(license_title)
    license_title.sub(/\d+\.\d+.*/, '%{version} %{jurisdiction}')
  end

  # Derives the notice and title templates from one chooser API response:
  # the text of the last link becomes the title, which is then replaced by
  # placeholders in the notice text and in the title itself.
  def notice_templates(license_html, license_title)
    html = Nokogiri::HTML(license_html)
    title = html.css('a:last-child').text
    {
      'license_notice' => html.text.sub(title, '%{license_title}'),
      'license_title' => title.sub(license_title, '%{license_type}')
    }
  end

  # Locale key for a jurisdiction <option> value; blank means 'unported'.
  def jurisdiction_key(value)
    value.empty? ? 'unported' : value.to_s
  end
end

namespace :cc_retrieve_data do
  desc 'Populate license_list.yaml with an index of all available CC licenses by jurisdiction'
  task :license_index do
    require 'mechanize'
    require 'yaml'
    require 'open-uri'

    # pull the list of jurisdictions from the CC license chooser page
    agent = Mechanize.new { |a| a.user_agent_alias = 'Mac Safari' }
    options = agent.get(CcRetrieveData::CHOOSER_URL).search('#field_jurisdiction option')
    jurisdictions = options.map { |option| option['value'] }.compact

    # ping for the deed page of each possible combination of jurisdiction, version and type
    license_index = CcRetrieveData.build_license_index(jurisdictions) do |deed_url|
      available = CcRetrieveData.deed_exists?(deed_url)
      sleep(1) # rate limit to avoid hammering creativecommons.org
      available
    end

    File.open('config/license_list.yaml', 'w') { |f| YAML.dump(license_index, f) }
  end

  desc 'Populate I18n localizations with translations from the CC License Chooser'
  task :localization do
    require 'mechanize'
    require 'yaml'
    require 'open-uri'
    require 'json'

    notice_localization = {}
    jurisdiction_localization = {}
    agent = Mechanize.new { |a| a.user_agent_alias = 'Mac Safari' }

    # for each available language
    agent.get(CcRetrieveData::CHOOSER_URL).search('#other_stuff .licensebox a').each do |link|
      lang = link['hreflang'].to_s
      next if lang.empty? # link carries no language code, nothing to key on

      chooser_page = agent.get(link['href'])

      # for each license type, get the notice info directly from the License Chooser API
      notices = nil
      CcRetrieveData::LICENSE_TYPES.each do |type|
        result = JSON.parse(CcRetrieveData.chooser_api_uri(type, lang).open(&:read))
        # the notice/title templates are the same for every type; derive them once
        notices ||= CcRetrieveData.notice_templates(result['license_html'], result['license_title'])
        notices["license_type_#{type}"] = CcRetrieveData.license_type_template(result['license_title'])
        sleep(1) # rate limit to avoid hammering creativecommons.org
      end
      notice_localization[lang] = notices

      # get the translations of jurisdiction names (eg. "Canada", "United States", etc.)
      jurisdiction_localization[lang] = {}
      chooser_page.search('#field_jurisdiction option').each do |option|
        value = option['value']
        next if value.nil? # same options the license_index task drops via compact

        jurisdiction_localization[lang][CcRetrieveData.jurisdiction_key(value)] = option.text.strip
      end
    end

    File.open('config/locales/cc_license_notices.yml', 'w') { |f| YAML.dump(notice_localization, f) }
    File.open('config/locales/cc_license_jurisdictions.yml', 'w') { |f| YAML.dump(jurisdiction_localization, f) }
  end
end