module RegexpPropertyValues::Updater
Constants
- BASE_URL
- EMOJI_FILES
- TMP_DIR
- UCD_FILES
Public Instance Methods
Source
# File lib/regexp_property_values/updater.rb, line 26
# Runs the complete update: creates the temp dir, downloads the data files,
# writes the values and aliases files, removes the temp dir and prints stats.
#
# The temp dir is removed in an +ensure+ clause so it does not linger when
# downloading or writing raises. Such an error still propagates to the
# caller, and the stats are only printed after a fully successful run.
def call
  prepare_tmp_dir
  begin
    download_ucd_files
    write_values
    write_aliases
  ensure
    remove_tmp_dir
  end
  print_stats
end
Source
# File lib/regexp_property_values/updater.rb, line 40
# Asks for confirmation on stdin, then downloads every file listed in
# UCD_FILES and EMOJI_FILES into TMP_DIR using +wget+. The URLs are built
# from BASE_URL and the Unicode / emoji versions reported by RbConfig for
# the running Ruby.
#
# Prints 'download skipped.' and returns nil unless the answer starts with
# "y" (case-insensitive); end-of-input counts as "no".
#
# Raises RuntimeError when +wget+ cannot be run or exits unsuccessfully, so
# that a failed download is reported instead of being silently ignored.
def download_ucd_files
  unicode_version = RbConfig::CONFIG.fetch('UNICODE_VERSION')
  emoji_version   = RbConfig::CONFIG.fetch('UNICODE_EMOJI_VERSION')
  puts 'This will load ucd and emoji data for the CURRENT RUBY '\
       "(#{unicode_version} / #{emoji_version}). Run this on the "\
       'latest Ruby version you want to support. Continue? [y/n]'
  # gets returns nil at end of input; to_s turns that into a non-match.
  return puts 'download skipped.' unless $stdin.gets.to_s.match?(/^y/i)

  # The argument-list form of system does not go through a shell, and its
  # falsy result (false on non-zero exit, nil if not executable) is checked.
  fetch = lambda do |url|
    system('wget', url) || raise("download failed: #{url}")
  end

  Dir.chdir(TMP_DIR) do
    UCD_FILES.each   { |f| fetch.call("#{BASE_URL}/#{unicode_version}/ucd/#{f}") }
    EMOJI_FILES.each { |f| fetch.call("#{BASE_URL}/emoji/#{emoji_version}/#{f}") }
  end
end
Source
# File lib/regexp_property_values/updater.rb, line 116
# Tells whether +string+ equals any entry of @values, ignoring case.
# Returns true on the first case-insensitive match, false otherwise.
def in_values?(string)
  @values.each do |known|
    return true if known.casecmp?(string)
  end
  false
end
Source
# File lib/regexp_property_values/updater.rb, line 35
# Leaves TMP_DIR as a freshly created directory: whatever already exists at
# that path is deleted recursively first, then the directory is created.
def prepare_tmp_dir
  leftover = File.exist?(TMP_DIR)
  FileUtils.rm_rf(TMP_DIR) if leftover
  FileUtils.mkdir(TMP_DIR)
end
Source
# File lib/regexp_property_values/updater.rb, line 129
# Prints a one-line summary with the sizes of @values and @aliases,
# surrounded by blank lines.
def print_stats
  value_count = @values.size
  alias_count = @aliases.size
  print "\nFetched #{value_count} values and #{alias_count} aliases.\n\n"
end
Source
# File lib/regexp_property_values/updater.rb, line 125
# Recursively deletes TMP_DIR; errors (such as the directory not existing)
# are ignored because the removal is forced.
def remove_tmp_dir
  FileUtils.rm_r(TMP_DIR, force: true)
end
Source
# File lib/regexp_property_values/updater.rb, line 120
# Reads +file+ from TMP_DIR and yields the MatchData of every occurrence of
# +pattern+ in its contents to the given block.
#
# The file is read as UTF-8 regardless of the process locale. With the
# locale default (e.g. US-ASCII under LANG=C), non-ASCII bytes in a data
# file would make String#scan raise "invalid byte sequence".
# NOTE(review): assumes the downloaded data files are UTF-8 encoded, as
# UAX #44 specifies for UCD files - confirm for the emoji files.
def scan(file, pattern)
  path = File.join(TMP_DIR, file)
  File.read(path, encoding: 'UTF-8').scan(pattern) { yield(Regexp.last_match) }
end
Source
# File lib/regexp_property_values/updater.rb, line 93
# Builds @aliases as a set of [alias, name] pairs and writes them, sorted
# and formatted as "alias;name" lines, to RegexpPropertyValues::ALIASES_PATH.
#
# A pair is only recorded when the name is a known value (see in_values?)
# and the alias itself is not already one. Pairs are taken from
# PropertyAliases.txt and from the "gc" / "sc" lines of
# PropertyValueAliases.txt, which may carry an optional second alias.
def write_aliases
  @aliases = Set.new

  scan('PropertyAliases.txt', /^(?<alias>\w+) *; (?<name>\w+)/) do |caps|
    next unless in_values?(caps[:name])
    next if in_values?(caps[:alias])

    @aliases << [caps[:alias], caps[:name]]
  end

  scan('PropertyValueAliases.txt',
       /^[gs]c ; (?<alias1>\w+) *; (?<name>\w+)(?: *; (?<alias2>\w+))?/) do |caps|
    name = caps[:name]
    next unless in_values?(name)

    # alias2 is nil when the line has no second alias.
    [caps[:alias1], caps[:alias2]].each do |candidate|
      next if candidate.nil? || in_values?(candidate)

      @aliases << [candidate, name]
    end
  end

  lines = @aliases.sort.map { |pair| pair.join(';') }
  File.write(RegexpPropertyValues::ALIASES_PATH, lines.join("\n"))
end
Source
# File lib/regexp_property_values/updater.rb, line 54
# Builds @values and writes it, sorted and one entry per line, to
# RegexpPropertyValues::VALUES_PATH.
#
# The set starts with fixed posix, special and legacy names and is then
# extended with names scanned from the downloaded files: property names,
# general category names, block names (prefixed with "In_", non-word
# characters replaced by "_") and ages (prefixed with "Age=").
def write_values
  posix   = %w[
    Alpha Blank Cntrl Digit Graph Lower Print
    Punct Space Upper XDigit Word Alnum ASCII
    XPosixPunct
  ]
  special = %w[Any Assigned In_No_Block Unknown]
  legacy  = %w[Newline]
  @values = Set.new(posix + special + legacy)

  # "<codepoint or range> ; <property name> # ..." lines
  property_line = /^[0-9a-fA-F]+(?:\.\.[0-9a-fA-F]+)? *; (?<prop_name>\w+) +# /
  property_files = %w[DerivedCoreProperties.txt PropList.txt Scripts.txt emoji-data.txt]
  property_files.each do |file|
    scan(file, property_line) { |caps| @values.add(caps[:prop_name]) }
  end

  scan('PropertyValueAliases.txt', /^gc ; \w+ *; (?<prop_name>\w+)/) do |caps|
    @values.add(caps[:prop_name])
  end

  scan('Blocks.txt', /^[\dA-F.]+ *; (?<block_name>[-\w ]+)/) do |caps|
    @values.add("In_#{caps[:block_name].gsub(/\W/, '_')}")
  end

  scan('DerivedAge.txt', /^[\dA-F.]+ *; (?<age_num>[\d.]+)/) do |caps|
    @values.add("Age=#{caps[:age_num]}")
  end

  File.write(RegexpPropertyValues::VALUES_PATH, @values.sort.join("\n"))
end