module RegexpPropertyValues::Updater

Constants

BASE_URL
EMOJI_FILES
TMP_DIR
UCD_FILES

Public Instance Methods

call() click to toggle source
# File lib/regexp_property_values/updater.rb, line 26
# Runs the whole update: prepares the tmp dir, downloads the data files,
# writes the values and aliases files, removes the tmp dir, then calls
# print_stats.
#
# The tmp dir is removed even when downloading or writing raises, so a
# failed run does not leave downloaded files behind. The error itself still
# propagates to the caller, and print_stats is skipped in that case.
def call
  prepare_tmp_dir
  begin
    download_ucd_files
    write_values
    write_aliases
  ensure
    remove_tmp_dir
  end
  print_stats
end
download_ucd_files() click to toggle source
# File lib/regexp_property_values/updater.rb, line 40
# Asks for confirmation on $stdin, then downloads every file in UCD_FILES and
# EMOJI_FILES into TMP_DIR with wget, using the Unicode and emoji versions
# reported by RbConfig for the running Ruby.
#
# Returns nil without downloading when the answer does not start with "y"
# (case-insensitive) or when $stdin is at EOF.
#
# Raises KeyError if RbConfig lacks one of the version keys, and RuntimeError
# as soon as one download fails, so that a missing file is reported here
# rather than as a "file not found" in a later step.
def download_ucd_files
  unicode_version = RbConfig::CONFIG.fetch('UNICODE_VERSION')
  emoji_version   = RbConfig::CONFIG.fetch('UNICODE_EMOJI_VERSION')
  puts 'This will load ucd and emoji data for the CURRENT RUBY '\
       "(#{unicode_version} / #{emoji_version}). Run this on the "\
       'latest Ruby version you want to support. Continue? [y/n]'
  # gets returns nil at EOF; to_s turns that into a plain "no".
  return puts 'download skipped.' unless $stdin.gets.to_s.match?(/^y/i)

  urls = UCD_FILES.map   { |f| "#{BASE_URL}/#{unicode_version}/ucd/#{f}" } +
         EMOJI_FILES.map { |f| "#{BASE_URL}/emoji/#{emoji_version}/#{f}" }

  Dir.chdir(TMP_DIR) do
    urls.each do |url|
      # Multi-argument system bypasses the shell and reports the exit status
      # (false on failure, nil if wget could not be started).
      system('wget', url) || raise("download failed: #{url}")
    end
  end
end
in_values?(string) click to toggle source
# File lib/regexp_property_values/updater.rb, line 116
# Tells whether @values holds an entry equal to +string+, ignoring case.
# Returns true or false.
def in_values?(string)
  @values.each do |candidate|
    return true if candidate.casecmp?(string)
  end
  false
end
prepare_tmp_dir() click to toggle source
# File lib/regexp_property_values/updater.rb, line 35
# Leaves TMP_DIR as an existing, empty directory, replacing whatever was
# there before.
def prepare_tmp_dir
  # rm_rf ignores a missing path, so no existence check is needed. Unlike a
  # File.exist? guard it also clears a dangling symlink sitting at TMP_DIR.
  FileUtils.rm_rf(TMP_DIR)
  # mkdir_p also creates missing parent directories.
  FileUtils.mkdir_p(TMP_DIR)
end
print_stats() click to toggle source
remove_tmp_dir() click to toggle source
# File lib/regexp_property_values/updater.rb, line 125
# Deletes TMP_DIR together with its contents. A missing TMP_DIR is ignored.
def remove_tmp_dir
  FileUtils.rm_r(TMP_DIR, force: true)
end
scan(file, pattern) { |last_match| ... } click to toggle source
# File lib/regexp_property_values/updater.rb, line 120
# Reads +file+ from TMP_DIR and yields the MatchData of every occurrence of
# +pattern+ in its contents, in order of appearance.
def scan(file, pattern)
  source = File.join(TMP_DIR, file)
  contents = File.read(source)
  contents.scan(pattern) do
    yield Regexp.last_match
  end
end
write_aliases() click to toggle source
# File lib/regexp_property_values/updater.rb, line 93
# Collects alias/name pairs into @aliases and writes them, sorted and one
# "alias;name" pair per line, to RegexpPropertyValues::ALIASES_PATH.
#
# A pair is kept only when the name is one of the known values (in_values?)
# and the alias is not itself a known value. Pairs come from
# PropertyAliases.txt and from the gc/sc lines of PropertyValueAliases.txt,
# where a line may carry a second, optional alias.
def write_aliases
  @aliases = Set.new

  register = lambda do |short, long|
    next if short.nil? # the second alias is optional
    next unless in_values?(long)

    @aliases << [short, long] unless in_values?(short)
  end

  scan('PropertyAliases.txt', /^(?<alias>\w+) *; (?<name>\w+)/) do |match|
    register.call(match[:alias], match[:name])
  end

  scan('PropertyValueAliases.txt',
    /^[gs]c ; (?<alias1>\w+) *; (?<name>\w+)(?: *; (?<alias2>\w+))?/) do |match|
    register.call(match[:alias1], match[:name])
    register.call(match[:alias2], match[:name])
  end

  lines = @aliases.sort.map { |short, long| "#{short};#{long}" }
  File.write(RegexpPropertyValues::ALIASES_PATH, lines.join("\n"))
end
write_values() click to toggle source
# File lib/regexp_property_values/updater.rb, line 54
# Builds @values, the set of known property value names, and writes it
# sorted, one name per line, to RegexpPropertyValues::VALUES_PATH.
#
# The set starts from hard-coded posix, special and legacy names and is then
# extended with names scanned from the data files in TMP_DIR: property names,
# general categories, blocks (prefixed "In_") and ages (prefixed "Age=").
def write_values
  posix_names = %w[
    Alpha Blank Cntrl Digit Graph Lower Print
    Punct Space Upper XDigit Word Alnum ASCII
    XPosixPunct
  ]
  special_names = %w[Any Assigned In_No_Block Unknown]
  legacy_names  = %w[Newline]
  @values = Set.new(posix_names + special_names + legacy_names)

  # "<codepoint or range> ; <name> # ..." lines shared by these files
  code_point_line = /^[0-9a-fA-F]+(?:\.\.[0-9a-fA-F]+)? *; (?<prop_name>\w+) +# /
  code_point_files = %w[
    DerivedCoreProperties.txt
    PropList.txt
    Scripts.txt
    emoji-data.txt
  ]
  code_point_files.each do |source|
    scan(source, code_point_line) { |match| @values.add(match[:prop_name]) }
  end

  # long names of the general categories
  scan('PropertyValueAliases.txt', /^gc ; \w+ *; (?<prop_name>\w+)/) do |match|
    @values.add(match[:prop_name])
  end

  # block names, with every non-word character replaced by an underscore
  scan('Blocks.txt', /^[\dA-F.]+ *; (?<block_name>[-\w ]+)/) do |match|
    @values.add("In_#{match[:block_name].gsub(/\W/, '_')}")
  end

  scan('DerivedAge.txt', /^[\dA-F.]+ *; (?<age_num>[\d.]+)/) do |match|
    @values.add("Age=#{match[:age_num]}")
  end

  sorted_names = @values.sort
  File.write(RegexpPropertyValues::VALUES_PATH, sorted_names.join("\n"))
end