class MESH::Tree

Public Class Methods

new() click to toggle source
# File lib/MESH/tree.rb, line 8
# Builds the tree from the bundled gzipped descriptor file
# (data/mesh_data_2014/d2014.bin.gz, resolved relative to this source file).
#
# Records are delimited by lines starting with '*NEWRECORD'. Each record is
# turned into a MESH::Heading and indexed by position, unique id, original
# heading and tree number; each of its structured entries is indexed by term,
# loose-match term and the first word of the downcased term. Every index is
# keyed by the integer +#hash+ of the lookup value, not by the value itself.
# Once all headings are loaded they are connected to their parents and
# forward references.
#
# Raises RuntimeError when a tree number's index key is already taken.
def initialize
  @headings_last_position = -1
  @headings = GoogleHashDenseLongToRuby.new
  @headings_by_unique_id = GoogleHashDenseLongToRuby.new
  @headings_by_tree_number = GoogleHashDenseLongToRuby.new
  @headings_by_original_heading = GoogleHashDenseLongToRuby.new
  @entries_by_term = GoogleHashDenseLongToRuby.new
  # Case insensitive, no punctuation, normalised whitespace.
  @entries_by_loose_match_term = GoogleHashDenseLongToRuby.new
  @entries_by_first_word = GoogleHashDenseLongToRuby.new
  @locales = [@@default_locale]

  filename = File.expand_path('../../../data/mesh_data_2014/d2014.bin.gz', __FILE__)

  lines = []
  # The block form opens the file in binary mode and closes both the gzip
  # stream and the underlying file handle, even when a record fails to load.
  Zlib::GzipReader.open(filename) do |file|
    file.each_line do |line|
      unless line.start_with?('*NEWRECORD')
        lines << line
        next
      end
      # Nothing has been collected yet (e.g. the very first marker line).
      next if lines.empty?

      mh = MESH::Heading.new(self, @@default_locale, lines)
      @headings_last_position += 1
      @headings[@headings_last_position] = mh
      @headings_by_unique_id[mh.unique_id.hash] = mh
      @headings_by_original_heading[mh.original_heading.hash] = mh

      mh.tree_numbers.each do |tree_number|
        tree_number_key = tree_number.hash
        if @headings_by_tree_number[tree_number_key]
          raise "Tree number #{tree_number} is already indexed"
        end
        @headings_by_tree_number[tree_number_key] = mh
      end

      mh.structured_entries.each do |entry|
        @entries_by_term[entry.term.hash] = entry
        @entries_by_loose_match_term[entry.loose_match_term.hash] = entry
        first_word_key = entry.term.downcase.split(/\W+/)[0].hash
        @entries_by_first_word[first_word_key] ||= Set.new
        @entries_by_first_word[first_word_key] << entry
      end

      # Start the next record with its marker line.
      lines = [line]
    end
  end
  # NOTE(review): lines collected after the last '*NEWRECORD' marker are never
  # turned into a heading - confirm the data file ends with a marker line.

  (0..@headings_last_position).each do |position|
    heading = @headings[position]
    heading.connect_to_parents
    heading.connect_to_forward_references
  end
end

Public Instance Methods

each() { |headings| ... } click to toggle source
# File lib/MESH/tree.rb, line 188
# Yields every heading whose +useful+ flag is truthy, walking positions
# 0 through @headings_last_position in order.
#
# Without a block, returns an Enumerator over the same headings instead of
# failing at the first +yield+.
def each
  return enum_for(:each) unless block_given?

  (0..@headings_last_position).each do |position|
    heading = @headings[position]
    yield heading if heading.useful
  end
end
find_entries_by_word(word) click to toggle source
# File lib/MESH/tree.rb, line 174
# Returns whatever @entries_by_first_word stores under +word.hash+.
def find_entries_by_word(word)
  lookup_key = word.hash
  @entries_by_first_word[lookup_key]
end
find_entry_by_loose_match(term) click to toggle source
# File lib/MESH/tree.rb, line 170
# Normalises +term+ with Entry.loose_match and returns whatever
# @entries_by_loose_match_term stores under the hash of the result.
def find_entry_by_loose_match(term)
  normalised = Entry.loose_match(term)
  @entries_by_loose_match_term[normalised.hash]
end
find_entry_by_term(term) click to toggle source
# File lib/MESH/tree.rb, line 166
# Returns whatever @entries_by_term stores under +term.hash+.
def find_entry_by_term(term)
  lookup_key = term.hash
  @entries_by_term[lookup_key]
end
find_heading_by_main_heading(heading) click to toggle source
# File lib/MESH/tree.rb, line 162
# Returns whatever @headings_by_original_heading stores under +heading.hash+.
def find_heading_by_main_heading(heading)
  lookup_key = heading.hash
  @headings_by_original_heading[lookup_key]
end
find_heading_by_tree_number(tree_number) click to toggle source
# File lib/MESH/tree.rb, line 158
# Returns whatever @headings_by_tree_number stores under +tree_number.hash+.
def find_heading_by_tree_number(tree_number)
  lookup_key = tree_number.hash
  @headings_by_tree_number[lookup_key]
end
find_heading_by_unique_id(unique_id) click to toggle source
# File lib/MESH/tree.rb, line 154
# Returns whatever @headings_by_unique_id stores under +unique_id.hash+.
def find_heading_by_unique_id(unique_id)
  lookup_key = unique_id.hash
  @headings_by_unique_id[lookup_key]
end
linkify_summaries(&block) click to toggle source
# File lib/MESH/tree.rb, line 146
# Calls +linkify_summary+ on every heading at positions
# 0 through @headings_last_position, forwarding the given block to each call.
def linkify_summaries(&block)
  positions = 0..@headings_last_position
  positions.each { |position| @headings[position].linkify_summary(&block) }
end
load_translation(locale) click to toggle source
# File lib/MESH/tree.rb, line 64
# Loads the translated descriptor file for +locale+
# (data/mesh_data_2014/d2014.<locale>.bin.gz, resolved relative to this
# source file) and indexes the entries each translated heading reports.
#
# Does nothing and returns nil when +locale+ is already listed in @locales.
# Otherwise records are delimited by lines starting with '*NEWRECORD'; the
# record's 'UI = ...' line identifies the heading whose +load_translation+
# receives the collected lines. The locale is appended to @locales once the
# whole file has been read.
#
# Raises RuntimeError ('Translation provided for missing header') when a
# record's unique id does not resolve to a loaded heading.
def load_translation(locale)
  return if @locales.include?(locale)

  filename = File.expand_path("../../../data/mesh_data_2014/d2014.#{locale}.bin.gz", __FILE__)

  unique_id = nil
  lines = []
  # The block form opens the file in binary mode and closes both the gzip
  # stream and the underlying file handle, even when a record raises.
  Zlib::GzipReader.open(filename) do |file|
    file.each_line do |line|
      if line.start_with?('*NEWRECORD')
        unless unique_id.nil? || lines.empty?
          heading = find_heading_by_unique_id(unique_id)
          raise 'Translation provided for missing header' unless heading

          heading.load_translation(lines, locale).each do |entry|
            @entries_by_term[entry.term.hash] = entry
            @entries_by_loose_match_term[entry.loose_match_term.hash] = entry
            first_word_key = entry.term.downcase.split(/\W+/)[0].hash
            @entries_by_first_word[first_word_key] ||= Set.new
            @entries_by_first_word[first_word_key] << entry
          end

          unique_id = nil
          lines = []
        end
      elsif (matches = line.match(/^UI = (.*)/))
        unique_id = matches[1]
      end

      # Every line, including the marker itself, belongs to the record that
      # is currently being collected.
      lines << line
    end
  end
  # NOTE(review): the record collected after the last '*NEWRECORD' marker is
  # never handed to a heading - confirm the data file ends with a marker line.

  @locales << locale
end
load_wikipedia() click to toggle source
# File lib/MESH/tree.rb, line 107
# Loads Wikipedia links from data/mesh_data_2014/d2014.wikipedia.bin.gz
# (resolved relative to this source file) and attaches them to headings.
#
# Does nothing and returns nil when @wikipedia_loaded is already set.
# Otherwise records are delimited by '*NEWRECORD' lines; 'UI = ...' names the
# heading and each 'WK = ...' line carries one JSON object (parsed with
# symbolized keys). Before being assigned to the heading, every link's
# :score is divided by the heading's number of structured entries and
# rounded to two decimals. Records whose unique id does not resolve to a
# heading are skipped.
def load_wikipedia
  return if @wikipedia_loaded

  filename = File.expand_path("../../../data/mesh_data_2014/d2014.wikipedia.bin.gz", __FILE__)

  unique_id = nil
  wikipedia_links = []
  # The block form opens the file in binary mode and closes both the gzip
  # stream and the underlying file handle, even when parsing raises.
  Zlib::GzipReader.open(filename) do |file|
    file.each_line do |line|
      if line.match(/^\*NEWRECORD$/)
        unless unique_id.nil?
          if (heading = find_heading_by_unique_id(unique_id))
            wikipedia_links.each do |wl|
              wl[:score] = (wl[:score].to_f / heading.structured_entries.length.to_f).round(2)
            end
            heading.wikipedia_links = wikipedia_links
          end

          wikipedia_links = []
          unique_id = nil
        end
      elsif (matches = line.match(/^UI = (.*)/))
        unique_id = matches[1]
      elsif (matches = line.match(/^WK = (.*)/))
        wikipedia_links << JSON.parse(matches[1], symbolize_names: true)
      end
    end
  end
  # NOTE(review): links collected after the last '*NEWRECORD' marker are never
  # attached to a heading - confirm the data file ends with a marker line.

  @wikipedia_loaded = true
end
match_in_text(text) click to toggle source
# File lib/MESH/tree.rb, line 195
# Finds entry matches inside +text+ and returns them as an array.
#
# Returns [] when +text+ is nil. Otherwise the downcased text is passed
# through @@sw.clarify (presumably stop-word removal - confirm), split on
# non-word characters and de-duplicated; each word is looked up with
# find_entries_by_word, and only entries whose heading is +useful+ are asked
# to match themselves against the text. Any match whose :index span lies
# inside another match's span is flagged with :delete and dropped from the
# result.
def match_in_text(text)
  return [] if text.nil?

  lowered = text.downcase
  words = @@sw.clarify(lowered).split(/\W+/).uniq

  candidates = words.map { |word| find_entries_by_word(word).to_a }
  candidates = candidates.compact.flatten.select { |entry| entry.heading.useful }

  found = candidates.map { |entry| entry.match_in_text(text, lowered) }.compact.flatten

  # True when inner's [start, end] span sits within outer's span.
  contained = lambda do |inner, outer|
    (inner[:index][0] >= outer[:index][0]) && (inner[:index][1] <= outer[:index][1])
  end

  found.combination(2) do |left, right|
    if contained.call(right, left)
      right[:delete] = true
    elsif contained.call(left, right)
      left[:delete] = true
    end
  end

  found.delete_if { |match| match[:delete] }
end
where(conditions) click to toggle source
# File lib/MESH/tree.rb, line 178
# Returns an array of the headings (positions 0 through
# @headings_last_position, in order) whose +matches(conditions)+ is truthy.
def where(conditions)
  (0..@headings_last_position).each_with_object([]) do |position, found|
    candidate = @headings[position]
    found << candidate if candidate.matches(conditions)
  end
end

Private Instance Methods

entry_match_key(e) click to toggle source
# File lib/MESH/tree.rb, line 234
# Returns +e+ with surrounding whitespace stripped, converted to upper case.
def entry_match_key(e)
  trimmed = e.strip
  trimmed.upcase
end