module Wordmap::Access
Public Instance Methods
bsearch_vec(file, value, meta, ltrim_regex, trace)
click to toggle source
# File lib/wordmap/access.rb, line 151
# Binary-searches a vector file for a cell whose extracted value equals
# +value+.
#
# Every probe reads exactly one cell through FileAccess.read_cells and
# compares the target against it with <=> (Range#bsearch find-any mode), so
# the cells are presumably stored in ascending order -- TODO confirm against
# the code that writes the vector files.
#
# file, meta  - handed to FileAccess.read_cells; meta[:cell_count] bounds the
#               search range.
# value       - the value being looked up.
# ltrim_regex - pattern stripped from each cell by extract_value.
# trace       - nil, or an Array that receives [:bsearch_vec, value, subtrace].
#
# Returns the Integer index of a matching cell, or nil when nothing matches.
def bsearch_vec(file, value, meta, ltrim_regex, trace)
  probes = trace ? [] : nil
  trace << [__method__, value, probes] if trace

  last_index = meta[:cell_count] - 1
  (0..last_index).bsearch do |index|
    cells = FileAccess.read_cells(file, index, 1, meta, probes)
    value <=> extract_value(cells[0], ltrim_regex)
  end
end
clause_to_index_value(clause, descriptors, indexes, ltrim_regex, trace)
click to toggle source
# File lib/wordmap/access.rb, line 165
# Turns one query clause into something that yields cell locations.
#
# A clause whose first element is a Symbol names an index: every remaining
# element is looked up in that index, and the first result (or '' when the
# lookup yields nothing) is collected into an Array.
#
# Any other clause is treated as a list of keys and handed to vec_iterator.
#
# Raises RuntimeError when the named index is not present in +indexes+.
def clause_to_index_value(clause, descriptors, indexes, ltrim_regex, trace)
  name, *keys = clause

  unless name.is_a?(Symbol)
    # For vector lookup, if keys are sorted, then positions are guaranteed
    # to be sorted too, which means we can get away with getting locations
    # lazily here.
    return vec_iterator(descriptors, Array(clause), ltrim_regex, trace)
  end

  raise "Unknown index: #{name}" unless indexes.key?(name)

  index = indexes[name]
  keys.map do |key|
    index[key, trace: trace].first || ''
  end
end
each(descriptors, indexes, vec_or_index, ltrim_regex, trace) { |value| ... }
click to toggle source
# File lib/wordmap/access.rb, line 87
# Iterates over every value of the data file, of one vector file, or of one
# index, yielding each value to the block.
#
# vec_or_index selects the source:
#   nil     - the 'data' descriptor
#   Integer - the "vec<N>" descriptor
#   Symbol  - the index registered under that name in +indexes+
#
# For descriptor sources each cell is passed through extract_value and empty
# results are skipped; index entries are yielded unchanged.
#
# trace - nil, or an Array that receives [:each, source, subtrace].
#
# Returns an Enumerator when called without a block.
# Raises RuntimeError for an unknown descriptor, an unknown index, or a
# vec_or_index that is none of nil / Integer / Symbol.
def each(descriptors, indexes, vec_or_index, ltrim_regex, trace)
  unless block_given?
    return enum_for(
      __method__, descriptors, indexes, vec_or_index, ltrim_regex, trace
    )
  end

  case vec_or_index
  when NilClass, Integer
    descriptor = vec_or_index.nil? ? 'data' : "vec#{vec_or_index}"
    entry = descriptors[descriptor]
    # Fail with a clear message instead of a NoMethodError on nil, matching
    # the handling of unknown indexes below.
    raise "Unknown descriptor: #{descriptor}" unless entry

    file, meta = entry.values_at(:file, :meta)
    subtrace = nil
    if trace
      subtrace = []
      trace << [:each, descriptor, subtrace]
    end
    FileAccess.each_cell(file, meta: meta, trace: subtrace) { |cell|
      value = extract_value(cell, ltrim_regex)
      yield(value) unless value.empty?
    }
  when Symbol
    raise "Unknown index: #{vec_or_index}" unless indexes.key?(vec_or_index)

    subtrace = nil
    if trace
      subtrace = []
      trace << [:each, vec_or_index, subtrace]
    end
    indexes[vec_or_index].each(0, trace: subtrace) { |cell| yield(cell) }
    # Replace the entries the index recorded with the concatenation of their
    # third elements, flattening one level of nesting in the trace.
    subtrace.replace(subtrace.flat_map { |v| v[2] }) if trace
  else
    raise 'Invalid value passed into each'
  end
end
each_by_key(descriptors, key, ltrim_regex, trace) { |value| ... }
click to toggle source
# File lib/wordmap/access.rb, line 61
# Yields every non-empty value stored under +key+ in the data file.
#
# The key is resolved to an index value by index_value_by_key; that value is
# expanded by IndexValue.each_seq_value into a list of cell positions, and
# the cells are then read in one FileAccess.each_cell call starting at the
# first position and spanning as many cells as there are positions.
#
# trace - nil, or an Array that receives
#         [:each_by_key, "<first>-<last>", subtrace].
#
# Returns an Enumerator when called without a block, and an empty Enumerator
# when the key cannot be resolved.
def each_by_key(descriptors, key, ltrim_regex, trace)
  return enum_for(__method__, descriptors, key, ltrim_regex, trace) unless block_given?

  index_value = index_value_by_key(descriptors, key, ltrim_regex, trace)
  return [].to_enum if index_value == ''

  positions = IndexValue.each_seq_value(index_value).to_a

  cell_trace = trace ? [] : nil
  trace << [:each_by_key, "#{positions.first}-#{positions.last}", cell_trace] if trace

  data = descriptors['data']
  FileAccess.each_cell(
    data[:file], positions[0],
    count: positions.size, meta: data[:meta], trace: cell_trace
  ) do |cell|
    text = extract_value(cell, ltrim_regex)
    yield(text) unless text.empty?
  end
end
each_by_query(descriptors, indexes, query, ltrim_regex, trace) { |value| ... }
click to toggle source
# File lib/wordmap/access.rb, line 16
# Yields every non-empty data value matched by +query+.
#
# A query containing no Array elements is treated as a single clause. A
# query containing at least one Array is treated as a list of clauses, which
# are first merged by map_normalized_clauses. Each clause is converted by
# clause_to_index_value, and the results are passed together to
# IndexValue.each_seq_value.
#
# The resulting positions are consumed lazily and cut into runs of
# consecutive positions, so each run is read with a single
# FileAccess.each_cell call.
#
# trace - nil, or an Array that receives one
#         [:each_by_query, "<first>-<last>", subtrace] entry per run.
#
# Returns an Enumerator when called without a block.
def each_by_query(descriptors, indexes, query, ltrim_regex, trace)
  unless block_given?
    return enum_for(__method__, descriptors, indexes, query, ltrim_regex, trace)
  end

  multiple_clauses = query.any? { |clause| clause.is_a?(Array) }
  index_values =
    if multiple_clauses
      # Proactively intersect all clauses of the same type to save on reads.
      map_normalized_clauses(query) { |clause|
        clause_to_index_value(clause, descriptors, indexes, ltrim_regex, trace)
      }
    else
      [clause_to_index_value(query, descriptors, indexes, ltrim_regex, trace)]
    end

  positions = IndexValue.each_seq_value(*index_values).lazy
  # Start a new run whenever the next position is not the direct successor.
  runs = positions.slice_when { |previous, current| current > previous.succ }

  runs.each { |run|
    cell_trace = trace ? [] : nil
    trace << [:each_by_query, "#{run.first}-#{run.last}", cell_trace] if trace

    data = descriptors['data']
    FileAccess.each_cell(
      data[:file], run[0],
      count: run.size, meta: data[:meta], trace: cell_trace
    ) { |cell|
      text = extract_value(cell, ltrim_regex)
      yield(text) unless text.empty?
    }
  }
end
extract_value(cell, regex)
click to toggle source
# File lib/wordmap/access.rb, line 206
# Removes the first match of +regex+ from +cell+ and tags the result as
# UTF-8.
#
# String#sub returns a new String, so the caller's +cell+ keeps both its
# content and its encoding. The bytes are relabelled, not transcoded or
# validated.
def extract_value(cell, regex)
  trimmed = cell.sub(regex, '')
  trimmed.force_encoding(Encoding::UTF_8)
end
index_value_by_key(descriptors, key, ltrim_regex, trace)
click to toggle source
# File lib/wordmap/access.rb, line 132
# Resolves a (possibly multi-part) key to an index value String describing
# where its cells sit in the data file.
#
# Part N of the key is looked up in descriptor "vec<N>" with bsearch_vec.
# Starting from the total data cell count, each part divides the current
# span by that vector's cell count and advances the offset by
# span * found-index.
#
# Returns "<offset>+<span - 1>" when the final span covers more than one
# cell, "<offset>" when it covers one, and '' when a vector descriptor is
# missing or a key part is not found.
def index_value_by_key(descriptors, key, ltrim_regex, trace)
  parts = Array(key)
  span = descriptors['data'][:meta][:cell_count]
  offset = 0

  parts.each_with_index do |part, depth|
    vec = descriptors["vec#{depth}"]
    return '' unless vec

    vec_meta = vec[:meta]
    found = bsearch_vec(vec[:file], part, vec_meta, ltrim_regex, trace)
    return '' unless found

    span /= vec_meta[:cell_count]
    offset += span * found
  end

  span > 1 ? "#{offset}+#{span - 1}" : offset.to_s
end
load_descriptors(paths, spacer)
click to toggle source
# File lib/wordmap/access.rb, line 8
# Opens every file in +paths+ in binary mode and reads its metadata.
#
# paths  - enumerable of file paths.
# spacer - passed through to FileAccess.read_meta.
#
# Returns a Hash mapping each path's basename to { file: File, meta: meta }.
# The File objects are returned open; closing them is left to the caller.
#
# If opening a file or reading its metadata raises, every file opened so far
# is closed before the error is re-raised, because the caller never receives
# the partially built Hash and so could not close them itself.
def load_descriptors(paths, spacer)
  opened = []

  paths.each_with_object({}) do |path, descriptors|
    file = File.open(path, 'rb')
    opened << file
    meta = FileAccess.read_meta(file, spacer)
    descriptors[File.basename(path)] = { file: file, meta: meta }
  end
rescue StandardError
  opened.each(&:close)
  raise
end
map_normalized_clauses(query) { |clause| ... }
click to toggle source
# File lib/wordmap/access.rb, line 192
# Merges the clauses of a query by kind and maps the merged clauses through
# the block.
#
# Clauses starting with a Symbol are grouped under that Symbol, with the
# remaining elements as their keys; all other clauses are grouped together
# under '_keys'. When a group is seen more than once its key lists are
# intersected (Array#&).
#
# The block receives the bare key list for the '_keys' group and
# [name, *keys] for every Symbol group, in order of first appearance.
#
# Returns an Array of the block's results.
def map_normalized_clauses(query)
  grouped = {}

  query.each do |clause|
    head = clause[0]
    name, keys = head.is_a?(Symbol) ? [head, clause[1..-1]] : ['_keys', clause]
    grouped[name] = grouped.key?(name) ? grouped[name] & keys : keys
  end

  grouped.map do |name, keys|
    yield(name == '_keys' ? keys : [name, *keys])
  end
end
vec_iterator(descriptors, keys, ltrim_regex, trace = nil) { |cell_index| ... }
click to toggle source
# File lib/wordmap/access.rb, line 180
# Yields, for each key in sorted order, the integer form of the index value
# that index_value_by_key resolves it to.
#
# Keys that resolve to nil or '' are skipped. The value is converted with
# String#to_i, so only the leading number of a "<offset>+<n>" value is
# yielded.
#
# Returns an Enumerator when called without a block.
def vec_iterator(descriptors, keys, ltrim_regex, trace = nil)
  return enum_for(__method__, descriptors, keys, ltrim_regex, trace) unless block_given?

  keys.sort.each do |key|
    index_value = index_value_by_key(descriptors, key, ltrim_regex, trace)
    yield(index_value.to_i) unless index_value.nil? || index_value == ''
  end
end