class Bud::PushSHJoin
Attributes
all_rels_below[R]
keys[R]
localpreds[R]
origpreds[R]
relnames[R]
Private Class Methods
natural_preds(bud_instance, rels)
click to toggle source
# File lib/bud/executor/join.rb, line 420
# Build natural-join predicates for +rels+: for every pair of relations
# (earlier relation first), emit one [left_col, right_col] entry per column
# name that both relations list in +cols+.
#
# bud_instance:: not used by this method; kept so the signature is unchanged.
# rels::         collection of relations; each must respond to +cols+ and to
#                a method named after each of its columns.
#
# Returns the array of predicate pairs with duplicates removed.
def self.natural_preds(bud_instance, rels)
  # combination(2) yields exactly the (i, j) pairs with i < j, in the same
  # order the nested index loops would visit them.
  preds = rels.to_a.combination(2).flat_map do |r, s|
    (r.cols & s.cols).map { |c| [r.send(c), s.send(c)] }
  end
  preds.uniq
end
Public Instance Methods
flatten(*preds, &blk)
click to toggle source
# File lib/bud/executor/join.rb, line 407
# Join the inputs under +preds+ and emit each matching pair as a single
# concatenated tuple (x + y). When a block is given, the concatenated tuple
# is passed through it and the block's result is emitted instead.
#
# Without a block, @cols becomes the duplicate-free combination of the first
# two relations' columns; with a block, @cols is reset to an empty array.
def flatten(*preds, &blk)
  @cols = blk ? [] : dupfree_schema(@rels[0].cols + @rels[1].cols)
  setup_accessors
  pairs(*preds) do |x, y|
    merged = x + y
    blk ? blk.call(merged) : merged
  end
end
flush()
click to toggle source
# File lib/bud/executor/join.rb, line 54
# Re-emit the join output if a rescan has been requested; otherwise do nothing.
def flush
  return unless @rescan

  replay_join
end
insert(item, source)
click to toggle source
# File lib/bud/executor/join.rb, line 227
# Feed +item+, arriving from +source+, into every join input that is fed by
# that source.
#
# item::   the tuple to insert.
# source:: the upstream element; only its +qualified_tabname+ is consulted.
def insert(item, source)
  # A pending rescan must be replayed *before* the new tuple is processed.
  # Otherwise any output the new tuple contributes would be emitted once now
  # and a second time by the replay.
  replay_join if @rescan

  source_tbl = source.qualified_tabname
  offsets =
    if @selfjoins.include?(source_tbl)
      # Self-join: the same table feeds more than one input position.
      @relnames.each_index.select { |i| @relnames[i] == source_tbl }
    else
      [@relnames.index(source_tbl)]
    end
  offsets.each { |offset| insert_item(item, offset) }
end
invalidate_cache()
click to toggle source
# File lib/bud/executor/join.rb, line 128
# Discard the cached hash table of every input relation whose +rescan+ flag
# is set. When $BUD_DEBUG is truthy, a line is printed for each table dropped.
def invalidate_cache
  @rels.each_with_index do |rel, idx|
    next unless rel.rescan

    puts "#{qualified_tabname} rel:#{idx}(#{rel.qualified_tabname}) invalidated" if $BUD_DEBUG
    @hash_tables[idx] = {}
  end
end
join_offset(entry)
click to toggle source
calculate the position for a field in the result of a join: the tuple offset (“subtuple”) and the attribute position within it (“offset”)
# File lib/bud/executor/join.rb, line 140
# Calculate the position of a field in the result of a join.
#
# entry:: a two-element sequence [table_name, attribute_offset].
#
# Returns [subtuple, offset], where +subtuple+ is the index within
# +all_rels_below+ of the first relation whose +qualified_tabname+ equals the
# table name (0 when no relation matches) and +offset+ is the attribute
# offset from +entry+, passed through unchanged.
def join_offset(entry)
  name, offset = entry[0], entry[1]

  # Array#index stops at the first match, so no copy of the relation list
  # and no manual break are needed.
  subtuple = all_rels_below.index { |t| t.qualified_tabname == name } || 0
  return subtuple, offset
end
lefts(*preds, &blk)
click to toggle source
# File lib/bud/executor/join.rb, line 361
# Join under +preds+ but emit only the left-hand item of each matching pair.
# With a block, the left item is passed through the block instead.
#
# Without a block, @cols is taken from the toplevel table that has the same
# qualified name as the first relation, and the accessors are rebuilt.
def lefts(*preds, &blk)
  unless blk
    @cols = @bud_instance.toplevel.tables[@rels[0].qualified_tabname].cols
    setup_accessors
  end
  pairs(*preds) { |x, _y| blk ? blk.call(x) : x }
end
matches(&blk)
click to toggle source
# File lib/bud/executor/join.rb, line 436
# Natural join: derive the predicates from the column names shared by the
# relations in @all_rels_below, then hand them (and the optional block) to
# +pairs+.
def matches(&blk)
  natural = self.class.natural_preds(@bud_instance, @all_rels_below)
  pairs(*natural, &blk)
end
outer(*preds, &blk)
click to toggle source
Given a * expression over 2 collections, form all combos of items that satisfy preds, and for any item from the 1st collection that has no matches in the 2nd, nil-pad it and include it in the output.
# File lib/bud/executor/join.rb, line 352
# Configure this join via +pairs+ and then extend the object with
# Bud::PushSHOuterJoin.
#
# Raises Bud::Error when more than two relations take part in the join.
def outer(*preds, &blk)
  too_many = @all_rels_below.length > 2
  raise Bud::Error, "outer joins cannot be used with more than 2 join relations" if too_many

  pairs(*preds, &blk)
  extend(Bud::PushSHOuterJoin)
end
pairs(*preds, &blk)
click to toggle source
And now, the Bloom-facing methods. Given a * expression over n collections, form all combinations of items subject to an array of predicates, preds. Currently supports two syntax options for equijoin predicates:
General form: an array of arrays capturing a conjunction of equivalence classes, e.g. [[table1.col1, table2.col2, table3.col3], [table1.col2, table2.col3]].
Common form: a hash capturing equality of a column on the left with one on the right, e.g. :col1 => :col2 (same as lefttable.col1 => righttable.col2).
# File lib/bud/executor/join.rb, line 321
# Configure the join with the given predicates and optional output block.
#
# preds:: equijoin predicates; stored in @origpreds and, when non-empty,
#         handed to +setup_preds+.
# blk::   optional block, installed via +set_block+ when given.
#
# Returns self.
def pairs(*preds, &blk)
  if @cols.nil?
    # Derive a schema if none is set: one column per input table, named after
    # the table. A table that appears more than once gets a numeric suffix
    # (_1, _2, ...) on its later occurrences.
    #
    # XXX: actually, this seems completely bogus. The schema for the output
    # of the join should depend on the join's *targetlist*.
    @cols = []
    seen = Hash.new(0)
    @all_rels_below.each do |rel|
      rel_name = rel.qualified_tabname.to_s
      dup_count = seen[rel_name]
      suffix = dup_count > 0 ? "_#{dup_count}" : ""
      @cols << "#{rel_name}#{suffix}".to_sym
      seen[rel_name] += 1
    end
    setup_accessors
  end

  @origpreds = preds
  setup_preds(preds) unless preds.empty?

  # New predicates mean different join state, so set it up again. This runs
  # only when this object's class is Bud::PushSHJoin or a subclass of it.
  setup_state if self.class <= Bud::PushSHJoin

  set_block(&blk) if blk
  self
end
Also aliased as: combos
rights(*preds, &blk)
click to toggle source
# File lib/bud/executor/join.rb, line 372
# Join under +preds+ but emit only the right-hand item of each matching pair.
# With a block, the right item is passed through the block instead.
#
# Without a block, @cols is taken from the toplevel table that has the same
# qualified name as the second relation, and the accessors are rebuilt.
def rights(*preds, &blk)
  unless blk
    @cols = @bud_instance.toplevel.tables[@rels[1].qualified_tabname].cols
    setup_accessors
  end
  pairs(*preds) { |_x, y| blk ? blk.call(y) : y }
end
Protected Instance Methods
insert_item(item, offset)
click to toggle source
# File lib/bud/executor/join.rb, line 246
# Add +item+ to the hash table for input +offset+ (build) and, if it was not
# already there, look up its key in the other input's table (probe) and
# process any matches.
#
# item::   the tuple being inserted.
# offset:: which input the tuple belongs to (0 or 1).
def insert_item(item, offset)
  # assumes left-deep trees
  join_key =
    if @left_is_array && offset == 0
      # The left input is itself a tuple of subtuples: each key component is
      # addressed by (subtuple index, attribute index).
      @keys.map do |k|
        left_subtuple, left_offset = k.first
        item[left_subtuple][left_offset]
      end
    else
      item.values_at(*@key_attnos[offset])
    end

  # build
  bucket = (@hash_tables[offset][join_key] ||= Set.new)
  # add? returns nil for an item already in the set; nothing more to do then.
  return unless bucket.add?(item)

  @found_delta = true

  # and probe
  partners = @hash_tables[1 - offset][join_key]
  process_matches(item, partners, offset) unless partners.nil?
end
replay_join()
click to toggle source
# File lib/bud/executor/join.rb, line 268
# Re-emit the full join output from the two cached hash tables and clear the
# pending-rescan flag. Does nothing further when either table is empty.
def replay_join
  @rescan = false
  left_tbl, right_tbl = @hash_tables
  return if left_tbl.empty? || right_tbl.empty?

  # Walk the table with fewer keys and look each key up in the other one.
  # +side+ records which input the walked items belong to.
  if left_tbl.size < right_tbl.size
    walked, probed, side = left_tbl, right_tbl, 0
  else
    walked, probed, side = right_tbl, left_tbl, 1
  end

  walked.each_pair do |key, items|
    partners = probed[key]
    next if partners.nil?

    items.each { |item| process_matches(item, partners, side) }
  end
end
Private Instance Methods
dupfree_schema(flat_schema)
click to toggle source
# File lib/bud/executor/join.rb, line 383
# Turn a flat list of column names into a list of unique symbols.
#
# A trailing "_<digits>" suffix is stripped from each name first. The first
# occurrence of a base name is kept as is; later occurrences get "_1", "_2",
# ... appended. Stripping removes only one suffix level, so a single pass can
# still produce collisions (e.g. :a, :a, :a_1_1); the pass is repeated until
# the result has no duplicates.
#
# flat_schema:: array of column names (symbols or strings).
#
# Returns a new array of symbols; an empty input yields an empty array.
def dupfree_schema(flat_schema)
  # An empty schema is already duplicate-free. Without this guard the
  # "repeat until non-empty and unique" loop below would never terminate.
  return [] if flat_schema.empty?

  loop do
    seen = Hash.new(0)
    renamed = flat_schema.map do |col|
      base, sep, suffix = col.to_s.rpartition("_")
      # Strip a numeric suffix only when an underscore is actually present.
      col = base.to_sym if !sep.empty? && (suffix =~ /^\d+$/) == 0
      count = seen[col]
      seen[col] += 1
      count.zero? ? col.to_s.to_sym : "#{col}_#{count}".to_sym
    end
    return renamed if renamed.uniq.length == renamed.length

    flat_schema = renamed
  end
end
process_matches(item, the_matches, offset)
click to toggle source
# File lib/bud/executor/join.rb, line 295
# Emit one join result for each element of +the_matches+ paired with +item+.
#
# item::        the tuple being joined.
# the_matches:: the tuples from the other input that share its key.
# offset::      0 when +item+ comes from the left input; any other value
#               puts +item+ on the right.
def process_matches(item, the_matches, offset)
  item_is_left = (offset == 0)
  the_matches.each do |m|
    left  = item_is_left ? item : m
    right = item_is_left ? m : item
    # FIX: reduce arrays being created
    # When the left side is already an array of subtuples, append the right
    # tuple to it; otherwise emit a plain two-element pair.
    push_out(@left_is_array ? left + [right] : [left, right])
  end
end
setup_state()
click to toggle source
initialize the state for this join to be carried across iterations within a fixpoint
# File lib/bud/executor/join.rb, line 60
# Initialize the per-join state: a table name built from the names of the
# joined relations plus this element's state id, and a pair of empty hash
# tables, one per input.
def setup_state
  sid = state_id
  rel_names = @all_rels_below.map(&:qualified_tabname).join('*')
  @tabname = "(#{rel_names}):#{sid}".to_sym
  @hash_tables = [{}, {}]
end