class Bud::PushSHJoin

Attributes

all_rels_below[R]
keys[R]
localpreds[R]
origpreds[R]
relnames[R]

Private Class Methods

natural_preds(bud_instance, rels) click to toggle source
# File lib/bud/executor/join.rb, line 420
# Builds natural-join predicates for a list of relations.
#
# Every pair of relations (taken in list order, earlier before later) is
# inspected; each column name present in both relations' +cols+ contributes
# one predicate of the form [earlier.<col>, later.<col>].
#
# bud_instance:: accepted as part of the signature; not used by this method.
# rels::         relations responding to +cols+ and to each of their column
#                names.
#
# Returns the predicates with duplicates removed.
def self.natural_preds(bud_instance, rels)
  rels.to_a.combination(2).flat_map do |earlier, later|
    (earlier.cols & later.cols).map do |col|
      [earlier.send(col), later.send(col)]
    end
  end.uniq
end

Public Instance Methods

combos(*preds, &blk)
Alias for: pairs
flatten(*preds, &blk) click to toggle source
# File lib/bud/executor/join.rb, line 407
# Joins the inputs via #pairs, handing each matched pair downstream as one
# concatenated tuple (x + y) instead of as a pair.
#
# With no block, the output schema becomes the collision-free concatenation
# of the first two inputs' columns and the concatenated tuple is emitted
# as-is. With a block, the schema is left empty and the block is applied to
# the concatenated tuple.
#
# preds:: join predicates, forwarded unchanged to #pairs.
#
# Returns the result of #pairs.
def flatten(*preds, &blk)
  @cols = blk ? [] : dupfree_schema(@rels[0].cols + @rels[1].cols)
  setup_accessors
  pairs(*preds) do |x, y|
    joined = x + y
    blk ? blk.call(joined) : joined
  end
end
flush() click to toggle source
# File lib/bud/executor/join.rb, line 54
# Replays the join output (via #replay_join) when a rescan is pending, i.e.
# when @rescan is set; otherwise does nothing and returns nil.
def flush
  return unless @rescan

  replay_join
end
insert(item, source) click to toggle source
# File lib/bud/executor/join.rb, line 227
# Feeds +item+, which arrived from +source+, into every join input position
# fed by that source.
#
# item::   the tuple to insert.
# source:: the upstream element; only its +qualified_tabname+ is consulted.
#
# Returns the list of input offsets the item was inserted at.
def insert(item, source)
  # Any pending replay of the join's output has to happen *before* the new
  # tuple is processed. Otherwise whatever output the new tuple produces would
  # be produced a second time by the later rescan, yielding needless
  # duplicates.
  replay_join if @rescan

  source_tbl = source.qualified_tabname
  offsets =
    if @selfjoins.include?(source_tbl)
      # Self-join: the same table sits at more than one input position.
      @relnames.each_index.select { |i| @relnames[i] == source_tbl }
    else
      [@relnames.index(source_tbl)]
    end

  offsets.each { |offset| insert_item(item, offset) }
end
invalidate_cache() click to toggle source
# File lib/bud/executor/join.rb, line 128
# Clears the cached hash table of every input whose +rescan+ flag is set,
# replacing it with an empty hash. When $BUD_DEBUG is set, a line naming the
# invalidated input is printed for each one.
#
# Returns @rels.
def invalidate_cache
  @rels.each_with_index do |rel, idx|
    next unless rel.rescan

    if $BUD_DEBUG
      puts "#{qualified_tabname} rel:#{idx}(#{rel.qualified_tabname}) invalidated"
    end
    @hash_tables[idx] = {}
  end
end
join_offset(entry) click to toggle source

Calculate the position of a field in the result of a join: the index of the tuple that contains it (“subtuple”) and the attribute position within that tuple (“offset”).

# File lib/bud/executor/join.rb, line 140
# Locates a field within the join result.
#
# entry:: indexable pair; entry[0] is a table name and entry[1] is the
#         attribute position inside that table's tuple.
#
# Returns [subtuple, offset], where +subtuple+ is the index of the first
# relation in +all_rels_below+ whose +qualified_tabname+ equals the table
# name, and +offset+ is entry[1] unchanged. If no relation matches, +subtuple+
# is 0.
def join_offset(entry)
  name = entry[0]
  offset = entry[1]

  # Find which subtuple of the collection holds the referenced table.
  subtuple = all_rels_below.index { |t| t.qualified_tabname == name } || 0

  [subtuple, offset]
end
lefts(*preds, &blk) click to toggle source
# File lib/bud/executor/join.rb, line 361
# Joins via #pairs but emits only the left item of each matched pair.
#
# With no block, the output schema is set to the columns of the first input's
# table (looked up by qualified name in the toplevel instance's tables) and
# the left item is emitted as-is. With a block, the schema is not touched and
# the block is applied to the left item.
#
# preds:: join predicates, forwarded unchanged to #pairs.
#
# Returns the result of #pairs.
def lefts(*preds, &blk)
  unless blk
    tables = @bud_instance.toplevel.tables
    @cols = tables[@rels[0].qualified_tabname].cols
    setup_accessors
  end
  pairs(*preds) { |x, _y| blk ? blk.call(x) : x }
end
matches(&blk) click to toggle source
# File lib/bud/executor/join.rb, line 436
# Natural join: derives the predicates from the column names shared between
# the relations below this join (see the class-level natural_preds) and
# delegates to #pairs with them.
#
# Returns the result of #pairs.
def matches(&blk)
  pairs(*self.class.natural_preds(@bud_instance, @all_rels_below), &blk)
end
outer(*preds, &blk) click to toggle source

Given a * expression over 2 collections, form all combinations of items that satisfy preds. Any item from the 1st collection that has no match in the 2nd is nil-padded and included in the output as well.

# File lib/bud/executor/join.rb, line 352
# Outer-join variant of #pairs: configures the join through #pairs and then
# extends this object with Bud::PushSHOuterJoin.
#
# preds:: join predicates, forwarded unchanged to #pairs.
#
# Returns self (the result of #extend).
# Raises Bud::Error when more than two relations participate in the join.
def outer(*preds, &blk)
  raise Bud::Error, "outer joins cannot be used with more than 2 join relations" if @all_rels_below.length > 2

  pairs(*preds, &blk)
  extend(Bud::PushSHOuterJoin)
end
pairs(*preds, &blk) click to toggle source

And now, the Bloom-facing methods. Given a * expression over n collections, form all combinations of items subject to an array of predicates, preds. Two syntax options are currently supported for equijoin predicates:

general form: an array of arrays capturing a conjunction of equiv. classes
      [[table1.col1, table2.col2, table3.col3], [table1.col2, table2.col3]]
common form: a hash capturing equality of a column on left with one on right.
      :col1 => :col2  (same as  lefttable.col1 => righttable.col2)
# File lib/bud/executor/join.rb, line 321
# Configures this join with the given predicates and optional block.
#
# If no output schema has been set yet (@cols is nil), one is derived with a
# single column per relation below the join, named after the relation's
# qualified table name; repeated names get a distinguishing "_<n>" suffix
# (the second occurrence is "_1", the third "_2", ...).
#
# preds:: equijoin predicates; stored in @origpreds and, when non-empty,
#         handed to #setup_preds.
# blk::   optional block, installed through #set_block.
#
# Returns self.
def pairs(*preds, &blk)
  if @cols.nil?
    # derive schema if needed: one column for each table.  duplicated inputs
    # get distinguishing numeral.
    #
    # XXX: actually, this seems completely bogus. The schema for the output
    # of the join should depend on the join's *targetlist*.
    occurrences = Hash.new(0)
    @cols = @all_rels_below.map do |r|
      r_name = r.qualified_tabname.to_s
      seen = occurrences[r_name]
      occurrences[r_name] += 1
      (seen > 0 ? "#{r_name}_#{seen}" : r_name).to_sym
    end
    setup_accessors
  end

  @origpreds = preds
  setup_preds(preds) unless preds.empty?
  # given new preds, the state for the join will be different.  set it up again.
  setup_state if self.class <= Bud::PushSHJoin
  set_block(&blk) if blk
  self
end
Also aliased as: combos
rights(*preds, &blk) click to toggle source
# File lib/bud/executor/join.rb, line 372
# Joins via #pairs but emits only the right item of each matched pair.
#
# With no block, the output schema is set to the columns of the second
# input's table (looked up by qualified name in the toplevel instance's
# tables) and the right item is emitted as-is. With a block, the schema is not
# touched and the block is applied to the right item.
#
# preds:: join predicates, forwarded unchanged to #pairs.
#
# Returns the result of #pairs.
def rights(*preds, &blk)
  unless blk
    tables = @bud_instance.toplevel.tables
    @cols = tables[@rels[1].qualified_tabname].cols
    setup_accessors
  end
  pairs(*preds) { |_x, y| blk ? blk.call(y) : y }
end

Protected Instance Methods

insert_item(item, offset) click to toggle source
# File lib/bud/executor/join.rb, line 246
def insert_item(item, offset)
  # assumes left-deep trees
  if @left_is_array and offset == 0
    the_key = @keys.map do |k|
      left_subtuple, left_offset = k.first
      item[left_subtuple][left_offset]
    end
  else
    the_key = item.values_at(*@key_attnos[offset])
  end

  #build
  # puts "building #{item.inspect} into @source[#{offset}] on key #{the_key.inspect}"
  if (@hash_tables[offset][the_key] ||= Set.new).add? item
    @found_delta = true
    #and probe
    # puts "probing #{item.inspect} into @source[#{1-offset}] on key #{the_key.inspect}"
    the_matches = @hash_tables[1-offset][the_key]
    process_matches(item, the_matches, offset) unless the_matches.nil?
  end
end
replay_join() click to toggle source
# File lib/bud/executor/join.rb, line 268
# Regenerates the join output from the two cached hash tables and clears the
# pending-rescan flag.
#
# The table with fewer keys is scanned (the second one on a tie) and each of
# its keys is looked up in the other; every item under a matching key is
# passed to #process_matches with the offset of the scanned side.
#
# Returns nil if either table is empty, otherwise the scanned table.
def replay_join
  @rescan = false
  left_tbl, right_tbl = @hash_tables
  return if left_tbl.empty? || right_tbl.empty?

  scanned, probed, offset =
    left_tbl.size < right_tbl.size ? [left_tbl, right_tbl, 0] : [right_tbl, left_tbl, 1]

  scanned.each_pair do |key, items|
    the_matches = probed[key]
    next if the_matches.nil?

    items.each { |item| process_matches(item, the_matches, offset) }
  end
end

Private Instance Methods

dupfree_schema(flat_schema) click to toggle source
# File lib/bud/executor/join.rb, line 383
# Produces a list of column names with no repeats from +flat_schema+.
#
# Each pass first strips a trailing "_<digits>" suffix from every name, then
# numbers repeats of the same resulting name: the first occurrence keeps the
# name, and the k-th later occurrence becomes "<name>_<k>". Because a freshly
# numbered name can coincide with another column's name, passes are repeated
# (inefficiently) over the previous result until no collisions remain.
#
# flat_schema:: list of column names (anything responding to +to_s+).
#
# Returns a new list of symbols; the argument itself is not modified. An
# empty schema yields an empty list.
def dupfree_schema(flat_schema)
  loop do
    occurrences = Hash.new(0)
    renamed = flat_schema.map do |col|
      base, sep, suffix = col.to_s.rpartition("_")
      # Only strip when there really was an underscore; without this check an
      # all-digit name would be reduced to the empty string.
      col = base.to_sym if !sep.empty? && (suffix =~ /^\d+$/) == 0
      seen = occurrences[col]
      occurrences[col] += 1
      seen.zero? ? col.to_s.to_sym : "#{col}_#{seen}".to_sym
    end

    # Run the pass at least once, then stop as soon as the names are unique.
    # Testing for an empty result instead would never terminate on an empty
    # schema.
    return renamed if renamed.uniq.length == renamed.length

    flat_schema = renamed
  end
end
process_matches(item, the_matches, offset) click to toggle source
# File lib/bud/executor/join.rb, line 295
# Emits one join result per element of +the_matches+, pairing it with +item+.
#
# item::        the tuple that was just inserted or replayed.
# the_matches:: tuples from the opposite input that share its key.
# offset::      0 when +item+ is from the left input, anything else when it
#               is from the right.
#
# When the left side is already an array of subtuples (@left_is_array), the
# right tuple is appended to it; otherwise the result is the two-element pair
# [left, right]. Each result is passed to #push_out.
#
# Returns +the_matches+.
def process_matches(item, the_matches, offset)
  item_is_left = (offset == 0)
  the_matches.each do |m|
    left  = item_is_left ? item : m
    right = item_is_left ? m : item

    # FIX: reduce arrays being created
    push_out(@left_is_array ? left + [right] : [left, right])
  end
end
setup_state() click to toggle source

Initialize the state for this join that is carried across iterations within a fixpoint.

# File lib/bud/executor/join.rb, line 60
# (Re)initializes the per-join state: names the join
# "(<rel>*<rel>*...):<state id>" after the relations below it and resets
# both hash tables to empty.
#
# Returns the fresh pair of hash tables.
def setup_state
  sid = state_id
  rel_names = @all_rels_below.map(&:qualified_tabname).join('*')
  @tabname = "(#{rel_names}):#{sid}".to_sym
  @hash_tables = [{}, {}]
end