class BaseFunction

Attributes

fft[RW]
freq_table[RW]
regions[RW]
single_points[RW]
snps[RW]
values[RW]

Public Class Methods

new(freq_table) click to toggle source
# File lib/scbi_cominer/classes/base_function.rb, line 15
# Stores +freq_table+, evaluates it through #calculate and derives the
# @regions, @single_points and @snps collections from the result.
#
# freq_table - frequency table object; kept in @freq_table and queried by
#              #calculate (max_length) and #purge_snps (valid_snp).
def initialize(freq_table)
  @freq_table = freq_table

  # Defaults in place before #calculate runs (it overwrites all of them).
  @values = []
  @fft = nil
  @lim1 = 0
  @lim2 = 0

  calculate

  # Every run of original (unfiltered) values above the upper limit.
  above_upper_limit = lambda { |value| value > @lim2 }
  candidates = filter_regions(@fft.original_data, above_upper_limit, true)

  # Nearby candidates are merged into wider regions ...
  @regions = group_regions(candidates)

  # ... and candidates overlapping one of those regions are dropped here.
  @single_points = purge_regions(candidates, @regions)

  # SNPs are taken from the unpurged candidates, so a SNP that lies inside
  # a region is still reported.
  @snps = purge_snps(candidates)
end

Public Instance Methods

add_region(regions,r) click to toggle source
# File lib/scbi_cominer/classes/base_function.rb, line 296
# Averages the score of +r+ over its width and appends it to +regions+.
#
# The width is end - start + 1. A region whose width is not positive is
# ignored: it is neither modified nor stored.
#
# regions - Array collecting the accepted regions (mutated in place).
# r       - Hash with 'start', 'end' and 'score' keys; 'score' is replaced
#           by score / width as a Float.
#
# Returns +regions+ when the region was stored, nil otherwise.
def add_region(regions,r)
  width = r['end'] - r['start'] + 1
  return unless width > 0

  r['score'] = r['score'].to_f / width.to_f
  regions.push(r)
end
calculate() click to toggle source
# File lib/scbi_cominer/classes/base_function.rb, line 56
# Evaluates every position of the frequency table and filters the result.
#
# Calls #evaluate_pos for each index from 0 up to @freq_table.max_length - 1,
# stores the collected values in @values, wraps them in a LowPassFilter
# (kept in @fft) and reads the two limits from it into @lim1 and @lim2.
#
# Returns whatever @fft.limits returned.
def calculate
  @values = @freq_table.max_length.times.map { |pos| evaluate_pos(pos) }

  @fft = LowPassFilter.new(@values)

  @lim1, @lim2 = @fft.limits
end
evaluate_pos(i) click to toggle source
# File lib/scbi_cominer/classes/base_function.rb, line 77
# Hook that scores position +i+; #calculate calls it once per position.
#
# This base implementation always raises a RuntimeError, so a child class
# has to override it.
def evaluate_pos(i)
  raise(RuntimeError, 'You must create a child class to override this method')
end
filter_regions(data, comp, only_single_points = false, mandatory_data = nil) click to toggle source
# File lib/scbi_cominer/classes/base_function.rb, line 224
# Scans +data+ and collects every maximal run of consecutive values for
# which +comp+ returns a truthy result.
#
# Each run becomes a Hash with 'start' and 'end' (inclusive, 0-based
# positions in +data+) and 'score' (sum of the values in the run, which
# #add_region then averages over the run width). A run is only stored when
# #valid_region accepts it.
#
# A run that is still open when the data ends now finishes at the last
# index. Previously it was given an end one position past the data, which
# inflated its width, diluted its score and stopped a single point at the
# last position from having start == end.
#
# data               - enumerable of numeric values (only #each is used).
# comp               - callable; comp.call(value) is truthy for values
#                      that belong to a region.
# only_single_points - forwarded untouched to #valid_region.
# mandatory_data     - forwarded untouched to #valid_region.
#
# Returns an Array of region Hashes in order of appearance.
def filter_regions(data, comp, only_single_points = false, mandatory_data = nil)
  regions = []
  current = nil # region being annotated; nil while values do not match
  pos = 0

  data.each do |v|
    if comp.call(v)
      if current
        current['score'] += v
      else
        current = { 'start' => pos, 'end' => 0, 'score' => v }
      end
    elsif current
      # the run finished at the previous position
      current['end'] = pos - 1
      add_region(regions, current) if valid_region(current, comp, only_single_points, mandatory_data)
      current = nil
    end

    pos += 1
  end

  # close a run that reaches the end of the data; pos is now one past the
  # last index, so the run ends at pos - 1
  if current
    current['end'] = pos - 1
    add_region(regions, current) if valid_region(current, comp, only_single_points, mandatory_data)
  end

  regions
end
graph(file_name=nil) click to toggle source
# File lib/scbi_cominer/classes/base_function.rb, line 306
# Draws a two-panel plot through Gnuplot (multiplot layout 2,1 upwards).
#
# The first plot shows @regions (red) and @single_points (blue) as steps
# built by #regions_to_graph_data. The second plot shows the original and
# filtered data of @fft plus one horizontal line for each of @lim1 and
# @lim2.
#
# file_name - when not nil, a PNG terminal is selected (width equal to the
#             number of filtered samples, height 600) and output is sent
#             to this name; when nil no terminal/output is set.
#
# Returns whatever Gnuplot.open returns.
def graph(file_name=nil)
  # Appends one data series with the given gnuplot style string to a plot.
  add_series = lambda do |plot, x, y, style|
    plot.data << Gnuplot::DataSet.new([x, y]) do |ds|
      ds.with = style
    end
  end

  Gnuplot.open do |gp|
    # --- first plot: regions and single points
    Gnuplot::Plot.new(gp) do |plot|
      unless file_name.nil?
        plot.terminal "png size #{@fft.filtered_data.length},600"
        plot.output "#{file_name}"
      end

      plot.set "multiplot layout 2,1 upwards"
      plot.xrange("[0:#{@fft.original_data.length-1}]")
      plot.set "tmargin 0.0"

      plot.title ""
      plot.ylabel "Region"
      plot.xlabel "Nucleotide"

      unless @regions.empty?
        x, y = regions_to_graph_data(@regions, @fft.original_data.length-1)
        add_series.call(plot, x, y, "lines  lt rgb \"red\" ti \"Regions #{x.length}\"")
      end

      unless @single_points.empty?
        x, y = regions_to_graph_data(@single_points, @fft.original_data.length-1)
        add_series.call(plot, x, y, "lines  lt rgb \"blue\" ti \"Points #{x.length}\"")
      end
    end

    # --- second plot: original data, filtered data and both limits
    Gnuplot::Plot.new(gp) do |plot|
      plot.title  "Filter Base: #{fft.filter_base} , skip: #{fft.skip}"

      plot.set "bmargin 0.0"
      plot.set "tmargin 2"

      plot.xrange("[0:#{@fft.original_data.length-1}]")

      plot.ylabel "f"
      plot.xlabel ''
      plot.noxtics

      add_series.call(plot,
                      (0...@fft.original_data.length).to_a,
                      @fft.original_data.to_a,
                      "lines  lt rgb \"green\" ti \"Original data\"")

      add_series.call(plot,
                      (0...@fft.filtered_data.length).to_a,
                      @fft.filtered_data.to_a,
                      "lines lt rgb \"blue\" ti \"Filtered data\"")

      # each limit is a horizontal line defined by its two end points
      add_series.call(plot,
                      [0, @fft.filtered_data.length-1],
                      [@lim1, @lim1],
                      "lines lt rgb \"red\" ti \"Lim1 [#{@lim1}]\"")

      add_series.call(plot,
                      [0, @fft.filtered_data.length-1],
                      [@lim2, @lim2],
                      "lines lt rgb \"red\" ti \"Lim2 [ #{@lim2}]\"")
    end
  end
end
graph2(file_name = nil) click to toggle source
# File lib/scbi_cominer/classes/base_function.rb, line 467
# Draws a single Gnuplot plot with four untitled line series: the original
# data (green), the filtered data (blue) and one horizontal line for each
# of @lim1 and @lim2 (red).
#
# The limit lines carry one y value per filtered sample. Previously they
# were seeded with one value and then extended once per sample, giving one
# more y value than x values.
#
# file_name - when not nil, a PNG terminal is selected (width equal to the
#             number of filtered samples, height 600) and output is sent
#             to this name; when nil no terminal/output is set.
#
# Returns whatever Gnuplot.open returns.
def graph2(file_name = nil)
  Gnuplot.open do |gp|
    Gnuplot::Plot.new(gp) do |plot|
      unless file_name.nil?
        plot.terminal "png size #{@fft.filtered_data.length},600"
        plot.output "#{file_name}"
      end

      plot.title  "Filter Base: #{@fft.filter_base} , skip: #{@fft.skip}"
      plot.ylabel "f"
      plot.xlabel "x"

      original = @fft.original_data
      filtered = @fft.filtered_data
      samples = filtered.length

      # [x values, y values, gnuplot style] for each series, in draw order
      series = [
        [(0...original.length).to_a, original.to_a, "lines  lt rgb \"green\""],
        [(0...samples).to_a, filtered.to_a, "lines lt rgb \"blue\""],
        [(0...samples).to_a, Array.new(samples, @lim1), "lines lt rgb \"red\""],
        [(0...samples).to_a, Array.new(samples, @lim2), "lines lt rgb \"red\""]
      ]

      series.each do |x, y, style|
        plot.data << Gnuplot::DataSet.new([x, y]) do |ds|
          ds.with = style
          ds.notitle
        end
      end
    end
  end
end
group_regions(data) click to toggle source
# File lib/scbi_cominer/classes/base_function.rb, line 168
# Merges regions that lie close to each other into wider groups.
#
# Walks +data+ in order. A region joins the current group when its start
# is less than +max_separation+ positions after the end of the previous
# region; otherwise the current group is closed and a new one begins. A
# closed group is kept only when it spans more than one position
# (start < end); its score is the mean of the member scores.
#
# Fixes over the previous implementation:
# * the last group is closed after the loop (it used to be dropped, so
#   input forming a single group always produced no regions);
# * the first group starts at the first region's start instead of at
#   position 0.
#
# data           - Array of Hashes with 'start', 'end' and 'score' keys,
#                  expected in ascending position order (as produced by
#                  #filter_regions). Not modified.
# max_separation - grouping distance; defaults to 15, the value that used
#                  to be hard-coded.
#
# Returns an Array of new Hashes with 'start', 'end' and 'score' keys.
def group_regions(data, max_separation = 15)
  regions = []
  return regions if data.empty?

  group_start = nil
  group_end = nil
  group_score = nil
  group_size = nil
  last_end = nil # end of the previously visited region; nil before the first

  # Stores the current group when it covers more than one position.
  save_group = lambda do
    if group_start < group_end
      regions.push({ 'start' => group_start,
                     'end' => group_end,
                     'score' => group_score.to_f / group_size.to_f })
    end
  end

  data.each do |r|
    if last_end && r['start'] < last_end + max_separation
      # close enough to the previous region: extend the current group
      group_score += r['score']
      group_end = r['end']
      group_size += 1
    else
      # too far away (or very first region): close the group, start anew
      save_group.call if last_end

      group_start = r['start']
      group_end = r['end']
      group_score = r['score']
      group_size = 1
    end

    last_end = r['end']
  end

  # the group still open when the data ends must be saved as well
  save_group.call

  regions
end
purge_regions(regions1, regions2) click to toggle source
# File lib/scbi_cominer/classes/base_function.rb, line 81
# Returns the entries of +regions1+ that do not overlap any entry of
# +regions2+.
#
# Two regions overlap when each one starts at or before the other's end
# (both bounds inclusive).
#
# regions1 - Array of Hashes with 'start' and 'end' keys to be filtered.
# regions2 - Array of Hashes with 'start' and 'end' keys to test against.
#
# Returns a new Array; neither argument is modified.
def purge_regions(regions1, regions2)
  regions1.reject do |candidate|
    regions2.any? do |other|
      candidate['start'] <= other['end'] && other['start'] <= candidate['end']
    end
  end
end
purge_snps(regions) click to toggle source
# File lib/scbi_cominer/classes/base_function.rb, line 102
# Selects the one-position regions that the frequency table accepts as
# SNPs.
#
# A region qualifies when its 'start' equals its 'end' and
# @freq_table.valid_snp(start) is truthy; the table is only asked about
# one-position regions.
#
# regions - Array of Hashes with 'start' and 'end' keys.
#
# Returns a new Array holding the accepted region Hashes.
def purge_snps(regions)
  regions.select do |region|
    region['start'] == region['end'] && @freq_table.valid_snp(region['start'])
  end
end
regions_to_graph_data(regions,total_length) click to toggle source
# File lib/scbi_cominer/classes/base_function.rb, line 425
# Converts regions into x/y series that draw each region as a step.
#
# Every region contributes four points: (start - 1, 0), (start, score),
# (end, score) and (end + 1, 0). When there are no regions both series
# hold a single 0.
#
# regions      - Array of Hashes with 'start', 'end' and 'score' keys.
# total_length - accepted for the callers' sake; not used here.
#
# Returns a two-element Array [x, y].
def regions_to_graph_data(regions,total_length)
  x = []
  y = []

  regions.each do |r|
    x.concat([r['start'] - 1, r['start'], r['end'], r['end'] + 1])
    y.concat([0, r['score'], r['score'], 0])
  end

  x << 0 if x.empty?
  y << 0 if y.empty?

  [x, y]
end
valid_region(region, comp, only_single_points, mandatory_data) click to toggle source
# File lib/scbi_cominer/classes/base_function.rb, line 126
# Decides whether +region+ should be kept by #filter_regions.
#
# * When +only_single_points+ is truthy, or when no +mandatory_data+ is
#   given, any region whose end is not before its start is valid.
# * Otherwise the region must span more than one position, and the slice
#   of +mandatory_data+ covering it must itself contain more than one
#   sub-region according to #filter_regions with the same +comp+. Wide
#   regions with at most one such inner sub-region are therefore rejected
#   (invert that final check to stop rejecting them).
#
# region             - Hash with 'start' and 'end' keys.
# comp               - callable forwarded to #filter_regions.
# only_single_points - truthy to skip the mandatory-data check.
# mandatory_data     - nil, or a sequence sliceable as data[start, length].
#
# Returns true or false.
def valid_region(region, comp, only_single_points, mandatory_data)
  first = region['start']
  span = region['end'] - first

  # no inner check requested: a non-negative span is enough
  return span >= 0 if only_single_points || mandatory_data.nil?

  # with mandatory data the region has to be wider than a single position
  return false unless span > 0

  # look for inner sub-regions inside this window of the mandatory data
  window = mandatory_data[first, span + 1]
  inner = filter_regions(window, comp, nil)

  # valid only when more than one inner sub-region exists
  inner.count > 1
end