class RexleXPathParser

file: rexle-xpath-parser.rb

Attributes

to_a[R]

Public Class Methods

new(string) click to toggle source
# File lib/rexle-xpath-parser.rb, line 10
# Builds the parser result for an XPath expression.
#
# The expression is split into tokens, every token is drilled into by
# scan, and the nested tokens are then converted by functionalise. The
# outcome is stored in @to_a.
#
# string - the XPath expression to parse.
def initialize(string)
  nested_tokens = tokenise(string).map { |token| scan(token) }
  @to_a = functionalise(nested_tokens)
end

Private Instance Methods

functionalise(a, r2=[]) click to toggle source

Maps the nested tokens to XPath functions, predicates, operators, and one or more elements.
# File lib/rexle-xpath-parser.rb, line 25
# Maps the nested tokens to XPath functions, predicates, operators,
# and 1 or more elements.
#
# a  - Array of tokens. Entries that are Arrays are functionalised
#      recursively; other entries are tested against the patterns below,
#      in the order written (the first matching branch wins).
# r2 - Array used as the initial accumulator for the result.
#
# Returns the accumulator with the recognised instructions appended.
def functionalise(a, r2=[])

  r4 = a.inject(r2) do |r,x|
    
    # NOTE(review): `return` inside this block leaves functionalise itself,
    # so tokens that follow the first Array entry are never visited -
    # confirm this is intended.
    return r << functionalise(x) if x.is_a? Array
    
    # a bare "or" token becomes the :| operator
    if x =~ /^or$/ then
      r << :|

    # a function with empty parentheses, optionally followed by a
    # <, > or = comparison against a word, e.g. position() = 2
    elsif /^(?<func>\w+)\(\)(?:\s+(?<operator>[<>=])\s+(?<value>\w+))?/ =~ x

      r << if operator then
          # NOTE(review): x is not read again in this iteration, so this
          # reassignment appears to have no effect.
          x = ''
        [func.to_sym, operator.to_sym, value]
      else
        func.to_sym
      end
    # @name => [:attribute, name]; the name may contain '/'
    elsif /^@(?<attribute>[\w\/]+)/ =~ x
      r << [:attribute, attribute]
    # a leading // => :recursive followed by the parsed rest of the path
    elsif x =~ /^\/\//
      r << [:recursive, *RexleXPathParser.new(x[2..-1]).to_a]                
    # a path followed by a predicate, e.g. b[c='45']
    elsif x =~ /^[\w\/\*]+\[/
      
      # the predicate capture stops at the first ']'
      epath, predicate, remainder = x.match(/^([^\[]+)\[([^\]]+)\](.*)/).captures

      # one [:select, step] per path step, then the parsed predicate;
      # the << applies to the mapped array before it is concatenated onto r
      r.concat epath.split('/').map {|e| [:select, e]} << \
          [:predicate, RexleXPathParser.new(predicate).to_a] 

      
      # whatever follows the closing bracket is tokenised and
      # functionalised as a nested entry (a leading '/' is dropped first)
      if remainder.length > 0 then
        remainder.slice!(0) if remainder[0] == '/'          
        r << functionalise(match(remainder))
      else
        r
      end
      
    # != comparison; surrounding quotes are stripped from the value
    elsif /!=(?<value>.*)/  =~ x
      r << [:value, :'!=', value.sub(/^["'](.*)["']$/,'\1')]        
    # = comparison; surrounding quotes are stripped from the value
    elsif /=(?<value>.*)/  =~ x
      r << [:value, :==, value.sub(/^["'](.*)["']\s*$/,'\1')]
    # any token containing '|' => [:union]
    elsif x =~ /\|/
      r << [:union] 
    # "or" surrounded by whitespace inside a token => :|
    elsif x =~ /\s+or\s+/
      r << :|
    # a function name followed by '('; chop removes the final character
    # (presumably the '(' kept by scan - confirm)
    elsif x =~ /\w+\(/
      r << [x.chop.to_sym]
    # any token containing a digit is treated as an index
    elsif x =~ /\d+/
      r << [:index, x]
    # attribute::name => [:attribute, name]
    elsif /^attribute::(?<attribute>\w+)/ =~ x
      r << [:attribute, attribute]        
    # an element name, optionally followed by '/' and further steps
    elsif x.is_a? String and /^(?<name>[\w\*\.]+)\/?/ =~ x
      
      # removes the matched name from the token string in place
      x.slice!(/^[\w\*\.]+\/?/)
      r3 = [:select, name]

      # anything left over is functionalised into the same :select entry
      if x.length > 0 then
        functionalise([x], r3) 

      end
      r << r3        
    else

      # unrecognised tokens are skipped
      r
    end
  
  end

  r4

end
lmatch(a, lchar, rchar) click to toggle source

matches a left bracket with a right bracket recursively if necessary

# File lib/rexle-xpath-parser.rb, line 98
# Matches a left bracket with a right bracket, recursing through rmatch
# when a left bracket is found.
#
# a     - Array of single characters; it is consumed (shifted) in place.
# lchar - the opening bracket character.
# rchar - the closing bracket character.
#
# Returns [found, token, remainder]: the character the scan ended on (or
# the one reported by rmatch), the text consumed so far, and the
# unconsumed text.
def lmatch(a, lchar, rchar)
  consumed = []
  ch = nil

  # take characters until a bracket is met or the input runs out
  loop do
    ch = a.shift
    break if ch == lchar || ch == rchar || a.empty?

    consumed << ch
  end
  consumed << ch

  return [ch, consumed.join, a.join] unless ch == lchar

  # an opening bracket was consumed: let rmatch locate its partner
  closer, inner, rest = rmatch(a, lchar, rchar)
  [closer, consumed.push(inner).join, rest]
end
match(s) click to toggle source

tokeniser e.g. "a | d(c)" #=> ["a", " | ", "d(c)"]

# File lib/rexle-xpath-parser.rb, line 147
# Tokeniser, e.g. "a | d(c)" #=> ["a", " | ", "d(c)"]
#
# s - the expression String. It may be modified in place: the leading
#     token and any operator are sliced off it while scanning.
#
# Returns an Array of tokens. Path steps after an element name are
# returned as a nested Array, e.g. "a/b" #=> ["a", ["b"]].
def match(s)

  a = []

  # it's a function with no arguments, e.g. position()
  # Anything that follows (such as " = 2") stays in the same token.
  if /^\w+\(\)/ =~ s

    a << s

  # it's a function with arguments
  elsif s =~ /^\w+\(/

    found, token, remainder = lmatch(s.chars, '(', ')')
    a << token if found == ')'

  # it contains a predicate
  # e.g. b[c='45']
  elsif s =~ /^[\w\/\*]+\[/

    _found, token, remainder = lmatch(s.chars, '[', ']')
    a << token
    a2 = match remainder

    # the first token after the predicate is glued onto the predicate
    # token; the rest are appended as tokens of their own
    token << a2.first if a2.first
    # drop(1) tolerates an empty result, where a2[1..-1] would be nil
    a.concat a2.drop(1)

  # it's an element name e.g. b
  elsif /^(?<name>[\w\*]+)\// =~ s
    a << name << match($')

  # it's something else e.g. @colour='red'
  else

    token = s.slice!(/^[@?\w\/:\*\(\)\.]+/)

    a << token
    remainder = s
  end

  # remainder is nil for the branches that never assign it
  return a if remainder.nil? or remainder.strip.empty?

  operator = remainder.slice!(/^\s*(?:\||or)\s*/)

  if operator
    a.concat [operator, *match(remainder)]
  else
    a << remainder
  end

  a
end
rmatch(a, lchar, rchar) click to toggle source

matches a right bracket for a left bracket which has already been found.

# File lib/rexle-xpath-parser.rb, line 120
# Matches a right bracket for a left bracket which has already been found.
#
# a     - Array of single characters; it is consumed (shifted) in place.
# lchar - the opening bracket character.
# rchar - the closing bracket character.
#
# Returns [found, token, remainder]. found is the character the scan
# actually ended on: rchar when the closing bracket was located, otherwise
# the last character consumed (nil when the input was already empty), so
# callers can detect an unterminated bracket.
def rmatch(a, lchar, rchar)

  token = []
  c = nil
  token << c until (c = a.shift; c == lchar || c == rchar || a.empty?)
  token << c

  if c == lchar

    # a nested pair: consume it up to its own closing bracket
    _found, nested, _rest = rmatch(a, lchar, rchar)
    token << nested

    # find the rmatch for the starting token
    found, tail, remainder = rmatch(a, lchar, rchar)
    c = found
    token << tail

  else
    # previously written as `elsif c = rchar`, an assignment that always
    # reported rchar even when the input ended without a closing bracket
    remainder = a.join
  end

  [c, token.join, remainder]
end
scan(s) click to toggle source

accepts a token and drills into it to identify more tokens beneath it

# File lib/rexle-xpath-parser.rb, line 215
# Accepts a token and drills into it to identify more tokens beneath it.
#
# s - a token. A String holding a function call with arguments is split
#     (and modified in place); anything else is returned unchanged.
#
# Returns the token itself, the function name (including its "("), or
# [function, tokens-of-the-arguments].
def scan(s)

  # match can hand back nested Arrays (and nil placeholders). They carry
  # no function call, and Object#=~ no longer exists from Ruby 3.2 on, so
  # they are passed through before any regexp test.
  return s unless s.is_a?(String)
  return s unless s =~ /^\w+\([^\)]/

  func = s.slice!(/\w+\(/)
  # drop the final character, expected to be the closing ")"
  remainder = s[0..-2]

  return func if remainder.empty?

  if remainder =~ /^\w+\(/
    scan(remainder)
  else
    [func, match(remainder)]
  end
end
tokenise(s) click to toggle source

alias tokenise match

# File lib/rexle-xpath-parser.rb, line 236
# Splits an XPath expression into top-level tokens.
#
# An expression containing "[" is handed to match as a whole. Otherwise it
# is cut in front of every standalone "or"; each piece is tokenised by
# match, and pieces that start with "or" get an 'or' token put in front of
# their tokens.
#
# s - the XPath expression String.
#
# Returns an Array of tokens.
def tokenise(s)
  return match(s) if s =~ /\[/

  s.split(/(?=\bor\b)/).flat_map do |segment|
    exp = segment[/^or\b\s+(.*)/, 1]
    exp ? match(exp).unshift('or') : match(segment)
  end
end