class Bliss::Parser

Public Class Methods

new(path, filepath=nil) click to toggle source
# File lib/bliss/parser.rb, line 3
# Sets up a parser for the document located at +path+.
#
# path     - stored in @path; #parse hands it to EM::HttpRequest.
# filepath - optional. When given, a file is opened there for writing and kept
#            in @file; its autoclose flag is switched off, so it is only
#            closed through #file_close.
def initialize(path, filepath=nil)
  @path = path
  @root = nil
  @nodes = nil
  @formats = []

  # The parser machine receives the SAX events produced by the push parser.
  @parser_machine = Bliss::ParserMachine.new
  @push_parser = Nokogiri::XML::SAX::PushParser.new(@parser_machine)

  if filepath
    @file = File.new(filepath, 'w').tap { |io| io.autoclose = false }
  end

  # Register a no-op root handler so that @root still gets recorded.
  on_root {}
end

Public Instance Methods

add_format(format) click to toggle source
# File lib/bliss/parser.rb, line 22
# Appends +format+ to the list of registered formats.
#
# Returns the updated @formats array.
def add_format(format)
  @formats << format
end
check_unhandled_bytes() click to toggle source
# File lib/bliss/parser.rb, line 86
# Fires the callback registered with #on_max_unhandled_bytes once the
# unhandled byte counter has gone past the configured maximum.
#
# The callback is cleared after it has been called, so it runs at most once
# per registration. Always returns nil.
def check_unhandled_bytes
  return unless @unhandled_bytes > @max_unhandled_bytes
  return unless @on_max_unhandled_bytes

  @on_max_unhandled_bytes.call
  @on_max_unhandled_bytes = nil
end
check_unhandled_bytes?() click to toggle source
# File lib/bliss/parser.rb, line 102
# Tells whether a maximum of unhandled bytes has been configured.
#
# Returns true when @max_unhandled_bytes is set to a truthy value,
# false otherwise.
def check_unhandled_bytes?
  !!@max_unhandled_bytes
end
close() click to toggle source
# File lib/bliss/parser.rb, line 110
# Closes the parser machine.
#
# Only the parser machine is touched here; #parse checks its closed state on
# every incoming chunk.
#
# Returns whatever the parser machine's #close returns.
def close
  @parser_machine.close
end
exceeded?() click to toggle source
# File lib/bliss/parser.rb, line 95
# Tells whether the unhandled byte counter has gone past the configured
# maximum.
#
# Always returns a strict boolean: false when no maximum is configured or the
# counter is at or under the limit, true when it is over the limit. A counter
# that has not been initialised yet (no #parse / #reset_unhandled_bytes so
# far) is treated as zero instead of raising.
def exceeded?
  return false unless check_unhandled_bytes?

  (@unhandled_bytes || 0) > @max_unhandled_bytes
end
file_close() click to toggle source
# File lib/bliss/parser.rb, line 215
# Closes the output file, if one was opened by the constructor.
#
# Does nothing when no file is set.
def file_close
  @file.close if @file
end
formats_details() click to toggle source
# File lib/bliss/parser.rb, line 30
# Prints the inspected +details+ of every registered format to standard
# output, one per line.
#
# Returns the @formats array.
def formats_details
  @formats.each do |fmt|
    details = fmt.details
    puts details.inspect
  end
end
handle_wait_tag_close(chunk) click to toggle source
# File lib/bliss/parser.rb, line 199
# Writes +chunk+ to the output file while waiting for the closing tag that was
# configured with #wait_tag_close.
#
# When the closing tag is found in +chunk+, everything up to and including
# that tag is written, followed by a closing tag for the root element, and the
# reactor is stopped through #secure_close. When it is not found the whole
# chunk is written and streaming goes on.
#
# Any error raised along the way also ends in #secure_close (best effort).
#
# chunk - String with the data that has just been received.
def handle_wait_tag_close(chunk)
  tag_start = chunk.index(@wait_tag_close)
  if tag_start
    # Cut right after the closing tag, using its real length rather than
    # assuming a fixed five-character tag.
    tag_end = tag_start + @wait_tag_close.length
    @file << chunk[0...tag_end]
    @file << "</#{root}>" # TODO set this by using actual depth, so all tags get closed
    secure_close
  else
    @file << chunk
  end
rescue
  # Deliberately swallow the error: a failed write must still stop the reactor.
  secure_close
end
load_constraints_on_parser_machine() click to toggle source
# File lib/bliss/parser.rb, line 26
# Gathers the constraints of every registered format into one flat list and
# hands that list to the parser machine.
#
# Returns whatever the parser machine's #constraints returns.
def load_constraints_on_parser_machine
  all_constraints = @formats.map { |fmt| fmt.constraints }.flatten
  @parser_machine.constraints(all_constraints)
end
on_max_unhandled_bytes(bytes, &block) click to toggle source
# File lib/bliss/parser.rb, line 67
# Configures the unhandled bytes limit and the callback to run once that
# limit is exceeded (see #check_unhandled_bytes).
#
# bytes - value stored as the maximum of unhandled bytes.
# block - callback stored for later; may be absent.
#
# Returns the stored block.
def on_max_unhandled_bytes(bytes, &block)
  @on_max_unhandled_bytes = block
  @max_unhandled_bytes = bytes
  block
end
on_root(&block) click to toggle source

TODO: deprecate this method; use the depth argument passed to the on_tag_open or on_tag_close callbacks instead.

# File lib/bliss/parser.rb, line 37
# Registers a handler for the root element on the parser machine.
#
# The root reported by the parser machine is remembered in @root (see #root)
# before +block+ is called with it.
#
# Returns false when no block is given, otherwise whatever the parser
# machine's #on_root returns.
def on_root(&block)
  return false unless block.is_a?(Proc)

  @parser_machine.on_root do |root_name|
    @root = root_name
    block.call(root_name)
  end
end
on_tag_close(element='.', &block) click to toggle source
# File lib/bliss/parser.rb, line 58
# Registers +block+ as the handler for closing tags matching +element+.
#
# The handler is wrapped so that the unhandled bytes counter is reset before
# +block+ is called with the hash and depth given by the parser machine.
#
# element - matcher handed to the parser machine (defaults to '.').
#
# Returns whatever the parser machine's #on_tag_close returns.
def on_tag_close(element='.', &block)
  wrapped_handler = proc do |hash, depth|
    reset_unhandled_bytes
    block.call(hash, depth)
  end

  @parser_machine.on_tag_close(element, wrapped_handler)
end
on_tag_open(element='.', &block) click to toggle source
# File lib/bliss/parser.rb, line 45
# Registers +block+ as the handler for opening tags matching +element+.
#
# The handler is wrapped so that the unhandled bytes counter is reset before
# +block+ is called with the depth given by the parser machine; no reset is
# done when +element+ is 'default'.
#
# element - matcher handed to the parser machine (defaults to '.').
# block   - handler taking exactly one argument (the depth).
#
# Returns false, without registering anything, when no block is given or the
# block does not take exactly one argument; otherwise returns whatever the
# parser machine's #on_tag_open returns.
def on_tag_open(element='.', &block)
  # A missing block is rejected the same way #on_root rejects it, instead of
  # failing with NoMethodError on nil.
  return false unless block && block.arity == 1

  wrapped_handler = Proc.new { |depth|
    reset_unhandled_bytes unless element == 'default'

    block.call(depth)
  }
  @parser_machine.on_tag_open(element, wrapped_handler)
end
on_timeout(seconds, &block) click to toggle source
# File lib/bliss/parser.rb, line 72
# Configures the timeout used by #parse for the HTTP request and the callback
# that #parse runs from the request's error handler.
#
# seconds - value stored as the timeout.
# block   - callback stored for later; may be absent.
#
# Returns the stored block.
def on_timeout(seconds, &block)
  @on_timeout = block
  @timeout = seconds
  block
end
parse() click to toggle source
# File lib/bliss/parser.rb, line 114
# Fetches the document at @path over HTTP inside an EventMachine reactor and
# feeds every received chunk to the SAX push parser. When an output file was
# opened by the constructor, the chunks fed to the parser are written to it
# too.
#
# The reactor is stopped through #secure_close: when the request completes,
# when it fails, or - once the parser machine has been closed - depending on
# the unhandled bytes limit and the #wait_tag_close setting. The output file
# is closed afterwards, also when an error is raised.
#
# Raises Bliss::EncodingError when Nokogiri reports a syntax error whose
# message mentions "encoding"; other Nokogiri syntax errors are ignored.
#
# Returns nil.
def parse
  reset_unhandled_bytes if check_unhandled_bytes?
  load_constraints_on_parser_machine

  EM.run do
    http =
      if @timeout
        EM::HttpRequest.new(@path, :connect_timeout => @timeout, :inactivity_timeout => @timeout).get
      else
        EM::HttpRequest.new(@path).get
      end

    @autodetect_compression = true
    compression = :none
    if @autodetect_compression
      http.headers do
        # Treat the body as gzip when the headers say so: a .gz attachment
        # filename, a compressed response, or a binary/gzip content type.
        if (/^attachment.+filename.+\.gz/i === http.response_header['CONTENT_DISPOSITION']) or http.response_header.compressed? or ["application/octet-stream", "application/x-gzip"].include? http.response_header['CONTENT_TYPE']
          # MAX_WBITS + 16 asks zlib for gzip decoding.
          @zstream = Zlib::Inflate.new(Zlib::MAX_WBITS+16)
          compression = :gzip
        end
      end
    end

    http.stream { |chunk|
      if chunk
        chunk.force_encoding('UTF-8')

        if check_unhandled_bytes?
          # NOTE(review): String#length counts characters of the UTF-8 tagged
          # chunk (before inflating), not bytes - confirm whether bytesize
          # was intended.
          @unhandled_bytes += chunk.length
          check_unhandled_bytes
        end
        if not @parser_machine.is_closed?
          begin
            case compression
              when :gzip
                chunk = @zstream.inflate(chunk)
                chunk.force_encoding('UTF-8')
            end
            @push_parser << chunk
            if @file
              @file << chunk
            end
          rescue Nokogiri::XML::SyntaxError => e
            # Only encoding problems are reported; other syntax errors are
            # ignored and streaming goes on.
            if e.message.include?("encoding")
              raise Bliss::EncodingError, "Wrong encoding given"
            end
          end

        else
          # The parser machine is closed: decide how to finish the download.
          if exceeded?
            secure_close
          else
            if @file
              if @wait_tag_close
                handle_wait_tag_close(chunk)
              else
                secure_close
              end
            end
          end
        end
      end
    }
    http.errback {
      # A timeout callback may be missing (on_timeout called without a block);
      # never let that prevent the reactor from being stopped.
      @on_timeout.call if @timeout && @on_timeout
      secure_close
    }
    http.callback {
      secure_close
    }
  end
  nil
ensure
  # The file was opened with autoclose disabled, so it has to be closed
  # explicitly - including when the reactor raised (e.g. EncodingError).
  file_close
end
reset_unhandled_bytes() click to toggle source
# File lib/bliss/parser.rb, line 81
# Sets the unhandled bytes counter back to zero.
#
# Returns false, leaving the counter untouched, when no maximum of unhandled
# bytes is configured; returns 0 otherwise.
def reset_unhandled_bytes
  if check_unhandled_bytes?
    @unhandled_bytes = 0
  else
    false
  end
end
root() click to toggle source
# File lib/bliss/parser.rb, line 106
# Returns the root recorded by the handler installed through #on_root, or nil
# when none has been recorded yet.
def root
  @root
end
secure_close() click to toggle source
# File lib/bliss/parser.rb, line 221
# Stops the EventMachine reactor, closing the zlib stream first when one was
# created.
#
# Errors raised while closing the stream are ignored so that the reactor is
# stopped in every case.
def secure_close
  @zstream.close if @zstream
rescue
  # Ignored on purpose; stopping the reactor below is what matters.
ensure
  EM.stop
end
wait_tag_close(element) click to toggle source
# File lib/bliss/parser.rb, line 77
# Remembers the closing tag of +element+; #parse and #handle_wait_tag_close
# look for it in the incoming chunks once the parser machine is closed.
#
# Returns the closing tag string.
def wait_tag_close(element)
  closing_tag = "</#{element}>"
  @wait_tag_close = closing_tag
end