class Bliss::Parser
Public Class Methods
new(path, filepath=nil)
click to toggle source
# File lib/bliss/parser.rb, line 3
# Sets up a parser for the document at +path+.
#
# path     - kept in @path (it is what #parse passes to EM::HttpRequest).
# filepath - optional; when given, a file is opened there in write mode and
#            kept in @file with autoclose turned off.
#
# A Bliss::ParserMachine is created and wrapped in a Nokogiri SAX push parser,
# and an empty root callback is registered so that @root gets recorded.
def initialize(path, filepath=nil)
  @path = path

  @parser_machine = Bliss::ParserMachine.new
  @push_parser = Nokogiri::XML::SAX::PushParser.new(@parser_machine)

  @file = File.new(filepath, 'w').tap { |io| io.autoclose = false } if filepath

  @root = @nodes = nil
  @formats = []

  on_root {}
end
Public Instance Methods
add_format(format)
click to toggle source
# File lib/bliss/parser.rb, line 22
# Appends +format+ to the list of registered formats.
#
# Returns the @formats array.
def add_format(format)
  @formats << format
end
check_unhandled_bytes()
click to toggle source
# File lib/bliss/parser.rb, line 86
# Fires the max-unhandled-bytes callback once the counter has passed the limit.
#
# The callback is cleared right after it has been called, so it runs at most
# one time per registration. Always returns nil.
def check_unhandled_bytes
  return unless @unhandled_bytes > @max_unhandled_bytes
  return unless @on_max_unhandled_bytes

  @on_max_unhandled_bytes.call
  @on_max_unhandled_bytes = nil
end
check_unhandled_bytes?()
click to toggle source
# File lib/bliss/parser.rb, line 102
# Tells whether a maximum of unhandled bytes has been configured.
#
# Returns true when @max_unhandled_bytes is set (truthy), false otherwise.
def check_unhandled_bytes?
  !!@max_unhandled_bytes
end
close()
click to toggle source
# File lib/bliss/parser.rb, line 110
# Closes the underlying parser machine.
#
# Returns whatever @parser_machine.close returns.
def close
  @parser_machine.close
end
exceeded?()
click to toggle source
# File lib/bliss/parser.rb, line 95
# Tells whether the unhandled-bytes counter has gone past the configured limit.
#
# Returns false when no limit is configured or when the counter is at or
# below the limit; true only when the counter is strictly greater.
# Always returns a real boolean (never nil), and a counter that has not been
# initialised yet is treated as 0 instead of raising.
def exceeded?
  return false unless check_unhandled_bytes?

  (@unhandled_bytes || 0) > @max_unhandled_bytes
end
file_close()
click to toggle source
# File lib/bliss/parser.rb, line 215
# Closes the output file, if one was opened.
#
# Does nothing when @file is not set.
def file_close
  @file.close if @file
end
formats_details()
click to toggle source
# File lib/bliss/parser.rb, line 30
# Prints the inspected +details+ of every registered format to standard
# output, one per line.
#
# Returns the @formats array.
def formats_details
  @formats.each { |fmt| puts fmt.details.inspect }
end
handle_wait_tag_close(chunk)
click to toggle source
# File lib/bliss/parser.rb, line 199
# Writes +chunk+ to the output file, stopping right after the first occurrence
# of the awaited closing tag (@wait_tag_close) when the chunk contains it.
#
# chunk - String with the piece of the document that was just received.
#
# When the closing tag is found, everything up to and including that tag is
# written, followed by a closing tag for the root element, and the transfer is
# ended through secure_close. When it is not found the whole chunk is written.
# Any error raised along the way also ends the transfer through secure_close.
#
# NOTE: the tag is searched inside a single chunk only; a closing tag split
# across two chunks is not detected here.
def handle_wait_tag_close(chunk)
  tag_start = chunk.index(@wait_tag_close)

  if tag_start
    # Exclusive end offset just past the whole closing tag, whatever its
    # length (a fixed +4 offset is only right for 5-character tags).
    tag_end = tag_start + @wait_tag_close.length
    @file << chunk[0...tag_end]
    @file << "</#{root}>" # TODO set this by using actual depth, so all tags get closed
    secure_close
  else
    @file << chunk
  end
rescue
  # Best effort: whatever went wrong, make sure the transfer is shut down.
  secure_close
end
load_constraints_on_parser_machine()
click to toggle source
# File lib/bliss/parser.rb, line 26
# Gathers the constraints of every registered format into one flat list and
# hands that list to the parser machine.
#
# Returns whatever @parser_machine.constraints returns.
def load_constraints_on_parser_machine
  all_constraints = @formats.map(&:constraints).flatten
  @parser_machine.constraints(all_constraints)
end
on_max_unhandled_bytes(bytes, &block)
click to toggle source
# File lib/bliss/parser.rb, line 67
# Configures the unhandled-bytes limit and the callback tied to it.
#
# bytes - limit stored in @max_unhandled_bytes.
# block - callback stored in @on_max_unhandled_bytes (nil when no block is given).
#
# Returns the stored block.
def on_max_unhandled_bytes(bytes, &block)
  @max_unhandled_bytes = bytes
  @on_max_unhandled_bytes = block
end
on_root(&block)
click to toggle source
Deprecated: use the depth argument passed to the on_tag_open
or on_tag_close callbacks
instead.
# File lib/bliss/parser.rb, line 37
# Registers a callback on the parser machine for the document root.
#
# The registered wrapper stores the value it receives in @root and then
# forwards it to +block+.
#
# Returns false when no block is given, otherwise whatever
# @parser_machine.on_root returns.
def on_root(&block)
  return false unless block.is_a?(Proc)

  @parser_machine.on_root do |root_name|
    @root = root_name
    block.call(root_name)
  end
end
on_tag_close(element='.', &block)
click to toggle source
# File lib/bliss/parser.rb, line 58
# Registers a callback on the parser machine for the closing of +element+.
#
# element - element selector handed to the parser machine (defaults to '.').
# block   - called with (hash, depth) each time the handler fires.
#
# The registered handler resets the unhandled-bytes counter before forwarding
# to +block+.
#
# Returns false, without registering anything, when no block is given (a
# handler with nothing to call would only fail later, when it fires);
# otherwise returns whatever @parser_machine.on_tag_close returns.
def on_tag_close(element='.', &block)
  return false unless block

  wrapped_block = Proc.new do |hash, depth|
    reset_unhandled_bytes
    block.call(hash, depth)
  end

  @parser_machine.on_tag_close(element, wrapped_block)
end
on_tag_open(element='.', &block)
click to toggle source
# File lib/bliss/parser.rb, line 45
# Registers a callback on the parser machine for the opening of +element+.
#
# element - element selector handed to the parser machine (defaults to '.').
# block   - must take exactly one argument; it is called with the depth.
#
# The registered handler resets the unhandled-bytes counter before forwarding
# to +block+, except when +element+ is 'default'.
#
# Returns false, without registering anything, when no block is given or when
# the block's arity is not 1; otherwise returns whatever
# @parser_machine.on_tag_open returns.
def on_tag_open(element='.', &block)
  # A missing block used to blow up on nil.arity; treat it like a bad block.
  return false if block.nil? || block.arity != 1

  wrapped_block = Proc.new do |depth|
    reset_unhandled_bytes unless element == 'default'
    block.call(depth)
  end

  @parser_machine.on_tag_open(element, wrapped_block)
end
on_timeout(seconds, &block)
click to toggle source
# File lib/bliss/parser.rb, line 72
# Configures the timeout and the callback tied to it.
#
# seconds - value stored in @timeout (#parse uses it for both the connect and
#           the inactivity timeout of the request).
# block   - callback stored in @on_timeout (nil when no block is given).
#
# Returns the stored block.
def on_timeout(seconds, &block)
  @timeout = seconds
  @on_timeout = block
end
parse()
click to toggle source
# File lib/bliss/parser.rb, line 114
# Fetches @path over HTTP inside an EventMachine loop and streams the body
# into the SAX push parser, chunk by chunk.
#
# Steps, in order:
# * resets the unhandled-bytes counter (when a limit is configured) and loads
#   the format constraints into the parser machine;
# * issues a GET request, using @timeout (when set) for both the connect and
#   the inactivity timeout;
# * once headers arrive, switches to gzip inflation when the response looks
#   gzip-compressed (attachment with a .gz filename, compressed response
#   header, or an octet-stream / x-gzip content type);
# * for every chunk: counts its bytes against the limit, then either feeds it
#   to the push parser (and to @file, when set) while the parser machine is
#   open, or - once the machine is closed - winds the transfer down;
# * on request error or completion, ends the loop through secure_close.
#
# The output file is closed when the loop ends, including when an exception
# escapes the loop.
#
# Raises Bliss::EncodingError when the push parser reports a syntax error
# whose message mentions "encoding".
# Returns nil.
def parse
  reset_unhandled_bytes if check_unhandled_bytes?
  load_constraints_on_parser_machine

  begin
    EM.run do
      http = nil
      if @timeout
        http = EM::HttpRequest.new(@path, :connect_timeout => @timeout, :inactivity_timeout => @timeout).get
      else
        http = EM::HttpRequest.new(@path).get
      end

      @autodetect_compression = true
      compression = :none
      if @autodetect_compression
        http.headers do
          if (/^attachment.+filename.+\.gz/i === http.response_header['CONTENT_DISPOSITION']) ||
             http.response_header.compressed? ||
             ["application/octet-stream", "application/x-gzip"].include?(http.response_header['CONTENT_TYPE'])
            # MAX_WBITS + 16 is the window_bits value the original code uses for gzip input.
            @zstream = Zlib::Inflate.new(Zlib::MAX_WBITS+16)
            compression = :gzip
          end
        end
      end

      http.stream { |chunk|
        if chunk
          chunk.force_encoding('UTF-8')

          if check_unhandled_bytes?
            # Count bytes, not characters: after force_encoding, #length would
            # under-count multi-byte text.
            @unhandled_bytes += chunk.bytesize
            check_unhandled_bytes
          end

          if not @parser_machine.is_closed?
            begin
              case compression
              when :gzip
                chunk = @zstream.inflate(chunk)
                chunk.force_encoding('UTF-8')
              end
              @push_parser << chunk
              @file << chunk if @file
            rescue Nokogiri::XML::SyntaxError => e
              # Only encoding problems are escalated; other syntax errors are ignored here.
              if e.message.include?("encoding")
                raise Bliss::EncodingError, "Wrong encoding given"
              end
            end
          elsif exceeded?
            secure_close
          elsif @file
            if @wait_tag_close
              # Keep writing until the awaited closing tag shows up.
              handle_wait_tag_close(chunk)
            else
              secure_close
            end
          end
        end
      }

      http.errback {
        # A timeout may be configured without a callback; do not call nil.
        @on_timeout.call if @timeout && @on_timeout
        secure_close
      }

      http.callback {
        secure_close
      }
    end
  ensure
    # Close the output file even when an exception escapes the event loop.
    file_close
  end

  nil
end
reset_unhandled_bytes()
click to toggle source
# File lib/bliss/parser.rb, line 81
# Puts the unhandled-bytes counter back to zero.
#
# Returns false, leaving the counter untouched, when no limit is configured;
# otherwise returns 0.
def reset_unhandled_bytes
  return false unless check_unhandled_bytes?

  @unhandled_bytes = 0
end
root()
click to toggle source
# File lib/bliss/parser.rb, line 106
# Reader for @root, the value recorded by the root callback.
#
# Returns nil until the root callback has fired.
def root
  @root
end
secure_close()
click to toggle source
# File lib/bliss/parser.rb, line 221
# Ends the transfer: closes the inflate stream when there is one and stops
# the EventMachine loop.
#
# Errors raised while closing the stream are swallowed, and EM.stop is
# called in every case.
def secure_close
  @zstream.close if @zstream
rescue
  # Deliberately ignored: a failing stream close must not prevent EM.stop.
ensure
  EM.stop
end
wait_tag_close(element)
click to toggle source
# File lib/bliss/parser.rb, line 77
# Remembers the closing tag of +element+ as the tag to wait for.
#
# element - element name; it is wrapped as "</element>" and stored in
#           @wait_tag_close.
#
# Returns the stored closing-tag string.
def wait_tag_close(element)
  @wait_tag_close = "</#{element}>"
end