class StreamLines::Reading::Stream

Attributes

url[R]

Public Class Methods

new(url, encoding: Encoding.default_external) click to toggle source
# File lib/stream_lines/reading/stream.rb, line 15
# Builds a stream reader for +url+.
#
# @param url [Object] location of the resource to read; stored as-is
# @param encoding [Encoding] encoding applied to the internal line buffer
#   (defaults to Encoding.default_external)
def initialize(url, encoding: Encoding.default_external)
  @url, @encoding = url, encoding
  # Starts out empty; holds the trailing partial line between chunks.
  @buffer = String.new(encoding: encoding)
end

Public Instance Methods

each(&block) click to toggle source
# File lib/stream_lines/reading/stream.rb, line 21
# Streams the resource and hands every line to the given block by
# forwarding the block to +stream_lines+.
#
# @yieldparam line [String] a single line of the streamed body
# @raise [Error] when HTTParty raises one of its errors during the download
def each(&block)
  stream_lines(&block)
rescue HTTParty::Error => e
  # Only response-carrying errors expose #response.  Calling it on any other
  # HTTParty::Error would raise NoMethodError here and hide the real failure,
  # so fall back to the exception's own message in that case.
  response = e.respond_to?(:response) ? e.response : nil
  raise Error, "Failed to download #{url} with code: #{response.code}" if response

  raise Error, "Failed to download #{url}: #{e.message}"
end

Private Instance Methods

extract_lines(chunk) click to toggle source
# File lib/stream_lines/reading/stream.rb, line 40
# Prepends the buffered remainder to +chunk+, splits the result into lines
# and keeps the last (possibly incomplete) piece in the buffer.
#
# @param chunk [#to_s] the next piece of the streamed body
# @return [Array<String>] every line except the last piece of the split
def extract_lines(chunk)
  incoming = chunk.to_s.dup.force_encoding(@encoding)
  *complete, remainder = split_lines(@buffer + incoming)

  # The last piece may be cut mid-line, so it waits for the next chunk.
  @buffer = String.new(encoding: @encoding) << remainder.to_s

  complete
end
split_lines(encoded_chunk) click to toggle source
# File lib/stream_lines/reading/stream.rb, line 49
# Splits +encoded_chunk+ on the input record separator, keeping trailing
# empty pieces (limit -1) so the caller can tell whether the chunk ended
# exactly on a separator.
#
# @param encoded_chunk [String] buffered remainder plus the newest chunk
# @return [Array<String>] the pieces of the chunk, last piece last
# @raise [ArgumentError] re-raised when the failure is not an
#   "invalid byte sequence" error
def split_lines(encoded_chunk)
  encoded_chunk.split($INPUT_RECORD_SEPARATOR, -1)
rescue ArgumentError => e
  raise e unless /invalid byte sequence/.match?(e.message)

  # NOTE: (jdlubrano)
  # The last byte in the chunk is most likely a part of a multibyte
  # character that, on its own, is an invalid byte sequence.  So, we
  # want to split the lines containing all valid bytes and make the
  # trailing bytes the last line.  The last line eventually gets added
  # to the buffer, prepended to the next chunk, and, hopefully, restores
  # a valid byte sequence.
  separator = $INPUT_RECORD_SEPARATOR
  last_newline_index = encoded_chunk.rindex(separator)
  return [encoded_chunk] if last_newline_index.nil?

  head = encoded_chunk[0...last_newline_index]
  # Skip the whole separator, which may be longer than one character.
  tail = encoded_chunk[(last_newline_index + separator.length)..-1]

  # "".split(sep, -1) returns [], which would silently drop the empty line
  # that precedes a separator sitting at the very start of the chunk.
  valid_lines = head.empty? ? [head] : head.split(separator, -1)
  valid_lines << tail
end
stream_lines(&block) click to toggle source
# File lib/stream_lines/reading/stream.rb, line 31
# Downloads +url+ with a streamed body and passes each extracted line to
# +block+; whatever is still buffered once the download finishes is passed
# as the final line.
#
# @yieldparam line [String] one line of the streamed body
def stream_lines(&block)
  # Start from a clean buffer: a previous run leaves its final partial line
  # (or the remainder of an aborted download) behind, and that would be
  # prepended to the first chunk of this run.
  @buffer = String.new(encoding: @encoding)

  self.class.get(url, stream_body: true) do |chunk|
    extract_lines(chunk).each(&block)
  end

  # A body that does not end with a separator leaves its last line buffered.
  block.call(@buffer) unless @buffer.empty?
end