class REX12::Parser

Public Class Methods

required_io_methods() click to toggle source
# File lib/rex12/parser.rb, line 93
# Returns the method names (as symbols) published under "required io methods".
# NOTE(review): presumably the methods an IO-like object must respond to before
# it is handed to the parser - confirm against the callers of this method.
#
# @return [Array<Symbol>] :pos, :rewind, :readchar and :each_line, in that order
def self.required_io_methods
  %i[pos rewind readchar each_line]
end

Public Instance Methods

document(io) click to toggle source

Reads EDI data from the given IO object and returns a REX12::Document model of the whole EDI file

# File lib/rex12/parser.rb, line 8
# Reads EDI data from the given IO object and returns a REX12::Document built
# from every segment that each_segment produces for it.
#
# @param io the object passed straight through to each_segment
# @return [REX12::Document] a document constructed from the array of all segments
def document io
  all_segments = each_segment(io).to_a
  REX12::Document.new(all_segments)
end
each_segment(io) { |segment| ... } click to toggle source

NOTE: The ISA line will be returned as a specialized REX12::IsaSegment class, which subclasses REX12::Segment

# File lib/rex12/parser.rb, line 16
# Reads EDI data from the given IO object one segment at a time.
#
# If a block is given, every segment is yielded in the order each_line produces
# it and nil is returned.  If no block is given, all segments are read first and
# an Enumerator over them (with a known size) is returned.
#
# @param io the object handed to parse_metadata and each_line
# @yield [segment] each parsed segment
# @return [nil, Enumerator] nil when a block is given, otherwise an Enumerator of segments
def each_segment io
  metadata = parse_metadata(io)

  if block_given?
    each_line(io, metadata) { |_line_number, segment| yield segment }
    return nil
  end

  collected = []
  each_line(io, metadata) { |_line_number, segment| collected << segment }
  collected.to_enum { collected.length }
end
each_transaction(io) { |transaction| ... } click to toggle source

Reads EDI data from the received IO object. If a block is given, yields all REX12::Transaction objects read from the EDI in sequence. If no block is given, an Enumerator of REX12::Transactions is returned.

# File lib/rex12/parser.rb, line 33
# Reads EDI data from the given IO object and groups its segments into
# REX12::Transaction objects.
#
# A transaction is closed every time a segment whose type equals the :se marker
# is seen; it is built from the most recent ISA segment, the most recent GS
# segment and all segments collected since the previous transaction was closed.
#
# If a block is given each transaction is yielded as soon as it is complete and
# nil is returned.  Otherwise an Enumerator over all transactions is returned.
#
# @param io the object handed to parse_metadata and each_line
# @yield [transaction] each completed REX12::Transaction
# @return [nil, Enumerator] nil when a block is given, otherwise an Enumerator of transactions
def each_transaction io
  metadata = parse_metadata(io)
  markers = metadata.segment_markers

  current_isa = nil
  current_gs = nil
  pending = []
  collected = []

  each_line(io, metadata) do |_line_number, segment|
    type = segment.segment_type

    if markers[:isa] == type
      current_isa = segment
      next
    end

    if markers[:gs] == type
      current_gs = segment
      next
    end

    # The trailer segments are not needed when processing the data transaction
    # by transaction; they could only serve as checksums for segment counts,
    # which are not verified here.
    next if markers[:iea] == type || markers[:ge] == type

    # Segments without a type are dropped
    next if type.nil?

    pending << segment
    next unless type == markers[:se]

    transaction = REX12::Transaction.new(current_isa, current_gs, pending)
    if block_given?
      yield transaction
    else
      collected << transaction
    end

    # Start a fresh array so the finished transaction keeps its own segments
    pending = []
  end

  block_given? ? nil : collected.to_enum { collected.length }
end

Private Instance Methods

determine_segment_terminator(encoding, isa_chars, markers, io) click to toggle source
# File lib/rex12/parser.rb, line 201
# Works out the segment terminator of the EDI data.
#
# The terminator starts as the character at index 105 of isa_chars.  One or two
# further characters are then read from io: a carriage return or linefeed
# directly after the terminator is appended to it, and so is a linefeed that
# follows that.  Any other character must be the first character of the GS marker.
#
# @param encoding the encoding used to build the cr / lf characters to compare against
# @param isa_chars [Array<String>] the characters of the ISA segment; it is never modified
# @param markers [Hash] segment markers; only markers[:gs] is read
# @param io an object responding to readchar, positioned right after the ISA characters
# @return [String] the terminator character, optionally followed by cr and/or lf characters
# @raise [REX12::ParseError] if there is no character at index 105 or the characters
#   following it are neither cr/lf nor the start of the GS segment
def determine_segment_terminator encoding, isa_chars, markers, io
  # Technically, allowing multi-character terminators is not valid EDI, but you
  # see it happen ALL the time with EDI that's been hand edited by a client where \r\n is utilized.
  # It's a valid enough use-case that we're accounting specifically for a terminator character and/or cr/lfs
  raw_terminator = isa_chars[105]

  raise REX12::ParseError, "Invalid EDI.  All EDI documents have a segment terminator character at position 106 of the ISA segment." if raw_terminator.nil?

  # Work on a copy: appending cr/lf below must not alter (or fail on a frozen) element of the caller's array
  terminator = raw_terminator.dup
  gs_start = markers[:gs][0]

  next_char = io.readchar

  # If there's a single character terminator and the next char is the start of the GS segment, then
  # there is no need to continue looking for more terminator characters
  return terminator if gs_start == next_char

  cr = "\r".encode(encoding)
  lf = "\n".encode(encoding)

  unless next_char == cr || next_char == lf
    raise REX12::ParseError, "Invalid ISA segment.  Could not determine segment terminator."
  end

  terminator << next_char

  # The only valid char at this point we'll accept as part of the terminator is a linefeed
  next_char = io.readchar
  if next_char == lf
    terminator << next_char
  elsif gs_start != next_char
    raise REX12::ParseError, "Invalid ISA segment.  Could not determine segment terminator."
  end

  terminator
end
each_line(io, metadata) { |line_counter, segment| ... } click to toggle source
# File lib/rex12/parser.rb, line 99
# Splits the io into "lines" using the segment terminator as the line separator,
# parses each one with parse_edi_line and yields it together with its
# zero-based counter.
#
# @param io an object responding to each_line(separator, limit)
# @param metadata an object providing segment_terminator and segment_markers
# @yield [line_counter, segment] the zero-based counter of the line and the parsed segment
# @return [nil]
# @raise [RuntimeError] if more than one ISA segment is encountered
#   NOTE(review): the other parse failures in this file raise REX12::ParseError;
#   the exception type is left untouched here - confirm whether it should match.
def each_line io, metadata
  line_counter = -1
  isa_seen = false

  # The segment terminator acts as the linefeed character.  The limit of 1 million
  # characters is only there to effectively negate the max line length.
  io.each_line(metadata.segment_terminator, 1_000_000).each do |segment_line|
    next if segment_line.empty?

    line_counter += 1

    # Strip the segment terminator off the line before we parse it
    segment_line = segment_line[0..-(1 + metadata.segment_terminator.length)]

    segment = parse_edi_line(segment_line, line_counter, metadata)

    if metadata.segment_markers[:isa] == segment.segment_type
      raise "Invalid EDI.  Only 1 ISA segment is allow per EDI file." if isa_seen
      # Remember the ISA so that a second one trips the check above
      isa_seen = true
    end

    yield line_counter, segment
  end
  nil
end
encoded_segment_markers(document_encoding) click to toggle source
# File lib/rex12/parser.rb, line 190
# Builds the lookup of segment marker strings, each one encoded in the
# document's encoding.
#
# @param document_encoding the encoding every marker string is encoded to
# @return [Hash{Symbol => String}] the keys :isa, :gs, :iea, :ge, :st and :se mapped
#   to "ISA", "GS", "IEA", "GE", "ST" and "SE" in the given encoding
def encoded_segment_markers document_encoding
  marker_names = ["ISA", "GS", "IEA", "GE", "ST", "SE"]

  marker_names.each_with_object({}) do |name, markers|
    markers[name.downcase.to_sym] = name.encode(document_encoding)
  end
end
parse_edi_line(segment_line, line_counter, metadata) click to toggle source
# File lib/rex12/parser.rb, line 122
# Parses a single segment line, dispatching to parse_isa when the line starts
# with the ISA marker and to parse_line for everything else.
#
# @param segment_line [String] the segment text
# @param line_counter the counter value passed through to the parsing method
# @param metadata an object providing segment_markers (plus whatever the parsing methods read)
# @return whatever parse_isa or parse_line returns
def parse_edi_line segment_line, line_counter, metadata
  isa_marker = metadata.segment_markers[:isa]

  # ISA segments are handled a little differently
  return parse_isa(segment_line, line_counter, metadata) if segment_line.start_with?(isa_marker)

  parse_line(segment_line, line_counter, metadata)
end
parse_isa(line, line_counter, metadata) click to toggle source
# File lib/rex12/parser.rb, line 151
# Parses the ISA line into a REX12::IsaSegment.
#
# The line is split on the element delimiter, keeping trailing empty positions,
# and every piece becomes a REX12::Element carrying its index.
#
# @param line [String] the ISA segment text
# @param line_counter the counter value stored on the segment
# @param metadata an object providing element_delimiter, segment_terminator and sub_element_separator
# @return [REX12::IsaSegment]
def parse_isa line, line_counter, metadata
  # There are no subelements in the isa, so every position is a plain element
  elements = line.split(metadata.element_delimiter, -1).each_with_index.map do |value, position|
    REX12::Element.new(value, position)
  end

  REX12::IsaSegment.new(
    elements,
    line_counter,
    metadata.segment_terminator,
    metadata.element_delimiter,
    metadata.sub_element_separator
  )
end
parse_line(line, line_counter, metadata) click to toggle source
# File lib/rex12/parser.rb, line 131
# Parses a non-ISA segment line into a REX12::Segment.
#
# Every element that contains the sub element separator becomes a
# REX12::ElementWithSubElements holding one REX12::SubElement per piece; every
# other element becomes a plain REX12::Element.
#
# @param line [String] the segment text
# @param line_counter the counter value stored on the segment
# @param metadata an object providing element_delimiter and sub_element_separator
# @return [REX12::Segment]
def parse_line line, line_counter, metadata
  # Ruby's split drops trailing empty positions by default, which we don't want here:
  # every position has to be accounted for, hence the -1 limit.
  # i.e. NOT 'SLN*1****'.split('*') -> ["SLN", "1"] ---- we want ["SLN", "1", "", "", "", ""]
  elements = line.split(metadata.element_delimiter, -1).each_with_index.map do |raw_element, position|
    pieces = raw_element.split(metadata.sub_element_separator, -1)

    if pieces.length > 1
      sub_elements = pieces.each_with_index.map { |value, sub_position| REX12::SubElement.new(value, sub_position) }
      REX12::ElementWithSubElements.new(sub_elements, position, metadata.sub_element_separator)
    else
      REX12::Element.new(raw_element, position)
    end
  end

  REX12::Segment.new(elements, line_counter)
end
parse_metadata(io) click to toggle source
# File lib/rex12/parser.rb, line 162
# Inspects the start of the io to work out how the EDI data is delimited, then
# puts the io back at the position it had on entry.
#
# @param io an object responding to pos, pos= and readchar
# @return [REX12::Parser::DocumentMetadata] built from the encoding, the encoded segment
#   markers, the segment terminator, the element delimiter and the sub element separator
# @raise [REX12::ParseError] if the data does not start with the ISA marker or the io
#   runs out of characters (EOFError) while the metadata is being read
def parse_metadata io
  # Remember where we started so the io can be put back afterwards
  start_position = io.pos

  # Read the first 106 characters.  readchar is used instead of reading bytes so
  # that the IO stream handles any character encoding for us.
  isa_chars = Array.new(106) { io.readchar }

  encoding = isa_chars.first.encoding
  segment_markers = encoded_segment_markers(encoding)

  # Interrogate the characters read to determine the element delimiter, segment terminator and sub element separator
  unless isa_chars.first(3).join == segment_markers[:isa]
    raise REX12::ParseError, "Invalid EDI.  All EDI documents must start with an ISA segment."
  end

  element_delimiter = isa_chars[3]
  # NOTE: this may read further characters from the io
  segment_terminator = determine_segment_terminator(encoding, isa_chars, segment_markers, io)
  sub_element_separator = isa_chars[104]

  io.pos = start_position

  REX12::Parser::DocumentMetadata.new(encoding, segment_markers, segment_terminator, element_delimiter, sub_element_separator)
rescue EOFError
  raise REX12::ParseError, "Invalid EDI.  All EDI documents must start with an ISA segment that is exactly 107 characters long - including the segment terminator."
end