class Edifunct::Tokenizer

Tokenizer is responsible for splitting a message into segments, data elements and components.

Constants

SERVICE_STRING_ADVICE_REGEXP

Attributes

component_data_element_separator[R]
data_element_separator[R]
release_character[R]
segment_terminator[R]

Public Class Methods

for_message(edifact_message) click to toggle source
# File lib/edifunct/tokenizer.rb, line 9
# Builds a tokenizer configured for the given EDIFACT message.
#
# When the message starts with "UNA", the six characters that follow are read
# positionally as: component data element separator, data element separator,
# decimal mark, release character, a reserved character and the segment
# terminator. The decimal mark and the reserved character are ignored.
# Without a leading "UNA" the tokenizer is built with its default delimiters.
#
# @param edifact_message [String] the raw message
# @return [Edifunct::Tokenizer] a tokenizer matching the message's delimiters
def for_message(edifact_message)
  return new unless edifact_message.start_with?("UNA")

  # Only the six characters after "UNA" are needed, so slice them out rather
  # than turning the whole (possibly large) message into an array of chars.
  # A truncated UNA simply leaves the trailing variables nil.
  component_data_element_separator,
  data_element_separator,
  _decimal_mark_delimiter,
  release_character,
  _reserved,
  segment_terminator = edifact_message[3, 6].chars

  new(
    release_character: release_character,
    segment_terminator: segment_terminator,
    data_element_separator: data_element_separator,
    component_data_element_separator: component_data_element_separator
  )
end
new(release_character: "?", segment_terminator: "'", data_element_separator: "+", component_data_element_separator: ":") click to toggle source
# File lib/edifunct/tokenizer.rb, line 35
# Stores the four delimiter characters used when tokenizing a message.
#
# @param release_character [String] character that escapes a following delimiter
# @param segment_terminator [String] character that ends a segment
# @param data_element_separator [String] character between data elements
# @param component_data_element_separator [String] character between components
def initialize(
  release_character: "?",
  segment_terminator: "'",
  data_element_separator: "+",
  component_data_element_separator: ":"
)
  @release_character = release_character
  @segment_terminator = segment_terminator
  @data_element_separator = data_element_separator
  @component_data_element_separator = component_data_element_separator
end

Public Instance Methods

as_segments(message_as_string) click to toggle source
# File lib/edifunct/tokenizer.rb, line 42
# Splits a message into Segment objects.
#
# The service string advice is stripped first, the remainder is cut on
# segment_regexp, and each raw piece is broken down with split_segment.
#
# @param message_as_string [String] the raw message
# @return [Array<Segment>] one Segment per raw segment, in message order
def as_segments(message_as_string)
  raw_segments = strip_service_string_advice(message_as_string).split(segment_regexp)

  raw_segments.map do |raw|
    tag, elements = split_segment(raw)
    Segment.new(tag: tag, raw_segment: raw, data_elements: elements)
  end
end
decode_value(encoded_value) click to toggle source
# File lib/edifunct/tokenizer.rb, line 63
# Removes the release character in front of every escaped character, keeping
# the escaped character itself.
#
# @param encoded_value [String] a value that may contain escape sequences
# @return [String] a new string with the escape sequences resolved
def decode_value(encoded_value)
  # Capture group 1 of escape_value_regexp is the character being escaped.
  encoded_value.gsub(escape_value_regexp) { Regexp.last_match(1) }
end
formatted_segments_per_line(message_as_string) click to toggle source
# File lib/edifunct/tokenizer.rb, line 67
# Returns a copy of the message with a newline inserted at every position
# matched by segment_regexp (replacing any whitespace matched there).
#
# @param message_as_string [String] the raw message
# @return [String] the reformatted message
def formatted_segments_per_line(message_as_string)
  line_break = "\n"
  message_as_string.gsub(segment_regexp) { line_break }
end
split_segment(raw_segment) click to toggle source
# File lib/edifunct/tokenizer.rb, line 50
# Breaks one raw segment into its tag and its decoded data elements.
#
# The trailing segment terminator is removed, the rest is split on
# data_element_regexp, and every data element after the tag is split on
# component_data_element_regexp with each component passed through
# decode_value.
#
# @param raw_segment [String] a single segment, with or without its terminator
# @return [Array] a two-element array: the tag, then an array of component arrays
def split_segment(raw_segment)
  tag, *raw_elements = raw_segment.chomp(@segment_terminator).split(data_element_regexp)

  elements = raw_elements.map do |raw_element|
    raw_element.split(component_data_element_regexp).map { |part| decode_value(part) }
  end

  [tag, elements]
end

Private Instance Methods

component_data_element_regexp() click to toggle source
# File lib/edifunct/tokenizer.rb, line 85
# Memoized regexp matching a component data element separator that is not
# escaped by the release character.
#
# A negative lookbehind is used so that the character *before* the separator
# is inspected. (A negative lookahead placed in front of the separator only
# looks at the separator itself and therefore never rejects anything.)
#
# NOTE: a single-character lookbehind cannot tell an escaping release
# character from an escaped one, so a doubled release character directly in
# front of a separator (e.g. "??:") is also treated as an escaped separator.
#
# @return [Regexp]
def component_data_element_regexp
  @component_data_element_regexp ||= Regexp.new(
    "(?<!#{Regexp.escape(@release_character)})#{Regexp.escape(@component_data_element_separator)}"
  )
end
data_element_regexp() click to toggle source
# File lib/edifunct/tokenizer.rb, line 81
# Memoized regexp matching a data element separator that is not escaped by
# the release character.
#
# A negative lookbehind is used so that the character *before* the separator
# is inspected. (A negative lookahead placed in front of the separator only
# looks at the separator itself and therefore never rejects anything.)
#
# NOTE: a single-character lookbehind cannot tell an escaping release
# character from an escaped one, so a doubled release character directly in
# front of a separator (e.g. "??+") is also treated as an escaped separator.
#
# @return [Regexp]
def data_element_regexp
  @data_element_regexp ||= Regexp.new(
    "(?<!#{Regexp.escape(@release_character)})#{Regexp.escape(@data_element_separator)}"
  )
end
escape_value_regexp() click to toggle source
# File lib/edifunct/tokenizer.rb, line 73
# Memoized regexp matching the release character followed by any character it
# can escape: the release character itself, the segment terminator, the data
# element separator or the component data element separator. The escaped
# character is captured in group 1.
#
# @return [Regexp]
def escape_value_regexp
  @escape_value_regexp ||= begin
    release = Regexp.escape(@release_character)
    delimiters = [
      @release_character,
      @segment_terminator,
      @data_element_separator,
      @component_data_element_separator
    ]
    escapable = delimiters.map { |delimiter| Regexp.escape(delimiter) }.join("|")

    Regexp.new("#{release}(#{escapable})")
  end
end
segment_regexp() click to toggle source
# File lib/edifunct/tokenizer.rb, line 77
# Memoized regexp matching the boundary after a segment terminator, together
# with any whitespace that follows it. The terminator itself is not consumed,
# so it stays attached to the preceding segment when splitting.
#
# A terminator escaped by the release character is not a boundary. This is
# checked with a negative lookbehind on "release character + terminator",
# because the character that decides this sits *before* the terminator; a
# lookahead at the boundary would only inspect the start of the next segment.
#
# NOTE: a fixed-width lookbehind cannot tell an escaping release character
# from an escaped one, so a doubled release character directly in front of a
# terminator (e.g. "??'") is also treated as an escaped terminator.
#
# @return [Regexp]
def segment_regexp
  @segment_regexp ||= begin
    release = Regexp.escape(@release_character)
    terminator = Regexp.escape(@segment_terminator)

    Regexp.new("(?<!#{release}#{terminator})(?<=#{terminator})\\s*")
  end
end
strip_service_string_advice(message_as_string) click to toggle source

Strips the optional UNA segment, also known as the Service String Advice.

# File lib/edifunct/tokenizer.rb, line 92
# Returns the message with the first match of SERVICE_STRING_ADVICE_REGEXP
# removed; the input string is left untouched.
#
# @param message_as_string [String] the raw message
# @return [String] the message without its service string advice
def strip_service_string_advice(message_as_string)
  message_as_string.sub(SERVICE_STRING_ADVICE_REGEXP) { "" }
end