class Edifunct::Tokenizer
Tokenizer
is responsible for splitting a message into segments, data elements and components.
Constants
- SERVICE_STRING_ADVICE_REGEXP
Attributes
component_data_element_separator[R]
data_element_separator[R]
release_character[R]
segment_terminator[R]
Public Class Methods
for_message(edifact_message)
click to toggle source
# File lib/edifunct/tokenizer.rb, line 9
# Builds a tokenizer configured for the given EDIFACT message.
#
# When the message starts with "UNA", the six characters that follow are read
# positionally as: component data element separator, data element separator,
# decimal mark (ignored), release character, reserved (ignored) and segment
# terminator. Otherwise a tokenizer with the default delimiters is built.
#
# @param edifact_message [String] the raw message
# @return the object built by +new+
def for_message(edifact_message)
  return new unless edifact_message.start_with?("UNA")

  # Slice out just the six delimiter characters instead of turning the whole
  # message into an array of characters. A truncated advice yields nil for
  # the missing positions, exactly as destructuring the full array would.
  component_data_element_separator, data_element_separator, _decimal_mark_delimiter,
    release_character, _reserved, segment_terminator = edifact_message[3, 6].chars

  new(
    release_character: release_character,
    segment_terminator: segment_terminator,
    data_element_separator: data_element_separator,
    component_data_element_separator: component_data_element_separator
  )
end
new(release_character: "?", segment_terminator: "'", data_element_separator: "+", component_data_element_separator: ":")
click to toggle source
# File lib/edifunct/tokenizer.rb, line 35
# Remembers the four service characters used to tokenize a message.
#
# @param release_character [String] defaults to "?"
# @param segment_terminator [String] defaults to "'"
# @param data_element_separator [String] defaults to "+"
# @param component_data_element_separator [String] defaults to ":"
def initialize(
  release_character: "?",
  segment_terminator: "'",
  data_element_separator: "+",
  component_data_element_separator: ":"
)
  @release_character = release_character
  @segment_terminator = segment_terminator
  @data_element_separator = data_element_separator
  @component_data_element_separator = component_data_element_separator
end
Public Instance Methods
as_segments(message_as_string)
click to toggle source
# File lib/edifunct/tokenizer.rb, line 42
# Splits a message into Segment objects.
#
# The service string advice is stripped first, the remainder is cut on
# +segment_regexp+, and every raw segment is broken down by +split_segment+.
#
# @param message_as_string [String] the raw message
# @return [Array<Segment>] one Segment per raw segment
def as_segments(message_as_string)
  message_body = strip_service_string_advice(message_as_string)
  raw_segments = message_body.split(segment_regexp)

  raw_segments.map do |raw_segment|
    tag, elements = split_segment(raw_segment)
    Segment.new(tag: tag, raw_segment: raw_segment, data_elements: elements)
  end
end
decode_value(encoded_value)
click to toggle source
# File lib/edifunct/tokenizer.rb, line 63
# Removes the release character in front of every character matched by
# +escape_value_regexp+, leaving only the released character itself.
#
# @param encoded_value [String] a component value as it appears in the message
# @return [String] the value with release sequences resolved
def decode_value(encoded_value)
  # Capture group 1 holds the character that followed the release character.
  encoded_value.gsub(escape_value_regexp) { Regexp.last_match(1) }
end
formatted_segments_per_line(message_as_string)
click to toggle source
# File lib/edifunct/tokenizer.rb, line 67
# Returns the message with every match of +segment_regexp+ replaced by a
# newline.
#
# @param message_as_string [String] the raw message
# @return [String] the reformatted message
def formatted_segments_per_line(message_as_string)
  line_break = "\n"
  message_as_string.gsub(segment_regexp, line_break)
end
split_segment(raw_segment)
click to toggle source
# File lib/edifunct/tokenizer.rb, line 50
# Breaks one raw segment into its tag and its decoded data elements.
#
# @param raw_segment [String] a single segment, with or without its trailing
#   segment terminator
# @return [Array] a two-element array: the segment tag, followed by an array
#   of data elements, each of which is an array of decoded component strings
def split_segment(raw_segment)
  # The first piece is the tag; everything after it is a data element.
  tag, *raw_elements = raw_segment.chomp(@segment_terminator).split(data_element_regexp)

  elements = raw_elements.map do |raw_element|
    raw_element.split(component_data_element_regexp).map { |raw_component| decode_value(raw_component) }
  end

  [tag, elements]
end
Private Instance Methods
component_data_element_regexp()
click to toggle source
# File lib/edifunct/tokenizer.rb, line 85
# Regexp matching a component data element separator that is not released,
# i.e. not immediately preceded by the release character. Memoized.
#
# A negative look-behind is required here: a negative look-ahead placed in
# front of the separator would inspect the separator itself and therefore
# never reject anything.
#
# NOTE: only the single preceding character is inspected, so a separator that
# follows a doubled (self-released) release character is still treated as
# released.
#
# @return [Regexp]
def component_data_element_regexp
  @component_data_element_regexp ||= Regexp.new(
    "(?<!#{Regexp.escape(@release_character)})#{Regexp.escape(@component_data_element_separator)}"
  )
end
data_element_regexp()
click to toggle source
# File lib/edifunct/tokenizer.rb, line 81
# Regexp matching a data element separator that is not released, i.e. not
# immediately preceded by the release character. Memoized.
#
# A negative look-behind is required here: a negative look-ahead placed in
# front of the separator would inspect the separator itself and therefore
# never reject anything.
#
# NOTE: only the single preceding character is inspected, so a separator that
# follows a doubled (self-released) release character is still treated as
# released.
#
# @return [Regexp]
def data_element_regexp
  @data_element_regexp ||= Regexp.new(
    "(?<!#{Regexp.escape(@release_character)})#{Regexp.escape(@data_element_separator)}"
  )
end
escape_value_regexp()
click to toggle source
# File lib/edifunct/tokenizer.rb, line 73
# Regexp matching the release character followed by one of the four service
# characters (release character, segment terminator, data element separator,
# component data element separator). The character that follows the release
# character is captured in group 1. Memoized.
#
# @return [Regexp]
def escape_value_regexp
  @escape_value_regexp ||= begin
    release = Regexp.escape(@release_character)
    releasable = [
      @release_character,
      @segment_terminator,
      @data_element_separator,
      @component_data_element_separator
    ].map { |character| Regexp.escape(character) }

    Regexp.new("#{release}(#{releasable.join('|')})")
  end
end
segment_regexp()
click to toggle source
# File lib/edifunct/tokenizer.rb, line 77
# Regexp matching the boundary between two segments: the position right after
# a segment terminator, plus any whitespace that follows it. The terminator
# itself is matched by a look-behind, so it is not consumed and stays attached
# to the preceding segment. Memoized.
#
# The negative look-behind rejects a terminator that is immediately preceded
# by the release character, so a released terminator inside a value does not
# end the segment.
#
# NOTE: only the single character before the terminator is inspected, so a
# terminator that follows a doubled (self-released) release character is
# still treated as released.
#
# @return [Regexp]
def segment_regexp
  @segment_regexp ||= begin
    release = Regexp.escape(@release_character)
    terminator = Regexp.escape(@segment_terminator)

    Regexp.new("(?<!#{release}#{terminator})(?<=#{terminator})\\s*")
  end
end
strip_service_string_advice(message_as_string)
click to toggle source
Strips the optional UNA segment, also known as the Service String Advice.
# File lib/edifunct/tokenizer.rb, line 92
# Removes the first match of SERVICE_STRING_ADVICE_REGEXP (the optional UNA
# segment) from the message, returning a new string.
#
# @param message_as_string [String] the raw message
# @return [String] the message without its service string advice
def strip_service_string_advice(message_as_string)
  message_as_string.sub(SERVICE_STRING_ADVICE_REGEXP) { '' }
end