class PragmaticSegmenter::PunctuationReplacer

This class replaces punctuation that would normally mark a sentence boundary but, in the matched text, does not.

Attributes

match_type[R]
matches_array[R]
text[R]

Public Class Methods

new(text:, matches_array:, match_type: nil) click to toggle source
# File lib/pragmatic_segmenter/punctuation_replacer.rb, line 33
# Captures the inputs for a punctuation-replacement pass.
#
# text:          the string that gets rewritten; sub_characters edits it in
#                place with gsub!, so the caller's object is modified.
# matches_array: the collection handed to replace_punctuation by #replace.
# match_type:    optional marker; replace_punctuation compares it with
#                'single' to decide whether apostrophes are substituted too.
def initialize(text:, matches_array:, match_type: nil)
  @match_type = match_type
  @matches_array = matches_array
  @text = text
end

Public Instance Methods

replace() click to toggle source
# File lib/pragmatic_segmenter/punctuation_replacer.rb, line 39
# Public entry point: runs the punctuation substitution over the matches
# supplied at construction time and returns whatever replace_punctuation
# returns.
def replace
  candidates = matches_array
  replace_punctuation(candidates)
end

Private Instance Methods

replace_punctuation(array) click to toggle source
# File lib/pragmatic_segmenter/punctuation_replacer.rb, line 45
# Swaps sentence-ending punctuation inside each matched fragment for a
# placeholder token, updating @text as it goes.
#
# array: the matched fragments to protect. Nothing happens (and nil is
#        returned) when it is nil/false or empty.
#
# Returns the result of the final Rule.apply call.
def replace_punctuation(array)
  return if !array || array.empty?

  # Ordered [character, placeholder] pairs. The fullwidth forms are spelled
  # as \u escapes so they cannot be silently folded into their ASCII
  # look-alikes; previously '.', '!' and '?' each appeared twice, which made
  # the second substitution of every pair a no-op.
  # NOTE(review): confirm the ASCII/fullwidth-to-placeholder pairing against
  # the rules that later turn the placeholders back into punctuation.
  substitutions = [
    ['.', '∯'],
    ['。', '&ᓰ&'],
    ["\uFF0E", '&ᓱ&'], # fullwidth full stop
    ["\uFF01", '&ᓳ&'], # fullwidth exclamation mark
    ['!', '&ᓴ&'],
    ['?', '&ᓷ&'],
    ["\uFF1F", '&ᓸ&']  # fullwidth question mark
  ]
  # Apostrophes are protected as well unless the match type is 'single'.
  substitutions << ["'", '&⎋&'] unless match_type.eql?('single')

  # The return values of these Rule.apply calls are ignored, so the code
  # relies on Rule.apply modifying its argument in place.
  # NOTE(review): confirm against Rule.apply.
  Rule.apply(@text, Rules::EscapeRegexReservedCharacters::All)
  array.each do |a|
    Rule.apply(a, Rules::EscapeRegexReservedCharacters::All)
    # Each step receives the previous step's output so that sub_characters
    # can locate the already-rewritten fragment inside @text.
    substitutions.reduce(a) do |fragment, (char, placeholder)|
      sub_characters(fragment, char, placeholder)
    end
  end
  Rule.apply(@text, Rules::SubEscapedRegexReservedCharacters::All)
end
sub_characters(string, char_a, char_b) click to toggle source
# File lib/pragmatic_segmenter/punctuation_replacer.rb, line 64
# Replaces every char_a in string with char_b, applies the same rewrite to
# each occurrence of string inside @text (in place), and returns the
# rewritten string.
#
# string: the fragment to look for in @text.
# char_a: the character to replace within the fragment.
# char_b: the placeholder that takes its place.
def sub_characters(string, char_a, char_b)
  sub = string.gsub(char_a, char_b)
  # A String pattern is matched literally, and the block form inserts its
  # result verbatim. Passing `sub` as a replacement string would instead
  # treat backslash sequences in the fragment (\1, \\, \') as
  # back-references and corrupt @text.
  @text.gsub!(string) { sub }
  sub
end