class PragmaticSegmenter::PunctuationReplacer
This class replaces punctuation that would normally mark a sentence boundary but, in the matched context, does not.
Attributes
match_type[R]
matches_array[R]
text[R]
Public Class Methods
new(text:, matches_array:, match_type: nil)
click to toggle source
# File lib/pragmatic_segmenter/punctuation_replacer.rb, line 33
#
# Stores the inputs for a later call to #replace.
#
# text::          the String in which punctuation will be substituted.
# matches_array:: the collection of matched substrings to process.
# match_type::    optional marker (defaults to nil); the value 'single'
#                 is compared against when substituting.
def initialize(text:, matches_array:, match_type: nil)
  @text, @matches_array, @match_type = text, matches_array, match_type
end
Public Instance Methods
replace()
click to toggle source
# File lib/pragmatic_segmenter/punctuation_replacer.rb, line 39
#
# Public entry point: runs the punctuation substitution over the stored
# matches by delegating to #replace_punctuation, and returns whatever
# that call returns.
def replace
  replace_punctuation(matches_array)
end
Private Instance Methods
replace_punctuation(array)
click to toggle source
# File lib/pragmatic_segmenter/punctuation_replacer.rb, line 45
#
# Swaps boundary-like punctuation inside each matched substring for a
# placeholder token, rewriting @text in place via #sub_characters.
#
# array:: the matched substrings to process. When it is nil/false or
#         empty the method returns nil without touching @text.
#
# Returns the result of the final Rule.apply call on @text.
#
# The halfwidth and fullwidth forms of '.', '!' and '?' are distinct
# characters, each with its own placeholder. Using the ASCII character for
# both entries of a pair makes the second entry unreachable and gives the
# ASCII character the wrong placeholder.
# NOTE(review): confirm the pairings against the rules that turn these
# placeholders back into punctuation (not visible here).
def replace_punctuation(array)
  return if !array || array.empty?

  # Order matters: every step searches @text for the output of the step
  # before it, so the pairs are applied strictly in sequence.
  substitutions = [
    ['.', '∯'],
    ['。', '&ᓰ&'],
    ['．', '&ᓱ&'],
    ['！', '&ᓳ&'],
    ['!', '&ᓴ&'],
    ['?', '&ᓷ&'],
    ['？', '&ᓸ&']
  ]
  # Apostrophes are left untouched when the match type is 'single'.
  substitutions << ["'", '&⎋&'] unless match_type.eql?('single')

  # Presumably escapes regex-reserved characters in place so that @text and
  # each match stay textually aligned; Rule.apply is defined elsewhere.
  Rule.apply(@text, Rules::EscapeRegexReservedCharacters::All)
  array.each do |match|
    Rule.apply(match, Rules::EscapeRegexReservedCharacters::All)
    substitutions.reduce(match) do |current, (char, placeholder)|
      sub_characters(current, char, placeholder)
    end
  end
  Rule.apply(@text, Rules::SubEscapedRegexReservedCharacters::All)
end
sub_characters(string, char_a, char_b)
click to toggle source
# File lib/pragmatic_segmenter/punctuation_replacer.rb, line 64
#
# Replaces every occurrence of +char_a+ in +string+ with +char_b+, then
# rewrites each literal occurrence of +string+ inside @text (in place) with
# that substituted version.
#
# string:: the substring to look for in @text.
# char_a:: the character (or pattern) to replace within +string+.
# char_b:: the replacement for +char_a+.
#
# Returns the substituted copy of +string+.
def sub_characters(string, char_a, char_b)
  replaced = string.gsub(char_a, char_b)
  # A String pattern is matched literally, so no Regexp.escape is needed.
  # The block form inserts +replaced+ verbatim; a replacement *string* would
  # have backslash sequences such as \1 or \\ expanded as back-references,
  # silently dropping or altering characters in @text.
  @text.gsub!(string) { replaced }
  replaced
end