class Object
Public Instance Methods
segment(text, lang, segmentByNewline)
click to toggle source
# File lib/pragmatic_segmenter_server/segment.rb, line 4
#
# Splits +text+ into segments with PragmaticSegmenter and builds a "mask":
# a copy of the text in which every occurrence of a returned segment is
# replaced by the placeholder "{}".
#
# @param text [String] the text to segment
# @param lang [String] language passed through to
#   PragmaticSegmenter::Segmenter as +language:+
# @param segmentByNewline [Boolean] when truthy, the text is first split on
#   runs of newlines/carriage returns/tabs (with surrounding spaces); those
#   separator runs are copied into the mask verbatim and each remaining part
#   is segmented on its own
# @return [Array(Array<String>, String)] the list of segments (in order) and
#   the mask string
def segment(text, lang, segmentByNewline)
  newline_regex = /((?: *[\n\r\t]+ *)+)/
  segments = []
  mask = +''

  # The capture group makes String#split keep the separators in the result,
  # so they can be written back into the mask unchanged.
  text_parts = segmentByNewline ? text.split(newline_regex) : [text]

  text_parts.each do |text_part|
    if segmentByNewline && text_part.match?(newline_regex)
      mask << text_part
      next
    end

    # Segment once and reuse the result for both the output list and the mask.
    part_segments = PragmaticSegmenter::Segmenter.new(text: text_part, language: lang, clean: false).segment
    segments.concat(part_segments)

    # Regexp.union escapes each string and, for an empty list, yields a
    # pattern that never matches. An empty pattern would match at every
    # position and scatter "{}" placeholders that have no segment behind them.
    patterns = part_segments.reject(&:empty?)
    mask << text_part.gsub(Regexp.union(patterns), '{}')
  end

  [segments, mask]
end