class Object

Public Instance Methods

segment(text, lang, segmentByNewline)
# File lib/pragmatic_segmenter_server/segment.rb, line 4
# Splits +text+ into sentences with PragmaticSegmenter and builds a "mask":
# a copy of the text in which every detected sentence is replaced by the
# placeholder "{}" while everything between sentences is kept verbatim.
#
# @param text [String] the text to segment
# @param lang [String] language code forwarded to PragmaticSegmenter
# @param segmentByNewline [Boolean] when truthy, runs of newlines/tabs (with
#   surrounding spaces) are treated as hard boundaries: they are copied into
#   the mask unchanged and each piece between them is segmented on its own
# @return [Array(Array<String>, String)] the sentences in document order and
#   the mask; the mask contains one "{}" per sentence that could be located
#   in the text
def segment(text, lang, segmentByNewline)
  newline_regex = /((?: *[\n\r\t]+ *)+)/
  mask = +''
  segments = []

  # The capture group makes split return the separators as well, so the
  # original whitespace can be reproduced in the mask.
  text_parts = segmentByNewline ? text.split(newline_regex) : [text]

  text_parts.each do |text_part|
    if segmentByNewline && text_part.match?(newline_regex)
      mask << text_part
      next
    end

    # Segment once and reuse the result for both the output list and the mask.
    # NOTE(review): clean: false presumably keeps the segmenter from altering
    # the text, so each sentence can be found verbatim below - confirm.
    sentences = PragmaticSegmenter::Segmenter.new(text: text_part, language: lang, clean: false).segment
    segments.concat(sentences)

    # Walk the part left to right, replacing each sentence at its own
    # occurrence. Unlike a single alternation regex this cannot match a short
    # sentence inside a longer one, and it emits no placeholder when the
    # segmenter returns nothing (empty or whitespace-only parts).
    cursor = 0
    sentences.each do |sentence|
      found_at = text_part.index(sentence, cursor)
      next unless found_at # sentence not present verbatim; leave the text as is

      mask << text_part[cursor...found_at] << '{}'
      cursor = found_at + sentence.length
    end
    mask << text_part[cursor..-1]
  end

  [segments, mask]
end