class TeRex::Format::BrownFile

Attributes

category[R]
path[R]
sentences[R]

Public Class Methods

new(file_path, klass) click to toggle source
# File lib/format/brown_file.rb, line 7
# Records where the file lives and which category it belongs to.
# Nothing is read from disk here.
#
# @param file_path [String] stored as @path
# @param klass [Object] stored as @category
#   (presumably the category label for this file — confirm against callers)
def initialize(file_path, klass)
  @category = klass
  @path = file_path
end

Public Instance Methods

scanner() click to toggle source

Reads the file line by line and collects the results into an Array: each non-blank line is split on whitespace, every token has its '/'-delimited POS tag removed, and the remaining words are joined back together with single spaces.

# File lib/format/brown_file.rb, line 16
# Reads the file at @path and returns its non-blank lines with the POS tag
# removed from every whitespace-separated token.
#
# A token has the form "word/tag". The tag is whatever follows the LAST
# slash, so words that contain slashes themselves ("1/2/cd", "and/or/cc")
# keep their full text. Tokens with no slash are kept unchanged.
#
# The result is memoized in @sentences; the file is only read on the first
# call.
#
# @return [Array<String>] one space-joined string of bare words per
#   non-blank line, in file order
def scanner
  @sentences ||= File.open(@path) do |file|
    file.each_line.each_with_object([]) do |line, acc|
      # Skip blank / whitespace-only lines.
      next if line.strip.empty?

      words = line.split(' ').map do |token|
        # rpartition splits on the last '/', so only the tag is dropped.
        word, slash, _tag = token.rpartition('/')
        slash.empty? ? token : word
      end

      acc << words.join(' ')
    end
  end
end