class Glueckskeks::CorpusReader

Public Class Methods

read_corpus_files(corpus_dir='corpus') click to toggle source

The returned “corpus” is a map from a name (derived from the filename) to the lines of that file.

# File lib/glueckskeks/corpus_reader.rb, line 5
# Reads every "*.corpus" file found directly inside +corpus_dir+.
#
# Each line is stripped of leading and trailing whitespace, and lines that are
# empty after stripping are discarded. A file that ends up with no lines is
# reported through Glueckskeks.logger.error, but is still included in the
# result with an empty array.
#
# corpus_dir - path of the directory to search (default: 'corpus').
#
# Returns a Hash mapping each file's basename (without the ".corpus"
# extension) to the Array of its remaining lines.
def self.read_corpus_files(corpus_dir='corpus')
  result_map = {}

  Dir.glob("#{corpus_dir}/*.corpus").each do |filename|
    Glueckskeks.logger.debug "Reading corpus file #{filename}"

    # File.readlines opens and closes the file itself, so no handle is leaked.
    # The non-bang strip is deliberate: strip! returns nil for a line that
    # needs no stripping (e.g. a last line without a trailing newline), which
    # would put nil into the array and break the empty? filter below.
    lines = File.readlines(filename).map(&:strip).reject(&:empty?)

    Glueckskeks.logger.debug "#{lines.count} valid line(s) found"
    Glueckskeks.logger.error "#{filename} does not contain any pattern" if lines.empty?

    result_map[File.basename(filename, '.corpus')] = lines
  end

  result_map
end