class Ebooks::Generator
Attributes
dictionary[RW]
Public Class Methods
new(config)
click to toggle source
# File lib/ebooks/generator.rb, line 6
# Sets up a generator from a configuration object.
#
# Reads :tweets_csv_path, :corpus_path and :dictionary_name out of +config+
# via +[]+, makes sure the corpus file is in place, and then builds the
# dictionary that is exposed through the +dictionary+ attribute.
def initialize(config)
  # Capture all configuration first; the build steps below only read ivars.
  @tweets_csv_path = config[:tweets_csv_path]
  @corpus_path     = config[:corpus_path]
  @dictionary_name = config[:dictionary_name]

  # The corpus has to be on disk before the dictionary can be built from it.
  build_corpus
  @dictionary = build_dictionary
end
Public Instance Methods
generate_sentence()
click to toggle source
# File lib/ebooks/generator.rb, line 33
# Run when you want to generate a new Markov tweet.
#
# Asks the dictionary for two sentences, keeps only the text that precedes
# the first "#<" marker, then removes a trailing line break (chomp) and one
# further trailing character (chop).
def generate_sentence
  generated = dictionary.generate_n_sentences(2)
  # Destructuring picks the first piece of the split, like Array#first.
  leading, = generated.split(/\#\</)
  without_newline = leading.chomp
  without_newline.chop
end
generate_twitter_corpus()
click to toggle source
# File lib/ebooks/generator.rb, line 14
# Builds the plain-text corpus that seeds the Markov chains.
#
# Go to Twitter.com -> Settings -> Download Archive. The tweets.csv file is
# in the top directory of that archive; @tweets_csv_path must point at it.
#
# Reads every row of the CSV, takes column index 5 as the tweet text, cleans
# it, and writes one cleaned tweet per line to @corpus_path (overwriting any
# existing file). Rows are written in reverse file order. Rows without a
# value in column 5 are skipped rather than raising on nil.
#
# NOTE(review): every row is processed alike, so if the CSV carries a header
# row its column-5 label ends up in the corpus too - confirm whether that is
# intended.
def generate_twitter_corpus
  rows = CSV.read(@tweets_csv_path)

  # Create a new clean file of text that acts as the seed for the Markov chains
  File.open(@corpus_path, 'w') do |file|
    rows.reverse_each do |row|
      raw_text = row[5]
      next if raw_text.nil?

      tweet_text = raw_text
                   .gsub(%r{(?:f|ht)tps?:/[^\s]+}, '') # Strip links
                   .gsub(/\n/, ' ')                    # Strip new lines
                   .gsub(/@[a-z0-9_]+/i, '')           # Strip usernames
                   .gsub(/\b(?:RT|MT)\b/, '')          # Strip RT/MT markers, whole words only

      # Save the text
      file.write("#{tweet_text}\n")
    end
  end
end
Private Instance Methods
build_corpus()
click to toggle source
# File lib/ebooks/generator.rb, line 40
# Generates the corpus file from the tweets CSV, but only when no file is
# present at @corpus_path yet; an existing corpus is left untouched.
#
# Uses File.exist? because the File.exists? alias was removed in Ruby 3.2.
def build_corpus
  generate_twitter_corpus unless File.exist?(@corpus_path)
end
build_dictionary()
click to toggle source
# File lib/ebooks/generator.rb, line 46
# Returns the MarkyMarkov dictionary named @dictionary_name.
#
# When the dictionary file (see dictionary_path) already exists, the freshly
# constructed dictionary is returned as-is - presumably the library loads the
# saved file on construction; confirm against the marky_markov docs.
# Otherwise the corpus at @corpus_path is parsed into the dictionary and the
# result is saved before being returned.
#
# Uses File.exist? because the File.exists? alias was removed in Ruby 3.2.
def build_dictionary
  # Check for the saved file before constructing, as the original code did.
  already_saved = File.exist?(dictionary_path)
  markov = MarkyMarkov::Dictionary.new(@dictionary_name)
  return markov if already_saved

  markov.parse_file(@corpus_path)
  markov.save_dictionary!
  markov
end
dictionary_path()
click to toggle source
# File lib/ebooks/generator.rb, line 57
# File name checked for a saved dictionary: the configured dictionary name
# followed by the ".mmd" extension.
def dictionary_path
  format('%s.mmd', @dictionary_name)
end