class Newral::QLearning::Base
Attributes
game[RW]
Public Class Methods
new( id: nil, game: nil, learning_rate: 0.4, discount: 0.9, epsilon: 0.9, sleep_time: 0.001 )
This Q-learning algorithm was originally posted at www.practicalai.io/teaching-ai-play-simple-game-using-q-learning/. It has been extended so it can play more games. The Q-table is implemented as a hash, so the available actions can differ at different positions; this way the algorithm also needs to know less about the game.
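For illustration, a minimal sketch of what the hash-based Q-table can look like after a few moves. The state keys and action names below are hypothetical; in practice they are whatever the game returns from get_position and get_actions.

  # Sketch of the Q-table as a hash of hashes: state => { action => value }.
  # States and actions here are made-up examples, not part of the library.
  q_hash = {
    [0, 0] => { move_right: 0.1, move_down: 0.35 },
    [1, 0] => { move_right: 0.8, move_up: -0.1 },
    [1, 1] => { move_left: 0.1 }   # different states can expose different actions
  }
  # The best known action for a state is the key with the highest value:
  best_action = q_hash[[1, 0]].max_by { |_action, value| value }.first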
# File lib/newral/q_learning/base.rb, line 10
def initialize( id: nil, game: nil, learning_rate: 0.4, discount: 0.9, epsilon: 0.9, sleep_time: 0.001 )
  game.set_player( self )
  @id = id
  @game = game
  @learning_rate = learning_rate
  @discount = discount
  @epsilon = epsilon
  @sleep = sleep_time
  @random = Random.new
  @q_hash = {}
end
Public Instance Methods
get_input( move: true )
# File lib/newral/q_learning/base.rb, line 31
def get_input( move: true )
  # Our new state is equal to the player position
  @outcome_state = @game.get_position( player: self )
  # which actions are available to the player at the moment?
  @actions = @game.get_actions( player: self )
  # is this the first run
  initial_run = @q_hash.empty?
  @q_hash[@outcome_state] = @q_hash[@outcome_state] || {}
  @actions.each do |action|
    @q_hash[@outcome_state][action] = @q_hash[@outcome_state][action] || 0.1 # @random.rand/10.0
  end
  if initial_run
    @action_taken = @actions.first
  elsif @old_state # If this is not the first run
    # Evaluate what happened on last action and update Q table
    # Calculate reward
    reward = 0 # default is 0
    if @old_score < @game.get_score( player: self )
      reward = [@game.get_score( player: self ) - @old_score, 1].max # reward is at least 1 if our score increased
    elsif @old_score > @game.get_score( player: self )
      reward = [@old_score - @game.get_score( player: self ), -1].min # reward is smaller or equal -1 if our score decreased
    else
      reward = -0.1 # time is money, we punish moves
    end
    @q_hash[@old_state][@action_taken] = @q_hash[@old_state][@action_taken] + @learning_rate * (reward + @discount * @q_hash[@outcome_state].values.max.to_f - @q_hash[@old_state][@action_taken])
  end
  # Capture current state and score
  @old_score = @game.get_score( player: self )
  @old_state = @game.get_position( player: self ) # we remember this for next run, its current state
  @old_actions = @actions
  if move # in the goal state we just update the q_hash
    # Chose action based on Q value estimates for state
    if @random.rand > @epsilon || @q_hash[@old_state].nil?
      # Select random action
      @action_taken_index = @random.rand(@actions.length).round
      @action_taken = @actions[@action_taken_index]
    else
      # Select based on Q table, remember @old_state is equal to current state at this point
      @action_taken = @q_hash[@old_state].to_a.sort{ |v1,v2| v2[1]<=>v1[1] }[0][0]
      raise "impossible action #{ @action_taken } #{ @old_state } #{ @q_hash[@old_state] } #{ @actions } #{ @old_actions }" unless @actions.member?( @action_taken )
    end
    # Take action
    return @action_taken
  else
    @old_state = nil # we do not have a old state any more as we have reached an end state
  end
end
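To make the update rule concrete, here is a standalone sketch of a single Q-value update with made-up numbers, following the same formula the method applies to @q_hash. The variable names and values are illustrative assumptions, not library code.

  # One Q-learning update, as in get_input:
  #   Q(s,a) <- Q(s,a) + learning_rate * (reward + discount * max Q(s',.) - Q(s,a))
  learning_rate = 0.4
  discount      = 0.9
  q_old         = 0.1    # current Q(@old_state, @action_taken)
  reward        = 1.0    # score increased, so reward is at least 1
  q_next_max    = 0.35   # best value among actions in @outcome_state
  q_new = q_old + learning_rate * (reward + discount * q_next_max - q_old)
  # => 0.1 + 0.4 * (1.0 + 0.315 - 0.1) = 0.586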
inform_game_ended()
# File lib/newral/q_learning/base.rb, line 26
def inform_game_ended
  get_input( move: false )
end
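A rough sketch of how a turn-based driver loop might use the learner. GridGame, finished? and perform are hypothetical stand-ins; the only assumption about the game is that it responds to the methods the source above actually calls (set_player, get_position, get_actions, get_score).

  # Hypothetical driver loop around the learner.
  game   = GridGame.new                    # made-up game class
  player = Newral::QLearning::Base.new( game: game )

  until game.finished?                     # hypothetical end-of-game check
    action = player.get_input              # player picks an action via the Q-table
    game.perform( action, player: player ) # hypothetical: game applies the action
  end
  player.inform_game_ended                 # final Q-table update, no move taken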
set_epsilon( epsilon )
# File lib/newral/q_learning/base.rb, line 22
def set_epsilon( epsilon )
  @epsilon = epsilon
end
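Note that in get_input the Q-table action is chosen when the random draw is at most epsilon, so a higher epsilon means greedier play. A sketch of one possible schedule (an assumption, not part of the library), reusing the player from the sketch above:

  # Start fairly exploratory, become greedier as training progresses.
  player.set_epsilon( 0.5 )                 # early on: random action half the time
  1000.times do |episode|
    player.set_epsilon( [0.5 + episode / 2000.0, 0.95].min )
    # ... play one game here ...
  end
  player.set_epsilon( 1.0 )                 # evaluation: always follow the Q-table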