001 package aima.learning.reinforcement; 002 003 import aima.probability.Randomizer; 004 import aima.probability.decision.MDP; 005 import aima.probability.decision.MDPPerception; 006 007 /** 008 * @author Ravi Mohan 009 * 010 */ 011 012 public abstract class MDPAgent<STATE_TYPE, ACTION_TYPE> { 013 014 protected MDP<STATE_TYPE, ACTION_TYPE> mdp; 015 016 protected STATE_TYPE currentState; 017 018 protected Double currentReward; 019 020 protected STATE_TYPE previousState; 021 022 protected ACTION_TYPE previousAction; 023 024 public MDPAgent(MDP<STATE_TYPE, ACTION_TYPE> mdp) { 025 this.mdp = mdp; 026 this.currentState = mdp.getInitialState(); 027 this.currentReward = mdp.getRewardFor(currentState); 028 029 } 030 031 public MDPPerception<STATE_TYPE> execute(ACTION_TYPE action, Randomizer r) { 032 MDPPerception<STATE_TYPE> perception = mdp.execute(currentState, 033 action, r); 034 updateFromPerception(perception); 035 return perception; 036 } 037 038 public void updateFromPerception(MDPPerception<STATE_TYPE> perception) { 039 currentState = perception.getState(); 040 currentReward = perception.getReward(); 041 } 042 043 public void executeTrial(Randomizer r) { 044 currentState = mdp.getInitialState(); 045 currentReward = mdp.getRewardFor(mdp.getInitialState()); 046 previousState = null; 047 previousAction = null; 048 MDPPerception<STATE_TYPE> perception = new MDPPerception<STATE_TYPE>( 049 currentState, currentReward); 050 ACTION_TYPE action = null; 051 do { 052 action = decideAction(perception); 053 if (action != null) { 054 perception = execute(action, r); 055 } 056 } while (action != null); 057 } 058 059 public abstract ACTION_TYPE decideAction( 060 MDPPerception<STATE_TYPE> perception); 061 062 public Double getCurrentReward() { 063 return currentReward; 064 } 065 066 public void setCurrentReward(Double currentReward) { 067 this.currentReward = currentReward; 068 } 069 070 public ACTION_TYPE getPreviousAction() { 071 return previousAction; 072 } 073 074 public void setPreviousAction(ACTION_TYPE previousAction) { 075 this.previousAction = previousAction; 076 } 077 078 public STATE_TYPE getPreviousState() { 079 return previousState; 080 } 081 082 public void setPreviousState(STATE_TYPE previousState) { 083 this.previousState = previousState; 084 } 085 086 public STATE_TYPE getCurrentState() { 087 return currentState; 088 } 089 090 }