001    package aima.learning.reinforcement;
002    
003    import aima.probability.Randomizer;
004    import aima.probability.decision.MDP;
005    import aima.probability.decision.MDPPerception;
006    
007    /**
008     * @author Ravi Mohan
009     * 
010     */
011    
012    public abstract class MDPAgent<STATE_TYPE, ACTION_TYPE> {
013    
014            protected MDP<STATE_TYPE, ACTION_TYPE> mdp;
015    
016            protected STATE_TYPE currentState;
017    
018            protected Double currentReward;
019    
020            protected STATE_TYPE previousState;
021    
022            protected ACTION_TYPE previousAction;
023    
024            public MDPAgent(MDP<STATE_TYPE, ACTION_TYPE> mdp) {
025                    this.mdp = mdp;
026                    this.currentState = mdp.getInitialState();
027                    this.currentReward = mdp.getRewardFor(currentState);
028    
029            }
030    
031            public MDPPerception<STATE_TYPE> execute(ACTION_TYPE action, Randomizer r) {
032                    MDPPerception<STATE_TYPE> perception = mdp.execute(currentState,
033                                    action, r);
034                    updateFromPerception(perception);
035                    return perception;
036            }
037    
038            public void updateFromPerception(MDPPerception<STATE_TYPE> perception) {
039                    currentState = perception.getState();
040                    currentReward = perception.getReward();
041            }
042    
043            public void executeTrial(Randomizer r) {
044                    currentState = mdp.getInitialState();
045                    currentReward = mdp.getRewardFor(mdp.getInitialState());
046                    previousState = null;
047                    previousAction = null;
048                    MDPPerception<STATE_TYPE> perception = new MDPPerception<STATE_TYPE>(
049                                    currentState, currentReward);
050                    ACTION_TYPE action = null;
051                    do {
052                            action = decideAction(perception);
053                            if (action != null) {
054                                    perception = execute(action, r);
055                            }
056                    } while (action != null);
057            }
058    
059            public abstract ACTION_TYPE decideAction(
060                            MDPPerception<STATE_TYPE> perception);
061    
062            public Double getCurrentReward() {
063                    return currentReward;
064            }
065    
066            public void setCurrentReward(Double currentReward) {
067                    this.currentReward = currentReward;
068            }
069    
070            public ACTION_TYPE getPreviousAction() {
071                    return previousAction;
072            }
073    
074            public void setPreviousAction(ACTION_TYPE previousAction) {
075                    this.previousAction = previousAction;
076            }
077    
078            public STATE_TYPE getPreviousState() {
079                    return previousState;
080            }
081    
082            public void setPreviousState(STATE_TYPE previousState) {
083                    this.previousState = previousState;
084            }
085    
086            public STATE_TYPE getCurrentState() {
087                    return currentState;
088            }
089    
090    }