aima.probability.decision
Class MDP<STATE_TYPE,ACTION_TYPE>
java.lang.Object
aima.probability.decision.MDP<STATE_TYPE,ACTION_TYPE>
public class MDP<STATE_TYPE,ACTION_TYPE>
extends java.lang.Object
Method Summary |
MDP<STATE_TYPE,ACTION_TYPE> |
emptyMdp()
|
MDPPerception<STATE_TYPE> |
execute(STATE_TYPE state,
ACTION_TYPE action,
Randomizer r)
|
java.util.List<ACTION_TYPE> |
getAllActions()
|
STATE_TYPE |
getInitialState()
|
double |
getRewardFor(STATE_TYPE state)
|
double |
getTransitionProbability(MDPTransition<STATE_TYPE,ACTION_TYPE> transition)
|
java.util.List<MDPTransition<STATE_TYPE,ACTION_TYPE>> |
getTransitionsWith(STATE_TYPE initialState,
ACTION_TYPE action)
|
MDPUtilityFunction<STATE_TYPE> |
initialUtilityFunction()
|
boolean |
isTerminalState(STATE_TYPE state)
|
MDPUtilityFunction<STATE_TYPE> |
policyEvaluation(MDPPolicy<STATE_TYPE,ACTION_TYPE> pi,
MDPUtilityFunction<STATE_TYPE> U,
double gamma,
int iterations)
|
MDPPolicy<STATE_TYPE,ACTION_TYPE> |
policyIteration(double gamma)
|
MDPPolicy<STATE_TYPE,ACTION_TYPE> |
randomPolicy()
|
void |
setReward(STATE_TYPE state,
double reward)
|
void |
setTransitionProbability(MDPTransition<STATE_TYPE,ACTION_TYPE> transition,
double probability)
|
java.lang.String |
toString()
|
Pair<MDPUtilityFunction<STATE_TYPE>,java.lang.Double> |
valueIterateOnce(double gamma,
MDPUtilityFunction<STATE_TYPE> presentUtilityFunction)
|
MDPUtilityFunction<STATE_TYPE> |
valueIteration(double gamma,
double error,
double delta)
|
MDPUtilityFunction<STATE_TYPE> |
valueIterationForFixedIterations(int numberOfIterations,
double gamma)
|
MDPUtilityFunction<STATE_TYPE> |
valueIterationTillMAximumUtilityGrowthFallsBelowErrorMargin(double gamma,
double errorMargin)
|
Methods inherited from class java.lang.Object |
clone, equals, finalize, getClass, hashCode, notify, notifyAll, wait, wait, wait |
MDP
public MDP(MDPSource<STATE_TYPE,ACTION_TYPE> source)
emptyMdp
public MDP<STATE_TYPE,ACTION_TYPE> emptyMdp()
valueIteration
public MDPUtilityFunction<STATE_TYPE> valueIteration(double gamma,
double error,
double delta)
valueIterationForFixedIterations
public MDPUtilityFunction<STATE_TYPE> valueIterationForFixedIterations(int numberOfIterations,
double gamma)
valueIterationTillMAximumUtilityGrowthFallsBelowErrorMargin
public MDPUtilityFunction<STATE_TYPE> valueIterationTillMAximumUtilityGrowthFallsBelowErrorMargin(double gamma,
double errorMargin)
valueIterateOnce
public Pair<MDPUtilityFunction<STATE_TYPE>,java.lang.Double> valueIterateOnce(double gamma,
MDPUtilityFunction<STATE_TYPE> presentUtilityFunction)
policyIteration
public MDPPolicy<STATE_TYPE,ACTION_TYPE> policyIteration(double gamma)
policyEvaluation
public MDPUtilityFunction<STATE_TYPE> policyEvaluation(MDPPolicy<STATE_TYPE,ACTION_TYPE> pi,
MDPUtilityFunction<STATE_TYPE> U,
double gamma,
int iterations)
randomPolicy
public MDPPolicy<STATE_TYPE,ACTION_TYPE> randomPolicy()
initialUtilityFunction
public MDPUtilityFunction<STATE_TYPE> initialUtilityFunction()
getInitialState
public STATE_TYPE getInitialState()
getRewardFor
public double getRewardFor(STATE_TYPE state)
setReward
public void setReward(STATE_TYPE state,
double reward)
setTransitionProbability
public void setTransitionProbability(MDPTransition<STATE_TYPE,ACTION_TYPE> transition,
double probability)
getTransitionProbability
public double getTransitionProbability(MDPTransition<STATE_TYPE,ACTION_TYPE> transition)
execute
public MDPPerception<STATE_TYPE> execute(STATE_TYPE state,
ACTION_TYPE action,
Randomizer r)
isTerminalState
public boolean isTerminalState(STATE_TYPE state)
getTransitionsWith
public java.util.List<MDPTransition<STATE_TYPE,ACTION_TYPE>> getTransitionsWith(STATE_TYPE initialState,
ACTION_TYPE action)
getAllActions
public java.util.List<ACTION_TYPE> getAllActions()
toString
public java.lang.String toString()
Overrides: toString in class java.lang.Object