/**
 * Sets the environment's current state to the given state. If no state generator
 * has been set yet, a {@link ConstantStateGenerator} for this state is installed
 * so that subsequent environment resets reproduce it.
 * @param s the state the environment should now be in
 */
@Override
public void setCurStateTo(State s) {
	if(this.stateGenerator != null){
		this.curState = s;
		return;
	}
	this.stateGenerator = new ConstantStateGenerator(s);
	this.curState = s;
}
/**
 * Initializes the environment with a transition-sampling model and an initial state.
 * The initial state is also wrapped in a {@link ConstantStateGenerator} so that
 * environment resets return to it.
 * @param model the {@link SampleModel} used to simulate transitions
 * @param initialState the state in which the environment starts
 */
public SimulatedEnvironment(SampleModel model, State initialState) {
	this.model = model;
	this.curState = initialState;
	this.stateGenerator = new ConstantStateGenerator(initialState);
}
/**
 * Initializes the environment from a domain and an initial state. The domain must
 * carry a model; the initial state is wrapped in a {@link ConstantStateGenerator}
 * so that environment resets return to it.
 * @param domain the {@link SADomain} whose model will be used to simulate transitions
 * @param initialState the state in which the environment starts
 * @throws RuntimeException if the input domain has no model attached
 */
public SimulatedEnvironment(SADomain domain, State initialState) {
	// Validate before assigning any fields: the original assigned stateGenerator and
	// curState first, leaving the object partially initialized when the check failed.
	// Also fetch the model once instead of calling domain.getModel() twice.
	SampleModel domainModel = domain.getModel();
	if(domainModel == null){
		throw new RuntimeException("SimulatedEnvironment requires a Domain with a model, but the input domain does not have one.");
	}
	this.model = domainModel;
	this.stateGenerator = new ConstantStateGenerator(initialState);
	this.curState = initialState;
}
/**
 * Initializes the world with a constant start state.
 * @param domain the SGDomain the world will use
 * @param jr the joint reward function
 * @param tf the terminal function
 * @param initialState the initial state of the world every time a new game starts
 */
public World(SGDomain domain, JointRewardFunction jr, TerminalFunction tf, State initialState){
	// every new game restarts from the same initial state, with no state abstraction
	ConstantStateGenerator initialStateGenerator = new ConstantStateGenerator(initialState);
	this.init(domain, domain.getJointActionModel(), jr, tf, initialStateGenerator, new IdentityStateMapping());
}
/**
 * Plans from the input state and then returns a {@link burlap.behavior.policy.GreedyQPolicy}
 * that greedily selects the action with the highest Q-value, breaking ties uniformly randomly.
 * @param initialState the initial state of the planning problem
 * @return a {@link burlap.behavior.policy.GreedyQPolicy}.
 */
@Override
public GreedyQPolicy planFromState(State initialState) {

	if(this.model == null){
		throw new RuntimeException("LSPI cannot execute planFromState because the reward function and/or terminal function for planning have not been set. Use the initializeForPlanning method to set them.");
	}

	// lazily create a default uniform-random data collector on the first planning call
	if(planningCollector == null){
		this.planningCollector = new SARSCollector.UniformRandomSARSCollector(this.actionTypes);
	}

	// collect SARS samples starting from the given state, appending to any existing dataset
	ConstantStateGenerator startStateGenerator = new ConstantStateGenerator(initialState);
	this.dataset = this.planningCollector.collectNInstances(startStateGenerator, this.model, this.numSamplesForPlanning, Integer.MAX_VALUE, this.dataset);

	return this.runPolicyIteration(this.maxNumPlanningIterations, this.maxChange);
}
/**
 * Demo: runs a 20-step grid game between two random agents, serializes the
 * resulting episode, parses it back, and prints both copies' lengths plus the
 * parsed initial state to verify round-trip serialization.
 * Fix: removed the unused {@code SGAgentType} local that the original
 * constructed and never used.
 * @param args unused
 */
public static void main(String[] args) {

	GridGame gridGame = new GridGame();
	OOSGDomain domain = gridGame.generateDomain();

	State startState = GridGame.getTurkeyInitialState();
	JointRewardFunction rewardFunction = new GridGame.GGJointRewardFunction(domain);
	TerminalFunction terminalFunction = new GridGame.GGTerminalFunction(domain);

	World world = new World(domain, rewardFunction, terminalFunction, new ConstantStateGenerator(startState));
	// suppress the world's debug printing for this demo
	DPrint.toggleCode(world.getDebugId(), false);

	SGAgent randomAgent1 = new RandomSGAgent();
	SGAgent randomAgent2 = new RandomSGAgent();
	world.join(randomAgent1);
	world.join(randomAgent2);

	GameEpisode episode = world.runGame(20);
	System.out.println(episode.maxTimeStep());

	String serialized = episode.serialize();
	System.out.println(serialized);

	GameEpisode parsed = GameEpisode.parse(serialized);
	System.out.println(parsed.maxTimeStep());
	System.out.println(parsed.state(0).toString());
}
// Generator that always returns the same state s — presumably used so repeated
// environment/episode resets start from an identical state; confirm against the
// surrounding code, which is not visible in this chunk.
final ConstantStateGenerator sg = new ConstantStateGenerator(s);