/**
 * Produces a duplicate of this state.
 *
 * @return a newly constructed {@code MCState} built from this state's
 *         {@code x} and {@code v} values
 */
@Override
public State copy() {
    MCState duplicate = new MCState(this.x, this.v);
    return duplicate;
}
/**
 * Generates a state whose two values are random draws scaled into the
 * configured bounds: the first into the {@code xmin}..{@code xmax} span and
 * the second into the {@code vmin}..{@code vmax} span.
 *
 * @return a new {@code MCState} holding the two sampled values
 */
@Override
public State generateState() {
    final double xSpan = this.xmax - this.xmin;
    final double vSpan = this.vmax - this.vmin;

    // The x value is drawn before the v value; keep this order so a seeded
    // generator yields the same sequence of states.
    final double sampledX = this.rand.nextDouble() * xSpan + this.xmin;
    final double sampledV = this.rand.nextDouble() * vSpan + this.vmin;

    return new MCState(sampledX, sampledV);
}
/**
 * Builds the state located at the position reported by {@link #valleyPos()},
 * with {@code 0.} as the second constructor argument.
 *
 * @return a new {@code MCState} at the valley position
 */
public MCState valleyState() {
    final double valleyX = this.valleyPos();
    return new MCState(valleyX, 0.);
}
/**
 * Demo entry point. Generates the mountain car domain, collects SARS data
 * with a uniform random collector, runs LSPI over Fourier basis features to
 * obtain a policy, and then rolls that policy out 100 times in a simulated
 * environment that is wrapped in an {@code EnvironmentServer} together with a
 * visual observer. Prints {@code "Finished"} when all rollouts are done.
 *
 * <p>The numeric settings (5000, 20, 4, 3, 0.99, 30, 1e-6) are handed to the
 * library calls unchanged; their exact meaning is defined by those APIs.
 *
 * @param args command-line arguments; not used
 */
public static void main(String[] args) {
    // --- domain and training data ---
    MountainCar car = new MountainCar();
    SADomain carDomain = car.generateDomain();
    StateGenerator randomStates = new MCRandomStateGenerator(car.physParams);
    SARSCollector sampler = new SARSCollector.UniformRandomSARSCollector(carDomain);
    SARSData samples = sampler.collectNInstances(randomStates, carDomain.getModel(), 5000, 20, null);

    // --- features: register the bounds of "x" and "v", then build the basis ---
    NormalizedVariableFeatures normalized = new NormalizedVariableFeatures();
    normalized = normalized.variableDomain("x", new VariableDomain(car.physParams.xmin, car.physParams.xmax));
    normalized = normalized.variableDomain("v", new VariableDomain(car.physParams.vmin, car.physParams.vmax));
    FourierBasis basis = new FourierBasis(normalized, 4);

    // --- learning ---
    DenseCrossProductFeatures stateActionFeatures = new DenseCrossProductFeatures(basis, 3);
    LSPI learner = new LSPI(carDomain, 0.99, stateActionFeatures, samples);
    Policy learned = learner.runPolicyIteration(30, 1e-6);

    // --- visualization ---
    Visualizer canvas = MountainCarVisualizer.getVisualizer(car);
    VisualActionObserver observer = new VisualActionObserver(canvas);
    observer.initGUI();

    // --- evaluation environment, starting from the valley position ---
    MCState start = new MCState(car.physParams.valleyPos(), 0);
    SimulatedEnvironment simulation = new SimulatedEnvironment(carDomain, start);
    EnvironmentServer served = new EnvironmentServer(simulation, observer);

    final int rollouts = 100;
    for (int run = 0; run < rollouts; run++) {
        PolicyUtils.rollout(learned, served);
        served.resetEnvironment();
    }

    System.out.println("Finished");
}
/**
 * Runs LSPI with Fourier basis features on the mountain car domain and then
 * shows the resulting policy. SARS data is gathered with a uniform random
 * collector, a policy is produced by policy iteration, and the policy is
 * rolled out 5 times in a simulated environment that has a visual observer
 * attached directly to it. Prints {@code "Finished"} at the end.
 *
 * <p>The numeric settings (5000, 20, 4, 3, 0.99, 30, 1e-6) are handed to the
 * library calls unchanged; their exact meaning is defined by those APIs.
 */
public static void MCLSPIFB() {
    MountainCar generator = new MountainCar();
    SADomain mcDomain = generator.generateDomain();

    // Gather the training set from randomly generated states.
    StateGenerator stateSource = new MCRandomStateGenerator(generator.physParams);
    SARSCollector dataCollector = new SARSCollector.UniformRandomSARSCollector(mcDomain);
    SARSData trainingData = dataCollector.collectNInstances(stateSource, mcDomain.getModel(), 5000, 20, null);

    // Register the bounds of each state variable before building the basis.
    NormalizedVariableFeatures scaledInputs = new NormalizedVariableFeatures();
    scaledInputs = scaledInputs.variableDomain(
            "x", new VariableDomain(generator.physParams.xmin, generator.physParams.xmax));
    scaledInputs = scaledInputs.variableDomain(
            "v", new VariableDomain(generator.physParams.vmin, generator.physParams.vmax));
    FourierBasis fourier = new FourierBasis(scaledInputs, 4);

    LSPI policyIteration = new LSPI(mcDomain, 0.99, new DenseCrossProductFeatures(fourier, 3), trainingData);
    Policy result = policyIteration.runPolicyIteration(30, 1e-6);

    // Bring up the GUI before any rollout so every step is observed.
    Visualizer view = MountainCarVisualizer.getVisualizer(generator);
    VisualActionObserver guiObserver = new VisualActionObserver(view);
    guiObserver.initGUI();

    MCState valleyStart = new MCState(generator.physParams.valleyPos(), 0.);
    SimulatedEnvironment simEnv = new SimulatedEnvironment(mcDomain, valleyStart);
    simEnv.addObservers(guiObserver);

    int completed = 0;
    while (completed < 5) {
        PolicyUtils.rollout(result, simEnv);
        simEnv.resetEnvironment();
        completed++;
    }

    System.out.println("Finished");
}
// Build an MCState positioned at the value returned by physParams.valleyPos(),
// passing 0. as the second (v) constructor argument.
MCState s = new MCState(mcGen.physParams.valleyPos(), 0.);