/**
 * Initializes the mechanics for the given domain and sets the semi-wall pass through probability to semiWallPassThroughProb.
 * @param d the domain object
 * @param semiWallPassThroughProb the probability that an agent will pass through a semi-wall.
 */
public GridGameStandardMechanics(Domain d, double semiWallPassThroughProb){
	rand = RandomFactory.getMapped(0); //shared mapped RNG (index 0) so runs can be synchronized via RandomFactory.seedMapped
	domain = d;
	pMoveThroughSWall = semiWallPassThroughProb;
}
/**
 * Initializes for the {@link MountainCar} {@link Domain} object for which states will be generated.
 * The random position and velocity ranges default to the full ranges defined by the supplied physics parameters.
 * @param params the mountain car physics parameters specifying the boundaries
 */
public MCRandomStateGenerator(MountainCar.MCPhysicsParams params){
	this.rand = RandomFactory.getMapped(0);
	this.xmin = params.xmin;
	this.xmax = params.xmax;
	this.vmin = params.vmin;
	this.vmax = params.vmax;
}
/**
 * Initializes with the probability epsilon of taking a uniformly random action; no planner is set.
 * @param epsilon the probability of taking a random action.
 */
public EpsilonGreedy(double epsilon) {
	this.qplanner = null;
	this.rand = RandomFactory.getMapped(0);
	this.epsilon = epsilon;
}
/**
 * Initializes the mechanics for the given domain and sets the semi-wall pass through probability to 0.5.
 * @param d the domain object
 */
public GridGameStandardMechanics(Domain d){
	//delegate to the general constructor so the initialization logic lives in one place
	this(d, 0.5);
}
/**
 * Initializes with the {@link QProvider} to use and the value of epsilon, where epsilon is the probability of taking a random action.
 * @param planner the {@link QProvider} to use
 * @param epsilon the probability of taking a random action.
 */
public EpsilonGreedy(QProvider planner, double epsilon) {
	qplanner = planner;
	this.epsilon = epsilon;
	rand = RandomFactory.getMapped(0); //shared mapped RNG (index 0)
}
/**
 * Initializes with a {@link QProvider} whose Q-values this policy will follow greedily.
 * @param planner the {@link QProvider} to use
 */
public GreedyQPolicy(QProvider planner){
	qplanner = planner;
	rand = RandomFactory.getMapped(0); //shared mapped RNG (index 0)
}
/**
 * Initializes a graph action object for the given domain and for the action of the given number.
 * The name of this action will be the constant BASEACTIONNAMEi where i is the action number specified.
 * @param aId the action identifier number
 * @param transitionDynamics the underlying transition dynamics that also define the action preconditions
 */
public GraphActionType(int aId, Map<Integer, Map<Integer, Set<NodeTransitionProbability>>> transitionDynamics){
	this.transitionDynamics = transitionDynamics;
	this.aId = aId;
	rand = RandomFactory.getMapped(0);
}
/**
 * Initializes with a null {@code qplanner}; a planner must be assigned separately before use.
 */
public GreedyQPolicy(){
	qplanner = null;
	rand = RandomFactory.getMapped(0); //shared mapped RNG (index 0)
}
/**
 * Initializes the three data structures: the root node (null), the node map, and the random generator.
 */
protected void init(){
	root = null;
	nodeMap = new HashMap<T, StochasticTree<T>.STNode>();
	rand = RandomFactory.getMapped(2347636); //distinct RandomFactory map index reserved for this class
}
/**
 * Initializes the UCT planner's core parameters.
 * @param domain the domain in which to plan
 * @param gamma the discount factor (passed to solver initialization)
 * @param hashingFactory the state hashing factory to use
 * @param horizon the maximum rollout depth
 * @param nRollouts the maximum number of rollouts performed from the root
 * @param explorationBias the exploration bias constant
 */
protected void UCTInit(SADomain domain, double gamma, HashableStateFactory hashingFactory, int horizon, int nRollouts, int explorationBias){
	this.solverInit(domain, gamma, hashingFactory);
	this.maxHorizon = horizon;
	this.maxRollOutsFromRoot = nRollouts;
	this.explorationBias = explorationBias;
	goalCondition = null; //no goal condition by default
	rand = RandomFactory.getMapped(589449); //distinct RandomFactory map index reserved for this class
}
/**
 * Returns an action selected uniformly at random from all actions applicable in the given state.
 */
@Override
public Action action(State s) {
	List<Action> applicable = ActionUtils.allApplicableActionsForTypes(this.agentType.actions, s);
	int choice = RandomFactory.getMapped(0).nextInt(applicable.size());
	return applicable.get(choice);
}
/**
 * Samples a state from the belief distribution by rolling a uniform number and
 * walking the cumulative belief mass until it exceeds the roll.
 */
@Override
public State sample() {
	double roll = RandomFactory.getMapped(0).nextDouble();
	double cumulative = 0.;
	for(Map.Entry<Integer, Double> belief : this.beliefValues.entrySet()){
		cumulative += belief.getValue();
		if(roll < cumulative){
			return this.stateEnumerator.getStateForEnumerationId(belief.getKey());
		}
	}
	throw new RuntimeException("Error; could not sample from belief state because the beliefs did not sum to 1; they summed to: " + cumulative);
}
/** * Example usage. * @param args command line args */ public static void main(String []args){ //example //seed in main method of program to synchronize classes (comment out to break synchronization) RandomFactory.seedMapped(0, 943); //in each class that uses a random construct it like this: Random rand = RandomFactory.getMapped(0); //and use reference as normal System.out.println("" + rand.nextInt()); }
/**
 * Selects uniformly at random among any actions the model has not yet learned;
 * if all actions are modeled, defers to the source policy.
 */
@Override
public Action action(State s) {
	List<Action> unmodeled = KWIKModel.Helper.unmodeledActions(model, allActionTypes, s);
	if(unmodeled.isEmpty()){
		return this.sourcePolicy.action(s);
	}
	int choice = RandomFactory.getMapped(0).nextInt(unmodeled.size());
	return unmodeled.get(choice);
}
/**
 * Method to implement the {@link SampleModel#sample(State, Action)} method when the
 * {@link FullModel#transitions(State, Action)} method is implemented. Operates by calling
 * the {@link FullModel#transitions(State, Action)} method, rolls a random number, and selects a
 * transition according to the probability specified by {@link FullModel#transitions(State, Action)}.
 * @param model the {@link FullModel} with the implemented {@link FullModel#transitions(State, Action)} method.
 * @param s the input state
 * @param a the action to be applied in the input state
 * @return a sampled transition ({@link EnvironmentOutcome}).
 */
public static EnvironmentOutcome sampleByEnumeration(FullModel model, State s, Action a){
	List<TransitionProb> tps = model.transitions(s, a);
	double roll = RandomFactory.getMapped(0).nextDouble(); //uniform roll in [0,1)
	double sum = 0;
	for(TransitionProb tp : tps){
		sum += tp.p; //accumulate CDF; select the first transition whose cumulative mass exceeds the roll
		if(roll < sum){
			return tp.eo;
		}
	}
	throw new RuntimeException("Transition probabilities did not sum to one, they summed to " + sum);
}
}
/**
 * Generates the left state with probability probLeft; otherwise the right state.
 */
@Override
public State generateState() {
	double roll = RandomFactory.getMapped(0).nextDouble();
	if(roll < probLeft){
		return new TigerState(VAL_LEFT);
	}
	return new TigerState(VAL_RIGHT);
}
};
/**
 * Samples the next state by applying the action's nominal force, perturbed with
 * uniform noise in [-actionNoise, +actionNoise], to a copy of the input state.
 */
@Override
public State sample(State s, Action a) {
	s = s.copy();

	//determine the nominal force from the action direction
	double baseForce = 0.;
	String aname = a.actionName();
	if(aname.equals(CartPoleDomain.ACTION_LEFT)){
		baseForce = -physParams.actionForce;
	}
	else if(aname.equals(CartPoleDomain.ACTION_RIGHT)){
		baseForce = physParams.actionForce;
	}

	//perturb the force with uniform noise in [-actionNoise, +actionNoise]
	double noise = RandomFactory.getMapped(0).nextDouble() * (2 * physParams.actionNoise) - physParams.actionNoise;
	return updateState(s, baseForce + noise);
}
/**
 * Initializes DFS with specification of depth limit, whether to maintain a closed list that affects exploration, and whether paths
 * generated by options should be explored first.
 * @param domain the domain in which to plan
 * @param gc indicates the goal states
 * @param hashingFactory the state hashing factory to use
 * @param maxDepth depth limit of DFS. -1 specifies no limit.
 * @param maintainClosed whether to maintain a closed list or not
 * @param optionsFirst whether to explore paths generated by options first.
 */
protected void DFSInit(SADomain domain, StateConditionTest gc, HashableStateFactory hashingFactory, int maxDepth, boolean maintainClosed, boolean optionsFirst){
	this.deterministicPlannerInit(domain, gc, hashingFactory);
	this.maxDepth = maxDepth;
	this.maintainClosed = maintainClosed;
	if(optionsFirst){
		this.setOptionsFirst();
	}
	rand = RandomFactory.getMapped(0); //shared mapped RNG (index 0)
}
/**
 * Collects SARS tuples by repeatedly sampling the model with uniformly random applicable
 * actions, starting from the given state, until a terminal state is sampled or maxSteps
 * steps have been taken. Tuples are appended to intoDataset (created if null).
 */
@Override
public SARSData collectDataFrom(State s, SampleModel model, int maxSteps, SARSData intoDataset) {
	if(intoDataset == null){
		intoDataset = new SARSData();
	}

	State cur = s;
	boolean done = model.terminal(s);
	int steps = 0;
	while(!done && steps < maxSteps){
		//pick a uniformly random applicable action and sample its outcome
		List<Action> candidates = ActionUtils.allApplicableActionsForTypes(this.actionTypes, cur);
		Action chosen = candidates.get(RandomFactory.getMapped(0).nextInt(candidates.size()));
		EnvironmentOutcome outcome = model.sample(cur, chosen);
		intoDataset.add(cur, chosen, outcome.r, outcome.op);
		cur = outcome.op;
		done = outcome.terminated;
		steps++;
	}

	return intoDataset;
}
/**
 * Collects SARS tuples by executing uniformly random applicable actions in the environment
 * until the environment reaches a terminal state or maxSteps steps have been taken.
 * Tuples are appended to intoDataset (created if null).
 */
@Override
public SARSData collectDataFrom(Environment env, int maxSteps, SARSData intoDataset) {
	if(intoDataset == null){
		intoDataset = new SARSData(); //lazily create the dataset if the caller did not supply one
	}
	int nsteps = 0;
	while(!env.isInTerminalState() && nsteps < maxSteps){
		List<Action> gas = ActionUtils.allApplicableActionsForTypes(this.actionTypes, env.currentObservation());
		Action ga = gas.get(RandomFactory.getMapped(0).nextInt(gas.size())); //uniform random choice among applicable actions
		EnvironmentOutcome eo = env.executeAction(ga);
		intoDataset.add(eo.o, eo.a, eo.r, eo.op);
		nsteps++;
	}
	return intoDataset;
}
}