/**
 * Returns a copy of this policy: same epsilon, target agent, Q-source
 * provider, and joint-policy agent types as this instance.
 * @return a new {@link EGreedyJointPolicy} configured identically to this one
 */
@Override
public JointPolicy copy() {
    EGreedyJointPolicy clone = new EGreedyJointPolicy(this.epsilon);
    clone.setTargetAgent(this.targetAgent);
    clone.setQSourceProvider(this.qSourceProvider);
    clone.setAgentTypesInJointPolicy(this.agentsInJointPolicy);
    return clone;
}
/**
 * Returns a copy of this policy: same epsilon, tie-breaking mode, agent
 * types, Q-source provider, and target agent as this instance.
 * @return a new {@link EGreedyMaxWellfare} configured identically to this one
 */
@Override
public JointPolicy copy() {
    EGreedyMaxWellfare np = new EGreedyMaxWellfare(this.epsilon, this.breakTiesRandomly);
    np.setAgentTypesInJointPolicy(this.agentsInJointPolicy);
    np.setQSourceProvider(this.qSourceProvider);
    // Bug fix: the target agent must be carried over too, otherwise the copy
    // is not equivalent to this policy; the sibling copy() implementations
    // (EGreedyJointPolicy, EMinMaxPolicy) copy it as well.
    np.setTargetAgent(this.targetAgent);
    return np;
}
/**
 * Returns a copy of this policy: same epsilon, target agent, Q-source
 * provider, and joint-policy agent types as this instance.
 * @return a new {@link EMinMaxPolicy} configured identically to this one
 */
@Override
public JointPolicy copy() {
    EMinMaxPolicy clone = new EMinMaxPolicy(this.epsilon);
    clone.setAgentTypesInJointPolicy(this.agentsInJointPolicy);
    clone.setQSourceProvider(this.qSourceProvider);
    clone.setTargetAgent(this.targetAgent);
    return clone;
}
/**
 * Returns a copy of this policy: same equilibrium objective, epsilon, agent
 * types, Q-source provider, and target agent as this instance.
 * @return a new {@link ECorrelatedQJointPolicy} configured identically to this one
 */
@Override
public JointPolicy copy() {
    ECorrelatedQJointPolicy jp = new ECorrelatedQJointPolicy(this.objectiveType, this.epsilon);
    jp.setQSourceProvider(this.qSourceProvider);
    jp.setAgentTypesInJointPolicy(this.agentsInJointPolicy);
    // Bug fix: carry over the target agent as the sibling copy()
    // implementations (EGreedyJointPolicy, EMinMaxPolicy) do; without it the
    // copy is not equivalent to this policy.
    jp.setTargetAgent(this.targetAgent);
    return jp;
}
public CoCoQLearningFactory(SGDomain d, double discount, LearningRate learningRate, HashableStateFactory hashFactory, QFunction qInit, boolean queryOtherAgentsForTheirQValues, double epsilon){ this.init(d, discount, learningRate, hashFactory, qInit, new CoCoQ(), queryOtherAgentsForTheirQValues, new PolicyFromJointPolicy(new EGreedyMaxWellfare(epsilon))); }
// Fully greedy (epsilon = 0) max-welfare joint policy; tie-breaking is made
// deterministic so repeated queries in the same state pick the same joint
// action. NOTE(review): statement fragment from a larger method not fully
// visible here.
EGreedyMaxWellfare jp0 = new EGreedyMaxWellfare(0.); jp0.setBreakTiesRandomly(false); //don't break ties randomly
public static void VICorrelatedTest(){ GridGame gridGame = new GridGame(); final OOSGDomain domain = gridGame.generateDomain(); final HashableStateFactory hashingFactory = new SimpleHashableStateFactory(); final State s = GridGame.getPrisonersDilemmaInitialState(); JointRewardFunction rf = new GridGame.GGJointRewardFunction(domain, -1, 100, false); TerminalFunction tf = new GridGame.GGTerminalFunction(domain); SGAgentType at = GridGame.getStandardGridGameAgentType(domain); MAValueIteration vi = new MAValueIteration(domain, rf, tf, 0.99, hashingFactory, 0., new CorrelatedQ(CorrelatedEquilibriumSolver.CorrelatedEquilibriumObjective.UTILITARIAN), 0.00015, 50); World w = new World(domain, rf, tf, s); //for correlated Q, use a correlated equilibrium policy joint policy ECorrelatedQJointPolicy jp0 = new ECorrelatedQJointPolicy(CorrelatedEquilibriumSolver.CorrelatedEquilibriumObjective.UTILITARIAN, 0.); MultiAgentDPPlanningAgent a0 = new MultiAgentDPPlanningAgent(domain, vi, new PolicyFromJointPolicy(0, jp0, true), "agent0", at); MultiAgentDPPlanningAgent a1 = new MultiAgentDPPlanningAgent(domain, vi, new PolicyFromJointPolicy(1, jp0, true), "agent1", at); w.join(a0); w.join(a1); GameEpisode ga = null; List<GameEpisode> games = new ArrayList<GameEpisode>(); for(int i = 0; i < 10; i++){ ga = w.runGame(); games.add(ga); } Visualizer v = GGVisualizer.getVisualizer(9, 9); new GameSequenceVisualizer(v, domain, games); }
// NOTE(review): truncated method head — the body continues beyond this view.
// Visible behavior: enumerates all joint actions for state s and fetches the
// per-agent Q-sources from the provider; presumably the (unseen) remainder
// selects a joint action epsilon-greedily from those Q-values — confirm in
// the full file.
@Override public Action action(State s) { List<JointAction> jas = this.getAllJointActions(s); JointAction selected; AgentQSourceMap qSources = this.qSourceProvider.getQSources();
// NOTE(review): truncated method head — the body continues beyond this view.
// Visible behavior: enumerates all joint actions for state s and fetches the
// per-agent Q-sources from the provider; the selection logic that assigns
// `selected` is not visible here — confirm in the full file.
@Override public Action action(State s) { List<JointAction> jas = this.getAllJointActions(s); JointAction selected; AgentQSourceMap qSources = this.qSourceProvider.getQSources();
public MAMaxQLearningFactory(SGDomain d, double discount, LearningRate learningRate, HashableStateFactory hashFactory, QFunction qInit, boolean queryOtherAgentsForTheirQValues, double epsilon){ this.init(d, discount, learningRate, hashFactory, qInit, new MaxQ(), queryOtherAgentsForTheirQValues, new PolicyFromJointPolicy(new EGreedyJointPolicy(epsilon))); }
/**
 * Initializes with a constant learning rate and constant initial Q-values.
 * The learning policy defaults to an epsilon-greedy max-welfare joint policy
 * with epsilon = 0.1.
 * @param d the domain in which to perform learning
 * @param discount the discount factor
 * @param learningRate the constant learning rate
 * @param hashFactory the hashing factory used to index states and Q-values
 * @param qInit the default Q-value to which all initial Q-values will be initialized
 * @param backupOperator the backup operator to use that defines the solution concept being learned
 * @param queryOtherAgentsForTheirQValues if true, then the agent uses the Q-values for other agents that are stored by them; if false then the agent stores a Q-value for each other agent in the world.
 */
public MAQLFactory(SGDomain d, double discount, double learningRate, HashableStateFactory hashFactory, double qInit, SGBackupOperator backupOperator, boolean queryOtherAgentsForTheirQValues){
    this.domain = d;
    this.discount = discount;
    this.hashingFactory = hashFactory;
    this.backupOperator = backupOperator;
    this.queryOtherAgentsQSource = queryOtherAgentsForTheirQValues;
    // Wrap the scalar arguments in their function-object forms.
    this.learningRate = new ConstantLR(learningRate);
    this.qInit = new ConstantValueFunction(qInit);
    // Default learning policy: epsilon-greedy max welfare, epsilon = 0.1.
    this.learningPolicy = new PolicyFromJointPolicy(new EGreedyMaxWellfare(0.1));
}
// NOTE(review): truncated method head — the body continues beyond this view.
// Visible behavior: enumerates all joint actions for state s and fetches the
// per-agent Q-sources; the construction of the returned action-probability
// distribution is not visible here — confirm in the full file.
@Override public List<ActionProb> policyDistribution(State s) { List<JointAction> jas = this.getAllJointActions(s); AgentQSourceMap qSources = this.qSourceProvider.getQSources();
// NOTE(review): truncated method head — the body continues beyond this view.
// Visible behavior: enumerates all joint actions for state s, fetches the
// per-agent Q-sources, and prepares an aggregator that sums a probability
// mass per joint action; how the masses are computed is not visible here —
// confirm in the full file.
@Override public List<ActionProb> policyDistribution(State s) { List<JointAction> jas = this.getAllJointActions(s); AgentQSourceMap qSources = this.qSourceProvider.getQSources(); HashedAggregator<JointAction> sumProb = new HashedAggregator<JointAction>();
/**
 * Initializes this Q-learning agent with function-object learning rate and
 * Q-value initialization. The agent's Q-source is a
 * {@link HashBackedQSource} and the learning policy defaults to an
 * epsilon = 0.1 maximum-welfare ({@link EGreedyMaxWellfare}) derived policy.
 * If queryOtherAgentsForTheirQValues is true, this agent stores only its own
 * Q-values and reads the other agents' stored Q-values for theirs.
 * @param d the domain in which to perform learning
 * @param discount the discount factor
 * @param learningRate the learning rate function to use
 * @param hashFactory the hashing factory used to index states and Q-values
 * @param qInit the Q-value initialization to use
 * @param backupOperator the backup operator to use that defines the solution concept being learned
 * @param queryOtherAgentsForTheirQValues if true, then the agent uses the Q-values for other agents that are stored by them; if false then the agent stores a Q-value for each other agent in the world.
 * @param agentName the name of the agent
 * @param agentType the {@link SGAgentType} for the agent defining its action space
 */
public MultiAgentQLearning(SGDomain d, double discount, LearningRate learningRate, HashableStateFactory hashFactory, QFunction qInit, SGBackupOperator backupOperator, boolean queryOtherAgentsForTheirQValues, String agentName, SGAgentType agentType){
    this.init(d, agentName, agentType);
    this.discount = discount;
    this.learningRate = learningRate;
    this.hashingFactory = hashFactory;
    this.qInit = qInit;
    this.backupOperator = backupOperator;
    this.queryOtherAgentsQSource = queryOtherAgentsForTheirQValues;
    // This agent's own Q-values live in a hash-backed table keyed by the
    // hashing factory and seeded from qInit.
    this.myQSource = new QSourceForSingleAgent.HashBackedQSource(this.hashingFactory, this.qInit);
    // Default learning policy: epsilon-greedy max welfare over this agent's
    // Q-sources, epsilon = 0.1.
    this.learningPolicy = new PolicyFromJointPolicy(new EGreedyMaxWellfare(this, 0.1));
}
/**
 * Initializes this Q-learning agent with a constant learning rate and
 * constant initial Q-values. The agent's Q-source is a
 * {@link HashBackedQSource} and the learning policy defaults to an
 * epsilon = 0.1 maximum-welfare ({@link EGreedyMaxWellfare}) derived policy.
 * If queryOtherAgentsForTheirQValues is true, this agent stores only its own
 * Q-values and reads the other agents' stored Q-values for theirs.
 * @param d the domain in which to perform learning
 * @param discount the discount factor
 * @param learningRate the constant learning rate
 * @param hashFactory the hashing factory used to index states and Q-values
 * @param qInit the default Q-value to which all initial Q-values will be initialized
 * @param backupOperator the backup operator to use that defines the solution concept being learned
 * @param queryOtherAgentsForTheirQValues if true, then the agent uses the Q-values for other agents that are stored by them; if false then the agent stores a Q-value for each other agent in the world.
 * @param agentName the name of the agent
 * @param agentType the {@link SGAgentType} for the agent defining its action space
 */
public MultiAgentQLearning(SGDomain d, double discount, double learningRate, HashableStateFactory hashFactory, double qInit, SGBackupOperator backupOperator, boolean queryOtherAgentsForTheirQValues, String agentName, SGAgentType agentType){
    this.init(d, agentName, agentType);
    this.discount = discount;
    this.hashingFactory = hashFactory;
    this.backupOperator = backupOperator;
    this.queryOtherAgentsQSource = queryOtherAgentsForTheirQValues;
    // Wrap the scalar arguments in their function-object forms.
    this.learningRate = new ConstantLR(learningRate);
    this.qInit = new ConstantValueFunction(qInit);
    // This agent's own Q-values live in a hash-backed table keyed by the
    // hashing factory and seeded from qInit.
    this.myQSource = new QSourceForSingleAgent.HashBackedQSource(this.hashingFactory, this.qInit);
    // Default learning policy: epsilon-greedy max welfare over this agent's
    // Q-sources, epsilon = 0.1.
    this.learningPolicy = new PolicyFromJointPolicy(new EGreedyMaxWellfare(this, 0.1));
}