public CoCoQLearningFactory(SGDomain d, double discount, LearningRate learningRate, HashableStateFactory hashFactory, QFunction qInit, boolean queryOtherAgentsForTheirQValues, double epsilon){ this.init(d, discount, learningRate, hashFactory, qInit, new CoCoQ(), queryOtherAgentsForTheirQValues, new PolicyFromJointPolicy(new EGreedyMaxWellfare(epsilon))); }
/**
 * Creates a new {@link EGreedyMaxWellfare} constructed with this policy's epsilon and tie-breaking flag,
 * then hands this policy's agent set and Q-source provider to the new instance through its setters.
 * @return the newly created policy
 */
@Override
public JointPolicy copy() {
	EGreedyMaxWellfare duplicate = new EGreedyMaxWellfare(this.epsilon, this.breakTiesRandomly);

	// Same setter order as before: agents first, then the Q-source provider.
	duplicate.setAgentTypesInJointPolicy(this.agentsInJointPolicy);
	duplicate.setQSourceProvider(this.qSourceProvider);

	return duplicate;
}
/** * Initializes. The policy will be defaulted to a epsilon-greedy max welfare policy. * @param d the domain in which to perform learing * @param discount the discount factor * @param learningRate the constant learning rate * @param hashFactory the hashing factory used to index states and Q-values * @param qInit the default Q-value to which all initial Q-values will be initialized * @param backupOperator the backup operator to use that defines the solution concept being learned * @param queryOtherAgentsForTheirQValues it true, then the agent uses the Q-values for other agents that are stored by them; if false then the agent stores a Q-value for each other agent in the world. */ public MAQLFactory(SGDomain d, double discount, double learningRate, HashableStateFactory hashFactory, double qInit, SGBackupOperator backupOperator, boolean queryOtherAgentsForTheirQValues){ this.domain = d; this.discount = discount; this.learningRate = new ConstantLR(learningRate); this.hashingFactory = hashFactory; this.qInit = new ConstantValueFunction(qInit); this.backupOperator = backupOperator; this.queryOtherAgentsQSource = queryOtherAgentsForTheirQValues; this.learningPolicy = new PolicyFromJointPolicy(new EGreedyMaxWellfare(0.1)); }
/** * Initializes this Q-learning agent. This agent's Q-source will use a {@link HashBackedQSource} q-source and the learning policy is defaulted * to an epsilon = 0.1 maximum wellfare ({@link EGreedyMaxWellfare}) derived policy. If queryOtherAgentsForTheirQValues is set to true, then this agent will * only store its own Q-values and will use the other agent's stored Q-values to determine theirs. * @param d the domain in which to perform learing * @param discount the discount factor * @param learningRate the learning rate function to use * @param hashFactory the hashing factory used to index states and Q-values * @param qInit the q-value initialization to use * @param backupOperator the backup operator to use that defines the solution concept being learned * @param queryOtherAgentsForTheirQValues it true, then the agent uses the Q-values for other agents that are stored by them; if false then the agent stores a Q-value for each other agent in the world. * @param agentName the name of the agent * @param agentType the {@link SGAgentType} for the agent defining its action space */ public MultiAgentQLearning(SGDomain d, double discount, LearningRate learningRate, HashableStateFactory hashFactory, QFunction qInit, SGBackupOperator backupOperator, boolean queryOtherAgentsForTheirQValues, String agentName, SGAgentType agentType){ this.init(d, agentName, agentType); this.discount = discount; this.learningRate = learningRate; this.hashingFactory = hashFactory; this.qInit = qInit; this.backupOperator = backupOperator; this.queryOtherAgentsQSource = queryOtherAgentsForTheirQValues; this.myQSource = new QSourceForSingleAgent.HashBackedQSource(this.hashingFactory, this.qInit); this.learningPolicy = new PolicyFromJointPolicy(new EGreedyMaxWellfare(this, 0.1)); }
/** * Initializes this Q-learning agent. This agent's Q-source will use a {@link HashBackedQSource} q-source and the learning policy is defaulted * to an epsilon = 0.1 maximum wellfare ({@link EGreedyMaxWellfare}) derived policy. If queryOtherAgentsForTheirQValues is set to true, then this agent will * only store its own Q-values and will use the other agent's stored Q-values to determine theirs. * @param d the domain in which to perform learing * @param discount the discount factor * @param learningRate the constant learning rate * @param hashFactory the hashing factory used to index states and Q-values * @param qInit the default Q-value to which all initial Q-values will be initialized * @param backupOperator the backup operator to use that defines the solution concept being learned * @param queryOtherAgentsForTheirQValues it true, then the agent uses the Q-values for other agents that are stored by them; if false then the agent stores a Q-value for each other agent in the world. * @param agentName the name of the agent * @param agentType the {@link SGAgentType} for the agent defining its action space */ public MultiAgentQLearning(SGDomain d, double discount, double learningRate, HashableStateFactory hashFactory, double qInit, SGBackupOperator backupOperator, boolean queryOtherAgentsForTheirQValues, String agentName, SGAgentType agentType){ this.init(d, agentName, agentType); this.discount = discount; this.learningRate = new ConstantLR(learningRate); this.hashingFactory = hashFactory; this.qInit = new ConstantValueFunction(qInit); this.backupOperator = backupOperator; this.queryOtherAgentsQSource = queryOtherAgentsForTheirQValues; this.myQSource = new QSourceForSingleAgent.HashBackedQSource(this.hashingFactory, this.qInit); this.learningPolicy = new PolicyFromJointPolicy(new EGreedyMaxWellfare(this, 0.1)); }
// Create an EGreedyMaxWellfare joint policy with a constructor argument of 0
// (presumably epsilon, i.e. no random exploration -- confirm against the class),
// then switch off random tie-breaking on it.
EGreedyMaxWellfare jp0 = new EGreedyMaxWellfare(0.); jp0.setBreakTiesRandomly(false); //don't break ties randomly