@Override
public RmsProp clone() {
    return new RmsProp(learningRate, learningRateSchedule, rmsDecay, epsilon);
}
.l2(0.001)
.weightInit(WeightInit.XAVIER)
.updater(new RmsProp(0.1))
.list()
.layer(0, new LSTM.Builder()
        .nIn(CHAR_TO_INT.size())
        .nOut(lstmLayerSize)
        .activation(Activation.TANH)
        .build())
@Override
public void initializeBackend() {
    backend = new org.nd4j.linalg.learning.config.RmsProp();
}
@Override
public RmsProp clone() {
    return new RmsProp(learningRate, rmsDecay, epsilon);
}
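// A minimal sketch (hypothetical values) of why these clone() overrides matter:
// IUpdater.clone() hands each layer its own copy of the updater configuration,
// so mutating one layer's copy cannot leak into another's.
RmsProp base = new RmsProp(0.05, 0.95, 1e-8); // (learningRate, rmsDecay, epsilon)
RmsProp copy = base.clone();                  // independent instance, same hyperparameters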
public IUpdater getIUpdaterWithDefaultConfig() {
    switch (this) {
        case SGD:
            return new Sgd();
        case ADAM:
            return new Adam();
        case ADAMAX:
            return new AdaMax();
        case ADADELTA:
            return new AdaDelta();
        case NESTEROVS:
            return new Nesterovs();
        case NADAM:
            return new Nadam();
        case ADAGRAD:
            return new AdaGrad();
        case RMSPROP:
            return new RmsProp();
        case NONE:
            return new NoOp();
        case CUSTOM:
        default:
            throw new UnsupportedOperationException("Unknown or not supported updater: " + this);
    }
}
}
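// A usage sketch for the switch above, assuming DL4J's Updater enum: each
// constant resolves to a freshly constructed IUpdater with library defaults
// (for RmsProp in recent ND4J versions these are learning rate 1e-1,
// rmsDecay 0.95, epsilon 1e-8 -- worth verifying against the version in use).
IUpdater updater = Updater.RMSPROP.getIUpdaterWithDefaultConfig();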
public static MultiLayerNetwork train(WordVectors wordVectors, ObjectStream<NameSample> samples,
        int epochs, int windowSize, String[] labels) throws IOException {
    int vectorSize = 300;
    int layerSize = 256;

    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
            .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
            .updater(new RmsProp(0.01))
            .l2(0.001)
            .weightInit(WeightInit.XAVIER)
            .list()
            .layer(0, new GravesLSTM.Builder().nIn(vectorSize).nOut(layerSize)
                    .activation(Activation.TANH).build())
            .layer(1, new RnnOutputLayer.Builder().activation(Activation.SOFTMAX)
                    .lossFunction(LossFunctions.LossFunction.MCXENT)
                    .nIn(layerSize).nOut(3).build())
            .pretrain(false).backprop(true)
            .build();

    MultiLayerNetwork net = new MultiLayerNetwork(conf);
    net.init();
    net.setListeners(new ScoreIterationListener(5));

    // TODO: Extract labels on the fly from the data
    DataSetIterator train = new NameSampleDataSetIterator(samples, wordVectors, windowSize, labels);

    System.out.println("Starting training");
    for (int i = 0; i < epochs; i++) {
        net.fit(train);
        train.reset();
        System.out.println(String.format("Finished epoch %d", i));
    }
    return net;
}
.updater(new RmsProp(args.learningRate))
// ADAM .adamMeanDecay(0.9).adamVarDecay(0.999)
.l2(1e-5)
.weightInit(WeightInit.XAVIER)
public MultiLayerConfiguration conf() {
    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
            .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1)
            .learningRate(0.01).seed(12345)
            .regularization(true).l2(0.001)
            .weightInit(WeightInit.XAVIER)
            .updater(new RmsProp())
            .list()
            .layer(0, new GravesLSTM.Builder().nIn(inputShape[1]).nOut(256)
                    .activation(Activation.TANH).build())
            .layer(1, new GravesLSTM.Builder().nOut(256).activation(Activation.TANH).build())
            .layer(2, new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                    .activation(Activation.SOFTMAX) // MCXENT + softmax for classification
                    .nOut(totalUniqueCharacters).build())
            .backpropType(BackpropType.TruncatedBPTT).tBPTTForwardLength(50).tBPTTBackwardLength(50)
            .pretrain(false).backprop(true)
            .build();
    return conf;
}
case RMSPROP:
    double rmsDecay = bl.getRmsDecay();
    bl.setIUpdater(new RmsProp(lr, rmsDecay, eps));
    break;
case NONE:
.iterations(iterations).activation(Activation.IDENTITY)
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
.updater(new RmsProp(0.1, 0.96, 0.001))
.weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0.0, 0.5))
.regularization(true).l1(1e-7).l2(5e-5)
.miniBatch(true)
.convolutionMode(ConvolutionMode.Truncate)
.graphBuilder();
.iterations(iterations).activation(Activation.RELU)
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
.updater(new RmsProp(0.1, 0.96, 0.001))
.weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0.0, 0.5))
.regularization(true).l2(5e-5)
.miniBatch(true)
.convolutionMode(ConvolutionMode.Truncate)
.graphBuilder();
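// To make the three-argument constructor new RmsProp(0.1, 0.96, 0.001) used above
// concrete, here is a plain-array sketch of the RMSProp step it configures. This
// is an illustration of the algorithm only, not ND4J's actual RmsPropUpdater
// (which operates on INDArrays and may place epsilon inside the square root).
public class RmsPropStepSketch {
    /** One in-place RMSProp update over flat parameter/gradient/cache arrays. */
    static void apply(double[] params, double[] grads, double[] cache,
                      double lr, double rmsDecay, double eps) {
        for (int i = 0; i < params.length; i++) {
            // Exponential moving average of squared gradients.
            cache[i] = rmsDecay * cache[i] + (1.0 - rmsDecay) * grads[i] * grads[i];
            // Scale the step by the root-mean-square of recent gradients.
            params[i] -= lr * grads[i] / (Math.sqrt(cache[i]) + eps);
        }
    }
}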