/** Returns a new {@code RmsProp} config with the same hyperparameters; no updater state is shared. */
@Override
public RmsProp clone() {
    RmsProp copy = new RmsProp(learningRate, learningRateSchedule, rmsDecay, epsilon);
    return copy;
}
/** Returns a new {@code Nesterovs} config carrying the same hyperparameters and schedules. */
@Override
public Nesterovs clone() {
    Nesterovs copy = new Nesterovs(learningRate, learningRateSchedule, momentum, momentumISchedule);
    return copy;
}
/** Returns a fresh {@code NoOp} instance; this updater is stateless, so nothing is copied. */
@Override
public NoOp clone() {
    NoOp copy = new NoOp();
    return copy;
}
/** Returns a new {@code Adam} config with identical learning rate, schedule, betas and epsilon. */
@Override
public Adam clone() {
    Adam copy = new Adam(learningRate, learningRateSchedule, beta1, beta2, epsilon);
    return copy;
}
// NOTE(review): fragment — the enclosing applyUpdater(...) signature and the null
// state-check guarding this throw are outside the visible chunk; code unchanged.
throw new IllegalStateException("Updater has not been initialized with view state");
// Pull the Adam-family hyperparameters from the config for this iteration/epoch.
double beta1 = config.getBeta1();
double beta2 = config.getBeta2();
double learningRate = config.getLearningRate(iteration, epoch);
double epsilon = config.getEpsilon();
// NOTE(review): fragment — same shape as the previous one but presumably from a
// different Adam-variant updater class (signature not visible); code unchanged.
throw new IllegalStateException("Updater has not been initialized with view state");
// Pull the Adam-family hyperparameters from the config for this iteration/epoch.
double beta1 = config.getBeta1();
double beta2 = config.getBeta2();
double learningRate = config.getLearningRate(iteration, epoch);
double epsilon = config.getEpsilon();
/** Returns a new {@code Sgd} config with the same learning rate and schedule. */
@Override
public Sgd clone() {
    Sgd copy = new Sgd(learningRate, learningRateSchedule);
    return copy;
}
/** Returns a new {@code AdaDelta} config with the same rho and epsilon. */
@Override
public AdaDelta clone() {
    AdaDelta copy = new AdaDelta(rho, epsilon);
    return copy;
}
/**
 * Applies the RMSProp update in place: {@code gradient} is overwritten with the
 * adapted step lr * g / (sqrt(runningAvg(g^2)) + epsilon).
 *
 * @param gradient  raw gradient; mutated in place into the update step
 * @param iteration current iteration (resolves a scheduled learning rate)
 * @param epoch     current epoch (resolves a scheduled learning rate)
 */
@Override
public void applyUpdater(INDArray gradient, int iteration, int epoch) {
    if (lastGradient == null)
        throw new IllegalStateException("Updater has not been initialized with view state");
    double learningRate = config.getLearningRate(iteration, epoch);
    double rmsDecay = config.getRmsDecay();
    double epsilon = config.getEpsilon();
    // Exponential moving average of squared gradients, updated in place:
    // cache = decay * cache + (1 - decay) * g^2
    lastGradient.muli(rmsDecay).addi(gradient.mul(gradient).muli(1 - rmsDecay));
    // lr * gradient / (sqrt(cache) + 1e-8)
    // sqrt is taken on a dup() so the cache itself keeps the raw running average.
    gradient.muli(learningRate).divi(Transforms.sqrt(lastGradient.dup(gradientReshapeOrder), false).addi(epsilon));
}
}
/** Returns a new {@code AdaGrad} config with the same learning rate and epsilon. */
@Override
public AdaGrad clone() {
    AdaGrad copy = new AdaGrad(learningRate, epsilon);
    return copy;
}
/** Returns a new {@code Nadam} config with the same learning rate, betas and epsilon. */
@Override
public Nadam clone() {
    Nadam copy = new Nadam(learningRate, beta1, beta2, epsilon);
    return copy;
}
/**
 * Creates a copy of this updater configuration with identical hyperparameters.
 * Declares the covariant concrete return type {@code AdaMax} for consistency with the
 * other updater configs' {@code clone()} overrides; callers typed to the interface
 * are unaffected (covariant returns are source- and binary-compatible).
 */
@Override
public AdaMax clone() {
    return new AdaMax(learningRate, learningRateSchedule, beta1, beta2, epsilon);
}
// NOTE(review): fragment — the enclosing applyUpdater(...) signature and the null
// state-check guarding this throw are outside the visible chunk; code unchanged.
throw new IllegalStateException("Updater has not been initialized with view state");
// AdaDelta hyperparameters: decay rate rho and numerical-stability epsilon.
double rho = config.getRho();
double epsilon = config.getEpsilon();
/**
 * Applies per-feature (AdaGrad) learning rates to the gradient in place.
 * AdaGrad keeps a history of squared gradients; each gradient passed in becomes
 * adapted over time, hence the name "adagrad".
 *
 * @param gradient  the gradient to adapt; mutated in place into the update step
 * @param iteration current iteration (resolves a scheduled learning rate)
 * @param epoch     current epoch (resolves a scheduled learning rate)
 */
@Override
public void applyUpdater(INDArray gradient, int iteration, int epoch) {
    if (historicalGradient == null)
        throw new IllegalStateException("Updater has not been initialized with view state");
    double learningRate = config.getLearningRate(iteration, epoch);
    double epsilon = config.getEpsilon();
    // Accumulate squared gradients into the state: history += g^2 (in place).
    historicalGradient.addi(gradient.mul(gradient));
    // sqrt on a dup() so the state keeps raw sums; epsilon added for stability.
    INDArray sqrtHistory = sqrt(historicalGradient.dup(gradientReshapeOrder), false).addi(epsilon);
    // lr * gradient / (sqrt(sumSquaredGradients) + epsilon)
    gradient.muli(sqrtHistory.rdivi(learningRate));
}
}
/** Returns a new {@code AMSGrad} config with the same learning rate, schedule, betas and epsilon. */
@Override
public AMSGrad clone() {
    AMSGrad copy = new AMSGrad(learningRate, learningRateSchedule, beta1, beta2, epsilon);
    return copy;
}
@Override public void setStateViewArray(INDArray viewArray, long[] gradientShape, char gradientOrder, boolean initialize) { if (!viewArray.isRowVector()) throw new IllegalArgumentException("Invalid input: expect row vector input"); if (initialize) viewArray.assign(config.getEpsilon()); this.lastGradient = viewArray; //Reshape to match the expected shape of the input gradient arrays this.lastGradient = Shape.newShapeNoCopy(this.lastGradient, gradientShape, gradientOrder == 'f'); if (lastGradient == null) throw new IllegalStateException("Could not correctly reshape gradient view array"); gradientReshapeOrder = gradientOrder; }
// NOTE(review): fragment — the enclosing applyUpdater(...) signature and the null
// state-check guarding this throw are outside the visible chunk; code unchanged.
throw new IllegalStateException("Updater has not been initialized with view state");
// Nesterovs hyperparameters; momentum may itself follow a schedule.
double momentum = config.currentMomentum(iteration, epoch);
double learningRate = config.getLearningRate(iteration, epoch);
/**
 * Plain SGD: scales the gradient in place by the (possibly scheduled) learning rate.
 *
 * @param gradient  raw gradient; mutated in place into the update step
 * @param iteration current iteration (resolves a scheduled learning rate)
 * @param epoch     current epoch (resolves a scheduled learning rate)
 */
@Override
public void applyUpdater(INDArray gradient, int iteration, int epoch) {
    gradient.muli(config.getLearningRate(iteration, epoch));
}
}
private static MultiLayerConfiguration getConfiguration(){ int lstmLayerSize = 200; //Number of units in each LSTM layer int tbpttLength = 50; //Length for truncated backpropagation through time. i.e., do parameter updates ever 50 characters Map<Character, Integer> CHAR_TO_INT = SparkLSTMCharacterExample.getCharToInt(); int nIn = CHAR_TO_INT.size(); int nOut = CHAR_TO_INT.size(); //Set up network configuration: MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .updater(new Nesterovs(0.1)) .seed(12345) .l2(0.001) .weightInit(WeightInit.XAVIER) .list() .layer(0, new LSTM.Builder().nIn(nIn).nOut(lstmLayerSize).activation(Activation.TANH).build()) .layer(1, new LSTM.Builder().nIn(lstmLayerSize).nOut(lstmLayerSize).activation(Activation.TANH).build()) .layer(2, new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT).activation(Activation.SOFTMAX) //MCXENT + softmax for classification .nIn(lstmLayerSize).nOut(nOut).build()) .backpropType(BackpropType.TruncatedBPTT).tBPTTForwardLength(tbpttLength).tBPTTBackwardLength(tbpttLength) .pretrain(false).backprop(true) .build(); return conf; } }
/**
 * Builds a LeNet-style CNN configuration: two conv+maxpool stages, a dense layer,
 * and a 4-class softmax output, trained with SGD + Nesterov momentum.
 * Relies on class-level fields: seed, channels, height, width.
 */
public static MultiLayerConfiguration lenetModelConf() {
    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
            .seed(seed)
            .l2(0.005)
            .activation(Activation.RELU)
            .weightInit(WeightInit.XAVIER)
            .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
            .updater(new Nesterovs(0.0001, 0.9))
            .list()
            // 5x5 conv, stride 1, no padding; nIn = input channels.
            .layer(0, new ConvolutionLayer.Builder(new int[]{5, 5}, new int[]{1, 1}, new int[]{0, 0}).name("cnn1")
                    .nIn(channels).nOut(50).biasInit(0).build())
            .layer(1, new SubsamplingLayer.Builder(new int[]{2,2}, new int[]{2,2}).name("maxpool1").build())
            // NOTE(review): stride {5,5} here (vs {1,1} in cnn1) looks unusual for a
            // 5x5 kernel — confirm this aggressive downsampling is intentional.
            .layer(2, new ConvolutionLayer.Builder(new int[]{5,5}, new int[]{5, 5}, new int[]{1, 1}).name("cnn2")
                    .nOut(100).biasInit(0).build())
            .layer(3, new SubsamplingLayer.Builder(new int[]{2,2}, new int[]{2,2}).name("maxpool2").build())
            .layer(4, new DenseLayer.Builder().nOut(500).build())
            .layer(5, new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD)
                    .nOut(4)
                    .activation(Activation.SOFTMAX)
                    .build())
            .backprop(true).pretrain(false)
            // Input type lets DL4J infer nIn for layers 2, 4 and 5.
            .setInputType(InputType.convolutional(height, width, channels))
            .build();
    return conf;
}

public static void saveModel(FileSystem fs, Model model ) throws Exception{