.updater(new RmsProp(0.1)) .list() .layer(0, new LSTM.Builder().nIn(CHAR_TO_INT.size()).nOut(lstmLayerSize).activation(Activation.TANH).build()) .layer(1, new LSTM.Builder().nIn(lstmLayerSize).nOut(lstmLayerSize).activation(Activation.TANH).build()) .layer(2, new RnnOutputLayer.Builder(LossFunction.MCXENT).activation(Activation.SOFTMAX) //MCXENT + softmax for classification .nIn(lstmLayerSize).nOut(nOut).build())
.gradientNormalization(GradientNormalization.ClipElementWiseAbsoluteValue).gradientNormalizationThreshold(1.0) .list() .layer(0, new LSTM.Builder().nIn(vectorSize).nOut(256) .activation(Activation.TANH).build()) .layer(1, new RnnOutputLayer.Builder().activation(Activation.SOFTMAX)
.activation(Activation.SOFTSIGN) .nIn(50) .nOut(50) .weightInit(WeightInit.XAVIER) .updater(new AdaGrad.Builder().learningRate(0.008).build())
case CUDNN_LSTM: rnnBuilder = new LSTM.Builder(); ((LSTM.Builder) rnnBuilder).nOut(numHiddenNodes).learningRateDecayPolicy(LEARNING_RATE_POLICY); break; case DL4J_BidirectionalGraves:
build.addLayer(lstmLayerName, new LSTM.Builder() .nIn(numLSTMInputNodes) .nOut(numLSTMIndelHiddenNodes) .build(), lstmPreviousLayerName);
.updater(new RmsProp.Builder().learningRate(0.1).build()) .list() .layer(0, new LSTM.Builder().nIn(iter.inputColumns()).nOut(lstmLayerSize) .activation(Activation.TANH).build()) .layer(1, new LSTM.Builder().nIn(lstmLayerSize).nOut(lstmLayerSize) .activation(Activation.TANH).build()) .layer(2, new RnnOutputLayer.Builder(LossFunction.MCXENT).activation(Activation.SOFTMAX) //MCXENT + softmax for classification
private static MultiLayerConfiguration getConfiguration(){ int lstmLayerSize = 200; //Number of units in each LSTM layer int tbpttLength = 50; //Length for truncated backpropagation through time. i.e., do parameter updates ever 50 characters Map<Character, Integer> CHAR_TO_INT = SparkLSTMCharacterExample.getCharToInt(); int nIn = CHAR_TO_INT.size(); int nOut = CHAR_TO_INT.size(); //Set up network configuration: MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .updater(new Nesterovs(0.1)) .seed(12345) .l2(0.001) .weightInit(WeightInit.XAVIER) .list() .layer(0, new LSTM.Builder().nIn(nIn).nOut(lstmLayerSize).activation(Activation.TANH).build()) .layer(1, new LSTM.Builder().nIn(lstmLayerSize).nOut(lstmLayerSize).activation(Activation.TANH).build()) .layer(2, new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT).activation(Activation.SOFTMAX) //MCXENT + softmax for classification .nIn(lstmLayerSize).nOut(nOut).build()) .backpropType(BackpropType.TruncatedBPTT).tBPTTForwardLength(tbpttLength).tBPTTBackwardLength(tbpttLength) .pretrain(false).backprop(true) .build(); return conf; } }