.updater(new RmsProp(0.1)) .list() .layer(0, new LSTM.Builder().nIn(CHAR_TO_INT.size()).nOut(lstmLayerSize).activation(Activation.TANH).build()) .layer(1, new LSTM.Builder().nIn(lstmLayerSize).nOut(lstmLayerSize).activation(Activation.TANH).build()) .layer(2, new RnnOutputLayer.Builder(LossFunction.MCXENT).activation(Activation.SOFTMAX) //MCXENT + softmax for classification
.weightInit(WeightInit.RELU) .layer(3, new DenseLayer.Builder() .activation(Activation.RELU) .nIn(490)
private static MultiLayerConfiguration getConfiguration(){ int lstmLayerSize = 200; //Number of units in each LSTM layer int tbpttLength = 50; //Length for truncated backpropagation through time. i.e., do parameter updates ever 50 characters Map<Character, Integer> CHAR_TO_INT = SparkLSTMCharacterExample.getCharToInt(); int nIn = CHAR_TO_INT.size(); int nOut = CHAR_TO_INT.size(); //Set up network configuration: MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .updater(new Nesterovs(0.1)) .seed(12345) .l2(0.001) .weightInit(WeightInit.XAVIER) .list() .layer(0, new LSTM.Builder().nIn(nIn).nOut(lstmLayerSize).activation(Activation.TANH).build()) .layer(1, new LSTM.Builder().nIn(lstmLayerSize).nOut(lstmLayerSize).activation(Activation.TANH).build()) .layer(2, new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT).activation(Activation.SOFTMAX) //MCXENT + softmax for classification .nIn(lstmLayerSize).nOut(nOut).build()) .backpropType(BackpropType.TruncatedBPTT).tBPTTForwardLength(tbpttLength).tBPTTBackwardLength(tbpttLength) .pretrain(false).backprop(true) .build(); return conf; } }
switch (arguments.rnnKind) { case CUDNN_LSTM: rnnBuilder = new LSTM.Builder(); ((LSTM.Builder) rnnBuilder).nOut(numHiddenNodes).learningRateDecayPolicy(LEARNING_RATE_POLICY); break;
String lstmPreviousLayerName = i == 0 ? "indel" : "lstmindel_" + (i - 1); int numLSTMInputNodes = i == 0 ? numLSTMInputs : numLSTMIndelHiddenNodes; build.addLayer(lstmLayerName, new LSTM.Builder() .nIn(numLSTMInputNodes) .nOut(numLSTMIndelHiddenNodes)
.l2(0.001) .weightInit(WeightInit.XAVIER) .updater(new RmsProp.Builder().learningRate(0.1).build()) .list() .layer(0, new LSTM.Builder().nIn(iter.inputColumns()).nOut(lstmLayerSize) .activation(Activation.TANH).build()) .layer(1, new LSTM.Builder().nIn(lstmLayerSize).nOut(lstmLayerSize)