.layer(2, new RnnOutputLayer.Builder(LossFunction.MCXENT).activation(Activation.SOFTMAX) //MCXENT + softmax for classification
        .nIn(lstmLayerSize).nOut(nOut).build())
.backpropType(BackpropType.TruncatedBPTT).tBPTTForwardLength(tbpttLength).tBPTTBackwardLength(tbpttLength)
.pretrain(false).backprop(true)
.build();
.build())
.inputPreProcessor(0, new RnnToCnnPreProcessor(V_HEIGHT, V_WIDTH, 3))
.inputPreProcessor(3, new CnnToFeedForwardPreProcessor(7, 7, 10))
.inputPreProcessor(4, new FeedForwardToRnnPreProcessor())
.pretrain(false).backprop(true)
.backpropType(BackpropType.TruncatedBPTT)
.tBPTTForwardLength(V_NFRAMES / 5)
.tBPTTBackwardLength(V_NFRAMES / 5)
.build();
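// The index passed to inputPreProcessor(i, ...) names the layer whose INPUT is reshaped:
// above, preprocessor 0 turns the RNN-format video frames into CNN format, preprocessor 3
// flattens the final feature maps for the dense layer, and preprocessor 4 restores the
// time-series shape for the recurrent output. A minimal self-contained sketch of the same
// wiring, with small made-up dimensions (8x8 single-channel frames, hypothetical layer
// sizes) so the reshapes can be checked by hand:
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.layers.ConvolutionLayer;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
import org.deeplearning4j.nn.conf.layers.RnnOutputLayer;
import org.deeplearning4j.nn.conf.preprocessor.CnnToFeedForwardPreProcessor;
import org.deeplearning4j.nn.conf.preprocessor.FeedForwardToRnnPreProcessor;
import org.deeplearning4j.nn.conf.preprocessor.RnnToCnnPreProcessor;
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.lossfunctions.LossFunctions.LossFunction;

public class PreProcessorWiringSketch {
    public static MultiLayerConfiguration sketch() {
        return new NeuralNetConfiguration.Builder()
                .list()
                // layer 0: 3x3 conv over each 8x8 frame -> 6x6 feature maps, 4 channels
                .layer(0, new ConvolutionLayer.Builder(3, 3).nIn(1).nOut(4).activation(Activation.RELU).build())
                // layer 1: dense layer over the flattened 6*6*4 = 144 conv activations
                .layer(1, new DenseLayer.Builder().nIn(144).nOut(32).activation(Activation.RELU).build())
                // layer 2: per-time-step classification over 5 hypothetical classes
                .layer(2, new RnnOutputLayer.Builder(LossFunction.MCXENT).activation(Activation.SOFTMAX)
                        .nIn(32).nOut(5).build())
                .inputPreProcessor(0, new RnnToCnnPreProcessor(8, 8, 1))         // time series -> CNN input
                .inputPreProcessor(1, new CnnToFeedForwardPreProcessor(6, 6, 4)) // feature maps -> flat vector
                .inputPreProcessor(2, new FeedForwardToRnnPreProcessor())        // flat vector -> time series
                .pretrain(false).backprop(true)
                .build();
    }
}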
.backprop(true).pretrain(false)
.setInputType(InputType.convolutionalFlat(inputShape[2], inputShape[1], inputShape[0]))
.build();
if (truncatedBPTT > 0) { // assumed guard: a positive length enables truncated BPTT
    listBuilder.backpropType(BackpropType.TruncatedBPTT)
            .tBPTTForwardLength(truncatedBPTT).tBPTTBackwardLength(truncatedBPTT);
} else {
    listBuilder.backpropType(BackpropType.Standard);
}
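// A surrounding-context sketch for the guard above, with hypothetical layer sizes and a
// hypothetical truncatedBPTT value; a length <= 0 falls back to standard BPTT.
NeuralNetConfiguration.ListBuilder listBuilder = new NeuralNetConfiguration.Builder()
        .weightInit(WeightInit.XAVIER)
        .list()
        .layer(0, new LSTM.Builder().nIn(10).nOut(20).activation(Activation.TANH).build())
        .layer(1, new RnnOutputLayer.Builder(LossFunction.MCXENT).activation(Activation.SOFTMAX)
                .nIn(20).nOut(5).build());

int truncatedBPTT = 50; // hypothetical truncation length
if (truncatedBPTT > 0) {
    listBuilder.backpropType(BackpropType.TruncatedBPTT)
            .tBPTTForwardLength(truncatedBPTT).tBPTTBackwardLength(truncatedBPTT);
} else {
    listBuilder.backpropType(BackpropType.Standard);
}
MultiLayerConfiguration conf = listBuilder.build();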
.name("output").nOut(numLabels).activation(Activation.SOFTMAX).build()) .backprop(true).pretrain(false) .setInputType(InputType.convolutionalFlat(inputShape[2], inputShape[1], inputShape[0])).build();
private void initHelperMLN() {
    if (applyFrozen) {
        // Wrap every layer up to (and including) frozenTill so its parameters are not updated
        org.deeplearning4j.nn.api.Layer[] layers = origMLN.getLayers();
        for (int i = frozenTill; i >= 0; i--) {
            layers[i] = new FrozenLayer(layers[i]);
        }
        origMLN.setLayers(layers);
    }
    // Record the index of the last frozen layer
    for (int i = 0; i < origMLN.getnLayers(); i++) {
        if (origMLN.getLayer(i) instanceof FrozenLayer) {
            frozenInputLayer = i;
        }
    }
    // Collect the configurations of the remaining (unfrozen) layers
    List<NeuralNetConfiguration> allConfs = new ArrayList<>();
    for (int i = frozenInputLayer + 1; i < origMLN.getnLayers(); i++) {
        allConfs.add(origMLN.getLayer(i).conf());
    }
    // Build a helper network containing only the unfrozen layers, copying the
    // backprop/TBPTT settings and input preprocessors from the original configuration
    MultiLayerConfiguration c = origMLN.getLayerWiseConfigurations();
    unFrozenSubsetMLN = new MultiLayerNetwork(new MultiLayerConfiguration.Builder().backprop(c.isBackprop())
                    .inputPreProcessors(c.getInputPreProcessors()).pretrain(c.isPretrain())
                    .backpropType(c.getBackpropType()).tBPTTForwardLength(c.getTbpttFwdLength())
                    .tBPTTBackwardLength(c.getTbpttBackLength()).confs(allConfs).build());
    unFrozenSubsetMLN.init();
    // Copy parameters over from the corresponding layers of the original network
    for (int i = frozenInputLayer + 1; i < origMLN.getnLayers(); i++) {
        unFrozenSubsetMLN.getLayer(i - frozenInputLayer - 1).setParams(origMLN.getLayer(i).params());
    }
    //unFrozenSubsetMLN.setListeners(origMLN.getListeners());
}
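// The helper above wraps layers in FrozenLayer by hand. For reference, a sketch of the
// public transfer-learning API (hypothetical fine-tune settings; assumes this DL4J
// version ships the transferlearning package), which freezes every layer up to and
// including the given index in the same way:
FineTuneConfiguration ftc = new FineTuneConfiguration.Builder().updater(new Adam(1e-4)).build();
MultiLayerNetwork frozenCopy = new TransferLearning.Builder(origMLN)
        .fineTuneConfiguration(ftc)
        .setFeatureExtractor(frozenTill) // layers 0..frozenTill become FrozenLayer instances
        .build();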
public MultiLayerConfiguration conf() {
    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().trainingWorkspaceMode(workspaceMode)
                    .inferenceWorkspaceMode(workspaceMode).seed(seed).iterations(iterations)
                    .activation(Activation.IDENTITY).weightInit(WeightInit.XAVIER)
                    .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(new AdaDelta())
                    .regularization(false).convolutionMode(ConvolutionMode.Same).list()
                    // block 1
                    .layer(0, new ConvolutionLayer.Builder(new int[] {5, 5}, new int[] {1, 1}).name("cnn1")
                                    .nIn(inputShape[0]).nOut(20).activation(Activation.RELU).build())
                    .layer(1, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX, new int[] {2, 2},
                                    new int[] {2, 2}).name("maxpool1").build())
                    // block 2
                    .layer(2, new ConvolutionLayer.Builder(new int[] {5, 5}, new int[] {1, 1}).name("cnn2").nOut(50)
                                    .activation(Activation.RELU).build())
                    .layer(3, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX, new int[] {2, 2},
                                    new int[] {2, 2}).name("maxpool2").build())
                    // fully connected
                    .layer(4, new DenseLayer.Builder().name("ffn1").activation(Activation.RELU).nOut(500).build())
                    // output (LeNet-5 originally used radial basis function output units; softmax is used here instead)
                    .layer(5, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT).name("output")
                                    .nOut(numLabels).activation(Activation.SOFTMAX).build())
                    .setInputType(InputType.convolutionalFlat(inputShape[2], inputShape[1], inputShape[0]))
                    .backprop(true).pretrain(false).build();
    return conf;
}
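// Usage sketch for the configuration above: build and initialize the network, then
// inspect the per-layer parameter counts (numLabels, inputShape etc. as defined above).
MultiLayerNetwork network = new MultiLayerNetwork(conf());
network.init();
System.out.println(network.summary());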
return new MultiLayerConfiguration.Builder().backprop(backprop).inputPreProcessors(inputPreProcessors)
                .pretrain(pretrain).backpropType(backpropType).tBPTTForwardLength(tbpttFwdLength)
                .tBPTTBackwardLength(tbpttBackLength).setInputType(this.inputType)
                .trainingWorkspaceMode(globalConfig.trainingWorkspaceMode).cacheMode(globalConfig.cacheMode)
                .inferenceWorkspaceMode(globalConfig.inferenceWorkspaceMode).confs(list).build();
private MultiLayerConfiguration constructConf() {
    // Use the editedConfs list to make a new config
    List<NeuralNetConfiguration> allConfs = new ArrayList<>();
    allConfs.addAll(editedConfs);
    allConfs.addAll(appendConfs);

    // Set default layer names, if not set - as per NeuralNetConfiguration.ListBuilder.build()
    for (int i = 0; i < allConfs.size(); i++) {
        if (allConfs.get(i).getLayer().getLayerName() == null) {
            allConfs.get(i).getLayer().setLayerName("layer" + i);
        }
    }

    MultiLayerConfiguration conf = new MultiLayerConfiguration.Builder().inputPreProcessors(inputPreProcessors)
                    .setInputType(this.inputType).confs(allConfs).build();
    if (finetuneConfiguration != null) {
        finetuneConfiguration.applyToMultiLayerConfiguration(conf);
    }
    return conf;
}
}
public MultiLayerConfiguration conf() {
    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                    .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1)
                    .learningRate(0.01).seed(12345).regularization(true).l2(0.001).weightInit(WeightInit.XAVIER)
                    .updater(new RmsProp()).list()
                    .layer(0, new GravesLSTM.Builder().nIn(inputShape[1]).nOut(256).activation(Activation.TANH)
                                    .build())
                    .layer(1, new GravesLSTM.Builder().nOut(256).activation(Activation.TANH).build())
                    .layer(2, new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                                    .activation(Activation.SOFTMAX) //MCXENT + softmax for classification
                                    .nOut(totalUniqueCharacters).build())
                    .backpropType(BackpropType.TruncatedBPTT).tBPTTForwardLength(50).tBPTTBackwardLength(50)
                    .pretrain(false).backprop(true).build();
    return conf;
}
/**
 * When doing truncated BPTT: how many steps of truncation should be used in both the
 * forward and backward passes?<br>
 * Only applicable when doing backpropType(BackpropType.TruncatedBPTT)<br>
 * See: http://www.cs.utoronto.ca/~ilya/pubs/ilya_sutskever_phd_thesis.pdf
 *
 * @param bpttLength length > 0
 */
public Builder tBPTTLength(int bpttLength) {
    tBPTTForwardLength(bpttLength);
    return tBPTTBackwardLength(bpttLength);
}
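// Usage sketch: tBPTTLength(k) is shorthand for setting both truncation lengths to k,
// so these two builders (assuming a previously built List<NeuralNetConfiguration> confs)
// produce equivalent configurations:
MultiLayerConfiguration a = new MultiLayerConfiguration.Builder()
        .backpropType(BackpropType.TruncatedBPTT).tBPTTLength(50).confs(confs).build();
MultiLayerConfiguration b = new MultiLayerConfiguration.Builder()
        .backpropType(BackpropType.TruncatedBPTT)
        .tBPTTForwardLength(50).tBPTTBackwardLength(50).confs(confs).build();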
protected void intializeConfigurations() {
    // Lazily create empty defaults for any configuration objects that have not been set
    if (layerWiseConfigurations == null)
        layerWiseConfigurations = new MultiLayerConfiguration.Builder().build();

    if (layers == null)
        layers = new Layer[getnLayers()];

    if (defaultConfiguration == null)
        defaultConfiguration = new NeuralNetConfiguration.Builder().build();
}
private static MultiLayerConfiguration getConfiguration() {
    int lstmLayerSize = 200; //Number of units in each LSTM layer
    int tbpttLength = 50; //Length for truncated backpropagation through time, i.e., do parameter updates every 50 characters
    Map<Character, Integer> CHAR_TO_INT = SparkLSTMCharacterExample.getCharToInt();
    int nIn = CHAR_TO_INT.size();
    int nOut = CHAR_TO_INT.size();

    //Set up network configuration:
    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                    .updater(new Nesterovs(0.1))
                    .seed(12345)
                    .l2(0.001)
                    .weightInit(WeightInit.XAVIER)
                    .list()
                    .layer(0, new LSTM.Builder().nIn(nIn).nOut(lstmLayerSize).activation(Activation.TANH).build())
                    .layer(1, new LSTM.Builder().nIn(lstmLayerSize).nOut(lstmLayerSize).activation(Activation.TANH).build())
                    .layer(2, new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT).activation(Activation.SOFTMAX) //MCXENT + softmax for classification
                                    .nIn(lstmLayerSize).nOut(nOut).build())
                    .backpropType(BackpropType.TruncatedBPTT).tBPTTForwardLength(tbpttLength).tBPTTBackwardLength(tbpttLength)
                    .pretrain(false).backprop(true)
                    .build();
    return conf;
}
}
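// A training sketch for the configuration above, assuming a hypothetical DataSetIterator
// trainData that yields one-hot character sequences matching nIn/nOut:
MultiLayerNetwork net = new MultiLayerNetwork(getConfiguration());
net.init();
net.setListeners(new ScoreIterationListener(10)); // log the score every 10 iterations
int numEpochs = 1; // hypothetical epoch count
for (int epoch = 0; epoch < numEpochs; epoch++) {
    net.fit(trainData);
    trainData.reset();
}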