/**
 * Builds a LeNet-style CNN configuration: two conv + max-pool stages, a 500-unit
 * dense layer, and a 4-class softmax output. Relies on class-level fields
 * (seed, channels, height, width) not visible in this chunk.
 */
public static MultiLayerConfiguration lenetModelConf() {
    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
        .seed(seed)                        // fixed RNG seed for reproducible weight init
        .l2(0.005)                         // L2 weight decay applied to all layers
        .activation(Activation.RELU)       // default activation for layers that don't override it
        .weightInit(WeightInit.XAVIER)
        .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
        .updater(new Nesterovs(0.0001, 0.9)) // lr 1e-4, Nesterov momentum 0.9
        .list()
        // conv kernel 5x5, stride 1x1, padding 0x0
        .layer(0, new ConvolutionLayer.Builder(new int[]{5, 5}, new int[]{1, 1}, new int[]{0, 0}).name("cnn1")
            .nIn(channels).nOut(50).biasInit(0).build())
        .layer(1, new SubsamplingLayer.Builder(new int[]{2,2}, new int[]{2,2}).name("maxpool1").build())
        // NOTE(review): stride {5,5} with padding {1,1} is unusual for a LeNet conv — confirm intended
        .layer(2, new ConvolutionLayer.Builder(new int[]{5,5}, new int[]{5, 5}, new int[]{1, 1}).name("cnn2")
            .nOut(100).biasInit(0).build())
        .layer(3, new SubsamplingLayer.Builder(new int[]{2,2}, new int[]{2,2}).name("maxpool2").build())
        .layer(4, new DenseLayer.Builder().nOut(500).build())
        .layer(5, new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD)
            .nOut(4)                       // 4 output classes
            .activation(Activation.SOFTMAX)
            .build())
        .backprop(true).pretrain(false)
        // lets DL4J infer nIn for layers that omit it, from the conv input shape
        .setInputType(InputType.convolutional(height, width, channels))
        .build();
    return conf;
}

/** Persists the given model to the supplied (Hadoop) FileSystem. Body continues beyond this chunk. */
public static void saveModel(FileSystem fs, Model model ) throws Exception{
/**
 * Creates the per-user weight matrix (numUsers x nOut) backed by the supplied
 * parameter view. When {@code initializeParameters} is false the existing
 * values in the view are wrapped as-is; otherwise the matrix is filled using
 * the layer's configured weight-init scheme and distribution.
 */
protected INDArray createUserWeightMatrix(NeuralNetConfiguration conf, INDArray weightParamView, boolean initializeParameters) {
    FeedForwardLayer ffConf = (FeedForwardLayer) conf.getLayer();
    if (!initializeParameters) {
        // Reuse values already present in the view: no init scheme, no distribution.
        return createWeightMatrix(numUsers, ffConf.getNOut(), null, null, weightParamView, false);
    }
    Distribution distribution = Distributions.createDistribution(ffConf.getDist());
    return createWeightMatrix(numUsers, ffConf.getNOut(), ffConf.getWeightInit(), distribution, weightParamView, true);
}
/**
 * Builds a multi-width text-CNN graph: one Convolution1D + global max-pool
 * branch per n-gram width in {@code ngramFilters}, merged by depth and fed to
 * a softmax dense layer with an MCXENT loss layer on top.
 */
public static ComputationGraphConfiguration getConf() {
    ComputationGraphConfiguration.GraphBuilder builder = new NeuralNetConfiguration.Builder()
        .seed(12345)
        .updater(new Adam(0.01))
        .weightInit(WeightInit.RELU)
        .graphBuilder()
        .addInputs("in");
    String[] poolNames = new String[ngramFilters.length];
    for (int idx = 0; idx < ngramFilters.length; idx++) {
        int ngram = ngramFilters[idx];
        String filterName = String.format("ngram%d", ngram);
        poolNames[idx] = String.format("pool%d", ngram);
        builder = builder
            .addLayer(filterName, new Convolution1DLayer.Builder()
                .nOut(numFilters)
                .kernelSize(ngram)
                .activation(Activation.RELU)
                .build(), "in")
            .addLayer(poolNames[idx], new GlobalPoolingLayer.Builder(PoolingType.MAX).build(), filterName);
    }
    return builder.addVertex("concat", new MergeVertex(), poolNames)
        .addLayer("predict", new DenseLayer.Builder().nOut(numClasses).dropOut(dropoutRetain)
            .activation(Activation.SOFTMAX).build(), "concat")
        .addLayer("loss", new LossLayer.Builder(LossFunctions.LossFunction.MCXENT).build(), "predict")
        .setOutputs("loss")
        .setInputTypes(InputType.recurrent(W2V_VECTOR_SIZE, 1000))
        .build();
}
}
.weightInit(WeightInit.XAVIER)
.updater(new Nesterovs.Builder().learningRate(.01).build())      // weights: lr 0.01 with Nesterov momentum
.biasUpdater(new Nesterovs.Builder().learningRate(0.02).build()) // biases learn at 2x the weight lr
.list()
// conv 5x5, stride 1 — IDENTITY activation (no nonlinearity until the dense layer)
.layer(0, new ConvolutionLayer.Builder(5, 5)
    .nIn(nChannels)
    .stride(1, 1)
    .nOut(20)
    .activation(Activation.IDENTITY)
    .build())
.layer(1, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX)
    .kernelSize(2,2)
    .stride(2,2)
    .build())
.layer(2, new ConvolutionLayer.Builder(5, 5)
    .stride(1, 1)        // nIn inferred from the previous layer / input type
    .nOut(50)
    .activation(Activation.IDENTITY)
    .build())
.layer(3, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX)
    .kernelSize(2,2)
    .stride(2,2)
    .build())
.layer(4, new DenseLayer.Builder().activation(Activation.RELU)
    .nOut(500).build())
.layer(5, new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD)
    .nOut(outputNum)
.seed(12345)
.l2(0.001) //l2 regularization on all layers
.updater(new AdaGrad.Builder().learningRate(0.04).build())
.list()
.layer(0, new ConvolutionLayer.Builder(10, 10)
    .nIn(3) //3 channels: RGB
    .nOut(30)
    .stride(4, 4)
    .activation(Activation.RELU)
    .weightInit(WeightInit.RELU)
    .build()) //Output: (130-10+0)/4+1 = 31 -> 31*31*30
.layer(1, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX)
    .kernelSize(3, 3)
    .stride(2, 2).build()) //(31-3+0)/2+1 = 15
.layer(2, new ConvolutionLayer.Builder(3, 3)
    .nIn(30)
    .nOut(10)
    .stride(2, 2)
    .activation(Activation.RELU)
    .weightInit(WeightInit.RELU)
    .build()) //Output: (15-3+0)/2+1 = 7 -> 7*7*10 = 490
.layer(3, new DenseLayer.Builder()
    .activation(Activation.RELU)
    .nIn(490)  // 7*7*10 flattened conv output
    .nOut(50)
    .weightInit(WeightInit.RELU)
    // this layer uses a smaller learning rate than the conv layers above
    .updater(new AdaGrad.Builder().learningRate(0.01).build())
    .gradientNormalization(GradientNormalization.ClipElementWiseAbsoluteValue)
    .gradientNormalizationThreshold(10)
.graphBuilder()
.addInputs("input")
// Three parallel conv branches over the same input with kernel heights 3/4/5
// (n-gram widths); kernel width == vectorSize so each filter spans a whole vector.
.addLayer("cnn3", new ConvolutionLayer.Builder()
    .kernelSize(3,vectorSize)
    .stride(1,vectorSize)
    .nIn(1)
    .nOut(cnnLayerFeatureMaps)
    .build(), "input")
.addLayer("cnn4", new ConvolutionLayer.Builder()
    .kernelSize(4,vectorSize)
    .stride(1,vectorSize)
    .nIn(1)
    .nOut(cnnLayerFeatureMaps)
    .build(), "input")
.addLayer("cnn5", new ConvolutionLayer.Builder()
    .kernelSize(5,vectorSize)
    .stride(1,vectorSize)
    .nIn(1)
    .nOut(cnnLayerFeatureMaps)
    .build(), "input")
.addVertex("merge", new MergeVertex(), "cnn3", "cnn4", "cnn5") //Perform depth concatenation
.addLayer("globalPool", new GlobalPoolingLayer.Builder()
    .poolingType(globalPoolingType)
    .build(), "merge")
// Regression-style head: MSE loss with RELU output over numFactors targets
.addLayer("out", new OutputLayer.Builder()
    .lossFunction(LossFunctions.LossFunction.MSE)
    .activation(Activation.RELU)
    .nIn(3*cnnLayerFeatureMaps)  // one pooled vector per conv branch
    .nOut(numFactors)
.l2(lambdaReg)  // L2 regularization strength
.list()
.layer(0, new DenseLayer.Builder().nIn(inputDim).nOut(hiddenDim)
    .activation(Activation.fromString(hiddenActivation))
    .biasInit(0.1)
    .build())
// Autoencoder output: reconstructs the input dimension with a custom AutoRec loss
.layer(1, new OutputLayer.Builder(new AutoRecLossFunction()).nIn(hiddenDim).nOut(inputDim)
    .activation(Activation.fromString(outputActivation))
    .biasInit(0.1)
    .build())
.pretrain(false).backprop(true)
.build();
.l2(1e-4)
.list()
// MLP for 28x28 (MNIST-style) input: 784 -> 500 -> 100 -> 10 softmax
.layer(0, new DenseLayer.Builder().nIn(28 * 28).nOut(500).build())
.layer(1, new DenseLayer.Builder().nIn(500).nOut(100).build())
.layer(2, new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD)
    .activation(Activation.SOFTMAX).nIn(100).nOut(10).build())
.pretrain(false).backprop(true)
.build();
private static MultiLayerConfiguration getConfiguration(){ int lstmLayerSize = 200; //Number of units in each LSTM layer int tbpttLength = 50; //Length for truncated backpropagation through time. i.e., do parameter updates ever 50 characters Map<Character, Integer> CHAR_TO_INT = SparkLSTMCharacterExample.getCharToInt(); int nIn = CHAR_TO_INT.size(); int nOut = CHAR_TO_INT.size(); //Set up network configuration: MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .updater(new Nesterovs(0.1)) .seed(12345) .l2(0.001) .weightInit(WeightInit.XAVIER) .list() .layer(0, new LSTM.Builder().nIn(nIn).nOut(lstmLayerSize).activation(Activation.TANH).build()) .layer(1, new LSTM.Builder().nIn(lstmLayerSize).nOut(lstmLayerSize).activation(Activation.TANH).build()) .layer(2, new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT).activation(Activation.SOFTMAX) //MCXENT + softmax for classification .nIn(lstmLayerSize).nOut(nOut).build()) .backpropType(BackpropType.TruncatedBPTT).tBPTTForwardLength(tbpttLength).tBPTTBackwardLength(tbpttLength) .pretrain(false).backprop(true) .build(); return conf; } }
DarknetHelper.addLayers(b, 5, 2, 256, 512, 2);    //8x8 out
// All-convolutional classifier head: a 1x1 conv maps 512 feature maps to one
// map per class, then global average pooling feeds a softmax loss layer.
b.addLayer("convolution2d_6", new ConvolutionLayer.Builder(1, 1)
        .nIn(512)
        .nOut(TinyImageNetFetcher.NUM_LABELS)
        .weightInit(WeightInit.XAVIER)
        .stride(1, 1)
        .activation(Activation.IDENTITY)
        .build(), "maxpooling2d_5")
    .addLayer("globalpooling", new GlobalPoolingLayer.Builder(PoolingType.AVG).build(), "convolution2d_6")
    .addLayer("loss", new LossLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD).activation(Activation.SOFTMAX).build(), "globalpooling")
    .setOutputs("loss");
.l2(lambdaReg)  // L2 regularization strength
.list()
// CDAE hidden layer: carries an additional per-user weight matrix (numUsers rows)
.layer(0, new CDAELayer.Builder().nIn(inputDim).nOut(hiddenDim)
    .activation(Activation.fromString(hiddenActivation))
    .setNumUsers(numUsers)
    .build())
// Reconstruction output: squared loss back over the input dimension
.layer(1, new OutputLayer.Builder().nIn(hiddenDim).nOut(inputDim)
    .lossFunction(LossFunctions.LossFunction.SQUARED_LOSS)
    .activation(Activation.fromString(outputActivation))
    .build())
.pretrain(false).backprop(true)
.build();
/**
 * Extends the superclass gradient views with a view over the per-user weight
 * gradients. Flattened layout assumed: [W (nIn*nOut) | b (nOut) | userW
 * (numUsers*nOut)] — matching the parameter layout used by init().
 */
public Map<String, INDArray> getGradientsFromFlattened(NeuralNetConfiguration conf, INDArray gradientView) {
    Map<String, INDArray> out = super.getGradientsFromFlattened(conf, gradientView);
    FeedForwardLayer layerConf = (FeedForwardLayer) conf.getLayer();
    int nIn = layerConf.getNIn();
    int nOut = layerConf.getNOut();
    int nWeightParams = nIn * nOut;
    int nUserWeightParams = numUsers * nOut;
    // User-weight gradients occupy the segment immediately after W and b.
    INDArray userWeightGradientView = gradientView.get(NDArrayIndex.point(0),
        NDArrayIndex.interval(nWeightParams + nOut, nWeightParams + nOut + nUserWeightParams))
        .reshape('f', numUsers, nOut);  // column-major ('f') view of shape (numUsers, nOut)
    out.put(USER_WEIGHT_KEY, userWeightGradientView);
    return out;
}
}
public int numParams(NeuralNetConfiguration conf) { FeedForwardLayer layerConf = (FeedForwardLayer) conf.getLayer(); return super.numParams(conf) + numUsers * layerConf.getNOut(); // plus another user weight matrix }
// DropConnect (training only): randomly zeroes entries of the weight matrix
// itself rather than the activations; dropOut > 0 gates whether it applies.
if (conf.isUseDropConnect() && training && conf.getLayer().getDropOut() > 0) {
    W = Dropout.applyDropConnect(this, CDAEParamInitializer.WEIGHT_KEY);
.weightInit(WeightInit.XAVIER)
.updater(new Nesterovs.Builder().learningRate(.01).build())      // weights: lr 0.01 with Nesterov momentum
.biasUpdater(new Nesterovs.Builder().learningRate(0.02).build()) // biases learn at 2x the weight lr
.list()
// conv 5x5, stride 1 — IDENTITY activation (no nonlinearity until the dense layer)
.layer(0, new ConvolutionLayer.Builder(5, 5)
    .nIn(nChannels)
    .stride(1, 1)
    .nOut(20)
    .activation(Activation.IDENTITY)
    .build())
.layer(1, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX)
    .kernelSize(2,2)
    .stride(2,2)
    .build())
.layer(2, new ConvolutionLayer.Builder(5, 5)
    .stride(1, 1)        // nIn inferred from the previous layer / input type
    .nOut(50)
    .activation(Activation.IDENTITY)
    .build())
.layer(3, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX)
    .kernelSize(2,2)
    .stride(2,2)
    .build())
.layer(4, new DenseLayer.Builder().activation(Activation.RELU)
    .nOut(500).build())
.layer(5, new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD)
    .nOut(outputNum)
/**
 * Builds and returns a LeNet-style MultiLayerNetwork for 4-class image
 * classification. Same topology as lenetModelConf-style setups: two conv +
 * max-pool stages, a 500-unit dense layer, and a softmax output.
 */
public static MultiLayerNetwork lenetModel() {
    /**
     * Revised Lenet Model approach developed by ramgo2; achieves slightly above random.
     * Reference: https://gist.github.com/ramgo2/833f12e92359a2da9e5c2fb6333351c5
     **/
    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
        .seed(seed)
        .l2(0.005) // tried 0.0001, 0.0005
        .activation(Activation.RELU)       // default activation unless a layer overrides it
        .weightInit(WeightInit.XAVIER)
        .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
        .updater(new Nesterovs(0.0001,0.9)) // lr 1e-4, Nesterov momentum 0.9
        .list()
        // conv kernel 5x5, stride 1x1, padding 0x0
        .layer(0, new ConvolutionLayer.Builder(new int[]{5, 5}, new int[]{1, 1}, new int[]{0, 0}).name("cnn1")
            .nIn(channels).nOut(50).biasInit(0).build())
        .layer(1, new SubsamplingLayer.Builder(new int[]{2,2}, new int[]{2,2}).name("maxpool1").build())
        // NOTE(review): stride {5,5} with padding {1,1} is unusual for a LeNet conv — confirm intended
        .layer(2, new ConvolutionLayer.Builder(new int[]{5,5}, new int[]{5, 5}, new int[]{1, 1}).name("cnn2")
            .nOut(100).biasInit(0).build())
        .layer(3, new SubsamplingLayer.Builder(new int[]{2,2}, new int[]{2,2}).name("maxpool2").build())
        .layer(4, new DenseLayer.Builder().nOut(500).build())
        .layer(5, new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD)
            .nOut(4)                       // 4 output classes
            .activation(Activation.SOFTMAX)
            .build())
        .backprop(true).pretrain(false)
        // lets DL4J infer nIn for layers that omit it, from the conv input shape
        .setInputType(InputType.convolutional(height, width, channels))
        .build();
    // NOTE(review): network is returned uninitialized — caller presumably invokes init(); confirm.
    return new MultiLayerNetwork(conf);
}
.l2(0.001)
.weightInit(WeightInit.XAVIER)
.updater(new RmsProp.Builder().learningRate(0.1).build())
.list()
// Two stacked TANH LSTM layers followed by a softmax RNN output layer
.layer(0, new LSTM.Builder().nIn(iter.inputColumns()).nOut(lstmLayerSize)
    .activation(Activation.TANH).build())
.layer(1, new LSTM.Builder().nIn(lstmLayerSize).nOut(lstmLayerSize)
    .activation(Activation.TANH).build())
.layer(2, new RnnOutputLayer.Builder(LossFunction.MCXENT).activation(Activation.SOFTMAX) //MCXENT + softmax for classification
    .nIn(lstmLayerSize).nOut(nOut).build())
// Truncated BPTT bounds memory/compute on long sequences
.backpropType(BackpropType.TruncatedBPTT).tBPTTForwardLength(tbpttLength).tBPTTBackwardLength(tbpttLength)
.pretrain(false).backprop(true)
/**
 * Initializes all parameters for the CDAE layer: the superclass's W and b plus
 * the per-user weight matrix, all as views into {@code paramsView}. Flattened
 * layout: [W (nIn*nOut) | b (nOut) | userW (numUsers*nOut)].
 */
public Map<String, INDArray> init(NeuralNetConfiguration conf, INDArray paramsView, boolean initializeParams) {
    Map<String, INDArray> params = super.init(conf, paramsView, initializeParams);
    FeedForwardLayer layerConf = (FeedForwardLayer) conf.getLayer();
    int nIn = layerConf.getNIn();
    int nOut = layerConf.getNOut();
    int nWeightParams = nIn * nOut;
    int nUserWeightParams = numUsers * nOut;
    // User weights occupy the segment immediately after W and b.
    INDArray userWeightView = paramsView.get(NDArrayIndex.point(0),
        NDArrayIndex.interval(nWeightParams + nOut, nWeightParams + nOut + nUserWeightParams));
    params.put(USER_WEIGHT_KEY, this.createUserWeightMatrix(conf, userWeightView, initializeParams));
    // NOTE(review): registers the extra variable with the conf — presumably required
    // so updaters/serialization see it; confirm against ParamInitializer contract.
    conf.addVariable(USER_WEIGHT_KEY);
    return params;
}
.weightInit(WeightInit.XAVIER)
.updater(new Nesterovs.Builder().learningRate(.01).build())      // weights: lr 0.01 with Nesterov momentum
.biasUpdater(new Nesterovs.Builder().learningRate(0.02).build()) // biases learn at 2x the weight lr
.list()
// conv 5x5, stride 1 — IDENTITY activation (no nonlinearity until the dense layer)
.layer(0, new ConvolutionLayer.Builder(5, 5)
    .nIn(nChannels)
    .stride(1, 1)
    .nOut(20)
    .activation(Activation.IDENTITY)
    .build())
.layer(1, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX)
    .kernelSize(2,2)
    .stride(2,2)
    .build())
.layer(2, new ConvolutionLayer.Builder(5, 5)
    .stride(1, 1)        // nIn inferred from the previous layer / input type
    .nOut(50)
    .activation(Activation.IDENTITY)
    .build())
.layer(3, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX)
    .kernelSize(2,2)
    .stride(2,2)
    .build())
.layer(4, new DenseLayer.Builder().activation(Activation.RELU)
    .nOut(500).build())
.layer(5, new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD)
    .nOut(outputNum)
// Element-wise gradient clipping to [-1, 1] for training stability
.gradientNormalization(GradientNormalization.ClipElementWiseAbsoluteValue).gradientNormalizationThreshold(1.0)
.list()
// Single TANH LSTM (256 units) over word vectors, 2-class softmax RNN output
.layer(0, new LSTM.Builder().nIn(vectorSize).nOut(256)
    .activation(Activation.TANH).build())
.layer(1, new RnnOutputLayer.Builder().activation(Activation.SOFTMAX)
    .lossFunction(LossFunctions.LossFunction.MCXENT).nIn(256).nOut(2).build())
.pretrain(false).backprop(true).build();