/**
 * Creates the runtime dense layer that corresponds to this configuration.
 *
 * <p>Before anything is constructed, {@code LayerValidation.assertNInNOutSet} is invoked with
 * this layer's name, index, nIn and nOut. The new layer then receives, in order: the listeners,
 * its index, the parameter view array, the parameter table produced by
 * {@code initializer().init(...)}, and finally the configuration itself.
 *
 * @param conf               configuration handed to the layer constructor, the parameter
 *                           initializer and {@code setConf}
 * @param iterationListeners listeners attached to the created layer
 * @param layerIndex         index assigned to the created layer (also used in validation)
 * @param layerParamsView    parameter view array given to the layer and to the initializer
 * @param initializeParams   flag forwarded unchanged to the parameter initializer
 * @return the configured layer instance
 */
@Override
public Layer instantiate(NeuralNetConfiguration conf, Collection<IterationListener> iterationListeners,
        int layerIndex, INDArray layerParamsView, boolean initializeParams) {
    LayerValidation.assertNInNOutSet("DenseLayer", getLayerName(), layerIndex, getNIn(), getNOut());

    // Fully-qualified name: the implementation class shares its simple name with this config class.
    org.deeplearning4j.nn.layers.feedforward.dense.DenseLayer denseLayer =
            new org.deeplearning4j.nn.layers.feedforward.dense.DenseLayer(conf);

    denseLayer.setListeners(iterationListeners);
    denseLayer.setIndex(layerIndex);
    denseLayer.setParamsViewArray(layerParamsView);
    // The parameter table is built from the same view array that was just set on the layer.
    denseLayer.setParamTable(initializer().init(conf, layerParamsView, initializeParams));
    denseLayer.setConf(conf);

    return denseLayer;
}
@Override public LayerMemoryReport getMemoryReport(InputType inputType) { InputType outputType = getOutputType(-1, inputType); int numParams = initializer().numParams(this); int updaterStateSize = (int) getIUpdater().stateSize(numParams); int trainSizeFixed = 0; int trainSizeVariable = 0; if (getDropOut() > 0) { if (false) { //TODO drop connect //Dup the weights... note that this does NOT depend on the minibatch size... trainSizeVariable += 0; //TODO } else { //Assume we dup the input trainSizeVariable += inputType.arrayElementsPerExample(); } } //Also, during backprop: we do a preOut call -> gives us activations size equal to the output size // which is modified in-place by activation function backprop // then we have 'epsilonNext' which is equivalent to input size trainSizeVariable += outputType.arrayElementsPerExample(); return new LayerMemoryReport.Builder(layerName, DenseLayer.class, inputType, outputType) .standardMemory(numParams, updaterStateSize) .workingMemory(0, 0, trainSizeFixed, trainSizeVariable) //No additional memory (beyond activations) for inference .cacheMemory(MemoryReport.CACHE_MODE_ALL_ZEROS, MemoryReport.CACHE_MODE_ALL_ZEROS) //No caching in DenseLayer .build(); }