/**
 * Builds the CNN-for-text computation graph configuration: one 1D-convolution +
 * global-max-pooling branch per n-gram width in {@code ngramFilters}, merged by a
 * {@link MergeVertex} and fed into a softmax dense layer with an MCXENT loss.
 *
 * @return the assembled (but not yet initialized) graph configuration
 */
public static ComputationGraphConfiguration getConf() {
    ComputationGraphConfiguration.GraphBuilder builder = new NeuralNetConfiguration.Builder()
            .seed(12345)
            .updater(new Adam(0.01))
            .weightInit(WeightInit.RELU)
            .graphBuilder()
            .addInputs("in");

    // One convolution + global-max-pool branch per n-gram width.
    String[] poolNames = new String[ngramFilters.length];
    for (int idx = 0; idx < ngramFilters.length; idx++) {
        int ngram = ngramFilters[idx];
        String filterName = String.format("ngram%d", ngram);
        poolNames[idx] = String.format("pool%d", ngram);

        builder = builder
                .addLayer(filterName,
                        new Convolution1DLayer.Builder()
                                .nOut(numFilters)
                                .kernelSize(ngram)
                                .activation(Activation.RELU)
                                .build(),
                        "in")
                .addLayer(poolNames[idx],
                        new GlobalPoolingLayer.Builder(PoolingType.MAX).build(),
                        filterName);
    }

    // Concatenate all pooled branch outputs, then classify.
    return builder
            .addVertex("concat", new MergeVertex(), poolNames)
            .addLayer("predict",
                    new DenseLayer.Builder()
                            .nOut(numClasses)
                            .dropOut(dropoutRetain)
                            .activation(Activation.SOFTMAX)
                            .build(),
                    "concat")
            .addLayer("loss",
                    new LossLayer.Builder(LossFunctions.LossFunction.MCXENT).build(),
                    "predict")
            .setOutputs("loss")
            .setInputTypes(InputType.recurrent(W2V_VECTOR_SIZE, 1000))
            .build();
}
}
@Override public MemoryReport getMemoryReport(InputType... inputTypes) { InputType outputType = getOutputType(-1, inputTypes); //TODO multiple input types return new LayerMemoryReport.Builder(null, MergeVertex.class, inputTypes[0], outputType).standardMemory(0, 0) //No params .workingMemory(0, 0, 0, 0) //No working memory in addition to activations/epsilons .cacheMemory(0, 0) //No caching .build(); } }
.nOut(cnnLayerFeatureMaps) .build(), "input") .addVertex("merge", new MergeVertex(), "cnn3", "cnn4", "cnn5") //Perform depth concatenation .addLayer("globalPool", new GlobalPoolingLayer.Builder() .poolingType(globalPoolingType)
/**
 * Returns a copy of this vertex. MergeVertex holds no visible configuration state
 * in this view, so a fresh instance is an equivalent copy.
 */
@Override
public MergeVertex clone() {
    return new MergeVertex();
}
/** * Add a {@link GraphVertex} to the network configuration. A GraphVertex defines forward and backward pass methods, * and can contain a {@link LayerVertex}, a {@link org.deeplearning4j.nn.conf.graph.ElementWiseVertex} to do element-wise * addition/subtraction, a {@link MergeVertex} to combine/concatenate the activations out of multiple layers or vertices, * a {@link org.deeplearning4j.nn.conf.graph.SubsetVertex} to select a subset of the activations out of another layer/GraphVertex.<br> * Custom GraphVertex objects (that extend the abstract {@link GraphVertex} class) may also be used. * * @param vertexName The name of the GraphVertex to add * @param vertex The GraphVertex to add * @param vertexInputs The inputs/activations to this GraphVertex */ public GraphBuilder addVertex(String vertexName, GraphVertex vertex, String... vertexInputs) { vertices.put(vertexName, vertex); //Automatically insert a MergeNode if this vertex can only take 1 input (layer vertices, etc) if (vertex.maxVertexInputs() == 1 && vertexInputs != null && vertexInputs.length > 1) { String mergeName = vertexName + "-merge"; addVertex(mergeName, new MergeVertex(), vertexInputs); this.vertexInputs.put(vertexName, Collections.singletonList(mergeName)); } else if (vertexInputs != null) { this.vertexInputs.put(vertexName, Arrays.asList(vertexInputs)); } return this; }
/**
 * Constructor from parsed Keras layer configuration dictionary.
 *
 * @param layerConfig           dictionary containing Keras layer configuration
 * @param enforceTrainingConfig whether to enforce training-related configuration options
 * @throws InvalidKerasConfigurationException
 * @throws UnsupportedKerasConfigurationException
 */
public KerasMerge(Map<String, Object> layerConfig, boolean enforceTrainingConfig)
        throws InvalidKerasConfigurationException, UnsupportedKerasConfigurationException {
    super(layerConfig, enforceTrainingConfig);
    this.mergeMode = getMergeMode(layerConfig);
    // No explicit merge mode maps to plain concatenation; otherwise an element-wise op.
    this.vertex = (this.mergeMode == null)
            ? new MergeVertex()
            : new ElementWiseVertex(mergeMode);
}
gb.addVertex(mergeVertexName, new MergeVertex(), names); currentInput = mergeVertexName;
/**
 * Appends one GoogLeNet-style inception module to the graph: four parallel branches
 * (1x1 conv, 3x3-reduce+3x3, 5x5-reduce+5x5, max-pool+1x1) whose outputs are
 * depth-concatenated.
 *
 * @param graph      builder to append to
 * @param name       prefix for all layer names in this module
 * @param inputSize  channel depth of the module input
 * @param config     per-branch filter counts, indexed [branch][stage]
 * @param inputLayer name of the layer feeding this module
 * @return the same builder, for chaining
 */
private GraphBuilder inception(GraphBuilder graph, String name, int inputSize, int[][] config, String inputLayer) {
    // Stage 1: branches fed directly by the module input.
    graph.addLayer(name + "-cnn1", conv1x1(inputSize, config[0][0], 0.2), inputLayer);
    graph.addLayer(name + "-cnn2", c3x3reduce(inputSize, config[1][0], 0.2), inputLayer);
    graph.addLayer(name + "-cnn3", c5x5reduce(inputSize, config[2][0], 0.2), inputLayer);
    graph.addLayer(name + "-max1", maxPool3x3(1), inputLayer);

    // Stage 2: expansions on top of the reduce/pool branches.
    graph.addLayer(name + "-cnn4", conv3x3(config[1][0], config[1][1], 0.2), name + "-cnn2");
    graph.addLayer(name + "-cnn5", conv5x5(config[2][0], config[2][1], 0.2), name + "-cnn3");
    graph.addLayer(name + "-cnn6", conv1x1(inputSize, config[3][0], 0.2), name + "-max1");

    // Depth-concatenate the four branch outputs.
    graph.addVertex(name + "-depthconcat1", new MergeVertex(),
            name + "-cnn1", name + "-cnn4", name + "-cnn5", name + "-cnn6");
    return graph;
}
build.addVertex("lstmFeedForwardMerge", new MergeVertex(), mergeInputs); int numInputsToDenseAfterMerge = numInputs + (lstmInputNames.length * numLSTMIndelHiddenNodes); assembler.assemble(numInputsToDenseAfterMerge, numHiddenNodes,
build.addVertex("lstmFeedForwardMerge", new MergeVertex(), mergeInputs); int numInputsToDenseAfterMerge = numInputs + (lstmInputNames.length * numLSTMIndelHiddenNodes); assembler.assemble(numInputsToDenseAfterMerge, numHiddenNodes,
graph.addVertex(getModuleName(moduleLayerName), new MergeVertex(), getModuleName(moduleLayerName) + "-transfer2-0", getModuleName(moduleLayerName) + "-transfer3", getModuleName(moduleLayerName) + "-transfer4"); } else if (kernelSize.length == 2 && reduceSize.length == 2) { graph.addVertex(getModuleName(moduleLayerName), new MergeVertex(), getModuleName(moduleLayerName) + "-transfer2-0", getModuleName(moduleLayerName) + "-transfer2-1"); } else if (kernelSize.length == 2 && reduceSize.length == 3) { graph.addVertex(getModuleName(moduleLayerName), new MergeVertex(), getModuleName(moduleLayerName) + "-transfer2-0", getModuleName(moduleLayerName) + "-transfer2-1", getModuleName(moduleLayerName) + "-transfer3"); } else if (kernelSize.length == 2 && reduceSize.length == 4) { graph.addVertex(getModuleName(moduleLayerName), new MergeVertex(), getModuleName(moduleLayerName) + "-transfer2-0", getModuleName(moduleLayerName) + "-transfer2-1",
new int[] {3, 3}, new int[] {2, 2}, new int[] {1, 1}).build(), "inception-3b") .addVertex("inception-3c", new MergeVertex(), "3c-transfer2", "3c-2-transfer4", "3c-pool"); new int[] {3, 3}, new int[] {2, 2}, new int[] {1, 1}).build(), "inception-4a") .addVertex("inception-4e", new MergeVertex(), "4e-transfer2", "4e-2-transfer4", "4e-pool"); "5a-3-1x1reduce-norm") .addVertex("inception-5a", new MergeVertex(), "5a-transfer1", "5a-transfer3", "5a-3-transfer4"); "5b-3-1x1reduce-norm") .addVertex("inception-5b", new MergeVertex(), "5b-transfer1", "5b-2-transfer3", "5b-3-transfer4");
nameLayer(blockName, "cnn4", i)) .addVertex(nameLayer(blockName, "merge1", i), new MergeVertex(), nameLayer(blockName, "batch1", i), nameLayer(blockName, "batch4", i)) .addLayer(nameLayer(blockName, "cnn5", i),
nameLayer(blockName, "cnn4", i)) .addVertex(nameLayer(blockName, "merge1", i), new MergeVertex(), nameLayer(blockName, "batch1", i), nameLayer(blockName, "batch4", i)) .addLayer(nameLayer(blockName, "cnn5", i),
"resnetA") .addVertex("reduceA", new MergeVertex(), "reduceA-batch1", "reduceA-batch4", "reduceA-pool5"); "reduceB-cnn8") .addVertex("reduceB", new MergeVertex(), "reduceB-pool1", "reduceB-batch2", "reduceB-batch4", "reduceB-batch7");
nameLayer(blockName, "cnn6", i)) .addVertex(nameLayer(blockName, "merge1", i), new MergeVertex(), nameLayer(blockName, "batch1", i), nameLayer(blockName, "batch3", i), nameLayer(blockName, "batch6", i))