@Override public INDArray axpy(Number da, INDArray dx, INDArray dy) { // if(!dx.isVector()) // throw new IllegalArgumentException("Unable to use axpy on a non vector"); LinAlgExceptions.assertSameLength(dx, dy); level1().axpy(dx.length(), da.doubleValue(), dx, dy); return dy; }
/**
 * Computes {@code dy = da * dx + dy} (BLAS level-1 AXPY) with a double scalar,
 * delegating to the float overload when the buffers are single precision.
 *
 * @param da scalar multiplier
 * @param dx vector that is scaled and added (read-only)
 * @param dy vector updated in place
 * @return {@code dy}, after the in-place update
 */
@Override
public INDArray axpy(double da, INDArray dx, INDArray dy) {
    LinAlgExceptions.assertVector(dx, dy);
    boolean singlePrecision = dx.data().dataType() == DataBuffer.Type.FLOAT;
    if (singlePrecision) {
        // Narrow the scalar so the call matches the buffer precision.
        return axpy((float) da, dx, dy);
    }
    level1().axpy(dx.length(), da, dx, dy);
    return dy;
}
/**
 * Computes {@code dy = da * dx + dy} (BLAS level-1 AXPY) with a float scalar,
 * delegating to the double overload when the buffers are double precision.
 *
 * @param da scalar multiplier
 * @param dx vector that is scaled and added (read-only)
 * @param dy vector updated in place
 * @return {@code dy}, after the in-place update
 */
@Override
public INDArray axpy(float da, INDArray dx, INDArray dy) {
    LinAlgExceptions.assertVector(dx, dy);
    boolean doublePrecision = dx.data().dataType() == DataBuffer.Type.DOUBLE;
    if (doublePrecision) {
        // Widen the scalar so the call matches the buffer precision.
        return axpy((double) da, dx, dy);
    }
    level1().axpy(dx.length(), da, dx, dy);
    return dy;
}
/**
 * Computes a vector-scalar product and adds the result to a vector:
 * {@code y = alpha * x + y} (BLAS level-1 AXPY), dispatching on the
 * data type of the underlying buffers.
 *
 * @param n     number of elements to process
 * @param alpha scalar multiplier applied to {@code x}
 * @param x     source vector (read-only)
 * @param y     destination vector, updated in place
 */
@Override
public void axpy(long n, double alpha, INDArray x, INDArray y) {
    // Record the call when full BLAS profiling is enabled.
    if (Nd4j.getExecutioner().getProfilingMode() == OpExecutioner.ProfilingMode.ALL)
        OpProfiler.getInstance().processBlasCall(false, x, y);

    if (x.isSparse() && !y.isSparse()) {
        // Mixed sparse/dense case is routed to the sparse BLAS wrapper.
        Nd4j.getSparseBlasWrapper().level1().axpy(n, alpha, x, y);
    } else if (x.data().dataType() == DataBuffer.Type.DOUBLE) {
        DefaultOpExecutioner.validateDataType(DataBuffer.Type.DOUBLE, x, y);
        daxpy(n, alpha, x, BlasBufferUtil.getBlasStride(x), y, BlasBufferUtil.getBlasStride(y));
    } else if (x.data().dataType() == DataBuffer.Type.FLOAT) {
        DefaultOpExecutioner.validateDataType(DataBuffer.Type.FLOAT, x, y);
        // alpha narrowed to float to match single-precision buffers.
        saxpy(n, (float) alpha, x, BlasBufferUtil.getBlasStride(x), y, BlasBufferUtil.getBlasStride(y));
    } else {
        // Fallback branch assumes HALF precision; validation enforces it.
        DefaultOpExecutioner.validateDataType(DataBuffer.Type.HALF, x, y);
        haxpy(n, (float) alpha, x, BlasBufferUtil.getBlasStride(x), y, BlasBufferUtil.getBlasStride(y));
    }
}
@Override public INDArray axpy(Number da, INDArray dx, INDArray dy) { // if(!dx.isVector()) // throw new IllegalArgumentException("Unable to use axpy on a non vector"); LinAlgExceptions.assertSameLength(dx, dy); level1().axpy(dx.length(), da.doubleValue(), dx, dy); return dy; }
/**
 * Performs an in-place line step: {@code parameters += step * searchDirection}.
 *
 * @param parameters      parameter vector, updated in place
 * @param searchDirection direction to move along (read-only)
 * @param step            step size multiplier
 */
@Override
public void step(INDArray parameters, INDArray searchDirection, double step) {
    // Single level-1 axpy avoids allocating searchDirection.mul(step).
    final double alpha = step;
    Nd4j.getBlasWrapper().level1().axpy(searchDirection.length(), alpha, searchDirection, parameters);
}
/**
 * Performs an in-place descent step: {@code parameters -= step * searchDirection}.
 *
 * @param parameters      parameter vector, updated in place
 * @param searchDirection direction to move against (read-only)
 * @param step            step size multiplier (applied with a negative sign)
 */
@Override
public void step(INDArray parameters, INDArray searchDirection, double step) {
    // Negate the scalar so a single axpy performs the subtraction in place.
    final double alpha = -step;
    Nd4j.getBlasWrapper().level1().axpy(searchDirection.length(), alpha, searchDirection, parameters);
}
/**
 * Computes {@code dy = da * dx + dy} (BLAS level-1 AXPY) with a double scalar,
 * delegating to the float overload when the buffers are single precision.
 *
 * @param da scalar multiplier
 * @param dx vector that is scaled and added (read-only)
 * @param dy vector updated in place
 * @return {@code dy}, after the in-place update
 */
@Override
public INDArray axpy(double da, INDArray dx, INDArray dy) {
    LinAlgExceptions.assertVector(dx, dy);
    if (dx.data().dataType() != DataBuffer.Type.FLOAT) {
        level1().axpy(dx.length(), da, dx, dy);
        return dy;
    }
    // Narrow the scalar so the call matches the single-precision buffers.
    return axpy((float) da, dx, dy);
}
/**
 * Computes {@code dy = da * dx + dy} (BLAS level-1 AXPY) with a float scalar,
 * delegating to the double overload when the buffers are double precision.
 *
 * @param da scalar multiplier
 * @param dx vector that is scaled and added (read-only)
 * @param dy vector updated in place
 * @return {@code dy}, after the in-place update
 */
@Override
public INDArray axpy(float da, INDArray dx, INDArray dy) {
    LinAlgExceptions.assertVector(dx, dy);
    if (dx.data().dataType() != DataBuffer.Type.DOUBLE) {
        level1().axpy(dx.length(), da, dx, dy);
        return dy;
    }
    // Widen the scalar so the call matches the double-precision buffers.
    return axpy((double) da, dx, dy);
}
// Fragment of a larger backprop method (enclosing definition not visible here).
// Each axpy folds a column-sum (sum(0)) gradient into a bias-gradient view,
// scaled by scaleFactor: y += scaleFactor * x, in place.
blasL1.axpy(dLdxzb.length(), scaleFactor, dpdpxz.sum(0), dLdxzb);
blasL1.axpy(dLdB.length(), scaleFactor, currentDelta.sum(0), dLdB);
// Backprop through the p(z|x) mean pre-output activation before summing;
// dup() protects the cached pre-out from in-place modification.
blasL1.axpy(dLdZXMeanb.length(), scaleFactor, pzxActivationFn
        .backprop(fwd.getPzxMeanPreOut().dup(), dLdz.add(meanZ)).getFirst().sum(0), dLdZXMeanb);
blasL1.axpy(dLdZXLogStdev2b.length(), scaleFactor, dLdPreLogSigma2.sum(0), dLdZXLogStdev2b);
// NOTE(review): this axpy into dLdB is identical to the one above — confirm the
// double accumulation of currentDelta.sum(0) is intentional and not a paste error.
blasL1.axpy(dLdB.length(), scaleFactor, currentDelta.sum(0), dLdB);
/** * Apply L1 and L2 regularization, if necessary. Note that L1/L2 may differ for different layers in the same block * * @param layer The layer to apply L1/L2 to * @param paramName Parameter name in the given layer * @param gradientView Gradient view array for the layer + param * @param paramsView Parameter view array for the layer + param */ public void postApply(Layer layer, String paramName, INDArray gradientView, INDArray paramsView) { NeuralNetConfiguration conf = layer.conf(); //TODO: do this for multiple contiguous params/layers (fewer, larger ops) double l2 = conf.getL2ByParam(paramName); if (conf.isUseRegularization() && l2 > 0) { //This can be an axpy op, saving an allocation... //gradientView += params * l2 i.e., dC/dw = dC0/dw + lambda/n * w where C0 is pre-l2 cost function //Equivalent to gradientView.addi(paramsView.mul(conf.getL2ByParam(paramName))); int length = gradientView.length(); Nd4j.getBlasWrapper().level1().axpy(length, l2, paramsView, gradientView); } if (conf.isUseRegularization() && conf.getL1ByParam(paramName) > 0) { gradientView.addi(Transforms.sign(paramsView, true).muli(conf.getL1ByParam(paramName))); } }
// Fragment of LSTM backpropagation (enclosing method and several closing braces
// are outside this view; code left byte-identical).
// Pattern throughout: l1BLAS.axpy(n, 1.0, x, y) performs y += x in place,
// accumulating gradient contributions without temporary allocations.
if (iTimeIndex != timeSeriesLength - 1 && hasPeepholeConnections) {
    // Not the last time step: cell-state gradient flows back through the
    // forget- and input-modulation peephole weights.
    nablaCellState = deltafNext.dup('f').muliRowVector(wFFTranspose);
    l1BLAS.axpy(nablaCellState.length(), 1.0, deltagNext.dup('f').muliRowVector(wGGTranspose),
            nablaCellState);
} else {
    l1BLAS.axpy(nablaCellState.length(), 1.0, temp, nablaCellState);
    if (hasPeepholeConnections) {
        INDArray deltaMulRowWOO = deltao.dup('f').muliRowVector(wOOTranspose);
        l1BLAS.axpy(nablaCellState.length(), 1.0, deltaMulRowWOO, nablaCellState); //nablaCellState.addi(deltao.mulRowVector(wOOTranspose))
        l1BLAS.axpy(length, 1.0, nextForgetGateAs.muli(nablaCellStateNext), nablaCellState); //nablaCellState.addi(nextForgetGateAs.mul(nablaCellStateNext))
        // Peephole recurrent-weight gradients: column sums accumulated into views.
        l1BLAS.axpy(hiddenLayerSize, 1.0, dLdwFF, rwGradientsFF); //rwGradients[4].addi(dLdwFF); //dL/dw_{FF}
        INDArray dLdwGG = deltag.dup('f').muli(prevMemCellState).sum(0);
        l1BLAS.axpy(hiddenLayerSize, 1.0, dLdwGG, rwGradientsGG); //rwGradients[6].addi(dLdwGG)
        l1BLAS.axpy(hiddenLayerSize, 1.0, dLdwOO, rwGradientsOO); //rwGradients[5].addi(dLdwOO); //dL/dw_{OOxy}
        // All four gate bias gradients (i, f, o, g) accumulated in one call.
        l1BLAS.axpy(4 * hiddenLayerSize, 1.0, deltaifogNext.sum(0), bGradientsOut);
    } else {
        l1BLAS.axpy(hiddenLayerSize, 1.0, deltai.sum(0), bGradientsOut); //Sneaky way to do bGradients_i += deltai.sum(0)
        // Output + input-modulation gate bias gradients occupy columns
        // [2*hiddenLayerSize, 4*hiddenLayerSize) of the bias gradient view.
        INDArray ogBiasToAdd = deltaifogNext.get(NDArrayIndex.all(),
                NDArrayIndex.interval(2 * hiddenLayerSize, 4 * hiddenLayerSize)).sum(0);
        INDArray ogBiasGrad = bGradientsOut.get(NDArrayIndex.point(0),
                NDArrayIndex.interval(2 * hiddenLayerSize, 4 * hiddenLayerSize));
        l1BLAS.axpy(2 * hiddenLayerSize, 1.0, ogBiasToAdd, ogBiasGrad);
// Fragment of the LSTM forward pass (enclosing method and intervening code are
// outside this view; code left byte-identical).
// Peephole connections add prevMemCellState (elementwise-scaled by the
// transposed peephole weight row) into each gate's pre-activation via in-place
// axpy: y += 1.0 * x. dup('f') makes an f-ordered copy so muliRowVector does
// not clobber the cached cell state.
if (hasPeepholeConnections) {
    INDArray pmcellWFF = prevMemCellState.dup('f').muliRowVector(wFFTranspose);
    l1BLAS.axpy(pmcellWFF.length(), 1.0, pmcellWFF, forgetGateActivations); //y = a*x + y i.e., forgetGateActivations.addi(pmcellWFF)
if (hasPeepholeConnections) {
    INDArray pmcellWGG = prevMemCellState.dup('f').muliRowVector(wGGTranspose);
    l1BLAS.axpy(pmcellWGG.length(), 1.0, pmcellWGG, inputModGateActivations); //inputModGateActivations.addi(pmcellWGG)
// Cell state update: c_t += inputModGate * inputActivations (in place).
inputModMulInput = inputModGateActivations.muli(inputActivations);
l1BLAS.axpy(currentMemoryCellState.length(), 1.0, inputModMulInput, currentMemoryCellState); //currentMemoryCellState.addi(inputModMulInput)
if (hasPeepholeConnections) {
    // Output gate peeks at the *current* (just-updated) cell state.
    INDArray pmcellWOO = currentMemoryCellState.dup('f').muliRowVector(wOOTranspose);
    l1BLAS.axpy(pmcellWOO.length(), 1.0, pmcellWOO, outputGateActivations); //outputGateActivations.addi(pmcellWOO)