// Fragment of a cost-comparison test: verifies the relative ordering of ship-strategy
// costs (hash-partition both vs. broadcast at parallelism 10/1000) when some size
// estimates are unknown.
// NOTE(review): the Costs instances below are zero-initialized and no estimator calls
// are visible in this excerpt — the calls that populate them (and the method's closing
// brace) appear to have been elided; confirm against the full test source.
private void testShipStrategyCombinationsWithUnknowns(EstimateProvider knownEstimates) {
    Costs hashBoth = new Costs();
    Costs bcKnown10 = new Costs();
    Costs bcUnknown10 = new Costs();
    Costs bcKnown1000 = new Costs();
    Costs bcUnknown1000 = new Costs();
    // hashing both sides should always be cheaper than broadcasting
    assertTrue(hashBoth.compareTo(bcKnown10) < 0);
    assertTrue(hashBoth.compareTo(bcUnknown10) < 0);
    assertTrue(hashBoth.compareTo(bcKnown1000) < 0);
    assertTrue(hashBoth.compareTo(bcUnknown1000) < 0);
    // known vs. unknown estimates should not change the cost at equal parallelism
    assertTrue(bcKnown10.compareTo(bcUnknown10) == 0);
    assertTrue(bcKnown1000.compareTo(bcUnknown1000) == 0);
    // broadcasting to more targets should be more expensive
    assertTrue(bcKnown10.compareTo(bcKnown1000) < 0);
    assertTrue(bcUnknown10.compareTo(bcUnknown1000) < 0);
@Override public void addHybridHashCosts(EstimateProvider buildSideInput, EstimateProvider probeSideInput, Costs costs, int costWeight) { long bs = buildSideInput.getEstimatedOutputSize(); long ps = probeSideInput.getEstimatedOutputSize(); if (bs > 0 && ps > 0) { long overall = 2*bs + ps; costs.addDiskCost(overall); costs.addCpuCost((long) (overall * HASHING_CPU_FACTOR)); } else { costs.setDiskCost(Costs.UNKNOWN); costs.setCpuCost(Costs.UNKNOWN); } costs.addHeuristicDiskCost(2 * HEURISTIC_COST_BASE); costs.addHeuristicCpuCost((long) (2 * HEURISTIC_COST_BASE * HASHING_CPU_FACTOR)); // cost weight applies to everything costs.multiplyWith(costWeight); }
public void setCosts(Costs nodeCosts) { // the plan enumeration logic works as for regular two-input-operators, which is important // because of the branch handling logic. it does pick redistributing network channels // between the sink and the sink joiner, because sinks joiner has a different parallelism than the sink. // we discard any cost and simply use the sum of the costs from the two children. Costs totalCosts = getInput1().getSource().getCumulativeCosts().clone(); totalCosts.addCosts(getInput2().getSource().getCumulativeCosts()); super.setCosts(totalCosts); }
@Override
public void addFileInputCost(long fileSizeInBytes, Costs costs) {
    // a negative size means the estimate is unavailable
    if (fileSizeInBytes < 0) {
        costs.setDiskCost(Costs.UNKNOWN);
    } else {
        costs.addDiskCost(fileSizeInBytes);
    }
    // heuristic fallback, charged regardless of estimate availability
    costs.addHeuristicDiskCost(HEURISTIC_COST_BASE);
}
@Override public void addLocalSortCost(EstimateProvider estimates, Costs costs) { final long s = estimates.getEstimatedOutputSize(); // we assume a two phase merge sort, so all in all 2 I/O operations per block if (s <= 0) { costs.setDiskCost(Costs.UNKNOWN); costs.setCpuCost(Costs.UNKNOWN); } else { costs.addDiskCost(2 * s); costs.addCpuCost((long) (s * SORTING_CPU_FACTOR)); } costs.addHeuristicDiskCost(2 * HEURISTIC_COST_BASE); costs.addHeuristicCpuCost((long) (HEURISTIC_COST_BASE * SORTING_CPU_FACTOR)); }
@Override public void addRandomPartitioningCost(EstimateProvider estimates, Costs costs) { // conservative estimate: we need ship the whole data over the network to establish the // partitioning. no disk costs. final long estOutShipSize = estimates.getEstimatedOutputSize(); if (estOutShipSize <= 0) { costs.setNetworkCost(Costs.UNKNOWN); } else { costs.addNetworkCost(estOutShipSize); } costs.addHeuristicNetworkCost(HEURISTIC_COST_BASE); }
// NOTE(review): fragment from a cost-accumulation routine (likely plan enumeration);
// the enclosing method and the code populating these Costs objects are outside this view.
final Costs totalCosts = new Costs();
// guaranteed memory for node n — read here but not used in this excerpt
final long availableMemory = n.getGuaranteedAvailableMemory();
final Costs costs = new Costs();
// scale the channel costs by the channel's cost weight before folding them in
costs.multiplyWith(channel.getCostWeight());
totalCosts.addCosts(costs);
Costs driverCosts = new Costs();
int costWeight = 1;
totalCosts.addCosts(driverCosts);
n.setCosts(totalCosts);
@Override public void addRangePartitionCost(EstimateProvider estimates, Costs costs) { final long dataSize = estimates.getEstimatedOutputSize(); if (dataSize > 0) { // Assume sampling of 10% of the data and spilling it to disk final long sampled = (long) (dataSize * 0.1f); // set shipping costs costs.addNetworkCost(dataSize + sampled); } else { costs.setNetworkCost(Costs.UNKNOWN); } // no costs known. use the same assumption as above on the heuristic costs final long sampled = (long) (HEURISTIC_COST_BASE * 0.1f); costs.addHeuristicNetworkCost(HEURISTIC_COST_BASE + sampled); costs.addHeuristicDiskCost(2 * sampled); }
// Fragment of a join-cost test: checks that cost-weight multiplication scales
// hash-first (hf), hash-second (hs), and merge (mm) join strategies consistently.
// NOTE(review): only the merge-cost estimator call is visible; the calls populating
// hf1/hf5/hs1/hs5/mm1 (and the method's closing brace) appear to be elided —
// asserts on zero-initialized Costs would not hold as excerpted. Confirm against
// the full test source.
private void testJoinCostFormulasWithWeights(EstimateProvider e1, EstimateProvider e2) {
    Costs hf1 = new Costs();
    Costs hf5 = new Costs();
    Costs hs1 = new Costs();
    Costs hs5 = new Costs();
    Costs mm1 = new Costs();
    Costs mm5 = new Costs();
    mm5.multiplyWith(5);
    costEstimator.addLocalMergeCost(e1, e2, mm5, 5);
    // weighted variants must cost more than their weight-1 counterparts
    assertTrue(hf1.compareTo(hf5) < 0);
    assertTrue(hs1.compareTo(hs5) < 0);
    assertTrue(mm1.compareTo(mm5) < 0);
    // hash strategies should stay cheaper than merge at either weight
    assertTrue(hf1.compareTo(mm1) < 0);
    assertTrue(hs1.compareTo(mm1) < 0);
    assertTrue(hf5.compareTo(mm5) < 0);
    assertTrue(hs5.compareTo(mm5) < 0);
/**
 * Creates a plan node for an n-ary union, merging the given input channels.
 * The node itself carries zero cost; local properties are reset to none.
 *
 * @param template The binary union node whose optimizer representation this plan node realizes.
 * @param inputs The input channels feeding the union.
 * @param gProps The global properties of the union's output.
 * @param cumulativeCosts The pre-computed cumulative costs for this node.
 */
public NAryUnionPlanNode(BinaryUnionNode template, List<Channel> inputs, GlobalProperties gProps, Costs cumulativeCosts) {
    super(template, "Union", DriverStrategy.NONE);
    this.inputs = inputs;
    this.globalProps = gProps;
    this.localProps = new LocalProperties();
    // a union performs no work of its own, so its node cost is zero
    this.nodeCosts = new Costs();
    this.cumulativeCosts = cumulativeCosts;
}
public void setCosts(Costs nodeCosts) { // add the costs from the step function nodeCosts.addCosts(this.rootOfStepFunction.getCumulativeCosts()); // add the costs for the termination criterion, if it exists // the costs are divided at branches, so we can simply add them up if (rootOfTerminationCriterion != null) { nodeCosts.addCosts(this.rootOfTerminationCriterion.getCumulativeCosts()); } super.setCosts(nodeCosts); }
// NOTE(review): excerpt from plan-candidate pruning — braces are unbalanced here,
// so the enclosing loop/closing structure is outside this view.
// Track the overall cheapest candidate plan seen so far.
if (cheapest == null || (cheapest.getCumulativeCosts().compareTo(candidate.getCumulativeCosts()) > 0)) {
    cheapest = candidate;
// Track the cheapest candidate per interesting global property index i.
if (cheapestForGlobal[i] == null || (cheapestForGlobal[i].getCumulativeCosts().compareTo(candidate.getCumulativeCosts()) > 0)) {
    cheapestForGlobal[i] = candidate;
// If the candidate satisfies the k-th interesting local property,
// keep it only when it beats the previously recorded match on cost.
if (lps[k].isMetBy(candidate.getLocalProperties())) {
    final PlanNode previous = localMatches[k];
    if (previous == null || previous.getCumulativeCosts().compareTo(candidate.getCumulativeCosts()) > 0) {
@Override public void addHashPartitioningCost(EstimateProvider estimates, Costs costs) { // conservative estimate: we need ship the whole data over the network to establish the // partitioning. no disk costs. final long estOutShipSize = estimates.getEstimatedOutputSize(); if (estOutShipSize <= 0) { costs.setNetworkCost(Costs.UNKNOWN); } else { costs.addNetworkCost(estOutShipSize); } costs.addHeuristicNetworkCost(HEURISTIC_COST_BASE); }
// NOTE(review): fragment from a cost-accumulation routine (likely plan enumeration);
// the enclosing method and the code populating these Costs objects are outside this view.
final Costs totalCosts = new Costs();
// guaranteed memory for node n — read here but not used in this excerpt
final long availableMemory = n.getGuaranteedAvailableMemory();
final Costs costs = new Costs();
// scale the channel costs by the channel's cost weight before folding them in
costs.multiplyWith(channel.getCostWeight());
totalCosts.addCosts(costs);
Costs driverCosts = new Costs();
int costWeight = 1;
totalCosts.addCosts(driverCosts);
n.setCosts(totalCosts);
@Override public void addRangePartitionCost(EstimateProvider estimates, Costs costs) { final long dataSize = estimates.getEstimatedOutputSize(); if (dataSize > 0) { // Assume sampling of 10% of the data and spilling it to disk final long sampled = (long) (dataSize * 0.1f); // set shipping costs costs.addNetworkCost(dataSize + sampled); } else { costs.setNetworkCost(Costs.UNKNOWN); } // no costs known. use the same assumption as above on the heuristic costs final long sampled = (long) (HEURISTIC_COST_BASE * 0.1f); costs.addHeuristicNetworkCost(HEURISTIC_COST_BASE + sampled); costs.addHeuristicDiskCost(2 * sampled); }
@Override
public void addFileInputCost(long fileSizeInBytes, Costs costs) {
    // Heuristic base cost is charged whether or not the file size is known;
    // the two cost dimensions are independent, so order does not matter.
    costs.addHeuristicDiskCost(HEURISTIC_COST_BASE);
    if (fileSizeInBytes >= 0) {
        // known size: reading the file is one pass of disk I/O
        costs.addDiskCost(fileSizeInBytes);
    } else {
        costs.setDiskCost(Costs.UNKNOWN);
    }
}
// NOTE(review): fragment from data-source plan-candidate construction — braces are
// unbalanced, so the surrounding control flow is outside this view.
candidate.updatePropertiesWithUniqueSets(getUniqueFields());
final Costs costs = new Costs();
// For file-based input formats with a known output size, the cost computation
// presumably happens in the elided branch body — confirm against the full source.
if (FileInputFormat.class.isAssignableFrom(getOperator().getFormatWrapper().getUserCodeClass()) && this.estimatedOutputSize >= 0) {
} else {
    // NOTE(review): re-declares 'costs' — in the original file this likely lives in a
    // different scope than the declaration above; as excerpted it would not compile.
    final Costs costs = new Costs();
    // unwrap the replicating wrapper to reach the actual input format
    InputFormat<?,?> inputFormat = ((ReplicatingInputFormat<?,?>) getOperator().getFormatWrapper().getUserCodeObject()).getReplicatedInputFormat();
// NOTE(review): excerpt from plan-candidate pruning — braces are unbalanced here,
// so the enclosing loop/closing structure is outside this view.
// Track the overall cheapest candidate plan seen so far.
if (cheapest == null || (cheapest.getCumulativeCosts().compareTo(candidate.getCumulativeCosts()) > 0)) {
    cheapest = candidate;
// Track the cheapest candidate per interesting global property index i.
if (cheapestForGlobal[i] == null || (cheapestForGlobal[i].getCumulativeCosts().compareTo(candidate.getCumulativeCosts()) > 0)) {
    cheapestForGlobal[i] = candidate;
// If the candidate satisfies the k-th interesting local property,
// keep it only when it beats the previously recorded match on cost.
if (lps[k].isMetBy(candidate.getLocalProperties())) {
    final PlanNode previous = localMatches[k];
    if (previous == null || previous.getCumulativeCosts().compareTo(candidate.getCumulativeCosts()) > 0) {
@Override public void addArtificialDamCost(EstimateProvider estimates, long bufferSize, Costs costs) { final long s = estimates.getEstimatedOutputSize(); // we assume spilling and re-reading if (s <= 0) { costs.setDiskCost(Costs.UNKNOWN); costs.setCpuCost(Costs.UNKNOWN); } else { costs.addDiskCost(2 * s); costs.setCpuCost((long) (s * MATERIALIZATION_CPU_FACTOR)); } costs.addHeuristicDiskCost(2 * HEURISTIC_COST_BASE); costs.addHeuristicCpuCost((long) (HEURISTIC_COST_BASE * MATERIALIZATION_CPU_FACTOR)); } }