/**
 * Creates a new costs object using the given values for the network and storage cost.
 * The CPU cost is not set by this constructor; use the three-argument variant to set it.
 *
 * @param networkCost The network cost, in bytes to be transferred.
 * @param diskCost The cost for disk, in bytes to be written and read.
 */
public Costs(double networkCost, double diskCost) {
	setNetworkCost(networkCost);
	setDiskCost(diskCost);
}
/**
 * Creates a new optimizer instance that uses the statistics object to determine properties about the input.
 * Given those statistics, the optimizer can make better choices for the execution strategies.
 * Uses the {@link DefaultCostEstimator} to weigh candidate plans.
 *
 * @param stats
 *        The statistics to be used to determine the input properties.
 * @param config
 *        The configuration passed on to the delegate constructor.
 */
public Optimizer(DataStatistics stats, Configuration config) {
	this(stats, new DefaultCostEstimator(), config);
}
/**
 * Creates a new costs object using the given values for the network, storage, and CPU cost.
 *
 * @param networkCost The network cost, in bytes to be transferred.
 * @param diskCost The cost for disk, in bytes to be written and read.
 * @param cpuCost The cost for CPU operations.
 */
public Costs(double networkCost, double diskCost, double cpuCost) {
	setNetworkCost(networkCost);
	setDiskCost(diskCost);
	setCpuCost(cpuCost);
}
@Override public void addLocalSortCost(EstimateProvider estimates, Costs costs) { final long s = estimates.getEstimatedOutputSize(); // we assume a two phase merge sort, so all in all 2 I/O operations per block if (s <= 0) { costs.setDiskCost(Costs.UNKNOWN); costs.setCpuCost(Costs.UNKNOWN); } else { costs.addDiskCost(2 * s); costs.addCpuCost((long) (s * SORTING_CPU_FACTOR)); } costs.addHeuristicDiskCost(2 * HEURISTIC_COST_BASE); costs.addHeuristicCpuCost((long) (HEURISTIC_COST_BASE * SORTING_CPU_FACTOR)); }
// Checks the relative ordering of join strategy costs under replication weights:
// weighted variants must cost more than unweighted ones, and the hash-based joins
// (build on either side) must be cheaper than the sort-merge join at equal weight.
// NOTE(review): the closing brace of this method is not visible in this chunk.
private void testJoinCostFormulasWithWeights(EstimateProvider e1, EstimateProvider e2) {
	// hash join costs, build side e1 (hf*) and build side e2 (hs*), weights 1 and 5
	Costs hf1 = new Costs();
	Costs hf5 = new Costs();
	Costs hs1 = new Costs();
	Costs hs5 = new Costs();
	// sort-merge join costs, weights 1 and 5
	Costs mm1 = new Costs();
	Costs mm5 = new Costs();
	costEstimator.addHybridHashCosts(e1, e2, hf1, 1);
	costEstimator.addHybridHashCosts(e1, e2, hf5, 5);
	costEstimator.addHybridHashCosts(e2, e1, hs1, 1);
	costEstimator.addHybridHashCosts(e2, e1, hs5, 5);
	// merge join: sort both inputs, then merge; the sort part is scaled by the weight
	// by multiplying before the (already weighted) merge cost is added
	costEstimator.addLocalSortCost(e1, mm1);
	costEstimator.addLocalSortCost(e2, mm1);
	costEstimator.addLocalMergeCost(e1, e2, mm1, 1);
	costEstimator.addLocalSortCost(e1, mm5);
	costEstimator.addLocalSortCost(e2, mm5);
	mm5.multiplyWith(5);
	costEstimator.addLocalMergeCost(e1, e2, mm5, 5);
	// weighted plans must be strictly more expensive than unweighted ones
	assertTrue(hf1.compareTo(hf5) < 0);
	assertTrue(hs1.compareTo(hs5) < 0);
	assertTrue(mm1.compareTo(mm5) < 0);
	// hash join must beat sort-merge join at the same weight
	assertTrue(hf1.compareTo(mm1) < 0);
	assertTrue(hs1.compareTo(mm1) < 0);
	assertTrue(hf5.compareTo(mm5) < 0);
	assertTrue(hs5.compareTo(mm5) < 0);
// Computes the costs of each ship strategy in isolation for the given estimates and
// compares them pairwise. NOTE(review): this chunk is truncated — the assertions that
// presumably consume the comparison results below are not visible here.
private void testShipStrategiesIsolated(EstimateProvider estimates, int targetParallelism) {
	Costs random = new Costs();
	costEstimator.addRandomPartitioningCost(estimates, random);
	Costs hash = new Costs();
	costEstimator.addHashPartitioningCost(estimates, hash);
	Costs range = new Costs();
	costEstimator.addRangePartitionCost(estimates, range);
	Costs broadcast = new Costs();
	costEstimator.addBroadcastCost(estimates, targetParallelism, broadcast);
	// pairwise orderings of the strategies' costs
	int randomVsHash = random.compareTo(hash);
	int hashVsRange = hash.compareTo(range);
	int hashVsBroadcast = hash.compareTo(broadcast);
	int rangeVsBroadcast = range.compareTo(broadcast);
// Checks cost ordering when one join input has known estimates and the other is unknown:
// hash partitioning both sides must beat broadcasting either side, the unknown-side
// broadcast must cost the same as the known-side broadcast (the unknown side contributes
// only heuristic costs), and higher target parallelism must make broadcasting costlier.
// NOTE(review): the closing brace of this method is not visible in this chunk.
private void testShipStrategyCombinationsWithUnknowns(EstimateProvider knownEstimates) {
	Costs hashBoth = new Costs();
	Costs bcKnown10 = new Costs();
	Costs bcUnknown10 = new Costs();
	Costs bcKnown1000 = new Costs();
	Costs bcUnknown1000 = new Costs();
	// hash cost is added for both sides; broadcast cost only for the side that is shipped
	costEstimator.addHashPartitioningCost(knownEstimates, hashBoth);
	costEstimator.addHashPartitioningCost(UNKNOWN_ESTIMATES, hashBoth);
	costEstimator.addBroadcastCost(knownEstimates, 10, bcKnown10);
	costEstimator.addBroadcastCost(UNKNOWN_ESTIMATES, 10, bcUnknown10);
	costEstimator.addBroadcastCost(knownEstimates, 1000, bcKnown1000);
	costEstimator.addBroadcastCost(UNKNOWN_ESTIMATES, 1000, bcUnknown1000);
	// repartitioning both sides is cheaper than broadcasting either side
	assertTrue(hashBoth.compareTo(bcKnown10) < 0);
	assertTrue(hashBoth.compareTo(bcUnknown10) < 0);
	assertTrue(hashBoth.compareTo(bcKnown1000) < 0);
	assertTrue(hashBoth.compareTo(bcUnknown1000) < 0);
	// known and unknown broadcasts compare equal at the same parallelism
	assertTrue(bcKnown10.compareTo(bcUnknown10) == 0);
	assertTrue(bcKnown1000.compareTo(bcUnknown1000) == 0);
	// broadcasting to more targets is more expensive
	assertTrue(bcKnown10.compareTo(bcKnown1000) < 0);
	assertTrue(bcUnknown10.compareTo(bcUnknown1000) < 0);
@Override public void addRangePartitionCost(EstimateProvider estimates, Costs costs) { final long dataSize = estimates.getEstimatedOutputSize(); if (dataSize > 0) { // Assume sampling of 10% of the data and spilling it to disk final long sampled = (long) (dataSize * 0.1f); // set shipping costs costs.addNetworkCost(dataSize + sampled); } else { costs.setNetworkCost(Costs.UNKNOWN); } // no costs known. use the same assumption as above on the heuristic costs final long sampled = (long) (HEURISTIC_COST_BASE * 0.1f); costs.addHeuristicNetworkCost(HEURISTIC_COST_BASE + sampled); costs.addHeuristicDiskCost(2 * sampled); }
@Override public void addRandomPartitioningCost(EstimateProvider estimates, Costs costs) { // conservative estimate: we need ship the whole data over the network to establish the // partitioning. no disk costs. final long estOutShipSize = estimates.getEstimatedOutputSize(); if (estOutShipSize <= 0) { costs.setNetworkCost(Costs.UNKNOWN); } else { costs.addNetworkCost(estOutShipSize); } costs.addHeuristicNetworkCost(HEURISTIC_COST_BASE); }
@Override
public void addFileInputCost(long fileSizeInBytes, Costs costs) {
	// A negative size signals that the file size is unknown.
	if (fileSizeInBytes < 0) {
		costs.setDiskCost(Costs.UNKNOWN);
	} else {
		costs.addDiskCost(fileSizeInBytes);
	}
	costs.addHeuristicDiskCost(HEURISTIC_COST_BASE);
}
/**
 * Creates a new n-ary union plan node. The node itself has no driver strategy
 * ({@code DriverStrategy.NONE}), zero node costs, and trivial local properties.
 *
 * @param template The binary union node this plan node was created for.
 * @param inputs The input channels to be unioned.
 * @param gProps The global properties of the union's output.
 * @param cumulativeCosts The cumulative costs of this node, as computed by the caller.
 */
public NAryUnionPlanNode(BinaryUnionNode template, List<Channel> inputs, GlobalProperties gProps,
		Costs cumulativeCosts) {
	super(template, "Union", DriverStrategy.NONE);
	this.inputs = inputs;
	this.globalProps = gProps;
	this.localProps = new LocalProperties();
	this.nodeCosts = new Costs();
	this.cumulativeCosts = cumulativeCosts;
}
/**
 * Sets the costs of this node, folding in the cumulative costs of the step function and,
 * if present, the termination criterion before delegating to the superclass.
 *
 * <p>NOTE: this method mutates the passed-in {@code nodeCosts} object by adding the
 * sub-plan costs to it.
 *
 * @param nodeCosts The base costs of this node; modified in place.
 */
public void setCosts(Costs nodeCosts) {
	// add the costs from the step function
	nodeCosts.addCosts(this.rootOfStepFunction.getCumulativeCosts());
	// add the costs for the termination criterion, if it exists
	// the costs are divided at branches, so we can simply add them up
	if (rootOfTerminationCriterion != null) {
		nodeCosts.addCosts(this.rootOfTerminationCriterion.getCumulativeCosts());
	}
	super.setCosts(nodeCosts);
}
/**
 * Runs the isolated ship-strategy check for every estimate fixture,
 * with target parallelism 1 and 10 each.
 */
@Test
public void testShipStrategiesIsolated() {
	final EstimateProvider[] fixtures = {
			UNKNOWN_ESTIMATES, ZERO_ESTIMATES, SMALL_ESTIMATES, BIG_ESTIMATES };
	for (EstimateProvider fixture : fixtures) {
		testShipStrategiesIsolated(fixture, 1);
		testShipStrategiesIsolated(fixture, 10);
	}
}
/**
 * Runs the weighted join-cost check for each fixture pair, in both input orders.
 */
@Test
public void testJoinCostFormulasWithWeights() {
	final EstimateProvider[][] pairs = {
			{ UNKNOWN_ESTIMATES, SMALL_ESTIMATES },
			{ UNKNOWN_ESTIMATES, MEDIUM_ESTIMATES },
			{ BIG_ESTIMATES, MEDIUM_ESTIMATES }
	};
	for (EstimateProvider[] pair : pairs) {
		testJoinCostFormulasWithWeights(pair[0], pair[1]);
		testJoinCostFormulasWithWeights(pair[1], pair[0]);
	}
}
/**
 * Runs the known-vs-unknown ship-strategy combination check for every estimate fixture.
 */
@Test
public void testShipStrategyCombinationsWithUnknowns() {
	for (EstimateProvider fixture : new EstimateProvider[] {
			UNKNOWN_ESTIMATES, ZERO_ESTIMATES, SMALL_ESTIMATES, MEDIUM_ESTIMATES, BIG_ESTIMATES }) {
		testShipStrategyCombinationsWithUnknowns(fixture);
	}
}
@Override public void addArtificialDamCost(EstimateProvider estimates, long bufferSize, Costs costs) { final long s = estimates.getEstimatedOutputSize(); // we assume spilling and re-reading if (s <= 0) { costs.setDiskCost(Costs.UNKNOWN); costs.setCpuCost(Costs.UNKNOWN); } else { costs.addDiskCost(2 * s); costs.setCpuCost((long) (s * MATERIALIZATION_CPU_FACTOR)); } costs.addHeuristicDiskCost(2 * HEURISTIC_COST_BASE); costs.addHeuristicCpuCost((long) (HEURISTIC_COST_BASE * MATERIALIZATION_CPU_FACTOR)); } }
/**
 * Creates a new optimizer instance. The optimizer has no access to statistics about the
 * inputs and can hence not determine any properties. It will perform all optimization with
 * unknown sizes and hence use only the heuristic cost functions, which result in the selection
 * of the most robust execution strategies.
 *
 * @param config The configuration passed on to the delegate constructor.
 */
public Optimizer(Configuration config) {
	this(null, new DefaultCostEstimator(), config);
}
@Override public void addHashPartitioningCost(EstimateProvider estimates, Costs costs) { // conservative estimate: we need ship the whole data over the network to establish the // partitioning. no disk costs. final long estOutShipSize = estimates.getEstimatedOutputSize(); if (estOutShipSize <= 0) { costs.setNetworkCost(Costs.UNKNOWN); } else { costs.addNetworkCost(estOutShipSize); } costs.addHeuristicNetworkCost(HEURISTIC_COST_BASE); }
@Override public void addHybridHashCosts(EstimateProvider buildSideInput, EstimateProvider probeSideInput, Costs costs, int costWeight) { long bs = buildSideInput.getEstimatedOutputSize(); long ps = probeSideInput.getEstimatedOutputSize(); if (bs > 0 && ps > 0) { long overall = 2*bs + ps; costs.addDiskCost(overall); costs.addCpuCost((long) (overall * HASHING_CPU_FACTOR)); } else { costs.setDiskCost(Costs.UNKNOWN); costs.setCpuCost(Costs.UNKNOWN); } costs.addHeuristicDiskCost(2 * HEURISTIC_COST_BASE); costs.addHeuristicCpuCost((long) (2 * HEURISTIC_COST_BASE * HASHING_CPU_FACTOR)); // cost weight applies to everything costs.multiplyWith(costWeight); }