public Integer splitCount(RelNode rel, RelMetadataQuery mq) { Boolean newPhase = mq.isPhaseTransition(rel); if (newPhase == null) { return null; } if (newPhase) { // We repartition: new number of splits return splitCountRepartition(rel, mq); } // We do not repartition: take number of splits from children Integer splitCount = 0; for (RelNode input : rel.getInputs()) { splitCount += mq.splitCount(input); } return splitCount; }
public Integer splitCount(RelNode rel, RelMetadataQuery mq) { Boolean newPhase = mq.isPhaseTransition(rel); if (newPhase == null) { return null; } if (newPhase) { // We repartition: new number of splits return splitCountRepartition(rel, mq); } // We do not repartition: take number of splits from children Integer splitCount = 0; for (RelNode input : rel.getInputs()) { splitCount += mq.splitCount(input); } return splitCount; }
/**
 * Returns the split count of the input on the streaming side of {@code join}.
 *
 * @param join join whose streaming side selects the input to inspect
 * @return the split count reported by the metadata query for the left input when
 *         the left side streams, for the right input when the right side streams,
 *         and {@code null} when neither side is marked as streaming
 */
public static Integer getSplitCountWithoutRepartition(HiveJoin join) {
  if (join.getStreamingSide() == MapJoinStreamingRelation.LEFT_RELATION) {
    final RelNode streamedInput = join.getLeft();
    return RelMetadataQuery.instance().splitCount(streamedInput);
  }
  if (join.getStreamingSide() == MapJoinStreamingRelation.RIGHT_RELATION) {
    final RelNode streamedInput = join.getRight();
    return RelMetadataQuery.instance().splitCount(streamedInput);
  }
  // No streaming side selected: nothing to report.
  return null;
}
/**
 * Returns the split count of the input on the streaming side of {@code join},
 * looked up through the metadata query of the join's cluster.
 *
 * @param join join whose streaming side selects the input to inspect
 * @return the split count for the left input when the left side streams, for the
 *         right input when the right side streams, and {@code null} when neither
 *         side is marked as streaming
 */
public static Integer getSplitCountWithoutRepartition(HiveJoin join) {
  if (join.getStreamingSide() == MapJoinStreamingRelation.LEFT_RELATION) {
    final RelNode streamedInput = join.getLeft();
    return join.getCluster().getMetadataQuery().splitCount(streamedInput);
  }
  if (join.getStreamingSide() == MapJoinStreamingRelation.RIGHT_RELATION) {
    final RelNode streamedInput = join.getRight();
    return join.getCluster().getMetadataQuery().splitCount(streamedInput);
  }
  // No streaming side selected: nothing to report.
  return null;
}
@Override public Double getCumulativeMemoryWithinPhaseSplit(HiveJoin join) { RelMetadataQuery mq = RelMetadataQuery.instance(); // TODO: Split count is not same as no of buckets JoinAlgorithm oldAlgo = join.getJoinAlgorithm(); join.setJoinAlgorithm(TezSMBJoinAlgorithm.INSTANCE); final Double memoryWithinPhase = mq.cumulativeMemoryWithinPhase(join); final Integer splitCount = mq.splitCount(join); join.setJoinAlgorithm(oldAlgo); if (memoryWithinPhase == null || splitCount == null) { return null; } return memoryWithinPhase / splitCount; }
/**
 * Returns the cumulative memory within the phase divided by the split count,
 * both queried while {@code join} is temporarily configured with
 * {@code TezCommonJoinAlgorithm.INSTANCE}.
 *
 * <p>The join's previous algorithm is always restored before this method
 * returns or throws.
 *
 * @param join join to evaluate; its algorithm is swapped only for the duration of the lookups
 * @return memory per split, or {@code null} when either metadata value is unknown
 */
@Override
public Double getCumulativeMemoryWithinPhaseSplit(HiveJoin join) {
  final JoinAlgorithm oldAlgo = join.getJoinAlgorithm();
  join.setJoinAlgorithm(TezCommonJoinAlgorithm.INSTANCE);
  final Double memoryWithinPhase;
  final Integer splitCount;
  try {
    final RelMetadataQuery mq = join.getCluster().getMetadataQuery();
    memoryWithinPhase = mq.cumulativeMemoryWithinPhase(join);
    splitCount = mq.splitCount(join);
  } finally {
    // Put the caller's algorithm back even if a metadata lookup throws, so the
    // join is never left configured with the temporary algorithm.
    join.setJoinAlgorithm(oldAlgo);
  }

  if (memoryWithinPhase == null || splitCount == null) {
    return null;
  }
  return memoryWithinPhase / splitCount;
}
@Override public Double getCumulativeMemoryWithinPhaseSplit(HiveJoin join) { final RelMetadataQuery mq = join.getCluster().getMetadataQuery(); // TODO: Split count is not same as no of buckets JoinAlgorithm oldAlgo = join.getJoinAlgorithm(); join.setJoinAlgorithm(TezSMBJoinAlgorithm.INSTANCE); final Double memoryWithinPhase = mq.cumulativeMemoryWithinPhase(join); final Integer splitCount = mq.splitCount(join); join.setJoinAlgorithm(oldAlgo); if (memoryWithinPhase == null || splitCount == null) { return null; } return memoryWithinPhase / splitCount; }
/**
 * Returns the cumulative memory within the phase divided by the split count,
 * both queried while {@code join} is temporarily configured with
 * {@code TezCommonJoinAlgorithm.INSTANCE}.
 *
 * <p>The join's previous algorithm is always restored before this method
 * returns or throws.
 *
 * @param join join to evaluate; its algorithm is swapped only for the duration of the lookups
 * @return memory per split, or {@code null} when either metadata value is unknown
 */
@Override
public Double getCumulativeMemoryWithinPhaseSplit(HiveJoin join) {
  final JoinAlgorithm oldAlgo = join.getJoinAlgorithm();
  join.setJoinAlgorithm(TezCommonJoinAlgorithm.INSTANCE);
  final Double memoryWithinPhase;
  final Integer splitCount;
  try {
    memoryWithinPhase = RelMetadataQuery.instance().cumulativeMemoryWithinPhase(join);
    splitCount = RelMetadataQuery.instance().splitCount(join);
  } finally {
    // Put the caller's algorithm back even if a metadata lookup throws, so the
    // join is never left configured with the temporary algorithm.
    join.setJoinAlgorithm(oldAlgo);
  }

  if (memoryWithinPhase == null || splitCount == null) {
    return null;
  }
  return memoryWithinPhase / splitCount;
}
// Temporarily configure the join with TezMapJoinAlgorithm while its split count is queried,
// then put the previously set algorithm back. An unknown (null) split count falls back to a
// parallelism of 1; the resulting parallelism is passed to the map-join IO cost computation.
JoinAlgorithm oldAlgo = join.getJoinAlgorithm(); join.setJoinAlgorithm(TezMapJoinAlgorithm.INSTANCE); final int parallelism = mq.splitCount(join) == null ? 1 : mq.splitCount(join); join.setJoinAlgorithm(oldAlgo); final double ioCost = algoUtils.computeMapJoinIOCost(relationInfos, streaming, parallelism);
// Use the join's split count as the parallelism, falling back to 1 when it is unknown (null),
// then set the join algorithm to oldAlgo (presumably the algorithm saved before this excerpt).
final int parallelism = mq.splitCount(join) == null ? 1 : mq.splitCount(join); join.setJoinAlgorithm(oldAlgo);
// Use the join's split count as the parallelism, falling back to 1 when it is unknown (null),
// then set the join algorithm to oldAlgo (presumably the algorithm saved before this excerpt).
final int parallelism = mq.splitCount(join) == null ? 1 : mq .splitCount(join); join.setJoinAlgorithm(oldAlgo);
@Override public Double getCumulativeMemoryWithinPhaseSplit(HiveJoin join) { // Check streaming side RelNode inMemoryInput; if (join.getStreamingSide() == MapJoinStreamingRelation.LEFT_RELATION) { inMemoryInput = join.getRight(); } else if (join.getStreamingSide() == MapJoinStreamingRelation.RIGHT_RELATION) { inMemoryInput = join.getLeft(); } else { return null; } // If bucket map join, only a split goes in memory final RelMetadataQuery mq = join.getCluster().getMetadataQuery(); final Double memoryInput = mq.cumulativeMemoryWithinPhase(inMemoryInput); final Integer splitCount = mq.splitCount(inMemoryInput); if (memoryInput == null || splitCount == null) { return null; } return memoryInput / splitCount; }
// Read the split count of smallInput into buckets (a boxed Integer, so it can be null),
// then set the join algorithm to oldAlgo (presumably the algorithm saved before this excerpt).
Integer buckets = mq.splitCount(smallInput); join.setJoinAlgorithm(oldAlgo);
@Override public Double getCumulativeMemoryWithinPhaseSplit(HiveJoin join) { // Check streaming side RelNode inMemoryInput; if (join.getStreamingSide() == MapJoinStreamingRelation.LEFT_RELATION) { inMemoryInput = join.getRight(); } else if (join.getStreamingSide() == MapJoinStreamingRelation.RIGHT_RELATION) { inMemoryInput = join.getLeft(); } else { return null; } // If bucket map join, only a split goes in memory final Double memoryInput = RelMetadataQuery.instance().cumulativeMemoryWithinPhase(inMemoryInput); final Integer splitCount = RelMetadataQuery.instance().splitCount(inMemoryInput); if (memoryInput == null || splitCount == null) { return null; } return memoryInput / splitCount; }
// Read the split count of smallInput into buckets (a boxed Integer, so it can be null),
// then set the join algorithm to oldAlgo (presumably the algorithm saved before this excerpt).
Integer buckets = RelMetadataQuery.instance().splitCount(smallInput); join.setJoinAlgorithm(oldAlgo);
// Use the join's split count as the parallelism, falling back to 1 when it is unknown (null),
// then set the join algorithm to oldAlgo (presumably the algorithm saved before this excerpt).
final int parallelism = mq.splitCount(join) == null ? 1 : mq.splitCount(join); join.setJoinAlgorithm(oldAlgo);
// Use the join's split count as the parallelism, falling back to 1 when it is unknown (null),
// then set the join algorithm to oldAlgo (presumably the algorithm saved before this excerpt).
final int parallelism = mq.splitCount(join) == null ? 1 : mq .splitCount(join); join.setJoinAlgorithm(oldAlgo);
// Temporarily configure the join with TezMapJoinAlgorithm while its split count is queried,
// then put the previously set algorithm back. An unknown (null) split count falls back to a
// parallelism of 1; the resulting parallelism is passed to the map-join IO cost computation.
JoinAlgorithm oldAlgo = join.getJoinAlgorithm(); join.setJoinAlgorithm(TezMapJoinAlgorithm.INSTANCE); final int parallelism = mq.splitCount(join) == null ? 1 : mq.splitCount(join); join.setJoinAlgorithm(oldAlgo); final double ioCost = algoUtils.computeMapJoinIOCost(relationInfos, streaming, parallelism);
/**
 * Returns the split count of the input on the streaming side of {@code join}.
 *
 * @param join join whose streaming side selects the input to inspect
 * @return the split count for the left input when the left side streams, for the
 *         right input when the right side streams, and {@code null} when neither
 *         side is marked as streaming
 */
public static Integer getSplitCountWithoutRepartition(HiveJoin join) {
  if (join.getStreamingSide() == MapJoinStreamingRelation.LEFT_RELATION) {
    return RelMetadataQuery.splitCount(join.getLeft());
  }
  if (join.getStreamingSide() == MapJoinStreamingRelation.RIGHT_RELATION) {
    return RelMetadataQuery.splitCount(join.getRight());
  }
  // No streaming side selected: nothing to report.
  return null;
}
@Override public Double getCumulativeMemoryWithinPhaseSplit(HiveJoin join) { // TODO: Split count is not same as no of buckets JoinAlgorithm oldAlgo = join.getJoinAlgorithm(); join.setJoinAlgorithm(TezSMBJoinAlgorithm.INSTANCE); final Double memoryWithinPhase = RelMetadataQuery.cumulativeMemoryWithinPhase(join); final Integer splitCount = RelMetadataQuery.splitCount(join); join.setJoinAlgorithm(oldAlgo); if (memoryWithinPhase == null || splitCount == null) { return null; } return memoryWithinPhase / splitCount; }