input_right.setDegreeOfParallelism(config.getInteger("MatchTest#NoSubtasks", 1)); JoinOperator testMatcher = JoinOperator.builder(new TestMatcher(), StringValue.class, 0, 0) .build(); testMatcher.setDegreeOfParallelism(config.getInteger("MatchTest#NoSubtasks", 1)); testMatcher.getParameters().setString(PactCompiler.HINT_LOCAL_STRATEGY, config.getString("MatchTest#LocalStrategy", "")); if (config.getString("MatchTest#ShipStrategy", "").equals("BROADCAST_FIRST")) { testMatcher.getParameters().setString(PactCompiler.HINT_SHIP_STRATEGY_FIRST_INPUT, PactCompiler.HINT_SHIP_STRATEGY_BROADCAST); testMatcher.getParameters().setString(PactCompiler.HINT_SHIP_STRATEGY_SECOND_INPUT, PactCompiler.HINT_SHIP_STRATEGY_FORWARD); } else if (config.getString("MatchTest#ShipStrategy", "").equals("BROADCAST_SECOND")) { testMatcher.getParameters().setString(PactCompiler.HINT_SHIP_STRATEGY_FIRST_INPUT, PactCompiler.HINT_SHIP_STRATEGY_FORWARD); testMatcher.getParameters().setString(PactCompiler.HINT_SHIP_STRATEGY_SECOND_INPUT, PactCompiler.HINT_SHIP_STRATEGY_BROADCAST); } else { testMatcher.getParameters().setString(PactCompiler.HINT_SHIP_STRATEGY, config.getString("MatchTest#ShipStrategy", "")); testMatcher.setFirstInput(input_left); testMatcher.setSecondInput(input_right);
/**
 * Builds the JoinOperator described by the values collected in this builder.
 * When no name has been set, the class name of the user code is used as the
 * operator name.
 *
 * @return The created operator
 * @throws IllegalStateException if no key attribute has been set
 */
public JoinOperator build() {
    final boolean hasKeys = keyClasses.size() > 0;
    if (!hasKeys) {
        throw new IllegalStateException("At least one key attribute has to be set.");
    }

    // fall back to the user code's class name for unnamed operators
    if (null == name) {
        name = udf.getUserCodeClass().getName();
    }

    return new JoinOperator(this);
}
}
/** * Statistics that push towards a repartition merge join. If the join blows the data volume up significantly, * re-exploiting the sorted order is cheaper. */ @Test public void testQueryWithStatsForRepartitionMerge() { TPCHQuery3 query = new TPCHQuery3(); Plan p = query.getPlan(DEFAULT_PARALLELISM_STRING, IN_FILE, IN_FILE, OUT_FILE); // set compiler hints OperatorResolver cr = getContractResolver(p); JoinOperator match = cr.getNode("JoinLiO"); match.getCompilerHints().setFilterFactor(100f); testQueryGeneric(100l*1024*1024*1024*1024, 100l*1024*1024*1024*1024, 0.05f, 100f, false, true, false, false, true); }
/** * The private constructor that only gets invoked from the Builder. * @param builder */ protected JoinOperator(Builder builder) { super(builder.udf, OperatorInfoHelper.binary(), builder.getKeyColumnsArray1(), builder.getKeyColumnsArray2(), builder.name); this.keyTypes = builder.getKeyClassesArray(); if (builder.inputs1 != null && !builder.inputs1.isEmpty()) { setFirstInput(Operator.createUnionCascade(builder.inputs1)); } if (builder.inputs2 != null && !builder.inputs2.isEmpty()) { setSecondInput(Operator.createUnionCascade(builder.inputs2)); } setBroadcastVariables(builder.broadcastInputs); setSemanticProperties(FunctionAnnotation.readDualConstantAnnotations(builder.udf)); }
.build(); JoinOperator closeTriads = JoinOperator.builder(new CloseTriads(), StringValue.class, 1, 0) .keyField(StringValue.class, 2, 1) .name("Close Triads") .build(); closeTriads.setParameter("INPUT_LEFT_SHIP_STRATEGY", "SHIP_REPARTITION_HASH"); closeTriads.setParameter("INPUT_RIGHT_SHIP_STRATEGY", "SHIP_REPARTITION_HASH"); closeTriads.setParameter("LOCAL_STRATEGY", "LOCAL_STRATEGY_HASH_BUILD_SECOND"); closeTriads.setSecondInput(edges); closeTriads.setFirstInput(buildTriads); buildTriads.setInput(edges);
JoinOperator partsJoin = JoinOperator.builder(PartJoin.class, IntValue.class, 0, 0) .name("partsJoin") .build(); JoinOperator.builder(OrderedPartsJoin.class, IntValue.class, 0, 0) .name("orderedPartsJoin") .build(); JoinOperator.builder(SuppliersJoin.class, IntValue.class, 0, 0) .name("suppliersJoin") .build(); JoinOperator.builder(FilteredPartsJoin.class, IntPair.class, 0, 0) .name("filteredPartsJoin") .build(); JoinOperator.builder(PartListJoin.class, IntValue.class , 0, 0) .name("partlistJoin") .build(); partsJoin.setFirstInput(filterPart); partsJoin.setSecondInput(mapPartsupp); orderedPartsJoin.setFirstInput(mapOrder); orderedPartsJoin.setSecondInput(mapLineItem); suppliersJoin.setFirstInput(mapSupplier); suppliersJoin.setSecondInput(nationInput); filteredPartsJoin.setFirstInput(partsJoin); filteredPartsJoin.setSecondInput(orderedPartsJoin); partListJoin.setFirstInput(filteredPartsJoin);
public Plan getPlan(int numSubTasks, String output) { List<Object> tmp = new ArrayList<Object>(); int pos = 0; for (String s : WordCountData.COUNTS.split("\n")) { List<Object> tmpInner = new ArrayList<Object>(); tmpInner.add(pos++); tmpInner.add(Integer.parseInt(s.split(" ")[1])); tmp.add(tmpInner); } // test serializable iterator input, the input record is {id, word} CollectionDataSource source = new CollectionDataSource(new SerializableIteratorTest(), "test_iterator"); // test collection input, the input record is {id, count} CollectionDataSource source2 = new CollectionDataSource(tmp, "test_collection"); JoinOperator join = JoinOperator.builder(Join.class, IntValue.class, 0, 0) .input1(source).input2(source2).build(); FileDataSink out = new FileDataSink(new CsvOutputFormat(), output, join, "Collection Join"); CsvOutputFormat.configureRecordFormat(out) .recordDelimiter('\n') .fieldDelimiter(' ') .field(StringValue.class, 0) .field(IntValue.class, 1); Plan plan = new Plan(out, "CollectionDataSource"); plan.setDefaultParallelism(numSubTasks); return plan; }
.build(); JoinOperator closeTriads = JoinOperator.builder(new CloseTriads(), IntValue.class, 1, 0) .keyField(IntValue.class, 2, 1) .input1(buildTriads) .name("Close Triads") .build(); closeTriads.setParameter("INPUT_SHIP_STRATEGY", "SHIP_REPARTITION_HASH"); closeTriads.setParameter("LOCAL_STRATEGY", "LOCAL_STRATEGY_HASH_BUILD_SECOND");
.build(); JoinOperator joinOL = JoinOperator.builder(JoinOL.class, IntValue.class, 0, 0) .name("JoinOL") .build(); JoinOperator joinCOL = JoinOperator.builder(JoinCOL.class, IntValue.class, 0, 0) .name("JoinCOL") .build(); JoinOperator joinNCOL = JoinOperator.builder(JoinNCOL.class, IntValue.class, 4, 0) .name("JoinNCOL") .build(); joinNCOL.setFirstInput(joinCOL); joinNCOL.setSecondInput(projectN); joinCOL.setFirstInput(projectC); joinCOL.setSecondInput(joinOL); joinOL.setFirstInput(mapO); joinOL.setSecondInput(mapLi);
public void testCancelSortMatchWhileReadingSlowInputs() throws Exception {
    // Joins two sources backed by InfiniteIntegerInputFormatWithDelay on field 0,
    // discards the output, and hands the plan to runAndCancelJob.
    InfiniteIntegerInputFormatWithDelay firstFormat = new InfiniteIntegerInputFormatWithDelay();
    GenericDataSource<InfiniteIntegerInputFormatWithDelay> firstSource =
            new GenericDataSource<InfiniteIntegerInputFormatWithDelay>(firstFormat, "Source 1");

    InfiniteIntegerInputFormatWithDelay secondFormat = new InfiniteIntegerInputFormatWithDelay();
    GenericDataSource<InfiniteIntegerInputFormatWithDelay> secondSource =
            new GenericDataSource<InfiniteIntegerInputFormatWithDelay>(secondFormat, "Source 2");

    JoinOperator join = JoinOperator.builder(SimpleMatcher.class, IntValue.class, 0, 0)
            .input1(firstSource)
            .input2(secondSource)
            .name("Sort Join")
            .build();

    GenericDataSink discardingSink = new GenericDataSink(new DiscardingOutputFormat(), join, "Sink");

    Plan plan = new Plan(discardingSink);
    plan.setDefaultParallelism(4);

    // presumably (delay before cancelling, max time until cancelled) in milliseconds - TODO confirm
    runAndCancelJob(plan, 3000, 10 * 1000);
}
.input(toLowerDegreeEdge).name("Build Triads").build(); JoinOperator closeTriads = JoinOperator.builder(new CloseTriads(), IntValue.class, 1, 0) .keyField(IntValue.class, 2, 1) .input1(buildTriads).input2(projectOutCounts) .name("Close Triads").build(); closeTriads.setParameter("INPUT_SHIP_STRATEGY", "SHIP_REPARTITION_HASH"); closeTriads.setParameter("LOCAL_STRATEGY", "LOCAL_STRATEGY_HASH_BUILD_SECOND");
JoinOperator.builder(new ConcatPaths(), StringValue.class, 0, 1) .name("Concat Paths") .build(); concatPaths.setDegreeOfParallelism(numSubTasks); findShortestPaths.setFirstInput(pathsInput); findShortestPaths.setSecondInput(concatPaths); concatPaths.setFirstInput(pathsInput); concatPaths.setSecondInput(pathsInput);
public void testCancelSortMatchWithHighDOP() throws Exception {
    // Joins two sources backed by InfiniteIntegerInputFormat on field 0 with a
    // default parallelism of 64, discards the output, and hands the plan to
    // runAndCancelJob.
    InfiniteIntegerInputFormat firstFormat = new InfiniteIntegerInputFormat();
    GenericDataSource<InfiniteIntegerInputFormat> firstSource =
            new GenericDataSource<InfiniteIntegerInputFormat>(firstFormat, "Source 1");

    InfiniteIntegerInputFormat secondFormat = new InfiniteIntegerInputFormat();
    GenericDataSource<InfiniteIntegerInputFormat> secondSource =
            new GenericDataSource<InfiniteIntegerInputFormat>(secondFormat, "Source 2");

    JoinOperator join = JoinOperator.builder(new SimpleMatcher(), IntValue.class, 0, 0)
            .input1(firstSource)
            .input2(secondSource)
            .name("Sort Join")
            .build();

    GenericDataSink discardingSink = new GenericDataSink(new DiscardingOutputFormat(), join, "Sink");

    Plan plan = new Plan(discardingSink);
    plan.setDefaultParallelism(64);

    // presumably (delay before cancelling, max time until cancelled) in milliseconds - TODO confirm
    runAndCancelJob(plan, 3000, 20 * 1000);
}
mapper.getCompilerHints().setAvgOutputRecordSize(16f); mapper.getCompilerHints().setFilterFactor(orderSelectivity); joiner.getCompilerHints().setFilterFactor(joinSelectivity);
JoinOperator.builder(JoinLiO.class, IntValue.class, 0, 0) .name("OrdersLineitemsJoin") .build(); join.setDegreeOfParallelism(degreeOfParallelism); join.setFirstInput(ordersFilter); join.setSecondInput(lineFilter); aggregation.setInput(join); result.setInput(aggregation);
adjacencyListInputPath, "AdjancencyListInput"); JoinOperator join = JoinOperator.builder(new JoinVerexWithEdgesMatch(), LongValue.class, 0, 0) .input1(iteration.getPartialSolution()) .input2(adjacencyListInput) iteration.setMaximumNumberOfIterations(numIterations); JoinOperator termination = JoinOperator.builder(new JoinOldAndNew(), LongValue.class, 0, 0) .input1(iteration.getPartialSolution()) .input2(rankAggregation)
JoinOperator joinCO = JoinOperator.builder(new JoinCO(), IntValue.class, 0, 0) .name("JoinCO") .build(); joinCO.setDegreeOfParallelism(numSubtasks); joinCO.setFirstInput(orders); joinCO.setSecondInput(customers);
public void testCancelSortMatchWhileReadingFastInputs() throws Exception {
    // Joins two sources backed by InfiniteIntegerInputFormat on field 0,
    // discards the output, and hands the plan to runAndCancelJob.
    InfiniteIntegerInputFormat firstFormat = new InfiniteIntegerInputFormat();
    GenericDataSource<InfiniteIntegerInputFormat> firstSource =
            new GenericDataSource<InfiniteIntegerInputFormat>(firstFormat, "Source 1");

    InfiniteIntegerInputFormat secondFormat = new InfiniteIntegerInputFormat();
    GenericDataSource<InfiniteIntegerInputFormat> secondSource =
            new GenericDataSource<InfiniteIntegerInputFormat>(secondFormat, "Source 2");

    JoinOperator join = JoinOperator.builder(SimpleMatcher.class, IntValue.class, 0, 0)
            .input1(firstSource)
            .input2(secondSource)
            .name("Sort Join")
            .build();

    GenericDataSink discardingSink = new GenericDataSink(new DiscardingOutputFormat(), join, "Sink");

    Plan plan = new Plan(discardingSink);
    plan.setDefaultParallelism(4);

    // presumably (delay before cancelling, max time until cancelled) in milliseconds - TODO confirm
    runAndCancelJob(plan, 5000, 10 * 1000);
}
public void testCancelSortMatchPriorToFirstRecordReading() throws Exception {
    // Joins two sources backed by InfiniteIntegerInputFormat on field 0 using
    // StuckInOpenMatcher as user code, discards the output, and hands the plan
    // to runAndCancelJob.
    InfiniteIntegerInputFormat firstFormat = new InfiniteIntegerInputFormat();
    GenericDataSource<InfiniteIntegerInputFormat> firstSource =
            new GenericDataSource<InfiniteIntegerInputFormat>(firstFormat, "Source 1");

    InfiniteIntegerInputFormat secondFormat = new InfiniteIntegerInputFormat();
    GenericDataSource<InfiniteIntegerInputFormat> secondSource =
            new GenericDataSource<InfiniteIntegerInputFormat>(secondFormat, "Source 2");

    JoinOperator join = JoinOperator.builder(StuckInOpenMatcher.class, IntValue.class, 0, 0)
            .input1(firstSource)
            .input2(secondSource)
            .name("Stuc-In-Open Match")
            .build();

    GenericDataSink discardingSink = new GenericDataSink(new DiscardingOutputFormat(), join, "Sink");

    Plan plan = new Plan(discardingSink);
    plan.setDefaultParallelism(4);

    // NOTE(review): the plan is submitted and cancelled twice, first through the
    // two-argument overload and then through the three-argument one; confirm that
    // both runs are intended.
    runAndCancelJob(plan, 5000);
    runAndCancelJob(plan, 10 * 1000, 10 * 1000);
}
adjacencyListInputPath, "AdjancencyListInput"); JoinOperator join = JoinOperator.builder(new DotProductMatch(), LongValue.class, 0, 0) .input1(iteration.getPartialSolution()) .input2(adjacencyListInput)