/**
 * Assembles a source -> map -> reduce -> sink plan and runs it through {@code LocalExecutor}.
 *
 * @param args command-line arguments; not read by this method
 * @throws Exception declared by the signature; whatever the executor call raises propagates
 */
public static void main(String[] args) throws Exception {
    // The source is configured from the input format class rather than from an instance.
    final GenericDataSource<UserGeneratingInputFormat> userSource =
            new GenericDataSource<UserGeneratingInputFormat>(UserGeneratingInputFormat.class);

    final MapOperator numberExtractor = MapOperator.builder(new NumberExtractingMapper())
            .input(userSource)
            .name("le mapper")
            .build();

    // IntValue.class and 1 are handed to the reduce builder
    // (presumably key type and key field position -- confirm against the builder's signature).
    final ReduceOperator concatenator =
            ReduceOperator.builder(new ConcatenatingReducer(), IntValue.class, 1)
                    .input(numberExtractor)
                    .name("le reducer")
                    .build();

    final GenericDataSink printingSink = new GenericDataSink(PrintingOutputFormat.class, concatenator);

    final Plan plan = new Plan(printingSink);
    plan.setDefaultParallelism(4);

    LocalExecutor.execute(plan);
}
/**
 * Joins two {@code UniformIntInput} sources, each configured with NUM_KEYS_KEY = 50000 and
 * NUM_VALUES_KEY = 100, via {@code SimpleMatcher} on field 0 of both inputs, and passes the
 * plan to {@code runAndCancelJob} with both millisecond arguments set to 30 * 1000.
 *
 * @throws Exception declared by the signature; whatever {@code runAndCancelJob} raises propagates
 */
public void testCancelSortMatchWhileDoingHeavySorting() throws Exception {
    final GenericDataSource<UniformIntInput> firstInput =
            new GenericDataSource<UniformIntInput>(new UniformIntInput(), "Source 1");
    firstInput.setParameter(UniformIntInput.NUM_KEYS_KEY, 50000);
    firstInput.setParameter(UniformIntInput.NUM_VALUES_KEY, 100);

    final GenericDataSource<UniformIntInput> secondInput =
            new GenericDataSource<UniformIntInput>(new UniformIntInput(), "Source 2");
    secondInput.setParameter(UniformIntInput.NUM_KEYS_KEY, 50000);
    secondInput.setParameter(UniformIntInput.NUM_VALUES_KEY, 100);

    final JoinOperator join = JoinOperator.builder(SimpleMatcher.class, IntValue.class, 0, 0)
            .input1(firstInput)
            .input2(secondInput)
            .name("Long Cancelling Sort Join")
            .build();

    final GenericDataSink discardingSink = new GenericDataSink(new DiscardingOutputFormat(), join, "Sink");

    final Plan plan = new Plan(discardingSink);
    plan.setDefaultParallelism(4);

    // Presumably (delay before cancel, time allowed for the cancel to complete) -- confirm in the helper.
    runAndCancelJob(plan, 30 * 1000, 30 * 1000);
}
/**
 * Joins two {@code InfiniteIntegerInputFormatWithDelay} sources via {@code SimpleMatcher} on
 * field 0 of both inputs and passes the plan to {@code runAndCancelJob} with the millisecond
 * arguments 3000 and 10 * 1000.
 *
 * @throws Exception declared by the signature; whatever {@code runAndCancelJob} raises propagates
 */
public void testCancelSortMatchWhileReadingSlowInputs() throws Exception {
    final GenericDataSource<InfiniteIntegerInputFormatWithDelay> firstInput =
            new GenericDataSource<InfiniteIntegerInputFormatWithDelay>(
                    new InfiniteIntegerInputFormatWithDelay(), "Source 1");
    final GenericDataSource<InfiniteIntegerInputFormatWithDelay> secondInput =
            new GenericDataSource<InfiniteIntegerInputFormatWithDelay>(
                    new InfiniteIntegerInputFormatWithDelay(), "Source 2");

    final JoinOperator join = JoinOperator.builder(SimpleMatcher.class, IntValue.class, 0, 0)
            .input1(firstInput)
            .input2(secondInput)
            .name("Sort Join")
            .build();

    final GenericDataSink discardingSink = new GenericDataSink(new DiscardingOutputFormat(), join, "Sink");

    final Plan plan = new Plan(discardingSink);
    plan.setDefaultParallelism(4);

    // Presumably (delay before cancel, time allowed for the cancel to complete) -- confirm in the helper.
    runAndCancelJob(plan, 3000, 10 * 1000);
}
/**
 * Joins two {@code UniformIntInput} sources, each configured with NUM_KEYS_KEY = 500 and
 * NUM_VALUES_KEY = 3, via {@code DelayingMatcher} on field 0 of both inputs, and passes the
 * plan to {@code runAndCancelJob} with the millisecond arguments 10 * 1000 and 20 * 1000.
 *
 * @throws Exception declared by the signature; whatever {@code runAndCancelJob} raises propagates
 */
public void testCancelSortMatchWhileJoining() throws Exception {
    final GenericDataSource<UniformIntInput> firstInput =
            new GenericDataSource<UniformIntInput>(new UniformIntInput(), "Source 1");
    firstInput.setParameter(UniformIntInput.NUM_KEYS_KEY, 500);
    firstInput.setParameter(UniformIntInput.NUM_VALUES_KEY, 3);

    final GenericDataSource<UniformIntInput> secondInput =
            new GenericDataSource<UniformIntInput>(new UniformIntInput(), "Source 2");
    secondInput.setParameter(UniformIntInput.NUM_KEYS_KEY, 500);
    secondInput.setParameter(UniformIntInput.NUM_VALUES_KEY, 3);

    final JoinOperator join = JoinOperator.builder(DelayingMatcher.class, IntValue.class, 0, 0)
            .input1(firstInput)
            .input2(secondInput)
            .name("Long Cancelling Sort Join")
            .build();

    final GenericDataSink discardingSink = new GenericDataSink(new DiscardingOutputFormat(), join, "Sink");

    final Plan plan = new Plan(discardingSink);
    plan.setDefaultParallelism(4);

    // Presumably (delay before cancel, time allowed for the cancel to complete) -- confirm in the helper.
    runAndCancelJob(plan, 10 * 1000, 20 * 1000);
}
/**
 * Joins two {@code InfiniteIntegerInputFormat} sources via {@code SimpleMatcher} on field 0 of
 * both inputs and passes the plan to {@code runAndCancelJob} with the millisecond arguments
 * 5000 and 10 * 1000.
 *
 * @throws Exception declared by the signature; whatever {@code runAndCancelJob} raises propagates
 */
public void testCancelSortMatchWhileReadingFastInputs() throws Exception {
    final GenericDataSource<InfiniteIntegerInputFormat> firstInput =
            new GenericDataSource<InfiniteIntegerInputFormat>(new InfiniteIntegerInputFormat(), "Source 1");
    final GenericDataSource<InfiniteIntegerInputFormat> secondInput =
            new GenericDataSource<InfiniteIntegerInputFormat>(new InfiniteIntegerInputFormat(), "Source 2");

    final JoinOperator join = JoinOperator.builder(SimpleMatcher.class, IntValue.class, 0, 0)
            .input1(firstInput)
            .input2(secondInput)
            .name("Sort Join")
            .build();

    final GenericDataSink discardingSink = new GenericDataSink(new DiscardingOutputFormat(), join, "Sink");

    final Plan plan = new Plan(discardingSink);
    plan.setDefaultParallelism(4);

    // Presumably (delay before cancel, time allowed for the cancel to complete) -- confirm in the helper.
    runAndCancelJob(plan, 5000, 10 * 1000);
}
/**
 * Joins two {@code UniformIntInput} sources, each configured with NUM_KEYS_KEY = 500 and
 * NUM_VALUES_KEY = 3, via {@code LongCancelTimeMatcher} on field 0 of both inputs, and passes
 * the plan to {@code runAndCancelJob} with both millisecond arguments set to 10 * 1000.
 *
 * @throws Exception declared by the signature; whatever {@code runAndCancelJob} raises propagates
 */
public void testCancelSortMatchWithLongCancellingResponse() throws Exception {
    final GenericDataSource<UniformIntInput> firstInput =
            new GenericDataSource<UniformIntInput>(new UniformIntInput(), "Source 1");
    firstInput.setParameter(UniformIntInput.NUM_KEYS_KEY, 500);
    firstInput.setParameter(UniformIntInput.NUM_VALUES_KEY, 3);

    final GenericDataSource<UniformIntInput> secondInput =
            new GenericDataSource<UniformIntInput>(new UniformIntInput(), "Source 2");
    secondInput.setParameter(UniformIntInput.NUM_KEYS_KEY, 500);
    secondInput.setParameter(UniformIntInput.NUM_VALUES_KEY, 3);

    final JoinOperator join = JoinOperator.builder(LongCancelTimeMatcher.class, IntValue.class, 0, 0)
            .input1(firstInput)
            .input2(secondInput)
            .name("Long Cancelling Sort Join")
            .build();

    final GenericDataSink discardingSink = new GenericDataSink(new DiscardingOutputFormat(), join, "Sink");

    final Plan plan = new Plan(discardingSink);
    plan.setDefaultParallelism(4);

    // Presumably (delay before cancel, time allowed for the cancel to complete) -- confirm in the helper.
    runAndCancelJob(plan, 10 * 1000, 10 * 1000);
}
/**
 * Joins two {@code InfiniteIntegerInputFormat} sources via a {@code SimpleMatcher} instance on
 * field 0 of both inputs, sets the plan's default parallelism to 64, and passes the plan to
 * {@code runAndCancelJob} with the millisecond arguments 3000 and 20 * 1000.
 *
 * @throws Exception declared by the signature; whatever {@code runAndCancelJob} raises propagates
 */
public void testCancelSortMatchWithHighDOP() throws Exception {
    final GenericDataSource<InfiniteIntegerInputFormat> firstInput =
            new GenericDataSource<InfiniteIntegerInputFormat>(new InfiniteIntegerInputFormat(), "Source 1");
    final GenericDataSource<InfiniteIntegerInputFormat> secondInput =
            new GenericDataSource<InfiniteIntegerInputFormat>(new InfiniteIntegerInputFormat(), "Source 2");

    // The builder is given a matcher instance here, not a class literal.
    final JoinOperator join = JoinOperator.builder(new SimpleMatcher(), IntValue.class, 0, 0)
            .input1(firstInput)
            .input2(secondInput)
            .name("Sort Join")
            .build();

    final GenericDataSink discardingSink = new GenericDataSink(new DiscardingOutputFormat(), join, "Sink");

    final Plan plan = new Plan(discardingSink);
    plan.setDefaultParallelism(64);

    // Presumably (delay before cancel, time allowed for the cancel to complete) -- confirm in the helper.
    runAndCancelJob(plan, 3000, 20 * 1000);
}
/**
 * Joins two {@code InfiniteIntegerInputFormat} sources via {@code StuckInOpenMatcher} on field 0
 * of both inputs and passes the same plan to {@code runAndCancelJob} twice: first through the
 * two-argument form with 5000, then through the three-argument form with 10 * 1000 for both
 * millisecond arguments.
 *
 * @throws Exception declared by the signature; whatever {@code runAndCancelJob} raises propagates
 */
public void testCancelSortMatchPriorToFirstRecordReading() throws Exception {
    final GenericDataSource<InfiniteIntegerInputFormat> firstInput =
            new GenericDataSource<InfiniteIntegerInputFormat>(new InfiniteIntegerInputFormat(), "Source 1");
    final GenericDataSource<InfiniteIntegerInputFormat> secondInput =
            new GenericDataSource<InfiniteIntegerInputFormat>(new InfiniteIntegerInputFormat(), "Source 2");

    // NOTE(review): the operator name reads "Stuc-In-Open" (likely meant "Stuck-In-Open");
    // left untouched because the name is a runtime string.
    final JoinOperator join = JoinOperator.builder(StuckInOpenMatcher.class, IntValue.class, 0, 0)
            .input1(firstInput)
            .input2(secondInput)
            .name("Stuc-In-Open Match")
            .build();

    final GenericDataSink discardingSink = new GenericDataSink(new DiscardingOutputFormat(), join, "Sink");

    final Plan plan = new Plan(discardingSink);
    plan.setDefaultParallelism(4);

    // NOTE(review): the plan is run and cancelled twice in a row -- confirm the double run is intended.
    runAndCancelJob(plan, 5000);
    runAndCancelJob(plan, 10 * 1000, 10 * 1000);
}
/**
 * Builds the "JDBC Example Job" plan: a JDBC source running {@code select * from books} against
 * the in-memory Derby database {@code ebookshop}, wired directly into a JDBC sink that inserts
 * into {@code newbooks}.
 *
 * @param args program arguments; not read by this method
 * @return the plan rooted at the JDBC sink
 */
@Override
public Plan getPlan(String[] args) {
    /*
     * This example uses the input format constructor whose url carries all required settings.
     * Alternatives: use the default constructor and supply a Configuration holding the settings,
     * or set the settings on the source instance.
     */
    final JDBCInputFormat bookReader = new JDBCInputFormat(
            "org.apache.derby.jdbc.EmbeddedDriver",
            "jdbc:derby:memory:ebookshop",
            "select * from books");
    final GenericDataSource<JDBCInputFormat> bookSource =
            new GenericDataSource<JDBCInputFormat>(bookReader, "Data Source");

    final GenericDataSink bookSink = new GenericDataSink(new JDBCOutputFormat(), "Data Output");

    // Five setClass calls, matching the five '?' placeholders (id, title, author, price, qty) in order.
    JDBCOutputFormat.configureOutputFormat(bookSink)
            .setDriver("org.apache.derby.jdbc.EmbeddedDriver")
            .setUrl("jdbc:derby:memory:ebookshop")
            .setQuery("insert into newbooks (id,title,author,price,qty) values (?,?,?,?,?)")
            .setClass(IntValue.class)
            .setClass(StringValue.class)
            .setClass(StringValue.class)
            .setClass(FloatValue.class)
            .setClass(IntValue.class);

    bookSink.addInput(bookSource);

    return new Plan(bookSink, "JDBC Example Job");
}
/**
 * Feeds an {@code InfiniteIntegerInputFormat} source through {@code LongCancelTimeIdentityMapper}
 * into a discarding sink and passes the plan to {@code runAndCancelJob} with both millisecond
 * arguments set to 10 * 1000.
 *
 * @throws Exception declared by the signature; whatever {@code runAndCancelJob} raises propagates
 */
public void testMapWithLongCancellingResponse() throws Exception {
    final GenericDataSource<InfiniteIntegerInputFormat> integers =
            new GenericDataSource<InfiniteIntegerInputFormat>(new InfiniteIntegerInputFormat(), "Source");

    final MapOperator map = MapOperator.builder(LongCancelTimeIdentityMapper.class)
            .input(integers)
            .name("Long Cancelling Time Mapper")
            .build();

    final GenericDataSink discardingSink = new GenericDataSink(new DiscardingOutputFormat(), map, "Sink");

    final Plan plan = new Plan(discardingSink);
    plan.setDefaultParallelism(4);

    // Presumably (delay before cancel, time allowed for the cancel to complete) -- confirm in the helper.
    runAndCancelJob(plan, 10 * 1000, 10 * 1000);
}
/**
 * Feeds an {@code InfiniteIntegerInputFormat} source through {@code StuckInOpenIdentityMapper}
 * into a discarding sink and passes the plan to {@code runAndCancelJob} with both millisecond
 * arguments set to 10 * 1000.
 *
 * @throws Exception declared by the signature; whatever {@code runAndCancelJob} raises propagates
 */
public void testMapPriorToFirstRecordReading() throws Exception {
    final GenericDataSource<InfiniteIntegerInputFormat> integers =
            new GenericDataSource<InfiniteIntegerInputFormat>(new InfiniteIntegerInputFormat(), "Source");

    final MapOperator map = MapOperator.builder(StuckInOpenIdentityMapper.class)
            .input(integers)
            .name("Stuck-In-Open Mapper")
            .build();

    final GenericDataSink discardingSink = new GenericDataSink(new DiscardingOutputFormat(), map, "Sink");

    final Plan plan = new Plan(discardingSink);
    plan.setDefaultParallelism(4);

    // Presumably (delay before cancel, time allowed for the cancel to complete) -- confirm in the helper.
    runAndCancelJob(plan, 10 * 1000, 10 * 1000);
}
/**
 * Feeds an {@code InfiniteIntegerInputFormat} source through {@code IdentityMapper} into a
 * discarding sink and passes the plan to {@code runAndCancelJob} with the millisecond
 * arguments 5 * 1000 and 10 * 1000.
 *
 * @throws Exception declared by the signature; whatever {@code runAndCancelJob} raises propagates
 */
public void testMapCancelling() throws Exception {
    final GenericDataSource<InfiniteIntegerInputFormat> integers =
            new GenericDataSource<InfiniteIntegerInputFormat>(new InfiniteIntegerInputFormat(), "Source");

    final MapOperator map = MapOperator.builder(IdentityMapper.class)
            .input(integers)
            .name("Identity Mapper")
            .build();

    final GenericDataSink discardingSink = new GenericDataSink(new DiscardingOutputFormat(), map, "Sink");

    final Plan plan = new Plan(discardingSink);
    plan.setDefaultParallelism(4);

    // Presumably (delay before cancel, time allowed for the cancel to complete) -- confirm in the helper.
    runAndCancelJob(plan, 5 * 1000, 10 * 1000);
}
/**
 * Feeds an {@code InfiniteIntegerInputFormat} source through {@code DelayingIdentityMapper} into
 * a discarding sink and passes the plan to {@code runAndCancelJob} with the millisecond
 * arguments 5 * 1000 and 10 * 1000.
 *
 * @throws Exception declared by the signature; whatever {@code runAndCancelJob} raises propagates
 */
public void testSlowMapCancelling() throws Exception {
    final GenericDataSource<InfiniteIntegerInputFormat> integers =
            new GenericDataSource<InfiniteIntegerInputFormat>(new InfiniteIntegerInputFormat(), "Source");

    final MapOperator map = MapOperator.builder(DelayingIdentityMapper.class)
            .input(integers)
            .name("Delay Mapper")
            .build();

    final GenericDataSink discardingSink = new GenericDataSink(new DiscardingOutputFormat(), map, "Sink");

    final Plan plan = new Plan(discardingSink);
    plan.setDefaultParallelism(4);

    // Presumably (delay before cancel, time allowed for the cancel to complete) -- confirm in the helper.
    runAndCancelJob(plan, 5 * 1000, 10 * 1000);
}