/**
 * Unions the given operators with whatever is currently set as this sink's input
 * and stores the resulting union cascade as the new input.
 *
 * @param inputs The operator(s) to be unioned with the input. Must not be null.
 * @deprecated This method will be removed in future versions. Use the
 *             {@link eu.stratosphere.api.common.operators.Union} operator instead.
 */
@SuppressWarnings("unchecked")
@Deprecated
public void addInputs(List<? extends Operator<Record>> inputs) {
    Preconditions.checkNotNull(inputs, "The inputs may not be null.");

    // Read the current input first, then turn the list into the array form
    // that createUnionCascade receives.
    final Operator<Record> currentInput = this.input;
    final Operator<Record>[] additional =
            (Operator<Record>[]) inputs.toArray(new Operator[inputs.size()]);

    this.input = createUnionCascade(currentInput, additional);
}
}
/**
 * Creates a configuration builder that writes output format parameters
 * into the parameters of the given data sink, in a fluent fashion.
 *
 * @param target The data sink whose parameters are handed to the builder.
 * @return A config builder for setting parameters.
 */
public static ConfigBuilder configureOutputFormat(GenericDataSink target) {
    return new ConfigBuilder(
            target.getParameters());
}
/**
 * Creates a GenericDataSink from an {@link OutputFormat} class and a name,
 * and sets the given operator as the sink's input.
 *
 * @param f The {@link OutputFormat} implementation used to sink the data.
 * @param input The operator to use as the input.
 * @param name The given name for the sink, used in plans, logs and progress messages.
 */
public GenericDataSink(
        Class<? extends OutputFormat<Record>> f,
        Operator<Record> input,
        String name) {
    // Delegate format and name handling to the two-argument constructor.
    this(f, name);
    this.setInput(input);
}
/**
 * Builds a source -> mapper -> reducer -> sink plan with a default
 * parallelism of 4 and hands it to {@code LocalExecutor.execute}.
 *
 * @param args Command line arguments; not read by this method.
 * @throws Exception Propagated from building or executing the plan.
 */
public static void main(String[] args) throws Exception {
    final GenericDataSource<UserGeneratingInputFormat> input =
            new GenericDataSource<UserGeneratingInputFormat>(UserGeneratingInputFormat.class);

    final MapOperator mapStage = MapOperator.builder(new NumberExtractingMapper())
            .input(input)
            .name("le mapper")
            .build();

    final ReduceOperator reduceStage =
            ReduceOperator.builder(new ConcatenatingReducer(), IntValue.class, 1)
                    .input(mapStage)
                    .name("le reducer")
                    .build();

    final Plan plan = new Plan(new GenericDataSink(PrintingOutputFormat.class, reduceStage));
    plan.setDefaultParallelism(4);

    LocalExecutor.execute(plan);
}
/**
 * Assembles the plan of the JDBC example: a JDBC-backed data source wired
 * directly into a JDBC-backed data sink.
 *
 * @param args Program arguments; not read by this method.
 * @return The plan named "JDBC Example Job" with the JDBC sink as its data sink.
 */
@Override
public Plan getPlan(String[] args) {
    // The same driver and connection URL are used for reading and for writing.
    final String driverName = "org.apache.derby.jdbc.EmbeddedDriver";
    final String dbUrl = "jdbc:derby:memory:ebookshop";

    /*
     * The input format is created through the constructor that takes all required
     * settings directly. Alternatively, the default constructor could be used together
     * with a Configuration carrying the settings, or the settings could be set on the
     * source instance.
     */
    final JDBCInputFormat inputFormat =
            new JDBCInputFormat(driverName, dbUrl, "select * from books");
    final GenericDataSource<JDBCInputFormat> jdbcSource =
            new GenericDataSource<JDBCInputFormat>(inputFormat, "Data Source");

    final GenericDataSink jdbcSink = new GenericDataSink(new JDBCOutputFormat(), "Data Output");

    // One setClass call per '?' placeholder of the insert statement, in order.
    JDBCOutputFormat.configureOutputFormat(jdbcSink)
            .setDriver(driverName)
            .setUrl(dbUrl)
            .setQuery("insert into newbooks (id,title,author,price,qty) values (?,?,?,?,?)")
            .setClass(IntValue.class)
            .setClass(StringValue.class)
            .setClass(StringValue.class)
            .setClass(FloatValue.class)
            .setClass(IntValue.class);

    jdbcSink.addInput(jdbcSource);

    return new Plan(jdbcSink, "JDBC Example Job");
}
/**
 * Creates a GenericDataSink from an {@link OutputFormat} instance and a name,
 * and sets the given list of operators as the sink's inputs.
 *
 * @param f The {@link OutputFormat} implementation used to sink the data.
 * @param input The operators to use as the input.
 * @param name The given name for the sink, used in plans, logs and progress messages.
 * @deprecated This constructor will be removed in future versions. Use the
 *             {@link eu.stratosphere.api.common.operators.Union} operator instead.
 */
@Deprecated
public GenericDataSink(
        OutputFormat<Record> f,
        List<Operator<Record>> input,
        String name) {
    // Delegate format and name handling to the two-argument constructor.
    this(f, name);
    this.setInputs(input);
}
/**
 * Builds a source -> {@code StuckInOpenIdentityMapper} -> discarding sink plan
 * with default parallelism 4 and passes it to {@code runAndCancelJob}.
 *
 * @throws Exception Propagated from {@code runAndCancelJob}.
 */
public void testMapPriorToFirstRecordReading() throws Exception {
    final GenericDataSource<InfiniteIntegerInputFormat> integers =
            new GenericDataSource<InfiniteIntegerInputFormat>(
                    new InfiniteIntegerInputFormat(), "Source");

    final MapOperator stuckMapper = MapOperator.builder(StuckInOpenIdentityMapper.class)
            .input(integers)
            .name("Stuck-In-Open Mapper")
            .build();

    final Plan plan = new Plan(
            new GenericDataSink(new DiscardingOutputFormat(), stuckMapper, "Sink"));
    plan.setDefaultParallelism(4);

    runAndCancelJob(plan, 10 * 1000, 10 * 1000);
}
/**
 * Creates a GenericDataSink from an {@link OutputFormat} class and a name,
 * and sets the given list of operators as the sink's inputs.
 *
 * @param f The {@link OutputFormat} implementation used to sink the data.
 * @param input The operators to use as the input.
 * @param name The given name for the sink, used in plans, logs and progress messages.
 * @deprecated This constructor will be removed in future versions. Use the
 *             {@link eu.stratosphere.api.common.operators.Union} operator instead.
 */
@Deprecated
public GenericDataSink(
        Class<? extends OutputFormat<Record>> f,
        List<Operator<Record>> input,
        String name) {
    // Delegate format and name handling to the two-argument constructor.
    this(f, name);
    this.setInputs(input);
}
/**
 * Builds a source -> {@code LongCancelTimeIdentityMapper} -> discarding sink plan
 * with default parallelism 4 and passes it to {@code runAndCancelJob}.
 *
 * @throws Exception Propagated from {@code runAndCancelJob}.
 */
public void testMapWithLongCancellingResponse() throws Exception {
    final GenericDataSource<InfiniteIntegerInputFormat> integers =
            new GenericDataSource<InfiniteIntegerInputFormat>(
                    new InfiniteIntegerInputFormat(), "Source");

    final MapOperator slowToCancelMapper =
            MapOperator.builder(LongCancelTimeIdentityMapper.class)
                    .input(integers)
                    .name("Long Cancelling Time Mapper")
                    .build();

    final Plan plan = new Plan(
            new GenericDataSink(new DiscardingOutputFormat(), slowToCancelMapper, "Sink"));
    plan.setDefaultParallelism(4);

    runAndCancelJob(plan, 10 * 1000, 10 * 1000);
}
/**
 * Creates a GenericDataSink from an {@link OutputFormat} instance and a name,
 * and sets the given operator as the sink's input.
 *
 * @param f The {@link OutputFormat} implementation used to sink the data.
 * @param input The operator to use as the input.
 * @param name The given name for the sink, used in plans, logs and progress messages.
 */
public GenericDataSink(
        OutputFormat<Record> f,
        Operator<Record> input,
        String name) {
    // Delegate format and name handling to the two-argument constructor.
    this(f, name);
    this.setInput(input);
}
/**
 * Builds a source -> {@code IdentityMapper} -> discarding sink plan with
 * default parallelism 4 and passes it to {@code runAndCancelJob}.
 *
 * @throws Exception Propagated from {@code runAndCancelJob}.
 */
public void testMapCancelling() throws Exception {
    final GenericDataSource<InfiniteIntegerInputFormat> integers =
            new GenericDataSource<InfiniteIntegerInputFormat>(
                    new InfiniteIntegerInputFormat(), "Source");

    final MapOperator identity = MapOperator.builder(IdentityMapper.class)
            .input(integers)
            .name("Identity Mapper")
            .build();

    final Plan plan = new Plan(
            new GenericDataSink(new DiscardingOutputFormat(), identity, "Sink"));
    plan.setDefaultParallelism(4);

    runAndCancelJob(plan, 5 * 1000, 10 * 1000);
}
/**
 * Builds a source -> {@code DelayingIdentityMapper} -> discarding sink plan with
 * default parallelism 4 and passes it to {@code runAndCancelJob}.
 *
 * @throws Exception Propagated from {@code runAndCancelJob}.
 */
public void testSlowMapCancelling() throws Exception {
    final GenericDataSource<InfiniteIntegerInputFormat> integers =
            new GenericDataSource<InfiniteIntegerInputFormat>(
                    new InfiniteIntegerInputFormat(), "Source");

    final MapOperator delayingMapper = MapOperator.builder(DelayingIdentityMapper.class)
            .input(integers)
            .name("Delay Mapper")
            .build();

    final Plan plan = new Plan(
            new GenericDataSink(new DiscardingOutputFormat(), delayingMapper, "Sink"));
    plan.setDefaultParallelism(4);

    runAndCancelJob(plan, 5 * 1000, 10 * 1000);
}
/**
 * Joins two {@code InfiniteIntegerInputFormatWithDelay} sources with a
 * {@code SimpleMatcher}, sinks the result into a discarding output, and passes
 * the plan (default parallelism 4) to {@code runAndCancelJob}.
 *
 * @throws Exception Propagated from {@code runAndCancelJob}.
 */
public void testCancelSortMatchWhileReadingSlowInputs() throws Exception {
    final GenericDataSource<InfiniteIntegerInputFormatWithDelay> left =
            new GenericDataSource<InfiniteIntegerInputFormatWithDelay>(
                    new InfiniteIntegerInputFormatWithDelay(), "Source 1");

    final GenericDataSource<InfiniteIntegerInputFormatWithDelay> right =
            new GenericDataSource<InfiniteIntegerInputFormatWithDelay>(
                    new InfiniteIntegerInputFormatWithDelay(), "Source 2");

    final JoinOperator join = JoinOperator.builder(SimpleMatcher.class, IntValue.class, 0, 0)
            .input1(left)
            .input2(right)
            .name("Sort Join")
            .build();

    final Plan plan = new Plan(
            new GenericDataSink(new DiscardingOutputFormat(), join, "Sink"));
    plan.setDefaultParallelism(4);

    runAndCancelJob(plan, 3000, 10*1000);
}
/**
 * Joins two {@code InfiniteIntegerInputFormat} sources with a {@code SimpleMatcher}
 * instance, sinks the result into a discarding output, and passes the plan
 * (default parallelism 64) to {@code runAndCancelJob}.
 *
 * @throws Exception Propagated from {@code runAndCancelJob}.
 */
public void testCancelSortMatchWithHighDOP() throws Exception {
    final GenericDataSource<InfiniteIntegerInputFormat> left =
            new GenericDataSource<InfiniteIntegerInputFormat>(
                    new InfiniteIntegerInputFormat(), "Source 1");

    final GenericDataSource<InfiniteIntegerInputFormat> right =
            new GenericDataSource<InfiniteIntegerInputFormat>(
                    new InfiniteIntegerInputFormat(), "Source 2");

    final JoinOperator join = JoinOperator.builder(new SimpleMatcher(), IntValue.class, 0, 0)
            .input1(left)
            .input2(right)
            .name("Sort Join")
            .build();

    final Plan plan = new Plan(
            new GenericDataSink(new DiscardingOutputFormat(), join, "Sink"));
    plan.setDefaultParallelism(64);

    runAndCancelJob(plan, 3000, 20*1000);
}
/**
 * Joins two {@code InfiniteIntegerInputFormat} sources with a {@code SimpleMatcher},
 * sinks the result into a discarding output, and passes the plan
 * (default parallelism 4) to {@code runAndCancelJob}.
 *
 * @throws Exception Propagated from {@code runAndCancelJob}.
 */
public void testCancelSortMatchWhileReadingFastInputs() throws Exception {
    final GenericDataSource<InfiniteIntegerInputFormat> left =
            new GenericDataSource<InfiniteIntegerInputFormat>(
                    new InfiniteIntegerInputFormat(), "Source 1");

    final GenericDataSource<InfiniteIntegerInputFormat> right =
            new GenericDataSource<InfiniteIntegerInputFormat>(
                    new InfiniteIntegerInputFormat(), "Source 2");

    final JoinOperator join = JoinOperator.builder(SimpleMatcher.class, IntValue.class, 0, 0)
            .input1(left)
            .input2(right)
            .name("Sort Join")
            .build();

    final Plan plan = new Plan(
            new GenericDataSink(new DiscardingOutputFormat(), join, "Sink"));
    plan.setDefaultParallelism(4);

    runAndCancelJob(plan, 5000, 10*1000);
}
/**
 * Joins two {@code InfiniteIntegerInputFormat} sources with a
 * {@code StuckInOpenMatcher}, sinks the result into a discarding output, and
 * passes the plan (default parallelism 4) to {@code runAndCancelJob} twice.
 *
 * @throws Exception Propagated from {@code runAndCancelJob}.
 */
public void testCancelSortMatchPriorToFirstRecordReading() throws Exception {
    final GenericDataSource<InfiniteIntegerInputFormat> left =
            new GenericDataSource<InfiniteIntegerInputFormat>(
                    new InfiniteIntegerInputFormat(), "Source 1");

    final GenericDataSource<InfiniteIntegerInputFormat> right =
            new GenericDataSource<InfiniteIntegerInputFormat>(
                    new InfiniteIntegerInputFormat(), "Source 2");

    // NOTE(review): the operator name reads "Stuc-In-Open" - possibly a typo for
    // "Stuck-In-Open"; left untouched because it is a runtime string.
    final JoinOperator stuckJoin =
            JoinOperator.builder(StuckInOpenMatcher.class, IntValue.class, 0, 0)
                    .input1(left)
                    .input2(right)
                    .name("Stuc-In-Open Match")
                    .build();

    final Plan plan = new Plan(
            new GenericDataSink(new DiscardingOutputFormat(), stuckJoin, "Sink"));
    plan.setDefaultParallelism(4);

    // NOTE(review): the same plan is run and cancelled twice, first through the
    // two-argument overload and then through the three-argument one. The first call
    // may be a leftover - confirm whether both runs are intended.
    runAndCancelJob(plan, 5000);
    runAndCancelJob(plan, 10 * 1000, 10 * 1000);
}
/**
 * Joins two {@code UniformIntInput} sources (each configured with
 * {@code NUM_KEYS_KEY = 50000} and {@code NUM_VALUES_KEY = 100}) with a
 * {@code SimpleMatcher}, sinks the result into a discarding output, and passes
 * the plan (default parallelism 4) to {@code runAndCancelJob}.
 *
 * @throws Exception Propagated from {@code runAndCancelJob}.
 */
public void testCancelSortMatchWhileDoingHeavySorting() throws Exception {
    final GenericDataSource<UniformIntInput> left =
            new GenericDataSource<UniformIntInput>(new UniformIntInput(), "Source 1");
    left.setParameter(UniformIntInput.NUM_KEYS_KEY, 50000);
    left.setParameter(UniformIntInput.NUM_VALUES_KEY, 100);

    final GenericDataSource<UniformIntInput> right =
            new GenericDataSource<UniformIntInput>(new UniformIntInput(), "Source 2");
    right.setParameter(UniformIntInput.NUM_KEYS_KEY, 50000);
    right.setParameter(UniformIntInput.NUM_VALUES_KEY, 100);

    final JoinOperator join = JoinOperator.builder(SimpleMatcher.class, IntValue.class, 0, 0)
            .input1(left)
            .input2(right)
            .name("Long Cancelling Sort Join")
            .build();

    final Plan plan = new Plan(
            new GenericDataSink(new DiscardingOutputFormat(), join, "Sink"));
    plan.setDefaultParallelism(4);

    runAndCancelJob(plan, 30 * 1000, 30 * 1000);
}
/**
 * Joins two {@code UniformIntInput} sources (each configured with
 * {@code NUM_KEYS_KEY = 500} and {@code NUM_VALUES_KEY = 3}) with a
 * {@code DelayingMatcher}, sinks the result into a discarding output, and passes
 * the plan (default parallelism 4) to {@code runAndCancelJob}.
 *
 * @throws Exception Propagated from {@code runAndCancelJob}.
 */
public void testCancelSortMatchWhileJoining() throws Exception {
    final GenericDataSource<UniformIntInput> left =
            new GenericDataSource<UniformIntInput>(new UniformIntInput(), "Source 1");
    left.setParameter(UniformIntInput.NUM_KEYS_KEY, 500);
    left.setParameter(UniformIntInput.NUM_VALUES_KEY, 3);

    final GenericDataSource<UniformIntInput> right =
            new GenericDataSource<UniformIntInput>(new UniformIntInput(), "Source 2");
    right.setParameter(UniformIntInput.NUM_KEYS_KEY, 500);
    right.setParameter(UniformIntInput.NUM_VALUES_KEY, 3);

    final JoinOperator delayingJoin =
            JoinOperator.builder(DelayingMatcher.class, IntValue.class, 0, 0)
                    .input1(left)
                    .input2(right)
                    .name("Long Cancelling Sort Join")
                    .build();

    final Plan plan = new Plan(
            new GenericDataSink(new DiscardingOutputFormat(), delayingJoin, "Sink"));
    plan.setDefaultParallelism(4);

    runAndCancelJob(plan, 10 * 1000, 20 * 1000);
}
/**
 * Joins two {@code UniformIntInput} sources (each configured with
 * {@code NUM_KEYS_KEY = 500} and {@code NUM_VALUES_KEY = 3}) with a
 * {@code LongCancelTimeMatcher}, sinks the result into a discarding output, and
 * passes the plan (default parallelism 4) to {@code runAndCancelJob}.
 *
 * @throws Exception Propagated from {@code runAndCancelJob}.
 */
public void testCancelSortMatchWithLongCancellingResponse() throws Exception {
    final GenericDataSource<UniformIntInput> left =
            new GenericDataSource<UniformIntInput>(new UniformIntInput(), "Source 1");
    left.setParameter(UniformIntInput.NUM_KEYS_KEY, 500);
    left.setParameter(UniformIntInput.NUM_VALUES_KEY, 3);

    final GenericDataSource<UniformIntInput> right =
            new GenericDataSource<UniformIntInput>(new UniformIntInput(), "Source 2");
    right.setParameter(UniformIntInput.NUM_KEYS_KEY, 500);
    right.setParameter(UniformIntInput.NUM_VALUES_KEY, 3);

    final JoinOperator slowToCancelJoin =
            JoinOperator.builder(LongCancelTimeMatcher.class, IntValue.class, 0, 0)
                    .input1(left)
                    .input2(right)
                    .name("Long Cancelling Sort Join")
                    .build();

    final Plan plan = new Plan(
            new GenericDataSink(new DiscardingOutputFormat(), slowToCancelJoin, "Sink"));
    plan.setDefaultParallelism(4);

    runAndCancelJob(plan, 10 * 1000, 10 * 1000);
}