/**
 * Creates a new instance whose input is the given two-dimensional array.
 * Each top-level element of {@code data} (itself an {@code Object[]}) is added
 * as one entry of the collection handed to the input format.
 *
 * @param f
 *        The {@link CollectionInputFormat} implementation that receives the data.
 * @param name
 *        The name passed on to the super constructor.
 * @param data
 *        The input rows; every row becomes one element of the collection.
 */
public CollectionDataSource(CollectionInputFormat f, String name, Object[][] data) {
    super(f, OperatorInfoHelper.source(), name);

    // Copy the rows into a collection, preserving their order.
    final Collection<Object> rows = new ArrayList<Object>(data.length);
    for (int i = 0; i < data.length; i++) {
        rows.add(data[i]);
    }

    // checkFormat runs before the data is handed to the input format.
    checkFormat(rows);
    f.setData(rows);
}
new CollectionDataSource("test_1d_valid_array", "a", "b", "c"); } catch (Exception e) { e.printStackTrace(); new CollectionDataSource("test_2d_valid_array", new Object[][] { { 1, "a" }, { 2, "b" }, { 3, "c" } }); } catch (Exception e) { new CollectionDataSource("test_1d_invalid_array", 1, "b", "c"); Assert.fail("input type is different"); } catch (Exception e) { new CollectionDataSource("test_2d_invalid_array", new Object[][] { { 1, "a" }, { 2, "b" }, { 3, 4 } }); Assert.fail("input type is different");
tmp.add(i); new CollectionDataSource(tmp, "test_valid_collection"); } catch (Exception e) { e.printStackTrace(); tmp.add(inner); new CollectionDataSource(tmp, "test_valid_double_collection"); } catch (Exception e) { e.printStackTrace(); new CollectionDataSource(tmp, "test_invalid_collection"); Assert.fail("input type is different"); } catch (Exception e) { inner.add('a'); tmp.add(inner); new CollectionDataSource(tmp, "test_invalid_double_collection"); Assert.fail("input type is different"); } catch (Exception e) {
/**
 * Creates a new instance that reads from the given collection.
 *
 * @param f
 *        The {@link CollectionInputFormat} implementation that receives the data.
 * @param data
 *        The input collection; it is passed to the input format as-is (no copy is made here).
 * @param name
 *        The name passed on to the super constructor.
 */
public CollectionDataSource(
        CollectionInputFormat f,
        Collection<?> data,
        String name) {
    super(f, OperatorInfoHelper.source(), name);

    // checkFormat runs first: if it rejects the input by throwing,
    // setData is never reached.
    checkFormat(data);
    f.setData(data);
}
public Plan getPlan(int numSubTasks, String output) { List<Object> tmp = new ArrayList<Object>(); int pos = 0; for (String s : WordCountData.COUNTS.split("\n")) { List<Object> tmpInner = new ArrayList<Object>(); tmpInner.add(pos++); tmpInner.add(Integer.parseInt(s.split(" ")[1])); tmp.add(tmpInner); } // test serializable iterator input, the input record is {id, word} CollectionDataSource source = new CollectionDataSource(new SerializableIteratorTest(), "test_iterator"); // test collection input, the input record is {id, count} CollectionDataSource source2 = new CollectionDataSource(tmp, "test_collection"); JoinOperator join = JoinOperator.builder(Join.class, IntValue.class, 0, 0) .input1(source).input2(source2).build(); FileDataSink out = new FileDataSink(new CsvOutputFormat(), output, join, "Collection Join"); CsvOutputFormat.configureRecordFormat(out) .recordDelimiter('\n') .fieldDelimiter(' ') .field(StringValue.class, 0) .field(IntValue.class, 1); Plan plan = new Plan(out, "CollectionDataSource"); plan.setDefaultParallelism(numSubTasks); return plan; }
/**
 * Creates a new instance for the given input using the given input format.
 * Every element of {@code data} is added, in order, to a new collection that
 * is then handed to the input format.
 *
 * @param f
 *        The {@link CollectionInputFormat} implementation used to read the data.
 * @param name
 *        The given name for the Pact, used in plans, logs and progress messages.
 * @param data
 *        The input records; each argument becomes one element of the collection.
 */
public CollectionDataSource(CollectionInputFormat f, String name, Object... data) {
    super(f, OperatorInfoHelper.source(), name);

    // Gather the varargs into a collection, keeping argument order.
    final Collection<Object> records = new ArrayList<Object>();
    for (int idx = 0; idx < data.length; idx++) {
        records.add(data[idx]);
    }

    // checkFormat runs before the data is handed to the input format.
    checkFormat(records);
    f.setData(records);
}
/**
 * Creates a new instance under {@code DEFAULT_NAME} for the given input.
 *
 * <p>The input is interpreted in one of three mutually exclusive ways:
 * <ul>
 *   <li>a single {@link Iterator} argument is handed to the format via {@code setIter};</li>
 *   <li>a single {@link Collection} argument is checked with {@code checkFormat} and
 *       handed to the format via {@code setData} as-is;</li>
 *   <li>anything else is treated as a list of records: the arguments are copied, in
 *       order, into a new collection that is checked and handed to {@code setData}.</li>
 * </ul>
 *
 * @param f
 *        The {@link CollectionInputFormat} implementation that receives the data.
 * @param data
 *        Either one iterator, one collection, or the individual input records.
 */
@SuppressWarnings("unchecked")
public CollectionDataSource(CollectionInputFormat f, Object... data) {
    super(f, OperatorInfoHelper.source(), DEFAULT_NAME);

    if (data.length == 1 && data[0] instanceof Iterator) {
        f.setIter((Iterator<Object>) data[0]);
    } else if (data.length == 1 && data[0] instanceof Collection) {
        Collection<Object> records = (Collection<Object>) data[0];
        checkFormat(records);
        f.setData(records);
    } else {
        // Only reached for plain record arguments. Previously this ran unconditionally,
        // so a lone iterator/collection was additionally wrapped in a one-element list
        // and that wrapper overwrote the data set by the branches above.
        Collection<Object> tmp = new ArrayList<Object>();
        for (Object o : data) {
            tmp.add(o);
        }
        checkFormat(tmp);
        f.setData(tmp);
    }
}