@Override
public Graph<LongValue, NullValue, NullValue> generate() {
    // Vertices
    DataSet<Vertex<LongValue, NullValue>> vertices = GraphGeneratorUtils.vertexSequence(env, parallelism, vertexCount);

    // Edges
    LongValueSequenceIterator iterator = new LongValueSequenceIterator(0, this.vertexCount - 1);

    // Validate ranges
    Collections.sort(offsetRanges);

    Iterator<OffsetRange> iter = offsetRanges.iterator();
    OffsetRange lastRange = iter.next();

    while (iter.hasNext()) {
        OffsetRange nextRange = iter.next();

        if (lastRange.overlaps(nextRange)) {
            throw new IllegalArgumentException("Overlapping ranges " + lastRange + " and " + nextRange);
        }

        lastRange = nextRange;
    }

    DataSet<Edge<LongValue, NullValue>> edges = env
        .fromParallelCollection(iterator, LongValue.class)
            .setParallelism(parallelism)
            .name("Edge iterators")
        .flatMap(new LinkVertexToOffsets(vertexCount, offsetRanges))
            .setParallelism(parallelism)
            .name("Circulant graph edges");

    // Graph
    return Graph.fromDataSet(vertices, edges, env);
}
@Override
public Graph<LongValue, NullValue, NullValue> generate() {
    Preconditions.checkState(!dimensions.isEmpty(), "No dimensions added to GridGraph");

    // Vertices
    DataSet<Vertex<LongValue, NullValue>> vertices = GraphGeneratorUtils.vertexSequence(env, parallelism, vertexCount);

    // Edges
    LongValueSequenceIterator iterator = new LongValueSequenceIterator(0, this.vertexCount - 1);

    DataSet<Edge<LongValue, NullValue>> edges = env
        .fromParallelCollection(iterator, LongValue.class)
            .setParallelism(parallelism)
            .name("Edge iterators")
        .flatMap(new LinkVertexToNeighbors(vertexCount, dimensions))
            .setParallelism(parallelism)
            .name("Grid graph edges");

    // Graph
    return Graph.fromDataSet(vertices, edges, env);
}
private Plan getWordCountPlan(File inFile, File outFile, int parallelism) {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(parallelism);
    env.readTextFile(inFile.getAbsolutePath())
        .flatMap(new Tokenizer())
        .groupBy(0)
        .sum(1)
        .writeAsCsv(outFile.getAbsolutePath());
    return env.createProgramPlan();
}
@Override
public Graph<LongValue, NullValue, NullValue> generate() {
    Preconditions.checkState(vertexCount >= 2);

    // Vertices
    DataSet<Vertex<LongValue, NullValue>> vertices = GraphGeneratorUtils.vertexSequence(env, parallelism, vertexCount);

    // Edges
    LongValueSequenceIterator iterator = new LongValueSequenceIterator(1, this.vertexCount - 1);

    DataSet<Edge<LongValue, NullValue>> edges = env
        .fromParallelCollection(iterator, LongValue.class)
            .setParallelism(parallelism)
            .name("Edge iterators")
        .flatMap(new LinkVertexToCenter())
            .setParallelism(parallelism)
            .name("Star graph edges");

    // Graph
    return Graph.fromDataSet(vertices, edges, env);
}
@Test
public void testBatchDistributedCache() throws Exception {
    String textPath = createTempFile("count.txt", DATA);

    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.registerCachedFile(textPath, "cache_test");

    env.readTextFile(textPath).flatMap(new WordChecker()).count();
}
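The test registers the file under the name "cache_test", but the body of WordChecker is not shown. A minimal sketch of what such a function could look like, assuming it is a RichFlatMapFunction that resolves the local copy of the cached file in open() and checks incoming words against it; the class body is a guess, while the getDistributedCache().getFile(...) lookup is the documented Flink API for reading a registered cache file:

import java.io.File;
import java.nio.file.Files;
import java.util.HashSet;
import java.util.Set;

import org.apache.flink.api.common.functions.RichFlatMapFunction;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.util.Collector;

// Hypothetical stand-in for the WordChecker used in the test above.
public class WordChecker extends RichFlatMapFunction<String, String> {

    private final Set<String> wordList = new HashSet<>();

    @Override
    public void open(Configuration parameters) throws Exception {
        // Resolve the local copy of the file registered as "cache_test".
        File cachedFile = getRuntimeContext().getDistributedCache().getFile("cache_test");
        for (String line : Files.readAllLines(cachedFile.toPath())) {
            for (String word : line.split("\\s+")) {
                wordList.add(word);
            }
        }
    }

    @Override
    public void flatMap(String line, Collector<String> out) {
        // Forward only words that also appear in the cached file.
        for (String word : line.split("\\s+")) {
            if (wordList.contains(word)) {
                out.collect(word);
            }
        }
    }
}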
.flatMap(new FlatMapFunction<Long, Tuple2<Long, Long>>() {
    @Override
    public void flatMap(Long value, Collector<Tuple2<Long, Long>> out) {
        // Body missing from the original snippet; a minimal completion emits the value keyed by itself.
        out.collect(new Tuple2<>(value, value));
    }
})
@Override
protected void testProgram() throws Exception {
    // set up execution environment
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // read vertex and edge data
    DataSet<Long> vertices = env.fromElements(ConnectedComponentsData.getEnumeratingVertices(NUM_VERTICES).split("\n"))
        .map(new VertexParser());

    DataSet<Tuple2<Long, Long>> edges = env.fromElements(ConnectedComponentsData.getRandomOddEvenEdges(NUM_EDGES, NUM_VERTICES, SEED).split("\n"))
        .flatMap(new EdgeParser());

    // assign the initial components (equal to the vertex id)
    DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices.map(new DuplicateValue<Long>());

    // open a delta iteration
    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
        verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0);

    // apply the step logic: join with the edges, select the minimum neighbor, update if the component of the candidate is smaller
    DataSet<Tuple2<Long, Long>> changes = iteration
        .getWorkset().join(edges).where(0).equalTo(0).with(new NeighborWithComponentIDJoin())
        .coGroup(iteration.getSolutionSet()).where(0).equalTo(0)
        .with(new MinIdAndUpdate());

    // close the delta iteration (delta and new workset are identical)
    DataSet<Tuple2<Long, Long>> result = iteration.closeWith(changes, changes);

    // emit result
    List<Tuple2<Long, Long>> resultTuples = new ArrayList<>();
    result.output(new LocalCollectionOutputFormat<>(resultTuples));

    env.execute();
}
@Override
protected void testProgram() throws Exception {
    // set up execution environment
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // read vertex and edge data
    DataSet<Tuple1<Long>> vertices = env.readCsvFile(verticesPath).types(Long.class);
    DataSet<Tuple2<Long, Long>> edges = env.readCsvFile(edgesPath).fieldDelimiter(" ").types(Long.class, Long.class)
        .flatMap(new ConnectedComponents.UndirectEdge());

    // assign the initial components (equal to the vertex id)
    DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices.map(new DuplicateValue<Long>());

    // open a delta iteration
    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
        verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0);

    // apply the step logic: join with the edges, select the minimum neighbor, update if the component of the candidate is smaller
    DataSet<Tuple2<Long, Long>> changes = iteration.getWorkset().join(edges).where(0).equalTo(0).with(new ConnectedComponents.NeighborWithComponentIDJoin())
        .groupBy(0).aggregate(Aggregations.MIN, 1)
        .join(iteration.getSolutionSet()).where(0).equalTo(0)
        .with(new ConnectedComponents.ComponentIdFilter());

    // close the delta iteration (delta and new workset are identical)
    DataSet<Tuple2<Long, Long>> result = iteration.closeWith(changes, changes);

    result.writeAsCsv(resultPath, "\n", " ");

    // execute program
    env.execute("Connected Components Example");
}
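Several of these test programs reuse UDFs from Flink's ConnectedComponents example without showing them. Sketches of the two most-used ones, written from the example's documented semantics (treat the exact shape as illustrative): UndirectEdge emits each edge in both directions, and NeighborWithComponentIDJoin proposes a vertex's current component id to its neighbor.

import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.functions.JoinFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.util.Collector;

// Emits each undirected edge as two directed edges, so components can spread in both directions.
public static final class UndirectEdge implements FlatMapFunction<Tuple2<Long, Long>, Tuple2<Long, Long>> {
    @Override
    public void flatMap(Tuple2<Long, Long> edge, Collector<Tuple2<Long, Long>> out) {
        out.collect(edge);
        out.collect(new Tuple2<>(edge.f1, edge.f0));
    }
}

// Joins a workset entry (vertexId, componentId) with an edge (vertexId, neighborId)
// and proposes the component id to the neighbor.
public static final class NeighborWithComponentIDJoin
        implements JoinFunction<Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>> {
    @Override
    public Tuple2<Long, Long> join(Tuple2<Long, Long> vertexWithComponent, Tuple2<Long, Long> edge) {
        return new Tuple2<>(edge.f1, vertexWithComponent.f1);
    }
}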
@Override
protected void testProgram() throws Exception {
    // set up execution environment
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // read vertex and edge data
    DataSet<Tuple1<Long>> vertices = env.readCsvFile(verticesPath).types(Long.class);
    DataSet<Tuple2<Long, Long>> edges = env.readCsvFile(edgesPath).fieldDelimiter(" ").types(Long.class, Long.class)
        .flatMap(new ConnectedComponents.UndirectEdge());

    // assign the initial components (equal to the vertex id)
    DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices.map(new ConnectedComponentsITCase.DuplicateValue<Long>());

    // open a delta iteration
    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
        verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0);

    // apply the step logic: join with the edges, select the minimum neighbor, update if the component of the candidate is smaller
    DataSet<Tuple2<Long, Long>> minNeighbor = iteration.getWorkset()
        .join(edges).where(0).equalTo(0).with(new ConnectedComponents.NeighborWithComponentIDJoin())
        .groupBy(0).aggregate(Aggregations.MIN, 1);

    DataSet<Tuple2<Long, Long>> updatedIds = iteration.getSolutionSet()
        .join(minNeighbor).where(0).equalTo(0).with(new UpdateComponentIdMatchMirrored());

    // close the delta iteration (delta and new workset are identical)
    DataSet<Tuple2<Long, Long>> result = iteration.closeWith(updatedIds, updatedIds);

    result.writeAsCsv(resultPath, "\n", " ");

    // execute program
    env.execute("Connected Components Example");
}
@Override
protected void testProgram() throws Exception {
    // set up execution environment
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // read vertex and edge data
    DataSet<Tuple1<Long>> vertices = env.readCsvFile(verticesPath).types(Long.class);
    DataSet<Tuple2<Long, Long>> edges = env.readCsvFile(edgesPath).fieldDelimiter(" ").types(Long.class, Long.class)
        .flatMap(new UndirectEdge());

    // assign the initial components (equal to the vertex id)
    DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices.map(new ConnectedComponentsITCase.DuplicateValue<Long>());

    // open a delta iteration
    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
        verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0);

    // keep the solution set as objects on the heap instead of in managed memory
    iteration.setSolutionSetUnManaged(true);

    // apply the step logic: join with the edges, select the minimum neighbor, update if the component of the candidate is smaller
    DataSet<Tuple2<Long, Long>> changes = iteration.getWorkset().join(edges).where(0).equalTo(0).with(new NeighborWithComponentIDJoin())
        .groupBy(0).aggregate(Aggregations.MIN, 1)
        .join(iteration.getSolutionSet()).where(0).equalTo(0)
        .with(new ComponentIdFilter());

    // close the delta iteration (delta and new workset are identical)
    DataSet<Tuple2<Long, Long>> result = iteration.closeWith(changes, changes);

    result.writeAsCsv(resultPath, "\n", " ");

    // execute program
    env.execute("Connected Components Example");
}
.flatMap(new ConnectedComponents.UndirectEdge());
public DataFlink<DataInstance> sampleToDataFlink(ExecutionEnvironment env, int nSamples) {
    try {
        // Split the requested number of samples into batches; each mapper samples one batch.
        int nBatches = nSamples / this.batchSize;

        DataSet<DataInstance> data = env.generateSequence(0, nBatches - 1)
            .flatMap(new SampleMap(this.localSampler, batchSize, seed));

        // Draw a single sample locally to obtain the attribute metadata.
        Attributes attributes = this.localSampler.sampleToDataStream(1).getAttributes();

        return new DataFlinkWrapper<>(network.getName(), attributes, data);
    } catch (Exception ex) {
        throw new UndeclaredThrowableException(ex);
    }
}
@Override
public Object doOperation(final AddElementsFromFile op, final Context context, final Store store) throws OperationException {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    if (null != op.getParallelism()) {
        env.setParallelism(op.getParallelism());
    }

    // Parse each line of the input file into Gaffer elements.
    final FlatMapOperator<String, Element> builder = env.readTextFile(op.getFilename())
        .flatMap(new GafferMapFunction(String.class, op.getElementGenerator()));

    // Rebalance before writing to the store unless explicitly skipped.
    if (Boolean.parseBoolean(op.getOption(FlinkConstants.SKIP_REBALANCING))) {
        builder.output(new GafferOutput(op, store));
    } else {
        builder.rebalance().output(new GafferOutput(op, store));
    }

    try {
        env.execute(op.getClass().getSimpleName() + "-" + op.getFilename());
    } catch (final Exception e) {
        throw new OperationException("Failed to add elements from file: " + op.getFilename(), e);
    }

    return null;
}
/**
 * Reads the csv file specified by {@link MinimalCSVImporter#path} and converts each valid line
 * to a {@link Vertex}.
 *
 * @param propertyNames list of the property identifier names
 * @param checkReoccurringHeader set to true if each row of the file should be checked for a
 *                               reoccurrence of the column property names
 * @return a {@link DataSet} of all vertices from one specific file
 */
private DataSet<Vertex> readCSVFile(List<String> propertyNames, boolean checkReoccurringHeader) {
    return config.getExecutionEnvironment()
        .readTextFile(path)
        .flatMap(new CsvRowToProperties(tokenSeparator, propertyNames, checkReoccurringHeader))
        .map(new PropertiesToVertex<>(config.getVertexFactory()))
        .returns(config.getVertexFactory().getType());
}
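CsvRowToProperties and PropertiesToVertex are Gradoop internals and are not shown here. As a rough illustration of the first step only, a flatMap of this shape tokenizes a row, skips lines whose token count does not match the header, and emits one key/value map per valid row; plain java.util.Map is used as a stand-in for Gradoop's Properties type, and the skipping rules are guesses at what "valid line" and checkReoccurringHeader mean:

import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.util.Collector;

// Illustrative stand-in for CsvRowToProperties: one property map per valid CSV row.
public class CsvRowToPropertyMap implements FlatMapFunction<String, Map<String, String>> {

    private final String tokenSeparator;
    private final List<String> propertyNames;

    public CsvRowToPropertyMap(String tokenSeparator, List<String> propertyNames) {
        this.tokenSeparator = tokenSeparator;
        this.propertyNames = propertyNames;
    }

    @Override
    public void flatMap(String row, Collector<Map<String, String>> out) {
        // The separator is treated as a regular expression, as with String#split.
        String[] tokens = row.split(tokenSeparator, -1);

        // Skip malformed rows and reoccurring header lines.
        if (tokens.length != propertyNames.size() || row.equals(String.join(tokenSeparator, propertyNames))) {
            return;
        }

        Map<String, String> properties = new HashMap<>();
        for (int i = 0; i < tokens.length; i++) {
            properties.put(propertyNames.get(i), tokens[i]);
        }
        out.collect(properties);
    }
}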
@Override
public Graph<LongValue, NullValue, NullValue> generate() {
    // Vertices
    DataSet<Vertex<LongValue, NullValue>> vertices = GraphGeneratorUtils.vertexSequence(env, parallelism, vertexCount);

    // Edges
    LongValueSequenceIterator iterator = new LongValueSequenceIterator(1, this.vertexCount - 1);

    DataSet<Edge<LongValue, NullValue>> edges = env
        .fromParallelCollection(iterator, LongValue.class)
            .setParallelism(parallelism)
            .name("Edge iterators")
        .flatMap(new LinkVertexToCenter())
            .setParallelism(parallelism)
            .name("Star graph edges");

    // Graph
    return Graph.fromDataSet(vertices, edges, env);
}