solution.closeWith(env.generateSequence(1, 2).filter(new RichFilterFunction<Long>() { @Override public void open(Configuration parameters) throws Exception{
.filter(w -> !w.isEmpty()) .filter(w -> !w.startsWith("%")) .filter(line -> line.startsWith("@attribute"))
/**
 * Creates a dataset from a given (possibly empty) collection of graph transactions.
 *
 * @param transactions graph transactions
 * @return a dataset containing the given transactions
 */
private DataSet<GraphTransaction> createGraphTransactionDataSet(
  Collection<GraphTransaction> transactions) {
  ExecutionEnvironment env = getConfig().getExecutionEnvironment();
  if (!transactions.isEmpty()) {
    return env.fromCollection(transactions);
  }
  // Workaround: Flink cannot build a DataSet from an empty collection, so
  // create a single dummy transaction and filter every element out again.
  return env
    .fromCollection(Lists.newArrayList(new GraphTransaction()),
      new TypeHint<GraphTransaction>() { }.getTypeInfo())
    .filter(new False<>());
}
}
/**
 * Creates a dataset from a given (possibly empty) collection of graph transactions.
 *
 * @param transactions graph transactions
 * @return a dataset containing the given transactions
 */
private DataSet<GraphTransaction> createGraphTransactionDataSet(
  Collection<GraphTransaction> transactions) {
  ExecutionEnvironment environment = getConfig().getExecutionEnvironment();
  // Empty collections are not accepted by Flink; fall back to a dummy
  // element that is immediately rejected by the False<> predicate.
  DataSet<GraphTransaction> result = transactions.isEmpty()
    ? environment
        .fromCollection(Lists.newArrayList(new GraphTransaction()),
          new TypeHint<GraphTransaction>() { }.getTypeInfo())
        .filter(new False<>())
    : environment.fromCollection(transactions);
  return result;
}
}
/**
 * Loads the data section of the ARFF file as a dataset of data instances.
 * Blank lines, comment lines ("%") and header lines ("@attribute",
 * "@relation", "@data") are discarded; the remaining rows are parsed by
 * {@code DataInstanceBuilder} with the attributes broadcast to each task.
 *
 * @param env execution environment used to read and transform the file
 * @return dataset of parsed data instances
 */
private DataSet<DataInstance> loadDataSet(ExecutionEnvironment env) {
  // Lazily parse the ARFF header the first time data is requested.
  if (attributes == null) {
    this.loadHeader(env);
  }
  DataSet<Attributes> attributesDataSet = env.fromElements(attributes);
  DataSource<String> rawLines = env.readTextFile(pathFileData);
  Configuration parameters = new Configuration();
  parameters.setString(DataFlinkLoader.RELATION_NAME, this.relationName);
  // Keep only genuine data rows.
  DataSet<String> dataLines = rawLines
    .filter(line -> !line.isEmpty())
    .filter(line -> !line.startsWith("%"))
    .filter(line -> !line.startsWith("@attribute"))
    .filter(line -> !line.startsWith("@relation"))
    .filter(line -> !line.startsWith("@data"));
  return dataLines
    .map(new DataInstanceBuilder(isNormalize()))
    .withParameters(parameters)
    .withBroadcastSet(attributesDataSet,
      DataFlinkLoader.ATTRIBUTES_NAME + "_" + this.relationName);
}
/**
 * Returns a collection of all logical graphs contained in the database.
 *
 * @return collection of all logical graphs
 */
public GraphCollection getGraphCollection() {
  ExecutionEnvironment env = config.getExecutionEnvironment();
  // Drop vertices and edges that are not contained in any logical graph.
  DataSet<Vertex> containedVertices = env.fromCollection(getVertices())
    .filter(v -> v.getGraphCount() > 0);
  DataSet<Edge> containedEdges = env.fromCollection(getEdges())
    .filter(e -> e.getGraphCount() > 0);
  DataSet<GraphHead> heads = env.fromCollection(getGraphHeads());
  return config.getGraphCollectionFactory()
    .fromDataSets(heads, containedVertices, containedEdges);
}
/**
 * Returns a collection of all logical graphs contained in the database.
 *
 * @return collection of all logical graphs
 */
public GraphCollection getGraphCollection() {
  ExecutionEnvironment environment = config.getExecutionEnvironment();
  DataSet<GraphHead> graphHeads = environment.fromCollection(getGraphHeads());
  // Only elements referenced by at least one logical graph are kept.
  DataSet<Vertex> filteredVertices = environment
    .fromCollection(getVertices())
    .filter(vertex -> vertex.getGraphCount() > 0);
  DataSet<Edge> filteredEdges = environment
    .fromCollection(getEdges())
    .filter(edge -> edge.getGraphCount() > 0);
  return config.getGraphCollectionFactory()
    .fromDataSets(graphHeads, filteredVertices, filteredEdges);
}
/**
 * Creates a graph head dataset from a given collection.
 * Encapsulates the workaround for dataset creation from an empty collection.
 *
 * @param graphHeads graph heads
 * @return graph head dataset
 */
protected DataSet<GraphHead> createGraphHeadDataSet(Collection<GraphHead> graphHeads) {
  ExecutionEnvironment env = getConfig().getExecutionEnvironment();
  if (!graphHeads.isEmpty()) {
    return env.fromCollection(graphHeads);
  }
  // Flink rejects empty collections: create one dummy graph head and
  // immediately filter it out again via the always-false predicate.
  return env
    .fromElements(getConfig().getGraphHeadFactory().createGraphHead())
    .filter(new False<>());
}
/**
 * Creates a graph head dataset from a given collection.
 * Encapsulates the workaround for dataset creation from an empty collection.
 *
 * @param graphHeads graph heads
 * @return graph head dataset
 */
protected DataSet<GraphHead> createGraphHeadDataSet(Collection<GraphHead> graphHeads) {
  ExecutionEnvironment environment = getConfig().getExecutionEnvironment();
  // For an empty input, materialize a dummy element and discard it again,
  // since Flink cannot create a dataset from an empty collection.
  DataSet<GraphHead> result = graphHeads.isEmpty()
    ? environment
        .fromElements(getConfig().getGraphHeadFactory().createGraphHead())
        .filter(new False<>())
    : environment.fromCollection(graphHeads);
  return result;
}
/**
 * Creates a vertex dataset from a given collection.
 * Encapsulates the workaround for dataset creation from an empty collection.
 *
 * @param vertices vertex collection
 * @return vertex dataset
 */
protected DataSet<Vertex> createVertexDataSet(Collection<Vertex> vertices) {
  ExecutionEnvironment env = getConfig().getExecutionEnvironment();
  if (!vertices.isEmpty()) {
    return env.fromCollection(vertices);
  }
  // Flink rejects empty collections: create one dummy vertex and
  // immediately filter it out again via the always-false predicate.
  return env
    .fromElements(getConfig().getVertexFactory().createVertex())
    .filter(new False<>());
}
/**
 * Returns an empty dataset of the dummy element's type.
 * Encapsulates the workaround for creating an empty dataset in Flink:
 * a single-element dataset is built and every element is filtered out.
 *
 * @param dummy throwaway element used only to fix the dataset's type
 * @param <T> EPGM element type of the resulting dataset
 * @return dataset containing no elements
 */
protected <T extends EPGMElement> DataSet<T> getEmptyDataSet(T dummy) {
  DataSet<T> singleton = getExecutionEnvironment().fromElements(dummy);
  // False<> rejects every element, yielding an empty dataset of type T.
  return singleton.filter(new False<>());
}
/**
 * Creates a vertex dataset from a given collection.
 * Encapsulates the workaround for dataset creation from an empty collection.
 *
 * @param vertices vertex collection
 * @return vertex dataset
 */
protected DataSet<Vertex> createVertexDataSet(Collection<Vertex> vertices) {
  ExecutionEnvironment environment = getConfig().getExecutionEnvironment();
  // For an empty input, materialize a dummy vertex and discard it again,
  // since Flink cannot create a dataset from an empty collection.
  DataSet<Vertex> result = vertices.isEmpty()
    ? environment
        .fromElements(getConfig().getVertexFactory().createVertex())
        .filter(new False<>())
    : environment.fromCollection(vertices);
  return result;
}
/**
 * Creates an edge dataset from a given collection.
 * Encapsulates the workaround for dataset creation from an empty collection.
 *
 * @param edges edge collection
 * @return edge dataset
 */
protected DataSet<Edge> createEdgeDataSet(Collection<Edge> edges) {
  ExecutionEnvironment env = getConfig().getExecutionEnvironment();
  if (!edges.isEmpty()) {
    return env.fromCollection(edges);
  }
  // Flink rejects empty collections: create one dummy self-loop edge and
  // immediately filter it out again via the always-false predicate.
  GradoopId dummyId = GradoopId.get();
  return env
    .fromElements(getConfig().getEdgeFactory().createEdge(dummyId, dummyId))
    .filter(new False<>());
}
}
/**
 * Creates an edge dataset from a given collection.
 * Encapsulates the workaround for dataset creation from an empty collection.
 *
 * @param edges edge collection
 * @return edge dataset
 */
protected DataSet<Edge> createEdgeDataSet(Collection<Edge> edges) {
  ExecutionEnvironment environment = getConfig().getExecutionEnvironment();
  if (edges.isEmpty()) {
    // Flink cannot create a dataset from an empty collection; build a
    // dummy self-loop edge and filter every element out again.
    GradoopId placeholderId = GradoopId.get();
    Edge placeholder =
      getConfig().getEdgeFactory().createEdge(placeholderId, placeholderId);
    return environment.fromElements(placeholder).filter(new False<>());
  }
  return environment.fromCollection(edges);
}
}
/**
 * Creates a new data source. Paths can be local (file://) or HDFS (hdfs://).
 *
 * @param tlfPath tlf data file
 * @param tlfVertexDictionaryPath tlf vertex dictionary file
 * @param tlfEdgeDictionaryPath tlf edge dictionary file
 * @param config Gradoop Flink configuration
 * @throws Exception if creating the Hadoop input for a dictionary fails
 */
public TLFDataSource(String tlfPath, String tlfVertexDictionaryPath,
  String tlfEdgeDictionaryPath, GradoopFlinkConfig config) throws Exception {
  super(tlfPath, tlfVertexDictionaryPath, tlfEdgeDictionaryPath, config);
  ExecutionEnvironment env = config.getExecutionEnvironment();
  // The vertex and edge dictionaries are read identically; the duplicated
  // pipeline is factored into readDictionary(..).
  if (hasVertexDictionary()) {
    setVertexDictionary(readDictionary(env, getTLFVertexDictionaryPath()));
  }
  if (hasEdgeDictionary()) {
    setEdgeDictionary(readDictionary(env, getTLFEdgeDictionaryPath()));
  }
}

/**
 * Reads a TLF label dictionary from the given path.
 *
 * @param env execution environment used to create the Hadoop input
 * @param dictionaryPath path to the dictionary file
 * @return dataset holding a single map from label id to label string
 * @throws Exception if the Hadoop input cannot be created
 */
private static DataSet<Map<Integer, String>> readDictionary(
  ExecutionEnvironment env, String dictionaryPath) throws Exception {
  return env.createInput(HadoopInputs.readHadoopFile(
      new TextInputFormat(), LongWritable.class, Text.class, dictionaryPath))
    .filter(t -> !t.f1.toString().isEmpty())
    .map(new DictionaryEntry())
    .reduceGroup(new Dictionary());
}
/**
 * Creates a new data source. Paths can be local (file://) or HDFS (hdfs://).
 *
 * @param tlfPath tlf data file
 * @param tlfVertexDictionaryPath tlf vertex dictionary file
 * @param tlfEdgeDictionaryPath tlf edge dictionary file
 * @param config Gradoop Flink configuration
 * @throws Exception if creating the Hadoop input for a dictionary fails
 */
public TLFDataSource(String tlfPath, String tlfVertexDictionaryPath,
  String tlfEdgeDictionaryPath, GradoopFlinkConfig config) throws Exception {
  super(tlfPath, tlfVertexDictionaryPath, tlfEdgeDictionaryPath, config);
  ExecutionEnvironment environment = config.getExecutionEnvironment();
  if (hasVertexDictionary()) {
    // Read the vertex label dictionary; blank entries are skipped.
    DataSet<Map<Integer, String>> vertexDictionary = environment
      .createInput(HadoopInputs.readHadoopFile(new TextInputFormat(),
        LongWritable.class, Text.class, getTLFVertexDictionaryPath()))
      .filter(tuple -> !tuple.f1.toString().isEmpty())
      .map(new DictionaryEntry())
      .reduceGroup(new Dictionary());
    setVertexDictionary(vertexDictionary);
  }
  if (hasEdgeDictionary()) {
    // Read the edge label dictionary; blank entries are skipped.
    DataSet<Map<Integer, String>> edgeDictionary = environment
      .createInput(HadoopInputs.readHadoopFile(new TextInputFormat(),
        LongWritable.class, Text.class, getTLFEdgeDictionaryPath()))
      .filter(tuple -> !tuple.f1.toString().isEmpty())
      .map(new DictionaryEntry())
      .reduceGroup(new Dictionary());
    setEdgeDictionary(edgeDictionary);
  }
}
input.add(vertex2); List<Vertex> result = getExecutionEnvironment().fromCollection(input) .filter(new Or<>(new And<Vertex>(new IdInBroadcast<>(), new IdInBroadcast<>()), new IdInBroadcast<>()).negate().negate()) .withBroadcastSet(getExecutionEnvironment()