/**
 * Default transformation from a Gradoop Graph to a Gelly Graph.
 *
 * @param graph Gradoop Graph.
 * @return Gelly Graph.
 */
public Graph<GradoopId, VV, EV> transformToGelly(LogicalGraph graph) {
  // convert Gradoop vertices/edges into their Gelly counterparts
  DataSet<Vertex<GradoopId, VV>> vertices = graph.getVertices().map(toGellyVertex);
  DataSet<Edge<GradoopId, EV>> edges = graph.getEdges().map(toGellyEdge);
  // assemble the Gelly graph on the same execution environment
  return Graph.fromDataSet(vertices, edges, graph.getConfig().getExecutionEnvironment());
}
/**
 * Reads the JSON source line by line, turning each line into a vertex,
 * and wraps the resulting vertices into a logical graph.
 *
 * @return logical graph containing the imported vertices
 */
@Override
public LogicalGraph getLogicalGraph() {
  DataSet<Vertex> importedVertices = config.getExecutionEnvironment()
    .readTextFile(jsonPath)
    .map(new MinimalJsonToVertex(config.getVertexFactory()));
  return config.getLogicalGraphFactory().fromDataSets(importedVertices);
}
/**
 * Reads the file at the given path line-wise and splits every line into a
 * three-token tuple using the CSV token delimiter.
 *
 * @param path file to read
 * @param config Gradoop Flink configuration providing the execution environment
 * @return dataset of (token1, token2, token3) tuples, one per input line
 */
@Override
public DataSet<Tuple3<String, String, String>> readDistributed(String path, GradoopFlinkConfig config) {
  DataSet<String> lines = config.getExecutionEnvironment().readTextFile(path);
  return lines
    .map(line -> {
      String[] tokens = StringEscaper.split(line, CSVConstants.TOKEN_DELIMITER, 3);
      return Tuple3.of(tokens[0], tokens[1], tokens[2]);
    })
    // lambdas lose generic type information, so declare the tuple type explicitly
    .returns(new TypeHint<Tuple3<String, String, String>>() { });
}
/**
 * Default transformation from a Gradoop Graph to a Gelly Graph.
 *
 * @param graph Gradoop Graph.
 * @return Gelly Graph.
 */
public Graph<GradoopId, VV, EV> transformToGelly(LogicalGraph graph) {
  // build the Gelly graph directly from the mapped vertex and edge datasets
  return Graph.fromDataSet(
    graph.getVertices().map(toGellyVertex),
    graph.getEdges().map(toGellyEdge),
    graph.getConfig().getExecutionEnvironment());
}
/**
 * Splits each line of the given file into exactly three tokens and emits
 * them as a string triple.
 *
 * @param path path of the file to read
 * @param config configuration supplying the Flink execution environment
 * @return dataset with one string triple per input line
 */
@Override
public DataSet<Tuple3<String, String, String>> readDistributed(String path, GradoopFlinkConfig config) {
  return config.getExecutionEnvironment()
    .readTextFile(path)
    .map(row -> StringEscaper.split(row, CSVConstants.TOKEN_DELIMITER, 3))
    .map(fields -> Tuple3.of(fields[0], fields[1], fields[2]))
    // explicit type hint because lambda return types are erased
    .returns(new TypeHint<Tuple3<String, String, String>>() { });
}
/**
 * Reads the input as dataset of TLFGraphs.
 *
 * @return io graphs
 */
public DataSet<LabeledGraphStringString> getGraphs() throws IOException {
  // wrap the Hadoop TLF input format so Flink can read the file,
  // then parse each (key, text) record into a labeled graph
  return getConfig().getExecutionEnvironment()
    .createInput(HadoopInputs.readHadoopFile(
      new TLFInputFormat(), LongWritable.class, Text.class, getFilePath()))
    .map(new DIMSpanGraphFromText());
}
@Override public GVELayout fromDataSets(DataSet<Vertex> vertices, DataSet<Edge> edges) { Objects.requireNonNull(vertices, "Vertex DataSet was null"); Objects.requireNonNull(edges, "Edge DataSet was null"); GraphHead graphHead = getConfig() .getGraphHeadFactory() .createGraphHead(); DataSet<GraphHead> graphHeadSet = getConfig().getExecutionEnvironment() .fromElements(graphHead); // update vertices and edges with new graph head id vertices = vertices .map(new AddToGraph<>(graphHead)) .withForwardedFields("id;label;properties"); edges = edges .map(new AddToGraph<>(graphHead)) .withForwardedFields("id;sourceId;targetId;label;properties"); return new GVELayout(graphHeadSet, vertices, edges); }
@Override public DataSet<String> execute(GraphCollection collection) { // 1-10. DataSet<GraphHeadString> graphHeadLabels = getGraphHeadStrings(collection); // 11. add empty head to prevent empty result for empty collection graphHeadLabels = graphHeadLabels .union(collection .getConfig() .getExecutionEnvironment() .fromElements(new GraphHeadString(GradoopId.get(), ""))); // 12. label collection return graphHeadLabels .reduceGroup(new ConcatGraphHeadStrings()); }
@Override public DataSet<String> execute(GraphCollection collection) { // 1-10. DataSet<GraphHeadString> graphHeadLabels = getGraphHeadStrings(collection); // 11. add empty head to prevent empty result for empty collection graphHeadLabels = graphHeadLabels .union(collection .getConfig() .getExecutionEnvironment() .fromElements(new GraphHeadString(GradoopId.get(), ""))); // 12. label collection return graphHeadLabels .reduceGroup(new ConcatGraphHeadStrings()); }
/**
 * Reads the csv file specified by {@link MinimalCSVImporter#path} and converts each valid line
 * to a {@link Vertex}.
 *
 * @param propertyNames list of the property identifier names
 * @param checkReoccurringHeader set to true if each row of the file should be checked for
 *                               reoccurring of the column property names
 * @return a {@link DataSet} of all vertices from one specific file
 */
private DataSet<Vertex> readCSVFile(List<String> propertyNames, boolean checkReoccurringHeader) {
  DataSet<String> rows = config.getExecutionEnvironment().readTextFile(path);
  return rows
    // split each row into properties, skipping invalid / header rows
    .flatMap(new CsvRowToProperties(tokenSeparator, propertyNames, checkReoccurringHeader))
    // wrap the extracted properties into vertex instances
    .map(new PropertiesToVertex<>(config.getVertexFactory()))
    .returns(config.getVertexFactory().getType());
}
/**
 * Checks whether this graph contains any vertices.
 * <p>
 * Each vertex is mapped to {@code true} and deduplicated; the union with a
 * constant {@code false} guarantees the reduce has input even when there are
 * no vertices. The OR-reduction is then negated, so the single result value
 * is {@code true} exactly when the graph has no vertices.
 *
 * @return dataset holding a single boolean that is true iff the graph is empty
 */
@Override
public DataSet<Boolean> isEmpty() {
  return getVertices()
    .map(new True<>())
    .distinct()
    // seed with false so an empty vertex set still produces a result
    .union(getConfig().getExecutionEnvironment().fromElements(false))
    .reduce(new Or())
    .map(new Not());
}
/**
 * Determines whether the graph has no vertices.
 *
 * @return single-element dataset: true if there is no vertex, false otherwise
 */
@Override
public DataSet<Boolean> isEmpty() {
  // any existing vertex contributes a single distinct "true"
  DataSet<Boolean> hasVertices = getVertices()
    .map(new True<>())
    .distinct();
  // the constant false keeps the reduce non-empty; OR then NOT inverts it
  return hasVertices
    .union(getConfig().getExecutionEnvironment().fromElements(false))
    .reduce(new Or())
    .map(new Not());
}
/**
 * Determines whether this collection contains no graph heads.
 *
 * @return single-element dataset: true if there is no graph head
 */
@Override
public DataSet<Boolean> isEmpty() {
  // any existing graph head contributes a single distinct "true"
  DataSet<Boolean> hasGraphs = getGraphHeads()
    .map(new True<>())
    .distinct();
  // union with a constant false so the reduce always has input,
  // then OR and negate to obtain the emptiness flag
  return hasGraphs
    .union(getConfig().getExecutionEnvironment().fromElements(false))
    .reduce(new Or())
    .map(new Not());
}
/**
 * Checks whether this collection contains any graph heads.
 * <p>
 * Graph heads are mapped to {@code true} and deduplicated; the union with a
 * constant {@code false} keeps the reduce from running on an empty dataset.
 * The OR-reduction is negated, so the result is {@code true} exactly when
 * there are no graph heads.
 *
 * @return dataset holding a single boolean that is true iff the collection is empty
 */
@Override
public DataSet<Boolean> isEmpty() {
  return getGraphHeads()
    .map(new True<>())
    .distinct()
    // seed with false so an empty collection still produces a result
    .union(getConfig().getExecutionEnvironment().fromElements(false))
    .reduce(new Or())
    .map(new Not());
}
/**
 * Reads a logical graph from vertex and edge CSV files, using the metadata
 * file (broadcast to the parsers) to decode labels and properties.
 *
 * @return logical graph assembled from the CSV sources
 */
@Override
public LogicalGraph getLogicalGraph() {
  // metadata triples are broadcast to both line parsers
  DataSet<Tuple3<String, String, String>> csvMetaData =
    MetaData.fromFile(getMetaDataPath(), getConfig());

  DataSet<Vertex> vertexSet = getConfig().getExecutionEnvironment()
    .readTextFile(getVertexCSVPath())
    .map(new CSVLineToVertex(getConfig().getVertexFactory()))
    .withBroadcastSet(csvMetaData, BC_METADATA);

  DataSet<Edge> edgeSet = getConfig().getExecutionEnvironment()
    .readTextFile(getEdgeCSVPath())
    .map(new CSVLineToEdge(getConfig().getEdgeFactory()))
    .withBroadcastSet(csvMetaData, BC_METADATA);

  return getConfig().getLogicalGraphFactory().fromDataSets(vertexSet, edgeSet);
}
/**
 * Imports a logical graph from CSV files. The metadata file is read first
 * and broadcast to the vertex and edge line parsers.
 *
 * @return logical graph built from the CSV vertex and edge files
 */
@Override
public LogicalGraph getLogicalGraph() {
  DataSet<Tuple3<String, String, String>> meta =
    MetaData.fromFile(getMetaDataPath(), getConfig());
  // parse vertices, supplying the metadata via broadcast variable
  DataSet<Vertex> importedVertices = getConfig().getExecutionEnvironment()
    .readTextFile(getVertexCSVPath())
    .map(new CSVLineToVertex(getConfig().getVertexFactory()))
    .withBroadcastSet(meta, BC_METADATA);
  // parse edges the same way
  DataSet<Edge> importedEdges = getConfig().getExecutionEnvironment()
    .readTextFile(getEdgeCSVPath())
    .map(new CSVLineToEdge(getConfig().getEdgeFactory()))
    .withBroadcastSet(meta, BC_METADATA);
  return getConfig().getLogicalGraphFactory().fromDataSets(importedVertices, importedEdges);
}
/**
 * Returns a collection of all logical graph contained in the database.
 *
 * @return collection of all logical graphs
 */
public GraphCollection getGraphCollection() {
  ExecutionEnvironment env = config.getExecutionEnvironment();
  // keep only elements that belong to at least one logical graph
  DataSet<Vertex> containedVertices = env.fromCollection(getVertices())
    .filter(v -> v.getGraphCount() > 0);
  DataSet<Edge> containedEdges = env.fromCollection(getEdges())
    .filter(e -> e.getGraphCount() > 0);
  return config.getGraphCollectionFactory()
    .fromDataSets(env.fromCollection(getGraphHeads()), containedVertices, containedEdges);
}
/**
 * Returns a collection of all logical graph contained in the database.
 *
 * @return collection of all logical graphs
 */
public GraphCollection getGraphCollection() {
  ExecutionEnvironment env = config.getExecutionEnvironment();
  // drop vertices and edges that are not part of any logical graph
  DataSet<Vertex> vertexSubset =
    env.fromCollection(getVertices()).filter(vertex -> vertex.getGraphCount() > 0);
  DataSet<Edge> edgeSubset =
    env.fromCollection(getEdges()).filter(edge -> edge.getGraphCount() > 0);
  return config.getGraphCollectionFactory().fromDataSets(
    env.fromCollection(getGraphHeads()),
    vertexSubset,
    edgeSubset);
}
/**
 * {@inheritDoc}
 *
 * Calls Flink Gelly algorithms to compute the global clustering coefficient for a directed graph.
 */
@Override
protected LogicalGraph executeInternal(Graph<GradoopId, NullValue, NullValue> gellyGraph)
  throws Exception {
  // run Gelly's directed global clustering coefficient analytic on the graph
  GlobalClusteringCoefficient global = new org.apache.flink.graph.library.clustering.directed
    .GlobalClusteringCoefficient<GradoopId, NullValue, NullValue>().run(gellyGraph);
  // NOTE: the job must be executed BEFORE reading the analytic's result;
  // getResult() is only populated after this eager execution
  currentGraph.getConfig().getExecutionEnvironment().execute();
  double globalValue = global.getResult().getGlobalClusteringCoefficientScore();
  // attach the computed score as a property on the graph head
  DataSet<GraphHead> resultHead = currentGraph.getGraphHead()
    .map(new WritePropertyToGraphHeadMap(ClusteringCoefficientBase.PROPERTY_KEY_GLOBAL,
      PropertyValue.create(globalValue)));
  // rebuild the logical graph with the annotated head and the original elements
  return currentGraph.getConfig().getLogicalGraphFactory().fromDataSets(
    resultHead, currentGraph.getVertices(), currentGraph.getEdges());
}
/**
 * Reads a graph collection from the store by creating one Flink input format
 * per element type, each parameterized with the store's Accumulo properties
 * and the corresponding element query.
 *
 * @return graph collection assembled from the three element input formats
 */
@Override
public GraphCollection getGraphCollection() {
  GraphCollectionFactory factory = getFlinkConfig().getGraphCollectionFactory();
  ExecutionEnvironment env = getFlinkConfig().getExecutionEnvironment();
  return factory.fromDataSets(
    /*graph head format*/
    env.createInput(new GraphHeadInputFormat(
      getStore().getConfig().getAccumuloProperties(), graphHeadQuery)),
    /*vertex input format*/
    env.createInput(new VertexInputFormat(getStore().getConfig().getAccumuloProperties(),
      vertexQuery)),
    /*edge input format*/
    env.createInput(new EdgeInputFormat(getStore().getConfig().getAccumuloProperties(),
      edgeQuery)));
}