/**
 * Configures the delimiter that separates fields in a row for the CSV reader used to read the
 * edges. The comma ({@code ','}) is used by default.
 *
 * @param delimiter The delimiter that separates the fields in a row.
 * @return The GraphCsv reader instance itself, to allow for fluent function chaining.
 */
public GraphCsvReader fieldDelimiterEdges(String delimiter) {
    // Forward the setting to the reader responsible for the edge file.
    edgeReader.fieldDelimiter(delimiter);
    return this;
}
/**
 * Configures the delimiter that separates fields in a row for the CSV reader used to read the
 * vertices. The comma ({@code ','}) is used by default.
 *
 * <p>If no vertex reader is set, nothing is configured and this instance is returned as is.
 *
 * @param delimiter The delimiter that separates the fields in a row.
 * @return The GraphCsv reader instance itself, to allow for fluent function chaining.
 */
public GraphCsvReader fieldDelimiterVertices(String delimiter) {
    // Nothing to configure when there is no vertex reader.
    if (this.vertexReader == null) {
        return this;
    }
    this.vertexReader.fieldDelimiter(delimiter);
    return this;
}
private static DataSet<Tuple3<Integer, String, Integer>> getRanksDataSet(ExecutionEnvironment env, ParameterTool params) { // Create DataSet for ranks relation (Rank, URL, Avg-Visit-Duration) if (params.has("ranks")) { return env.readCsvFile(params.get("ranks")) .fieldDelimiter("|") .types(Integer.class, String.class, Integer.class); } else { System.out.println("Executing WebLogAnalysis example with default ranks data set."); System.out.println("Use --ranks to specify file input."); return WebLogData.getRankDataSet(env); } }
/**
 * Reads the orders file as CSV into a data set of {@code Order} tuples.
 *
 * <p>Fields are separated by {@code '|'}; the include mask handed to the reader marks four
 * columns with {@code '1'}.
 *
 * @param env the execution environment used to create the CSV source
 * @param ordersPath the path of the orders file
 * @return the orders data set
 */
private static DataSet<Order> getOrdersDataSet(ExecutionEnvironment env, String ordersPath) {
    final String columnMask = "110010010";
    return env.readCsvFile(ordersPath)
            .fieldDelimiter("|")
            .includeFields(columnMask)
            .tupleType(Order.class);
}
private static DataSet<Tuple2<String, String>> getDocumentsDataSet(ExecutionEnvironment env, ParameterTool params) { // Create DataSet for documents relation (URL, Doc-Text) if (params.has("documents")) { return env.readCsvFile(params.get("documents")) .fieldDelimiter("|") .types(String.class, String.class); } else { System.out.println("Executing WebLogAnalysis example with default documents data set."); System.out.println("Use --documents to specify file input."); return WebLogData.getDocumentDataSet(env); } }
/**
 * Reads the lineitem file as CSV into a data set of {@code Lineitem} tuples.
 *
 * <p>Fields are separated by {@code '|'}; the include mask handed to the reader marks four
 * columns with {@code '1'}.
 *
 * @param env the execution environment used to create the CSV source
 * @param lineitemPath the path of the lineitem file
 * @return the lineitem data set
 */
private static DataSet<Lineitem> getLineitemDataSet(ExecutionEnvironment env, String lineitemPath) {
    final String columnMask = "1000011000100000";
    return env.readCsvFile(lineitemPath)
            .fieldDelimiter("|")
            .includeFields(columnMask)
            .tupleType(Lineitem.class);
}
/**
 * Reads the nation file as CSV into a data set of (Integer, String) tuples.
 *
 * <p>Fields are separated by {@code '|'}; the include mask handed to the reader marks the
 * first two of four columns with {@code '1'}.
 *
 * @param env the execution environment used to create the CSV source
 * @param nationPath the path of the nation file
 * @return the nations data set
 */
private static DataSet<Tuple2<Integer, String>> getNationsDataSet(ExecutionEnvironment env, String nationPath) {
    final String columnMask = "1100";
    return env.readCsvFile(nationPath)
            .fieldDelimiter("|")
            .includeFields(columnMask)
            .types(Integer.class, String.class);
}
}
/**
 * Reads the customer file as CSV into a data set of {@code Customer} tuples.
 *
 * <p>Fields are separated by {@code '|'}; the include mask handed to the reader marks two
 * columns with {@code '1'}.
 *
 * @param env the execution environment used to create the CSV source
 * @param customerPath the path of the customer file
 * @return the customer data set
 */
private static DataSet<Customer> getCustomerDataSet(ExecutionEnvironment env, String customerPath) {
    final String columnMask = "10000010";
    return env.readCsvFile(customerPath)
            .fieldDelimiter("|")
            .includeFields(columnMask)
            .tupleType(Customer.class);
}
/**
 * Reads the orders file as CSV into a data set of (Integer, Integer, String) tuples.
 *
 * <p>Fields are separated by {@code '|'}; the include mask handed to the reader marks three
 * columns with {@code '1'}, matching the three declared field types.
 *
 * @param env the execution environment used to create the CSV source
 * @param ordersPath the path of the orders file
 * @return the orders data set
 */
private static DataSet<Tuple3<Integer, Integer, String>> getOrdersDataSet(ExecutionEnvironment env, String ordersPath) {
    final String columnMask = "110010000";
    return env.readCsvFile(ordersPath)
            .fieldDelimiter("|")
            .includeFields(columnMask)
            .types(Integer.class, Integer.class, String.class);
}
/**
 * Provides the edge data set as pairs of {@code Long} values.
 *
 * <p>If the {@code edges} parameter is given, its value is read as a CSV file whose fields are
 * separated by a single space. Otherwise a hint is printed to standard output and the default
 * edge data set from {@code ConnectedComponentsData} is returned.
 *
 * @param env the execution environment used to create the data set
 * @param params the program parameters, optionally containing {@code edges}
 * @return the edge data set
 */
private static DataSet<Tuple2<Long, Long>> getEdgeDataSet(ExecutionEnvironment env, ParameterTool params) {
    // Fall back to the built-in data when no input file was specified.
    if (!params.has("edges")) {
        System.out.println("Executing Connected Components example with default edges data set.");
        System.out.println("Use --edges to specify file input.");
        return ConnectedComponentsData.getDefaultEdgeDataSet(env);
    }

    final String edgesPath = params.get("edges");
    return env.readCsvFile(edgesPath)
            .fieldDelimiter(" ")
            .types(Long.class, Long.class);
}
}
/**
 * Reads the customer file as CSV into a data set of
 * (Integer, String, String, Integer, Double) tuples.
 *
 * <p>Fields are separated by {@code '|'}; the include mask handed to the reader marks five
 * columns with {@code '1'}, matching the five declared field types.
 *
 * @param env the execution environment used to create the CSV source
 * @param customerPath the path of the customer file
 * @return the customer data set
 */
private static DataSet<Tuple5<Integer, String, String, Integer, Double>> getCustomerDataSet(ExecutionEnvironment env, String customerPath) {
    final String columnMask = "11110100";
    return env.readCsvFile(customerPath)
            .fieldDelimiter("|")
            .includeFields(columnMask)
            .types(Integer.class, String.class, String.class, Integer.class, Double.class);
}
/**
 * Reads the lineitem file as CSV into a data set of (Integer, Double, Double, String) tuples.
 *
 * <p>Fields are separated by {@code '|'}; the include mask handed to the reader marks four
 * columns with {@code '1'}, matching the four declared field types.
 *
 * @param env the execution environment used to create the CSV source
 * @param lineitemPath the path of the lineitem file
 * @return the lineitem data set
 */
private static DataSet<Tuple4<Integer, Double, Double, String>> getLineitemDataSet(ExecutionEnvironment env, String lineitemPath) {
    final String columnMask = "1000011010000000";
    return env.readCsvFile(lineitemPath)
            .fieldDelimiter("|")
            .includeFields(columnMask)
            .types(Integer.class, Double.class, Double.class, String.class);
}
/**
 * Provides the centroid data set.
 *
 * <p>If the {@code centroids} parameter is given, its value is read as a space-delimited CSV
 * file and mapped onto the {@code Centroid} fields {@code id}, {@code x} and {@code y}.
 * Otherwise a hint is printed to standard output and the default centroid data set from
 * {@code KMeansData} is returned.
 *
 * @param params the program parameters, optionally containing {@code centroids}
 * @param env the execution environment used to create the data set
 * @return the centroid data set
 */
private static DataSet<Centroid> getCentroidDataSet(ParameterTool params, ExecutionEnvironment env) {
    // Fall back to the built-in data when no input file was specified.
    if (!params.has("centroids")) {
        System.out.println("Executing K-Means example with default centroid data set.");
        System.out.println("Use --centroids to specify file input.");
        return KMeansData.getDefaultCentroidDataSet(env);
    }

    final String centroidsPath = params.get("centroids");
    return env.readCsvFile(centroidsPath)
            .fieldDelimiter(" ")
            .pojoType(Centroid.class, "id", "x", "y");
}
private static DataSet<Point> getPointDataSet(ParameterTool params, ExecutionEnvironment env) { DataSet<Point> points; if (params.has("points")) { // read points from CSV file points = env.readCsvFile(params.get("points")) .fieldDelimiter(" ") .pojoType(Point.class, "x", "y"); } else { System.out.println("Executing K-Means example with default point data set."); System.out.println("Use --points to specify file input."); points = KMeansData.getDefaultPointDataSet(env); } return points; }
/**
 * Provides the input data set of {@code StringTriple} records.
 *
 * <p>If the {@code input} parameter is given, its value is read as a CSV file whose fields are
 * separated by {@code ';'}. Otherwise a hint is printed to standard output and the data set is
 * built from the collection returned by {@code getExampleInputTuples()}.
 *
 * @param env the execution environment used to create the data set
 * @param params the program parameters, optionally containing {@code input}
 * @return the input data set
 */
@SuppressWarnings("unchecked")
private static DataSet<StringTriple> getDataSet(ExecutionEnvironment env, ParameterTool params) {
    // Fall back to the example tuples when no input file was specified.
    if (!params.has("input")) {
        System.out.println("Executing EmptyFieldsCountAccumulator example with default input data set.");
        System.out.println("Use --input to specify file input.");
        return env.fromCollection(getExampleInputTuples());
    }

    final String inputPath = params.get("input");
    return env.readCsvFile(inputPath)
            .fieldDelimiter(";")
            .pojoType(StringTriple.class);
}
private static DataSet<Tuple2<String, String>> getVisitsDataSet(ExecutionEnvironment env, ParameterTool params) { // Create DataSet for visits relation (URL, Date) if (params.has("visits")) { return env.readCsvFile(params.get("visits")) .fieldDelimiter("|") .includeFields("011000000") .types(String.class, String.class); } else { System.out.println("Executing WebLogAnalysis example with default visits data set."); System.out.println("Use --visits to specify file input."); return WebLogData.getVisitDataSet(env); } }
/**
 * Provides the user-song triplets as (String, String, Integer) tuples.
 *
 * <p>When {@code fileOutput} is set, the file at {@code userSongTripletsInputPath} is read as
 * CSV with newline-delimited lines and tab-delimited fields. Otherwise the data set supplied by
 * {@code MusicProfilesData} is returned.
 *
 * @param env the execution environment used to create the data set
 * @return the triplets data set
 */
private static DataSet<Tuple3<String, String, Integer>> getUserSongTripletsData(ExecutionEnvironment env) {
    if (!fileOutput) {
        return MusicProfilesData.getUserSongTriplets(env);
    }

    return env.readCsvFile(userSongTripletsInputPath)
            .lineDelimiter("\n")
            .fieldDelimiter("\t")
            .types(String.class, String.class, Integer.class);
}
/**
 * Provides the edge data set with {@code Long} vertex ids and {@code Double} edge values.
 *
 * <p>When {@code fileOutput} is set, the file at {@code edgesInputPath} is read as CSV with
 * tab-delimited fields and newline-delimited lines, and each (Long, Long, Double) tuple is
 * mapped through {@code Tuple3ToEdgeMap}. Otherwise the default edge data set from
 * {@code SingleSourceShortestPathsData} is returned.
 *
 * @param env the execution environment used to create the data set
 * @return the edge data set
 */
private static DataSet<Edge<Long, Double>> getEdgeDataSet(ExecutionEnvironment env) {
    if (!fileOutput) {
        return SingleSourceShortestPathsData.getDefaultEdgeDataSet(env);
    }

    return env.readCsvFile(edgesInputPath)
            .fieldDelimiter("\t")
            .lineDelimiter("\n")
            .types(Long.class, Long.class, Double.class)
            .map(new Tuple3ToEdgeMap<>());
}
/**
 * Provides the edge data set with {@code Long} vertex ids and {@code Double} edge values.
 *
 * <p>When {@code fileOutput} is set, the file at {@code edgesInputPath} is read as CSV with
 * newline-delimited lines and tab-delimited fields, and each (Long, Long, Double) tuple is
 * mapped through {@code Tuple3ToEdgeMap}. Otherwise the default edge data set from
 * {@code SingleSourceShortestPathsData} is returned.
 *
 * @param env the execution environment used to create the data set
 * @return the edge data set
 */
private static DataSet<Edge<Long, Double>> getEdgesDataSet(ExecutionEnvironment env) {
    if (!fileOutput) {
        return SingleSourceShortestPathsData.getDefaultEdgeDataSet(env);
    }

    return env.readCsvFile(edgesInputPath)
            .lineDelimiter("\n")
            .fieldDelimiter("\t")
            .types(Long.class, Long.class, Double.class)
            .map(new Tuple3ToEdgeMap<>());
}
/**
 * Provides the edge data set with {@code Long} vertex ids and {@code Double} edge values.
 *
 * <p>When {@code fileOutput} is set, the file at {@code edgesInputPath} is read as CSV with
 * newline-delimited lines, tab-delimited fields and {@code "%"} registered as the comment
 * prefix; each (Long, Long, Double) tuple is mapped through {@code Tuple3ToEdgeMap}. Otherwise
 * the default edge data set from {@code SingleSourceShortestPathsData} is returned.
 *
 * @param env the execution environment used to create the data set
 * @return the edge data set
 */
private static DataSet<Edge<Long, Double>> getEdgesDataSet(ExecutionEnvironment env) {
    if (!fileOutput) {
        return SingleSourceShortestPathsData.getDefaultEdgeDataSet(env);
    }

    return env.readCsvFile(edgesInputPath)
            .lineDelimiter("\n")
            .fieldDelimiter("\t")
            .ignoreComments("%")
            .types(Long.class, Long.class, Double.class)
            .map(new Tuple3ToEdgeMap<>());
}