Refine search
/**
 * Returns the user-song triplets data set.
 *
 * <p>When {@code fileOutput} is set, the records are read from
 * {@code userSongTripletsInputPath} as newline-delimited, tab-separated rows typed as
 * {@code (String, String, Integer)}. Otherwise the data set supplied by
 * {@code MusicProfilesData.getUserSongTriplets(env)} is returned.
 *
 * @param env the execution environment used to create the data set
 * @return the triplets data set
 */
private static DataSet<Tuple3<String, String, Integer>> getUserSongTripletsData(ExecutionEnvironment env) {
    if (!fileOutput) {
        return MusicProfilesData.getUserSongTriplets(env);
    }

    final DataSet<Tuple3<String, String, Integer>> triplets = env.readCsvFile(userSongTripletsInputPath)
            .lineDelimiter("\n")
            .fieldDelimiter("\t")
            .types(String.class, String.class, Integer.class);
    return triplets;
}
/**
 * Reads the orders file as a data set of {@code Order} tuples.
 *
 * <p>Fields are separated by {@code |}; the include mask {@code "110010010"} keeps the
 * columns at the positions marked with {@code 1}.
 *
 * @param env the execution environment used to create the CSV source
 * @param ordersPath location of the orders file
 * @return the orders data set
 */
private static DataSet<Order> getOrdersDataSet(ExecutionEnvironment env, String ordersPath) {
    final DataSet<Order> orders = env.readCsvFile(ordersPath)
            .fieldDelimiter("|")
            .includeFields("110010010")
            .tupleType(Order.class);
    return orders;
}
/**
 * Creates a reader that takes vertices and edges from two separate CSV files.
 *
 * <p>One {@code CsvReader} is created per path, both bound to the given environment.
 * The mapper is left unset ({@code null}).
 *
 * @param vertexPath path of the CSV file holding the vertices
 * @param edgePath path of the CSV file holding the edges
 * @param context the execution environment shared by both readers
 */
public GraphCsvReader(Path vertexPath, Path edgePath, ExecutionEnvironment context) {
    // Vertex side.
    this.vertexPath = vertexPath;
    this.vertexReader = new CsvReader(vertexPath, context);

    // Edge side.
    this.edgePath = edgePath;
    this.edgeReader = new CsvReader(edgePath, context);

    // Shared state.
    this.mapper = null;
    this.executionContext = context;
}
/**
 * Reads the nations file as a data set of {@code (Integer, String)} tuples.
 *
 * <p>Fields are separated by {@code |}; the include mask {@code "1100"} keeps the first
 * two columns and drops the next two.
 *
 * @param env the execution environment used to create the CSV source
 * @param nationPath location of the nations file
 * @return the nations data set
 */
private static DataSet<Tuple2<Integer, String>> getNationsDataSet(ExecutionEnvironment env, String nationPath) {
    final DataSet<Tuple2<Integer, String>> nations = env.readCsvFile(nationPath)
            .fieldDelimiter("|")
            .includeFields("1100")
            .types(Integer.class, String.class);
    return nations;
}
} // closes an enclosing scope that was opened before this snippet
public static void tcph3(String[] args) throws Exception { ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); env.setParallelism(Integer.parseInt(args[0])); //order id, order status, order data, order prio, ship prio DataSet<Tuple5<Long, String, String, String, Integer>> orders = env.readCsvFile(args[1]) .fieldDelimiter("|").lineDelimiter("\n") .includeFields("101011001").types(Long.class, String.class, String.class, String.class, Integer.class) .name(ORDERS); //order id, extended price DataSet<Tuple2<Long, Double>> lineItems = env.readCsvFile(args[2]) .fieldDelimiter("|").lineDelimiter("\n") .includeFields("100001").types(Long.class, Double.class) .name(LINEITEM); DataSet<Tuple2<Long, Integer>> filterO = orders.flatMap(new FilterO()).name(MAPPER_NAME); DataSet<Tuple3<Long, Integer, Double>> joinLiO = filterO.join(lineItems).where(0).equalTo(0).with(new JoinLiO()).name(JOIN_NAME); DataSet<Tuple3<Long, Integer, Double>> aggLiO = joinLiO.groupBy(0, 1).reduceGroup(new AggLiO()).name(REDUCE_NAME); aggLiO.writeAsCsv(args[3], "\n", "|").name(SINK); env.execute(); }
final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); env.getConfig().setGlobalJobParameters(params); if (params.has("input")) { data = env.readCsvFile(params.get("input")) .fieldDelimiter(" ") .includeFields(true, true) .pojoType(Data.class); } else { System.out.println("Executing LinearRegression example with default input data set.");
/**
 * Creates a stream of {@code Order} tuples backed by a CSV input format.
 *
 * <p>The input format is produced by a {@code CsvReader} configured with {@code |} as the
 * field delimiter and the include mask {@code "110010010"}. Depending on
 * {@code useSourceV2}, the format is attached through {@code createInputV2} or
 * {@code createInput}.
 *
 * @param env the streaming environment the source is attached to
 * @param ordersPath location of the orders file
 * @param useSourceV2 whether to attach the format through {@code createInputV2}
 * @return the orders stream
 */
private static SingleOutputStreamOperator<Order> getOrdersDataStream(StreamExecutionEnvironment env, String ordersPath, boolean useSourceV2) {
    final TupleCsvInputFormat<Order> orderFormat =
            new CsvReader(ordersPath, ExecutionEnvironment.getExecutionEnvironment())
                    .fieldDelimiter("|")
                    .includeFields("110010010")
                    .generateTupleCsvInputFormat(Order.class);

    if (!useSourceV2) {
        return env.createInput(orderFormat, orderFormat.getTupleTypeInfo(), "Order source v1");
    }
    return env.createInputV2(orderFormat, orderFormat.getTupleTypeInfo(), "Order source v2");
}
/**
 * Returns the input data set of {@code StringTriple} records.
 *
 * <p>If the {@code input} parameter is present, the referenced file is read as
 * {@code ;}-separated CSV into {@code StringTriple} POJOs. Otherwise a usage hint is
 * printed and the data set is built from {@code getExampleInputTuples()}.
 *
 * @param env the execution environment used to create the data set
 * @param params the program parameters
 * @return the input data set
 */
@SuppressWarnings("unchecked")
private static DataSet<StringTriple> getDataSet(ExecutionEnvironment env, ParameterTool params) {
    if (!params.has("input")) {
        System.out.println("Executing EmptyFieldsCountAccumulator example with default input data set.");
        System.out.println("Use --input to specify file input.");
        return env.fromCollection(getExampleInputTuples());
    }

    return env.readCsvFile(params.get("input"))
            .fieldDelimiter(";")
            .pojoType(StringTriple.class);
}
/**
 * Passing {@code null} as the POJO field list to {@code pojoType} is expected to raise a
 * {@link NullPointerException}.
 */
@Test(expected = NullPointerException.class)
public void testPOJOTypeWithoutFieldsOrder() throws Exception {
    final String dataPath = createInputData("");

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // The field names are deliberately a null array; the other tests in this file pass
    // them as String values / a String[].
    env.readCsvFile(dataPath).pojoType(POJOItem.class, (String[]) null);
}
/**
 * Returns the centroid data set.
 *
 * <p>If the {@code centroids} parameter is present, the referenced file is read as
 * space-separated CSV and mapped onto the {@code Centroid} fields {@code id}, {@code x}
 * and {@code y}. Otherwise a usage hint is printed and the default data set from
 * {@code KMeansData} is returned.
 *
 * @param params the program parameters
 * @param env the execution environment used to create the data set
 * @return the centroid data set
 */
private static DataSet<Centroid> getCentroidDataSet(ParameterTool params, ExecutionEnvironment env) {
    if (params.has("centroids")) {
        return env.readCsvFile(params.get("centroids"))
                .fieldDelimiter(" ")
                .pojoType(Centroid.class, "id", "x", "y");
    }

    System.out.println("Executing K-Means example with default centroid data set.");
    System.out.println("Use --centroids to specify file input.");
    return KMeansData.getDefaultCentroidDataSet(env);
}
private static DataSet<Tuple3<Integer, String, Integer>> getRanksDataSet(ExecutionEnvironment env, ParameterTool params) { // Create DataSet for ranks relation (Rank, URL, Avg-Visit-Duration) if (params.has("ranks")) { return env.readCsvFile(params.get("ranks")) .fieldDelimiter("|") .types(Integer.class, String.class, Integer.class); } else { System.out.println("Executing WebLogAnalysis example with default ranks data set."); System.out.println("Use --ranks to specify file input."); return WebLogData.getRankDataSet(env); } }
/**
 * Reads a three-column CSV with the include mask {@code (true, false, true)} into
 * {@code POJOItem}, assigning the two selected columns to the POJO fields {@code f2} and
 * {@code f1} (in that order), and compares the collected result with the expected text.
 */
@Test
public void testPOJOTypeWithFieldsOrderAndFieldsSelection() throws Exception {
    final String dataPath = createInputData("3,2.20,ABC\n5,5.1,DEF\n1,3.30,DEF\n10,3.30,GHI");

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<POJOItem> items = env.readCsvFile(dataPath)
            .includeFields(true, false, true)
            .pojoType(POJOItem.class, "f2", "f1");
    List<POJOItem> collected = items.collect();

    expected = "ABC,3,0.00\nDEF,5,0.00\nDEF,1,0.00\nGHI,10,0.00";
    compareResultAsText(collected, expected);
}
/**
 * Reads an eight-column CSV into a tuple of {@code Value} wrapper types and checks that
 * the collected tuples match the input text.
 */
@Test
public void testValueTypes() throws Exception {
    final String csvText = "ABC,true,1,2,3,4,5.0,6.0\nBCD,false,1,2,3,4,5.0,6.0";
    final String csvPath = createInputData(csvText);

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Tuple8<StringValue, BooleanValue, ByteValue, ShortValue, IntValue, LongValue, FloatValue, DoubleValue>> parsed =
            env.readCsvFile(csvPath)
                    .types(
                            StringValue.class,
                            BooleanValue.class,
                            ByteValue.class,
                            ShortValue.class,
                            IntValue.class,
                            LongValue.class,
                            FloatValue.class,
                            DoubleValue.class);

    List<Tuple8<StringValue, BooleanValue, ByteValue, ShortValue, IntValue, LongValue, FloatValue, DoubleValue>> rows =
            parsed.collect();

    // The expected output is the input text itself.
    expected = csvText;
    compareResultAsTuples(rows, expected);
}
/**
 * Returns the vertex data set.
 *
 * <p>When {@code fileOutput} is set, newline-delimited rows typed as
 * {@code (Long, Double, Double)} are read from {@code verticesInputPath}; each row becomes
 * a {@code Vertex} whose id is the first field and whose value is a {@code Point} built
 * from the remaining two fields. Otherwise the default data set from
 * {@code EuclideanGraphData} is returned.
 *
 * @param env the execution environment used to create the data set
 * @return the vertex data set
 */
private static DataSet<Vertex<Long, Point>> getVerticesDataSet(ExecutionEnvironment env) {
    if (!fileOutput) {
        return EuclideanGraphData.getDefaultVertexDataSet(env);
    }

    final DataSet<Tuple3<Long, Double, Double>> coordinates = env.readCsvFile(verticesInputPath)
            .lineDelimiter("\n")
            .types(Long.class, Double.class, Double.class);

    return coordinates.map(new MapFunction<Tuple3<Long, Double, Double>, Vertex<Long, Point>>() {

        @Override
        public Vertex<Long, Point> map(Tuple3<Long, Double, Double> value) throws Exception {
            final Point location = new Point(value.f1, value.f2);
            return new Vertex<>(value.f0, location);
        }
    });
}
/**
 * Selects which fields of the edges CSV file are read and which are skipped.
 *
 * <p>The parser inspects the first {@code n} fields of each row, where {@code n} is the
 * length of the given array. A field is kept when the flag at its position is
 * {@code true} and skipped when it is {@code false}, so the number of resulting fields
 * equals the number of {@code true} flags.
 *
 * @param edgeFields flags describing, position by position, which fields of the edges
 *                   CSV file are included
 * @return this {@code GraphCsvReader}, to allow fluent call chaining
 */
public GraphCsvReader includeFieldsEdges(boolean ... edgeFields) {
    edgeReader.includeFields(edgeFields);
    return this;
}
/**
 * Returns the data set of vertex ids.
 *
 * <p>If the {@code vertices} parameter is present, the referenced file is read as
 * single-column CSV of {@code Long} values and each {@code Tuple1} is unwrapped to its
 * only field. Otherwise a usage hint is printed and the default data set from
 * {@code ConnectedComponentsData} is returned.
 *
 * @param env the execution environment used to create the data set
 * @param params the program parameters
 * @return the vertex id data set
 */
private static DataSet<Long> getVertexDataSet(ExecutionEnvironment env, ParameterTool params) {
    if (params.has("vertices")) {
        return env.readCsvFile(params.get("vertices"))
                .types(Long.class)
                .map(new MapFunction<Tuple1<Long>, Long>() {
                    // @Override lets the compiler verify that this really implements
                    // MapFunction#map instead of silently declaring an unrelated method.
                    @Override
                    public Long map(Tuple1<Long> value) {
                        return value.f0;
                    }
                });
    } else {
        System.out.println("Executing Connected Components example with default vertices data set.");
        System.out.println("Use --vertices to specify file input.");
        return ConnectedComponentsData.getDefaultVertexDataSet(env);
    }
}
@Test(expected = IllegalArgumentException.class) public void testWithInvalidValueType2() throws Exception { CsvReader reader = getCsvReader(); // CsvReader doesn't support custom Value type reader.types(ValueItem.class); }
/**
 * Creates a {@code CsvReader} over a placeholder path, bound to a local execution
 * environment created with parallelism 1.
 *
 * @return a new reader instance
 */
private static CsvReader getCsvReader() {
    final ExecutionEnvironment localEnv = ExecutionEnvironment.createLocalEnvironment(1);
    return new CsvReader("/some/none/existing/path", localEnv);
}
/**
 * Sets the delimiter that separates rows in the CSV file containing the edges.
 * The default is {@code '\n'}.
 *
 * @param delimiter the row delimiter
 * @return this {@code GraphCsvReader}, to allow fluent call chaining
 */
public GraphCsvReader lineDelimiterEdges(String delimiter) {
    this.edgeReader.lineDelimiter(delimiter);
    return this;
}
/**
 * Sets the delimiter that separates the fields within a row of the CSV file containing
 * the edges. The default is {@code ','}.
 *
 * @param delimiter the field delimiter
 * @return this {@code GraphCsvReader}, to allow fluent call chaining
 */
public GraphCsvReader fieldDelimiterEdges(String delimiter) {
    edgeReader.fieldDelimiter(delimiter);
    return this;
}