Refine search
/**
 * Creates a CSV source over {@code nationPath} and parses it into (Integer, String) tuples.
 *
 * <p>The reader is configured with {@code "|"} as field delimiter and the include mask
 * {@code "1100"}. NOTE(review): presumably each '1' in the mask keeps the column at that
 * position - confirm against the CsvReader documentation.
 *
 * @param env execution environment the source is created on
 * @param nationPath path handed to {@code readCsvFile}
 * @return the configured data set of {@code Tuple2<Integer, String>}
 */
private static DataSet<Tuple2<Integer, String>> getNationsDataSet(ExecutionEnvironment env, String nationPath) {
    final String delimiter = "|";
    final String includeMask = "1100";
    return env.readCsvFile(nationPath)
            .fieldDelimiter(delimiter)
            .includeFields(includeMask)
            .types(Integer.class, String.class);
}
} // NOTE(review): closes an enclosing scope that was opened outside this view
/**
 * Creates a CSV source over {@code ordersPath} and parses it into
 * (Integer, Integer, String) tuples.
 *
 * <p>The reader is configured with {@code "|"} as field delimiter and the include mask
 * {@code "110010000"}. NOTE(review): presumably each '1' in the mask keeps the column at
 * that position - confirm against the CsvReader documentation.
 *
 * @param env execution environment the source is created on
 * @param ordersPath path handed to {@code readCsvFile}
 * @return the configured data set of {@code Tuple3<Integer, Integer, String>}
 */
private static DataSet<Tuple3<Integer, Integer, String>> getOrdersDataSet(ExecutionEnvironment env, String ordersPath) {
    final String delimiter = "|";
    final String includeMask = "110010000";
    return env.readCsvFile(ordersPath)
            .fieldDelimiter(delimiter)
            .includeFields(includeMask)
            .types(Integer.class, Integer.class, String.class);
}
/**
 * Creates a CSV source over {@code customerPath} and parses it into
 * (Integer, String, String, Integer, Double) tuples.
 *
 * <p>The reader is configured with {@code "|"} as field delimiter and the include mask
 * {@code "11110100"}. NOTE(review): presumably each '1' in the mask keeps the column at
 * that position - confirm against the CsvReader documentation.
 *
 * @param env execution environment the source is created on
 * @param customerPath path handed to {@code readCsvFile}
 * @return the configured data set of {@code Tuple5<Integer, String, String, Integer, Double>}
 */
private static DataSet<Tuple5<Integer, String, String, Integer, Double>> getCustomerDataSet(ExecutionEnvironment env, String customerPath) {
    final String delimiter = "|";
    final String includeMask = "11110100";
    return env.readCsvFile(customerPath)
            .fieldDelimiter(delimiter)
            .includeFields(includeMask)
            .types(Integer.class, String.class, String.class, Integer.class, Double.class);
}
/**
 * Creates a CSV source over {@code lineitemPath} and parses it into
 * (Integer, Double, Double, String) tuples.
 *
 * <p>The reader is configured with {@code "|"} as field delimiter and the include mask
 * {@code "1000011010000000"}. NOTE(review): presumably each '1' in the mask keeps the
 * column at that position - confirm against the CsvReader documentation.
 *
 * @param env execution environment the source is created on
 * @param lineitemPath path handed to {@code readCsvFile}
 * @return the configured data set of {@code Tuple4<Integer, Double, Double, String>}
 */
private static DataSet<Tuple4<Integer, Double, Double, String>> getLineitemDataSet(ExecutionEnvironment env, String lineitemPath) {
    final String delimiter = "|";
    final String includeMask = "1000011010000000";
    return env.readCsvFile(lineitemPath)
            .fieldDelimiter(delimiter)
            .includeFields(includeMask)
            .types(Integer.class, Double.class, Double.class, String.class);
}
private static DataSet<Tuple2<String, String>> getDocumentsDataSet(ExecutionEnvironment env, ParameterTool params) { // Create DataSet for documents relation (URL, Doc-Text) if (params.has("documents")) { return env.readCsvFile(params.get("documents")) .fieldDelimiter("|") .types(String.class, String.class); } else { System.out.println("Executing WebLogAnalysis example with default documents data set."); System.out.println("Use --documents to specify file input."); return WebLogData.getDocumentDataSet(env); } }
private static DataSet<Tuple3<Integer, String, Integer>> getRanksDataSet(ExecutionEnvironment env, ParameterTool params) { // Create DataSet for ranks relation (Rank, URL, Avg-Visit-Duration) if (params.has("ranks")) { return env.readCsvFile(params.get("ranks")) .fieldDelimiter("|") .types(Integer.class, String.class, Integer.class); } else { System.out.println("Executing WebLogAnalysis example with default ranks data set."); System.out.println("Use --ranks to specify file input."); return WebLogData.getRankDataSet(env); } }
/**
 * Supplies the edge data set as (Long, Long) tuples.
 *
 * <p>Without an {@code edges} parameter, two hint lines are printed to standard output and
 * {@code ConnectedComponentsData.getDefaultEdgeDataSet(env)} is returned. With it, the
 * parameter value is read as a CSV file using a single space as field delimiter.
 *
 * @param env execution environment the source is created on
 * @param params parsed program parameters; {@code edges} is the only key consulted
 * @return the edge data set
 */
private static DataSet<Tuple2<Long, Long>> getEdgeDataSet(ExecutionEnvironment env, ParameterTool params) {
    // Fallback first: no input file was given.
    if (!params.has("edges")) {
        System.out.println("Executing Connected Components example with default edges data set.");
        System.out.println("Use --edges to specify file input.");
        return ConnectedComponentsData.getDefaultEdgeDataSet(env);
    }

    return env.readCsvFile(params.get("edges"))
            .fieldDelimiter(" ")
            .types(Long.class, Long.class);
}
} // NOTE(review): closes an enclosing scope that was opened outside this view
/**
 * Supplies the user-song triplets as (String, String, Integer) tuples.
 *
 * <p>When the {@code fileOutput} flag is unset, the data set from
 * {@code MusicProfilesData.getUserSongTriplets(env)} is returned. Otherwise the file at
 * {@code userSongTripletsInputPath} is read with {@code "\n"} as line delimiter and
 * {@code "\t"} as field delimiter.
 *
 * @param env execution environment the source is created on
 * @return the triplets data set
 */
private static DataSet<Tuple3<String, String, Integer>> getUserSongTripletsData(ExecutionEnvironment env) {
    if (!fileOutput) {
        return MusicProfilesData.getUserSongTriplets(env);
    }

    return env.readCsvFile(userSongTripletsInputPath)
            .lineDelimiter("\n")
            .fieldDelimiter("\t")
            .types(String.class, String.class, Integer.class);
}
/**
 * Supplies the vertex data set as plain {@code Long} values.
 *
 * <p>When the {@code vertices} parameter is present, its value is read as a one-column CSV
 * file of {@code Long} and each {@code Tuple1} is unwrapped to its only field. Otherwise two
 * hint lines are printed to standard output and
 * {@code ConnectedComponentsData.getDefaultVertexDataSet(env)} is returned.
 *
 * @param env execution environment the source is created on
 * @param params parsed program parameters; {@code vertices} is the only key consulted
 * @return the vertex data set
 */
private static DataSet<Long> getVertexDataSet(ExecutionEnvironment env, ParameterTool params) {
    if (params.has("vertices")) {
        return env.readCsvFile(params.get("vertices"))
                .types(Long.class)
                .map(new MapFunction<Tuple1<Long>, Long>() {
                    // @Override lets the compiler verify this really implements MapFunction#map.
                    @Override
                    public Long map(Tuple1<Long> value) {
                        return value.f0;
                    }
                });
    } else {
        System.out.println("Executing Connected Components example with default vertices data set.");
        System.out.println("Use --vertices to specify file input.");
        return ConnectedComponentsData.getDefaultVertexDataSet(env);
    }
}
/**
 * Supplies the vertices as {@code Vertex<Long, Point>}.
 *
 * <p>When the {@code fileOutput} flag is unset,
 * {@code EuclideanGraphData.getDefaultVertexDataSet(env)} is returned. Otherwise the file at
 * {@code verticesInputPath} is read with {@code "\n"} as line delimiter into
 * (Long, Double, Double) rows, and each row becomes a vertex whose id is the first field and
 * whose value is a {@code Point} built from the second and third fields.
 *
 * @param env execution environment the source is created on
 * @return the vertex data set
 */
private static DataSet<Vertex<Long, Point>> getVerticesDataSet(ExecutionEnvironment env) {
    if (!fileOutput) {
        return EuclideanGraphData.getDefaultVertexDataSet(env);
    }

    return env.readCsvFile(verticesInputPath)
            .lineDelimiter("\n")
            .types(Long.class, Double.class, Double.class)
            .map(new MapFunction<Tuple3<Long, Double, Double>, Vertex<Long, Point>>() {
                @Override
                public Vertex<Long, Point> map(Tuple3<Long, Double, Double> row) throws Exception {
                    return new Vertex<>(row.f0, new Point(row.f1, row.f2));
                }
            });
}
private static DataSet<Tuple2<String, String>> getVisitsDataSet(ExecutionEnvironment env, ParameterTool params) { // Create DataSet for visits relation (URL, Date) if (params.has("visits")) { return env.readCsvFile(params.get("visits")) .fieldDelimiter("|") .includeFields("011000000") .types(String.class, String.class); } else { System.out.println("Executing WebLogAnalysis example with default visits data set."); System.out.println("Use --visits to specify file input."); return WebLogData.getVisitDataSet(env); } }
/**
 * Supplies the links data set as (Long, Long) tuples.
 *
 * <p>Without a {@code links} parameter, two hint lines are printed to standard output and
 * {@code PageRankData.getDefaultEdgeDataSet(env)} is returned. With it, the parameter value
 * is read as a CSV file using a single space as field delimiter and {@code "\n"} as line
 * delimiter.
 *
 * @param env execution environment the source is created on
 * @param params parsed program parameters; {@code links} is the only key consulted
 * @return the links data set
 */
private static DataSet<Tuple2<Long, Long>> getLinksDataSet(ExecutionEnvironment env, ParameterTool params) {
    // Fallback first: no input file was given.
    if (!params.has("links")) {
        System.out.println("Executing PageRank example with default links data set.");
        System.out.println("Use --links to specify file input.");
        return PageRankData.getDefaultEdgeDataSet(env);
    }

    return env.readCsvFile(params.get("links"))
            .fieldDelimiter(" ")
            .lineDelimiter("\n")
            .types(Long.class, Long.class);
}
} // NOTE(review): closes an enclosing scope that was opened outside this view
/**
 * Supplies the edges as {@code Edge<Long, Double>}.
 *
 * <p>When the {@code fileOutput} flag is unset,
 * {@code EuclideanGraphData.getDefaultEdgeDataSet(env)} is returned. Otherwise the file at
 * {@code edgesInputPath} is read with {@code "\n"} as line delimiter into (Long, Long) rows,
 * and each row becomes an edge between its two fields with the value {@code 0.0}.
 *
 * @param env execution environment the source is created on
 * @return the edge data set
 */
private static DataSet<Edge<Long, Double>> getEdgesDataSet(ExecutionEnvironment env) {
    if (!fileOutput) {
        return EuclideanGraphData.getDefaultEdgeDataSet(env);
    }

    return env.readCsvFile(edgesInputPath)
            .lineDelimiter("\n")
            .types(Long.class, Long.class)
            .map(new MapFunction<Tuple2<Long, Long>, Edge<Long, Double>>() {
                @Override
                public Edge<Long, Double> map(Tuple2<Long, Long> endpoints) throws Exception {
                    // Every edge read from the file starts with the value 0.0.
                    return new Edge<>(endpoints.f0, endpoints.f1, 0.0);
                }
            });
}
} // NOTE(review): closes an enclosing scope that was opened outside this view
/**
 * Supplies the edges as {@code Edge<Long, Double>}.
 *
 * <p>When the {@code fileOutput} flag is unset,
 * {@code SingleSourceShortestPathsData.getDefaultEdgeDataSet(env)} is returned. Otherwise the
 * file at {@code edgesInputPath} is read with {@code "\t"} as field delimiter and
 * {@code "\n"} as line delimiter into (Long, Long, Double) rows, which are passed through
 * {@code Tuple3ToEdgeMap}.
 *
 * @param env execution environment the source is created on
 * @return the edge data set
 */
private static DataSet<Edge<Long, Double>> getEdgeDataSet(ExecutionEnvironment env) {
    if (!fileOutput) {
        return SingleSourceShortestPathsData.getDefaultEdgeDataSet(env);
    }

    return env.readCsvFile(edgesInputPath)
            .fieldDelimiter("\t")
            .lineDelimiter("\n")
            .types(Long.class, Long.class, Double.class)
            .map(new Tuple3ToEdgeMap<>());
}
/**
 * Supplies the edges as {@code Edge<Long, Double>}.
 *
 * <p>When the {@code fileOutput} flag is unset,
 * {@code SingleSourceShortestPathsData.getDefaultEdgeDataSet(env)} is returned. Otherwise the
 * file at {@code edgesInputPath} is read with {@code "\n"} as line delimiter and
 * {@code "\t"} as field delimiter into (Long, Long, Double) rows, which are passed through
 * {@code Tuple3ToEdgeMap}.
 *
 * @param env execution environment the source is created on
 * @return the edge data set
 */
private static DataSet<Edge<Long, Double>> getEdgesDataSet(ExecutionEnvironment env) {
    if (!fileOutput) {
        return SingleSourceShortestPathsData.getDefaultEdgeDataSet(env);
    }

    return env.readCsvFile(edgesInputPath)
            .lineDelimiter("\n")
            .fieldDelimiter("\t")
            .types(Long.class, Long.class, Double.class)
            .map(new Tuple3ToEdgeMap<>());
}
/**
 * Supplies the pages data set as plain {@code Long} values.
 *
 * <p>Without a {@code pages} parameter, two hint lines are printed to standard output and
 * {@code PageRankData.getDefaultPagesDataSet(env)} is returned. With it, the parameter value
 * is read as a one-column CSV file of {@code Long} (single space as field delimiter,
 * {@code "\n"} as line delimiter) and each {@code Tuple1} is unwrapped to its only field.
 *
 * @param env execution environment the source is created on
 * @param params parsed program parameters; {@code pages} is the only key consulted
 * @return the pages data set
 */
private static DataSet<Long> getPagesDataSet(ExecutionEnvironment env, ParameterTool params) {
    // Fallback first: no input file was given.
    if (!params.has("pages")) {
        System.out.println("Executing PageRank example with default pages data set.");
        System.out.println("Use --pages to specify file input.");
        return PageRankData.getDefaultPagesDataSet(env);
    }

    return env.readCsvFile(params.get("pages"))
            .fieldDelimiter(" ")
            .lineDelimiter("\n")
            .types(Long.class)
            .map(new MapFunction<Tuple1<Long>, Long>() {
                @Override
                public Long map(Tuple1<Long> wrapped) {
                    return wrapped.f0;
                }
            });
}
/**
 * Supplies the edges as {@code Edge<Long, Double>}.
 *
 * <p>When the {@code fileOutput} flag is unset,
 * {@code SingleSourceShortestPathsData.getDefaultEdgeDataSet(env)} is returned. Otherwise the
 * file at {@code edgesInputPath} is read with {@code "\n"} as line delimiter, {@code "\t"}
 * as field delimiter and {@code "%"} passed to {@code ignoreComments}, into
 * (Long, Long, Double) rows, which are passed through {@code Tuple3ToEdgeMap}.
 *
 * @param env execution environment the source is created on
 * @return the edge data set
 */
private static DataSet<Edge<Long, Double>> getEdgesDataSet(ExecutionEnvironment env) {
    if (!fileOutput) {
        return SingleSourceShortestPathsData.getDefaultEdgeDataSet(env);
    }

    return env.readCsvFile(edgesInputPath)
            .lineDelimiter("\n")
            .fieldDelimiter("\t")
            .ignoreComments("%")
            .types(Long.class, Long.class, Double.class)
            .map(new Tuple3ToEdgeMap<>());
}
/**
 * Reads a two-line, eight-column CSV input into a {@code Tuple8} of value types
 * (StringValue, BooleanValue, ByteValue, ShortValue, IntValue, LongValue, FloatValue,
 * DoubleValue), collects it, and compares the collected tuples with the raw input text.
 */
@Test
public void testValueTypes() throws Exception {
    final String inputData = "ABC,true,1,2,3,4,5.0,6.0\nBCD,false,1,2,3,4,5.0,6.0";
    final String csvPath = createInputData(inputData);

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Tuple8<StringValue, BooleanValue, ByteValue, ShortValue, IntValue, LongValue, FloatValue, DoubleValue>> typedRows =
            env.readCsvFile(csvPath)
                    .types(
                            StringValue.class,
                            BooleanValue.class,
                            ByteValue.class,
                            ShortValue.class,
                            IntValue.class,
                            LongValue.class,
                            FloatValue.class,
                            DoubleValue.class);

    List<Tuple8<StringValue, BooleanValue, ByteValue, ShortValue, IntValue, LongValue, FloatValue, DoubleValue>> collected =
            typedRows.collect();

    // The expectation is the input text itself.
    expected = inputData;
    compareResultAsTuples(collected, expected);
}
/**
 * Entry point: reads (Long, Long) pairs, adds one to the second field, and writes the result.
 *
 * <p>Expects at least two arguments. With fewer, a usage line is printed to standard error
 * and the method returns without running a job. Otherwise {@code args[0]} is read as a CSV
 * file with {@code "\t"} as field delimiter, each pair is mapped to
 * {@code (f0, f1 + 1)}, and the result is written via {@code writeAsCsv} to {@code args[1]}
 * with the delimiters {@code "\n"} and {@code "\t"} before the job is executed.
 *
 * @param args {@code args[0]} is the input file path, {@code args[1]} the output file path
 * @throws Exception propagated from the declared {@code throws} clause; the body calls {@code env.execute()}
 */
@SuppressWarnings("serial")
public static void main(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.println("Usage: TestOptimizerPlan <input-file-path> <output-file-path>");
        return;
    }

    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Tuple2<Long, Long>> input = env.readCsvFile(args[0])
            .fieldDelimiter("\t")
            .types(Long.class, Long.class);

    DataSet<Tuple2<Long, Long>> result = input.map(
            new MapFunction<Tuple2<Long, Long>, Tuple2<Long, Long>>() {
                // @Override lets the compiler verify this really implements MapFunction#map.
                @Override
                public Tuple2<Long, Long> map(Tuple2<Long, Long> value) {
                    return new Tuple2<Long, Long>(value.f0, value.f1 + 1);
                }
            });

    result.writeAsCsv(args[1], "\n", "\t");
    env.execute();
}
/**
 * Entry point: reads (Long, Long) pairs, adds one to the second field, and writes the result.
 *
 * <p>Expects at least two arguments. With fewer, a usage line is printed to standard error
 * and the method returns without running a job. Otherwise {@code args[0]} is read as a CSV
 * file with {@code "\t"} as field delimiter, each pair is mapped to
 * {@code (f0, f1 + 1)}, and the result is written via {@code writeAsCsv} to {@code args[1]}
 * with the delimiters {@code "\n"} and {@code "\t"} before the job is executed.
 *
 * @param args {@code args[0]} is the input file path, {@code args[1]} the output file path
 * @throws Exception propagated from the declared {@code throws} clause; the body calls {@code env.execute()}
 */
@SuppressWarnings("serial")
public static void main(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.println("Usage: TestOptimizerPlan <input-file-path> <output-file-path>");
        return;
    }

    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Tuple2<Long, Long>> input = env.readCsvFile(args[0])
            .fieldDelimiter("\t")
            .types(Long.class, Long.class);

    DataSet<Tuple2<Long, Long>> result = input.map(
            new MapFunction<Tuple2<Long, Long>, Tuple2<Long, Long>>() {
                // @Override lets the compiler verify this really implements MapFunction#map.
                @Override
                public Tuple2<Long, Long> map(Tuple2<Long, Long> value) {
                    return new Tuple2<Long, Long>(value.f0, value.f1 + 1);
                }
            });

    result.writeAsCsv(args[1], "\n", "\t");
    env.execute();
}