// Plan fragment: wires up the inputs of a "Find Nearest Centers" operator.
//  - clustersSource: CSV-backed file source named "Centers", read from clusterInput and
//    declared with four fields (IntValue, DoubleValue, DoubleValue, DoubleValue).
//    The '|' argument is presumably the field delimiter -- TODO confirm against CsvInputFormat.
//  - dataPoints / clusterPoints: MapOperators that run PointBuilder over pointsSource and
//    clustersSource respectively.
//  - The trailing builder chain (its receiver/head is not visible in this fragment) registers
//    iter.getPartialSolution() as the broadcast variable "centers" and takes dataPoints as input.
// NOTE(review): an otherwise identical fragment elsewhere in this file broadcasts clusterPoints
// instead of iter.getPartialSolution() -- confirm which variant this plan is meant to use.
FileDataSource clustersSource = new FileDataSource(new CsvInputFormat('|', IntValue.class, DoubleValue.class, DoubleValue.class, DoubleValue.class), clusterInput, "Centers"); MapOperator dataPoints = MapOperator.builder(new PointBuilder()).name("Build data points").input(pointsSource).build(); MapOperator clusterPoints = MapOperator.builder(new PointBuilder()).name("Build cluster points").input(clustersSource).build(); .setBroadcastVariable("centers", iter.getPartialSolution()) .input(dataPoints) .name("Find Nearest Centers") .build();
// Plan fragment: tails of three builder chains (their heads are not visible here) producing the
// operators named "Filter Docs", "Filter Ranks" and "Filter Visits", fed by docs, ranks and
// visits respectively. After each build, a filter factor is set on the operator's compiler
// hints: 0.15 for filterDocs, 0.25 for filterRanks, 0.2 for filterVisits.
// NOTE(review): the filter factor is presumably the optimizer's estimate of the fraction of
// records each filter lets through -- confirm against CompilerHints; the values are hard-coded.
.input(docs) .name("Filter Docs") .build(); filterDocs.getCompilerHints().setFilterFactor(0.15f); .input(ranks) .name("Filter Ranks") .build(); filterRanks.getCompilerHints().setFilterFactor(0.25f); .input(visits) .name("Filter Visits") .build(); filterVisits.getCompilerHints().setFilterFactor(0.2f);
// Plan fragment: text source -> tokenizer -> counting reducer.
//  - sourceNode: file source named "Input Lines" that reads IN_FILE through TextInputFormat.
//  - mapNode: MapOperator named "Tokenize Lines" that applies TokenizeLine to sourceNode.
//  - reduceNode: ReduceOperator built from CountWords with StringValue.class and 0 as builder
//    arguments (presumably the key type and key field index -- TODO confirm), fed by mapNode.
//    The builder chain continues past the end of this fragment.
FileDataSource sourceNode = new FileDataSource(new TextInputFormat(), IN_FILE, "Input Lines"); MapOperator mapNode = MapOperator.builder(new TokenizeLine()) .input(sourceNode) .name("Tokenize Lines") .build(); ReduceOperator reduceNode = ReduceOperator.builder(new CountWords(), StringValue.class, 0) .input(mapNode)
// Plan fragment: wires up the inputs of a "Find Nearest Centers" operator.
//  - clustersSource: CSV-backed file source named "Centers", read from clusterInput and
//    declared with four fields (IntValue, DoubleValue, DoubleValue, DoubleValue).
//    The '|' argument is presumably the field delimiter -- TODO confirm against CsvInputFormat.
//  - dataPoints / clusterPoints: MapOperators that run PointBuilder over pointsSource and
//    clustersSource respectively.
//  - The trailing builder chain (its receiver/head is not visible in this fragment) registers
//    the clusterPoints operator itself as the broadcast variable "centers" and takes dataPoints
//    as input.
// NOTE(review): an otherwise identical fragment elsewhere in this file broadcasts
// iter.getPartialSolution() instead of clusterPoints -- confirm which variant is intended here.
FileDataSource clustersSource = new FileDataSource(new CsvInputFormat('|', IntValue.class, DoubleValue.class, DoubleValue.class, DoubleValue.class), clusterInput, "Centers"); MapOperator dataPoints = MapOperator.builder(new PointBuilder()).name("Build data points").input(pointsSource).build(); MapOperator clusterPoints = MapOperator.builder(new PointBuilder()).name("Build cluster points").input(clustersSource).build(); .setBroadcastVariable("centers", clusterPoints) .input(dataPoints) .name("Find Nearest Centers") .build();