/**
 * Reads a text file, splits each line into words via flatMap, and prints
 * the count of each distinct word to stdout.
 *
 * <p>Usage: sparkMaster inputFile
 *
 * @param args [0] Spark master URL, [1] input file path
 * @throws Exception if the argument count is wrong or Spark fails
 */
public static void main(String[] args) throws Exception {
    if (args.length != 2) {
        // FIX: a bad command line is an argument error, not a generic Exception.
        throw new IllegalArgumentException("Usage BasicFlatMap sparkMaster inputFile");
    }
    JavaSparkContext sc = new JavaSparkContext(
        args[0], "basicflatmap", System.getenv("SPARK_HOME"), System.getenv("JARS"));
    try {
        JavaRDD<String> rdd = sc.textFile(args[1]);
        // Split every line on spaces and flatten into a single RDD of words
        // (old Spark 1.x API: FlatMapFunction returns an Iterable).
        JavaRDD<String> words = rdd.flatMap(
            new FlatMapFunction<String, String>() {
                public Iterable<String> call(String x) {
                    return Arrays.asList(x.split(" "));
                }
            });
        // countByValue collects the word -> occurrence-count map to the driver.
        Map<String, Long> result = words.countByValue();
        for (Entry<String, Long> entry : result.entrySet()) {
            System.out.println(entry.getKey() + ":" + entry.getValue());
        }
    } finally {
        // BUG FIX: the context was never stopped, leaking driver resources.
        sc.stop();
    }
}
}
/** Exercises flatMap, flatMapToPair and flatMapToDouble on a two-line RDD. */
@Test
public void flatMap() {
    JavaRDD<String> rdd = sc.parallelize(
        Arrays.asList("Hello World!", "The quick brown fox jumps over the lazy dog."));

    // flatMap: split each line into words and flatten into one RDD (11 words total).
    JavaRDD<String> words = rdd.flatMap(x -> Arrays.asList(x.split(" ")).iterator());
    Assert.assertEquals("Hello", words.first());
    Assert.assertEquals(11, words.count());

    // flatMapToPair: emit a (word, word) pair for each word.
    JavaPairRDD<String, String> pairs = rdd.flatMapToPair(s -> {
        List<Tuple2<String, String>> pairs2 = new LinkedList<>();
        for (String word : s.split(" ")) {
            pairs2.add(new Tuple2<>(word, word));
        }
        return pairs2.iterator();
    });
    Assert.assertEquals(new Tuple2<>("Hello", "Hello"), pairs.first());
    Assert.assertEquals(11, pairs.count());

    // flatMapToDouble: emit each word's length as a double.
    JavaDoubleRDD doubles = rdd.flatMapToDouble(s -> {
        List<Double> lengths = new LinkedList<>();
        for (String word : s.split(" ")) {
            lengths.add((double) word.length());
        }
        return lengths.iterator();
    });
    Assert.assertEquals(5.0, doubles.first(), 0.01);
    // BUG FIX: last assertion re-checked pairs.count(); it must verify doubles.
    Assert.assertEquals(11, doubles.count());
}
/** Exercises flatMap, flatMapToPair and flatMapToDouble on a two-line RDD. */
@Test
public void flatMap() {
    JavaRDD<String> rdd = sc.parallelize(
        Arrays.asList("Hello World!", "The quick brown fox jumps over the lazy dog."));

    // flatMap: split each line into words and flatten into one RDD (11 words total).
    JavaRDD<String> words = rdd.flatMap(x -> Arrays.asList(x.split(" ")).iterator());
    Assert.assertEquals("Hello", words.first());
    Assert.assertEquals(11, words.count());

    // flatMapToPair: emit a (word, word) pair for each word.
    JavaPairRDD<String, String> pairs = rdd.flatMapToPair(s -> {
        List<Tuple2<String, String>> pairs2 = new LinkedList<>();
        for (String word : s.split(" ")) {
            pairs2.add(new Tuple2<>(word, word));
        }
        return pairs2.iterator();
    });
    Assert.assertEquals(new Tuple2<>("Hello", "Hello"), pairs.first());
    Assert.assertEquals(11, pairs.count());

    // flatMapToDouble: emit each word's length as a double.
    JavaDoubleRDD doubles = rdd.flatMapToDouble(s -> {
        List<Double> lengths = new LinkedList<>();
        for (String word : s.split(" ")) {
            lengths.add((double) word.length());
        }
        return lengths.iterator();
    });
    Assert.assertEquals(5.0, doubles.first(), 0.01);
    // BUG FIX: last assertion re-checked pairs.count(); it must verify doubles.
    Assert.assertEquals(11, doubles.count());
}
/**
 * Classic word count: splits each input line into words, maps every word to
 * (word, 1), sums the counts per word, and writes the result as text files.
 *
 * @param args [0] Spark master URL, [1] input file path, [2] output directory
 * @throws Exception if the argument count is wrong or Spark fails
 */
public static void main(String[] args) throws Exception {
    // BUG FIX: validate the argument count before indexing into args,
    // instead of failing later with ArrayIndexOutOfBoundsException.
    if (args.length != 3) {
        throw new IllegalArgumentException("Usage: wordcount sparkMaster inputFile outputDir");
    }
    String master = args[0];
    JavaSparkContext sc = new JavaSparkContext(
        master, "wordcount", System.getenv("SPARK_HOME"), System.getenv("JARS"));
    try {
        JavaRDD<String> rdd = sc.textFile(args[1]);
        JavaPairRDD<String, Integer> counts = rdd.flatMap(
            new FlatMapFunction<String, String>() {
                public Iterable<String> call(String x) {
                    return Arrays.asList(x.split(" "));
                }
            }).mapToPair(new PairFunction<String, String, Integer>() {
                public Tuple2<String, Integer> call(String x) {
                    // FIX: use the diamond operator instead of a raw Tuple2.
                    return new Tuple2<>(x, 1);
                }
            }).reduceByKey(new Function2<Integer, Integer, Integer>() {
                public Integer call(Integer x, Integer y) {
                    return x + y;
                }
            });
        counts.saveAsTextFile(args[2]);
    } finally {
        // BUG FIX: the context was never stopped, leaking driver resources.
        sc.stop();
    }
}
}
/** Exercises flatMap, flatMapToPair and flatMapToDouble on a two-line RDD. */
@Test
public void flatMap() {
    JavaRDD<String> rdd = sc.parallelize(
        Arrays.asList("Hello World!", "The quick brown fox jumps over the lazy dog."));

    // flatMap: split each line into words and flatten into one RDD (11 words total).
    JavaRDD<String> words = rdd.flatMap(x -> Arrays.asList(x.split(" ")).iterator());
    assertEquals("Hello", words.first());
    assertEquals(11, words.count());

    // flatMapToPair: emit a (word, word) pair for each word.
    JavaPairRDD<String, String> pairsRDD = rdd.flatMapToPair(s -> {
        List<Tuple2<String, String>> pairs = new LinkedList<>();
        for (String word : s.split(" ")) {
            pairs.add(new Tuple2<>(word, word));
        }
        return pairs.iterator();
    });
    assertEquals(new Tuple2<>("Hello", "Hello"), pairsRDD.first());
    assertEquals(11, pairsRDD.count());

    // flatMapToDouble: emit each word's length as a double.
    JavaDoubleRDD doubles = rdd.flatMapToDouble(s -> {
        List<Double> lengths = new LinkedList<>();
        for (String word : s.split(" ")) {
            lengths.add((double) word.length());
        }
        return lengths.iterator();
    });
    assertEquals(5.0, doubles.first(), 0.01);
    // BUG FIX: last assertion re-checked pairsRDD.count(); it must verify doubles.
    assertEquals(11, doubles.count());
}
// NOTE(review): this span is a truncated paste of several unrelated snippets —
// the first anonymous FlatMapFunction is cut off mid-body, and a second
// (CallLog[] -> String) flatMap starts but never closes. Left byte-identical;
// recover the complete definitions from the original source before editing.
JavaRDD<String> callSigns = rdd.flatMap( new FlatMapFunction<String, String>() { public Iterable<String> call(String line) { if (line.equals("")) { String distScriptName = "finddistance.R"; sc.addFile(distScript); JavaRDD<String> pipeInputs = contactsContactLists.values().map(new VerifyCallLogs()).flatMap( new FlatMapFunction<CallLog[], String>() { public Iterable<String> call(CallLog[] calls) { ArrayList<String> latLons = new ArrayList<String>();
/** Exercises flatMap, flatMapToPair and flatMapToDouble on a two-line RDD. */
@Test
public void flatMap() {
    JavaRDD<String> rdd = sc.parallelize(
        Arrays.asList("Hello World!", "The quick brown fox jumps over the lazy dog."));

    // flatMap: split each line into words and flatten into one RDD (11 words total).
    JavaRDD<String> words = rdd.flatMap(x -> Arrays.asList(x.split(" ")).iterator());
    assertEquals("Hello", words.first());
    assertEquals(11, words.count());

    // flatMapToPair: emit a (word, word) pair for each word.
    JavaPairRDD<String, String> pairsRDD = rdd.flatMapToPair(s -> {
        List<Tuple2<String, String>> pairs = new LinkedList<>();
        for (String word : s.split(" ")) {
            pairs.add(new Tuple2<>(word, word));
        }
        return pairs.iterator();
    });
    assertEquals(new Tuple2<>("Hello", "Hello"), pairsRDD.first());
    assertEquals(11, pairsRDD.count());

    // flatMapToDouble: emit each word's length as a double.
    JavaDoubleRDD doubles = rdd.flatMapToDouble(s -> {
        List<Double> lengths = new LinkedList<>();
        for (String word : s.split(" ")) {
            lengths.add((double) word.length());
        }
        return lengths.iterator();
    });
    assertEquals(5.0, doubles.first(), 0.01);
    // BUG FIX: last assertion re-checked pairsRDD.count(); it must verify doubles.
    assertEquals(11, doubles.count());
}
// Tokenize each line via the project helper and flatten into a word RDD.
JavaRDD<String> words =
    lines.flatMap((String line) -> Util.convertStringToWords(line, N).iterator());
/** Exercises flatMap, flatMapToPair and flatMapToDouble on a two-line RDD. */
@Test
public void flatMap() {
    JavaRDD<String> rdd = sc.parallelize(
        Arrays.asList("Hello World!", "The quick brown fox jumps over the lazy dog."));

    // flatMap: split each line into words and flatten into one RDD (11 words total).
    JavaRDD<String> words = rdd.flatMap(x -> Arrays.asList(x.split(" ")).iterator());
    Assert.assertEquals("Hello", words.first());
    Assert.assertEquals(11, words.count());

    // flatMapToPair: emit a (word, word) pair for each word.
    JavaPairRDD<String, String> pairs = rdd.flatMapToPair(s -> {
        List<Tuple2<String, String>> pairs2 = new LinkedList<>();
        for (String word : s.split(" ")) {
            pairs2.add(new Tuple2<>(word, word));
        }
        return pairs2.iterator();
    });
    Assert.assertEquals(new Tuple2<>("Hello", "Hello"), pairs.first());
    Assert.assertEquals(11, pairs.count());

    // flatMapToDouble: emit each word's length as a double.
    JavaDoubleRDD doubles = rdd.flatMapToDouble(s -> {
        List<Double> lengths = new LinkedList<>();
        for (String word : s.split(" ")) {
            lengths.add((double) word.length());
        }
        return lengths.iterator();
    });
    Assert.assertEquals(5.0, doubles.first(), 0.01);
    // BUG FIX: last assertion re-checked pairs.count(); it must verify doubles.
    Assert.assertEquals(11, doubles.count());
}
// Tokenize each line via the project helper and flatten into a word RDD.
JavaRDD<String> words = lines.flatMap((String s) -> {
    return Util.convertLineToWords(s, N).iterator();
});
/** Exercises flatMap, flatMapToPair and flatMapToDouble on a two-line RDD. */
@Test
public void flatMap() {
    JavaRDD<String> rdd = sc.parallelize(
        Arrays.asList("Hello World!", "The quick brown fox jumps over the lazy dog."));

    // flatMap: split each line into words and flatten into one RDD (11 words total).
    JavaRDD<String> words = rdd.flatMap(x -> Arrays.asList(x.split(" ")).iterator());
    assertEquals("Hello", words.first());
    assertEquals(11, words.count());

    // flatMapToPair: emit a (word, word) pair for each word.
    JavaPairRDD<String, String> pairsRDD = rdd.flatMapToPair(s -> {
        List<Tuple2<String, String>> pairs = new LinkedList<>();
        for (String word : s.split(" ")) {
            pairs.add(new Tuple2<>(word, word));
        }
        return pairs.iterator();
    });
    assertEquals(new Tuple2<>("Hello", "Hello"), pairsRDD.first());
    assertEquals(11, pairsRDD.count());

    // flatMapToDouble: emit each word's length as a double.
    JavaDoubleRDD doubles = rdd.flatMapToDouble(s -> {
        List<Double> lengths = new LinkedList<>();
        for (String word : s.split(" ")) {
            lengths.add((double) word.length());
        }
        return lengths.iterator();
    });
    assertEquals(5.0, doubles.first(), 0.01);
    // BUG FIX: last assertion re-checked pairsRDD.count(); it must verify doubles.
    assertEquals(11, doubles.count());
}
// NOTE(review): incomplete fragment — the anonymous FlatMapFunction's body is
// not visible in this chunk. Left byte-identical.
JavaRDD<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
// Expand each line into (word, document) elements via the project helper.
lines.flatMap((String doc) -> Util.convertToPairOfWordAndDocument(doc, N).iterator());
// NOTE(review): incomplete fragment — the anonymous FlatMapFunction's body is
// not visible in this chunk. Left byte-identical.
JavaRDD<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
// NOTE(review): fragment cut off mid-generic-parameter list; the function type
// and body are not visible in this chunk. Left byte-identical.
lines.flatMap(new FlatMapFunction<
/**
 * Loads the MongoDB collection backing the given entity as an RDD of entity
 * objects and registers it as a temporary Spark SQL table named after the
 * entity's table name.
 *
 * @param m           entity metadata (schema, table name, entity class)
 * @param sparkClient client holding the SparkContext and SQLContext to use
 */
public void registerTable(EntityMetadata m, SparkClient sparkClient) {
    // FIX: use a parameterized Class<?> instead of a raw Class type.
    final Class<?> clazz = m.getEntityClazz();
    SparkContext sc = sparkClient.sparkContext;
    Configuration config = new Configuration();
    // Point the Mongo Hadoop connector at the entity's collection.
    config.set(
        "mongo.input.uri",
        buildMongoURIPath(sc.getConf().get("hostname"), sc.getConf().get("portname"),
            m.getSchema(), m.getTableName()));
    JavaRDD<Tuple2<Object, BSONObject>> mongoJavaRDD =
        sc.newAPIHadoopRDD(config, MongoInputFormat.class, Object.class, BSONObject.class)
            .toJavaRDD();
    // Convert each BSON document to a Java entity instance (old Spark 1.x
    // FlatMapFunction signature returning an Iterable).
    JavaRDD<Object> mongoRDD =
        mongoJavaRDD.flatMap(new FlatMapFunction<Tuple2<Object, BSONObject>, Object>() {
            @Override
            public Iterable<Object> call(Tuple2<Object, BSONObject> arg) {
                BSONObject obj = arg._2();
                Object javaObject = generateJavaObjectFromBSON(obj, clazz);
                return Arrays.asList(javaObject);
            }
        });
    sparkClient.sqlContext.createDataFrame(mongoRDD, m.getEntityClazz())
        .registerTempTable(m.getTableName());
}
// NOTE(review): incomplete fragment — the anonymous String -> Row function's
// call() body is not visible in this chunk. Left byte-identical.
.flatMap(new FlatMapFunction<String, Row>() { private static final long serialVersionUID = 5481855142090322683L;
/**
 * Analyzes the data quality of sequence data by flattening each sequence into
 * its individual records and delegating to {@code analyzeQuality}.
 *
 * @param schema schema describing the columns of the data
 * @param data   sequence data: each element is one sequence, i.e. a list of
 *               records, each record a list of writables
 * @return the data quality analysis computed over the flattened records
 */
public static DataQualityAnalysis analyzeQualitySequence(Schema schema, JavaRDD<List<List<Writable>>> data) { JavaRDD<List<Writable>> fmSeq = data.flatMap(new SequenceFlatMapFunction()); return analyzeQuality(schema, fmSeq); }
// NOTE(review): incomplete fragment — the flatMap lambda opens here but its
// remaining branches run past the visible chunk. Left byte-identical.
final JavaRDD<Traverser.Admin<Object>> nextRDD = inputRDD.values() .flatMap(vertexWritable -> { if (identityTraversal) // g.V.count()-style (identity) return IteratorUtils.of(traversal.getTraverserGenerator().generate(vertexWritable.get(), (Step) graphStep, 1l));
/**
 * Applies {@code mapper} to every element and flattens the resulting streams
 * into a new SparkStream. The broadcast configuration is re-applied inside the
 * task before the mapper runs.
 *
 * @param mapper function producing a stream of results per element
 * @param <R>    element type of the resulting stream
 * @return a SparkStream over the flattened mapper output
 */
@Override
public <R> SparkStream<R> flatMap(@NonNull SerializableFunction<? super T, Stream<? extends R>> mapper) {
    return new SparkStream<>(
        rdd.flatMap(element -> {
            // Restore configuration from the broadcast before invoking user code.
            Configurator.INSTANCE.configure(configBroadcast.value());
            Stream<? extends R> produced = mapper.apply(element);
            return Cast.as(produced.iterator());
        }));
}