/**
 * Assigns every training point to its nearest cluster and tallies the
 * resulting cluster sizes.
 *
 * @param trainPointData data to cluster
 * @param model trained KMeans Model
 * @return map of ClusterId, count of points associated with the clusterId
 */
private static Map<Integer, Long> fetchClusterCountsFromModel(
    JavaRDD<? extends Vector> trainPointData, KMeansModel model) {
  // Map each point to its predicted cluster id, then count occurrences per id.
  JavaRDD<Integer> clusterIds = trainPointData.map(point -> model.predict(point));
  return clusterIds.countByValue();
}
/**
 * Checks that the approximate count (with a generous timeout) agrees with
 * the exact countByValue result.
 */
@Test
public void approximateResults() {
  JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 1, 2, 3, 5, 8, 13));

  // Exact counts serve as the reference.
  Map<Integer, Long> exactCounts = rdd.countByValue();
  assertEquals(2, exactCounts.get(1).longValue());
  assertEquals(1, exactCounts.get(13).longValue());

  // The approximate job, given enough time, should converge to the same means.
  PartialResult<Map<Integer, BoundedDouble>> partial = rdd.countByValueApprox(1);
  Map<Integer, BoundedDouble> approxCounts = partial.getFinalValue();
  assertEquals(2.0, approxCounts.get(1).mean(), 0.01);
  assertEquals(1.0, approxCounts.get(13).mean(), 0.01);
}
/**
 * Verifies countByValueApprox: once the final value is available it should
 * match the exact per-value counts.
 */
@Test
public void approximateResults() {
  JavaRDD<Integer> data = sc.parallelize(Arrays.asList(1, 1, 2, 3, 5, 8, 13));

  // Baseline: exact counts.
  Map<Integer, Long> exact = data.countByValue();
  assertEquals(2, exact.get(1).longValue());
  assertEquals(1, exact.get(13).longValue());

  // Approximate counts resolved to their final value.
  PartialResult<Map<Integer, BoundedDouble>> approxJob = data.countByValueApprox(1);
  Map<Integer, BoundedDouble> resolved = approxJob.getFinalValue();
  assertEquals(2.0, resolved.get(1).mean(), 0.01);
  assertEquals(1.0, resolved.get(13).mean(), 0.01);
}
/**
 * Entry point: splits each line of the input file on single spaces and
 * prints every distinct word together with its occurrence count.
 *
 * <p>Usage: BasicFlatMap sparkMaster inputFile
 */
public static void main(String[] args) throws Exception {
  if (args.length != 2) {
    throw new Exception("Usage BasicFlatMap sparkMaster inputFile");
  }

  JavaSparkContext sc = new JavaSparkContext(
      args[0], "basicflatmap", System.getenv("SPARK_HOME"), System.getenv("JARS"));

  JavaRDD<String> lines = sc.textFile(args[1]);

  // Expand every line into its space-separated tokens.
  JavaRDD<String> words = lines.flatMap(
      new FlatMapFunction<String, String>() {
        public Iterable<String> call(String line) {
          return Arrays.asList(line.split(" "));
        }
      });

  // Count occurrences of each word and print them as "word:count".
  Map<String, Long> wordCounts = words.countByValue();
  for (Entry<String, Long> wordCount : wordCounts.entrySet()) {
    System.out.println(wordCount.getKey() + ":" + wordCount.getValue());
  }
}
}
/**
 * Exercises countByValueApprox and confirms its final means equal the
 * exact counts returned by countByValue.
 */
@Test
public void approximateResults() {
  JavaRDD<Integer> fib = sc.parallelize(Arrays.asList(1, 1, 2, 3, 5, 8, 13));

  // Exact counts: 1 appears twice, 13 once.
  Map<Integer, Long> counts = fib.countByValue();
  assertEquals(2, counts.get(1).longValue());
  assertEquals(1, counts.get(13).longValue());

  // Approximate counterpart must agree once fully resolved.
  PartialResult<Map<Integer, BoundedDouble>> approximation = fib.countByValueApprox(1);
  Map<Integer, BoundedDouble> bounds = approximation.getFinalValue();
  assertEquals(2.0, bounds.get(1).mean(), 0.01);
  assertEquals(1.0, bounds.get(13).mean(), 0.01);
}
/** Counts distinct values by delegating straight to the wrapped RDD. */
@Override
public Map<T, Long> countByValue() {
  Map<T, Long> valueCounts = rdd.countByValue();
  return valueCounts;
}
public NGramBuilder(String regexpFileName, String inputFileName, String unigramFileName, String bigramFileName) { JavaRDD<String> lines = jsc.textFile(inputFileName).filter(new InvalidLineFilter()); System.out.println("#(lines) = " + lines.count()); // create unigrams and save them // converter = new Converter(regexpFileName); Map<String, Long> unigrams = lines.flatMap(new UnigramFunction()).countByValue(); List<Tuple2<String, Long>> tuples = new ArrayList<Tuple2<String, Long>>(unigrams.size()); for (String word : unigrams.keySet()) { Long f = unigrams.get(word); if (f >= 2) tuples.add(new Tuple2<String, Long>(word, f)); } JavaPairRDD<String, Long> jprdd = jsc.parallelizePairs(tuples); jprdd.saveAsTextFile(unigramFileName, GzipCodec.class); // create bigrams and save them Map<Tuple2<String, String>, Long> bigrams = lines.flatMap(new BigramFunction()).countByValue(); tuples = new ArrayList<Tuple2<String, Long>>(bigrams.size()); for (Tuple2<String, String> pair : bigrams.keySet()) { Long f = bigrams.get(pair); if (f >= 2) tuples.add(new Tuple2<String, Long>(pair._1() + ',' + pair._2(), f)); } jprdd = jsc.parallelizePairs(tuples); jprdd.saveAsTextFile(bigramFileName, GzipCodec.class); }