public Void call(JavaPairRDD<Integer, Long> rdd) {
    // Materialize at most 100 (responseCode, count) pairs on the driver;
    // take(n) avoids pulling back the entire RDD the way collect() would.
    currentResponseCodeCounts = rdd.take(100);
    return null;
}});
public Void call(JavaRDD<ApacheAccessLog> accessLogs) {
    Tuple4<Long, Long, Long, Long> contentSizeStats = Functions.contentSizeStats(accessLogs);
    List<Tuple2<Integer, Long>> responseCodeToCount =
            Functions.responseCodeCount(accessLogs).take(100);
    JavaPairRDD<String, Long> ipAddressCounts = Functions.ipAddressCount(accessLogs);
    List<String> ipAddresses = Functions.filterIPAddress(ipAddressCounts).take(100);
    // Ordering<Long> already implements Comparator<Long>, so the original
    // cast through Object is unnecessary.
    Comparator<Long> cmp = Ordering.<Long>natural();
    List<Tuple2<String, Long>> topEndpoints = Functions.endpointCount(accessLogs)
            .top(10, new Functions.ValueComparator<String, Long>(cmp));
    logStatistics = new LogStatistics(contentSizeStats, responseCodeToCount, ipAddresses, topEndpoints);
    return null;
}});
}
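// The snippet above uses Functions.ValueComparator, which is not shown here.
// A minimal sketch of what such a value-based tuple comparator plausibly looks
// like, assuming it simply delegates to the wrapped Comparator on the tuple's
// second element (hypothetical reconstruction, not the original source):
import java.io.Serializable;
import java.util.Comparator;
import scala.Tuple2;

public class ValueComparator<K, V> implements Comparator<Tuple2<K, V>>, Serializable {
    private final Comparator<V> comparator;

    public ValueComparator(Comparator<V> comparator) {
        this.comparator = comparator;
    }

    // Compare (key, value) pairs by value so that JavaPairRDD.top(n, cmp)
    // returns the n entries with the largest counts.
    @Override
    public int compare(Tuple2<K, V> a, Tuple2<K, V> b) {
        return comparator.compare(a._2(), b._2());
    }
}
// Serializable matters here: Spark ships the comparator to executors when
// evaluating top(n).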
@Override
public Iterator<Vertex> head(final String location, final Class readerClass, final int totalLines) {
    final Configuration configuration = new BaseConfiguration();
    configuration.setProperty(Constants.GREMLIN_HADOOP_INPUT_LOCATION, location);
    configuration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_READER, readerClass.getCanonicalName());
    try {
        // take(totalLines) caps how many vertices are pulled back to the driver.
        if (InputRDD.class.isAssignableFrom(readerClass)) {
            return IteratorUtils.map(
                    ((InputRDD) readerClass.getConstructor().newInstance())
                            .readGraphRDD(configuration, new JavaSparkContext(Spark.getContext()))
                            .take(totalLines).iterator(),
                    tuple -> tuple._2().get());
        } else if (InputFormat.class.isAssignableFrom(readerClass)) {
            return IteratorUtils.map(
                    new InputFormatRDD()
                            .readGraphRDD(configuration, new JavaSparkContext(Spark.getContext()))
                            .take(totalLines).iterator(),
                    tuple -> tuple._2().get());
        }
    } catch (final Exception e) {
        throw new IllegalArgumentException(e.getMessage(), e);
    }
    // Message now names the actual parameter, readerClass.
    throw new IllegalArgumentException("The provided readerClass must be an " + InputFormat.class.getCanonicalName() + " or an " + InputRDD.class.getCanonicalName() + ": " + readerClass.getCanonicalName());
}
@Override
public <K, V> Iterator<KeyValue<K, V>> head(final String location, final String memoryKey, final Class readerClass, final int totalLines) {
    final Configuration configuration = new BaseConfiguration();
    configuration.setProperty(Constants.GREMLIN_HADOOP_INPUT_LOCATION, location);
    configuration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_READER, readerClass.getCanonicalName());
    try {
        // Same pattern as the vertex head(): read the memory RDD for the given
        // key and surface at most totalLines entries on the driver.
        if (InputRDD.class.isAssignableFrom(readerClass)) {
            return IteratorUtils.map(
                    ((InputRDD) readerClass.getConstructor().newInstance())
                            .readMemoryRDD(configuration, memoryKey, new JavaSparkContext(Spark.getContext()))
                            .take(totalLines).iterator(),
                    tuple -> new KeyValue<>(tuple._1(), tuple._2()));
        } else if (InputFormat.class.isAssignableFrom(readerClass)) {
            return IteratorUtils.map(
                    new InputFormatRDD()
                            .readMemoryRDD(configuration, memoryKey, new JavaSparkContext(Spark.getContext()))
                            .take(totalLines).iterator(),
                    tuple -> new KeyValue<>(tuple._1(), tuple._2()));
        }
    } catch (final Exception e) {
        throw new IllegalArgumentException(e.getMessage(), e);
    }
    throw new IllegalArgumentException("The provided readerClass must be an " + InputFormat.class.getCanonicalName() + " or an " + InputRDD.class.getCanonicalName() + ": " + readerClass.getCanonicalName());
}
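// For context, a hypothetical call site for the two head() overloads above;
// 'storage' is assumed to be the TinkerPop Storage implementation defining
// them, and the locations, reader classes, and memory key are illustrative,
// not taken from the source.
Iterator<Vertex> firstVertices = storage.head("output/~g", GryoInputFormat.class, 5);
firstVertices.forEachRemaining(System.out::println);

// Peeking at a memory key works the same way, bounded by take(totalLines)
// so only a handful of entries ever reach the driver.
Iterator<KeyValue<String, Long>> firstCounts =
        storage.head("output", "clusterCount", SequenceFileInputFormat.class, 5);
firstCounts.forEachRemaining(System.out::println);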
List<Tuple2<String, Vector>> samples = data.filter(new Function<Tuple2<String, Vector>, Boolean>() {
    @Override
    public Boolean call(Tuple2<String, Vector> in) {
        // Keep only the articles assigned to the current centroid.
        return Util.closestPoint(in._2(), centroids) == index;
    }
}).take(numArticles);
@Override
public Void call(JavaPairRDD<Integer, String> rdd) {
    // Build the report with a StringBuilder instead of repeated String
    // concatenation, then print the first 10 entries of this batch.
    StringBuilder out = new StringBuilder("\nSpark, Top 10 entries for stream id: ")
            .append(rdd.id()).append("\n");
    for (Tuple2<Integer, String> t : rdd.take(10)) {
        out.append(t.toString()).append("\n");
    }
    System.out.println(out);
    return null;
}});
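// A self-contained sketch of how a per-batch callback like the one above is
// typically registered on a DStream. The socket source, key function, and
// batch interval are assumptions for illustration, and it uses the modern
// VoidFunction-style foreachRDD lambda rather than the older
// Function<..., Void> form shown above.
import org.apache.spark.SparkConf;
import org.apache.spark.streaming.Durations;
import org.apache.spark.streaming.api.java.JavaPairDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;
import scala.Tuple2;

public class TakeOnStreamSketch {
    public static void main(String[] args) throws InterruptedException {
        SparkConf conf = new SparkConf().setAppName("take-sketch").setMaster("local[2]");
        JavaStreamingContext jssc = new JavaStreamingContext(conf, Durations.seconds(5));

        // Stand-in source: lines from a local socket, keyed by their length.
        JavaPairDStream<Integer, String> scoredLines = jssc
                .socketTextStream("localhost", 9999)
                .mapToPair(line -> new Tuple2<>(line.length(), line));

        // Each batch, take(10) pulls at most ten pairs back to the driver,
        // unlike collect(), which would materialize the whole batch.
        scoredLines.foreachRDD(rdd -> {
            for (Tuple2<Integer, String> t : rdd.take(10)) {
                System.out.println(t);
            }
        });

        jssc.start();
        jssc.awaitTermination();
    }
}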
for (int i = 0; i < centroids.size(); i++) {
    final int index = i; // effectively-final copy so the lambda can capture it
    // Sample up to numArticles articles whose vector is closest to this centroid.
    List<Tuple2<String, Vector>> samples = data
            .filter((Function<Tuple2<String, Vector>, Boolean>) in -> Util.closestPoint(in._2(), centroids) == index)
            .take(numArticles);
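// Util.closestPoint is not shown in these snippets. A plausible sketch,
// assuming it returns the index of the centroid with the smallest squared
// Euclidean distance; this is the standard k-means assignment step, not
// necessarily the original helper.
import java.util.List;
import org.apache.spark.mllib.linalg.Vector;
import org.apache.spark.mllib.linalg.Vectors;

public static int closestPoint(Vector point, List<Vector> centroids) {
    int bestIndex = 0;
    double bestDistance = Double.MAX_VALUE;
    for (int i = 0; i < centroids.size(); i++) {
        // Vectors.sqdist computes squared Euclidean distance between vectors.
        double distance = Vectors.sqdist(point, centroids.get(i));
        if (distance < bestDistance) {
            bestDistance = distance;
            bestIndex = i;
        }
    }
    return bestIndex;
}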
List<Tuple2<CategorySortKey, String>> top10CategoryList = sortedCategoryRDD.take(10);
List<Top10Category> top10Categories = new ArrayList<>();
for (Tuple2<CategorySortKey, String> tuple2 : top10CategoryList)
for (Tuple2<Integer, String> t : rdd.take(25)) {
    out.append(t.toString()).append("\n");
}
List<Tuple2<CategorySortKey, String>> top10CategoryList = sortedCategoryCountRDD.take(10);
for (Tuple2<CategorySortKey, String> tuple : top10CategoryList) {
    String countInfo = tuple._2;
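// CategorySortKey is not defined in these snippets. A minimal sketch of what
// a composite key for sortByKey might look like, assuming it orders categories
// by click, order, and pay counts in turn; the field names and ordering are
// assumptions, not the original class.
import java.io.Serializable;
import scala.math.Ordered;

public class CategorySortKey implements Ordered<CategorySortKey>, Serializable {
    private final long clickCount;
    private final long orderCount;
    private final long payCount;

    public CategorySortKey(long clickCount, long orderCount, long payCount) {
        this.clickCount = clickCount;
        this.orderCount = orderCount;
        this.payCount = payCount;
    }

    // Compare by click count first, then order count, then pay count.
    @Override
    public int compare(CategorySortKey that) {
        if (clickCount != that.clickCount) return Long.compare(clickCount, that.clickCount);
        if (orderCount != that.orderCount) return Long.compare(orderCount, that.orderCount);
        return Long.compare(payCount, that.payCount);
    }

    // scala.math.Ordered's operator methods, implemented explicitly so the
    // class compiles against any Scala version from Java.
    @Override
    public boolean $less(CategorySortKey that) { return compare(that) < 0; }
    @Override
    public boolean $greater(CategorySortKey that) { return compare(that) > 0; }
    @Override
    public boolean $less$eq(CategorySortKey that) { return compare(that) <= 0; }
    @Override
    public boolean $greater$eq(CategorySortKey that) { return compare(that) >= 0; }
    @Override
    public int compareTo(CategorySortKey that) { return compare(that); }
}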