public Void call(JavaPairRDD<Integer, Long> rdd) {
    // Materialize at most 100 (responseCode, count) pairs on the driver;
    // take(n) avoids pulling back the entire RDD the way collect() would.
    currentResponseCodeCounts = rdd.take(100);
    return null;
}});
public Void call(JavaRDD<ApacheAccessLog> accessLogs) {
    Tuple4<Long, Long, Long, Long> contentSizeStats = Functions.contentSizeStats(accessLogs);
    List<Tuple2<Integer, Long>> responseCodeToCount =
            Functions.responseCodeCount(accessLogs).take(100);
    JavaPairRDD<String, Long> ipAddressCounts = Functions.ipAddressCount(accessLogs);
    List<String> ipAddresses = Functions.filterIPAddress(ipAddressCounts).take(100);
    // Ordering<Long> already implements Comparator<Long>, so the original
    // cast through Object is unnecessary.
    Comparator<Long> cmp = Ordering.<Long>natural();
    List<Tuple2<String, Long>> topEndpoints = Functions.endpointCount(accessLogs)
            .top(10, new Functions.ValueComparator<String, Long>(cmp));
    logStatistics = new LogStatistics(contentSizeStats, responseCodeToCount, ipAddresses, topEndpoints);
    return null;
}});
}
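// The snippet above uses Functions.ValueComparator, which is not shown here.
// A minimal sketch of what such a value-based tuple comparator plausibly looks
// like, assuming it simply delegates to the wrapped Comparator on the tuple's
// second element (hypothetical reconstruction, not the original source):
import java.io.Serializable;
import java.util.Comparator;
import scala.Tuple2;

public class ValueComparator<K, V> implements Comparator<Tuple2<K, V>>, Serializable {
    private final Comparator<V> comparator;

    public ValueComparator(Comparator<V> comparator) {
        this.comparator = comparator;
    }

    // Compare (key, value) pairs by value so that JavaPairRDD.top(n, cmp)
    // returns the n entries with the largest counts.
    @Override
    public int compare(Tuple2<K, V> a, Tuple2<K, V> b) {
        return comparator.compare(a._2(), b._2());
    }
}
// Serializable matters here: Spark ships the comparator to executors when
// evaluating top(n).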
@Override
public Iterator<Vertex> head(final String location, final Class readerClass, final int totalLines) {
    final Configuration configuration = new BaseConfiguration();
    configuration.setProperty(Constants.GREMLIN_HADOOP_INPUT_LOCATION, location);
    configuration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_READER, readerClass.getCanonicalName());
    try {
        // take(totalLines) caps how many vertices are pulled back to the driver.
        if (InputRDD.class.isAssignableFrom(readerClass)) {
            return IteratorUtils.map(
                    ((InputRDD) readerClass.getConstructor().newInstance())
                            .readGraphRDD(configuration, new JavaSparkContext(Spark.getContext()))
                            .take(totalLines).iterator(),
                    tuple -> tuple._2().get());
        } else if (InputFormat.class.isAssignableFrom(readerClass)) {
            return IteratorUtils.map(
                    new InputFormatRDD()
                            .readGraphRDD(configuration, new JavaSparkContext(Spark.getContext()))
                            .take(totalLines).iterator(),
                    tuple -> tuple._2().get());
        }
    } catch (final Exception e) {
        throw new IllegalArgumentException(e.getMessage(), e);
    }
    // Message now names the actual parameter, readerClass.
    throw new IllegalArgumentException("The provided readerClass must be an " + InputFormat.class.getCanonicalName() + " or an " + InputRDD.class.getCanonicalName() + ": " + readerClass.getCanonicalName());
}
@Override
public <K, V> Iterator<KeyValue<K, V>> head(final String location, final String memoryKey, final Class readerClass, final int totalLines) {
    final Configuration configuration = new BaseConfiguration();
    configuration.setProperty(Constants.GREMLIN_HADOOP_INPUT_LOCATION, location);
    configuration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_READER, readerClass.getCanonicalName());
    try {
        // Same pattern as the vertex head(): read the memory RDD for the given
        // key and surface at most totalLines entries on the driver.
        if (InputRDD.class.isAssignableFrom(readerClass)) {
            return IteratorUtils.map(
                    ((InputRDD) readerClass.getConstructor().newInstance())
                            .readMemoryRDD(configuration, memoryKey, new JavaSparkContext(Spark.getContext()))
                            .take(totalLines).iterator(),
                    tuple -> new KeyValue<>(tuple._1(), tuple._2()));
        } else if (InputFormat.class.isAssignableFrom(readerClass)) {
            return IteratorUtils.map(
                    new InputFormatRDD()
                            .readMemoryRDD(configuration, memoryKey, new JavaSparkContext(Spark.getContext()))
                            .take(totalLines).iterator(),
                    tuple -> new KeyValue<>(tuple._1(), tuple._2()));
        }
    } catch (final Exception e) {
        throw new IllegalArgumentException(e.getMessage(), e);
    }
    throw new IllegalArgumentException("The provided readerClass must be an " + InputFormat.class.getCanonicalName() + " or an " + InputRDD.class.getCanonicalName() + ": " + readerClass.getCanonicalName());
}
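// For context, a hypothetical call site for the two head() overloads above;
// 'storage' is assumed to be the TinkerPop Storage implementation defining
// them, and the locations, reader classes, and memory key are illustrative,
// not taken from the source.
Iterator<Vertex> firstVertices = storage.head("output/~g", GryoInputFormat.class, 5);
firstVertices.forEachRemaining(System.out::println);

// Peeking at a memory key works the same way, bounded by take(totalLines)
// so only a handful of entries ever reach the driver.
Iterator<KeyValue<String, Long>> firstCounts =
        storage.head("output", "clusterCount", SequenceFileInputFormat.class, 5);
firstCounts.forEachRemaining(System.out::println);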
List<Tuple2<String, Vector>> samples = data.filter(new Function<Tuple2<String, Vector>, Boolean>() {
    @Override
    public Boolean call(Tuple2<String, Vector> in) {
        // Keep only the articles assigned to the current centroid.
        return Util.closestPoint(in._2(), centroids) == index;
    }
}).take(numArticles);
@Override
public Void call(JavaPairRDD<Integer, String> rdd) {
    // Build the report with a StringBuilder instead of repeated String
    // concatenation, then print the first 10 entries of this batch.
    StringBuilder out = new StringBuilder("\nSpark, Top 10 entries for stream id: ")
            .append(rdd.id()).append("\n");
    for (Tuple2<Integer, String> t : rdd.take(10)) {
        out.append(t.toString()).append("\n");
    }
    System.out.println(out);
    return null;
}});
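// A self-contained sketch of how a per-batch callback like the one above is
// typically registered on a DStream. The socket source, key function, and
// batch interval are assumptions for illustration, and it uses the modern
// VoidFunction-style foreachRDD lambda rather than the older
// Function<..., Void> form shown above.
import org.apache.spark.SparkConf;
import org.apache.spark.streaming.Durations;
import org.apache.spark.streaming.api.java.JavaPairDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;
import scala.Tuple2;

public class TakeOnStreamSketch {
    public static void main(String[] args) throws InterruptedException {
        SparkConf conf = new SparkConf().setAppName("take-sketch").setMaster("local[2]");
        JavaStreamingContext jssc = new JavaStreamingContext(conf, Durations.seconds(5));

        // Stand-in source: lines from a local socket, keyed by their length.
        JavaPairDStream<Integer, String> scoredLines = jssc
                .socketTextStream("localhost", 9999)
                .mapToPair(line -> new Tuple2<>(line.length(), line));

        // Each batch, take(10) pulls at most ten pairs back to the driver,
        // unlike collect(), which would materialize the whole batch.
        scoredLines.foreachRDD(rdd -> {
            for (Tuple2<Integer, String> t : rdd.take(10)) {
                System.out.println(t);
            }
        });

        jssc.start();
        jssc.awaitTermination();
    }
}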
for (int i = 0; i < centroids.size(); i++) {
    final int index = i; // effectively-final copy so the lambda can capture it
    // Sample up to numArticles articles whose vector is closest to this centroid.
    List<Tuple2<String, Vector>> samples = data
            .filter((Function<Tuple2<String, Vector>, Boolean>) in -> Util.closestPoint(in._2(), centroids) == index)
            .take(numArticles);
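// Util.closestPoint is not shown in these snippets. A plausible sketch,
// assuming it returns the index of the centroid with the smallest squared
// Euclidean distance; this is the standard k-means assignment step, not
// necessarily the original helper.
import java.util.List;
import org.apache.spark.mllib.linalg.Vector;
import org.apache.spark.mllib.linalg.Vectors;

public static int closestPoint(Vector point, List<Vector> centroids) {
    int bestIndex = 0;
    double bestDistance = Double.MAX_VALUE;
    for (int i = 0; i < centroids.size(); i++) {
        // Vectors.sqdist computes squared Euclidean distance between vectors.
        double distance = Vectors.sqdist(point, centroids.get(i));
        if (distance < bestDistance) {
            bestDistance = distance;
            bestIndex = i;
        }
    }
    return bestIndex;
}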
List<Tuple2<CategorySortKey, String>> top10CategoryList = sortedCategoryRDD.take(10);
List<Top10Category> top10Categories = new ArrayList<>();
for (Tuple2<CategorySortKey, String> tuple2 : top10CategoryList)
for (Tuple2<Integer, String> t : rdd.take(25)) {
    out.append(t.toString()).append("\n");
}
List<Tuple2<CategorySortKey, String>> top10CategoryList = sortedCategoryCountRDD.take(10);
for (Tuple2<CategorySortKey, String> tuple : top10CategoryList) {
    String countInfo = tuple._2;
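// CategorySortKey is not defined in these snippets. A minimal sketch of what
// a composite key for sortByKey might look like, assuming it orders categories
// by click, order, and pay counts in turn; the field names and ordering are
// assumptions, not the original class.
import java.io.Serializable;
import scala.math.Ordered;

public class CategorySortKey implements Ordered<CategorySortKey>, Serializable {
    private final long clickCount;
    private final long orderCount;
    private final long payCount;

    public CategorySortKey(long clickCount, long orderCount, long payCount) {
        this.clickCount = clickCount;
        this.orderCount = orderCount;
        this.payCount = payCount;
    }

    // Compare by click count first, then order count, then pay count.
    @Override
    public int compare(CategorySortKey that) {
        if (clickCount != that.clickCount) return Long.compare(clickCount, that.clickCount);
        if (orderCount != that.orderCount) return Long.compare(orderCount, that.orderCount);
        return Long.compare(payCount, that.payCount);
    }

    // scala.math.Ordered's operator methods, implemented explicitly so the
    // class compiles against any Scala version from Java.
    @Override
    public boolean $less(CategorySortKey that) { return compare(that) < 0; }
    @Override
    public boolean $greater(CategorySortKey that) { return compare(that) > 0; }
    @Override
    public boolean $less$eq(CategorySortKey that) { return compare(that) <= 0; }
    @Override
    public boolean $greater$eq(CategorySortKey that) { return compare(that) >= 0; }
    @Override
    public int compareTo(CategorySortKey that) { return compare(that); }
}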