/**
 * Runs {@code foreachPartition} over a two-element RDD, adding one to an accumulator for
 * every element pulled from a partition iterator, and expects the accumulator to read 2.
 */
@Test
public void foreachPartition() {
  LongAccumulator visited = sc.sc().longAccumulator();
  JavaRDD<String> words = sc.parallelize(Arrays.asList("Hello", "World"));
  // Drain each partition's iterator, counting one per element consumed.
  words.foreachPartition(partition -> partition.forEachRemaining(word -> visited.add(1)));
  assertEquals(2, visited.value().intValue());
}
/**
 * Runs {@code foreachPartition} over a two-element RDD, adding one to an accumulator for
 * every element pulled from a partition iterator, and expects the accumulator to read 2.
 */
@Test
public void foreachPartition() {
  LongAccumulator visited = sc.sc().longAccumulator();
  JavaRDD<String> words = sc.parallelize(Arrays.asList("Hello", "World"));
  // Drain each partition's iterator, counting one per element consumed.
  words.foreachPartition(partition -> partition.forEachRemaining(word -> visited.add(1)));
  assertEquals(2, visited.value().intValue());
}
/**
 * Runs {@code foreachPartition} over a two-element RDD, adding one to an accumulator for
 * every element pulled from a partition iterator, and expects the accumulator to read 2.
 */
@Test
public void foreachPartition() {
  LongAccumulator visited = sc.sc().longAccumulator();
  JavaRDD<String> words = sc.parallelize(Arrays.asList("Hello", "World"));
  // Drain each partition's iterator, counting one per element consumed.
  words.foreachPartition(partition -> partition.forEachRemaining(word -> visited.add(1)));
  assertEquals(2, visited.value().intValue());
}
/**
 * Runs {@code foreach} over a two-element RDD, adding one to an accumulator per element,
 * and expects the accumulator to read 2 afterwards.
 */
@Test
public void foreach() {
  LongAccumulator visited = sc.sc().longAccumulator();
  sc.parallelize(Arrays.asList("Hello", "World"))
    .foreach(word -> visited.add(1));
  assertEquals(2, visited.value().intValue());
}
/**
 * Runs {@code foreach} over a two-element RDD, adding one to an accumulator per element,
 * and expects the accumulator to read 2 afterwards.
 */
@Test
public void foreach() {
  LongAccumulator visited = sc.sc().longAccumulator();
  sc.parallelize(Arrays.asList("Hello", "World"))
    .foreach(word -> visited.add(1));
  assertEquals(2, visited.value().intValue());
}
/**
 * Runs {@code foreach} over a two-element RDD, adding one to an accumulator per element,
 * and expects the accumulator to read 2 afterwards.
 */
@Test
public void foreach() {
  LongAccumulator visited = sc.sc().longAccumulator();
  sc.parallelize(Arrays.asList("Hello", "World"))
    .foreach(word -> visited.add(1));
  assertEquals(2, visited.value().intValue());
}
// Unnamed long accumulator created on the SparkContext underlying sc.
// NOTE(review): presumably tallies bytes written, going by the variable name; no usage is
// visible in this snippet - confirm against the surrounding code.
final LongAccumulator bytesWritten = sc.sc().longAccumulator();
/**
 * Builds a three-element string Dataset, runs {@code foreach} with a function that adds one
 * to an accumulator per element, and expects the accumulator to read 3.
 */
@Test
public void testForeach() {
  LongAccumulator seen = jsc.sc().longAccumulator();
  List<String> letters = Arrays.asList("a", "b", "c");
  Dataset<String> dataset = spark.createDataset(letters, Encoders.STRING());
  // Give the lambda an explicit ForeachFunction target type before handing it to foreach.
  ForeachFunction<String> countOne = letter -> seen.add(1);
  dataset.foreach(countOne);
  Assert.assertEquals(3, seen.value().intValue());
}
/**
 * Builds a three-element string Dataset, runs {@code foreach} with a function that adds one
 * to an accumulator per element, and expects the accumulator to read 3.
 */
@Test
public void testForeach() {
  LongAccumulator seen = jsc.sc().longAccumulator();
  List<String> letters = Arrays.asList("a", "b", "c");
  Dataset<String> dataset = spark.createDataset(letters, Encoders.STRING());
  // Give the lambda an explicit ForeachFunction target type before handing it to foreach.
  ForeachFunction<String> countOne = letter -> seen.add(1);
  dataset.foreach(countOne);
  Assert.assertEquals(3, seen.value().intValue());
}
/**
 * Builds a three-element string Dataset, runs {@code foreach} with a function that adds one
 * to an accumulator per element, and expects the accumulator to read 3.
 */
@Test
public void testForeach() {
  LongAccumulator seen = jsc.sc().longAccumulator();
  List<String> letters = Arrays.asList("a", "b", "c");
  Dataset<String> dataset = spark.createDataset(letters, Encoders.STRING());
  // Give the lambda an explicit ForeachFunction target type before handing it to foreach.
  ForeachFunction<String> countOne = letter -> seen.add(1);
  dataset.foreach(countOne);
  Assert.assertEquals(3, seen.value().intValue());
}
/**
 * Replaces the value of every entry in {@code registeredKeys} with an {@code Optional}
 * holding a newly created, unnamed long accumulator from the given context.
 *
 * @param sparkContext context used to create one accumulator per registered entry
 */
public void registerAccumulators(@NonNull final SparkContext sparkContext) {
    this.registeredKeys.entrySet()
        .forEach(slot -> slot.setValue(Optional.of(sparkContext.longAccumulator())));
}
/**
 * Returns the shared {@code instance}, creating it on first use.
 *
 * <p>If {@code instance} is null it is checked again while holding the
 * {@code RecordCounter.class} monitor; if it is still null, a message is printed to standard
 * output and a long accumulator named "RecordCounter" is created from {@code jsc.sc()} and
 * stored in {@code instance}.
 *
 * <p>NOTE(review): the declaration of {@code instance} is not visible in this snippet. This
 * check-lock-check shape is only safe if that field is declared {@code volatile} - confirm.
 * The final closing brace on this line ends an enclosing class whose header is outside the
 * visible snippet.
 *
 * @param jsc context whose underlying SparkContext creates the accumulator on the first call
 * @return the current value of {@code instance}
 */
public static LongAccumulator getInstance(JavaSparkContext jsc) { if (instance == null) { synchronized (RecordCounter.class) { if (instance == null) { System.out.println("*** Initializing RecordCounter"); instance = jsc.sc().longAccumulator("RecordCounter"); } } } return instance; } }
/**
 * Publishes row-count metrics for the given write statuses.
 *
 * <p>Does nothing unless both {@code writesStatuses} and {@code dataFeedMetrics} are present.
 * Otherwise it sums failed-record counts and total-record counts across all write statuses
 * using two accumulators, then reports the failed count as
 * {@code DataFeedMetricNames.ERROR_ROWCOUNT} and (total - failed) as
 * {@code DataFeedMetricNames.OUTPUT_ROWCOUNT}.
 *
 * @param writesStatuses write statuses to summarise, if any
 */
private void logWriteMetrics(final Optional<JavaRDD<WriteStatus>> writesStatuses) {
    if (!writesStatuses.isPresent() || !this.dataFeedMetrics.isPresent()) {
        return;
    }

    final JavaRDD<WriteStatus> statuses = writesStatuses.get();
    final LongAccumulator totalCount = statuses.rdd().sparkContext().longAccumulator();
    final LongAccumulator errorCount = statuses.rdd().sparkContext().longAccumulator();

    // One pass over the statuses feeds both counters.
    statuses.foreach(status -> {
        errorCount.add(status.getFailedRecords().size());
        totalCount.add(status.getTotalRecords());
    });

    this.dataFeedMetrics.get().createLongMetric(
        DataFeedMetricNames.ERROR_ROWCOUNT,
        errorCount.value(),
        this.dataFeedMetricsTags);
    this.dataFeedMetrics.get().createLongMetric(
        DataFeedMetricNames.OUTPUT_ROWCOUNT,
        totalCount.value() - errorCount.value(),
        this.dataFeedMetricsTags);
}
/**
 * Creates one named Spark accumulator per request and records it by name.
 *
 * <p>A request whose class is {@code Long.class} gets a long accumulator stored in
 * {@code longAccumulators}; one whose class is {@code Double.class} gets a double accumulator
 * stored in {@code doubleAccumulators}. Requests of any other class create nothing. Every
 * request is logged once it has been handled.
 *
 * @param requests the accumulator requests to process
 */
public Accumulators(Set<AccumulatorRequest> requests) {
  for (AccumulatorRequest req : requests) {
    final String accName = req.getName();
    final Class<?> valueType = req.getClazz();

    if (valueType == Long.class) {
      longAccumulators.put(
          accName, Contexts.getSparkSession().sparkContext().longAccumulator(accName));
    } else if (valueType == Double.class) {
      doubleAccumulators.put(
          accName, Contexts.getSparkSession().sparkContext().doubleAccumulator(accName));
    }

    LOG.info("Processed accumulator request: " + accName);
  }
}
/**
 * Computes the average record size for the current run, records it in the current
 * {@link SinkStat}, and persists the sink stats.
 *
 * <p>For every {@link WriteStatus} with a positive number of bytes written and a positive
 * number of inserts (writes minus update-writes), the per-status value
 * {@code bytes / inserts} is added to an accumulator. The accumulator's average, truncated
 * to a whole number, is stored under {@code SinkStat.AVG_RECORD_SIZE} when it is positive.
 * {@code sinkStatMgr.persist()} is called in every case, including when
 * {@code writesStatuses} is absent.
 *
 * @param writesStatuses write statuses of the current run; when absent only the persist
 *                       step runs
 */
private void updateSinkStat(final Optional<JavaRDD<WriteStatus>> writesStatuses) {
    if (writesStatuses.isPresent()) {
        final LongAccumulator avgRecordSizeCounter =
            writesStatuses.get().rdd().sparkContext().longAccumulator();
        writesStatuses.get().foreach(
            writeStatus -> {
                final long writeBytes = writeStatus.getStat().getTotalWriteBytes();
                final long numInserts = writeStatus.getStat().getNumWrites()
                    - writeStatus.getStat().getNumUpdateWrites();
                // Skip statuses that wrote nothing or only updates; this also avoids a
                // division by zero.
                if (writeBytes > 0 && numInserts > 0) {
                    avgRecordSizeCounter.add(writeBytes / numInserts);
                }
            }
        );
        // Narrow the average straight to long. The previous (int) cast clamped any value
        // above Integer.MAX_VALUE before it was widened into this long variable.
        final long avgRecordSize = (long) avgRecordSizeCounter.avg();
        if (avgRecordSize > 0) {
            log.info("Updating Sink Stat manager : avgRecordSize : {}", avgRecordSize);
            this.sinkStatMgr.getCurrentStat().put(SinkStat.AVG_RECORD_SIZE, Long.toString(avgRecordSize));
        }
    }
    this.sinkStatMgr.persist();
}
// Unnamed long accumulator created on the SparkContext underlying sc.
// NOTE(review): presumably tallies bytes written, going by the variable name; no usage is
// visible in this snippet - confirm against the surrounding code.
final LongAccumulator bytesWritten = sc.sc().longAccumulator();
@SuppressWarnings("unchecked") @Test public void testForeachRDD() { final LongAccumulator accumRdd = ssc.sparkContext().sc().longAccumulator(); final LongAccumulator accumEle = ssc.sparkContext().sc().longAccumulator(); List<List<Integer>> inputData = Arrays.asList( Arrays.asList(1,1,1), Arrays.asList(1,1,1)); JavaDStream<Integer> stream = JavaTestUtils.attachTestInputStream(ssc, inputData, 1); JavaTestUtils.attachTestOutputStream(stream.count()); // dummy output stream.foreachRDD(rdd -> { accumRdd.add(1); rdd.foreach(i -> accumEle.add(1)); }); // This is a test to make sure foreachRDD(VoidFunction2) can be called from Java stream.foreachRDD((rdd, time) -> {}); JavaTestUtils.runStreams(ssc, 2, 2); Assert.assertEquals(2, accumRdd.value().intValue()); Assert.assertEquals(6, accumEle.value().intValue()); }
@SuppressWarnings("unchecked") @Test public void testForeachRDD() { final LongAccumulator accumRdd = ssc.sparkContext().sc().longAccumulator(); final LongAccumulator accumEle = ssc.sparkContext().sc().longAccumulator(); List<List<Integer>> inputData = Arrays.asList( Arrays.asList(1,1,1), Arrays.asList(1,1,1)); JavaDStream<Integer> stream = JavaTestUtils.attachTestInputStream(ssc, inputData, 1); JavaTestUtils.attachTestOutputStream(stream.count()); // dummy output stream.foreachRDD(rdd -> { accumRdd.add(1); rdd.foreach(i -> accumEle.add(1)); }); // This is a test to make sure foreachRDD(VoidFunction2) can be called from Java stream.foreachRDD((rdd, time) -> {}); JavaTestUtils.runStreams(ssc, 2, 2); Assert.assertEquals(2, accumRdd.value().intValue()); Assert.assertEquals(6, accumEle.value().intValue()); }