LOG.debug("Found {} telemetry record(s)", telemetry.cache().count()); LOG.debug("Generated {} message route(s)", routes.cache().count()); .groupByKey(new GroupByPeriodFunction(profilerProps), Encoders.STRING()) .mapGroups(new ProfileBuilderFunction(profilerProps, globals), Encoders.bean(ProfileMeasurementAdapter.class)); LOG.debug("Produced {} profile measurement(s)", measurements.cache().count());
@Test
public void testJsonRDDToDataFrame() {
    // This is a test for the deprecated API in SPARK-15615.
    JavaRDD<String> rdd = jsc.parallelize(Arrays.asList("{\"a\": 2}"));
    Dataset<Row> df = spark.read().json(rdd);

    Assert.assertEquals(1L, df.count());
    Assert.assertEquals(2L, df.collectAsList().get(0).getLong(0));
}
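// json(JavaRDD<String>) is deprecated per SPARK-15615; the supported overload
// takes a Dataset<String>. A minimal sketch using the same session (variable
// names otherwise hypothetical):
Dataset<String> jsonDs = spark.createDataset(
        Arrays.asList("{\"a\": 2}"), Encoders.STRING());
Dataset<Row> dfFromDs = spark.read().json(jsonDs);
Assert.assertEquals(1L, dfFromDs.count());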
public Long size() {
    return this.dataset.count();
}
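// Every call to size() re-runs the dataset's lineage. A hypothetical variant
// that persists before counting, assuming the extra memory is acceptable and
// the dataset is reused:
public Long size() {
    return this.dataset.cache().count();
}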
@Test
public void testTextLoad() {
    Dataset<String> ds1 = spark.read().textFile(getResource("test-data/text-suite.txt"));
    Assert.assertEquals(4L, ds1.count());

    Dataset<String> ds2 = spark.read().textFile(
            getResource("test-data/text-suite.txt"),
            getResource("test-data/text-suite2.txt"));
    Assert.assertEquals(5L, ds2.count());
}
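// textFile() is the typed counterpart of the generic "text" source exercised
// in the next test; a sketch of the equivalence, assuming the same
// getResource helper and test files:
Dataset<String> ds = spark.read().format("text")
        .load(getResource("test-data/text-suite.txt"))
        .as(Encoders.STRING());
Assert.assertEquals(4L, ds.count());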
@Test
public void testGenericLoad() {
    Dataset<Row> df1 = spark.read().format("text").load(getResource("test-data/text-suite.txt"));
    Assert.assertEquals(4L, df1.count());

    Dataset<Row> df2 = spark.read().format("text").load(
            getResource("test-data/text-suite.txt"),
            getResource("test-data/text-suite2.txt"));
    Assert.assertEquals(5L, df2.count());
}
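// The generic reader also accepts source-specific options. For example, the
// text source's "wholetext" option (Spark 2.2+) reads each file as a single
// row; a brief sketch under the same test resources:
Dataset<Row> whole = spark.read().format("text")
        .option("wholetext", "true")
        .load(getResource("test-data/text-suite.txt"));
Assert.assertEquals(1L, whole.count());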
/**
 * Returns true if any of the UrlAndVersions in the new value sets
 * duplicate members of the current ValueSets.
 */
protected boolean hasDuplicateUrlAndVersions(Dataset<UrlAndVersion> membersToCheck) {
    return this.members.intersect(membersToCheck).count() > 0;
}
/**
 * Returns true if any UrlAndVersion in membersToCheck duplicates a member
 * of this value sets instance.
 */
protected boolean hasDuplicateUrlAndVersions(Dataset<UrlAndVersion> membersToCheck) {
    return this.members.intersect(membersToCheck).count() > 0;
}
private boolean hasDuplicateUriAndVersions(Dataset<UrlAndVersion> membersToCheck) {
    return this.members.intersect(membersToCheck).count() > 0;
}
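// All of these variants run a full count just to test for emptiness. A
// cheaper sketch, assuming Spark 2.4+ where Dataset#isEmpty is available:
// isEmpty() can stop after finding a single row, while count() > 0 always
// tallies the entire intersection.
protected boolean hasDuplicateUrlAndVersions(Dataset<UrlAndVersion> membersToCheck) {
    return !this.members.intersect(membersToCheck).isEmpty();
}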
@Override
public void process(Exchange exchange) throws Exception {
    HiveContext hiveContext = resolveHiveContext();
    String sql = exchange.getIn().getBody(String.class);
    Dataset<Row> resultFrame = hiveContext.sql(sql);
    exchange.getIn().setBody(getEndpoint().isCollect()
            ? resultFrame.collectAsList()
            : resultFrame.count());
}
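// For context, a hypothetical Camel route that could drive this processor;
// the spark:hive URI and collect option follow the camel-spark component's
// endpoint form, but treat the exact wiring as an assumption:
from("direct:hiveSql")
        .to("spark:hive?collect=false"); // body becomes the row count, not the rows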
@Override
public Dataset<Row> check(Dataset<Row> dataset, Map<String, Dataset<Row>> stepDependencies) {
    if (isDependency()) {
        Dataset<Row> expectedDependency = stepDependencies.get(dependency);
        if (expectedDependency.count() == 1
                && expectedDependency.schema().fields().length == 1
                && expectedDependency.schema().apply(0).dataType() == DataTypes.LongType) {
            expected = expectedDependency.collectAsList().get(0).getLong(0);
        } else {
            throw new RuntimeException(
                "Step dependency for count rule must have one row with a single field of long type");
        }
    }
    if (expected < 0) {
        throw new RuntimeException(
            "Failed to determine expected count: must be specified either as literal or step dependency");
    }
    return dataset.groupBy().count().map(new CheckCount(expected, name), RowEncoder.apply(SCHEMA));
}
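// The final line relies on groupBy() with no columns producing exactly one
// global group; a minimal standalone illustration of that behavior:
Dataset<Row> counted = dataset.groupBy().count(); // schema: [count: bigint]
long total = counted.collectAsList().get(0).getLong(0);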
@Test
public void normalizer() {
    // The tests are to check Java compatibility.
    JavaRDD<VectorIndexerSuite.FeatureData> points = jsc.parallelize(Arrays.asList(
        new VectorIndexerSuite.FeatureData(Vectors.dense(0.0, -2.0)),
        new VectorIndexerSuite.FeatureData(Vectors.dense(1.0, 3.0)),
        new VectorIndexerSuite.FeatureData(Vectors.dense(1.0, 4.0))
    ));
    Dataset<Row> dataFrame = spark.createDataFrame(points, VectorIndexerSuite.FeatureData.class);
    Normalizer normalizer = new Normalizer()
        .setInputCol("features")
        .setOutputCol("normFeatures");

    // Normalize each Vector using $L^2$ norm.
    Dataset<Row> l2NormData = normalizer.transform(dataFrame, normalizer.p().w(2));
    l2NormData.count();

    // Normalize each Vector using $L^\infty$ norm.
    Dataset<Row> lInfNormData =
        normalizer.transform(dataFrame, normalizer.p().w(Double.POSITIVE_INFINITY));
    lInfNormData.count();
}
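// transform(dataFrame, normalizer.p().w(2)) overrides the p param for a
// single call; an equivalent sketch that sets the param on the transformer
// itself (variable names hypothetical):
Normalizer l1Normalizer = new Normalizer()
        .setInputCol("features")
        .setOutputCol("normFeatures")
        .setP(1.0);
Dataset<Row> l1NormData = l1Normalizer.transform(dataFrame);
l1NormData.count();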