@Test
public void testSampleBy() {
  // key cycles through 0, 1, 2; strata 0 and 1 are sampled at 10% and 20% respectively.
  Dataset<Row> df = spark.range(0, 100, 1, 2).select(col("id").mod(3).as("key"));
  Dataset<Row> sampled = df.stat().sampleBy("key", ImmutableMap.of(0, 0.1, 1, 0.2), 0L);
  List<Row> actual = sampled.groupBy("key").count().orderBy("key").collectAsList();
  // Sampling is randomized, so only loose bounds on the per-stratum counts are asserted.
  Assert.assertEquals(0, actual.get(0).getLong(0));
  Assert.assertTrue(0 <= actual.get(0).getLong(1) && actual.get(0).getLong(1) <= 8);
  Assert.assertEquals(1, actual.get(1).getLong(0));
  Assert.assertTrue(2 <= actual.get(1).getLong(1) && actual.get(1).getLong(1) <= 13);
}
/**
 * For each column of interest, select the column if it exists in {@code cols};
 * otherwise fill it in with a null literal aliased to that column name.
 *
 * @param cols the columns present in the source
 * @param allCols a set containing all columns of interest
 * @return an array containing the filled columns
 */
private static Column[] expr(final Set<String> cols, final Set<String> allCols) {
  return allCols.stream()
      .map(x -> cols.contains(x) ? col(x) : lit(null).as(x))
      .toArray(Column[]::new);
}
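/*
 * A hypothetical usage sketch for expr above. The SparkSession, DataFrames, and
 * column names here are assumptions, not part of the original code; static imports
 * of org.apache.spark.sql.functions.col and the usual java.util imports are assumed.
 * It pads two DataFrames with null columns so their schemas line up, then unions
 * them by position. A LinkedHashSet keeps the iteration order of allCols stable,
 * so both select calls emit the columns in the same order.
 */
SparkSession spark = SparkSession.builder().master("local").getOrCreate();
Dataset<Row> left = spark.range(2).select(col("id").as("a"));
Dataset<Row> right = spark.range(2).select(col("id").as("b"));
Set<String> leftCols = new LinkedHashSet<>(Arrays.asList(left.columns()));
Set<String> rightCols = new LinkedHashSet<>(Arrays.asList(right.columns()));
Set<String> allCols = new LinkedHashSet<>(leftCols);
allCols.addAll(rightCols);
// Each side now selects (a, b); the column missing on each side becomes a null literal.
Dataset<Row> combined = left.select(expr(leftCols, allCols))
    .union(right.select(expr(rightCols, allCols)));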
/**
 * Create an empty {@link Dataset} of {@link Row}s for use as edges in a
 * {@link org.graphframes.GraphFrame}.
 *
 * @param sparkSession the spark session
 * @return an empty {@link Dataset} of {@link Row}s with a src and a dst column
 */
public static Dataset<Row> emptyEdges(final SparkSession sparkSession) {
  return sparkSession.emptyDataFrame().select(lit(null).as("src"), lit(null).as("dst"));
}
}
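/*
 * A minimal usage sketch (the local SparkSession is an assumption): emptyEdges
 * returns a zero-row DataFrame whose src and dst columns are null literals,
 * usable as the edge side of a graph that has vertices but no connections yet.
 */
SparkSession spark = SparkSession.builder().master("local").getOrCreate();
Dataset<Row> edges = emptyEdges(spark);
edges.printSchema(); // src and dst are present, typed as Spark's null type
assert edges.count() == 0;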
@Test
public void testUDAF() {
  Dataset<Row> df = hc.range(0, 100).union(hc.range(0, 100)).select(col("id").as("value"));
  UserDefinedAggregateFunction udaf = new MyDoubleSum();
  UserDefinedAggregateFunction registeredUDAF = hc.udf().register("mydoublesum", udaf);
  // Create Columns for the UDAF. For now, callUDF does not take an argument to specify
  // whether we want to use distinct aggregation.
  Dataset<Row> aggregatedDF = df.groupBy()
    .agg(
      udaf.distinct(col("value")),
      udaf.apply(col("value")),
      registeredUDAF.apply(col("value")),
      callUDF("mydoublesum", col("value")));

  List<Row> expectedResult = new ArrayList<>();
  expectedResult.add(RowFactory.create(4950.0, 9900.0, 9900.0, 9900.0));
  checkAnswer(
    aggregatedDF,
    expectedResult);
}
}
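/*
 * A minimal sketch of what MyDoubleSum might look like: a UserDefinedAggregateFunction
 * that sums non-null double inputs, ignoring nulls. The field names and exact null
 * handling are assumptions; the real class ships with Spark's test sources.
 */
import org.apache.spark.sql.Row;
import org.apache.spark.sql.expressions.MutableAggregationBuffer;
import org.apache.spark.sql.expressions.UserDefinedAggregateFunction;
import org.apache.spark.sql.types.DataType;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.StructType;

public class MyDoubleSum extends UserDefinedAggregateFunction {
  @Override public StructType inputSchema() {
    return new StructType().add("inputDouble", DataTypes.DoubleType);
  }
  @Override public StructType bufferSchema() {
    return new StructType().add("bufferDouble", DataTypes.DoubleType);
  }
  @Override public DataType dataType() { return DataTypes.DoubleType; }
  @Override public boolean deterministic() { return true; }
  @Override public void initialize(MutableAggregationBuffer buffer) {
    buffer.update(0, null); // no input seen yet
  }
  @Override public void update(MutableAggregationBuffer buffer, Row input) {
    if (!input.isNullAt(0)) {
      double sum = buffer.isNullAt(0) ? 0.0 : buffer.getDouble(0);
      buffer.update(0, sum + input.getDouble(0));
    }
  }
  @Override public void merge(MutableAggregationBuffer buffer1, Row buffer2) {
    if (!buffer2.isNullAt(0)) {
      double sum = buffer1.isNullAt(0) ? 0.0 : buffer1.getDouble(0);
      buffer1.update(0, sum + buffer2.getDouble(0));
    }
  }
  @Override public Object evaluate(Row buffer) {
    return buffer.isNullAt(0) ? null : buffer.getDouble(0);
  }
}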