Refine search
/** Verifies that collectAsList() returns the dataset's elements in their original order. */
@Test
public void testCollect() {
  List<String> input = Arrays.asList("hello", "world");
  Dataset<String> dataset = spark.createDataset(input, Encoders.STRING());
  List<String> rows = dataset.collectAsList();
  Assert.assertEquals(Arrays.asList("hello", "world"), rows);
}
/**
 * Calling a one-argument UDF with two arguments must fail analysis.
 * "inc" is registered with a single Long parameter, so {@code inc(1, 5)} is expected to
 * raise an AnalysisException (declared via the @Test expected attribute).
 */
@Test(expected = AnalysisException.class)
public void udf5Test() {
  spark.udf().register("inc", (Long i) -> i + 1, DataTypes.LongType);
  // collectAsList() forces analysis of the query, which triggers the expected exception.
  // The previously assigned result list was never read, so the dead local is removed.
  spark.sql("SELECT inc(1, 5)").collectAsList();
}
/** Round-trips two strings through a Dataset and checks collectAsList preserves them. */
@Test
public void testCollect() {
  Dataset<String> ds = spark.createDataset(Arrays.asList("hello", "world"), Encoders.STRING());
  Assert.assertEquals(Arrays.asList("hello", "world"), ds.collectAsList());
}
/**
 * Calling a one-argument UDF with two arguments must fail analysis.
 * "inc" is registered with a single Long parameter, so {@code inc(1, 5)} is expected to
 * raise an AnalysisException (declared via the @Test expected attribute).
 */
@Test(expected = AnalysisException.class)
public void udf5Test() {
  spark.udf().register("inc", (Long i) -> i + 1, DataTypes.LongType);
  // collectAsList() forces analysis of the query, which triggers the expected exception.
  // The previously assigned result list was never read, so the dead local is removed.
  spark.sql("SELECT inc(1, 5)").collectAsList();
}
/** Verifies that collecting a string Dataset yields the exact list it was created from. */
@Test
public void testCollect() {
  List<String> expected = Arrays.asList("hello", "world");
  Dataset<String> stringDs = spark.createDataset(expected, Encoders.STRING());
  Assert.assertEquals(expected, stringDs.collectAsList());
}
/** A two-argument UDF concatenating key and value must match a row-by-row concatenation. */
@Test
public void testUDF() {
  UserDefinedFunction concat =
      udf((Integer i, String s) -> i.toString() + s, DataTypes.StringType);
  Dataset<Row> applied = spark.table("testData").select(concat.apply(col("key"), col("value")));
  String[] actual =
      applied.collectAsList().stream().map(r -> r.getString(0)).toArray(String[]::new);
  String[] expected =
      spark.table("testData").collectAsList().stream()
          .map(r -> r.get(0).toString() + r.getString(1))
          .toArray(String[]::new);
  Assert.assertArrayEquals(expected, actual);
}
}
/** A bean whose nested fields are all null must round-trip unchanged through its encoder. */
@Test
public void testSerializeNull() {
  NestedSmallBean nullBean = new NestedSmallBean();
  Encoder<NestedSmallBean> beanEncoder = Encoders.bean(NestedSmallBean.class);
  List<NestedSmallBean> expected = Arrays.asList(nullBean);
  Dataset<NestedSmallBean> serialized = spark.createDataset(expected, beanEncoder);
  Assert.assertEquals(expected, serialized.collectAsList());
  // Mapping through an identity function exercises both deserialization and reserialization.
  Dataset<NestedSmallBean> mapped =
      serialized.map((MapFunction<NestedSmallBean, NestedSmallBean>) b -> b, beanEncoder);
  Assert.assertEquals(expected, mapped.collectAsList());
}
}
/** Applies an (Integer, String) -> String UDF and compares against manual concatenation. */
@Test
public void testUDF() {
  UserDefinedFunction keyValueConcat =
      udf((Integer i, String s) -> i.toString() + s, DataTypes.StringType);
  Dataset<Row> selected =
      spark.table("testData").select(keyValueConcat.apply(col("key"), col("value")));
  String[] observed =
      selected.collectAsList().stream().map(row -> row.getString(0)).toArray(String[]::new);
  String[] wanted =
      spark.table("testData").collectAsList().stream()
          .map(row -> row.get(0).toString() + row.getString(1))
          .toArray(String[]::new);
  Assert.assertArrayEquals(wanted, observed);
}
}
/** Encoding a default-constructed bean (all-null nested state) must be lossless. */
@Test
public void testSerializeNull() {
  Encoder<NestedSmallBean> encoder = Encoders.bean(NestedSmallBean.class);
  List<NestedSmallBean> beans = Arrays.asList(new NestedSmallBean());
  Dataset<NestedSmallBean> direct = spark.createDataset(beans, encoder);
  Assert.assertEquals(beans, direct.collectAsList());
  // Identity map forces a decode/encode cycle; the result must still equal the input.
  Dataset<NestedSmallBean> remapped =
      direct.map((MapFunction<NestedSmallBean, NestedSmallBean>) bean -> bean, encoder);
  Assert.assertEquals(beans, remapped.collectAsList());
}
/**
 * SPARK-15285 regression test: encoders for deeply nested JavaBeans used to generate a
 * single method exceeding the JVM's 64KB bytecode limit. Success is simply that codegen
 * compiles and the collect completes without throwing.
 */
@Test
public void test() {
  List<NestedComplicatedJavaBean> data = new ArrayList<>();
  data.add(NestedComplicatedJavaBean.newBuilder().build());
  // Removed a never-used local bean instance (dead code in the original).
  Dataset<NestedComplicatedJavaBean> ds =
      spark.createDataset(data, Encoders.bean(NestedComplicatedJavaBean.class));
  ds.collectAsList();
}
/** Verifies null-valued nested bean fields survive dataset creation and an identity map. */
@Test
public void testSerializeNull() {
  NestedSmallBean emptyBean = new NestedSmallBean();
  Encoder<NestedSmallBean> enc = Encoders.bean(NestedSmallBean.class);
  List<NestedSmallBean> source = Arrays.asList(emptyBean);
  Dataset<NestedSmallBean> first = spark.createDataset(source, enc);
  Assert.assertEquals(source, first.collectAsList());
  Dataset<NestedSmallBean> second =
      first.map((MapFunction<NestedSmallBean, NestedSmallBean>) x -> x, enc);
  Assert.assertEquals(source, second.collectAsList());
}
/**
 * SPARK-15285 regression test: encoders for deeply nested JavaBeans used to generate a
 * single method exceeding the JVM's 64KB bytecode limit. Success is simply that codegen
 * compiles and the collect completes without throwing.
 */
@Test
public void test() {
  List<NestedComplicatedJavaBean> data = new ArrayList<>();
  data.add(NestedComplicatedJavaBean.newBuilder().build());
  // Removed a never-used local bean instance (dead code in the original).
  Dataset<NestedComplicatedJavaBean> ds =
      spark.createDataset(data, Encoders.bean(NestedComplicatedJavaBean.class));
  ds.collectAsList();
}
/** typed.count over the grouped dataset must report two "a" elements and one "b" element. */
@Test
public void testTypedAggregationCount() {
  KeyValueGroupedDataset<String, Tuple2<String, Integer>> grouped = generateGroupedDataset();
  Dataset<Tuple2<String, Long>> counted = grouped.agg(typed.count(element -> element));
  List<Tuple2<String, Long>> expected =
      Arrays.asList(new Tuple2<>("a", 2L), new Tuple2<>("b", 1L));
  Assert.assertEquals(expected, counted.collectAsList());
}
/**
 * SPARK-15285 regression test: encoders for deeply nested JavaBeans used to generate a
 * single method exceeding the JVM's 64KB bytecode limit. Success is simply that codegen
 * compiles and the collect completes without throwing.
 */
@Test
public void test() {
  List<NestedComplicatedJavaBean> data = new ArrayList<>();
  data.add(NestedComplicatedJavaBean.newBuilder().build());
  // Removed a never-used local bean instance (dead code in the original).
  Dataset<NestedComplicatedJavaBean> ds =
      spark.createDataset(data, Encoders.bean(NestedComplicatedJavaBean.class));
  ds.collectAsList();
}
/** Aggregating with typed.count yields per-key cardinalities: ("a", 2) and ("b", 1). */
@Test
public void testTypedAggregationCount() {
  KeyValueGroupedDataset<String, Tuple2<String, Integer>> groups = generateGroupedDataset();
  Dataset<Tuple2<String, Long>> counts = groups.agg(typed.count(item -> item));
  Assert.assertEquals(
      Arrays.asList(new Tuple2<>("a", 2L), new Tuple2<>("b", 1L)),
      counts.collectAsList());
}
/** freqItems with a 20% support threshold on column "a" must include the frequent value 1. */
@Test
public void testFrequentItems() {
  Dataset<Row> table = spark.table("testData2");
  Dataset<Row> freq = table.stat().freqItems(new String[] {"a"}, 0.2);
  Assert.assertTrue(freq.collectAsList().get(0).getSeq(0).contains(1));
}
/** Checks that the typed count aggregator produces the expected per-group totals. */
@Test
public void testTypedAggregationCount() {
  KeyValueGroupedDataset<String, Tuple2<String, Integer>> source = generateGroupedDataset();
  List<Tuple2<String, Long>> want = Arrays.asList(new Tuple2<>("a", 2L), new Tuple2<>("b", 1L));
  Dataset<Tuple2<String, Long>> got = source.agg(typed.count(t -> t));
  Assert.assertEquals(want, got.collectAsList());
}
/** Frequent-items stat on testData2 column "a" (support 0.2) must surface the value 1. */
@Test
public void testFrequentItems() {
  String[] targetCols = {"a"};
  Dataset<Row> frequent = spark.table("testData2").stat().freqItems(targetCols, 0.2);
  Row firstRow = frequent.collectAsList().get(0);
  Assert.assertTrue(firstRow.getSeq(0).contains(1));
}
/** Grouped typed.count must return ("a", 2) and ("b", 1) for the generated dataset. */
@Test
public void testTypedAggregationCount() {
  KeyValueGroupedDataset<String, Tuple2<String, Integer>> byKey = generateGroupedDataset();
  Dataset<Tuple2<String, Long>> tally = byKey.agg(typed.count(record -> record));
  List<Tuple2<String, Long>> expectedPairs =
      Arrays.asList(new Tuple2<>("a", 2L), new Tuple2<>("b", 1L));
  Assert.assertEquals(expectedPairs, tally.collectAsList());
}
/** Round-trips the DataFrame through a JSON save/load cycle and checks content equality. */
@Test
public void saveAndLoad() {
  Map<String, String> ioOptions = new HashMap<>();
  ioOptions.put("path", path.toString());
  // ErrorIfExists guards against accidentally overwriting a pre-existing output directory.
  df.write().mode(SaveMode.ErrorIfExists).format("json").options(ioOptions).save();
  Dataset<Row> reloaded = spark.read().format("json").options(ioOptions).load();
  checkAnswer(reloaded, df.collectAsList());
}