@Test public void testJavaBeanEncoder2() { // This is a regression test of SPARK-12404 OuterScopes.addOuterScope(this); SimpleJavaBean2 obj = new SimpleJavaBean2(); obj.setA(new Timestamp(0)); obj.setB(new Date(0)); obj.setC(java.math.BigDecimal.valueOf(1)); Dataset<SimpleJavaBean2> ds = spark.createDataset(Arrays.asList(obj), Encoders.bean(SimpleJavaBean2.class)); ds.collect(); }
// Decode the DataFrame (df, defined elsewhere) as NestedSmallBean and force
// evaluation; collect() failing would signal a bean-encoder regression.
Dataset<NestedSmallBean> ds = df.as(Encoders.bean(NestedSmallBean.class)); ds.collect();
// Same pattern as above: decode df (defined elsewhere) into NestedSmallBean
// objects and trigger the job; only successful evaluation is checked here.
Dataset<NestedSmallBean> ds = df.as(Encoders.bean(NestedSmallBean.class)); ds.collect();
// Regression test for SPARK-12404: a bean holding Timestamp, Date and
// BigDecimal values must encode without throwing; collect() forces evaluation.
@Test public void testJavaBeanEncoder2() { // This is a regression test of SPARK-12404 OuterScopes.addOuterScope(this); SimpleJavaBean2 obj = new SimpleJavaBean2(); obj.setA(new Timestamp(0)); obj.setB(new Date(0)); obj.setC(java.math.BigDecimal.valueOf(1)); Dataset<SimpleJavaBean2> ds = spark.createDataset(Arrays.asList(obj), Encoders.bean(SimpleJavaBean2.class)); ds.collect(); }
// Decode df (defined elsewhere) with the NestedSmallBean encoder and evaluate.
Dataset<NestedSmallBean> ds = df.as(Encoders.bean(NestedSmallBean.class)); ds.collect();
// Duplicate of the SPARK-12404 regression test: bean with Timestamp/Date/
// BigDecimal fields must survive Encoders.bean; collect() is the assertion.
@Test public void testJavaBeanEncoder2() { // This is a regression test of SPARK-12404 OuterScopes.addOuterScope(this); SimpleJavaBean2 obj = new SimpleJavaBean2(); obj.setA(new Timestamp(0)); obj.setB(new Date(0)); obj.setC(java.math.BigDecimal.valueOf(1)); Dataset<SimpleJavaBean2> ds = spark.createDataset(Arrays.asList(obj), Encoders.bean(SimpleJavaBean2.class)); ds.collect(); }
@Test public void isInCollectionWorksCorrectlyOnJava() { List<Row> rows = Arrays.asList( RowFactory.create(1, "x"), RowFactory.create(2, "y"), RowFactory.create(3, "z")); StructType schema = createStructType(Arrays.asList( createStructField("a", IntegerType, false), createStructField("b", StringType, false))); Dataset<Row> df = spark.createDataFrame(rows, schema); // Test with different types of collections Assert.assertTrue(Arrays.equals( (Row[]) df.filter(df.col("a").isInCollection(Arrays.asList(1, 2))).collect(), (Row[]) df.filter((FilterFunction<Row>) r -> r.getInt(0) == 1 || r.getInt(0) == 2).collect() )); Assert.assertTrue(Arrays.equals( (Row[]) df.filter(df.col("a").isInCollection(new HashSet<>(Arrays.asList(1, 2)))).collect(), (Row[]) df.filter((FilterFunction<Row>) r -> r.getInt(0) == 1 || r.getInt(0) == 2).collect() )); Assert.assertTrue(Arrays.equals( (Row[]) df.filter(df.col("a").isInCollection(new ArrayList<>(Arrays.asList(3, 1)))).collect(), (Row[]) df.filter((FilterFunction<Row>) r -> r.getInt(0) == 3 || r.getInt(0) == 1).collect() )); }
// Duplicate test: Column.isInCollection must behave identically for List,
// HashSet and ArrayList arguments, matching a hand-written row predicate.
@Test public void isInCollectionWorksCorrectlyOnJava() { List<Row> rows = Arrays.asList( RowFactory.create(1, "x"), RowFactory.create(2, "y"), RowFactory.create(3, "z")); StructType schema = createStructType(Arrays.asList( createStructField("a", IntegerType, false), createStructField("b", StringType, false))); Dataset<Row> df = spark.createDataFrame(rows, schema); // Test with different types of collections Assert.assertTrue(Arrays.equals( (Row[]) df.filter(df.col("a").isInCollection(Arrays.asList(1, 2))).collect(), (Row[]) df.filter((FilterFunction<Row>) r -> r.getInt(0) == 1 || r.getInt(0) == 2).collect() )); Assert.assertTrue(Arrays.equals( (Row[]) df.filter(df.col("a").isInCollection(new HashSet<>(Arrays.asList(1, 2)))).collect(), (Row[]) df.filter((FilterFunction<Row>) r -> r.getInt(0) == 1 || r.getInt(0) == 2).collect() )); Assert.assertTrue(Arrays.equals( (Row[]) df.filter(df.col("a").isInCollection(new ArrayList<>(Arrays.asList(3, 1)))).collect(), (Row[]) df.filter((FilterFunction<Row>) r -> r.getInt(0) == 3 || r.getInt(0) == 1).collect() )); }
/**
 * Materializes a {@code Dataset<Row>} into a {@link DataSetResult}: the schema's
 * field names become the column names, and each row becomes a list of its
 * column values in schema order.
 *
 * @param df the dataset to collect; fully materialized on the driver, so this
 *           is only suitable for small results
 * @return a populated {@code DataSetResult} (never null)
 */
public static DataSetResult getDataSetResult(Dataset<Row> df) {
  DataSetResult result = new DataSetResult();
  String[] fieldNames = df.schema().fieldNames();
  result.getColumnNames().addAll(Arrays.asList(fieldNames));
  // collectAsList() is the Java-friendly API; it avoids the unchecked
  // (Row[]) cast on collect(), whose erased return type makes the cast
  // fragile across Spark versions.
  for (Row row : df.collectAsList()) {
    List<Object> values = new ArrayList<>(fieldNames.length);
    for (int i = 0; i < fieldNames.length; i++) {
      values.add(row.get(i));
    }
    result.getRows().add(values);
  }
  return result;
}
@Test
public void javaCompatibilityTest() {
  // StopWordsRemover must be constructible and runnable from Java.
  StopWordsRemover remover = new StopWordsRemover()
      .setInputCol("raw")
      .setOutputCol("filtered");

  StructType schema = new StructType(new StructField[]{
      new StructField("raw", DataTypes.createArrayType(DataTypes.StringType), false,
          Metadata.empty())
  });
  List<Row> inputRows = Arrays.asList(
      RowFactory.create(Arrays.asList("I", "saw", "the", "red", "baloon")),
      RowFactory.create(Arrays.asList("Mary", "had", "a", "little", "lamb")));

  Dataset<Row> frame = spark.createDataFrame(inputRows, schema);
  // Evaluation succeeding is the assertion.
  remover.transform(frame).collect();
}
}
// Duplicate Java-API smoke test for StopWordsRemover; transform().collect()
// succeeding is the assertion. Trailing brace closes the enclosing class.
@Test public void javaCompatibilityTest() { StopWordsRemover remover = new StopWordsRemover() .setInputCol("raw") .setOutputCol("filtered"); List<Row> data = Arrays.asList( RowFactory.create(Arrays.asList("I", "saw", "the", "red", "baloon")), RowFactory.create(Arrays.asList("Mary", "had", "a", "little", "lamb")) ); StructType schema = new StructType(new StructField[]{ new StructField("raw", DataTypes.createArrayType(DataTypes.StringType), false, Metadata.empty()) }); Dataset<Row> dataset = spark.createDataFrame(data, schema); remover.transform(dataset).collect(); } }
// Duplicate Java-API smoke test for StopWordsRemover (see identical copies
// elsewhere in this file); only successful evaluation is checked.
@Test public void javaCompatibilityTest() { StopWordsRemover remover = new StopWordsRemover() .setInputCol("raw") .setOutputCol("filtered"); List<Row> data = Arrays.asList( RowFactory.create(Arrays.asList("I", "saw", "the", "red", "baloon")), RowFactory.create(Arrays.asList("Mary", "had", "a", "little", "lamb")) ); StructType schema = new StructType(new StructField[]{ new StructField("raw", DataTypes.createArrayType(DataTypes.StringType), false, Metadata.empty()) }); Dataset<Row> dataset = spark.createDataFrame(data, schema); remover.transform(dataset).collect(); } }
@Test public void linearRegressionDefaultParams() { LinearRegression lr = new LinearRegression(); assertEquals("label", lr.getLabelCol()); assertEquals("auto", lr.getSolver()); LinearRegressionModel model = lr.fit(dataset); model.transform(dataset).createOrReplaceTempView("prediction"); Dataset<Row> predictions = spark.sql("SELECT label, prediction FROM prediction"); predictions.collect(); // Check defaults assertEquals("features", model.getFeaturesCol()); assertEquals("prediction", model.getPredictionCol()); }
// Duplicate test: LinearRegression default column names and solver, checked
// on both the estimator and the fitted model; the SQL round-trip must evaluate.
@Test public void linearRegressionDefaultParams() { LinearRegression lr = new LinearRegression(); assertEquals("label", lr.getLabelCol()); assertEquals("auto", lr.getSolver()); LinearRegressionModel model = lr.fit(dataset); model.transform(dataset).createOrReplaceTempView("prediction"); Dataset<Row> predictions = spark.sql("SELECT label, prediction FROM prediction"); predictions.collect(); // Check defaults assertEquals("features", model.getFeaturesCol()); assertEquals("prediction", model.getPredictionCol()); }
// Duplicate of the LinearRegression default-params test above; exercises the
// fit/transform/SQL path and asserts default column names on the model.
@Test public void linearRegressionDefaultParams() { LinearRegression lr = new LinearRegression(); assertEquals("label", lr.getLabelCol()); assertEquals("auto", lr.getSolver()); LinearRegressionModel model = lr.fit(dataset); model.transform(dataset).createOrReplaceTempView("prediction"); Dataset<Row> predictions = spark.sql("SELECT label, prediction FROM prediction"); predictions.collect(); // Check defaults assertEquals("features", model.getFeaturesCol()); assertEquals("prediction", model.getPredictionCol()); }
// Fragment: materialize dataSet (defined elsewhere) and initialize counters,
// presumably tallied over `rows` in code past this view — TODO confirm.
// NOTE(review): the unchecked (Row[]) cast could be avoided with collectAsList().
Row[] rows = (Row[]) dataSet.collect(); int record1Count = 0; int record2Count = 0;