/**
 * Returns the process-wide SparkSession, lazily initializing it as a batch job on first use.
 * Synchronized so that concurrent callers cannot trigger duplicate initialization.
 */
public static synchronized SparkSession getSparkSession() {
  if (INSTANCE.ss != null) {
    return INSTANCE.ss;
  }
  initializeBatchJob();
  return INSTANCE.ss;
}
/**
 * Returns the process-wide JavaStreamingContext, lazily initializing it as a streaming job
 * on first use. Synchronized so that concurrent callers cannot trigger duplicate initialization.
 */
public static synchronized JavaStreamingContext getJavaStreamingContext() {
  if (INSTANCE.jsc != null) {
    return INSTANCE.jsc;
  }
  initializeStreamingJob();
  return INSTANCE.jsc;
}
/**
 * Supplies the aggregation's initial value: a result row for this check whose result field
 * starts out as {@code true}.
 */
@Override
public Row zero() {
  Row initialResult = new RowWithSchema(SCHEMA, name, true);
  return initialResult;
}
/**
 * Compares the dataset's schema against the required schema and returns a single-row
 * dataset recording whether they matched for this named check.
 */
@Override
public Dataset<Row> check(Dataset<Row> dataset, Map<String, Dataset<Row>> stepDependencies) {
  boolean matched = schemasMatch(requiredSchema, dataset.schema(), exactMatch);
  // Using an explicitly typed Row variable avoids the cast the varargs call otherwise needs.
  Row resultRow = new RowWithSchema(SCHEMA, name, matched);
  List<Row> resultRows = Lists.newArrayList(resultRow);
  return Contexts.getSparkSession().createDataFrame(resultRows, SCHEMA);
}
/**
 * A new AccumulatorRequest reports back the name and class it was constructed with,
 * for both of the supported accumulator classes (Long and Double).
 */
@Test
public void testNewAccumulatorRequest() {
  // JUnit's assertEquals signature is (expected, actual); the original had the arguments
  // reversed, which produces misleading "expected X but was Y" failure messages.
  AccumulatorRequest requestLong = new AccumulatorRequest("hello", Long.class);
  assertEquals("hello", requestLong.getName());
  assertEquals(Long.class, requestLong.getClazz());

  AccumulatorRequest requestDouble = new AccumulatorRequest("world", Double.class);
  assertEquals("world", requestDouble.getName());
  assertEquals(Double.class, requestDouble.getClazz());
}
/**
 * Derives a dataset by running the literal SQL query from the {@code query.literal}
 * configuration entry against the shared SparkSession.
 */
@Override
public Dataset<Row> derive(Map<String, Dataset<Row>> dependencies) throws Exception {
  String literalQuery = config.getString("query.literal");
  return Contexts.getSparkSession().sql(literalQuery);
}
}
/**
 * Closes the SparkSession without cleaning up Hive metastore state.
 * Convenience overload of {@link #closeSparkSession(boolean)}.
 */
public static synchronized void closeSparkSession() {
  closeSparkSession(false);
}
/**
 * Constructing an AccumulatorRequest with an unsupported accumulator class
 * (here, Float) is rejected with an IllegalArgumentException.
 */
@Test(expected = IllegalArgumentException.class)
public void testUnsupportedClass() {
  new AccumulatorRequest("hello", Float.class);
}
@Override public boolean equals(Object other) { if (other == null) return false; if (!(other instanceof AccumulatorRequest)) return false; // Accumulator requests are unique only by their name. If multiple objects request accumulators // with the same name but different classes then it is not defined which one Envelope will request from Spark. if (!((AccumulatorRequest)other).getName().equals(this.getName())) return false; return true; }
/**
 * Closes the JavaStreamingContext without cleaning up Hive metastore state.
 * Convenience overload of {@link #closeJavaStreamingContext(boolean)}.
 */
public static synchronized void closeJavaStreamingContext() {
  closeJavaStreamingContext(false);
}
/**
 * Resolves the field's position by name and returns its value from the wrapped row.
 */
@Override
public <T> T getAs(String fieldName) {
  int position = fieldIndex(fieldName);
  return internalRow.getAs(position);
}
@Override public Row call(Row row) throws Exception { return new RowWithSchema(SCHEMA, name, row.<Long>getAs("count") == thisExpected); }
/**
 * Closes the JavaStreamingContext if one exists, clears the singleton reference, and then
 * closes the underlying SparkSession, optionally cleaning up Hive metastore state.
 */
public static synchronized void closeJavaStreamingContext(boolean cleanupHiveMetastore) {
  if (INSTANCE.jsc == null) {
    return;
  }
  INSTANCE.jsc.close();
  INSTANCE.jsc = null;
  closeSparkSession(cleanupHiveMetastore);
}
/**
 * Evaluates the configured rule against the row and wraps the outcome in a result row
 * for this named check.
 */
@Override
public Row call(Row row) throws Exception {
  boolean passed = theRule.check(row);
  return new RowWithSchema(SCHEMA, name, passed);
}
// Tear down any existing SparkSession (including Hive metastore cleanup) before each
// test so every test starts from a fresh context.
@Before
public void setup() {
  Contexts.closeSparkSession(true);
}
/**
 * Merges two partial check results: the combined check passes only if both inputs passed.
 */
@Override
public Row merge(Row a, Row b) {
  boolean bothPassed = a.<Boolean>getAs("result") && b.<Boolean>getAs("result");
  return new RowWithSchema(SCHEMA, name, bothPassed);
}
// Tear down any existing SparkSession (including Hive metastore cleanup) before each
// test so every test starts from a fresh context.
@Before
public void before() {
  Contexts.closeSparkSession(true);
}
/**
 * Reduces two check result rows into one: the combined check passes only if both passed.
 */
@Override
public Row reduce(Row a, Row b) {
  boolean bothPassed = a.<Boolean>getAs("result") && b.<Boolean>getAs("result");
  return new RowWithSchema(SCHEMA, name, bothPassed);
}
/**
 * Class-level teardown: deletes any token store files left behind by the tests, then
 * closes the SparkSession including Hive metastore cleanup.
 */
@AfterClass
public static void after() throws IOException {
  List<Path> tokenStoreFiles =
      SecurityUtils.getExistingTokenStoreFiles(securityConfig, hadoopConf, true);
  SecurityUtils.deleteTokenStoreFiles(tokenStoreFiles, 0, hadoopConf);
  Contexts.closeSparkSession(true);
}
/**
 * Returns a copy of the row with the named field's value replaced.
 *
 * @param row the source row (unchanged)
 * @param fieldName the name of the field to replace; must exist in the row's schema,
 *        otherwise {@code fieldIndex} throws
 * @param replacement the new value for that field
 * @return a new row with the same schema and all values copied except the replaced field
 */
public static Row set(Row row, String fieldName, Object replacement) {
  // Resolve the field position once up front; the original called row.fieldIndex() on
  // every loop iteration, doing a name lookup per field.
  int replaceIndex = row.fieldIndex(fieldName);
  Object[] values = new Object[row.length()];
  for (int i = 0; i < values.length; i++) {
    values[i] = (i == replaceIndex) ? replacement : row.get(i);
  }
  return new RowWithSchema(row.schema(), values);
}