/**
 * Returns the process-wide SparkSession, lazily initializing it as a batch job on first use.
 * Synchronized so that concurrent callers cannot trigger duplicate initialization.
 */
public static synchronized SparkSession getSparkSession() {
  if (INSTANCE.ss != null) {
    return INSTANCE.ss;
  }
  initializeBatchJob();
  return INSTANCE.ss;
}
/**
 * Returns the process-wide JavaStreamingContext, lazily initializing it as a streaming job
 * on first use. Synchronized so that concurrent callers cannot trigger duplicate initialization.
 */
public static synchronized JavaStreamingContext getJavaStreamingContext() {
  if (INSTANCE.jsc != null) {
    return INSTANCE.jsc;
  }
  initializeStreamingJob();
  return INSTANCE.jsc;
}
/**
 * Supplies the aggregation's initial value: a result row for this check whose result field
 * starts out as {@code true}.
 */
@Override
public Row zero() {
  Row initialResult = new RowWithSchema(SCHEMA, name, true);
  return initialResult;
}
/**
 * Compares the dataset's schema against the required schema and returns a single-row
 * dataset recording whether they matched for this named check.
 */
@Override
public Dataset<Row> check(Dataset<Row> dataset, Map<String, Dataset<Row>> stepDependencies) {
  boolean matched = schemasMatch(requiredSchema, dataset.schema(), exactMatch);
  // Using an explicitly typed Row variable avoids the cast the varargs call otherwise needs.
  Row resultRow = new RowWithSchema(SCHEMA, name, matched);
  List<Row> resultRows = Lists.newArrayList(resultRow);
  return Contexts.getSparkSession().createDataFrame(resultRows, SCHEMA);
}
/**
 * A new AccumulatorRequest reports back the name and class it was constructed with,
 * for both of the supported accumulator classes (Long and Double).
 */
@Test
public void testNewAccumulatorRequest() {
  // JUnit's assertEquals signature is (expected, actual); the original had the arguments
  // reversed, which produces misleading "expected X but was Y" failure messages.
  AccumulatorRequest requestLong = new AccumulatorRequest("hello", Long.class);
  assertEquals("hello", requestLong.getName());
  assertEquals(Long.class, requestLong.getClazz());

  AccumulatorRequest requestDouble = new AccumulatorRequest("world", Double.class);
  assertEquals("world", requestDouble.getName());
  assertEquals(Double.class, requestDouble.getClazz());
}
/**
 * Derives a dataset by running the literal SQL query from the {@code query.literal}
 * configuration entry against the shared SparkSession.
 */
@Override
public Dataset<Row> derive(Map<String, Dataset<Row>> dependencies) throws Exception {
  String literalQuery = config.getString("query.literal");
  return Contexts.getSparkSession().sql(literalQuery);
}
}
/**
 * Closes the SparkSession without cleaning up Hive metastore state.
 * Convenience overload of {@link #closeSparkSession(boolean)}.
 */
public static synchronized void closeSparkSession() {
  closeSparkSession(false);
}
/**
 * Constructing an AccumulatorRequest with an unsupported accumulator class
 * (here, Float) is rejected with an IllegalArgumentException.
 */
@Test(expected = IllegalArgumentException.class)
public void testUnsupportedClass() {
  new AccumulatorRequest("hello", Float.class);
}
@Override public boolean equals(Object other) { if (other == null) return false; if (!(other instanceof AccumulatorRequest)) return false; // Accumulator requests are unique only by their name. If multiple objects request accumulators // with the same name but different classes then it is not defined which one Envelope will request from Spark. if (!((AccumulatorRequest)other).getName().equals(this.getName())) return false; return true; }
/**
 * Closes the JavaStreamingContext without cleaning up Hive metastore state.
 * Convenience overload of {@link #closeJavaStreamingContext(boolean)}.
 */
public static synchronized void closeJavaStreamingContext() {
  closeJavaStreamingContext(false);
}
/**
 * Resolves the field's position by name and returns its value from the wrapped row.
 */
@Override
public <T> T getAs(String fieldName) {
  int position = fieldIndex(fieldName);
  return internalRow.getAs(position);
}
@Override public Row call(Row row) throws Exception { return new RowWithSchema(SCHEMA, name, row.<Long>getAs("count") == thisExpected); }
/**
 * Closes the JavaStreamingContext if one exists, clears the singleton reference, and then
 * closes the underlying SparkSession, optionally cleaning up Hive metastore state.
 */
public static synchronized void closeJavaStreamingContext(boolean cleanupHiveMetastore) {
  if (INSTANCE.jsc == null) {
    return;
  }
  INSTANCE.jsc.close();
  INSTANCE.jsc = null;
  closeSparkSession(cleanupHiveMetastore);
}
/**
 * Evaluates the configured rule against the row and wraps the outcome in a result row
 * for this named check.
 */
@Override
public Row call(Row row) throws Exception {
  boolean passed = theRule.check(row);
  return new RowWithSchema(SCHEMA, name, passed);
}
// Tear down any existing SparkSession (including Hive metastore cleanup) before each
// test so every test starts from a fresh context.
@Before
public void setup() {
  Contexts.closeSparkSession(true);
}
/**
 * Merges two partial check results: the combined check passes only if both inputs passed.
 */
@Override
public Row merge(Row a, Row b) {
  boolean bothPassed = a.<Boolean>getAs("result") && b.<Boolean>getAs("result");
  return new RowWithSchema(SCHEMA, name, bothPassed);
}
// Tear down any existing SparkSession (including Hive metastore cleanup) before each
// test so every test starts from a fresh context.
@Before
public void before() {
  Contexts.closeSparkSession(true);
}
/**
 * Reduces two check result rows into one: the combined check passes only if both passed.
 */
@Override
public Row reduce(Row a, Row b) {
  boolean bothPassed = a.<Boolean>getAs("result") && b.<Boolean>getAs("result");
  return new RowWithSchema(SCHEMA, name, bothPassed);
}
/**
 * Class-level teardown: deletes any token store files left behind by the tests, then
 * closes the SparkSession including Hive metastore cleanup.
 */
@AfterClass
public static void after() throws IOException {
  List<Path> tokenStoreFiles =
      SecurityUtils.getExistingTokenStoreFiles(securityConfig, hadoopConf, true);
  SecurityUtils.deleteTokenStoreFiles(tokenStoreFiles, 0, hadoopConf);
  Contexts.closeSparkSession(true);
}
/**
 * Returns a copy of the row with the named field's value replaced.
 *
 * @param row the source row (unchanged)
 * @param fieldName the name of the field to replace; must exist in the row's schema,
 *        otherwise {@code fieldIndex} throws
 * @param replacement the new value for that field
 * @return a new row with the same schema and all values copied except the replaced field
 */
public static Row set(Row row, String fieldName, Object replacement) {
  // Resolve the field position once up front; the original called row.fieldIndex() on
  // every loop iteration, doing a name lookup per field.
  int replaceIndex = row.fieldIndex(fieldName);
  Object[] values = new Object[row.length()];
  for (int i = 0; i < values.length; i++) {
    values[i] = (i == replaceIndex) ? replacement : row.get(i);
  }
  return new RowWithSchema(row.schema(), values);
}