public static JavaRDD<String[]> hiveRecordInputRDD(boolean isSequenceFile, JavaSparkContext sc, String inputPath,
        String hiveTable) throws IOException {
    JavaRDD<String[]> recordRDD;
    if (isSequenceFile && HadoopUtil.isSequenceDir(sc.hadoopConfiguration(), new Path(inputPath))) {
        recordRDD = getSequenceFormatHiveInput(sc, inputPath);
    } else {
        recordRDD = getOtherFormatHiveInput(sc, hiveTable);
    }
    return recordRDD;
}
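// A minimal usage sketch (assumed, not from the source). The app name, input path, and
// Hive table name are hypothetical placeholders; `hiveRecordInputRDD` is the method above.
JavaSparkContext sc = new JavaSparkContext(new SparkConf().setAppName("hive-input-demo"));
JavaRDD<String[]> records = hiveRecordInputRDD(true, sc, "/tmp/hive/intermediate", "default.sample_table");
for (String[] record : records.take(10)) {
    System.out.println(String.join("|", record));
}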
if (path != null) {
    if (fs == null) {
        fs = FileSystem.get(path.toUri(), sparkContext.hadoopConfiguration());
    }
}
log.info("Writing model to {}", modelPath); try { FileSystem fs = FileSystem.get(candidatePath.toUri(), sparkContext.hadoopConfiguration()); fs.mkdirs(candidatePath); try (OutputStream out = fs.create(modelPath)) {
Configuration hadoopConf = sparkContext.hadoopConfiguration();
if (hadoopConf.getResource("core-site.xml") == null) {
    log.warn("Hadoop config like core-site.xml was not found; " +
            /* remainder of the warning message elided in source */ "");
}
Path candidatesPath = new Path(tempModelPath, Long.toString(System.currentTimeMillis()));
FileSystem fs = FileSystem.get(modelDir.toUri(), sparkContext.hadoopConfiguration());
fs.mkdirs(candidatesPath);
try (JavaSparkContext sc = new JavaSparkContext(conf)) {
    sc.sc().addSparkListener(jobListener);
    final FileSystem fs = partitionFilePath.getFileSystem(sc.hadoopConfiguration());
    if (!fs.exists(partitionFilePath)) {
        throw new IllegalArgumentException("File does not exist: " + partitionFilePath.toString());
    }
    HadoopUtil.deletePath(sc.hadoopConfiguration(), new Path(outputPath));
    final SerializableConfiguration sConf = new SerializableConfiguration(sc.hadoopConfiguration());
    try (SequenceFile.Reader reader = new SequenceFile.Reader(fs, partitionFilePath, sc.hadoopConfiguration())) {
        RowKeyWritable key = new RowKeyWritable();
        Writable value = NullWritable.get();
        // ... reading of the region split keys elided in source ...
    }
    final FileSystem hbaseClusterFs = hbaseConfFilePath.getFileSystem(sc.hadoopConfiguration());
    HadoopUtil.writeToSequenceFile(sc.hadoopConfiguration(), counterPath, counterMap);
}
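// A minimal sketch (assumed, not from the source) of how a SequenceFile.Reader like the
// one above is typically drained: next(key, value) fills both Writables and returns
// false at end of file.
int entries = 0;
while (reader.next(key, value)) {
    entries++; // process the decoded (key, value) pair here
}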
Configuration hadoopConf = jsc.hadoopConfiguration();
hadoopConf.set("fs.s3.impl", "org.apache.hadoop.fs.s3native.NativeS3FileSystem");
if (cmdLineArgs.aws_access_key_id != null && !"".equals(cmdLineArgs.aws_access_key_id)) {
    // ... credential wiring elided in source; see the sketch below ...
}
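// A plausible completion of the branch above (an assumption, not the source's code).
// NativeS3FileSystem reads credentials from Hadoop configuration keys; with fs.s3.impl
// remapped as above, the fs.s3.* key variants apply. The `aws_secret_access_key` field
// is a hypothetical companion to `aws_access_key_id`; verify the key names against your
// Hadoop version.
hadoopConf.set("fs.s3.awsAccessKeyId", cmdLineArgs.aws_access_key_id);
hadoopConf.set("fs.s3.awsSecretAccessKey", cmdLineArgs.aws_secret_access_key);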
Configuration hadoopConf = streamingContext.sparkContext().hadoopConfiguration();
new Thread(LoggingCallable.log(() -> {
    try {
        // ... background work elided in source ...
    } catch (Exception e) {
        // ... error handling elided in source; catch clause assumed to close the fragment ...
    }
})); // remainder (e.g. naming/starting the thread) elided in source
Configuration hadoopConf = sparkContext.hadoopConfiguration();
sc.sc().addSparkListener(jobListener);
HadoopUtil.deletePath(sc.hadoopConfiguration(), new Path(dictOutputPath));
final SerializableConfiguration sConf = new SerializableConfiguration(sc.hadoopConfiguration());
final KylinConfig envConfig = AbstractHadoopJob.loadKylinConfigFromHdfs(sConf, metaUrl);
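// Why the wrapper (an inference, not stated in the source): Hadoop's Configuration is
// Writable but not java.io.Serializable, so it cannot be captured in a Spark closure
// directly; SerializableConfiguration makes it shippable to executors. A hypothetical
// closure showing the capture (`records` is a placeholder RDD):
records.foreachPartition(iter -> {
    // rebuilt on the executor from the serialized configuration and metadata URL
    KylinConfig cfg = AbstractHadoopJob.loadKylinConfigFromHdfs(sConf, metaUrl);
});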
try (JavaSparkContext sc = new JavaSparkContext(conf)) {
    sc.sc().addSparkListener(jobListener);
    HadoopUtil.deletePath(sc.hadoopConfiguration(), new Path(outputPath));
    final SerializableConfiguration sConf = new SerializableConfiguration(sc.hadoopConfiguration());
    KylinConfig envConfig = AbstractHadoopJob.loadKylinConfigFromHdfs(sConf, metaUrl);
    HadoopUtil.writeToSequenceFile(sc.hadoopConfiguration(), counterPath, counterMap);
}
sparkConf.setAppName("vgg16");
JavaSparkContext sc = new JavaSparkContext(sparkConf);
FileSystem fs = FileSystem.get(sc.hadoopConfiguration());
FileSystem fileSystem = FileSystem.get(sc.hadoopConfiguration());
try (BufferedOutputStream os = new BufferedOutputStream(fileSystem.create(new Path(networkPath)))) {
    // the boolean flag asks ModelSerializer to persist the updater state as well
    ModelSerializer.writeModel(sparkNet.getNetwork(), os, true);
}
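// A read-back sketch (assumed, not from the source): restoring the network written
// above. restoreMultiLayerNetwork is ModelSerializer's standard counterpart to
// writeModel for MultiLayerNetwork models; `true` also restores the updater state.
try (BufferedInputStream is = new BufferedInputStream(fileSystem.open(new Path(networkPath)))) {
    MultiLayerNetwork restored = ModelSerializer.restoreMultiLayerNetwork(is, true);
}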
sc.sc().addSparkListener(jobListener);
HadoopUtil.deletePath(sc.hadoopConfiguration(), new Path(outputPath));
final SerializableConfiguration sConf = new SerializableConfiguration(sc.hadoopConfiguration());
final KylinConfig envConfig = AbstractHadoopJob.loadKylinConfigFromHdfs(sConf, metaUrl);
JavaSparkContext sc = new JavaSparkContext(conf);
sc.sc().addSparkListener(jobListener);
HadoopUtil.deletePath(sc.hadoopConfiguration(), new Path(outputPath));
final SerializableConfiguration sConf = new SerializableConfiguration(sc.hadoopConfiguration());
KylinConfig envConfig = AbstractHadoopJob.loadKylinConfigFromHdfs(sConf, metaUrl);
FileSystem fs = FileSystem.get(sc.hadoopConfiguration());
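// A minimal sketch (assumed, not from the source) contrasting the two lookup styles used
// across these snippets: FileSystem.get(conf) resolves the *default* filesystem
// (fs.defaultFS), while path.getFileSystem(conf) resolves whatever the path's scheme
// names (hdfs://, s3://, file://, ...). The path below is a hypothetical placeholder.
Path p = new Path("hdfs://namenode:8020/tmp/example");
FileSystem defaultFs = FileSystem.get(sc.hadoopConfiguration());   // bound to fs.defaultFS
FileSystem schemeFs = p.getFileSystem(sc.hadoopConfiguration());   // bound to the path's scheme
System.out.println(defaultFs.getUri() + " vs " + schemeFs.getUri());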
public DFSSource(TypedProperties props, JavaSparkContext sparkContext, SchemaProvider schemaProvider) {
    super(props, sparkContext, schemaProvider);
    DataSourceUtils.checkRequiredProperties(props, Collections.singletonList(Config.ROOT_INPUT_PATH_PROP));
    this.fs = FSUtils.getFs(props.getString(Config.ROOT_INPUT_PATH_PROP), sparkContext.hadoopConfiguration());
}
private static int deduplicatePartitionPath(JavaSparkContext jsc, String duplicatedPartitionPath,
        String repairedOutputPath, String basePath) throws Exception {
    DedupeSparkJob job = new DedupeSparkJob(basePath, duplicatedPartitionPath, repairedOutputPath,
            new SQLContext(jsc), FSUtils.getFs(basePath, jsc.hadoopConfiguration()));
    job.fixDuplicates(true);
    return 0;
}
private JavaRDD<WriteStatus> updateIndexAndCommitIfNeeded(JavaRDD<WriteStatus> writeStatusRDD, HoodieTable<T> table,
        String commitTime) {
    // Update the index with the new write locations
    JavaRDD<WriteStatus> statuses = index.updateLocation(writeStatusRDD, jsc, table);
    // Cache the statuses; the (auto-)commit below triggers the write and collects them
    statuses = statuses.persist(config.getWriteStatusStorageLevel());
    commitOnAutoCommit(commitTime, statuses,
            new HoodieTableMetaClient(jsc.hadoopConfiguration(), config.getBasePath(), true).getCommitActionType());
    return statuses;
}
private void scheduleCompaction(String compactionInstantTime, HoodieWriteClient client, HoodieWriteConfig cfg)
        throws IOException {
    client.scheduleCompactionAtInstant(compactionInstantTime, Optional.empty());
    HoodieTableMetaClient metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(), cfg.getBasePath());
    HoodieInstant instant = metaClient.getActiveTimeline().filterPendingCompactionTimeline().lastInstant().get();
    assertEquals("Last compaction instant must be the one set", instant.getTimestamp(), compactionInstantTime);
}