public void registerTable(EntityMetadata m, SparkClient sparkClient) {
    final Class clazz = m.getEntityClazz();
    SparkContext sc = sparkClient.sparkContext;

    // Point the mongo-hadoop input format at the collection backing this entity.
    Configuration config = new Configuration();
    config.set("mongo.input.uri",
            buildMongoURIPath(sc.getConf().get("hostname"), sc.getConf().get("portname"),
                    m.getSchema(), m.getTableName()));

    JavaRDD<Tuple2<Object, BSONObject>> mongoJavaRDD =
            sc.newAPIHadoopRDD(config, MongoInputFormat.class, Object.class, BSONObject.class).toJavaRDD();

    // Convert each BSON document into an instance of the mapped entity class.
    JavaRDD<Object> mongoRDD = mongoJavaRDD.flatMap(new FlatMapFunction<Tuple2<Object, BSONObject>, Object>() {
        @Override
        public Iterable<Object> call(Tuple2<Object, BSONObject> arg) {
            BSONObject obj = arg._2();
            Object javaObject = generateJavaObjectFromBSON(obj, clazz);
            return Arrays.asList(javaObject);
        }
    });

    sparkClient.sqlContext.createDataFrame(mongoRDD, m.getEntityClazz()).registerTempTable(m.getTableName());
}
GraphXInputFormat.setScanAuthorizations(job, authorizations);

return sc.newAPIHadoopRDD(job.getConfiguration(), GraphXInputFormat.class, Object.class, RyaTypeWritable.class);
String tableName = RdfCloudTripleStoreUtils.layoutPrefixToTable(TABLE_LAYOUT.SPO, tablePrefix);
InputFormatBase.setInputTableName(job, tableName);

return sc.newAPIHadoopRDD(job.getConfiguration(), GraphXEdgeInputFormat.class, Object.class, Edge.class);
private RDD<Element> doOperationUsingElementInputFormat(final GetRDDOfAllElements operation,
                                                        final Context context,
                                                        final AccumuloStore accumuloStore)
        throws OperationException {
    final Configuration conf = getConfiguration(operation);
    addIterators(accumuloStore, conf, context.getUser(), operation);

    // Optionally read through Accumulo's batch scanner if the operation asks for it.
    final String useBatchScannerRDD = operation.getOption(USE_BATCH_SCANNER_RDD);
    if (Boolean.parseBoolean(useBatchScannerRDD)) {
        InputConfigurator.setBatchScan(AccumuloInputFormat.class, conf, true);
    }

    final RDD<Tuple2<Element, NullWritable>> pairRDD = SparkContextUtil
            .getSparkSession(context, accumuloStore.getProperties())
            .sparkContext()
            .newAPIHadoopRDD(conf, ElementInputFormat.class, Element.class, NullWritable.class);

    // Keep only the Element from each (Element, NullWritable) pair.
    return pairRDD.map(new FirstElement(), ELEMENT_CLASS_TAG);
}
private RDD<Element> doOperation(final GetRDDOfElements operation,
                                 final Context context,
                                 final AccumuloStore accumuloStore)
        throws OperationException {
    final Configuration conf = getConfiguration(operation);
    final SparkContext sparkContext = SparkContextUtil
            .getSparkSession(context, accumuloStore.getProperties())
            .sparkContext();
    sparkContext.hadoopConfiguration().addResource(conf);

    // Use batch scan option when performing seeded operation
    InputConfigurator.setBatchScan(AccumuloInputFormat.class, conf, true);
    addIterators(accumuloStore, conf, context.getUser(), operation);
    addRanges(accumuloStore, conf, operation);

    final RDD<Tuple2<Element, NullWritable>> pairRDD =
            sparkContext.newAPIHadoopRDD(conf, ElementInputFormat.class, Element.class, NullWritable.class);
    return pairRDD.map(new FirstElement(), ClassTagConstants.ELEMENT_CLASS_TAG);
}
private RDD<Element> doOperation(final GetRDDOfElementsInRanges operation,
                                 final Context context,
                                 final AccumuloStore accumuloStore)
        throws OperationException {
    final Configuration conf = getConfiguration(operation);
    final SparkContext sparkContext = SparkContextUtil
            .getSparkSession(context, accumuloStore.getProperties())
            .sparkContext();
    sparkContext.hadoopConfiguration().addResource(conf);

    // Use batch scan option when performing seeded operation
    InputConfigurator.setBatchScan(AccumuloInputFormat.class, conf, true);
    addIterators(accumuloStore, conf, context.getUser(), operation);
    addRangesFromPairs(accumuloStore, conf, operation);

    final RDD<Tuple2<Element, NullWritable>> pairRDD =
            sparkContext.newAPIHadoopRDD(conf, ElementInputFormat.class, Element.class, NullWritable.class);
    return pairRDD.map(new FirstElement(), ClassTagConstants.ELEMENT_CLASS_TAG);
}
sc.newAPIHadoopRDD(
        conf,
        GeoWaveInputFormat.class,
RDD<Tuple2<Text, Tuple>> hadoopRDD = sparkContext.newAPIHadoopRDD(
        jobConf, PigInputFormatSpark.class, Text.class, Tuple.class);
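Each of the snippets above follows the same pattern: put InputFormat-specific settings on a Hadoop Configuration, then call newAPIHadoopRDD with the InputFormat class and the key/value classes it emits. Below is a minimal, self-contained sketch of that pattern against a plain text file; it is not taken from any of the projects quoted above, and the app name and input path are placeholders.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;

public class NewAPIHadoopRDDExample {
    public static void main(String[] args) {
        JavaSparkContext sc = new JavaSparkContext(
                new SparkConf().setAppName("newAPIHadoopRDD-sketch").setMaster("local[*]"));

        Configuration conf = new Configuration();
        // Standard "new API" input-path property; InputFormat-specific settings
        // (e.g. mongo.input.uri above) go on this Configuration in the same way.
        conf.set("mapreduce.input.fileinputformat.inputdir", "/tmp/input.txt"); // placeholder path

        // Key/value classes must match what the InputFormat produces:
        // TextInputFormat emits (LongWritable byteOffset, Text line).
        JavaPairRDD<LongWritable, Text> lines =
                sc.newAPIHadoopRDD(conf, TextInputFormat.class, LongWritable.class, Text.class);

        System.out.println("records: " + lines.count());
        sc.stop();
    }
}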