Mapper$Context.getInputSplit

How to use the getInputSplit method in org.apache.hadoop.mapreduce.Mapper$Context

Best Java code snippets using org.apache.hadoop.mapreduce.Mapper$Context.getInputSplit (Showing top 20 results out of 558)
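When the job uses a file-based input format such as TextInputFormat, getInputSplit() returns a FileSplit, which is how most of the snippets below recover the file the current task is reading. A minimal sketch of that pattern, assuming a file-based input format; the class name and the idea of tagging output records with the source file name are illustrative and not taken from any of the projects below.

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

// Illustrative mapper: records which input file each task is reading.
public class SplitAwareMapper extends Mapper<LongWritable, Text, Text, LongWritable> {

  private String inputFileName = "unknown";

  @Override
  protected void setup(Context context) throws IOException, InterruptedException {
    InputSplit split = context.getInputSplit();
    // The concrete split type depends on the InputFormat; guard the cast.
    if (split instanceof FileSplit) {
      inputFileName = ((FileSplit) split).getPath().getName();
    }
  }

  @Override
  protected void map(LongWritable key, Text value, Context context)
      throws IOException, InterruptedException {
    // Tag every output record with the source file name.
    context.write(new Text(inputFileName), key);
  }
}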

origin: apache/kylin

@Override
protected void doSetup(Context context) throws IOException {
  tmpBuf = ByteBuffer.allocate(4096);
  Configuration conf = context.getConfiguration();
  bindCurrentConfiguration(conf);
  KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata();
  CubeInstance cube = CubeManager.getInstance(config).getCube(conf.get(BatchConstants.CFG_CUBE_NAME));
  List<TblColRef> uhcColumns = cube.getDescriptor().getAllUHCColumns();
  FileSplit fileSplit = (FileSplit) context.getInputSplit();
  String colName = fileSplit.getPath().getParent().getName();
  for (int i = 0; i < uhcColumns.size(); i++) {
    if (uhcColumns.get(i).getIdentity().equalsIgnoreCase(colName)) {
      index = i;
      break;
    }
  }
  type = uhcColumns.get(index).getType();
  //for debug
  logger.info("column name: " + colName);
  logger.info("index: " + index);
  logger.info("type: " + type);
}
origin: apache/hbase

@Override
protected void setup(Context context) throws IOException {
 Configuration conf = context.getConfiguration();
 sourceHashDir = new Path(conf.get(SOURCE_HASH_DIR_CONF_KEY));
 sourceConnection = openConnection(conf, SOURCE_ZK_CLUSTER_CONF_KEY, null);
 targetConnection = openConnection(conf, TARGET_ZK_CLUSTER_CONF_KEY,
   TableOutputFormat.OUTPUT_CONF_PREFIX);
 sourceTable = openTable(sourceConnection, conf, SOURCE_TABLE_CONF_KEY);
 targetTable = openTable(targetConnection, conf, TARGET_TABLE_CONF_KEY);
 dryRun = conf.getBoolean(DRY_RUN_CONF_KEY, false);
 doDeletes = conf.getBoolean(DO_DELETES_CONF_KEY, true);
 doPuts = conf.getBoolean(DO_PUTS_CONF_KEY, true);
 sourceTableHash = HashTable.TableHash.read(conf, sourceHashDir);
 LOG.info("Read source hash manifest: " + sourceTableHash);
 LOG.info("Read " + sourceTableHash.partitions.size() + " partition keys");
 TableSplit split = (TableSplit) context.getInputSplit();
 ImmutableBytesWritable splitStartKey = new ImmutableBytesWritable(split.getStartRow());
 sourceHashReader = sourceTableHash.newReader(conf, splitStartKey);
 findNextKeyHashPair();
 // create a hasher, but don't start it right away
 // instead, find the first hash batch at or after the start row
 // and skip any rows that come before.  they will be caught by the previous task
 targetHasher = new HashTable.ResultHasher();
}
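In HBase jobs driven by TableInputFormat, the split handed to the task is a TableSplit whose start and end rows bound the slice of the table being scanned, which is what the SyncTable setup above relies on. A stripped-down sketch of the same cast in isolation; the class name is illustrative and it assumes the standard hbase-mapreduce TableMapper and TableSplit APIs.

import java.io.IOException;

import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.mapreduce.TableSplit;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.NullWritable;

// Illustrative mapper: reads the row range the current task is responsible for.
public class RowRangeMapper extends TableMapper<ImmutableBytesWritable, NullWritable> {

  @Override
  protected void setup(Context context) throws IOException, InterruptedException {
    TableSplit split = (TableSplit) context.getInputSplit();
    System.out.println("scanning rows [" + Bytes.toStringBinary(split.getStartRow())
        + ", " + Bytes.toStringBinary(split.getEndRow()) + ")");
  }

  @Override
  protected void map(ImmutableBytesWritable rowKey, Result value, Context context)
      throws IOException, InterruptedException {
    context.write(rowKey, NullWritable.get());
  }
}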
origin: apache/kylin

@Override
protected void doSetup(Context context) throws IOException {
  Configuration conf = context.getConfiguration();
  bindCurrentConfiguration(conf);
  config = AbstractHadoopJob.loadKylinPropsAndMetadata();
  cubeName = conf.get(BatchConstants.CFG_CUBE_NAME);
  cube = CubeManager.getInstance(config).getCube(cubeName);
  cubeDesc = cube.getDescriptor();
  cubeSeg = cube.getSegmentById(conf.get(BatchConstants.CFG_CUBE_SEGMENT_ID));
  flatTableInputFormat = MRUtil.getBatchCubingInputSide(cubeSeg).getFlatTableInputFormat();
  intermediateTableDesc = new CubeJoinedFlatTableEnrich(EngineFactory.getJoinedFlatTableDesc(cubeSeg), cubeDesc);
  globalColumns = cubeDesc.getAllGlobalDictColumns();
  globalColumnIndex = new int[globalColumns.size()];
  globalColumnValues = Lists.newArrayListWithExpectedSize(globalColumns.size());
  for (int i = 0; i < globalColumns.size(); i++) {
    TblColRef colRef = globalColumns.get(i);
    int columnIndexOnFlatTbl = intermediateTableDesc.getColumnIndex(colRef);
    globalColumnIndex[i] = columnIndexOnFlatTbl;
    globalColumnValues.add(Sets.<String> newHashSet());
  }
  splitKey = DictionaryGetterUtil.getInputSplitSignature(cubeSeg, context.getInputSplit());
}
origin: org.apache.hadoop/hadoop-mapred (the same snippet is indexed verbatim under com.github.jiayuhan-it/hadoop-mapreduce-client-core, ch.cern.hadoop/hadoop-mapreduce-client-core, io.prestosql.hadoop/hadoop-apache and io.hops/hadoop-mapreduce-client-core)

@SuppressWarnings("unchecked")
protected void setup(Context context)
  throws IOException, InterruptedException {
 // Find the Mapper from the TaggedInputSplit.
 TaggedInputSplit inputSplit = (TaggedInputSplit) context.getInputSplit();
 mapper = (Mapper<K1, V1, K2, V2>) ReflectionUtils.newInstance(inputSplit
   .getMapperClass(), context.getConfiguration());
 
}
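This setup() is the pattern used by Hadoop's DelegatingMapper: when a job registers a mapper per input path through MultipleInputs, each task receives a TaggedInputSplit that carries the mapper class to instantiate for its path. A minimal driver-side sketch of the configuration that triggers this behavior; the paths and the two placeholder mapper classes are hypothetical.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.MultipleInputs;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

public class MultipleInputsDriver {

  // Placeholder mappers, defined only so the sketch compiles.
  public static class OrdersMapper extends Mapper<LongWritable, Text, Text, Text> { }
  public static class CustomersMapper extends Mapper<LongWritable, Text, Text, Text> { }

  public static void main(String[] args) throws Exception {
    Job job = Job.getInstance(new Configuration(), "multiple-inputs-example");
    job.setJarByClass(MultipleInputsDriver.class);

    // Registering a mapper per input path makes Hadoop run DelegatingMapper,
    // which receives a TaggedInputSplit and instantiates the class registered
    // for whichever path the current split belongs to (the setup() shown above).
    MultipleInputs.addInputPath(job, new Path("/data/orders"),
        TextInputFormat.class, OrdersMapper.class);
    MultipleInputs.addInputPath(job, new Path("/data/customers"),
        TextInputFormat.class, CustomersMapper.class);
    // Output configuration and job submission omitted.
  }
}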
origin: alexholmes/hadoop-book

@Override
protected void setup(Context context) {
 String filename =
   ((FileSplit) context.getInputSplit()).getPath().getName();
 documentId = new Text(filename);
}
origin: bsspirit/maven_hadoop_template

@Override
protected void setup(Context context) throws IOException, InterruptedException {
  FileSplit split = (FileSplit) context.getInputSplit();
  flag = split.getPath().getName(); // determine which dataset is being read
}
origin: cdapio/cdap

@Override
protected void setup(Context context) throws IOException, InterruptedException {
 // assert that the user gets FileInputSplit (as opposed to the MultiInputTaggedSplit) from the context
 Preconditions.checkArgument(context.getInputSplit() instanceof FileSplit);
 try {
  // assert that the user gets the TextInputFormat, as opposed to the MultiInputFormat from the context
  Preconditions.checkArgument(context.getInputFormatClass() == TextInputFormat.class);
 } catch (ClassNotFoundException e) {
  Throwables.propagate(e);
 }
}
origin: KylinOLAP/Kylin

@Override
protected void setup(Context context) throws IOException, InterruptedException {
  super.publishConfiguration(context.getConfiguration());
  cubeName = context.getConfiguration().get(BatchConstants.CFG_CUBE_NAME).toUpperCase();
  segmentName = context.getConfiguration().get(BatchConstants.CFG_CUBE_SEGMENT_NAME).toUpperCase();
  config = AbstractHadoopJob.loadKylinPropsAndMetadata(context.getConfiguration());
  cubeManager = CubeManager.getInstance(config);
  cube = cubeManager.getCube(cubeName);
  cubeDesc = cube.getDescriptor();
  mergedCubeSegment = cube.getSegment(segmentName, SegmentStatusEnum.NEW);
  // int colCount = cubeDesc.getRowkey().getRowKeyColumns().length;
  newKeyBuf = new byte[256];// size will auto-grow
  // decide which source segment
  InputSplit inputSplit = context.getInputSplit();
  String filePath = ((FileSplit) inputSplit).getPath().toString();
  System.out.println("filePath:" + filePath);
  String jobID = extractJobIDFromPath(filePath);
  System.out.println("jobID:" + jobID);
  sourceCubeSegment = findSegmentWithUuid(jobID, cube);
  System.out.println(sourceCubeSegment);
  this.rowKeySplitter = new RowKeySplitter(sourceCubeSegment, 65, 255);
}
origin: apache/hbase

sourceTable = sourceConnection.getTable(tableName);
final InputSplit tableSplit = context.getInputSplit();
origin: apache/kylin

flatDesc = new CubeJoinedFlatTableEnrich(EngineFactory.getJoinedFlatTableDesc(cubeSegment), cubeDesc);
dictionaryMap = DictionaryGetterUtil.getDictionaryMap(cubeSegment, context.getInputSplit(), conf);
origin: apache/incubator-druid

@Override
protected void map(String key, String value, final Context context) throws IOException, InterruptedException
{
 final InputSplit split = context.getInputSplit();
 if (!(split instanceof DatasourceInputSplit)) {
  throw new IAE(
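The Druid snippet above is truncated mid-statement, but the pattern is a defensive type check: verify the concrete split class before casting and fail with a descriptive error instead of a later ClassCastException. A generic sketch of the same idea, using FileSplit and a plain IOException in place of Druid's DatasourceInputSplit and IAE; the class name is illustrative.

import java.io.IOException;

import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

// Generic version of the fail-fast split check shown above.
public class SplitTypeCheckingMapper extends Mapper<Text, Text, Text, NullWritable> {

  @Override
  protected void map(Text key, Text value, Context context)
      throws IOException, InterruptedException {
    InputSplit split = context.getInputSplit();
    if (!(split instanceof FileSplit)) {
      // Fail with a descriptive message rather than a bare ClassCastException later.
      throw new IOException("Unexpected split type: " + split.getClass().getName());
    }
    context.write(key, NullWritable.get());
  }
}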
origin: apache/hbase

private void finishRemainingHashRanges(Context context) throws IOException,
  InterruptedException {
 TableSplit split = (TableSplit) context.getInputSplit();
 byte[] splitEndRow = split.getEndRow();
 boolean reachedEndOfTable = HashTable.isTableEndRow(splitEndRow);
origin: apache/incubator-gobblin

@Override
protected void map(AvroKey<GenericRecord> key, NullWritable value, Context context)
  throws IOException, InterruptedException {
 if (context.getNumReduceTasks() == 0) {
  context.write(key, NullWritable.get());
 } else {
  populateComparableKeyRecord(key.datum(), this.outKey.datum());
  this.outValue.datum(key.datum());
  try {
   context.write(this.outKey, this.outValue);
  } catch (AvroRuntimeException e) {
   final Path[] paths = ((CombineFileSplit) context.getInputSplit()).getPaths();
   throw new IOException("Unable to process paths " + StringUtils.join(paths, ','), e);
  }
 }
 context.getCounter(EVENT_COUNTER.RECORD_COUNT).increment(1);
}
origin: apache/kylin

@Override
protected void doSetup(Context context) throws IOException, InterruptedException {
  super.bindCurrentConfiguration(context.getConfiguration());
  String cubeName = context.getConfiguration().get(BatchConstants.CFG_CUBE_NAME);
  String segmentID = context.getConfiguration().get(BatchConstants.CFG_CUBE_SEGMENT_ID);
  KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata();
  CubeManager cubeManager = CubeManager.getInstance(config);
  CubeInstance cube = cubeManager.getCube(cubeName);
  CubeDesc cubeDesc = cube.getDescriptor();
  CubeSegment mergedCubeSegment = cube.getSegmentById(segmentID);
  // decide which source segment
  FileSplit fileSplit = (FileSplit) context.getInputSplit();
  IMROutput2.IMRMergeOutputFormat outputFormat = MRUtil.getBatchMergeOutputSide2(mergedCubeSegment)
      .getOuputFormat();
  CubeSegment sourceCubeSegment = outputFormat.findSourceSegment(fileSplit, cube);
  reEncoder = new SegmentReEncoder(cubeDesc, sourceCubeSegment, mergedCubeSegment, config);
}
origin: apache/kylin

@Override
protected void doSetup(Context context) throws IOException {
  super.bindCurrentConfiguration(context.getConfiguration());
  cubeName = context.getConfiguration().get(BatchConstants.CFG_CUBE_NAME);
  segmentID = context.getConfiguration().get(BatchConstants.CFG_CUBE_SEGMENT_ID);
  final KylinConfig kylinConfig = AbstractHadoopJob.loadKylinPropsAndMetadata();
  cube = CubeManager.getInstance(kylinConfig).getCube(cubeName);
  cubeDesc = cube.getDescriptor();
  cubeSegment = cube.getSegmentById(segmentID);
  CubeJoinedFlatTableEnrich intermediateTableDesc = new CubeJoinedFlatTableEnrich(EngineFactory.getJoinedFlatTableDesc(cubeSegment), cubeDesc);
  Map<TblColRef, Dictionary<String>> dictionaryMap = DictionaryGetterUtil.getDictionaryMap(cubeSegment,
      context.getInputSplit(), context.getConfiguration());
  baseCuboidBuilder = new BaseCuboidBuilder(kylinConfig, cubeDesc, cubeSegment, intermediateTableDesc,
      dictionaryMap);
}
origin: apache/hbase

@Override
protected void setup(Context context) throws IOException, InterruptedException {
 targetBatchSize = context.getConfiguration()
   .getLong(HASH_BATCH_SIZE_CONF_KEY, DEFAULT_BATCH_SIZE);
 hasher = new ResultHasher();
 TableSplit split = (TableSplit) context.getInputSplit();
 hasher.startBatch(new ImmutableBytesWritable(split.getStartRow()));
}

Popular methods of Mapper$Context (a combined usage sketch follows the list)

  • write
  • getConfiguration
  • getCounter
  • progress
  • setStatus
  • getTaskAttemptID
  • nextKeyValue
  • getCurrentValue
  • getCurrentKey
  • getNumReduceTasks
  • getJobID
  • getInputFormatClass
  • getLocalCacheFiles
  • getOutputCommitter
  • getCredentials
  • getLocalCacheArchives
  • getStatus
  • getCacheArchives
  • getCacheFiles
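
A combined sketch touching several of the methods listed above: getConfiguration and setStatus in setup(), then getCounter and write in map(). The configuration key, counter group and class name are illustrative.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class ContextMethodsMapper extends Mapper<LongWritable, Text, Text, LongWritable> {

  private int minLength;

  @Override
  protected void setup(Context context) {
    Configuration conf = context.getConfiguration();
    // "example.min.length" is an illustrative configuration key.
    minLength = conf.getInt("example.min.length", 1);
    context.setStatus("configured with min length " + minLength);
  }

  @Override
  protected void map(LongWritable key, Text value, Context context)
      throws IOException, InterruptedException {
    if (value.getLength() < minLength) {
      // Count skipped records under an illustrative counter group.
      context.getCounter("example", "skipped").increment(1);
      return;
    }
    context.write(value, key);
  }
}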
