// Fragment of a Druid segment-converting Mapper. The original throw was truncated and the
// segment lookup from the split was dropped in extraction; both are reconstructed below.
final InputSplit split = context.getInputSplit();
if (!(split instanceof DatasourceInputSplit)) {
  throw new IAE("Unexpected split type [%s], expected DatasourceInputSplit", split.getClass());
}
final String tmpDirLoc = context.getConfiguration().get(TMP_FILE_LOC_KEY);
final File tmpDir = Paths.get(tmpDirLoc).toFile();
final DataSegment segment = Iterables.getOnlyElement(((DatasourceInputSplit) split).getSegments()).getSegment();
final HadoopDruidConverterConfig config = converterConfigFromConfiguration(context.getConfiguration());

context.setStatus("DOWNLOADING");
context.progress();
final Path inPath = new Path(JobHelper.getURIFromSegment(segment));
final File inDir = new File(tmpDir, "in");
final long inSize = JobHelper.unzipNoGuava(inPath, context.getConfiguration(), inDir, context, null);
log.debug("Loaded %d bytes into [%s] for converting", inSize, inDir.getAbsolutePath());
context.getCounter(COUNTER_GROUP, COUNTER_LOADED).increment(inSize);

context.setStatus("CONVERTING");
context.progress();
final File outDir = new File(tmpDir, "out");
FileUtils.forceMkdir(outDir);
// (the actual conversion of inDir into outDir is elided here)

context.setStatus("Validating");
HadoopDruidConverterConfig.INDEX_IO.validateTwoSegments(inDir, outDir);
context.progress();

context.setStatus("Starting PUSH");
final Path baseOutputPath = new Path(config.getSegmentOutputPath());
public void map(K key, Text value, Context context) throws IOException, InterruptedException {
  String text = value.toString();
  Matcher matcher = pattern.matcher(text);
  while (matcher.find()) {
    context.write(new Text(matcher.group(group)), new LongWritable(1));
  }
}
@Override
protected void map(LongWritable key, Text line, Context context) throws IOException, InterruptedException {
  String[] fields = splitter.split(line.toString());
  if (fields.length < 4) {
    context.getCounter("Map", "LinesWithErrors").increment(1);
    return;
  }
  String artist = fields[1];
  context.write(new Text(artist), new IntWritable(0));
}
@Override
public void setup(Context context) throws IOException {
  Configuration conf = context.getConfiguration();
  verifyChecksum = conf.getBoolean(CONF_CHECKSUM_VERIFY, true);

  filesGroup = conf.get(CONF_FILES_GROUP);
  filesUser = conf.get(CONF_FILES_USER);
  filesMode = (short) conf.getInt(CONF_FILES_MODE, 0);
  outputRoot = new Path(conf.get(CONF_OUTPUT_ROOT));
  inputRoot = new Path(conf.get(CONF_INPUT_ROOT));

  inputArchive = new Path(inputRoot, HConstants.HFILE_ARCHIVE_DIRECTORY);
  outputArchive = new Path(outputRoot, HConstants.HFILE_ARCHIVE_DIRECTORY);

  // The loop header was dropped in extraction; pre-registering every counter at zero is assumed here.
  for (Counter c : Counter.values()) {
    context.getCounter(c).increment(0);
  }

  if (context.getConfiguration().getBoolean(Testing.CONF_TEST_FAILURE, false)) {
    testing.failuresCountToInject = conf.getInt(Testing.CONF_TEST_FAILURE_COUNT, 0);
    // Use the attempt id so re-executions of the task don't re-inject the same failures.
    testing.injectedFailureCount = context.getTaskAttemptID().getId();
  }
}
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
  String line = value.toString();
  String[] lineSplits = line.split(" ");

  Configuration conf = context.getConfiguration();
  final FileSystem fs = FileSystem.get(conf);
  final Path hdfsInputFilePath = new Path(_inputFilePath);
  // This assignment was dropped in extraction; building the local path this way is assumed.
  final Path localInputFilePath = new Path(localInputDataDir.getAbsolutePath() + "/" + hdfsInputFilePath.getName());
  LOGGER.info("Copy from " + hdfsInputFilePath + " to " + localInputFilePath);
  fs.copyToLocalFile(hdfsInputFilePath, localInputFilePath);

  String schemaString = context.getConfiguration().get("data.schema");
  try {
    schema = Schema.fromString(schemaString);
  } catch (Exception e) {
    // The original catch block was truncated; rethrowing is assumed here.
    throw new RuntimeException("Could not parse schema: " + schemaString, e);
  }

  // ... segment generation and upload elided ...

  context.write(new LongWritable(Long.parseLong(lineSplits[2])), new Text(
      FileSystem.get(_properties).listStatus(new Path(_localHdfsSegmentTarPath + "/"))[0].getPath().getName()));
  LOGGER.info("Finished the job successfully");
}
// Fragment of a dictionary/statistics merge mapper (Apache Kylin). Setup and map logic were
// interleaved and truncated in the extracted text; they are grouped below and elisions are marked.

// Setup: read job arguments from the configuration.
final SerializableConfiguration sConf = new SerializableConfiguration(context.getConfiguration());
final String metaUrl = context.getConfiguration().get(BatchConstants.ARG_META_URL);
final String cubeName = context.getConfiguration().get(BatchConstants.ARG_CUBE_NAME);
final String segmentId = context.getConfiguration().get(BatchConstants.ARG_SEGMENT_ID);
final String statOutputPath = context.getConfiguration()
    .get(MergeDictionaryJob.OPTION_OUTPUT_PATH_STAT.getOpt());
CubeInstance cubeInstance = CubeManager.getInstance(kylinConfig).getCube(cubeName);

// Map: the key selects which dictionary (or the statistics) to merge.
int index = key.get();
// ... dictionary merge elided ...
String dictInfoPath = mergedDictInfo == null ? "" : mergedDictInfo.getResourcePath();
context.write(new IntWritable(-1), new Text(tblCol + "=" + dictInfoPath));

// Statistics merge: read back the downloaded statistics sequence file.
conf = HadoopUtil.getCurrentConfiguration();
reader = new SequenceFile.Reader(fs, new Path(tempFile.getAbsolutePath()), conf);
LongWritable keyW = (LongWritable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
BytesWritable valueW = (BytesWritable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
if (keyW.get() == 0L) {
  context.write(new IntWritable(-1), new Text(""));
}
@Override
public void doCleanup(Context context) throws IOException, InterruptedException {
  mos.close();

  Path outputDirBase = new Path(context.getConfiguration().get(FileOutputFormat.OUTDIR), PathNameCuboidBase);
  FileSystem fs = FileSystem.get(context.getConfiguration());
  if (!fs.exists(outputDirBase)) {
    fs.mkdirs(outputDirBase);
    SequenceFile
        .createWriter(context.getConfiguration(),
            SequenceFile.Writer.file(new Path(outputDirBase, "part-m-00000")),
            SequenceFile.Writer.keyClass(Text.class),
            SequenceFile.Writer.valueClass(Text.class))
        .close();
  }
}
@Override
protected void setup(Context context) throws IOException {
  Configuration conf = context.getConfiguration();
  sourceHashDir = new Path(conf.get(SOURCE_HASH_DIR_CONF_KEY));
  sourceConnection = openConnection(conf, SOURCE_ZK_CLUSTER_CONF_KEY, null);
  targetConnection = openConnection(conf, TARGET_ZK_CLUSTER_CONF_KEY, TableOutputFormat.OUTPUT_CONF_PREFIX);
  sourceTable = openTable(sourceConnection, conf, SOURCE_TABLE_CONF_KEY);
  targetTable = openTable(targetConnection, conf, TARGET_TABLE_CONF_KEY);
  dryRun = conf.getBoolean(DRY_RUN_CONF_KEY, false);
  doDeletes = conf.getBoolean(DO_DELETES_CONF_KEY, true);
  doPuts = conf.getBoolean(DO_PUTS_CONF_KEY, true);

  sourceTableHash = HashTable.TableHash.read(conf, sourceHashDir);
  LOG.info("Read source hash manifest: " + sourceTableHash);
  LOG.info("Read " + sourceTableHash.partitions.size() + " partition keys");

  TableSplit split = (TableSplit) context.getInputSplit();
  ImmutableBytesWritable splitStartKey = new ImmutableBytesWritable(split.getStartRow());

  sourceHashReader = sourceTableHash.newReader(conf, splitStartKey);
  findNextKeyHashPair();

  // create a hasher, but don't start it right away
  // instead, find the first hash batch at or after the start row
  // and skip any rows that come before. they will be caught by the previous task
  targetHasher = new HashTable.ResultHasher();
}
/**
 * Test SampleUploader from examples
 */
@SuppressWarnings("unchecked")
@Test
public void testSampleUploader() throws Exception {
  Configuration configuration = new Configuration();
  Uploader uploader = new Uploader();
  Mapper<LongWritable, Text, ImmutableBytesWritable, Put>.Context ctx = mock(Context.class);
  doAnswer(new Answer<Void>() {
    @Override
    public Void answer(InvocationOnMock invocation) throws Throwable {
      ImmutableBytesWritable writer = (ImmutableBytesWritable) invocation.getArgument(0);
      Put put = (Put) invocation.getArgument(1);
      assertEquals("row", Bytes.toString(writer.get()));
      assertEquals("row", Bytes.toString(put.getRow()));
      return null;
    }
  }).when(ctx).write(any(), any());
  uploader.map(null, new Text("row,family,qualifier,value"), ctx);

  Path dir = util.getDataTestDirOnTestFS("testSampleUploader");
  String[] args = { dir.toString(), "simpleTable" };
  Job job = SampleUploader.configureJob(configuration, args);
  assertEquals(SequenceFileInputFormat.class, job.getInputFormatClass());
}
@Override
protected void doSetup(Context context) throws IOException {
  tmpBuf = ByteBuffer.allocate(4096);

  Configuration conf = context.getConfiguration();
  bindCurrentConfiguration(conf);
  KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata();
  CubeInstance cube = CubeManager.getInstance(config).getCube(conf.get(BatchConstants.CFG_CUBE_NAME));
  List<TblColRef> uhcColumns = cube.getDescriptor().getAllUHCColumns();

  FileSplit fileSplit = (FileSplit) context.getInputSplit();
  String colName = fileSplit.getPath().getParent().getName();

  for (int i = 0; i < uhcColumns.size(); i++) {
    if (uhcColumns.get(i).getIdentity().equalsIgnoreCase(colName)) {
      index = i;
      break;
    }
  }
  type = uhcColumns.get(index).getType();

  // for debug
  logger.info("column name: " + colName);
  logger.info("index: " + index);
  logger.info("type: " + type);
}
@Override
protected void setup(Context context) throws IOException, InterruptedException {
  conf = context.getConfiguration();
  separator = conf.get("ReportSeparator", ":");
}
private FileStatus getSourceFileStatus(Context context, final SnapshotFileInfo fileInfo) throws IOException {
  try {
    Configuration conf = context.getConfiguration();
    FileLink link = null;
    switch (fileInfo.getType()) {
      case HFILE:
        Path inputPath = new Path(fileInfo.getHfile());
        link = getFileLink(inputPath, conf);
        break;
      case WAL:
        link = new WALLink(inputRoot, fileInfo.getWalServer(), fileInfo.getWalName());
        break;
      default:
        throw new IOException("Invalid File Type: " + fileInfo.getType().toString());
    }
    return link.getFileStatus(inputFs);
  } catch (FileNotFoundException e) {
    context.getCounter(Counter.MISSING_FILES).increment(1);
    LOG.error("Unable to get the status for source file=" + fileInfo.toString(), e);
    throw e;
  } catch (IOException e) {
    LOG.error("Unable to get the status for source file=" + fileInfo.toString(), e);
    throw e;
  }
}
@Override
public void setup(org.apache.hadoop.mapreduce.Mapper.Context context) {
  index = context.getConfiguration().getInt(INDEX_KEY, -1);
  labels = context.getConfiguration().get(LABELS_KEY);
  split = labels.split(COMMA);
  super.setup(context);
}
public void map(LongWritable key, Text values, Context context) throws IOException, InterruptedException {
  System.out.println(values.toString());
  String[] tokens = DELIMITER.split(values.toString());
  if (tokens[3].startsWith(month)) { // records for January
    money = Integer.parseInt(tokens[1]) * Integer.parseInt(tokens[2]); // unit price * quantity
    v.set(money);
    context.write(k, v);
  }
}
@Override
protected void setup(Context context) {
  final File tmpFile = Files.createTempDir();
  context.getConfiguration().set(TMP_FILE_LOC_KEY, tmpFile.getAbsolutePath());
}
private void write(VectorWritable vw, Context context, int clusterIndex, double weight)
    throws IOException, InterruptedException {
  Cluster cluster = clusterModels.get(clusterIndex);
  clusterId.set(cluster.getId());

  DistanceMeasureCluster distanceMeasureCluster = (DistanceMeasureCluster) cluster;
  DistanceMeasure distanceMeasure = distanceMeasureCluster.getMeasure();
  double distance = distanceMeasure.distance(cluster.getCenter(), vw.get());

  Map<Text, Text> props = Maps.newHashMap();
  props.put(new Text("distance"), new Text(Double.toString(distance)));
  context.write(clusterId, new WeightedPropertyVectorWritable(weight, vw.get(), props));
}
// Fragment of a cell-counting mapper (HBase CellCounter style). The guards for "new row",
// "new column family", and "new qualifier" were dropped in extraction and are marked by comments.

// New row: reset per-row state and count the row.
currentFamily = null;
currentQualifier = null;
context.getCounter(Counters.ROWS).increment(1);
context.write(new Text("Total ROWS"), new IntWritable(1));

// New column family: count it the first time it is seen.
currentFamilyName = Bytes.toStringBinary(currentFamily);
currentQualifier = null;
context.getCounter("CF", currentFamilyName).increment(1);
if (1 == context.getCounter("CF", currentFamilyName).getValue()) {
  context.write(new Text("Total Families Across all Rows"), new IntWritable(1));
  context.write(new Text(currentFamily), new IntWritable(1));
}

// New qualifier: count it globally and per row.
currentRowQualifierName = currentRowKey + separator + currentQualifierName;
context.write(new Text("Total Qualifiers across all Rows"), new IntWritable(1));
context.write(new Text(currentQualifierName), new IntWritable(1));

// Every cell counts as one more version of its row/qualifier.
context.write(new Text(currentRowQualifierName + "_Versions"), new IntWritable(1));
context.getCounter(Counters.CELLS).increment(cellCount);
// Fragment of a snapshot-export copy routine; the tail of the method was truncated in extraction.
if (outputStat != null && sameFile(inputStat, outputStat)) {
  LOG.info("Skip copy " + inputStat.getPath() + " to " + outputPath + ", same file.");
  context.getCounter(Counter.FILES_SKIPPED).increment(1);
  context.getCounter(Counter.BYTES_SKIPPED).increment(inputStat.getLen());
  return;
}

// Throttle the copy if a bandwidth limit (in MB/s) is configured.
int bandwidthMB = context.getConfiguration().getInt(CONF_BANDWIDTH_MB, 100);
if (Integer.MAX_VALUE != bandwidthMB) {
  in = new ThrottledInputStream(new BufferedInputStream(in), bandwidthMB * 1024 * 1024L);
}

context.getCounter(Counter.BYTES_EXPECTED).increment(inputStat.getLen());

// Ensure the output directory exists, then copy the data.
createOutputPath(outputPath.getParent());
FSDataOutputStream out = outputFs.create(outputPath, true);
try {
  // ... byte copy and verification elided (truncated in the original) ...
} finally {
  out.close(); // assumed cleanup; the original body was cut off here
}