// Resolve the tablet location for the given table/key by scanning the
// 'loc' column family of the accumulo.metadata table.
String tableId = connector.tableOperations().tableIdMap().get(table);
Scanner scanner = connector.createScanner("accumulo.metadata", auths);
scanner.fetchColumnFamily(new Text("loc"));

Optional<String> location = Optional.empty();
if (key == null) {
    // No start key: the first tablet's location is the answer.
    Iterator<Entry<Key, Value>> iter = scanner.iterator();
    if (iter.hasNext()) {
        location = Optional.of(iter.next().getValue().toString());
    }
}
else {
    // Compare the requested row against each tablet's end row until the owning tablet is found.
    Text splitCompareKey = new Text();
    key.getRow(splitCompareKey);
    Text scannedCompareKey = new Text();

    for (Entry<Key, Value> entry : scanner) {
        byte[] keyBytes = entry.getKey().getRow().copyBytes();

        if (keyBytes[keyBytes.length - 1] == '<') {
            // Default (last) tablet: use its location.
            location = Optional.of(entry.getValue().toString());
            break;
        }

        // Skip the metadata row prefix before comparing.
        scannedCompareKey.set(keyBytes, 3, keyBytes.length - 3);
        if (scannedCompareKey.getLength() > 0) {
            int compareTo = splitCompareKey.compareTo(scannedCompareKey);
            if (compareTo <= 0) {
                location = Optional.of(entry.getValue().toString());
            }
        }
    }
}

@Override
public void deserialize(Entry<Key, Value> entry) {
    if (!columnValues.containsKey(rowIdName)) {
        entry.getKey().getRow(rowId);
        columnValues.put(rowIdName, rowId.toString());
    }

    if (rowOnly) {
        return;
    }

    entry.getKey().getColumnFamily(family);
    entry.getKey().getColumnQualifier(qualifier);

    if (family.equals(ROW_ID_COLUMN) && qualifier.equals(ROW_ID_COLUMN)) {
        return;
    }

    value.set(entry.getValue().get());
    columnValues.put(familyQualifierColumnMap.get(family.toString()).get(qualifier.toString()), value.toString());
}

@Override
public byte[] getBytes(Text writable) {
    // TODO: decoding to a String only to re-encode it to bytes should not be necessary.
    // FIXME: works around a trailing NUL control character that Text leaves at the end of its
    // backing array, which the JSON SerDe cannot handle.
    try {
        return Text.decode(writable.getBytes(), 0, writable.getLength()).getBytes(Charset.forName("UTF-8"));
    }
    catch (CharacterCodingException e) {
        throw new RuntimeException(e);
    }
}

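// A minimal alternative sketch (not from the original source): Text already stores UTF-8, and
// Text.getBytes() returns its backing array, which may be longer than getLength(). Copying just
// the valid prefix drops the trailing bytes without a decode/encode round trip. The method name
// getBytesCopy is a placeholder; requires java.util.Arrays.
public byte[] getBytesCopy(Text writable) {
    return Arrays.copyOf(writable.getBytes(), writable.getLength());
}
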
@BeforeClass
public static void testWriteSequenceFile() throws IOException {
    Configuration c = new Configuration();
    URI uri = file().toURI();
    try (SequenceFile.Writer writer = new SequenceFile.Writer(FileSystem.get(uri, c), c,
            new Path(uri.toString()), LongWritable.class, Text.class)) {
        final LongWritable key = new LongWritable();
        final Text val = new Text();
        for (int i = 0; i < COUNT; ++i) {
            key.set(i);
            val.set(Integer.toString(i));
            writer.append(key, val);
        }
    }
}

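// A read-back sketch under the same assumptions as the writer above (the file() helper and the
// COUNT constant come from the surrounding test class); the test name is a placeholder.
@Test
public void testReadSequenceFile() throws IOException {
    Configuration c = new Configuration();
    Path path = new Path(file().toURI().toString());
    try (SequenceFile.Reader reader = new SequenceFile.Reader(c, SequenceFile.Reader.file(path))) {
        LongWritable key = new LongWritable();
        Text val = new Text();
        int read = 0;
        while (reader.next(key, val)) {
            assertEquals(read, key.get());
            assertEquals(Integer.toString(read), val.toString());
            read++;
        }
        assertEquals(COUNT, read);
    }
}
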
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
    Path src = new Path(job.get(
        org.apache.hadoop.mapreduce.GenericMRLoadGenerator.INDIRECT_INPUT_FILE, null));
    FileSystem fs = src.getFileSystem(job);

    ArrayList<IndirectSplit> splits = new ArrayList<IndirectSplit>(numSplits);
    LongWritable key = new LongWritable();
    Text value = new Text();
    for (SequenceFile.Reader sl = new SequenceFile.Reader(fs, src, job); sl.next(key, value);) {
        splits.add(new IndirectSplit(new Path(value.toString()), key.get()));
    }

    return splits.toArray(new IndirectSplit[splits.size()]);
}

/**
 * Get a token from the token sequence file.
 *
 * @param authPath path of the token sequence file
 * @param proxyUserName user whose token to look up
 * @return Token for proxyUserName if it exists.
 * @throws IOException
 */
private static Optional<Token<?>> getTokenFromSeqFile(String authPath, String proxyUserName) throws IOException {
    try (Closer closer = Closer.create()) {
        FileSystem localFs = FileSystem.getLocal(new Configuration());
        SequenceFile.Reader tokenReader =
            closer.register(new SequenceFile.Reader(localFs, new Path(authPath), localFs.getConf()));
        Text key = new Text();
        Token<?> value = new Token<>();
        while (tokenReader.next(key, value)) {
            LOG.info("Found token for " + key);
            if (key.toString().equals(proxyUserName)) {
                return Optional.<Token<?>>of(value);
            }
        }
    }
    return Optional.absent();
}

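// A sketch (assumed, not from the original source) of producing the file this method reads: a
// SequenceFile keyed by the proxy user name with the Token as the value. authPath, proxyUserName
// and token are stand-ins for caller-supplied values.
FileSystem localFs = FileSystem.getLocal(new Configuration());
try (SequenceFile.Writer writer = SequenceFile.createWriter(localFs.getConf(),
        SequenceFile.Writer.file(new Path(authPath)),
        SequenceFile.Writer.keyClass(Text.class),
        SequenceFile.Writer.valueClass(Token.class))) {
    writer.append(new Text(proxyUserName), token);
}
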
private static final void shareMapFile(String symbol, int slots, Path mfile, JobConf job)
    throws IOException, URISyntaxException {
    FileSystem fs = FileSystem.get(mfile.toUri(), job);
    if (fs.exists(mfile) && fs.getFileStatus(mfile).isDir()) {
        DistributedCache.createSymlink(job);
        FileStatus[] fstats = fs.listStatus(mfile, getPassDirectoriesFilter(fs));
        LongWritable key = new LongWritable();
        Text value = new Text();
        for (int i = 0; i < fstats.length; i++) {
            Path curMap = fstats[i].getPath();
            MapFile.Reader mreader = new MapFile.Reader(fs, curMap.toString(), job);
            if (mreader.next(key, value)) {
                int rid = (int) (key.get() % slots);
                String uriWithLink = curMap.toUri().toString() + "#" + symbol + "-" + Integer.toString(rid);
                DistributedCache.addCacheFile(new URI(uriWithLink), job);
            }
            else {
                System.exit(-1);
            }
            mreader.close();
        }
    }
    job.setInt(symbol, slots);
}

public JobState.DatasetState getInternal(String storeName, String tableName, String stateId,
    boolean sanitizeKeyForComparison) throws IOException {
    Path tablePath = new Path(new Path(this.storeRootDir, storeName), tableName);
    if (!this.fs.exists(tablePath)) {
        return null;
    }

    Configuration deserializeConf = new Configuration(this.conf);
    WritableShimSerialization.addToHadoopConfiguration(deserializeConf);
    try (@SuppressWarnings("deprecation") SequenceFile.Reader reader =
        new SequenceFile.Reader(this.fs, tablePath, deserializeConf)) {
        // Read sequentially until the requested state id is found. The value holder and read
        // loop below are reconstructed; the original snippet was truncated at these points.
        Text key = new Text();
        JobState.DatasetState writable = new JobState.DatasetState();
        while (reader.next(key)) {
            String stringKey = sanitizeKeyForComparison
                ? sanitizeDatasetStatestoreNameFromDatasetURN(storeName, key.toString())
                : key.toString();
            writable = (JobState.DatasetState) reader.getCurrentValue(writable);
            if (stringKey.equals(stateId)) {
                return writable;
            }
        }
    }
    return null;
}

@Override
public RecordReader<BytesWritable, BytesWritable> getRecordReader(InputSplit split, JobConf conf, Reporter reporter)
    throws IOException {
    String inputPathString = ((FileSplit) split).getPath().toUri().getPath();
    log.info("Input file path:" + inputPathString);
    Path inputPath = new Path(inputPathString);

    SequenceFile.Reader reader = new SequenceFile.Reader(inputPath.getFileSystem(conf), inputPath, conf);
    SequenceFile.Metadata meta = reader.getMetadata();

    try {
        Text keySchema = meta.get(new Text("key.schema"));
        Text valueSchema = meta.get(new Text("value.schema"));

        if (0 == keySchema.getLength() || 0 == valueSchema.getLength()) {
            throw new Exception();
        }

        // update JobConf with schemas
        conf.set("mapper.input.key.schema", keySchema.toString());
        conf.set("mapper.input.value.schema", valueSchema.toString());
    }
    catch (Exception e) {
        throw new IOException("Failed to load schema from file: " + inputPathString + "\n");
    }

    return super.getRecordReader(split, conf, reporter);
}

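// A hypothetical sketch of how the "key.schema"/"value.schema" metadata read above could be
// written in the first place; keySchemaJson, valueSchemaJson and outputPath are placeholders.
SequenceFile.Metadata meta = new SequenceFile.Metadata();
meta.set(new Text("key.schema"), new Text(keySchemaJson));
meta.set(new Text("value.schema"), new Text(valueSchemaJson));
try (SequenceFile.Writer writer = SequenceFile.createWriter(conf,
        SequenceFile.Writer.file(outputPath),
        SequenceFile.Writer.keyClass(BytesWritable.class),
        SequenceFile.Writer.valueClass(BytesWritable.class),
        SequenceFile.Writer.metadata(meta))) {
    // append BytesWritable key/value records here
}
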
@SuppressWarnings({"rawtypes", "unchecked"}) private static Path createEmptyFile(Path hiveScratchDir, HiveOutputFormat outFileFormat, JobConf job, Properties props, boolean dummyRow) throws IOException, InstantiationException, IllegalAccessException { // create a dummy empty file in a new directory String newDir = hiveScratchDir + Path.SEPARATOR + UUID.randomUUID().toString(); Path newPath = new Path(newDir); FileSystem fs = newPath.getFileSystem(job); fs.mkdirs(newPath); //Qualify the path against the file system. The user configured path might contain default port which is skipped //in the file status. This makes sure that all paths which goes into PathToPartitionInfo are always listed status //file path. newPath = fs.makeQualified(newPath); String newFile = newDir + Path.SEPARATOR + "emptyFile"; Path newFilePath = new Path(newFile); RecordWriter recWriter = outFileFormat.getHiveRecordWriter(job, newFilePath, Text.class, false, props, null); if (dummyRow) { // empty files are omitted at CombineHiveInputFormat. // for meta-data only query, it effectively makes partition columns disappear.. // this could be fixed by other methods, but this seemed to be the most easy (HIVEV-2955) recWriter.write(new Text("empty")); // written via HiveIgnoreKeyTextOutputFormat } recWriter.close(false); return StringInternUtils.internUriStringsInPath(newPath); }
@BeforeClass
public void setUp() throws IOException {
    this.configuration = new Configuration();
    this.fileSystem = FileSystem.getLocal(this.configuration);
    this.tokenFilePath = new Path(HelixUtilsTest.class.getSimpleName(), "token");
    this.token = new Token<>();
    this.token.setKind(new Text("test"));
    this.token.setService(new Text("test"));
}

private static void createSeqFile(FileSystem fs, Path file, int rowCount) throws IOException {
    Configuration conf = new Configuration();
    try {
        if (fs.exists(file)) {
            fs.delete(file, false);
        }

        SequenceFile.Writer w = SequenceFile.createWriter(fs, conf, file, IntWritable.class, Text.class);
        for (int i = 0; i < rowCount; i++) {
            w.append(new IntWritable(i), new Text("line " + i));
        }
        w.close();
        System.out.println("done");
    }
    catch (IOException e) {
        e.printStackTrace();
    }
}

@SuppressWarnings("unchecked") @Test public void testInitNextRecordReader() throws IOException{ JobConf conf = new JobConf(); Path[] paths = new Path[3]; long[] fileLength = new long[3]; File[] files = new File[3]; LongWritable key = new LongWritable(1); Text value = new Text(); try { for(int i=0;i<3;i++){ fileLength[i] = i; File dir = new File(outDir.toString()); dir.mkdir(); files[i] = new File(dir,"testfile"+i); FileWriter fileWriter = new FileWriter(files[i]); fileWriter.close(); paths[i] = new Path(outDir+"/testfile"+i); verify(reporter, times(3)).progress(); } finally { FileUtil.fullyDelete(new File(outDir.toString()));
@Test @SuppressWarnings("deprecation") public void testSerializeToSequenceFile() throws IOException { Closer closer = Closer.create(); Configuration conf = new Configuration(); WritableShimSerialization.addToHadoopConfiguration(conf); try { SequenceFile.Writer writer1 = closer.register(SequenceFile.createWriter(this.fs, conf, new Path(this.outputPath, "seq1"), Text.class, WorkUnitState.class)); Text key = new Text(); WorkUnitState workUnitState = new WorkUnitState(); TestWatermark watermark = new TestWatermark(); watermark.setLongWatermark(10L); workUnitState.setActualHighWatermark(watermark); writer1.append(key, workUnitState); SequenceFile.Writer writer2 = closer.register(SequenceFile.createWriter(this.fs, conf, new Path(this.outputPath, "seq2"), Text.class, WorkUnitState.class)); watermark.setLongWatermark(100L); workUnitState.setActualHighWatermark(watermark); writer2.append(key, workUnitState); } catch (Throwable t) { throw closer.rethrow(t); } finally { closer.close(); } }
static private void finalize(Configuration conf, JobConf jobconf,
    final Path destPath, String preservedAttributes) throws IOException {
    if (preservedAttributes == null) {
        return;
    }
    EnumSet<FileAttribute> preserved = FileAttribute.parse(preservedAttributes);
    if (!preserved.contains(FileAttribute.USER)
        && !preserved.contains(FileAttribute.GROUP)
        && !preserved.contains(FileAttribute.PERMISSION)) {
        return;
    }

    FileSystem dstfs = destPath.getFileSystem(conf);
    Path dstdirlist = new Path(jobconf.get(DST_DIR_LIST_LABEL));
    try (SequenceFile.Reader in = new SequenceFile.Reader(jobconf, Reader.file(dstdirlist))) {
        Text dsttext = new Text();
        FilePair pair = new FilePair();
        while (in.next(dsttext, pair)) {
            Path absdst = new Path(destPath, pair.output);
            updateDestStatus(pair.input, dstfs.getFileStatus(absdst), preserved, dstfs);
        }
    }
}

private static ExportProtos.ExportRequest getConfiguredRequest(Configuration conf,
    Path dir, final Scan scan, final Token<?> userToken) throws IOException {
    boolean compressed = conf.getBoolean(FileOutputFormat.COMPRESS, false);
    String compressionType = conf.get(FileOutputFormat.COMPRESS_TYPE, DEFAULT_TYPE.toString());
    String compressionCodec = conf.get(FileOutputFormat.COMPRESS_CODEC, DEFAULT_CODEC.getName());
    DelegationToken protoToken = null;
    if (userToken != null) {
        protoToken = DelegationToken.newBuilder()
            .setIdentifier(ByteStringer.wrap(userToken.getIdentifier()))
            .setPassword(ByteStringer.wrap(userToken.getPassword()))
            .setKind(userToken.getKind().toString())
            .setService(userToken.getService().toString()).build();
    }
    LOG.info("compressed=" + compressed
        + ", compression type=" + compressionType
        + ", compression codec=" + compressionCodec
        + ", userToken=" + userToken);
    ExportProtos.ExportRequest.Builder builder = ExportProtos.ExportRequest.newBuilder()
        .setScan(ProtobufUtil.toScan(scan))
        .setOutputPath(dir.toString())
        .setCompressed(compressed)
        .setCompressCodec(compressionCodec)
        .setCompressType(compressionType);
    if (protoToken != null) {
        builder.setFsToken(protoToken);
    }
    return builder.build();
}

final List<String> list = new ArrayList<>();
final boolean snappyDecode = conf != null && conf.getBoolean(FileOutputFormat.COMPRESS, false);
// The opening of the try-with-resources was truncated in the original; the dangling
// Reader.file(...) option implies a reader constructed roughly as follows.
try (SequenceFile.Reader reader = new SequenceFile.Reader(conf,
        SequenceFile.Reader.file(new Path(fileName)))) {
    Text key = new Text();
    IntWritable val = new IntWritable();
    while (reader.next(key, val)) {
        // loop body truncated in the original; presumably each key/value pair is collected
        // into `list` here
    }
}