// Resolve the tablet location for the given table/key by scanning the
// 'loc' column family of the accumulo.metadata table.
String tableId = connector.tableOperations().tableIdMap().get(table);
Scanner scanner = connector.createScanner("accumulo.metadata", auths);
scanner.fetchColumnFamily(new Text("loc"));

Optional<String> location = Optional.empty();
if (key == null) {
    // No start key: the first tablet's location is the answer.
    Iterator<Entry<Key, Value>> iter = scanner.iterator();
    if (iter.hasNext()) {
        location = Optional.of(iter.next().getValue().toString());
    }
}
else {
    // Compare the requested row against each tablet's end row until the owning tablet is found.
    Text splitCompareKey = new Text();
    key.getRow(splitCompareKey);
    Text scannedCompareKey = new Text();

    for (Entry<Key, Value> entry : scanner) {
        byte[] keyBytes = entry.getKey().getRow().copyBytes();

        if (keyBytes[keyBytes.length - 1] == '<') {
            // Default (last) tablet: use its location.
            location = Optional.of(entry.getValue().toString());
            break;
        }

        // Skip the metadata row prefix before comparing.
        scannedCompareKey.set(keyBytes, 3, keyBytes.length - 3);
        if (scannedCompareKey.getLength() > 0) {
            int compareTo = splitCompareKey.compareTo(scannedCompareKey);
            if (compareTo <= 0) {
                location = Optional.of(entry.getValue().toString());
            }
        }
    }
}

@Override
public void deserialize(Entry<Key, Value> entry) {
    if (!columnValues.containsKey(rowIdName)) {
        entry.getKey().getRow(rowId);
        columnValues.put(rowIdName, rowId.toString());
    }

    if (rowOnly) {
        return;
    }

    entry.getKey().getColumnFamily(family);
    entry.getKey().getColumnQualifier(qualifier);

    if (family.equals(ROW_ID_COLUMN) && qualifier.equals(ROW_ID_COLUMN)) {
        return;
    }

    value.set(entry.getValue().get());
    columnValues.put(familyQualifierColumnMap.get(family.toString()).get(qualifier.toString()), value.toString());
}

@Override
public byte[] getBytes(Text writable) {
    // TODO: decoding to a String only to re-encode it to bytes should not be necessary.
    // FIXME: works around a trailing NUL control character that Text leaves at the end of its
    // backing array, which the JSON SerDe cannot handle.
    try {
        return Text.decode(writable.getBytes(), 0, writable.getLength()).getBytes(Charset.forName("UTF-8"));
    }
    catch (CharacterCodingException e) {
        throw new RuntimeException(e);
    }
}

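// A minimal alternative sketch (not from the original source): Text already stores UTF-8, and
// Text.getBytes() returns its backing array, which may be longer than getLength(). Copying just
// the valid prefix drops the trailing bytes without a decode/encode round trip. The method name
// getBytesCopy is a placeholder; requires java.util.Arrays.
public byte[] getBytesCopy(Text writable) {
    return Arrays.copyOf(writable.getBytes(), writable.getLength());
}
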
@BeforeClass
public static void testWriteSequenceFile() throws IOException {
    Configuration c = new Configuration();
    URI uri = file().toURI();
    try (SequenceFile.Writer writer = new SequenceFile.Writer(FileSystem.get(uri, c), c,
            new Path(uri.toString()), LongWritable.class, Text.class)) {
        final LongWritable key = new LongWritable();
        final Text val = new Text();
        for (int i = 0; i < COUNT; ++i) {
            key.set(i);
            val.set(Integer.toString(i));
            writer.append(key, val);
        }
    }
}

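// A read-back sketch under the same assumptions as the writer above (the file() helper and the
// COUNT constant come from the surrounding test class); the test name is a placeholder.
@Test
public void testReadSequenceFile() throws IOException {
    Configuration c = new Configuration();
    Path path = new Path(file().toURI().toString());
    try (SequenceFile.Reader reader = new SequenceFile.Reader(c, SequenceFile.Reader.file(path))) {
        LongWritable key = new LongWritable();
        Text val = new Text();
        int read = 0;
        while (reader.next(key, val)) {
            assertEquals(read, key.get());
            assertEquals(Integer.toString(read), val.toString());
            read++;
        }
        assertEquals(COUNT, read);
    }
}
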
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
    Path src = new Path(job.get(
        org.apache.hadoop.mapreduce.GenericMRLoadGenerator.INDIRECT_INPUT_FILE, null));
    FileSystem fs = src.getFileSystem(job);

    ArrayList<IndirectSplit> splits = new ArrayList<IndirectSplit>(numSplits);
    LongWritable key = new LongWritable();
    Text value = new Text();
    for (SequenceFile.Reader sl = new SequenceFile.Reader(fs, src, job); sl.next(key, value);) {
        splits.add(new IndirectSplit(new Path(value.toString()), key.get()));
    }

    return splits.toArray(new IndirectSplit[splits.size()]);
}

/**
 * Get a token from the token sequence file.
 *
 * @param authPath path of the token sequence file
 * @param proxyUserName user whose token to look up
 * @return Token for proxyUserName if it exists.
 * @throws IOException
 */
private static Optional<Token<?>> getTokenFromSeqFile(String authPath, String proxyUserName) throws IOException {
    try (Closer closer = Closer.create()) {
        FileSystem localFs = FileSystem.getLocal(new Configuration());
        SequenceFile.Reader tokenReader =
            closer.register(new SequenceFile.Reader(localFs, new Path(authPath), localFs.getConf()));
        Text key = new Text();
        Token<?> value = new Token<>();
        while (tokenReader.next(key, value)) {
            LOG.info("Found token for " + key);
            if (key.toString().equals(proxyUserName)) {
                return Optional.<Token<?>>of(value);
            }
        }
    }
    return Optional.absent();
}

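// A sketch (assumed, not from the original source) of producing the file this method reads: a
// SequenceFile keyed by the proxy user name with the Token as the value. authPath, proxyUserName
// and token are stand-ins for caller-supplied values.
FileSystem localFs = FileSystem.getLocal(new Configuration());
try (SequenceFile.Writer writer = SequenceFile.createWriter(localFs.getConf(),
        SequenceFile.Writer.file(new Path(authPath)),
        SequenceFile.Writer.keyClass(Text.class),
        SequenceFile.Writer.valueClass(Token.class))) {
    writer.append(new Text(proxyUserName), token);
}
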
private static final void shareMapFile(String symbol, int slots, Path mfile, JobConf job)
    throws IOException, URISyntaxException {
    FileSystem fs = FileSystem.get(mfile.toUri(), job);
    if (fs.exists(mfile) && fs.getFileStatus(mfile).isDir()) {
        DistributedCache.createSymlink(job);
        FileStatus[] fstats = fs.listStatus(mfile, getPassDirectoriesFilter(fs));
        LongWritable key = new LongWritable();
        Text value = new Text();
        for (int i = 0; i < fstats.length; i++) {
            Path curMap = fstats[i].getPath();
            MapFile.Reader mreader = new MapFile.Reader(fs, curMap.toString(), job);
            if (mreader.next(key, value)) {
                int rid = (int) (key.get() % slots);
                String uriWithLink = curMap.toUri().toString() + "#" + symbol + "-" + Integer.toString(rid);
                DistributedCache.addCacheFile(new URI(uriWithLink), job);
            }
            else {
                System.exit(-1);
            }
            mreader.close();
        }
    }
    job.setInt(symbol, slots);
}

public JobState.DatasetState getInternal(String storeName, String tableName, String stateId,
    boolean sanitizeKeyForComparison) throws IOException {
    Path tablePath = new Path(new Path(this.storeRootDir, storeName), tableName);
    if (!this.fs.exists(tablePath)) {
        return null;
    }

    Configuration deserializeConf = new Configuration(this.conf);
    WritableShimSerialization.addToHadoopConfiguration(deserializeConf);
    try (@SuppressWarnings("deprecation") SequenceFile.Reader reader =
        new SequenceFile.Reader(this.fs, tablePath, deserializeConf)) {
        // Read sequentially until the requested state id is found. The value holder and read
        // loop below are reconstructed; the original snippet was truncated at these points.
        Text key = new Text();
        JobState.DatasetState writable = new JobState.DatasetState();
        while (reader.next(key)) {
            String stringKey = sanitizeKeyForComparison
                ? sanitizeDatasetStatestoreNameFromDatasetURN(storeName, key.toString())
                : key.toString();
            writable = (JobState.DatasetState) reader.getCurrentValue(writable);
            if (stringKey.equals(stateId)) {
                return writable;
            }
        }
    }
    return null;
}

@Override
public RecordReader<BytesWritable, BytesWritable> getRecordReader(InputSplit split, JobConf conf, Reporter reporter)
    throws IOException {
    String inputPathString = ((FileSplit) split).getPath().toUri().getPath();
    log.info("Input file path:" + inputPathString);
    Path inputPath = new Path(inputPathString);

    SequenceFile.Reader reader = new SequenceFile.Reader(inputPath.getFileSystem(conf), inputPath, conf);
    SequenceFile.Metadata meta = reader.getMetadata();

    try {
        Text keySchema = meta.get(new Text("key.schema"));
        Text valueSchema = meta.get(new Text("value.schema"));

        if (0 == keySchema.getLength() || 0 == valueSchema.getLength()) {
            throw new Exception();
        }

        // update JobConf with schemas
        conf.set("mapper.input.key.schema", keySchema.toString());
        conf.set("mapper.input.value.schema", valueSchema.toString());
    }
    catch (Exception e) {
        throw new IOException("Failed to load schema from file: " + inputPathString + "\n");
    }

    return super.getRecordReader(split, conf, reporter);
}

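// A hypothetical sketch of how the "key.schema"/"value.schema" metadata read above could be
// written in the first place; keySchemaJson, valueSchemaJson and outputPath are placeholders.
SequenceFile.Metadata meta = new SequenceFile.Metadata();
meta.set(new Text("key.schema"), new Text(keySchemaJson));
meta.set(new Text("value.schema"), new Text(valueSchemaJson));
try (SequenceFile.Writer writer = SequenceFile.createWriter(conf,
        SequenceFile.Writer.file(outputPath),
        SequenceFile.Writer.keyClass(BytesWritable.class),
        SequenceFile.Writer.valueClass(BytesWritable.class),
        SequenceFile.Writer.metadata(meta))) {
    // append BytesWritable key/value records here
}
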
@SuppressWarnings({"rawtypes", "unchecked"}) private static Path createEmptyFile(Path hiveScratchDir, HiveOutputFormat outFileFormat, JobConf job, Properties props, boolean dummyRow) throws IOException, InstantiationException, IllegalAccessException { // create a dummy empty file in a new directory String newDir = hiveScratchDir + Path.SEPARATOR + UUID.randomUUID().toString(); Path newPath = new Path(newDir); FileSystem fs = newPath.getFileSystem(job); fs.mkdirs(newPath); //Qualify the path against the file system. The user configured path might contain default port which is skipped //in the file status. This makes sure that all paths which goes into PathToPartitionInfo are always listed status //file path. newPath = fs.makeQualified(newPath); String newFile = newDir + Path.SEPARATOR + "emptyFile"; Path newFilePath = new Path(newFile); RecordWriter recWriter = outFileFormat.getHiveRecordWriter(job, newFilePath, Text.class, false, props, null); if (dummyRow) { // empty files are omitted at CombineHiveInputFormat. // for meta-data only query, it effectively makes partition columns disappear.. // this could be fixed by other methods, but this seemed to be the most easy (HIVEV-2955) recWriter.write(new Text("empty")); // written via HiveIgnoreKeyTextOutputFormat } recWriter.close(false); return StringInternUtils.internUriStringsInPath(newPath); }
@BeforeClass
public void setUp() throws IOException {
    this.configuration = new Configuration();
    this.fileSystem = FileSystem.getLocal(this.configuration);
    this.tokenFilePath = new Path(HelixUtilsTest.class.getSimpleName(), "token");
    this.token = new Token<>();
    this.token.setKind(new Text("test"));
    this.token.setService(new Text("test"));
}

private static void createSeqFile(FileSystem fs, Path file, int rowCount) throws IOException {
    Configuration conf = new Configuration();
    try {
        if (fs.exists(file)) {
            fs.delete(file, false);
        }

        SequenceFile.Writer w = SequenceFile.createWriter(fs, conf, file, IntWritable.class, Text.class);
        for (int i = 0; i < rowCount; i++) {
            w.append(new IntWritable(i), new Text("line " + i));
        }
        w.close();
        System.out.println("done");
    }
    catch (IOException e) {
        e.printStackTrace();
    }
}

@SuppressWarnings("unchecked") @Test public void testInitNextRecordReader() throws IOException{ JobConf conf = new JobConf(); Path[] paths = new Path[3]; long[] fileLength = new long[3]; File[] files = new File[3]; LongWritable key = new LongWritable(1); Text value = new Text(); try { for(int i=0;i<3;i++){ fileLength[i] = i; File dir = new File(outDir.toString()); dir.mkdir(); files[i] = new File(dir,"testfile"+i); FileWriter fileWriter = new FileWriter(files[i]); fileWriter.close(); paths[i] = new Path(outDir+"/testfile"+i); verify(reporter, times(3)).progress(); } finally { FileUtil.fullyDelete(new File(outDir.toString()));
@Test @SuppressWarnings("deprecation") public void testSerializeToSequenceFile() throws IOException { Closer closer = Closer.create(); Configuration conf = new Configuration(); WritableShimSerialization.addToHadoopConfiguration(conf); try { SequenceFile.Writer writer1 = closer.register(SequenceFile.createWriter(this.fs, conf, new Path(this.outputPath, "seq1"), Text.class, WorkUnitState.class)); Text key = new Text(); WorkUnitState workUnitState = new WorkUnitState(); TestWatermark watermark = new TestWatermark(); watermark.setLongWatermark(10L); workUnitState.setActualHighWatermark(watermark); writer1.append(key, workUnitState); SequenceFile.Writer writer2 = closer.register(SequenceFile.createWriter(this.fs, conf, new Path(this.outputPath, "seq2"), Text.class, WorkUnitState.class)); watermark.setLongWatermark(100L); workUnitState.setActualHighWatermark(watermark); writer2.append(key, workUnitState); } catch (Throwable t) { throw closer.rethrow(t); } finally { closer.close(); } }
static private void finalize(Configuration conf, JobConf jobconf,
    final Path destPath, String preservedAttributes) throws IOException {
    if (preservedAttributes == null) {
        return;
    }
    EnumSet<FileAttribute> preserved = FileAttribute.parse(preservedAttributes);
    if (!preserved.contains(FileAttribute.USER)
        && !preserved.contains(FileAttribute.GROUP)
        && !preserved.contains(FileAttribute.PERMISSION)) {
        return;
    }

    FileSystem dstfs = destPath.getFileSystem(conf);
    Path dstdirlist = new Path(jobconf.get(DST_DIR_LIST_LABEL));
    try (SequenceFile.Reader in = new SequenceFile.Reader(jobconf, Reader.file(dstdirlist))) {
        Text dsttext = new Text();
        FilePair pair = new FilePair();
        while (in.next(dsttext, pair)) {
            Path absdst = new Path(destPath, pair.output);
            updateDestStatus(pair.input, dstfs.getFileStatus(absdst), preserved, dstfs);
        }
    }
}

private static ExportProtos.ExportRequest getConfiguredRequest(Configuration conf,
    Path dir, final Scan scan, final Token<?> userToken) throws IOException {
    boolean compressed = conf.getBoolean(FileOutputFormat.COMPRESS, false);
    String compressionType = conf.get(FileOutputFormat.COMPRESS_TYPE, DEFAULT_TYPE.toString());
    String compressionCodec = conf.get(FileOutputFormat.COMPRESS_CODEC, DEFAULT_CODEC.getName());
    DelegationToken protoToken = null;
    if (userToken != null) {
        protoToken = DelegationToken.newBuilder()
            .setIdentifier(ByteStringer.wrap(userToken.getIdentifier()))
            .setPassword(ByteStringer.wrap(userToken.getPassword()))
            .setKind(userToken.getKind().toString())
            .setService(userToken.getService().toString()).build();
    }
    LOG.info("compressed=" + compressed
        + ", compression type=" + compressionType
        + ", compression codec=" + compressionCodec
        + ", userToken=" + userToken);
    ExportProtos.ExportRequest.Builder builder = ExportProtos.ExportRequest.newBuilder()
        .setScan(ProtobufUtil.toScan(scan))
        .setOutputPath(dir.toString())
        .setCompressed(compressed)
        .setCompressCodec(compressionCodec)
        .setCompressType(compressionType);
    if (protoToken != null) {
        builder.setFsToken(protoToken);
    }
    return builder.build();
}

final List<String> list = new ArrayList<>();
final boolean snappyDecode = conf != null && conf.getBoolean(FileOutputFormat.COMPRESS, false);
// The opening of the try-with-resources was truncated in the original; the dangling
// Reader.file(...) option implies a reader constructed roughly as follows.
try (SequenceFile.Reader reader = new SequenceFile.Reader(conf,
        SequenceFile.Reader.file(new Path(fileName)))) {
    Text key = new Text();
    IntWritable val = new IntWritable();
    while (reader.next(key, val)) {
        // loop body truncated in the original; presumably each key/value pair is collected
        // into `list` here
    }
}