public SequenceFileReader(URI uri, Configuration c) throws IOException {
  this(new SequenceFile.Reader(FileSystem.get(uri, c), new Path(uri.toString()), c), c);
}
/**
 * Read the metadata from a Hadoop SequenceFile.
 *
 * @param fs The filesystem to read from
 * @param path The file to read from
 * @return The metadata from this file
 */
public static Map<String, String> getMetadataFromSequenceFile(FileSystem fs, Path path) {
  try {
    Configuration conf = new Configuration();
    conf.setInt("io.file.buffer.size", 4096);
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
    SequenceFile.Metadata meta = reader.getMetadata();
    reader.close();
    TreeMap<Text, Text> map = meta.getMetadata();
    Map<String, String> values = new HashMap<String, String>();
    for (Map.Entry<Text, Text> entry : map.entrySet()) {
      values.put(entry.getKey().toString(), entry.getValue().toString());
    }
    return values;
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
}
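// A hedged usage sketch (not from the source): read and print the metadata header of a
// SequenceFile on the default filesystem. The path "/tmp/example.seq" is an assumed example.
Configuration conf = new Configuration();
FileSystem fs = FileSystem.get(conf);
Map<String, String> metadata = getMetadataFromSequenceFile(fs, new Path("/tmp/example.seq"));
for (Map.Entry<String, String> entry : metadata.entrySet()) {
  System.out.println(entry.getKey() + " = " + entry.getValue());
}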
/**
 * Retrieves a {@link Token} from a given sequence file for a specified user. The sequence file should contain a list
 * of key, value pairs where each key corresponds to a user and each value corresponds to a {@link Token} for that
 * user.
 *
 * @param userNameKey The name of the user to retrieve a {@link Token} for
 * @param tokenFilePath The path to the sequence file containing the {@link Token}s
 *
 * @return A {@link Token} for the given user name
 */
public static Optional<Token<?>> getTokenFromSeqFile(String userNameKey, Path tokenFilePath) throws IOException {
  log.info("Reading tokens from sequence file " + tokenFilePath);
  try (Closer closer = Closer.create()) {
    FileSystem localFs = FileSystem.getLocal(new Configuration());
    @SuppressWarnings("deprecation")
    SequenceFile.Reader tokenReader =
        closer.register(new SequenceFile.Reader(localFs, tokenFilePath, localFs.getConf()));
    Text key = new Text();
    Token<?> value = new Token<>();
    while (tokenReader.next(key, value)) {
      log.debug("Found token for user: " + key);
      if (key.toString().equals(userNameKey)) {
        return Optional.<Token<?>> of(value);
      }
    }
  }
  log.warn("Did not find any tokens for user " + userNameKey);
  return Optional.absent();
}
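// A hedged usage sketch (user name and path are assumptions, not from the source): look up a
// user's token in a local token sequence file and report its kind if one is found.
Optional<Token<?>> token = getTokenFromSeqFile("alice", new Path("/tmp/tokens.seq"));
if (token.isPresent()) {
  System.out.println("Found token of kind: " + token.get().getKind());
}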
@Override
void doRecover(Path srcPath, long nBytes) throws Exception {
  SequenceFile.Reader reader = new SequenceFile.Reader(this.hdfsConfig,
      SequenceFile.Reader.file(srcPath), SequenceFile.Reader.length(nBytes));
  try {
    Writable key = (Writable) this.format.keyClass().newInstance();
    Writable value = (Writable) this.format.valueClass().newInstance();
    // Replay every record from the partially written source file into the recovery writer.
    while (reader.next(key, value)) {
      this.writer.append(key, value);
    }
  } finally {
    reader.close();
  }
}
public void performSequenceFileRead(FileSystem fs, int count, Path file) throws IOException {
  SequenceFile.Reader reader = new SequenceFile.Reader(fs, file, conf);
  try {
    ByteWritable key = new ByteWritable();
    BytesRefArrayWritable val = new BytesRefArrayWritable();
    // Read up to count records, reusing the same key/value instances.
    for (int i = 0; i < count; i++) {
      reader.next(key, val);
    }
  } finally {
    reader.close();
  }
}
private void readPartitionFile(FileSystem fs, Configuration conf, Path path) throws IOException {
  @SuppressWarnings("deprecation")
  SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
  ImmutableBytesWritable key = new ImmutableBytesWritable();
  partitions = new ArrayList<>();
  while (reader.next(key)) {
    partitions.add(new ImmutableBytesWritable(key.copyBytes()));
  }
  reader.close();
  if (!Ordering.natural().isOrdered(partitions)) {
    throw new IOException("Partitions are not ordered!");
  }
}
@Ignore("convenient trial tool for dev") @Test public void test() throws IOException, InterruptedException { Configuration hconf = HadoopUtil.getCurrentConfiguration(); HiveToBaseCuboidMapper mapper = new HiveToBaseCuboidMapper(); Context context = MockupMapContext.create(hconf, metadataUrl, cubeName, null); mapper.doSetup(context); Reader reader = new Reader(hconf, SequenceFile.Reader.file(srcPath)); Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), hconf); Text value = new Text(); while (reader.next(key, value)) { mapper.map(key, value, context); } reader.close(); }
@Override
public boolean validateInput(FileSystem fs, HiveConf conf, List<FileStatus> files) throws IOException {
  if (files.size() <= 0) {
    return false;
  }
  for (int fileId = 0; fileId < files.size(); fileId++) {
    SequenceFile.Reader reader = null;
    try {
      reader = new SequenceFile.Reader(fs, files.get(fileId).getPath(), conf);
      reader.close();
      reader = null;
    } catch (IOException e) {
      return false;
    } finally {
      IOUtils.closeStream(reader);
    }
  }
  return true;
}
@Override
public Void call() throws Exception {
  Configuration conf = new Configuration(ParallelRunner.this.fs.getConf());
  WritableShimSerialization.addToHadoopConfiguration(conf);
  try (@SuppressWarnings("deprecation")
      SequenceFile.Reader reader = new SequenceFile.Reader(ParallelRunner.this.fs, inputFilePath, conf)) {
    Writable key = keyClass.newInstance();
    T state = stateClass.newInstance();
    while (reader.next(key)) {
      state = (T) reader.getCurrentValue(state);
      states.add(state);
      state = stateClass.newInstance();
    }
    if (deleteAfter) {
      HadoopUtils.deletePath(ParallelRunner.this.fs, inputFilePath, false);
    }
  }
  return null;
}
}), "Deserialize state from file " + inputFilePath));
public void initIOContext(FileSplit split, JobConf job, Class inputFormatClass, RecordReader recordReader)
    throws IOException {
  boolean blockPointer = false;
  long blockStart = -1;
  FileSplit fileSplit = split;
  Path path = fileSplit.getPath();
  FileSystem fs = path.getFileSystem(job);
  if (inputFormatClass.getName().contains("SequenceFile")) {
    SequenceFile.Reader in = new SequenceFile.Reader(fs, path, job);
    blockPointer = in.isBlockCompressed();
    in.sync(fileSplit.getStart());
    blockStart = in.getPosition();
    in.close();
  } else if (recordReader instanceof RCFileRecordReader) {
    blockPointer = true;
    blockStart = ((RCFileRecordReader) recordReader).getStart();
  } else if (inputFormatClass.getName().contains("RCFile")) {
    blockPointer = true;
    RCFile.Reader in = new RCFile.Reader(fs, path, job);
    in.sync(fileSplit.getStart());
    blockStart = in.getPosition();
    in.close();
  }
  this.jobConf = job;
  this.initIOContext(blockStart, blockPointer, path.makeQualified(fs));
  this.initIOContextSortedProps(split, recordReader, job);
}
public CubeStatsResult(Path path, int precision) throws IOException {
  Configuration hadoopConf = HadoopUtil.getCurrentConfiguration();
  Option seqInput = SequenceFile.Reader.file(path);
  try (Reader reader = new SequenceFile.Reader(hadoopConf, seqInput)) {
    LongWritable key = (LongWritable) ReflectionUtils.newInstance(reader.getKeyClass(), hadoopConf);
    BytesWritable value = (BytesWritable) ReflectionUtils.newInstance(reader.getValueClass(), hadoopConf);
    while (reader.next(key, value)) {
      if (key.get() == 0L) {
        percentage = Bytes.toInt(value.getBytes());
      } else if (key.get() == -1) {
        mapperOverlapRatio = Bytes.toDouble(value.getBytes());
      } else if (key.get() == -2) {
        mapperNumber = Bytes.toInt(value.getBytes());
      } else if (key.get() == -3) {
        sourceRecordCount = Bytes.toLong(value.getBytes());
      } else if (key.get() > 0) {
        HLLCounter hll = new HLLCounter(precision);
        ByteArray byteArray = new ByteArray(value.getBytes());
        hll.readRegisters(byteArray.asBuffer());
        counterMap.put(key.get(), hll);
      }
    }
  }
}
/**
 * Check whether a partition file exists for the hfile; if so, replace the table splits so the job gets more reducers.
 * @param conf the job configuration
 * @param path the hfile partition file
 * @throws IOException
 */
@SuppressWarnings("deprecation")
private void reconfigurePartitions(Configuration conf, Path path) throws IOException {
  FileSystem fs = path.getFileSystem(conf);
  if (fs.exists(path)) {
    try (SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf)) {
      int partitionCount = 0;
      Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
      Writable value = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
      while (reader.next(key, value)) {
        partitionCount++;
      }
      TotalOrderPartitioner.setPartitionFile(job.getConfiguration(), path);
      // The number of reduce tasks should be one more than the number of partition keys
      job.setNumReduceTasks(partitionCount + 1);
    }
  } else {
    logger.info("File '" + path.toString() + "' doesn't exist, will not reconfigure hfile partitions");
  }
}
/**
 * Open and read just the metadata header from a SequenceFile.
 *
 * @param fs The FileSystem the SequenceFile is on.
 * @param path The path to the file.
 * @param conf The Hadoop configuration.
 * @return The metadata header.
 * @throws IOException If the metadata cannot be read from the file.
 */
private static Metadata getMetadata(FileSystem fs, Path path, Configuration conf) throws IOException {
  SequenceFile.Reader metadataReader = null;
  try {
    metadataReader = new SequenceFile.Reader(fs, path, conf);
    return metadataReader.getMetadata();
  } finally {
    if (null != metadataReader) {
      metadataReader.close();
    }
  }
}
/**
 * Clones the attributes (like compression) of the input file and creates a
 * corresponding Writer.
 * @param inputFile the path of the input file whose attributes should be
 * cloned
 * @param outputFile the path of the output file
 * @param prog the Progressable to report status during the file write
 * @return Writer
 * @throws IOException
 */
public Writer cloneFileAttributes(Path inputFile, Path outputFile, Progressable prog) throws IOException {
  Reader reader = new Reader(conf, Reader.file(inputFile), new Reader.OnlyHeaderOption());
  CompressionType compress = reader.getCompressionType();
  CompressionCodec codec = reader.getCompressionCodec();
  reader.close();

  Writer writer = createWriter(conf, Writer.file(outputFile), Writer.keyClass(keyClass),
      Writer.valueClass(valClass), Writer.compression(compress, codec), Writer.progressable(prog));
  return writer;
}
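// A hedged, self-contained sketch of the same idea (not from the source): open the input file,
// create a writer with the same key/value classes and compression settings, and copy every record.
// The paths "/tmp/in.seq" and "/tmp/out.seq" are assumed examples.
Configuration conf = new Configuration();
Path input = new Path("/tmp/in.seq");
Path output = new Path("/tmp/out.seq");
try (SequenceFile.Reader reader = new SequenceFile.Reader(conf, SequenceFile.Reader.file(input))) {
  SequenceFile.Writer writer = SequenceFile.createWriter(conf,
      SequenceFile.Writer.file(output),
      SequenceFile.Writer.keyClass(reader.getKeyClass()),
      SequenceFile.Writer.valueClass(reader.getValueClass()),
      SequenceFile.Writer.compression(reader.getCompressionType(), reader.getCompressionCodec()));
  try {
    Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
    Writable value = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
    while (reader.next(key, value)) {
      writer.append(key, value);
    }
  } finally {
    writer.close();
  }
}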
final SequenceFile.Reader reader =
    new SequenceFile.Reader(configuration, Reader.file(fileSystem.makeQualified(file)));
final String inputfileName = file.getName() + "." + System.nanoTime() + ".";
int counter = 0;
Text key = new Text();
try {
  while (reader.next(key)) {
    String fileName = key.toString();
reader = new SequenceFile.Reader(configuration, Reader.file(fileSystem.makeQualified(file)));
final Text key = new Text();
final KeyValueWriterCallback callback = new KeyValueWriterCallback(reader);
LOG.debug("Read from SequenceFile: {} ", new Object[]{file});
try {
  while (reader.next(key)) {
    String fileName = key.toString();
in = new Reader(fs, inFiles[currentFile], conf);
compressionType = in.getCompressionType();
codec = in.getCompressionCodec();
int recordLength = in.nextRaw(rawKeys, rawValue);
if (recordLength == -1) {
  in.close();
  if (deleteInput) {
    fs.delete(inFiles[currentFile], true);
    atEof = currentFile >= inFiles.length;
    if (!atEof) {
      in = new Reader(fs, inFiles[currentFile], conf);
    } else {
      in = null;
SequenceFile.Reader reader = new SequenceFile.Reader(conf,
    SequenceFile.Reader.file(p), new Reader.OnlyHeaderOption());
try {
  reader.close();