final String segmentId = optionsHelper.getOptionValue(OPTION_SEGMENT_ID);
final String outputPath = optionsHelper.getOptionValue(OPTION_OUTPUT_PATH);
final Path partitionFilePath = new Path(optionsHelper.getOptionValue(OPTION_PARTITION_FILE_PATH));
final String hbaseConfFile = optionsHelper.getOptionValue(AbstractHadoopJob.OPTION_HBASE_CONF_PATH);
final String counterPath = optionsHelper.getOptionValue(OPTION_COUNTER_PATH);

try (JavaSparkContext sc = new JavaSparkContext(conf)) {
    sc.sc().addSparkListener(jobListener);

    final FileSystem fs = partitionFilePath.getFileSystem(sc.hadoopConfiguration());
    if (!fs.exists(partitionFilePath)) {
        throw new IllegalArgumentException("File not exist: " + partitionFilePath.toString());
    }

    try (SequenceFile.Reader reader = new SequenceFile.Reader(fs, partitionFilePath, sc.hadoopConfiguration())) {
        RowKeyWritable key = new RowKeyWritable();
        Writable value = NullWritable.get();
        while (reader.next(key, value)) {
            keys.add(key);
            logger.info(" ------- split key: {}", key);
        }
    }

    final FileSystem hbaseClusterFs = hbaseConfFilePath.getFileSystem(sc.hadoopConfiguration());
    try (FSDataInputStream confInput = hbaseClusterFs.open(new Path(hbaseConfFile))) {
        Configuration hbaseJobConf = new Configuration();
        hbaseJobConf.addResource(confInput);
        hbaseJobConf.set("spark.hadoop.dfs.replication", "3"); // HFile, replication=3

        Job job = Job.getInstance(hbaseJobConf, cubeSegment.getStorageLocationIdentifier());
/**
 * Read the metadata from a Hadoop SequenceFile.
 *
 * @param fs   The filesystem to read from
 * @param path The file to read from
 * @return The metadata from this file
 */
public static Map<String, String> getMetadataFromSequenceFile(FileSystem fs, Path path) {
    try {
        Configuration conf = new Configuration();
        conf.setInt("io.file.buffer.size", 4096);

        // Use the configured Configuration and close the reader even if getMetadata() throws.
        SequenceFile.Metadata meta;
        try (SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf)) {
            meta = reader.getMetadata();
        }

        TreeMap<Text, Text> map = meta.getMetadata();
        Map<String, String> values = new HashMap<String, String>();
        for (Map.Entry<Text, Text> entry : map.entrySet()) {
            values.put(entry.getKey().toString(), entry.getValue().toString());
        }
        return values;
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
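A minimal usage sketch for the helper above, assuming a local filesystem; the file path is a placeholder chosen for illustration.

// Illustrative usage of getMetadataFromSequenceFile; the path is a placeholder.
Configuration conf = new Configuration();
FileSystem localFs = FileSystem.getLocal(conf);
Map<String, String> metadata = getMetadataFromSequenceFile(localFs, new Path("/tmp/example.seq"));
for (Map.Entry<String, String> entry : metadata.entrySet()) {
    System.out.println(entry.getKey() + " = " + entry.getValue());
}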
public void printFile(String path) throws Exception {
    FileSystem fileSystem = FileUtil.getFileSystem(path);
    Path fsPath = new Path(path);
    SequenceFile.Reader reader = new SequenceFile.Reader(fileSystem, fsPath, new Configuration());
    LongWritable key = (LongWritable) reader.getKeyClass().newInstance();
    BytesWritable value = (BytesWritable) reader.getValueClass().newInstance();
    System.out.println("reading file " + path);
    while (reader.next(key, value)) {
        if (mPrintOffsetsOnly) {
            System.out.println(Long.toString(key.get()));
        } else {
            // Trim the padded backing array down to the actual value length before printing.
            byte[] nonPaddedBytes = new byte[value.getLength()];
            System.arraycopy(value.getBytes(), 0, nonPaddedBytes, 0, value.getLength());
            System.out.println(Long.toString(key.get()) + ": " + new String(nonPaddedBytes));
        }
    }
    reader.close();
}
/**
 * Get token from the token sequence file.
 *
 * @param authPath path to the local token sequence file
 * @param proxyUserName user whose token should be returned
 * @return Token for proxyUserName if it exists.
 * @throws IOException
 */
private static Optional<Token<?>> getTokenFromSeqFile(String authPath, String proxyUserName) throws IOException {
    try (Closer closer = Closer.create()) {
        FileSystem localFs = FileSystem.getLocal(new Configuration());
        SequenceFile.Reader tokenReader =
                closer.register(new SequenceFile.Reader(localFs, new Path(authPath), localFs.getConf()));
        Text key = new Text();
        Token<?> value = new Token<>();
        while (tokenReader.next(key, value)) {
            LOG.info("Found token for " + key);
            if (key.toString().equals(proxyUserName)) {
                return Optional.<Token<?>> of(value);
            }
        }
    }
    return Optional.absent();
}
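One possible way to consume the helper above, assuming the caller wants to attach the token to the current user and that the surrounding code may throw IOException; the token file path and proxy user name below are placeholders.

// Illustrative only: look up the delegation token for a proxy user and, if present,
// attach it to the current UserGroupInformation. Path and user name are assumptions.
Optional<Token<?>> token = getTokenFromSeqFile("/var/run/hadoop/tokens.seq", "etl_proxy");
if (token.isPresent()) {
    UserGroupInformation.getCurrentUser().addToken(token.get());
}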
public JobState.DatasetState getInternal(String storeName, String tableName, String stateId,
        boolean sanitizeKeyForComparison) throws IOException {
    Path tablePath = new Path(new Path(this.storeRootDir, storeName), tableName);
    if (!this.fs.exists(tablePath)) {
        return null;
    }

    Configuration deserializeConf = new Configuration(this.conf);
    WritableShimSerialization.addToHadoopConfiguration(deserializeConf);
    try (@SuppressWarnings("deprecation")
    SequenceFile.Reader reader = new SequenceFile.Reader(this.fs, tablePath, deserializeConf)) {
        Object writable = reader.getValueClass() == JobState.class ? new JobState() : new JobState.DatasetState();
        Text key = new Text();
        while (reader.next(key)) {
            String stringKey = sanitizeKeyForComparison
                    ? sanitizeDatasetStatestoreNameFromDatasetURN(storeName, key.toString())
                    : key.toString();
            writable = reader.getCurrentValue(writable);
            if (stringKey.equals(stateId)) {
                if (writable instanceof JobState.DatasetState) {
@Override
public RecordReader<BytesWritable, BytesWritable> getRecordReader(InputSplit split, JobConf conf, Reporter reporter)
        throws IOException {
    String inputPathString = ((FileSplit) split).getPath().toUri().getPath();
    log.info("Input file path:" + inputPathString);
    Path inputPath = new Path(inputPathString);

    SequenceFile.Reader reader = new SequenceFile.Reader(inputPath.getFileSystem(conf), inputPath, conf);
    SequenceFile.Metadata meta = reader.getMetadata();

    try {
        Text keySchema = meta.get(new Text("key.schema"));
        Text valueSchema = meta.get(new Text("value.schema"));

        if (0 == keySchema.getLength() || 0 == valueSchema.getLength()) {
            throw new Exception();
        }

        // update JobConf with schemas
        conf.set("mapper.input.key.schema", keySchema.toString());
        conf.set("mapper.input.value.schema", valueSchema.toString());
    } catch (Exception e) {
        throw new IOException("Failed to load schema from file: " + inputPathString + "\n");
    }

    return super.getRecordReader(split, conf, reporter);
}
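The record reader above expects the schemas to be stored in the SequenceFile's own metadata under the keys "key.schema" and "value.schema". A sketch, not taken from the project itself, of writing a file with such metadata using the options-based writer; the output path, schema strings, and key/value classes are assumptions.

// Hypothetical writer side: store schema strings in the SequenceFile metadata so a reader
// like the one above can pick them up. Path and schema JSON are placeholders.
Configuration conf = new Configuration();
Path out = new Path("/tmp/with-schema.seq");

SequenceFile.Metadata fileMeta = new SequenceFile.Metadata();
fileMeta.set(new Text("key.schema"), new Text("\"string\""));
fileMeta.set(new Text("value.schema"), new Text("\"int\""));

try (SequenceFile.Writer writer = SequenceFile.createWriter(conf,
        SequenceFile.Writer.file(out),
        SequenceFile.Writer.keyClass(BytesWritable.class),
        SequenceFile.Writer.valueClass(BytesWritable.class),
        SequenceFile.Writer.metadata(fileMeta))) {
    writer.append(new BytesWritable(new byte[] { 1 }), new BytesWritable(new byte[] { 2 }));
}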
/** Tests that reading and writing ordinary Writables still works. */
@Test
public void testReadWritables() throws IOException {
    Path sequenceFilePath = new Path(new File(mTempDir.getRoot(), "output.seq").getPath());

    writeSequenceFile(sequenceFilePath, Text.class, IntWritable.class, null, null,
        new Text("one"), new IntWritable(1),
        new Text("two"), new IntWritable(2));

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);

    AvroSequenceFile.Reader.Options options = new AvroSequenceFile.Reader.Options()
        .withFileSystem(fs)
        .withInputPath(sequenceFilePath)
        .withConfiguration(conf);
    SequenceFile.Reader reader = new AvroSequenceFile.Reader(options);

    Text key = new Text();
    IntWritable value = new IntWritable();

    // Read the first record.
    assertTrue(reader.next(key));
    assertEquals("one", key.toString());
    reader.getCurrentValue(value);
    assertNotNull(value);
    assertEquals(1, value.get());

    // Read the second record.
    assertTrue(reader.next(key));
    assertEquals("two", key.toString());
    reader.getCurrentValue(value);
    assertNotNull(value);
    assertEquals(2, value.get());

    assertFalse("Should be no more records.", reader.next(key));
}
public static void copyTo64MB(String src, String dst) throws IOException {
    Configuration hconf = new Configuration();
    Path srcPath = new Path(src);
    Path dstPath = new Path(dst);

    FileSystem fs = FileSystem.get(hconf);
    long srcSize = fs.getFileStatus(srcPath).getLen();
    int copyTimes = (int) (67108864 / srcSize); // 64 MB
    System.out.println("Copy " + copyTimes + " times");

    Reader reader = new Reader(hconf, SequenceFile.Reader.file(srcPath));
    Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), hconf);
    Text value = new Text();

    Writer writer = SequenceFile.createWriter(hconf, Writer.file(dstPath), Writer.keyClass(key.getClass()),
            Writer.valueClass(Text.class), Writer.compression(CompressionType.BLOCK, getLZOCodec(hconf)));

    int count = 0;
    while (reader.next(key, value)) {
        for (int i = 0; i < copyTimes; i++) {
            writer.append(key, value);
            count++;
        }
    }

    System.out.println("Len: " + writer.getLength());
    System.out.println("Rows: " + count);

    reader.close();
    writer.close();
}
@SuppressWarnings("unchecked") public void testReadAvroWithoutReaderSchemas() throws IOException { Path sequenceFilePath = new Path(new File(mTempDir.getRoot(), "output.seq").getPath()); new AvroKey<CharSequence>("two"), new AvroValue<>(2)); Configuration conf = new Configuration(); FileSystem fs = FileSystem.get(conf); AvroSequenceFile.Reader.Options options = new AvroSequenceFile.Reader.Options() .withFileSystem(fs) key = (AvroKey<CharSequence>) reader.next(key); assertNotNull(key); assertEquals("one", key.datum().toString()); value = (AvroValue<Integer>) reader.getCurrentValue(value); assertNotNull(value); assertEquals(1, value.datum().intValue()); key = (AvroKey<CharSequence>) reader.next(key); assertNotNull(key); assertEquals("two", key.datum().toString()); value = (AvroValue<Integer>) reader.getCurrentValue(value); assertNotNull(value); assertEquals(2, value.datum().intValue()); assertNull("Should be no more records.", reader.next(key));
public static ClusteringPolicy readPolicy(Path path) throws IOException {
    Path policyPath = new Path(path, POLICY_FILE_NAME);
    Configuration config = new Configuration();
    FileSystem fs = FileSystem.get(policyPath.toUri(), config);
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, policyPath, config);
    Text key = new Text();
    ClusteringPolicyWritable cpw = new ClusteringPolicyWritable();
    reader.next(key, cpw);
    Closeables.close(reader, true);
    return cpw.getValue();
}
private String[] getContent(Configuration conf, Path path) throws Exception {
    ClassLoader prevClassLoader = ClassLoaderStack.addJarFile(
        new Path(new Path(new SqoopOptions().getJarOutputDir()), getTableName() + ".jar").toString(),
        getTableName());

    FileSystem fs = FileSystem.getLocal(conf);
    FileStatus[] stats = fs.listStatus(path);
    Path[] paths = new Path[stats.length];
    for (int i = 0; i < stats.length; i++) {
        paths[i] = stats[i].getPath();
    }

    // Read all the files adding the value lines to the list.
    List<String> strings = new ArrayList<String>();
    for (Path filePath : paths) {
        if (filePath.getName().startsWith("_") || filePath.getName().startsWith(".")) {
            continue;
        }

        // Need to use new configuration object so that it has the proper classloaders.
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, filePath, new Configuration());
        WritableComparable key = (WritableComparable) reader.getKeyClass().newInstance();
        Writable value = (Writable) reader.getValueClass().newInstance();
        while (reader.next(key, value)) {
            strings.add(value.toString());
        }
    }

    ClassLoaderStack.setCurrentClassLoader(prevClassLoader);
    return strings.toArray(new String[0]);
}
String dictInfoPath = mergedDictInfo == null ? "" : mergedDictInfo.getResourcePath();
context.write(new IntWritable(-1), new Text(tblCol + "=" + dictInfoPath));
// ...
        context.getConfiguration().get(BatchConstants.ARG_META_URL));
final String cubeName = context.getConfiguration().get(BatchConstants.ARG_CUBE_NAME);
final String segmentId = context.getConfiguration().get(BatchConstants.ARG_SEGMENT_ID);
final String statOutputPath = context.getConfiguration()
        .get(MergeDictionaryJob.OPTION_OUTPUT_PATH_STAT.getOpt());
// ...
conf = HadoopUtil.getCurrentConfiguration();
reader = new SequenceFile.Reader(fs, new Path(tempFile.getAbsolutePath()), conf);
LongWritable keyW = (LongWritable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
BytesWritable valueW = (BytesWritable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
while (reader.next(keyW, valueW)) {
    if (keyW.get() == 0L) {
        // ...
CubeStatsWriter.writeCuboidStatistics(conf, new Path(statOutputPath), cuboidHLLMap, averageSamplingPercentage);
Path statisticsFilePath = new Path(statOutputPath,
        // ...
FSDataInputStream fis = fs.open(statisticsFilePath);
// ...
context.write(new IntWritable(-1), new Text(""));
@SuppressWarnings("unchecked") public static Path merge(Configuration configuration, String inputPath, String outputPath, int mapperTasks, boolean textFileFormat, boolean deleteSource) throws IOException, InstantiationException, IllegalAccessException { Class<? extends Writable> keyClass = LongWritable.class; Class<? extends Writable> valueClass = Text.class; FileSystem fs = FileSystem.get(new Configuration()); if (!textFileFormat) { FileStatus[] fileStatus = fs.globStatus(new Path(inputPath)); Preconditions.checkArgument(fileStatus.length > 0, "Invalid input path..."); SequenceFile.Reader reader = new SequenceFile.Reader(fs, fileStatus[fileStatus.length - 1].getPath(), fs.getConf()); try { keyClass = (Class<? extends Writable>) reader.getKeyClass(); valueClass = (Class<? extends Writable>) reader.getValueClass(); sLogger.info("Key type: " + keyClass.toString()); sLogger.info("Value type: " + valueClass.toString()); } catch (Exception e) { throw new RuntimeException("Error in loading key/value class"); } reader.close(); } if (textFileFormat) { return mergeTextFiles(configuration, inputPath, outputPath, mapperTasks, deleteSource); } else { return mergeSequenceFiles(configuration, inputPath, outputPath, mapperTasks, keyClass, valueClass, deleteSource); } }
private List<MyMessage> readMessages(Path path) throws IOException, InstantiationException, IllegalAccessException {
    List<MyMessage> messages = new ArrayList<MyMessage>();
    try {
        for (FileStatus file : fs.listStatus(path)) {
            if (file.isDir()) {
                messages.addAll(readMessages(file.getPath()));
            } else {
                SequenceFile.Reader reader = new SequenceFile.Reader(fs, file.getPath(), new Configuration());
                try {
                    LongWritable key = (LongWritable) reader.getKeyClass().newInstance();
                    Text value = (Text) reader.getValueClass().newInstance();
                    while (reader.next(key, value)) {
                        messages.add(gson.fromJson(value.toString(), MyMessage.class));
                    }
                } finally {
                    reader.close();
                }
            }
        }
    } catch (FileNotFoundException e) {
        System.out.println("No camus messages were found in [" + path + "]");
    }
    return messages;
}
protected static void checkOutputData(Path dataDir, int expectedCount) throws IOException {
    List<Path> dataFiles = IOUtils.listFiles(dataDir, "*/part-*");
    Configuration hadoopConf = new Configuration();
    Writable key = new Text();
    Writable message = new Text();
    int count = 0;
    for (Path dataFile : dataFiles) {
        SequenceFile.Reader.Option fileOptions =
                SequenceFile.Reader.file(new org.apache.hadoop.fs.Path(dataFile.toUri().toString()));
        try (SequenceFile.Reader reader = new SequenceFile.Reader(hadoopConf, fileOptions)) {
            while (reader.next(key, message)) {
                count++;
    // ...
    log.info("Data file: {}", dataFile);
    SequenceFile.Reader.Option fileOptions =
            SequenceFile.Reader.file(new org.apache.hadoop.fs.Path(dataFile.toUri().toString()));
    try (SequenceFile.Reader reader = new SequenceFile.Reader(hadoopConf, fileOptions)) {
        while (reader.next(key, message)) {
            log.info(" {} = {}", key, message);
configurer.addOutputFormat("out2", SequenceFileOutputFormat.class, Text.class, IntWritable.class);
Path outDir = new Path(workDir.getPath(), job.getJobName());
FileOutputFormat.setOutputPath(configurer.getJob("out1"), new Path(outDir, "out1"));
FileOutputFormat.setOutputPath(configurer.getJob("out2"), new Path(outDir, "out2"));
// ...
String[] textOutput = readFully(textOutPath).split("\n");

Path seqOutPath = new Path(outDir, "out2/part-m-00000");
SequenceFile.Reader reader = new SequenceFile.Reader(fs, seqOutPath, mrConf);
Text key = new Text();
IntWritable value = new IntWritable();

String[] words = fileContent.split(" ");
for (int i = 0; i < words.length; i++) {
    Assert.assertEquals((i + 1) + "\t" + words[i], textOutput[i]);
    reader.next(key, value);
    Assert.assertEquals(words[i], key.toString());
    Assert.assertEquals((i + 1), value.get());
}
Assert.assertFalse(reader.next(key, value));
public ObjectWritableIterator(final Configuration configuration, final Path path) throws IOException {
    for (final FileStatus status : FileSystem.get(configuration).listStatus(path, HiddenFileFilter.instance())) {
        this.readers.add(new SequenceFile.Reader(configuration, SequenceFile.Reader.file(status.getPath())));
    }
}
public static void checkOutput(FileSystem fileSys, Configuration conf, int tasks) throws Exception {
    FileStatus[] listStatus = fileSys.globStatus(new Path(OUTPUT_PATH + "/part-*"));
    // ...
    if (!status.isDir()) {
        SequenceFile.Reader reader = new SequenceFile.Reader(fileSys, status.getPath(), conf);
        int superStep = 0;
        int taskstep = 0;
        IntWritable key = new IntWritable();
        Text value = new Text();
        while (reader.next(key, value)) {
            assertEquals(superStep, key.get());
            taskstep++;
            // ...
        }
        reader.close();
    }
    // ...
    fileSys.delete(new Path(TMP_OUTPUT), true);
}
private Map<CamusRequest, EtlKey> getPreviousOffsets(Path[] inputs, JobContext context) throws IOException {
    Map<CamusRequest, EtlKey> offsetKeysMap = new HashMap<CamusRequest, EtlKey>();
    for (Path input : inputs) {
        FileSystem fs = input.getFileSystem(context.getConfiguration());
        for (FileStatus f : fs.listStatus(input, new OffsetFileFilter())) {
            log.info("previous offset file:" + f.getPath().toString());
            SequenceFile.Reader reader = new SequenceFile.Reader(fs, f.getPath(), context.getConfiguration());
            EtlKey key = new EtlKey();
            while (reader.next(key, NullWritable.get())) {
                // TODO: factor out kafka specific request functionality
                CamusRequest request = new EtlRequest(context, key.getTopic(), key.getLeaderId(), key.getPartition());
                if (offsetKeysMap.containsKey(request)) {
                    EtlKey oldKey = offsetKeysMap.get(request);
                    if (oldKey.getOffset() < key.getOffset()) {
                        offsetKeysMap.put(request, key);
                    }
                } else {
                    offsetKeysMap.put(request, key);
                }
                key = new EtlKey();
            }
            reader.close();
        }
    }
    return offsetKeysMap;
}
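Most of the excerpts above go through the SequenceFile.Reader(FileSystem, Path, Configuration) constructor, which is deprecated in current Hadoop releases; a few already use the options-based constructor via SequenceFile.Reader.file(...). For reference, a minimal sketch of a generic read loop in that style; the input path is a placeholder, and the key/value classes are discovered from the file header through reflection rather than assumed.

// Generic read loop using the non-deprecated, options-based constructor.
// The input path is a placeholder; key/value classes come from the file header.
Configuration conf = new Configuration();
Path input = new Path("/tmp/example.seq");

try (SequenceFile.Reader reader = new SequenceFile.Reader(conf, SequenceFile.Reader.file(input))) {
    Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
    Writable value = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
    while (reader.next(key, value)) {
        System.out.println(key + "\t" + value);
    }
}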