// NOTE(review): this line appears to be a truncated/merged fragment of two or more
// test-fixture writers: one writing (key, Text) pairs and one writing
// (NullWritable, value1) pairs to sequenceFileInPathNull. The braces are unbalanced,
// the loop variable `i` is redeclared inside its own scope, and `writer1`, `value1`,
// `key`, `kvCount` and `sequenceFileInPathNull` are declared outside this view.
// The `try` opened here has no visible catch/finally — the enclosing method is cut off.
// TODO confirm against the original file before editing further.
org.apache.hadoop.conf.Configuration conf = new org.apache.hadoop.conf.Configuration(); FileSystem fs = FileSystem.get(URI.create(sequenceFile.getAbsolutePath()), conf); Path path = new Path(sequenceFile.getAbsolutePath()); Text value = new Text(); SequenceFile.Writer writer = null; try { writer = SequenceFile.createWriter(fs, conf, path, key.getClass(), value.getClass()); for (int i = 0; i < kvCount; i++) { if (i == 1) { value.set(i + " - somestring"); writer.append(key, value); value.set(i + " - somestring"); writer.append(key, value); path = new Path(sequenceFileInPathNull); writer1 = SequenceFile.createWriter(fs, conf, path, NullWritable.class, value1.getClass()); for (int i = 0; i < kvCount; i++) { value1.set(i); writer1.append(NullWritable.get(), value1);
/**
 * Gets the type of compression for the output sequence file.
 *
 * @param conf The job configuration.
 * @return The compression type named by {@code FileOutputFormat.COMPRESS_TYPE},
 *         or the SequenceFile default when the property is unset.
 */
public static CompressionType getOutputCompressionType(Configuration conf) {
  final String configured = conf.get(FileOutputFormat.COMPRESS_TYPE);
  return configured == null
      ? SequenceFile.getDefaultCompressionType(conf)
      : CompressionType.valueOf(configured);
}
}
/**
 * Closes the multiple-outputs writer, then guarantees the base-cuboid output
 * directory exists — creating it together with an empty "part-m-00000"
 * sequence file so downstream consumers always find a part file.
 *
 * @param context the task context supplying the configuration
 * @throws IOException          if the file system cannot be reached or written
 * @throws InterruptedException propagated from the cleanup contract
 */
@Override
public void doCleanup(Context context) throws IOException, InterruptedException {
  mos.close();

  Path baseDir = new Path(context.getConfiguration().get(FileOutputFormat.OUTDIR), PathNameCuboidBase);
  FileSystem fs = FileSystem.get(context.getConfiguration());
  if (fs.exists(baseDir)) {
    return; // directory (and its part files) already produced by the mappers
  }
  fs.mkdirs(baseDir);
  // Create-and-close immediately: the file's only purpose is to exist.
  SequenceFile
      .createWriter(context.getConfiguration(),
          SequenceFile.Writer.file(new Path(baseDir, "part-m-00000")),
          SequenceFile.Writer.keyClass(Text.class),
          SequenceFile.Writer.valueClass(Text.class))
      .close();
}
/**
 * Persists the offsets of keys whose data was missed this run, writing them
 * as EtlKey records to an "&lt;OFFSET_PREFIX&gt;-previous" sequence file under
 * the job output directory so the next run can resume from them.
 *
 * @param missedKeys keys whose offsets must be carried forward
 * @param context    job context supplying configuration and output path
 * @throws IOException if the directory or file cannot be written
 */
private void writePrevious(Collection<EtlKey> missedKeys, JobContext context) throws IOException {
  FileSystem fs = FileSystem.get(context.getConfiguration());
  Path output = FileOutputFormat.getOutputPath(context);
  // BUG FIX: create the directory when it does NOT exist. The original
  // condition was inverted (mkdirs only ran when the directory already existed),
  // so the first run against a fresh output path would fail to create it.
  if (!fs.exists(output)) {
    fs.mkdirs(output);
  }

  output = new Path(output, EtlMultiOutputFormat.OFFSET_PREFIX + "-previous");
  SequenceFile.Writer writer =
      SequenceFile.createWriter(fs, context.getConfiguration(), output, EtlKey.class, NullWritable.class);
  try {
    for (EtlKey key : missedKeys) {
      writer.append(key, NullWritable.get());
    }
  } finally {
    // Close in finally so a failed append does not leak the writer.
    writer.close();
  }
}
// NOTE(review): truncated fragment of what appears to be a commitTask override —
// it moves data files to partitioned destinations, writes a "counts.<task-id>"
// JSON temp file, and writes offsets to an OFFSET_PREFIX sequence file before
// delegating to super.commitTask(). Several statements are cut mid-call (the
// createWriter argument list never closes; `partitionedFile`, `count`, `offsets`,
// `s`, `baseOutDir` come from outside this view) and braces are unbalanced.
// TODO confirm against the original file before editing further.
FileSystem fs = FileSystem.get(context.getConfiguration()); if (EtlMultiOutputFormat.isRunMoveData(context)) { Path workPath = super.getWorkPath(); getPartitionedPath(context, file, count.getEventCount(), count.getLastKey().getOffset()); Path dest = new Path(baseOutDir, partitionedFile); if (!fs.exists(dest.getParent())) { mkdirs(fs, dest.getParent()); Path tempPath = new Path(workPath, "counts." + context.getConfiguration().get("mapred.task.id")); OutputStream outputStream = new BufferedOutputStream(fs.create(tempPath)); ObjectMapper mapper = new ObjectMapper(); SequenceFile.Writer offsetWriter = SequenceFile.createWriter(fs, context.getConfiguration(), new Path(super.getWorkPath(), EtlMultiOutputFormat.getUniqueFile(context, EtlMultiOutputFormat.OFFSET_PREFIX, "")), log.info("Avg record size for " + offsets.get(s).getTopic() + ":" + offsets.get(s).getPartition() + " = " + offsets.get(s).getMessageSize()); offsetWriter.append(offsets.get(s), NullWritable.get()); offsetWriter.close(); super.commitTask(context);
private static void createControlFile( FileSystem fs, int fileSize, // in MB int nrFiles ) throws IOException { LOG.info("creating control file: "+fileSize+" mega bytes, "+nrFiles+" files"); fs.delete(CONTROL_DIR, true); for(int i=0; i < nrFiles; i++) { String name = getFileName(i); Path controlFile = new Path(CONTROL_DIR, "in_file_" + name); SequenceFile.Writer writer = null; try { writer = SequenceFile.createWriter(fs, fsConfig, controlFile, Text.class, LongWritable.class, CompressionType.NONE); writer.append(new Text(name), new LongWritable(fileSize)); } catch(Exception e) { throw new IOException(e.getLocalizedMessage()); } finally { if (writer != null) writer.close(); writer = null; } } LOG.info("created control files for: "+nrFiles+" files"); }
/**
 * Test that makes sure createWriter succeeds on a file that was
 * already created.
 *
 * @throws IOException if the local file system cannot be used
 */
public void testCreateWriterOnExistingFile() throws IOException {
  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.getLocal(conf);
  Path name = new Path(new Path(System.getProperty("test.build.data", "."),
      "createWriterOnExistingFile"), "file");

  // FIX: close the stream returned by create(). The original leaked the
  // open output stream, leaving a dangling file handle behind the file
  // that createWriter is about to reopen.
  fs.create(name).close();
  SequenceFile.createWriter(fs, conf, name, RandomDatum.class,
      RandomDatum.class, 512, (short) 1, 4096, false,
      CompressionType.NONE, null, new Metadata());
}
/**
 * Opens a sequence-file-backed vector writer at the given location.
 *
 * @param outFile path of the sequence file to create
 * @return a VectorWriter appending (LongWritable, VectorWritable) pairs
 * @throws IOException if the file cannot be created
 */
private static VectorWriter getSeqFileWriter(String outFile) throws IOException {
  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.get(conf);
  Path target = new Path(outFile);
  SequenceFile.Writer underlying =
      SequenceFile.createWriter(fs, conf, target, LongWritable.class, VectorWritable.class);
  return new SequenceFileVectorWriter(underlying);
}
// NOTE(review): truncated fragment of HFile partition-file setup. It creates the
// output folder if missing, then — when the projected HFile size times the HBase
// compaction threshold is below the per-region MB budget — writes region split
// row keys to "part-r-00000_hfile". The literal 9223372036854775807L is
// Long.MAX_VALUE (largest possible timestamp for createFirstOnRow). The
// try-with-resources block and surrounding loop (`splits`, `i`,
// `innerRegionSplits`, `hfileSizeMB`, `mbPerRegion`) are cut off mid-statement
// and braces are unbalanced. TODO confirm against the original file.
if (fs.exists(outputFolder) == false) { fs.mkdirs(outputFolder); int compactionThreshold = Integer.parseInt(hbaseConf.get("hbase.hstore.compactionThreshold", "3")); logger.info("hbase.hstore.compactionThreshold is {0}", String.valueOf(compactionThreshold)); if (hfileSizeMB > 0.0f && hfileSizeMB * compactionThreshold < mbPerRegion) { final Path hfilePartitionFile = new Path(outputFolder, "part-r-00000_hfile"); short regionCount = (short) innerRegionSplits.size(); try (SequenceFile.Writer hfilePartitionWriter = SequenceFile.createWriter(hbaseConf, SequenceFile.Writer.file(hfilePartitionFile), SequenceFile.Writer.keyClass(RowKeyWritable.class), SequenceFile.Writer.valueClass(NullWritable.class))) { hfilePartitionWriter.append( new RowKeyWritable(KeyValueUtil.createFirstOnRow(splits.get(i), 9223372036854775807L).createKeyOnly(false).getKey()), NullWritable.get());
public static void copyTo64MB(String src, String dst) throws IOException { Configuration hconf = new Configuration(); Path srcPath = new Path(src); Path dstPath = new Path(dst); FileSystem fs = FileSystem.get(hconf); long srcSize = fs.getFileStatus(srcPath).getLen(); int copyTimes = (int) (67108864 / srcSize); // 64 MB System.out.println("Copy " + copyTimes + " times"); Reader reader = new Reader(hconf, SequenceFile.Reader.file(srcPath)); Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), hconf); Text value = new Text(); Writer writer = SequenceFile.createWriter(hconf, Writer.file(dstPath), Writer.keyClass(key.getClass()), Writer.valueClass(Text.class), Writer.compression(CompressionType.BLOCK, getLZOCodec(hconf))); int count = 0; while (reader.next(key, value)) { for (int i = 0; i < copyTimes; i++) { writer.append(key, value); count++; } } System.out.println("Len: " + writer.getLength()); System.out.println("Rows: " + count); reader.close(); writer.close(); }
/**
 * Buffers one line into a lazily-created, temp-file-backed sequence file.
 * Keys are NullWritable; values are the raw line text.
 *
 * @param line the line to append
 * @throws IOException if the temp file cannot be created or appended to
 */
@Override
public void write(String line) throws IOException {
  ensureWriterOpen();
  text.set(line);
  writer.append(NullWritable.get(), text);
}

/** Creates the temp file and sequence file writer on first use. */
private void ensureWriterOpen() throws IOException {
  if (writer != null) {
    return;
  }
  tmpFile = File.createTempFile("seq-", ".dat");
  writer = SequenceFile.createWriter(new Configuration(),
      Writer.file(new Path(tmpFile.toURI())),
      Writer.keyClass(NullWritable.class),
      Writer.valueClass(Text.class));
}
/** * Reduce task done, write output to a file. */ @Override public void cleanup(Context context) throws IOException { //write output to a file Configuration conf = context.getConfiguration(); Path outDir = new Path(conf.get(FileOutputFormat.OUTDIR)); Path outFile = new Path(outDir, "reduce-out"); FileSystem fileSys = FileSystem.get(conf); SequenceFile.Writer writer = SequenceFile.createWriter(fileSys, conf, outFile, LongWritable.class, LongWritable.class, CompressionType.NONE); writer.append(new LongWritable(numInside), new LongWritable(numOutside)); writer.close(); } }
/**
 * Creates one control file — a single (history-log-path, 0) record — for each
 * job-history log in the [start, end) slice assigned to this daemon thread,
 * then bumps the shared finished-thread counter. I/O failures are logged and
 * end the loop rather than killing the thread.
 */
public void run() {
  try {
    for (int idx = start; idx < end; idx++) {
      String name = getFileName(idx);
      Path controlFile = new Path(INPUT_DIR, "in_file_" + name);
      SequenceFile.Writer writer = null;
      try {
        writer = SequenceFile.createWriter(fs, fs.getConf(), controlFile,
            Text.class, LongWritable.class, CompressionType.NONE);
        String logFile = jhLogFiles[idx].getPath().toString();
        writer.append(new Text(logFile), new LongWritable(0));
      } catch (Exception e) {
        throw new IOException(e);
      } finally {
        if (writer != null) {
          writer.close();
        }
        writer = null;
      }
    }
  } catch (IOException ex) {
    LOG.error("FileCreateDaemon failed.", ex);
  }
  // NOTE(review): this increment is not atomic; presumably the field is
  // volatile or only read after all daemons join — confirm at the field site.
  numFinishedThreads++;
}
/**
 * Writes a small sequence file of {@code rowCount} (IntWritable, "line i")
 * records, deleting any existing file at {@code file} first. I/O failures are
 * printed rather than propagated, preserving this helper's original
 * best-effort behavior.
 *
 * @param fs       file system to write to
 * @param file     destination path (replaced if present)
 * @param rowCount number of records to write
 * @throws IOException declared for signature compatibility; failures are
 *                     currently caught and printed instead
 */
private static void createSeqFile(FileSystem fs, Path file, int rowCount) throws IOException {
  Configuration conf = new Configuration();
  try {
    if (fs.exists(file)) {
      fs.delete(file, false);
    }

    SequenceFile.Writer w = SequenceFile.createWriter(fs, conf, file, IntWritable.class, Text.class);
    try {
      for (int i = 0; i < rowCount; i++) {
        w.append(new IntWritable(i), new Text("line " + i));
      }
    } finally {
      // FIX: close in finally so a failed append does not leak the writer.
      w.close();
    }
    System.out.println("done");
  } catch (IOException e) {
    e.printStackTrace();
  }
}
// NOTE(review): truncated fragment of a state-store put(). It resolves a
// (possibly tmp-prefixed) table path under <storeRootDir>/<storeName>, creates
// the state file if absent, then appends one (state-id, state) record via a
// BLOCK-compressed, DefaultCodec sequence file writer registered with `closer`.
// The fragment is cut off mid-try: the closing braces, closer.close(), and the
// tmp-to-final rename (implied by useTmpFileForPut) are outside this view.
// TODO confirm against the original file before editing further.
public void put(String storeName, String tableName, T state) throws IOException { String tmpTableName = this.useTmpFileForPut ? TMP_FILE_PREFIX + tableName : tableName; Path tmpTablePath = new Path(new Path(this.storeRootDir, storeName), tmpTableName); if (!this.fs.exists(tmpTablePath) && !create(storeName, tmpTableName)) { throw new IOException("Failed to create a state file for table " + tmpTableName); try { @SuppressWarnings("deprecation") SequenceFile.Writer writer = closer.register(SequenceFile.createWriter(this.fs, this.conf, tmpTablePath, Text.class, this.stateClass, SequenceFile.CompressionType.BLOCK, new DefaultCodec())); writer.append(new Text(Strings.nullToEmpty(state.getId())), state); } catch (Throwable t) { throw closer.rethrow(t);
/**
 * Creates a sequence file writer over the given log file path, using BLOCK
 * compression when a codec is supplied and no compression otherwise. Keys
 * are LongWritable offsets; values are raw BytesWritable payloads.
 *
 * @param path  the log file to write
 * @param codec compression codec to apply, or null for an uncompressed file
 * @throws IOException if the writer cannot be created
 */
public SequenceFileWriter(LogFilePath path, CompressionCodec codec) throws IOException {
  Configuration config = new Configuration();
  fsPath = new Path(path.getLogFilePath());
  FileSystem fs = FileUtil.getFileSystem(path.getLogFilePath());
  this.mWriter = (codec == null)
      ? SequenceFile.createWriter(fs, config, fsPath,
          LongWritable.class, BytesWritable.class)
      : SequenceFile.createWriter(fs, config, fsPath,
          LongWritable.class, BytesWritable.class,
          SequenceFile.CompressionType.BLOCK, codec);
  this.mKey = new LongWritable();
  this.mValue = new BytesWritable();
  LOG.info("Created sequence file writer: {}", fsPath);
}
/**
 * Writes one WorkUnitState record to each of two sequence files ("seq1" with
 * watermark 10, "seq2" with watermark 100) using the Writable shim
 * serialization; writers are registered with a Closer so they are closed
 * (and flushed) even on failure.
 */
@Test
@SuppressWarnings("deprecation")
public void testSerializeToSequenceFile() throws IOException {
  Closer closer = Closer.create();
  Configuration conf = new Configuration();
  WritableShimSerialization.addToHadoopConfiguration(conf);
  try {
    Text key = new Text();
    WorkUnitState workUnitState = new WorkUnitState();
    TestWatermark watermark = new TestWatermark();

    // First file: watermark 10.
    SequenceFile.Writer writer1 = closer.register(SequenceFile.createWriter(this.fs, conf,
        new Path(this.outputPath, "seq1"), Text.class, WorkUnitState.class));
    watermark.setLongWatermark(10L);
    workUnitState.setActualHighWatermark(watermark);
    writer1.append(key, workUnitState);

    // Second file: same state object, watermark bumped to 100.
    SequenceFile.Writer writer2 = closer.register(SequenceFile.createWriter(this.fs, conf,
        new Path(this.outputPath, "seq2"), Text.class, WorkUnitState.class));
    watermark.setLongWatermark(100L);
    workUnitState.setActualHighWatermark(watermark);
    writer2.append(key, workUnitState);
  } catch (Throwable t) {
    throw closer.rethrow(t);
  } finally {
    closer.close();
  }
}
/**
 * Writes the given split points to a TotalOrderPartitioner partition file
 * under the test data directory, registers the file in the configuration,
 * and sets the reducer count to {@code splits.length + 1}.
 *
 * @param testname subdirectory name for this test's partition file
 * @param conf     configuration to update with the partition file location
 * @param splits   ordered split keys to record (must be non-empty)
 * @return the path of the partition file written
 * @throws IOException if the partition file cannot be written
 */
private static <T extends WritableComparable<?>> Path writePartitionFile(
    String testname, Configuration conf, T[] splits) throws IOException {
  final FileSystem fs = FileSystem.getLocal(conf);
  final Path testdir =
      new Path(System.getProperty("test.build.data", "/tmp")).makeQualified(fs);
  Path partitionFile = new Path(testdir, testname + "/_partition.lst");
  TotalOrderPartitioner.setPartitionFile(conf, partitionFile);
  conf.setInt(MRJobConfig.NUM_REDUCES, splits.length + 1);
  SequenceFile.Writer writer = null;
  try {
    writer = SequenceFile.createWriter(fs, conf, partitionFile,
        splits[0].getClass(), NullWritable.class,
        SequenceFile.CompressionType.NONE);
    for (T split : splits) {
      writer.append(split, NullWritable.get());
    }
  } finally {
    if (writer != null) {
      writer.close();
    }
  }
  return partitionFile;
}
// NOTE(review): truncated fragment of a region-split sampler. It sorts the
// sampled keys with the job's grouping comparator, replaces any existing
// TotalOrderPartitioner partition file, and writes sampled keys (with
// NullWritable values) as region start keys. The selection loop is cut off:
// `currentKey`, `lastKey`, `currentKeyOffset`, `lastKeyIndex`, `splits`,
// `numPartitions`, and the loop driven by `stepSize` are defined outside this
// view, and braces are unbalanced. TODO confirm against the original file.
RawComparator<K> comparator = (RawComparator<K>) job.getGroupingComparator(); Arrays.sort(samples, comparator); Path dst = new Path(TotalOrderPartitioner.getPartitionFile(conf)); FileSystem fs = dst.getFileSystem(conf); if (fs.exists(dst)) fs.delete(dst, false); SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, dst, job.getMapOutputKeyClass(), NullWritable.class); NullWritable nullValue = NullWritable.get(); float stepSize = samples.length / (float) numPartitions; writer.append(currentKey, nullValue); lastKey = currentKey; lastKeyIndex = currentKeyOffset; splits.add(currentKey); writer.close(); LOG.info("********************************************* "); LOG.info(" START KEYs for new Regions: ");
// NOTE(review): truncated fragment of a MapFile-style writer constructor. It
// starts mid-statement (the tail of a "... must be set" exception message),
// reads the index interval from the configuration, creates the target
// directory, then opens two sequence files: an uncompressed-by-options data
// file (DATA_FILE_NAME) and a BLOCK-compressed (key -> LongWritable position)
// index file (INDEX_FILE_NAME). `dataOptions`, `indexOptions`, `opts`, and the
// data-file value class are assembled partly outside this view, and braces are
// unbalanced. TODO confirm against the original file before editing further.
+ "must be set"); this.indexInterval = conf.getInt(INDEX_INTERVAL, this.indexInterval); FileSystem fs = dirName.getFileSystem(conf); if (!fs.mkdirs(dirName)) { throw new IOException("Mkdirs failed to create directory " + dirName); Path dataFile = new Path(dirName, DATA_FILE_NAME); Path indexFile = new Path(dirName, INDEX_FILE_NAME); SequenceFile.Writer.file(dataFile), SequenceFile.Writer.keyClass(keyClass)); this.data = SequenceFile.createWriter(conf, dataOptions); Options.prependOptions(opts, SequenceFile.Writer.file(indexFile), SequenceFile.Writer.keyClass(keyClass), SequenceFile.Writer.valueClass(LongWritable.class), SequenceFile.Writer.compression(CompressionType.BLOCK)); this.index = SequenceFile.createWriter(conf, indexOptions);