if (fs.exists(outputFolder) == false) { fs.mkdirs(outputFolder); int compactionThreshold = Integer.parseInt(hbaseConf.get("hbase.hstore.compactionThreshold", "3")); logger.info("hbase.hstore.compactionThreshold is {0}", String.valueOf(compactionThreshold)); if (hfileSizeMB > 0.0f && hfileSizeMB * compactionThreshold < mbPerRegion) { final Path hfilePartitionFile = new Path(outputFolder, "part-r-00000_hfile"); short regionCount = (short) innerRegionSplits.size(); try (SequenceFile.Writer hfilePartitionWriter = SequenceFile.createWriter(hbaseConf, SequenceFile.Writer.file(hfilePartitionFile), SequenceFile.Writer.keyClass(RowKeyWritable.class), SequenceFile.Writer.valueClass(NullWritable.class))) { hfilePartitionWriter.append( new RowKeyWritable(KeyValueUtil.createFirstOnRow(splits.get(i), 9223372036854775807L).createKeyOnly(false).getKey()), NullWritable.get());
@Override protected Path createInputFileListing(Job job) throws IOException { if (conf.get(NUMBER_OF_LEVELS_TO_PRESERVE_KEY) == null) { return super.createInputFileListing(job); FileSystem fs = srcFiles.get(0).getFileSystem(conf); for (Path path : srcFiles) { FileStatus fst = fs.getFileStatus(path); totalBytesExpected += fst.getLen(); Text key = getKey(path); writer.append(key, new CopyListingFileStatus(fst)); writer.close(); cfg.setLong(DistCpConstants.CONF_LABEL_TOTAL_BYTES_TO_BE_COPIED, totalBytesExpected); cfg.set(DistCpConstants.CONF_LABEL_LISTING_FILE_PATH, fileListingPath.toString()); cfg.setLong(DistCpConstants.CONF_LABEL_TOTAL_NUMBER_OF_RECORDS, totalRecords); } catch (NoSuchFieldException | SecurityException | IllegalArgumentException
/**
 * Fills the sequence file located at {@code file()} with {@code COUNT} records before the
 * tests of this class run. Record {@code n} uses the long {@code n} as its key and the decimal
 * string form of {@code n} as its value.
 *
 * @throws IOException if obtaining the file system, creating the writer or appending fails
 */
@BeforeClass
public static void testWriteSequenceFile() throws IOException {
  final Configuration hadoopConf = new Configuration();
  final URI target = file().toURI();
  try (SequenceFile.Writer out = new SequenceFile.Writer(
      FileSystem.get(target, hadoopConf),
      hadoopConf,
      new Path(target.toString()),
      LongWritable.class,
      Text.class)) {
    // A single key/value pair is mutated and re-appended for every record.
    final LongWritable recordKey = new LongWritable();
    final Text recordValue = new Text();
    int n = 0;
    while (n < COUNT) {
      recordKey.set(n);
      recordValue.set(Integer.toString(n));
      out.append(recordKey, recordValue);
      ++n;
    }
  }
}
/**
 * Writes {@code policy} as the single record of a sequence file named
 * {@code POLICY_FILE_NAME} underneath {@code path}. The record key is an empty {@link Text}
 * and the value is the policy wrapped in a {@link ClusteringPolicyWritable}.
 *
 * @param policy the clustering policy to persist
 * @param path   the directory under which the policy file is created
 * @throws IOException if the file system cannot be obtained, or writing or closing fails
 */
public static void writePolicy(ClusteringPolicy policy, Path path) throws IOException {
  Path policyPath = new Path(path, POLICY_FILE_NAME);
  Configuration config = new Configuration();
  FileSystem fs = FileSystem.get(policyPath.toUri(), config);
  // try-with-resources closes the writer even when append() throws; previously the writer
  // was only closed on the success path and leaked on failure.
  try (SequenceFile.Writer writer =
      new SequenceFile.Writer(fs, config, policyPath, Text.class, ClusteringPolicyWritable.class)) {
    writer.append(new Text(), new ClusteringPolicyWritable(policy));
  }
}
}
FileSystem fs = FileSystem.get(context.getConfiguration()); if (EtlMultiOutputFormat.isRunMoveData(context)) { Path workPath = super.getWorkPath(); getPartitionedPath(context, file, count.getEventCount(), count.getLastKey().getOffset()); Path dest = new Path(baseOutDir, partitionedFile); if (!fs.exists(dest.getParent())) { mkdirs(fs, dest.getParent()); Path tempPath = new Path(workPath, "counts." + context.getConfiguration().get("mapred.task.id")); OutputStream outputStream = new BufferedOutputStream(fs.create(tempPath)); ObjectMapper mapper = new ObjectMapper(); SequenceFile.Writer offsetWriter = SequenceFile.createWriter(fs, context.getConfiguration(), new Path(super.getWorkPath(), EtlMultiOutputFormat.getUniqueFile(context, EtlMultiOutputFormat.OFFSET_PREFIX, "")), log.info("Avg record size for " + offsets.get(s).getTopic() + ":" + offsets.get(s).getPartition() + " = " + offsets.get(s).getMessageSize()); offsetWriter.append(offsets.get(s), NullWritable.get()); offsetWriter.close(); super.commitTask(context);
/**
 * Copies the clusters found under {@code clustersIn} into {@code output}, one output file per
 * input part file. Each cluster is written as (cluster id, cluster center).
 *
 * @param output     directory that receives the rewritten part files (same names as the inputs)
 * @param clustersIn directory listed for input part files, filtered by
 *                   {@code PathFilters.logsCRCFilter()}
 * @throws IOException if listing, reading or writing fails
 */
private static void writeInitialState(Path output, Path clustersIn) throws IOException {
  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.get(output.toUri(), conf);
  for (FileStatus part : fs.listStatus(clustersIn, PathFilters.logsCRCFilter())) {
    Path inPart = part.getPath();
    Path path = new Path(output, inPart.getName());
    // try-with-resources guarantees the writer is closed when reading a cluster or appending
    // fails part-way through; the previous explicit close() was skipped on any exception.
    try (SequenceFile.Writer writer =
        new SequenceFile.Writer(fs, conf, path, IntWritable.class, VectorWritable.class)) {
      for (Cluster value : new SequenceFileValueIterable<Cluster>(inPart, true, conf)) {
        log.debug("C-{}: {}", value.getId(), AbstractCluster.formatVector(value.getCenter(), null));
        writer.append(new IntWritable(value.getId()), new VectorWritable(value.getCenter()));
      }
    }
  }
}
final int N = Integer.parseInt(args[1]); // number of entries in the sequnece file, for example 20 Configuration conf = new Configuration(); FileSystem fs = FileSystem.get(URI.create(uri), conf); Path path = new Path(uri); Text key = new Text(); IntWritable value = new IntWritable(); SequenceFile.Writer writer = null; try { writer = SequenceFile.createWriter(fs, conf, path, key.getClass(), value.getClass()); for (int i = 1; i < N; i++) { int randomInt = randomNumberGenerator.nextInt(100); key.set("cat" + i); value.set(randomInt); System.out.printf("%s\t%s\n", key, value); writer.append(key, value);
public static void copyTo64MB(String src, String dst) throws IOException { Configuration hconf = new Configuration(); Path srcPath = new Path(src); Path dstPath = new Path(dst); FileSystem fs = FileSystem.get(hconf); long srcSize = fs.getFileStatus(srcPath).getLen(); int copyTimes = (int) (67108864 / srcSize); // 64 MB System.out.println("Copy " + copyTimes + " times"); Reader reader = new Reader(hconf, SequenceFile.Reader.file(srcPath)); Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), hconf); Text value = new Text(); Writer writer = SequenceFile.createWriter(hconf, Writer.file(dstPath), Writer.keyClass(key.getClass()), Writer.valueClass(Text.class), Writer.compression(CompressionType.BLOCK, getLZOCodec(hconf))); int count = 0; while (reader.next(key, value)) { for (int i = 0; i < copyTimes; i++) { writer.append(key, value); count++; } } System.out.println("Len: " + writer.getLength()); System.out.println("Rows: " + count); reader.close(); writer.close(); }
/**
 * Creates a sequence file at {@code file} holding {@code rowCount} records of the form
 * ({@code i}, {@code "line " + i}) for {@code i} in {@code [0, rowCount)}. An existing file
 * at that path is deleted first. Prints {@code done} on success.
 *
 * @param fs       file system on which the file is created
 * @param file     path of the sequence file
 * @param rowCount number of records to write
 * @throws IOException if deleting, creating, writing or closing fails; failures are now
 *                     propagated instead of being printed and swallowed, so callers no longer
 *                     continue with a missing or partial file
 */
private static void createSeqFile(FileSystem fs, Path file, int rowCount) throws IOException {
  Configuration conf = new Configuration();
  if (fs.exists(file)) {
    fs.delete(file, false);
  }
  // try-with-resources closes the writer even when an append fails.
  try (SequenceFile.Writer w =
      SequenceFile.createWriter(fs, conf, file, IntWritable.class, Text.class)) {
    for (int i = 0; i < rowCount; i++) {
      w.append(new IntWritable(i), new Text("line " + i));
    }
  }
  System.out.println("done");
}
/**
 * Persists this object's policy and models below {@code path}: the policy goes through
 * {@code writePolicy}, and model {@code i} is written to its own sequence file named
 * {@code part-} followed by the zero-padded five-digit index, keyed by that index.
 *
 * @param path directory that receives the policy file and the per-model part files
 * @throws IOException if any file cannot be created, written or closed
 */
public void writeToSeqFiles(Path path) throws IOException {
  writePolicy(policy, path);

  final Configuration hadoopConf = new Configuration();
  final FileSystem targetFs = FileSystem.get(path.toUri(), hadoopConf);
  // One wrapper instance is shared by all models; its payload is swapped on each iteration.
  final ClusterWritable wrapper = new ClusterWritable();

  int modelIndex = 0;
  while (modelIndex < models.size()) {
    final String partName = "part-" + String.format(Locale.ENGLISH, "%05d", modelIndex);
    final Path partPath = new Path(path, partName);
    try (SequenceFile.Writer out = new SequenceFile.Writer(
        targetFs, hadoopConf, partPath, IntWritable.class, ClusterWritable.class)) {
      wrapper.setValue(models.get(modelIndex));
      out.append(new IntWritable(modelIndex), wrapper);
    }
    modelIndex++;
  }
}
public void putAll(String storeName, String tableName, Collection<T> states) throws IOException { String tmpTableName = this.useTmpFileForPut ? TMP_FILE_PREFIX + tableName : tableName; Path tmpTablePath = new Path(new Path(this.storeRootDir, storeName), tmpTableName); if (!this.fs.exists(tmpTablePath) && !create(storeName, tmpTableName)) { throw new IOException("Failed to create a state file for table " + tmpTableName); try { @SuppressWarnings("deprecation") SequenceFile.Writer writer = closer.register(SequenceFile.createWriter(this.fs, this.conf, tmpTablePath, Text.class, this.stateClass, SequenceFile.CompressionType.BLOCK, new DefaultCodec())); for (T state : states) { writer.append(new Text(Strings.nullToEmpty(state.getId())), state);
private static void createControlFile( FileSystem fs, int fileSize, // in MB int nrFiles ) throws IOException { LOG.info("creating control file: "+fileSize+" mega bytes, "+nrFiles+" files"); fs.delete(CONTROL_DIR, true); for(int i=0; i < nrFiles; i++) { String name = getFileName(i); Path controlFile = new Path(CONTROL_DIR, "in_file_" + name); SequenceFile.Writer writer = null; try { writer = SequenceFile.createWriter(fs, fsConfig, controlFile, Text.class, LongWritable.class, CompressionType.NONE); writer.append(new Text(name), new LongWritable(fileSize)); } catch(Exception e) { throw new IOException(e.getLocalizedMessage()); } finally { if (writer != null) writer.close(); writer = null; } } LOG.info("created control files for: "+nrFiles+" files"); }
/**
 * Writes the same {@link WorkUnitState} instance to two sequence files, {@code seq1} and
 * {@code seq2} under {@code outputPath}, with its actual high watermark set to 10 for the
 * first file and 100 for the second. Both writers are registered with a {@link Closer} and
 * closed when the test finishes, whether or not it failed.
 *
 * @throws IOException if creating, writing or closing either file fails
 */
@Test
@SuppressWarnings("deprecation")
public void testSerializeToSequenceFile() throws IOException {
  final Closer resources = Closer.create();
  final Configuration hadoopConf = new Configuration();
  WritableShimSerialization.addToHadoopConfiguration(hadoopConf);

  try {
    final Path firstFile = new Path(this.outputPath, "seq1");
    SequenceFile.Writer firstWriter = resources.register(
        SequenceFile.createWriter(this.fs, hadoopConf, firstFile, Text.class, WorkUnitState.class));

    final Text emptyKey = new Text();
    final WorkUnitState state = new WorkUnitState();
    final TestWatermark mark = new TestWatermark();

    // First record: watermark value 10.
    mark.setLongWatermark(10L);
    state.setActualHighWatermark(mark);
    firstWriter.append(emptyKey, state);

    final Path secondFile = new Path(this.outputPath, "seq2");
    SequenceFile.Writer secondWriter = resources.register(
        SequenceFile.createWriter(this.fs, hadoopConf, secondFile, Text.class, WorkUnitState.class));

    // Second record: the same objects, with the watermark bumped to 100.
    mark.setLongWatermark(100L);
    state.setActualHighWatermark(mark);
    secondWriter.append(emptyKey, state);
  } catch (Throwable failure) {
    throw resources.rethrow(failure);
  } finally {
    resources.close();
  }
}
/**
 * Writes a label index to the sequence file at {@code indexPath}. For every pair in
 * {@code labels}, the label is element {@code [1]} of {@code SLASH.split(...)} applied to the
 * string form of the pair's first component. Each distinct label is written once, in
 * first-seen order, with consecutive integer ids starting at 0.
 *
 * @param conf      configuration used to resolve the file system for {@code indexPath}
 * @param indexPath destination of the index file
 * @param labels    pairs whose first component carries the label text
 * @return the number of distinct labels written
 * @throws IOException if the file cannot be created, written or closed
 */
public static int writeLabelIndex(Configuration conf, Path indexPath,
                                  Iterable<Pair<Text,IntWritable>> labels) throws IOException {
  FileSystem fs = FileSystem.get(indexPath.toUri(), conf);
  Collection<String> alreadyWritten = new HashSet<>();
  int nextId = 0;
  try (SequenceFile.Writer out = SequenceFile.createWriter(
      fs.getConf(),
      SequenceFile.Writer.file(indexPath),
      SequenceFile.Writer.keyClass(Text.class),
      SequenceFile.Writer.valueClass(IntWritable.class))) {
    for (Object entry : labels) {
      Pair<?, ?> pair = (Pair<?, ?>) entry;
      String[] pieces = SLASH.split(pair.getFirst().toString());
      String labelName = pieces[1];
      if (alreadyWritten.contains(labelName)) {
        continue; // duplicates keep the id assigned at first sight
      }
      out.append(new Text(labelName), new IntWritable(nextId));
      nextId++;
      alreadyWritten.add(labelName);
    }
  }
  return nextId;
}
+ "must be set"); this.indexInterval = conf.getInt(INDEX_INTERVAL, this.indexInterval); FileSystem fs = dirName.getFileSystem(conf); if (!fs.mkdirs(dirName)) { throw new IOException("Mkdirs failed to create directory " + dirName); Path dataFile = new Path(dirName, DATA_FILE_NAME); Path indexFile = new Path(dirName, INDEX_FILE_NAME); SequenceFile.Writer.file(dataFile), SequenceFile.Writer.keyClass(keyClass)); this.data = SequenceFile.createWriter(conf, dataOptions); Options.prependOptions(opts, SequenceFile.Writer.file(indexFile), SequenceFile.Writer.keyClass(keyClass), SequenceFile.Writer.valueClass(LongWritable.class), SequenceFile.Writer.compression(CompressionType.BLOCK)); this.index = SequenceFile.createWriter(conf, indexOptions);
new Path(tmpDir, "intermediate").suffix("." + passNo); tmpFilename.toString(), approxOutputSize, conf); if(LOG.isDebugEnabled()) { fs.makeQualified(segmentsToMerge.get(0).segmentPathName), fs.makeQualified(outputFile), null); writer.close(); fs.getFileStatus(outputFile).getLen(), outputFile);
/**
 * Builds the writer option naming the export file for one region. The file is
 * {@code export-<encoded region name>} inside the request's output path and must not
 * already exist.
 *
 * @param conf    configuration used to resolve the file system of the target path
 * @param info    region whose encoded name forms the file name
 * @param request export request supplying the output directory
 * @return a {@code SequenceFile.Writer.file} option pointing at the new export file
 * @throws IOException if the target file already exists or the file system cannot be reached
 */
private static SequenceFile.Writer.Option getOutputPath(final Configuration conf,
    final RegionInfo info, final ExportProtos.ExportRequest request) throws IOException {
  final Path target = new Path(request.getOutputPath(), "export-" + info.getEncodedName());
  final boolean alreadyPresent = target.getFileSystem(conf).exists(target);
  if (!alreadyPresent) {
    return SequenceFile.Writer.file(target);
  }
  // Refuse to overwrite an existing export.
  throw new IOException(target + " exists");
}
/**
 * Writes every slice of {@code matrix} to a sequence file at {@code outputDir} as
 * (slice index, slice vector). Anything already present at {@code outputDir} is deleted
 * recursively first.
 *
 * @param outputDir path of the sequence file to (re)create
 * @param conf      configuration used to resolve the file system and create the writer
 * @param matrix    source of the slices to write
 * @throws IOException if deleting, creating, writing or closing fails
 */
public static void write(Path outputDir, Configuration conf, VectorIterable matrix)
    throws IOException {
  FileSystem fs = outputDir.getFileSystem(conf);
  fs.delete(outputDir, true);
  // try-with-resources closes the writer even if iterating the matrix or an append throws;
  // the previous trailing close() was skipped on any failure.
  try (SequenceFile.Writer writer =
      SequenceFile.createWriter(fs, conf, outputDir, IntWritable.class, VectorWritable.class)) {
    // The key and value holders are reused for every slice.
    IntWritable topic = new IntWritable();
    VectorWritable vector = new VectorWritable();
    for (MatrixSlice slice : matrix) {
      topic.set(slice.index());
      vector.set(slice.vector());
      writer.append(topic, vector);
    }
  }
}
/**
 * Writes the given labels into a sequence file at {@code indexPath}, assigning each label a
 * consecutive integer id starting at 0 in iteration order. Labels are written as-is; no
 * de-duplication is performed.
 *
 * @param conf      configuration used to resolve the file system and create the writer
 * @param labels    labels to index
 * @param indexPath destination of the index file
 * @return the number of labels written
 * @throws IOException if the file cannot be created, written or closed
 */
public static int writeLabelIndex(Configuration conf, Iterable<String> labels, Path indexPath)
    throws IOException {
  FileSystem fs = FileSystem.get(indexPath.toUri(), conf);
  int i = 0;
  // try-with-resources: a failure while closing no longer masks a failure from append();
  // it is attached to the primary exception as a suppressed exception instead.
  try (SequenceFile.Writer writer =
      new SequenceFile.Writer(fs, conf, indexPath, Text.class, IntWritable.class)) {
    for (String label : labels) {
      writer.append(new Text(label), new IntWritable(i++));
    }
  }
  return i;
}
/**
 * Dumps {@code counterMap} to a sequence file at {@code outputPath}, one record per map
 * entry with the entry's key and value both stored as {@link Text}. The file system comes
 * from {@code getWorkingFileSystem(conf)}.
 *
 * @param conf       configuration used to resolve the file system and create the writer
 * @param outputPath location of the sequence file to write
 * @param counterMap name/value pairs to persist
 * @throws IOException if the file cannot be created, written or closed
 */
@SuppressWarnings("deprecation")
public static void writeToSequenceFile(Configuration conf, String outputPath,
    Map<String, String> counterMap) throws IOException {
  try (SequenceFile.Writer out = SequenceFile.createWriter(
      getWorkingFileSystem(conf), conf, new Path(outputPath), Text.class, Text.class)) {
    for (Map.Entry<String, String> counter : counterMap.entrySet()) {
      final Text counterName = new Text(counter.getKey());
      final Text counterValue = new Text(counter.getValue());
      out.append(counterName, counterValue);
    }
  }
}