/**
 * Writes every entry of {@code partitions} to a SequenceFile at {@code path}.
 * Each entry is stored as the key; the value is always {@link NullWritable}.
 *
 * @param conf configuration used to resolve the file system and to create the writer
 * @param path location of the partition file to write
 * @throws IOException if the file cannot be created, written, or closed
 */
void writePartitionFile(Configuration conf, Path path) throws IOException {
  FileSystem fs = path.getFileSystem(conf);
  @SuppressWarnings("deprecation")
  SequenceFile.Writer writer = SequenceFile.createWriter(
      fs, conf, path, ImmutableBytesWritable.class, NullWritable.class);
  try {
    for (int i = 0; i < partitions.size(); i++) {
      writer.append(partitions.get(i), NullWritable.get());
    }
  } finally {
    // Close even when an append fails so the underlying output stream is not leaked.
    writer.close();
  }
}
/**
 * Closes {@code mos} and, when the base-cuboid output directory does not exist yet,
 * creates it together with an empty {@code part-m-00000} SequenceFile (Text/Text).
 *
 * @param context task context supplying the job configuration
 * @throws IOException if closing the outputs or touching the file system fails
 * @throws InterruptedException if closing the outputs is interrupted
 */
@Override
public void doCleanup(Context context) throws IOException, InterruptedException {
    mos.close();

    Path outputDirBase = new Path(context.getConfiguration().get(FileOutputFormat.OUTDIR), PathNameCuboidBase);
    // Resolve the file system from the path itself: the SequenceFile writer below does the same,
    // so the existence check, mkdirs and the write all target one file system even when the
    // output directory is not on the default one.
    FileSystem fs = outputDirBase.getFileSystem(context.getConfiguration());
    if (!fs.exists(outputDirBase)) {
        fs.mkdirs(outputDirBase);
        // Create and immediately close the writer, leaving a SequenceFile with no records.
        SequenceFile
                .createWriter(context.getConfiguration(),
                        SequenceFile.Writer.file(new Path(outputDirBase, "part-m-00000")),
                        SequenceFile.Writer.keyClass(Text.class),
                        SequenceFile.Writer.valueClass(Text.class))
                .close();
    }
}
/** * Produce initial cluster centroids and write the centroids directly to file system. * This interface is called after produceSamples is called. * * @param numClusters The number centroids to be generated * @param centroidsPath the path on FileSystem where centroids are to be written to * @return the actual number of centroids produced */ public int produceInitialCentroids(int numClusters, Path centroidsPath) throws Exception { List<Vector> iCentroids = new ArrayList<Vector>(numClusters); produceInitialCentroids(numClusters, iCentroids); SequenceFile.Writer writer = new SequenceFile.Writer(getFileSystem(), getJobConf(), centroidsPath, Text.class, Kluster.class); for (int i = 0; i < iCentroids.size(); i++) { Vector vec = iCentroids.get(i); Kluster cluster = new Kluster(vec, i, dm); // add the center so the centroid will be correct upon output cluster.observe(cluster.getCenter(), 1); writer.append(new Text(cluster.getIdentifier()), cluster); } writer.close(); return iCentroids.size(); }
/**
 * Appends {@code numRecords} records to {@code writer} and then closes it.
 * Keys are the record indices {@code 0 .. numRecords - 1}; each value is filled in by
 * {@code randomText(val, index, RECORDSIZE)}.
 *
 * @param writer destination writer; it is closed before this method returns or throws
 * @param numRecords number of records to append
 * @throws IOException if appending or closing fails
 */
public static void writeSequenceFile(SequenceFile.Writer writer, int numRecords)
    throws IOException {
  final IntWritable key = new IntWritable();
  final Text val = new Text();
  try {
    for (int numWritten = 0; numWritten < numRecords; ++numWritten) {
      key.set(numWritten);
      randomText(val, numWritten, RECORDSIZE);
      writer.append(key, val);
    }
  } finally {
    // This method owns closing the writer, so do it on the failure path as well.
    writer.close();
  }
}
/**
 * Stores {@code v} as the single record of a SequenceFile at {@code path}, under key {@code 0}.
 *
 * @param path destination file
 * @param v vector to persist
 * @throws IOException if the file cannot be created, written, or closed
 */
private void saveOutputVector(Path path, Vector v) throws IOException {
  FileSystem fileSystem = path.getFileSystem(conf);
  SequenceFile.Writer out =
      new SequenceFile.Writer(fileSystem, conf, path, IntWritable.class, VectorWritable.class);
  try {
    IntWritable recordKey = new IntWritable(0);
    VectorWritable recordValue = new VectorWritable(v);
    out.append(recordKey, recordValue);
  } finally {
    out.close();
  }
}
/**
 * Writes {@code numRecords} key/value pairs through {@code writer} and closes the writer.
 * The key of record {@code i} is {@code i}; its value is produced by
 * {@code randomText(val, i, RECORDSIZE)}.
 *
 * @param writer destination writer; always closed by this method, including on failure
 * @param numRecords number of records to append
 * @throws IOException if appending or closing fails
 */
public static void writeSequenceFile(SequenceFile.Writer writer, int numRecords)
    throws IOException {
  final IntWritable key = new IntWritable();
  final Text val = new Text();
  try {
    for (int numWritten = 0; numWritten < numRecords; ++numWritten) {
      key.set(numWritten);
      randomText(val, numWritten, RECORDSIZE);
      writer.append(key, val);
    }
  } finally {
    // Closing in finally prevents a leaked stream when an append throws part-way through.
    writer.close();
  }
}
/**
 * Writes the given vector to {@code path} as a one-record SequenceFile
 * ({@code IntWritable(0)} -> {@code VectorWritable(v)}).
 *
 * @param path file to write
 * @param v vector to store
 * @throws IOException on any file system or write failure
 */
private void saveOutputVector(Path path, Vector v) throws IOException {
  final FileSystem targetFs = path.getFileSystem(conf);
  final SequenceFile.Writer seqWriter = new SequenceFile.Writer(
      targetFs, conf, path, IntWritable.class, VectorWritable.class);
  try {
    final IntWritable zeroKey = new IntWritable(0);
    seqWriter.append(zeroKey, new VectorWritable(v));
  } finally {
    seqWriter.close();
  }
}
/**
 * Fills {@code writer} with {@code numRecords} records and closes it afterwards.
 * Record {@code i} has key {@code i} and a value populated by
 * {@code randomText(val, i, RECORDSIZE)}.
 *
 * @param writer destination writer; closed by this method whether or not writing succeeds
 * @param numRecords number of records to append
 * @throws IOException if appending or closing fails
 */
public static void writeSequenceFile(SequenceFile.Writer writer, int numRecords)
    throws IOException {
  final IntWritable key = new IntWritable();
  final Text val = new Text();
  try {
    for (int numWritten = 0; numWritten < numRecords; ++numWritten) {
      key.set(numWritten);
      randomText(val, numWritten, RECORDSIZE);
      writer.append(key, val);
    }
  } finally {
    // The success path already closed the writer here; mirror that when an append fails.
    writer.close();
  }
}
public void write(String key, String value) throws IOException { if (currentChunkSize > maxChunkSizeInBytes) { writer.close(); writer = new SequenceFile.Writer(fs, conf, getPath(currentChunkID++), Text.class, Text.class); currentChunkSize = 0; } Text keyT = new Text(key); Text valueT = new Text(value); currentChunkSize += keyT.getBytes().length + valueT.getBytes().length; // Overhead writer.append(keyT, valueT); }
/**
 * Persists {@code v} to {@code path} as a SequenceFile holding one record with key {@code 0}.
 *
 * @param conf configuration used to resolve the file system and create the writer
 * @param path destination file
 * @param v vector to store
 * @return the same {@code path} that was passed in
 * @throws IOException if the file cannot be created, written, or closed
 */
private static Path saveVector(Configuration conf, Path path, Vector v) throws IOException {
  FileSystem destinationFs = path.getFileSystem(conf);
  SequenceFile.Writer vectorOut = new SequenceFile.Writer(
      destinationFs, conf, path, IntWritable.class, VectorWritable.class);
  try {
    IntWritable index = new IntWritable(0);
    VectorWritable payload = new VectorWritable(v);
    vectorOut.append(index, payload);
  } finally {
    vectorOut.close();
  }
  return path;
}
/**
 * Materializes {@code collection} and writes it to a SequenceFile at {@code path}.
 * Every element is passed through the collection's {@code PType} output map function and
 * appended as the value of a record whose key is {@link NullWritable}.
 *
 * @param fs file system to write to; its configuration is used for the writer
 * @param path destination file
 * @param collection collection whose elements are written
 * @throws IOException if the file cannot be created, written, or closed
 */
private void writeSequenceFileFromPCollection(final FileSystem fs, final Path path,
    final PCollection collection) throws IOException {
  final PType pType = collection.getPType();
  final Converter converter = pType.getConverter();
  final Class valueClass = converter.getValueClass();

  final SequenceFile.Writer writer =
      new SequenceFile.Writer(fs, fs.getConf(), path, NullWritable.class, valueClass);
  try {
    for (final Object o : collection.materialize()) {
      final Object value = pType.getOutputMapFn().map(o);
      writer.append(NullWritable.get(), value);
    }
  } finally {
    // Materializing, mapping and appending can each throw; never leave the writer open.
    writer.close();
  }
}
/**
 * Closes {@code mos}; if the base-cuboid directory under the job output directory is missing,
 * creates it and places an empty Text/Text SequenceFile named {@code part-m-00000} in it.
 *
 * @param context task context supplying the job configuration
 * @throws IOException if closing the outputs or a file system operation fails
 * @throws InterruptedException if closing the outputs is interrupted
 */
@Override
public void doCleanup(Context context) throws IOException, InterruptedException {
    mos.close();

    Path outputDirBase = new Path(context.getConfiguration().get(FileOutputFormat.OUTDIR), PathNameCuboidBase);
    // Ask the path for its own file system rather than using the default one, so that
    // exists/mkdirs operate on the same file system the SequenceFile writer resolves
    // from the path.
    FileSystem fs = outputDirBase.getFileSystem(context.getConfiguration());
    if (!fs.exists(outputDirBase)) {
        fs.mkdirs(outputDirBase);
        // Opening and immediately closing the writer yields a SequenceFile without records.
        SequenceFile
                .createWriter(context.getConfiguration(),
                        SequenceFile.Writer.file(new Path(outputDirBase, "part-m-00000")),
                        SequenceFile.Writer.keyClass(Text.class),
                        SequenceFile.Writer.valueClass(Text.class))
                .close();
    }
}
public long writeSeeds(Path sampleSeedPath) throws Exception { int fileNo = 0; long numTotal = this.numSamples; //total number of samples int centriodNum = genParams.length; // number of initial centroids long numPerCluster = (long) Math.ceil(numTotal / (double) centriodNum); long numFiles = (long) Math.ceil(numPerCluster / (double) SAMPLES_PER_FILE); //num of files per cluster for (int k = 0; k < genParams.length; k++) { if (genParams[k].length != dimension) throw new Exception("The dimension of mean vector or std vector does not match desired dimension!"); StringBuilder sb = new StringBuilder(); for (int d = 0; d < dimension; d++) { if (genParams[k][d].length != 2) throw new Exception("The dimension of mean vector or std vector does not match desired dimension"); sb.append("\t" + Double.toString(genParams[k][d][0]) + "\t" + Double.toString(genParams[k][d][1])); } for (long i = 0; i < numFiles; i++) { SequenceFile.Writer out = createNewFile(new Path(sampleSeedPath, "seed" + (fileNo++)), IntWritable.class, Text.class); out.append(new IntWritable(k), new Text(Long.toString(SAMPLES_PER_FILE) + sb.toString())); out.close(); } if (numFiles * SAMPLES_PER_FILE < numPerCluster) { long left = numPerCluster - numFiles * SAMPLES_PER_FILE; SequenceFile.Writer out = createNewFile(new Path(sampleSeedPath, "seed" + (fileNo++)), IntWritable.class, Text.class); out.append(new IntWritable(k), new Text(Long.toString(left) + sb.toString())); out.close(); } } return numPerCluster * centriodNum; }
/**
 * Writes a SequenceFile of {@code rowCount} records to {@code file}, replacing any existing file.
 * Record {@code i} has key {@code IntWritable(i)} and value {@code Text("line " + i)}.
 *
 * <p>An {@link IOException} raised while deleting or writing is caught and its stack trace is
 * printed; it is not propagated to the caller.
 *
 * @param fs file system holding {@code file}
 * @param file file to (re)create
 * @param rowCount number of records to write
 * @throws IOException declared by the signature; failures in the body are caught and printed
 */
private static void createSeqFile(FileSystem fs, Path file, int rowCount) throws IOException {
  Configuration conf = new Configuration();
  try {
    if (fs.exists(file)) {
      fs.delete(file, false);
    }
    SequenceFile.Writer w = SequenceFile.createWriter(fs, conf, file, IntWritable.class, Text.class);
    try {
      for (int i = 0; i < rowCount; i++) {
        w.append(new IntWritable(i), new Text("line " + i));
      }
    } finally {
      // Close the writer even when an append fails so the stream is not leaked.
      w.close();
    }
    System.out.println("done");
  } catch (IOException e) {
    e.printStackTrace();
  }
}
Configuration conf) throws Exception { String dr = (dryrun ? "[DRY RUN ] " : ""); Path data = new Path(dir, DATA_FILE_NAME); Path index = new Path(dir, INDEX_FILE_NAME); int indexInterval = conf.getInt(Writer.INDEX_INTERVAL, 128); if (!fs.exists(data)) { if (!dryrun) { indexWriter = SequenceFile.createWriter(conf, SequenceFile.Writer.file(index), SequenceFile.Writer.keyClass(keyClass), position.set(pos); if (!dryrun) { indexWriter.append(key, position); if (!dryrun) indexWriter.close(); return cnt;
SequenceFile.Writer out; int fileNo = 0; out = createNewFile(new Path(samplePath, "file" + (fileNo++))); double[] vec = new double[this.dimension]; while ((s = in.readLine()) != null) { p.assign(vec); if (samplesInCurrFile >= SAMPLES_PER_FILE) { out.close(); out = createNewFile(new Path(samplePath, "file" + (fileNo++))); samplesInCurrFile = 0; out.append(new LongWritable(samplesInCurrFile++), new VectorWritable(p)); sampleNum++; out.close(); in.close(); LOG.info("Parsed " + String.valueOf(sampleNum) + " samples totally.");