/**
 * Deserializes a {@link Vector} from a stream whose flag byte and element
 * count have already been read by the caller.
 *
 * @param in          source stream positioned at the vector payload
 * @param vectorFlags flag byte describing the serialized representation
 * @param size        number of elements in the serialized vector
 * @return the deserialized vector
 * @throws IOException if the stream cannot be read
 */
public static Vector readVector(DataInput in, byte vectorFlags, int size) throws IOException {
  VectorWritable writable = new VectorWritable();
  writable.readFields(in, vectorFlags, size);
  return writable.get();
}
matrix = new DenseMatrix(rows, columns); for (int row = 0; row < rows; row++) { matrix.assignRow(row, VectorWritable.readVector(in, vectorFlags, columns)); rowVectors[row] = VectorWritable.readVector(in, vectorFlags, columns); while (rowsRead++ < numNonZeroRows) { int rowIndex = in.readInt(); matrix.assignRow(rowIndex, VectorWritable.readVector(in, vectorFlags, columns));
/**
 * Serializes the wrapped vector to {@code out}, honoring the configured
 * lax-precision setting.
 *
 * @param out destination stream
 * @throws IOException if the stream cannot be written
 */
@Override
public void write(DataOutput out) throws IOException {
  writeVector(out, vector, writesLaxPrecision);
}
/**
 * Merges every vector produced by the iterator into a single vector and
 * wraps the result in a new {@link VectorWritable}.
 *
 * @param vectors vectors to merge; the iterator is consumed fully
 * @return a writable wrapping the merged vector
 */
public static VectorWritable merge(Iterator<VectorWritable> vectors) {
  Vector merged = mergeToVector(vectors);
  return new VectorWritable(merged);
}
/**
 * Round-trips {@code v} through VectorWritable serialization and asserts that
 * the deserialized copy equals the original. For {@link NamedVector} inputs
 * the name must survive the round trip as well; callers are expected to have
 * named the vector "Victor".
 */
private static void doTestVectorWritableEquals(Vector v) throws IOException {
  Writable written = new VectorWritable(v);
  VectorWritable read = new VectorWritable();
  writeAndRead(written, read);
  Vector roundTripped = read.get();
  if (v instanceof NamedVector) {
    assertTrue(roundTripped instanceof NamedVector);
    NamedVector original = (NamedVector) v;
    NamedVector copy = (NamedVector) roundTripped;
    assertEquals(original.getName(), copy.getName());
    assertEquals("Victor", original.getName());
  }
  assertEquals(v, roundTripped);
}
/**
 * Copies the cluster centers found under {@code clustersIn} into sequence
 * files under {@code output}, keyed by cluster id and keeping the input
 * part-file names.
 *
 * <p>Fix: the writer is now closed in a {@code finally} block so it is not
 * leaked when {@code append} (or the value iteration) throws.
 *
 * @param output     directory to write the (id, center) sequence files into
 * @param clustersIn directory holding serialized {@link Cluster} part files
 * @throws IOException on any filesystem or serialization failure
 */
private static void writeInitialState(Path output, Path clustersIn) throws IOException {
  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.get(output.toUri(), conf);
  for (FileStatus part : fs.listStatus(clustersIn, PathFilters.logsCRCFilter())) {
    Path inPart = part.getPath();
    Path path = new Path(output, inPart.getName());
    SequenceFile.Writer writer =
        new SequenceFile.Writer(fs, conf, path, IntWritable.class, VectorWritable.class);
    try {
      for (Cluster value : new SequenceFileValueIterable<Cluster>(inPart, true, conf)) {
        log.debug("C-{}: {}", value.getId(), AbstractCluster.formatVector(value.getCenter(), null));
        writer.append(new IntWritable(value.getId()), new VectorWritable(value.getCenter()));
      }
    } finally {
      // Always release the writer, even if appending a cluster fails.
      writer.close();
    }
  }
}
/**
 * Writes every slice of {@code matrix} to a sequence file at
 * {@code outputDir}, keyed by the slice index.
 *
 * <p>Fix: the writer is now closed in a {@code finally} block so it is not
 * leaked when {@code append} throws mid-iteration.
 *
 * @param outputDir destination sequence file path
 * @param conf      Hadoop configuration used to resolve the filesystem
 * @param matrix    slices to serialize; iterated once
 * @throws IOException on any filesystem or serialization failure
 */
public static void write(Path outputDir, Configuration conf, Iterable<MatrixSlice> matrix)
    throws IOException {
  FileSystem fs = outputDir.getFileSystem(conf);
  SequenceFile.Writer writer =
      SequenceFile.createWriter(fs, conf, outputDir, IntWritable.class, VectorWritable.class);
  try {
    // Reuse a single key/value pair across appends (standard Hadoop pattern).
    IntWritable topic = new IntWritable();
    VectorWritable vector = new VectorWritable();
    for (MatrixSlice slice : matrix) {
      topic.set(slice.index());
      vector.set(slice.vector());
      writer.append(topic, vector);
    }
  } finally {
    writer.close();
  }
}
/**
 * Reads a single serialized {@link Vector} from the file at {@code path}.
 *
 * @param path file containing exactly one serialized vector
 * @param conf Hadoop configuration used to resolve the filesystem
 * @return the deserialized vector
 * @throws IOException if the file cannot be opened or read
 */
public static Vector read(Path path, Configuration conf) throws IOException {
  FSDataInputStream stream = FileSystem.get(path.toUri(), conf).open(path);
  try {
    return VectorWritable.readVector(stream);
  } finally {
    // Swallow close() failures on the read path; the data was already read.
    Closeables.close(stream, true);
  }
}
}
/**
 * Serializes {@code vector} to a new file at {@code path}.
 *
 * @param vector       vector to write
 * @param path         destination file (created/overwritten)
 * @param conf         Hadoop configuration used to resolve the filesystem
 * @param laxPrecision whether to write doubles with reduced (float) precision
 * @throws IOException if the file cannot be created or written
 */
public static void write(Vector vector, Path path, Configuration conf, boolean laxPrecision)
    throws IOException {
  FSDataOutputStream out = FileSystem.get(path.toUri(), conf).create(path);
  try {
    VectorWritable writable = new VectorWritable(vector);
    writable.setWritesLaxPrecision(laxPrecision);
    writable.write(out);
  } finally {
    // Do not swallow close() failures on the write path: a failed close can
    // mean lost data.
    Closeables.close(out, false);
  }
}
/**
 * Seeds the initial item-feature matrix M: for every item with a non-zero
 * average rating, feature 0 is set to that average and the remaining
 * features are drawn uniformly at random. The result is written as
 * (itemId, featureVector) pairs to {@code pathToM(-1)/part-m-00000}.
 *
 * @param averageRatings sparse vector of per-item average ratings
 * @throws IOException if the sequence file cannot be written
 */
private void initializeM(Vector averageRatings) throws IOException {
  Random random = RandomUtils.getRandom();
  FileSystem fs = FileSystem.get(pathToM(-1).toUri(), getConf());
  SequenceFile.Writer writer = null;
  try {
    writer = new SequenceFile.Writer(fs, getConf(), new Path(pathToM(-1), "part-m-00000"),
        IntWritable.class, VectorWritable.class);
    // Reuse one key/value pair across appends (standard Hadoop pattern).
    IntWritable itemIndex = new IntWritable();
    VectorWritable features = new VectorWritable();
    for (Vector.Element avg : averageRatings.nonZeroes()) {
      Vector itemFeatures = new DenseVector(numFeatures);
      itemFeatures.setQuick(0, avg.get());
      for (int feature = 1; feature < numFeatures; feature++) {
        itemFeatures.setQuick(feature, random.nextDouble());
      }
      itemIndex.set(avg.index());
      features.set(itemFeatures);
      writer.append(itemIndex, features);
    }
  } finally {
    Closeables.close(writer, false);
  }
}
/**
 * Writes a single (key, vector) pair to a sequence file at {@code p},
 * deleting any existing file at that path first.
 *
 * @param p      destination path; overwritten if it already exists
 * @param key    integer key to store the vector under
 * @param vector vector to persist
 * @throws IOException if the file cannot be replaced or written
 */
protected void persistVector(Path p, int key, Vector vector) throws IOException {
  if (fs.exists(p)) {
    log.warn("{} exists, will overwrite", p);
    fs.delete(p, true);
  }
  SequenceFile.Writer writer = null;
  try {
    writer = new SequenceFile.Writer(fs, conf, p, IntWritable.class, VectorWritable.class);
    writer.append(new IntWritable(key), new VectorWritable(vector));
  } finally {
    Closeables.close(writer, false);
  }
}
/**
 * Serializes this naive Bayes model to {@code output/naiveBayesModel.bin}:
 * the smoothing parameter alphaI, the per-feature and per-label weight
 * vectors, the per-label theta normalizer, and then every row of the
 * label-by-feature weight matrix in row order.
 *
 * @param output directory to create the model file in
 * @param conf   Hadoop configuration used to resolve the filesystem
 * @throws IOException if the model file cannot be written
 */
public void serialize(Path output, Configuration conf) throws IOException {
  FSDataOutputStream out =
      output.getFileSystem(conf).create(new Path(output, "naiveBayesModel.bin"));
  try {
    out.writeFloat(alphaI);
    VectorWritable.writeVector(out, weightsPerFeature);
    VectorWritable.writeVector(out, weightsPerLabel);
    VectorWritable.writeVector(out, perlabelThetaNormalizer);
    int numLabels = weightsPerLabelAndFeature.numRows();
    for (int label = 0; label < numLabels; label++) {
      VectorWritable.writeVector(out, weightsPerLabelAndFeature.viewRow(label));
    }
  } finally {
    Closeables.close(out, false);
  }
}
/**
 * Writes each vector in {@code datapoints} to a sequence file at
 * {@code path}, keyed by its 0-based position in the iteration order.
 *
 * @param datapoints vectors to serialize; iterated once
 * @param path       destination sequence file path
 * @param conf       Hadoop configuration used to resolve the filesystem
 * @throws IOException if the sequence file cannot be written
 */
public static void writeVectorsToSequenceFile(Iterable<? extends Vector> datapoints, Path path,
    Configuration conf) throws IOException {
  try (SequenceFile.Writer writer = SequenceFile.createWriter(FileSystem.get(conf), conf, path,
      IntWritable.class, VectorWritable.class)) {
    int index = 0;
    for (Vector datapoint : datapoints) {
      writer.append(new IntWritable(index), new VectorWritable(datapoint));
      index++;
    }
  }
}
}
/**
 * Persists {@code v} as the single entry (key 0) of a sequence file at
 * {@code path}.
 *
 * @param path destination sequence file path
 * @param v    vector to persist
 * @throws IOException if the file cannot be written
 */
private void saveOutputVector(Path path, Vector v) throws IOException {
  FileSystem fs = path.getFileSystem(conf);
  IntWritable key = new IntWritable(0);
  VectorWritable value = new VectorWritable(v);
  try (SequenceFile.Writer writer =
      new SequenceFile.Writer(fs, conf, path, IntWritable.class, VectorWritable.class)) {
    writer.append(key, value);
  }
}
SequenceFile.Writer out; int fileNo = 0; out = createNewFile(new Path(samplePath, "file" + (fileNo++))); double[] vec = new double[this.dimension]; while ((s = in.readLine()) != null) { Vector p = new RandomAccessSparseVector(dimension); p.assign(vec); if (samplesInCurrFile >= SAMPLES_PER_FILE) { out.close(); out = createNewFile(new Path(samplePath, "file" + (fileNo++))); samplesInCurrFile = 0; out.append(new LongWritable(samplesInCurrFile++), new VectorWritable(p)); sampleNum++; out.close(); in.close(); LOG.info("Parsed " + String.valueOf(sampleNum) + " samples totally.");
/**
 * Loads the first vector stored in the sequence file at {@code path}.
 *
 * @param path sequence file containing (IntWritable, VectorWritable) pairs
 * @return the first vector in the file
 * @throws IOException if the file cannot be read or contains no entries
 */
private Vector loadInputVector(Path path) throws IOException {
  FileSystem fs = path.getFileSystem(conf);
  try (SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf)) {
    IntWritable ignoredKey = new IntWritable();
    VectorWritable firstValue = new VectorWritable();
    if (!reader.next(ignoredKey, firstValue)) {
      throw new IOException("Input vector file is empty.");
    }
    return firstValue.get();
  }
}
/**
 * Scans the sequence file at {@code p} for the entry whose key equals
 * {@code keyIndex} and returns its vector.
 *
 * <p>Fix: the {@link SequenceFile.Reader} was never closed (the method could
 * return from inside the scan loop), leaking the stream on every call; it is
 * now managed with try-with-resources.
 *
 * @param p        sequence file of (IntWritable, VectorWritable) pairs
 * @param keyIndex key to look up
 * @return the matching vector, or {@code null} if the file does not exist or
 *         contains no entry with that key
 * @throws IOException if the file cannot be read
 */
protected Vector fetchVector(Path p, int keyIndex) throws IOException {
  if (!fs.exists(p)) {
    return null;
  }
  try (SequenceFile.Reader reader = new SequenceFile.Reader(fs, p, conf)) {
    IntWritable key = new IntWritable();
    VectorWritable vw = new VectorWritable();
    while (reader.next(key, vw)) {
      if (key.get() == keyIndex) {
        return vw.get();
      }
    }
    return null;
  }
}
/**
 * Emits the transpose of one matrix row: for every non-zero element at
 * column j of input row i, writes key j with a sparse vector whose entry i
 * carries the element's value.
 *
 * @param r   row index of the incoming slice (mutated and reused as the
 *            output key — standard Hadoop object reuse)
 * @param v   the row's vector
 * @param ctx mapper context to emit (column, column-vector) pairs to
 */
@Override
protected void map(IntWritable r, VectorWritable v, Context ctx)
    throws IOException, InterruptedException {
  int row = r.get();
  for (Vector.Element e : v.get().nonZeroes()) {
    // One single-entry column vector per non-zero element of the row.
    RandomAccessSparseVector transposed = new RandomAccessSparseVector(newNumCols, 1);
    transposed.setQuick(row, e.get());
    r.set(e.index());
    ctx.write(r, new VectorWritable(transposed));
  }
}
}
@Override public void map(IntWritable key, WeightedVectorWritable val, Context context) throws IOException, InterruptedException { // by pivoting on the cluster mapping value, we can make sure that each unique cluster goes to it's own reducer, // since they are numbered from 0 to k-1, where k is the number of clusters outputVector.set(val.getVector()); context.write(new IntWritable(newClusterMappings.get(key.get())), outputVector); } }
/**
 * Serializes {@code v} to a new file at {@code path} on {@code fs}.
 *
 * <p>Fix: the output stream is now closed in a {@code finally} block so it is
 * not leaked when {@code writeVector} throws.
 *
 * @param v    vector to write
 * @param path destination file (created/overwritten)
 * @param fs   filesystem to create the file on
 * @param conf Hadoop configuration (unused here; kept for interface
 *             compatibility with existing callers)
 * @throws IOException if the file cannot be created or written
 */
public static void writeVector(Vector v, Path path, FileSystem fs, Configuration conf)
    throws IOException {
  FSDataOutputStream out = fs.create(path);
  try {
    VectorWritable.writeVector(out, v);
  } finally {
    out.close();
  }
}