/**
 * Closes this step by delegating to {@code cleanup()}, which flushes any
 * pending rows and releases the registered closeables.
 *
 * @throws IOException if {@code cleanup()} fails
 */
@Override
public void close() throws IOException {
  cleanup();
}
/**
 * Accepts one incoming row and forwards it to {@code map(Vector)}.
 *
 * @param key row key; not used by this method
 * @param vw  the row vector to process
 * @throws IOException if processing the row fails
 */
@Override
public void collect(Writable key, Vector vw) throws IOException {
  map(vw);
}
/**
 * Emits the final Q-hat block(s) and the R output.
 *
 * <p>When exactly one block was produced, the solver's current state is
 * written out directly; for any other block count the work is handed to
 * {@code secondPass()}.
 *
 * @throws IOException if writing the output or running the second pass fails
 */
private void flushQBlocks() throws IOException {
  if (blockCnt != 1) {
    secondPass();
    return;
  }

  /*
   * Single block: no temp file and no second pass are needed. This is
   * expected to be the common, most efficient mode.
   */
  value.setBlock(qSolver.getThinQtTilde());
  outputQHat(value);
  outputR(new VectorWritable(new DenseVector(qSolver.getRTilde().getData(), true)));
}
protected void cleanup() throws IOException { try { if (qSolver == null && yLookahead.isEmpty()) { return; } if (qSolver == null) { qSolver = new GivensThinSolver(yLookahead.size(), kp); } // grow q solver up if necessary qSolver.adjust(qSolver.getCnt() + yLookahead.size()); while (!yLookahead.isEmpty()) { qSolver.appendRow(yLookahead.remove(0)); } assert qSolver.isFull(); if (++blockCnt > 1) { flushSolver(); assert tempQw != null; closeables.remove(tempQw); Closeables.close(tempQw, false); } flushQBlocks(); } finally { IOUtils.close(closeables); } }
private void secondPass() throws IOException { qSolver = null; // release mem FileSystem localFs = FileSystem.getLocal(jobConf); SequenceFile.Reader tempQr = new SequenceFile.Reader(localFs, tempQPath, jobConf); closeables.addFirst(tempQr); int qCnt = 0; while (tempQr.next(tempKey, value)) { value .setBlock(GivensThinSolver.computeQtHat(value.getBlock(), qCnt, new CopyConstructorIterator<UpperTriangular>(rSubseq .iterator()))); if (qCnt == 1) { /* * just merge r[0] <- r[1] so it doesn't have to repeat in subsequent * computeQHat iterators */ GivensThinSolver.mergeR(rSubseq.get(0), rSubseq.remove(1)); } else { qCnt++; } outputQHat(value); } assert rSubseq.size() == 1; outputR(new VectorWritable(new DenseVector(rSubseq.get(0).getData(), true))); }
/**
 * Processes one input row: fills {@code yRow} from the input via
 * {@code omega.computeYRow}, subtracts {@code sb} from it when {@code sb} is
 * set, and passes the result to the QR step.
 *
 * @param key     key of the input row, forwarded to the QR step
 * @param value   the input row
 * @param context mapper context; not used directly here
 */
@Override
protected void map(Writable key, VectorWritable value, Context context)
    throws IOException, InterruptedException {
  omega.computeYRow(value.get(), yRow);
  if (sb != null) {
    yRow.assign(sb, Functions.MINUS);
  }
  qr.collect(key, yRow);
}
// Build the QRFirstStep that map() feeds rows into via qr.collect(...). It receives the
// configuration and two collectors (presumably the Q-hat and R-hat output sinks -- confirm).
qr = new QRFirstStep(conf, qhatCollector, rhatCollector);
/**
 * Spills the solver's current block: its R is remembered in {@code rSubseq},
 * its thin Q^t is appended to the temp file, and the solver is reset for the
 * next block.
 *
 * @throws IOException if appending to the temp file fails
 */
private void flushSolver() throws IOException {
  UpperTriangular rTilde = qSolver.getRTilde();
  double[][] qtTilde = qSolver.getThinQtTilde();

  rSubseq.add(rTilde);

  value.setBlock(qtTilde);
  getTempQw().append(tempKey, value);

  /*
   * The block is kept dense on purpose. A sparse row matrix might look
   * attractive, but compression should take care of the on-disk size, and in
   * memory a sparse layout would mostly mean rehashing and GC thrashing.
   */
  value.setBlock(null);
  qSolver.reset();
}
/**
 * Buffers one incoming row in the look-ahead list.
 *
 * <p>Once the look-ahead list holds {@code kp} rows, its oldest row is moved
 * into the solver (flushing the solver first if it is full) and that row's
 * array is reused for the incoming data; otherwise a new array is allocated.
 *
 * @param incomingYRow the row to buffer
 * @throws IOException if flushing the solver fails
 */
protected void map(Vector incomingYRow) throws IOException {
  double[] row;
  if (yLookahead.size() != kp) {
    row = new double[kp];
  } else {
    if (qSolver.isFull()) {
      flushSolver();
      blockCnt++;
    }
    // hand the oldest buffered row to the solver, then recycle its array
    row = yLookahead.remove(0);
    qSolver.appendRow(row);
  }

  if (!incomingYRow.isDense()) {
    // clear first: the array may be a recycled one
    Arrays.fill(row, 0);
    for (Element yEl : incomingYRow.nonZeroes()) {
      row[yEl.index()] = yEl.get();
    }
  } else {
    for (int col = 0; col < kp; col++) {
      row[col] = incomingYRow.get(col);
    }
  }

  yLookahead.add(row);
}
/**
 * Emits the final Q-hat block(s) and the R output.
 *
 * <p>When exactly one block was produced, the solver's current state is
 * written out directly; for any other block count the work is handed to
 * {@code secondPass()}.
 *
 * @throws IOException if writing the output or running the second pass fails
 */
private void flushQBlocks() throws IOException {
  if (blockCnt != 1) {
    secondPass();
    return;
  }

  /*
   * Single block: no temp file and no second pass are needed. This is
   * expected to be the common, most efficient mode.
   */
  value.setBlock(qSolver.getThinQtTilde());
  outputQHat(value);
  outputR(new VectorWritable(new DenseVector(qSolver.getRTilde().getData(), true)));
}
protected void cleanup() throws IOException { try { if (qSolver == null && yLookahead.isEmpty()) { return; } if (qSolver == null) { qSolver = new GivensThinSolver(yLookahead.size(), kp); } // grow q solver up if necessary qSolver.adjust(qSolver.getCnt() + yLookahead.size()); while (!yLookahead.isEmpty()) { qSolver.appendRow(yLookahead.remove(0)); } assert qSolver.isFull(); if (++blockCnt > 1) { flushSolver(); assert tempQw != null; closeables.remove(tempQw); Closeables.close(tempQw, false); } flushQBlocks(); } finally { IOUtils.close(closeables); } }
private void secondPass() throws IOException { qSolver = null; // release mem FileSystem localFs = FileSystem.getLocal(jobConf); SequenceFile.Reader tempQr = new SequenceFile.Reader(localFs, tempQPath, jobConf); closeables.addFirst(tempQr); int qCnt = 0; while (tempQr.next(tempKey, value)) { value .setBlock(GivensThinSolver.computeQtHat(value.getBlock(), qCnt, new CopyConstructorIterator<>(rSubseq.iterator()))); if (qCnt == 1) { /* * just merge r[0] <- r[1] so it doesn't have to repeat in subsequent * computeQHat iterators */ GivensThinSolver.mergeR(rSubseq.get(0), rSubseq.remove(1)); } else { qCnt++; } outputQHat(value); } assert rSubseq.size() == 1; outputR(new VectorWritable(new DenseVector(rSubseq.get(0).getData(), true))); }
/**
 * Processes one input row: fills {@code yRow} from the input via
 * {@code omega.computeYRow}, subtracts {@code sb} from it when {@code sb} is
 * set, and passes the result to the QR step.
 *
 * @param key     key of the input row, forwarded to the QR step
 * @param value   the input row
 * @param context mapper context; not used directly here
 */
@Override
protected void map(Writable key, VectorWritable value, Context context)
    throws IOException, InterruptedException {
  omega.computeYRow(value.get(), yRow);
  if (sb != null) {
    yRow.assign(sb, Functions.MINUS);
  }
  qr.collect(key, yRow);
}
// Build the QRFirstStep that map() feeds rows into via qr.collect(...). It receives the
// configuration and two collectors (presumably the Q-hat and R-hat output sinks -- confirm).
qr = new QRFirstStep(conf, qhatCollector, rhatCollector);
/**
 * Spills the solver's current block: its R is remembered in {@code rSubseq},
 * its thin Q^t is appended to the temp file, and the solver is reset for the
 * next block.
 *
 * @throws IOException if appending to the temp file fails
 */
private void flushSolver() throws IOException {
  UpperTriangular rTilde = qSolver.getRTilde();
  double[][] qtTilde = qSolver.getThinQtTilde();

  rSubseq.add(rTilde);

  value.setBlock(qtTilde);
  getTempQw().append(tempKey, value);

  /*
   * The block is kept dense on purpose. A sparse row matrix might look
   * attractive, but compression should take care of the on-disk size, and in
   * memory a sparse layout would mostly mean rehashing and GC thrashing.
   */
  value.setBlock(null);
  qSolver.reset();
}
/**
 * Buffers one incoming row in the look-ahead list.
 *
 * <p>Once the look-ahead list holds {@code kp} rows, its oldest row is moved
 * into the solver (flushing the solver first if it is full) and that row's
 * array is reused for the incoming data; otherwise a new array is allocated.
 *
 * @param incomingYRow the row to buffer
 * @throws IOException if flushing the solver fails
 */
protected void map(Vector incomingYRow) throws IOException {
  double[] row;
  if (yLookahead.size() != kp) {
    row = new double[kp];
  } else {
    if (qSolver.isFull()) {
      flushSolver();
      blockCnt++;
    }
    // hand the oldest buffered row to the solver, then recycle its array
    row = yLookahead.remove(0);
    qSolver.appendRow(row);
  }

  if (!incomingYRow.isDense()) {
    // clear first: the array may be a recycled one
    Arrays.fill(row, 0);
    for (Element yEl : incomingYRow.nonZeroes()) {
      row[yEl.index()] = yEl.get();
    }
  } else {
    for (int col = 0; col < kp; col++) {
      row[col] = incomingYRow.get(col);
    }
  }

  yLookahead.add(row);
}
/**
 * Emits the final Q-hat block(s) and the R output.
 *
 * <p>When exactly one block was produced, the solver's current state is
 * written out directly; for any other block count the work is handed to
 * {@code secondPass()}.
 *
 * @throws IOException if writing the output or running the second pass fails
 */
private void flushQBlocks() throws IOException {
  if (blockCnt != 1) {
    secondPass();
    return;
  }

  /*
   * Single block: no temp file and no second pass are needed. This is
   * expected to be the common, most efficient mode.
   */
  value.setBlock(qSolver.getThinQtTilde());
  outputQHat(value);
  outputR(new VectorWritable(new DenseVector(qSolver.getRTilde().getData(), true)));
}
protected void cleanup() throws IOException { try { if (qSolver == null && yLookahead.isEmpty()) { return; } if (qSolver == null) { qSolver = new GivensThinSolver(yLookahead.size(), kp); } // grow q solver up if necessary qSolver.adjust(qSolver.getCnt() + yLookahead.size()); while (!yLookahead.isEmpty()) { qSolver.appendRow(yLookahead.remove(0)); } assert qSolver.isFull(); if (++blockCnt > 1) { flushSolver(); assert tempQw != null; closeables.remove(tempQw); Closeables.close(tempQw, false); } flushQBlocks(); } finally { IOUtils.close(closeables); } }
private void secondPass() throws IOException { qSolver = null; // release mem FileSystem localFs = FileSystem.getLocal(jobConf); SequenceFile.Reader tempQr = new SequenceFile.Reader(localFs, tempQPath, jobConf); closeables.addFirst(tempQr); int qCnt = 0; while (tempQr.next(tempKey, value)) { value .setBlock(GivensThinSolver.computeQtHat(value.getBlock(), qCnt, new CopyConstructorIterator<UpperTriangular>(rSubseq .iterator()))); if (qCnt == 1) { /* * just merge r[0] <- r[1] so it doesn't have to repeat in subsequent * computeQHat iterators */ GivensThinSolver.mergeR(rSubseq.get(0), rSubseq.remove(1)); } else { qCnt++; } outputQHat(value); } assert rSubseq.size() == 1; outputR(new VectorWritable(new DenseVector(rSubseq.get(0).getData(), true))); }
/**
 * Processes one input row: fills {@code yRow} from the input via
 * {@code omega.computeYRow}, subtracts {@code sb} from it when {@code sb} is
 * set, and passes the result to the QR step.
 *
 * @param key     key of the input row, forwarded to the QR step
 * @param value   the input row
 * @param context mapper context; not used directly here
 */
@Override
protected void map(Writable key, VectorWritable value, Context context)
    throws IOException, InterruptedException {
  omega.computeYRow(value.get(), yRow);
  if (sb != null) {
    yRow.assign(sb, Functions.MINUS);
  }
  qr.collect(key, yRow);
}