/**
 * Returns the next tuple from the currently active scanner, transparently
 * advancing across the underlying scanners as each one is exhausted.
 *
 * Fix: the original advanced only one scanner per call, so if the next
 * scanner yielded no tuples at all, this method returned {@code null}
 * prematurely — callers looping {@code while (next() != null)} would stop
 * even though later scanners still had data. We now keep advancing until
 * a tuple is found or all scanners are exhausted.
 *
 * @return the next tuple, or {@code null} when all scanners are exhausted
 * @throws IOException if an underlying scanner fails
 */
@Override
public Tuple next() throws IOException {
  if (currentScanner != null) {
    tuple = currentScanner.next();
  }
  if (tuple != null) {
    return tuple;
  }

  // Current scanner is exhausted (or absent); advance until some scanner
  // yields a tuple or there are no scanners left.
  while (tuple == null) {
    if (currentScanner != null) {
      currentScanner.close();
      // Fold the finished scanner's I/O stats into the merged totals.
      TableStats scannerStats = currentScanner.getInputStats();
      if (scannerStats != null) {
        tableStats.setReadBytes(tableStats.getReadBytes() + scannerStats.getReadBytes());
        tableStats.setNumRows(tableStats.getNumRows() + scannerStats.getNumRows());
      }
    }

    currentScanner = getNextScanner();
    if (currentScanner == null) {
      break; // no more scanners: genuine end of the merged input
    }
    tuple = currentScanner.next();
  }
  return tuple;
}
/**
 * Is the file at the given path splittable? Usually true, but if the file
 * is stream-compressed, it will not be.
 * <p/>
 * <code>FileInputFormat</code> implementations can override this and return
 * <code>false</code> to ensure that individual input files are never split-up
 * so that Mappers process entire files.
 *
 * @param meta   the table meta describing the file format
 * @param schema the table schema
 * @param path   the file to check
 * @param status the file status (provides the file length)
 * @return whether this file is splittable
 * @throws IOException if the scanner cannot be created or closed
 */
protected boolean isSplittable(TableMeta meta, Schema schema, Path path, FileStatus status)
    throws IOException {
  Scanner scanner = getFileScanner(meta, schema, path, status);
  try {
    return scanner.isSplittable();
  } finally {
    // Always release the scanner, even if isSplittable() throws;
    // the original leaked the scanner on an exception.
    scanner.close();
  }
}
/**
 * Merges {@code mergeFanout} input files into a single intermediate run file
 * and returns a {@link Chunk} describing the merged output.
 *
 * Fix: the merger and the writer are now closed in {@code finally} blocks,
 * so a failure mid-merge no longer leaks the k-way merger's open inputs or
 * the partially-written output file's resources.
 *
 * @return a Chunk referencing the newly written intermediate file
 * @throws Exception if merging or writing fails
 */
@Override
public Chunk call() throws Exception {
  final Path outputPath = getChunkPathForWrite(level + 1, nextRunId);
  info(LOG, mergeFanout + " files are being merged to an output file " + outputPath.getName());
  long mergeStartTime = System.currentTimeMillis();

  final Scanner merger = createKWayMerger(inputFiles, startIdx, mergeFanout);
  merger.init();

  final DirectRawFileWriter output;
  try {
    output = new DirectRawFileWriter(context.getConf(), null, inSchema, intermediateMeta, outputPath);
    output.init();
    try {
      // Drain the merger into the output run file.
      Tuple mergeTuple;
      while ((mergeTuple = merger.next()) != null) {
        output.addTuple(mergeTuple);
      }
    } finally {
      output.close();
    }
  } finally {
    merger.close();
  }

  long mergeEndTime = System.currentTimeMillis();
  info(LOG, outputPath.getName() + " is written to a disk. ("
      + FileUtil.humanReadableByteCount(output.getOffset(), false)
      + " bytes, " + (mergeEndTime - mergeStartTime) + " msec)");

  File f = new File(localFS.makeQualified(outputPath).toUri());
  FileFragment frag = new FileFragment(
      INTERMEDIATE_FILE_PREFIX + outputPath.getName(), outputPath, 0, f.length());
  return new Chunk(inSchema, frag, intermediateMeta);
}
}
scanner.init(); while ((scanner.next()) != null) { tupleCnt++; scanner.close(); assertEquals(tupleNum, tupleCnt); assertNotSame(appender.getStats().getNumBytes().longValue(), scanner.getInputStats().getNumBytes().longValue()); assertEquals(appender.getStats().getNumRows().longValue(), scanner.getInputStats().getNumRows().longValue());
.build(); Scanner scanner = sm.getScanner(meta, schema, fragment, target); scanner.init(); int tupleCnt = 0; Tuple tuple; while ((tuple = scanner.next()) != null) { verifyProjectedFields(scanner.isProjectable(), tuple, tupleCnt); tupleCnt++; scanner.close();
assertTrue(scanner.isSplittable()); scanner.init(); int tupleCnt = 0; while (scanner.next() != null) { tupleCnt++; scanner.close(); assertTrue(scanner.isSplittable()); scanner.init(); while (scanner.next() != null) { tupleCnt++; scanner.close();
Scanner scanner = sm.getScanner(meta, schema, fragment, null); assertEquals(0.0f, scanner.getProgress(), 0.0f); scanner.init(); assertNotNull(scanner.next()); assertNull(null, scanner.next()); scanner.close(); assertEquals(1.0f, scanner.getProgress(), 0.0f);
try { this.result = externalMergeAndSort(mergedInputFragments); this.inputBytes = result.getInputStats().getNumBytes(); } catch (Exception e) { throw new PhysicalPlanningException(e); result.init(); return result.next();
/**
 * Closes the active underlying scanner (if any) and marks this merge scan
 * as finished.
 *
 * @throws IOException if closing the underlying scanner fails
 */
@Override
public void close() throws IOException {
  final Scanner active = currentScanner;
  if (active != null) {
    active.close();
    currentScanner = null;
  }
  iterator = null;
  // A closed scan reports full progress.
  progress = 1.0f;
}
private void initScanner(Schema projected) throws IOException { TableDesc table = plan.getTableDesc(); TableMeta meta = table.getMeta(); if (fragments.length > 1) { this.scanner = new MergeScanner( context.getConf(), plan.getPhysicalSchema(), meta, FragmentConvertor.convert(context.getConf(), fragments), projected ); } else { Tablespace tablespace = TablespaceManager.get(table.getUri()); this.scanner = tablespace.getScanner( meta, plan.getPhysicalSchema(), FragmentConvertor.convert(context.getConf(), fragments[0]), projected); } if (scanner.isSelectable()) { // TODO - isSelectable should be moved to FormatProperty scanner.setFilter(qual); } if (plan.hasLimit()) { scanner.setLimit(plan.getLimit()); } scanner.init(); }
@Override public TableStats getInputStats() { if (scanner != null) { return scanner.getInputStats(); } else { if (inputStats != null) { return inputStats; } else { // If no fragment, there is no scanner. So, we need to create a dummy table stat. return new TableStats(); } } }
/**
 * Pulls the next tuple from the scanner eagerly, caching it in
 * {@code currentTuple}; a {@code null} result signals end of input.
 *
 * @return {@code true} while the scanner produces tuples
 * @throws IOException if the scanner fails
 */
@Override
public boolean hasNext() throws IOException {
  currentTuple = scanner.next();
  return currentTuple != null;
}
/**
 * Builds a scanner that reads a list of fragments sequentially as one input.
 * Fragments whose estimated volume is zero are dropped. Input order of the
 * remaining fragments is preserved — reordering would corrupt the results of
 * sort queries.
 *
 * @param conf            configuration (expected to be a TajoConf)
 * @param schema          the physical input schema
 * @param meta            the table meta
 * @param rawFragmentList candidate fragments; zero-volume ones are skipped
 * @param target          the projection schema
 * @throws IOException if the first scanner cannot be opened
 */
public MergeScanner(Configuration conf, Schema schema, TableMeta meta,
                    List<Fragment> rawFragmentList, Schema target) throws IOException {
  this.schema = schema;
  this.meta = meta;
  this.target = target;
  this.fragments = new ArrayList<Fragment>();

  long totalBytes = 0;
  for (Fragment candidate : rawFragmentList) {
    final long volume = TablespaceManager.guessFragmentVolume((TajoConf) conf, candidate);
    if (volume > 0) {
      totalBytes += volume;
      fragments.add(candidate);
    }
  }

  // it should keep the input order. Otherwise, it causes wrong result of sort queries.
  this.reset();

  if (currentScanner != null) {
    this.projectable = currentScanner.isProjectable();
    this.selectable = currentScanner.isSelectable();
  }

  tableStats = new TableStats();
  tableStats.setNumBytes(totalBytes);
  tableStats.setNumBlocks(fragments.size());
  for (Column column : schema.getRootColumns()) {
    tableStats.addColumnStat(new ColumnStats(column));
  }
}
/**
 * Returns a Scanner for the given fragment. A zero-length file fragment is
 * served by a {@link NullScanner}; otherwise the format-specific scanner
 * class for the store type is instantiated.
 *
 * Fix: {@code target} was dereferenced unconditionally via
 * {@code target.toArray()}, so a null projection caused an NPE. It now
 * defaults to the full {@code schema}, matching the {@code @Nullable}
 * overload of this method.
 *
 * @param meta     the table meta
 * @param schema   the input schema
 * @param fragment the fragment to scan
 * @param target   the projection schema; {@code null} means all columns
 * @return an uninitialized Scanner instance
 * @throws IOException if the scanner cannot be created
 */
@Override
public Scanner getScanner(TableMeta meta, Schema schema, Fragment fragment, Schema target)
    throws IOException {
  if (target == null) {
    target = schema;
  }

  if (fragment instanceof FileFragment) {
    FileFragment fileFragment = (FileFragment) fragment;
    if (fileFragment.getEndKey() == 0) {
      // Zero-length fragment: nothing to read.
      Scanner scanner = new NullScanner(conf, schema, meta, fileFragment);
      scanner.setTarget(target.toArray());
      return scanner;
    }
  }

  Scanner scanner;
  Class<? extends Scanner> scannerClass = getScannerClass(meta.getStoreType());
  scanner = newScannerInstance(scannerClass, conf, schema, meta, fragment);
  if (scanner.isProjectable()) {
    scanner.setTarget(target.toArray());
  }
  return scanner;
}
}
/**
 * Returns a Scanner instance for the given fragment.
 *
 * @param meta The table meta
 * @param schema The input schema
 * @param fragment The fragment for scanning
 * @param target The output schema; {@code null} selects all columns
 * @return Scanner instance
 * @throws java.io.IOException
 */
public Scanner getScanner(TableMeta meta, Schema schema, Fragment fragment,
                          @Nullable Schema target) throws IOException {
  // A missing projection means "read every column".
  final Schema projection = (target != null) ? target : schema;

  if (fragment.isEmpty()) {
    // Empty fragment: serve a scanner that yields nothing.
    Scanner nullScanner = new NullScanner(conf, schema, meta, fragment);
    nullScanner.setTarget(projection.toArray());
    return nullScanner;
  }

  final Class<? extends Scanner> scannerClass = getScannerClass(meta.getDataFormat());
  Scanner scanner = OldStorageManager.newScannerInstance(scannerClass, conf, schema, meta, fragment);
  scanner.setTarget(projection.toArray());
  return scanner;
}
@Override public void init() throws IOException { // Why we should check nullity? See https://issues.apache.org/jira/browse/TAJO-1422 if (fragments == null) { scanIt = new EmptyScanIterator(); } else { Schema projectedFields = getProjectSchema( plan.getInSchema(), plan.getOutSchema(), Optional.ofNullable(plan.getTargets()), Optional.ofNullable(plan.getQual()) ); initScanner(projectedFields); // See Scanner.isProjectable() method. Depending on the result of isProjectable(), // the width of retrieved tuple is changed. // // If projectable, the retrieved tuple will contain only projected fields. // Otherwise, the retrieved tuple will contain projected fields and NullDatum // for non-projected fields. Schema actualInSchema = scanner.isProjectable() ? projectedFields : inSchema; initializeProjector(actualInSchema); if (plan.hasQual()) { qual.bind(context.getEvalContext(), actualInSchema); } initScanIterator(); } super.init(); }
/**
 * Reports scan progress as the equally-weighted average of the two child
 * scans once both exist; before that, the merger's own progress is used.
 *
 * Fix: the original guarded only {@code leftScan}, so a non-null left with
 * a null right threw an NPE. Both sides are now checked.
 *
 * @return progress in [0.0, 1.0]
 */
@Override
public float getProgress() {
  if (leftScan == null || rightScan == null) {
    return mergerProgress;
  }
  return leftScan.getProgress() * 0.5f + rightScan.getProgress() * 0.5f;
}
private void initScanIterator() { // We should use FilterScanIterator only if underlying storage does not support filter push down. if (plan.hasQual() && !scanner.isSelectable()) { scanIt = new FilterScanIterator(scanner, qual); } else { scanIt = new FullScanIterator(scanner); } }