/**
 * Advances to the next fragment and returns an initialized {@link Scanner} over it,
 * or {@code null} when no fragments remain.
 */
private Scanner getNextScanner() throws IOException {
  // Guard clause: nothing left to scan.
  if (!iterator.hasNext()) {
    return null;
  }
  currentFragment = iterator.next();
  currentScanner =
      TablespaceManager.getLocalFs().getScanner(meta, schema, currentFragment, target);
  currentScanner.init();
  return currentScanner;
}
/**
 * Opens a scanner over the next fragment from the iterator.
 *
 * @return the freshly initialized scanner, or {@code null} if the iterator is exhausted
 */
private Scanner getNextScanner() throws IOException {
  if (iterator.hasNext()) {
    currentFragment = iterator.next();
    // Assign before init() so the field tracks the scanner even if init() fails,
    // matching the original assignment order.
    currentScanner = TablespaceManager.getLocalFs()
        .getScanner(meta, schema, currentFragment, target);
    currentScanner.init();
    return currentScanner;
  }
  return null;
}
/**
 * Advances to the next fragment and returns an initialized {@link Scanner} over it,
 * or {@code null} when no fragments remain.
 *
 * NOTE(review): this variant uses the legacy {@code StorageManagerFactory} lookup while
 * sibling implementations use {@code TablespaceManager.getLocalFs()} — confirm whether
 * this one should be migrated to the newer API, or whether it intentionally resolves a
 * (possibly non-local) storage manager from {@code conf}.
 */
private Scanner getNextScanner() throws IOException { if (iterator.hasNext()) { currentFragment = iterator.next(); currentScanner = StorageManagerFactory.getStorageManager((TajoConf)conf).getScanner(meta, schema, currentFragment, target); currentScanner.init(); return currentScanner; } else { return null; } }
/**
 * Merges {@code mergeFanout} input files (starting at {@code startIdx}) into a single
 * intermediate file via a k-way merge, and returns the resulting {@link Chunk}.
 *
 * Fix: the merge scanner and the output writer are now closed in a finally block, so
 * they are no longer leaked when {@code next()}/{@code addTuple()} throws.
 *
 * @return a Chunk describing the newly written intermediate file
 * @throws Exception on any merge or I/O failure
 */
@Override
public Chunk call() throws Exception {
  final Path outputPath = getChunkPathForWrite(level + 1, nextRunId);
  info(LOG, mergeFanout + " files are being merged to an output file " + outputPath.getName());
  long mergeStartTime = System.currentTimeMillis();

  final Scanner merger = createKWayMerger(inputFiles, startIdx, mergeFanout);
  merger.init();
  final DirectRawFileWriter output =
      new DirectRawFileWriter(context.getConf(), null, inSchema, intermediateMeta, outputPath);
  output.init();
  try {
    Tuple mergeTuple;
    while ((mergeTuple = merger.next()) != null) {
      output.addTuple(mergeTuple);
    }
  } finally {
    // Release both ends of the pipeline even on failure; close the writer even if
    // closing the scanner throws.
    try {
      merger.close();
    } finally {
      output.close();
    }
  }

  long mergeEndTime = System.currentTimeMillis();
  info(LOG, outputPath.getName() + " is written to a disk. ("
      + FileUtil.humanReadableByteCount(output.getOffset(), false)
      + " bytes, " + (mergeEndTime - mergeStartTime) + " msec)");

  File f = new File(localFS.makeQualified(outputPath).toUri());
  FileFragment frag = new FileFragment(
      INTERMEDIATE_FILE_PREFIX + outputPath.getName(), outputPath, 0, f.length());
  return new Chunk(inSchema, frag, intermediateMeta);
}
}
/**
 * K-way merges the selected input files into one intermediate file on local disk and
 * returns a {@link Chunk} pointing at it.
 *
 * Fix: resource leak — the scanner and writer were not closed when the merge loop
 * threw; both are now closed unconditionally in a finally block.
 *
 * @return a Chunk over the written intermediate file
 * @throws Exception on merge or I/O failure
 */
@Override
public Chunk call() throws Exception {
  final Path outputPath = getChunkPathForWrite(level + 1, nextRunId);
  info(LOG, mergeFanout + " files are being merged to an output file " + outputPath.getName());
  long mergeStartTime = System.currentTimeMillis();

  final Scanner merger = createKWayMerger(inputFiles, startIdx, mergeFanout);
  merger.init();
  final DirectRawFileWriter output =
      new DirectRawFileWriter(context.getConf(), null, inSchema, intermediateMeta, outputPath);
  output.init();
  try {
    Tuple mergeTuple;
    while ((mergeTuple = merger.next()) != null) {
      output.addTuple(mergeTuple);
    }
  } finally {
    // Close writer even if the scanner's close() throws.
    try {
      merger.close();
    } finally {
      output.close();
    }
  }

  long mergeEndTime = System.currentTimeMillis();
  info(LOG, outputPath.getName() + " is written to a disk. ("
      + FileUtil.humanReadableByteCount(output.getOffset(), false)
      + " bytes, " + (mergeEndTime - mergeStartTime) + " msec)");

  File f = new File(localFS.makeQualified(outputPath).toUri());
  FileFragment frag = new FileFragment(
      INTERMEDIATE_FILE_PREFIX + outputPath.getName(), outputPath, 0, f.length());
  return new Chunk(inSchema, frag, intermediateMeta);
}
}
/**
 * Scans an Apache access log with the REGEX storage format and checks the first two rows.
 *
 * Fix: TEXT_REGEX is now set on {@code meta} BEFORE the scanner is created (the original
 * set it after {@code getScanner(meta, ...)}), so the serde sees the pattern regardless of
 * whether the scanner captures table properties at construction or at init() — matching
 * the sibling test that configures the regex first.
 */
@Test
public void testApacheAccessLogScanner() throws IOException {
  TajoConf conf = new TajoConf();
  TableMeta meta = CatalogUtil.newTableMeta(BuiltinStorages.REGEX, conf);
  meta.putProperty(StorageConstants.TEXT_REGEX, apacheWeblogPattern);

  Path tablePath = new Path(getResourcePath("dataset", "TestRegexSerDe"), "access.log");
  FileSystem fs = FileSystem.getLocal(conf);
  FileStatus status = fs.getFileStatus(tablePath);
  FileFragment fragment = new FileFragment("table", tablePath, 0, status.getLen());

  Scanner scanner = TablespaceManager.getLocalFs().getScanner(meta, schema, fragment, null);
  scanner.init();

  Tuple tuple = scanner.next();
  assertEquals(rows[0], tuple);
  assertNotNull(tuple = scanner.next());
  assertEquals(rows[1], tuple);
  scanner.close();
}
/**
 * Verifies that a line with an unterminated quote is parsed into exactly one row with
 * the expected (partially quoted) tuple rendering.
 */
@Test
public void testIncompleteQuote() throws IOException, CloneNotSupportedException {
  TableMeta meta = CatalogUtil.newTableMeta(BuiltinStorages.TEXT, conf);
  meta.putProperty(StorageUtil.TEXT_DELIMITER, ",");
  meta.putProperty(StorageUtil.QUOTE_CHAR, "\"");

  FileFragment fragment = getFileFragment("testIncompleteQuote.txt");
  Scanner scanner = TablespaceManager.getLocalFs().getScanner(meta, schema, fragment, null);
  scanner.init();

  int rowCount = 0;
  for (Tuple row = scanner.next(); row != null; row = scanner.next()) {
    assertEquals("(f,hyunsik\",NULL,NULL,NULL,NULL,0.0,\"hyunsik,hyunsik)", row.toString());
    rowCount++;
  }
  assertEquals(1, rowCount);
  scanner.close();
}
/**
 * With error tolerance set to 0, the first malformed JSON record must surface as an
 * IOException from {@code next()}; reaching {@code fail()} means no error was raised.
 *
 * Fix: removed a stray empty statement ({@code ;;}) after the newTableMeta call.
 */
@Test
public void testNoErrorTolerance() throws IOException {
  TableMeta meta = CatalogUtil.newTableMeta(BuiltinStorages.JSON, conf);
  meta.putProperty(StorageUtil.TEXT_ERROR_TOLERANCE_MAXNUM, "0");

  FileFragment fragment = getFileFragment("testErrorTolerance2.json");
  Scanner scanner = TablespaceManager.getLocalFs().getScanner(meta, schema, fragment, null);
  scanner.init();
  try {
    scanner.next();
  } catch (IOException ioe) {
    // Expected: zero tolerance means the first bad record aborts the scan.
    return;
  } finally {
    scanner.close();
  }
  fail();
}
.build(); Scanner scanner = TablespaceManager.getLocalFs().getScanner(meta, schema, fragment, null); scanner.init();
/**
 * With tolerance 1, a truncated trailing JSON record is skipped and the scan simply
 * ends ({@code next()} returns {@code null}).
 *
 * Fix: removed a stray empty statement ({@code ;;}) after the newTableMeta call.
 * NOTE(review): this test builds a local {@code TajoConf} while sibling tests use the
 * class-level {@code conf} field — confirm whether the local is intentional.
 */
@Test
public void testIgnoreTruncatedValueErrorTolerance() throws IOException {
  TajoConf conf = new TajoConf();
  TableMeta meta = CatalogUtil.newTableMeta(BuiltinStorages.JSON, conf);
  meta.putProperty(StorageUtil.TEXT_ERROR_TOLERANCE_MAXNUM, "1");

  FileFragment fragment = getFileFragment("testErrorTolerance3.json");
  Scanner scanner = TablespaceManager.getLocalFs().getScanner(meta, schema, fragment, null);
  scanner.init();
  try {
    Tuple tuple = scanner.next();
    assertNull(tuple);
  } finally {
    scanner.close();
  }
}
/**
 * With tolerance 1, the first malformed record is skipped (two good rows are read),
 * but the second malformed record must raise an IOException.
 *
 * Fix: removed a stray empty statement ({@code ;;}) after the newTableMeta call.
 */
@Test
public void testIgnoreOneErrorTolerance() throws IOException {
  TableMeta meta = CatalogUtil.newTableMeta(BuiltinStorages.JSON, conf);
  meta.putProperty(StorageUtil.TEXT_ERROR_TOLERANCE_MAXNUM, "1");

  FileFragment fragment = getFileFragment("testErrorTolerance1.json");
  Scanner scanner = TablespaceManager.getLocalFs().getScanner(meta, schema, fragment, null);
  scanner.init();

  assertNotNull(scanner.next());
  assertNotNull(scanner.next());
  try {
    scanner.next();
  } catch (IOException ioe) {
    // Expected: tolerance budget (1) already spent on the first bad record.
    LOG.error(ioe);
    return;
  } finally {
    scanner.close();
  }
  fail();
}
meta.putProperty(StorageConstants.TEXT_REGEX, apacheWeblogPattern); Scanner scanner = sm.getScanner(meta, schema, fragment, null); scanner.init();
/**
 * With tolerance -1 (unlimited), every malformed record is skipped and all three valid
 * rows are returned.
 *
 * Fix: removed a stray empty statement ({@code ;;}) after the newTableMeta call.
 */
@Test
public void testIgnoreAllErrors() throws IOException {
  TableMeta meta = CatalogUtil.newTableMeta(BuiltinStorages.JSON, conf);
  meta.putProperty(StorageUtil.TEXT_ERROR_TOLERANCE_MAXNUM, "-1");

  FileFragment fragment = getFileFragment("testErrorTolerance1.json");
  Scanner scanner = TablespaceManager.getLocalFs().getScanner(meta, schema, fragment, null);
  scanner.init();

  Tuple tuple;
  int i = 0;
  while ((tuple = scanner.next()) != null) {
    assertEquals(baseTuple, tuple);
    i++;
  }
  assertEquals(3, i);
  scanner.close();
}
private void initScanner(Schema projected) throws IOException { TableDesc table = plan.getTableDesc(); TableMeta meta = table.getMeta(); if (fragments.length > 1) { this.scanner = new MergeScanner( context.getConf(), plan.getPhysicalSchema(), meta, FragmentConvertor.convert(context.getConf(), fragments), projected ); } else { Tablespace tablespace = TablespaceManager.get(table.getUri()); this.scanner = tablespace.getScanner( meta, plan.getPhysicalSchema(), FragmentConvertor.convert(context.getConf(), fragments[0]), projected); } if (scanner.isSelectable()) { // TODO - isSelectable should be moved to FormatProperty scanner.setFilter(qual); } if (plan.hasLimit()) { scanner.setLimit(plan.getLimit()); } scanner.init(); }
/**
 * Creates and initializes {@code this.scanner} over the plan's fragments with the given
 * projection: a {@code MergeScanner} when there is more than one fragment, otherwise a
 * scanner obtained from the table's tablespace. Pushes the filter down when the scanner
 * is selectable and applies the plan's limit when present, then calls {@code init()}.
 */
private void initScanner(Schema projected) throws IOException { TableDesc table = plan.getTableDesc(); TableMeta meta = table.getMeta(); if (fragments.length > 1) { this.scanner = new MergeScanner( context.getConf(), plan.getPhysicalSchema(), meta, FragmentConvertor.convert(context.getConf(), fragments), projected ); } else { Tablespace tablespace = TablespaceManager.get(table.getUri()); this.scanner = tablespace.getScanner( meta, plan.getPhysicalSchema(), FragmentConvertor.convert(context.getConf(), fragments[0]), projected); } if (scanner.isSelectable()) { // TODO - isSelectable should be moved to FormatProperty scanner.setFilter(qual); } if (plan.hasLimit()) { scanner.setLimit(plan.getLimit()); } scanner.init(); }
/**
 * Verifies that quoted fields are stripped of their quote characters: all six rows of
 * the fixture must equal {@code baseTuple}.
 */
@Test
public void testStripQuote() throws IOException, CloneNotSupportedException {
  TableMeta meta = CatalogUtil.newTableMeta(BuiltinStorages.TEXT, conf);
  meta.putProperty(StorageUtil.TEXT_DELIMITER, ",");
  meta.putProperty(StorageUtil.QUOTE_CHAR, "\"");

  FileFragment fragment = getFileFragment("testStripQuote.txt");
  Scanner scanner = TablespaceManager.getLocalFs().getScanner(meta, schema, fragment, null);
  scanner.init();

  int rowCount = 0;
  for (Tuple row = scanner.next(); row != null; row = scanner.next()) {
    assertEquals(baseTuple, row);
    rowCount++;
  }
  assertEquals(6, rowCount);
  scanner.close();
}
/**
 * Verifies that TEXT_SKIP_HEADER_LINE=2 skips the first two JSON records: the remaining
 * four rows must carry sequential int2 values starting at 19.
 *
 * Fix: removed a stray empty statement ({@code ;;}) after the newTableMeta call.
 */
@Test
public void testSkippingHeaderWithJson() throws IOException {
  TableMeta meta = CatalogUtil.newTableMeta(BuiltinStorages.JSON, conf);
  meta.putProperty(StorageConstants.TEXT_SKIP_HEADER_LINE, "2");

  FileFragment fragment = getFileFragment("testNormal.json");
  Scanner scanner = TablespaceManager.getLocalFs().getScanner(meta, schema, fragment, null);
  scanner.init();

  int lines = 0;
  try {
    while (true) {
      Tuple tuple = scanner.next();
      if (tuple != null) {
        assertEquals(19 + lines, tuple.getInt2(2));
        lines++;
      } else {
        break;
      }
    }
  } finally {
    assertEquals(4, lines);
    scanner.close();
  }
}
/**
 * Verifies that TEXT_SKIP_HEADER_LINE=1 skips the first text line: the remaining six
 * rows must carry sequential int2 values starting at 17.
 */
@Test
public void testSkippingHeaderWithText() throws IOException {
  TableMeta meta = CatalogUtil.newTableMeta(BuiltinStorages.TEXT, conf);
  meta.putProperty(StorageConstants.TEXT_SKIP_HEADER_LINE, "1");
  meta.putProperty(StorageConstants.TEXT_DELIMITER, ",");

  FileFragment fragment = getFileFragment("testSkip.txt");
  Scanner scanner = TablespaceManager.getLocalFs().getScanner(meta, schema, fragment, null);
  scanner.init();

  int lines = 0;
  try {
    for (Tuple tuple = scanner.next(); tuple != null; tuple = scanner.next()) {
      assertEquals(17 + lines, tuple.getInt2(2));
      lines++;
    }
  } finally {
    // Assert inside finally so the count is checked even if the scan aborts early.
    assertEquals(6, lines);
    scanner.close();
  }
}
}