/**
 * Obtains a scanner for the given fragment and hands it back as a {@link SeekableScanner}.
 *
 * <p>The scanner is created by {@code getScanner(meta, schema, fragment, target)} and then cast;
 * if that scanner is not a {@code SeekableScanner} the cast fails with a
 * {@link ClassCastException}.
 *
 * @param meta     the table meta passed through to {@code getScanner}
 * @param schema   the input schema passed through to {@code getScanner}
 * @param fragment the fragment to scan
 * @param target   the output schema passed through to {@code getScanner}
 * @return the scanner produced by {@code getScanner}, typed as a seekable scanner
 * @throws IOException if {@code getScanner} fails
 */
public synchronized SeekableScanner getSeekableScanner(TableMeta meta,
                                                       Schema schema,
                                                       Fragment fragment,
                                                       Schema target) throws IOException {
  return (SeekableScanner) getScanner(meta, schema, fragment, target);
}
/**
 * Obtains a {@link SeekableScanner} for a fragment supplied in its protobuf form.
 *
 * <p>The {@code FragmentProto} is first converted with
 * {@code FragmentConvertor.convert(conf, fragment)}; the result is passed to
 * {@code getScanner} and the returned scanner is cast to {@code SeekableScanner}. The cast
 * fails with a {@link ClassCastException} if the scanner is not seekable.
 *
 * @param meta     the table meta passed through to {@code getScanner}
 * @param schema   the input schema passed through to {@code getScanner}
 * @param fragment the protobuf fragment to convert and scan
 * @param target   the output schema passed through to {@code getScanner}
 * @return the scanner produced by {@code getScanner}, typed as a seekable scanner
 * @throws IOException if {@code getScanner} fails
 */
public synchronized SeekableScanner getSeekableScanner(TableMeta meta,
                                                       Schema schema,
                                                       FragmentProto fragment,
                                                       Schema target) throws IOException {
  return (SeekableScanner) getScanner(
      meta,
      schema,
      FragmentConvertor.convert(conf, fragment),
      target);
}
/**
 * Builds a seekable scanner from a protobuf-encoded fragment.
 *
 * <p>Converts {@code fragment} via {@code FragmentConvertor.convert(conf, fragment)}, delegates
 * to {@code getScanner}, and casts the result to {@link SeekableScanner}. A scanner that is not
 * a {@code SeekableScanner} makes the cast throw {@link ClassCastException}.
 *
 * @param meta     the table meta forwarded to {@code getScanner}
 * @param schema   the input schema forwarded to {@code getScanner}
 * @param fragment the protobuf fragment that is converted before scanning
 * @param target   the output schema forwarded to {@code getScanner}
 * @return the scanner returned by {@code getScanner}, cast to {@code SeekableScanner}
 * @throws IOException if {@code getScanner} fails
 */
public synchronized SeekableScanner getSeekableScanner(TableMeta meta, Schema schema,
                                                       FragmentProto fragment, Schema target)
    throws IOException {
  return (SeekableScanner) getScanner(meta, schema, FragmentConvertor.convert(conf, fragment), target);
}
/**
 * Advances to the next fragment of {@code iterator} and opens a scanner on it.
 *
 * <p>On success the {@code currentFragment} and {@code currentScanner} fields are replaced and
 * the new scanner has already been initialized. When the iterator is exhausted neither field
 * is touched.
 *
 * @return the freshly initialized scanner, or {@code null} if there are no more fragments
 * @throws IOException if creating or initializing the scanner fails
 */
private Scanner getNextScanner() throws IOException {
  if (!iterator.hasNext()) {
    return null;
  }

  currentFragment = iterator.next();
  currentScanner = TablespaceManager.getLocalFs()
      .getScanner(meta, schema, currentFragment, target);
  currentScanner.init();
  return currentScanner;
}
/**
 * Moves on to the next pending fragment, if any, and returns an initialized scanner for it.
 *
 * <p>Updates {@code currentFragment} and {@code currentScanner} as a side effect; both are left
 * as they were when {@code iterator} has nothing left.
 *
 * @return the new current scanner, already initialized, or {@code null} when no fragment remains
 * @throws IOException if the scanner cannot be created or initialized
 */
private Scanner getNextScanner() throws IOException {
  // Nothing left to scan.
  if (!iterator.hasNext()) {
    return null;
  }

  currentFragment = iterator.next();
  currentScanner =
      TablespaceManager.getLocalFs().getScanner(meta, schema, currentFragment, target);
  currentScanner.init();

  return currentScanner;
}
/**
 * Returns a scanner over the given chunk.
 *
 * <p>For a chunk that is not in memory, the scanner comes from the local file system tablespace
 * using the chunk's own meta, schema and fragment. For a memory chunk, {@code inMemoryTable} is
 * sorted first via {@code this.sort} and then wrapped in a {@code MemTableScanner}; the elapsed
 * time is reported when debug logging is enabled.
 *
 * @param chunk the chunk to scan
 * @return a scanner over the chunk's rows
 * @throws IOException if the tablespace scanner cannot be created
 */
private Scanner getScanner(Chunk chunk) throws IOException {
  if (!chunk.isMemory()) {
    return TablespaceManager.getLocalFs()
        .getScanner(chunk.meta, chunk.schema, chunk.fragment, chunk.schema);
  }

  final long sortStart = System.currentTimeMillis();
  this.sort(inMemoryTable);
  final Scanner memoryScanner =
      new MemTableScanner<>(inMemoryTable, inMemoryTable.size(), inMemoryTable.usedMem());

  if (LOG.isDebugEnabled()) {
    debug(LOG, "Memory Chunk sort ("
        + FileUtil.humanReadableByteCount(inMemoryTable.usedMem(), false) + " bytes, "
        + inMemoryTable.size() + " rows, sort time: "
        + (System.currentTimeMillis() - sortStart) + " msec)");
  }
  return memoryScanner;
}
/**
 * Returns a scanner over the given chunk.
 *
 * <p>A chunk that is not in memory is read through the local file system tablespace with the
 * chunk's own meta, schema and fragment. A memory chunk is served from {@code inMemoryTable},
 * which is sorted with {@code OffHeapRowBlockUtils.sort} and {@code unSafeComparator} before a
 * {@code MemTableScanner} is created over it; the sort time is reported at debug level.
 *
 * @param chunk the chunk to scan
 * @return a scanner over the chunk's rows
 * @throws IOException if the tablespace scanner cannot be created
 */
private Scanner getScanner(Chunk chunk) throws IOException {
  if (!chunk.isMemory()) {
    return TablespaceManager.getLocalFs()
        .getScanner(chunk.meta, chunk.schema, chunk.fragment, chunk.schema);
  }

  final long sortStart = System.currentTimeMillis();
  OffHeapRowBlockUtils.sort(inMemoryTable, unSafeComparator);
  final Scanner memoryScanner =
      new MemTableScanner<>(inMemoryTable, inMemoryTable.size(), inMemoryTable.usedMem());

  if (LOG.isDebugEnabled()) {
    debug(LOG, "Memory Chunk sort ("
        + FileUtil.humanReadableByteCount(inMemoryTable.usedMem(), false) + " bytes, "
        + inMemoryTable.size() + " rows, sort time: "
        + (System.currentTimeMillis() - sortStart) + " msec)");
  }
  return memoryScanner;
}
/**
 * Scans the sample {@code access.log} with the REGEX storage type and checks that the first two
 * tuples equal {@code rows[0]} and {@code rows[1]}.
 *
 * <p>The scanner is closed in a {@code finally} block so that it is released even when one of
 * the assertions fails.
 */
@Test
public void testApacheAccessLogScanner() throws IOException {
  TajoConf conf = new TajoConf();
  TableMeta meta = CatalogUtil.newTableMeta(BuiltinStorages.REGEX, conf);
  Path tablePath = new Path(getResourcePath("dataset", "TestRegexSerDe"), "access.log");
  FileSystem fs = FileSystem.getLocal(conf);
  FileStatus status = fs.getFileStatus(tablePath);
  FileFragment fragment = new FileFragment("table", tablePath, 0, status.getLen());

  Scanner scanner = TablespaceManager.getLocalFs().getScanner(meta, schema, fragment, null);
  // NOTE(review): the regex property is set after the scanner is obtained but before init();
  // this ordering is kept as-is -- confirm the scanner reads the property no earlier than init().
  meta.putProperty(StorageConstants.TEXT_REGEX, apacheWeblogPattern);
  scanner.init();

  try {
    Tuple tuple = scanner.next();
    assertEquals(rows[0], tuple);

    tuple = scanner.next();
    assertNotNull(tuple);
    assertEquals(rows[1], tuple);
  } finally {
    scanner.close();
  }
}
/**
 * Scans {@code testIncompleteQuote.txt} as comma-delimited text with {@code "} as the quote
 * character and expects exactly one tuple with the asserted string form.
 *
 * <p>The scanner is closed in a {@code finally} block so that it is released even when an
 * assertion fails.
 */
@Test
public void testIncompleteQuote() throws IOException, CloneNotSupportedException {
  TableMeta meta = CatalogUtil.newTableMeta(BuiltinStorages.TEXT, conf);
  meta.putProperty(StorageUtil.TEXT_DELIMITER, ",");
  meta.putProperty(StorageUtil.QUOTE_CHAR, "\"");
  FileFragment fragment = getFileFragment("testIncompleteQuote.txt");
  Scanner scanner = TablespaceManager.getLocalFs().getScanner(meta, schema, fragment, null);
  scanner.init();

  try {
    Tuple tuple;
    int i = 0;
    while ((tuple = scanner.next()) != null) {
      assertEquals("(f,hyunsik\",NULL,NULL,NULL,NULL,0.0,\"hyunsik,hyunsik)", tuple.toString());
      i++;
    }
    assertEquals(1, i);
  } finally {
    scanner.close();
  }
}
@Test public void testVarioutType() throws IOException { TajoConf conf = new TajoConf(); TableMeta meta = CatalogUtil.newTableMeta(BuiltinStorages.JSON, conf); Path tablePath = new Path(getResourcePath("dataset", "TestJsonSerDe"), "testVariousType.json"); FileSystem fs = FileSystem.getLocal(conf); FileStatus status = fs.getFileStatus(tablePath); FileFragment fragment = new FileFragment("table", tablePath, 0, status.getLen()); Scanner scanner = TablespaceManager.getLocalFs().getScanner(meta, schema, fragment, null); scanner.init(); Tuple tuple = scanner.next(); assertNotNull(tuple); assertNull(scanner.next()); scanner.close(); Tuple baseTuple = new VTuple(new Datum[] { DatumFactory.createBool(true), // 0 DatumFactory.createChar("hyunsik"), // 1 DatumFactory.createInt2((short) 17), // 2 DatumFactory.createInt4(59), // 3 DatumFactory.createInt8(23L), // 4 DatumFactory.createFloat4(77.9f), // 5 DatumFactory.createFloat8(271.9d), // 6 DatumFactory.createText("hyunsik"), // 7 DatumFactory.createBlob("hyunsik".getBytes()), // 8 NullDatum.get(), // 9 }); assertEquals(baseTuple, tuple); }
/**
 * With {@code TEXT_ERROR_TOLERANCE_MAXNUM} set to {@code "0"}, the first {@code next()} on
 * {@code testErrorTolerance2.json} must throw an {@link IOException}; the test fails otherwise.
 */
@Test
public void testNoErrorTolerance() throws IOException {
  TableMeta tableMeta = CatalogUtil.newTableMeta(BuiltinStorages.JSON, conf);
  tableMeta.putProperty(StorageUtil.TEXT_ERROR_TOLERANCE_MAXNUM, "0");
  FileFragment jsonFragment = getFileFragment("testErrorTolerance2.json");
  Scanner jsonScanner =
      TablespaceManager.getLocalFs().getScanner(tableMeta, schema, jsonFragment, null);
  jsonScanner.init();

  boolean rejected = false;
  try {
    jsonScanner.next();
  } catch (IOException expected) {
    // This is the outcome the test is looking for.
    rejected = true;
  } finally {
    jsonScanner.close();
  }

  if (!rejected) {
    fail();
  }
}
.add("col3", TajoDataTypes.Type.TEXT) .build(); Scanner scanner = TablespaceManager.getLocalFs().getScanner(meta, schema, fragment, null); scanner.init();
/**
 * With {@code TEXT_ERROR_TOLERANCE_MAXNUM} set to {@code "1"}, the first {@code next()} on
 * {@code testErrorTolerance3.json} is expected to return {@code null} rather than throw.
 */
@Test
public void testIgnoreTruncatedValueErrorTolerance() throws IOException {
  TajoConf conf = new TajoConf();
  TableMeta tableMeta = CatalogUtil.newTableMeta(BuiltinStorages.JSON, conf);
  tableMeta.putProperty(StorageUtil.TEXT_ERROR_TOLERANCE_MAXNUM, "1");
  FileFragment jsonFragment = getFileFragment("testErrorTolerance3.json");
  Scanner jsonScanner =
      TablespaceManager.getLocalFs().getScanner(tableMeta, schema, jsonFragment, null);
  jsonScanner.init();

  try {
    assertNull(jsonScanner.next());
  } finally {
    jsonScanner.close();
  }
}
/**
 * With {@code TEXT_ERROR_TOLERANCE_MAXNUM} set to {@code "1"}, the first two {@code next()}
 * calls on {@code testErrorTolerance1.json} must return tuples and the third must throw an
 * {@link IOException}; the test fails if the third call returns normally.
 *
 * <p>All reads happen inside a try/finally so the scanner is closed even when one of the two
 * leading assertions fails. Only the third read is covered by the {@code catch}, so an
 * {@code IOException} from the first two reads still fails the test.
 */
@Test
public void testIgnoreOneErrorTolerance() throws IOException {
  TableMeta meta = CatalogUtil.newTableMeta(BuiltinStorages.JSON, conf);
  meta.putProperty(StorageUtil.TEXT_ERROR_TOLERANCE_MAXNUM, "1");
  FileFragment fragment = getFileFragment("testErrorTolerance1.json");
  Scanner scanner = TablespaceManager.getLocalFs().getScanner(meta, schema, fragment, null);
  scanner.init();

  try {
    assertNotNull(scanner.next());
    assertNotNull(scanner.next());

    try {
      scanner.next();
    } catch (IOException ioe) {
      // Expected: the tolerance limit has been exceeded.
      LOG.error(ioe);
      return;
    }
    fail();
  } finally {
    scanner.close();
  }
}
/**
 * With {@code TEXT_ERROR_TOLERANCE_MAXNUM} set to {@code "-1"}, scanning
 * {@code testErrorTolerance1.json} must yield exactly three tuples, each equal to
 * {@code baseTuple}.
 *
 * <p>The scanner is closed in a {@code finally} block so that it is released even when an
 * assertion fails.
 */
@Test
public void testIgnoreAllErrors() throws IOException {
  TableMeta meta = CatalogUtil.newTableMeta(BuiltinStorages.JSON, conf);
  meta.putProperty(StorageUtil.TEXT_ERROR_TOLERANCE_MAXNUM, "-1");
  FileFragment fragment = getFileFragment("testErrorTolerance1.json");
  Scanner scanner = TablespaceManager.getLocalFs().getScanner(meta, schema, fragment, null);
  scanner.init();

  try {
    Tuple tuple;
    int i = 0;
    while ((tuple = scanner.next()) != null) {
      assertEquals(baseTuple, tuple);
      i++;
    }
    assertEquals(3, i);
  } finally {
    scanner.close();
  }
}
private void initScanner(Schema projected) throws IOException { TableDesc table = plan.getTableDesc(); TableMeta meta = table.getMeta(); if (fragments.length > 1) { this.scanner = new MergeScanner( context.getConf(), plan.getPhysicalSchema(), meta, FragmentConvertor.convert(context.getConf(), fragments), projected ); } else { Tablespace tablespace = TablespaceManager.get(table.getUri()); this.scanner = tablespace.getScanner( meta, plan.getPhysicalSchema(), FragmentConvertor.convert(context.getConf(), fragments[0]), projected); } if (scanner.isSelectable()) { // TODO - isSelectable should be moved to FormatProperty scanner.setFilter(qual); } if (plan.hasLimit()) { scanner.setLimit(plan.getLimit()); } scanner.init(); }
private void initScanner(Schema projected) throws IOException { TableDesc table = plan.getTableDesc(); TableMeta meta = table.getMeta(); if (fragments.length > 1) { this.scanner = new MergeScanner( context.getConf(), plan.getPhysicalSchema(), meta, FragmentConvertor.convert(context.getConf(), fragments), projected ); } else { Tablespace tablespace = TablespaceManager.get(table.getUri()); this.scanner = tablespace.getScanner( meta, plan.getPhysicalSchema(), FragmentConvertor.convert(context.getConf(), fragments[0]), projected); } if (scanner.isSelectable()) { // TODO - isSelectable should be moved to FormatProperty scanner.setFilter(qual); } if (plan.hasLimit()) { scanner.setLimit(plan.getLimit()); } scanner.init(); }
/**
 * Scans {@code testStripQuote.txt} as comma-delimited text with {@code "} as the quote
 * character and expects six tuples, each equal to {@code baseTuple}.
 *
 * <p>The scanner is closed in a {@code finally} block so that it is released even when an
 * assertion fails.
 */
@Test
public void testStripQuote() throws IOException, CloneNotSupportedException {
  TableMeta meta = CatalogUtil.newTableMeta(BuiltinStorages.TEXT, conf);
  meta.putProperty(StorageUtil.TEXT_DELIMITER, ",");
  meta.putProperty(StorageUtil.QUOTE_CHAR, "\"");
  FileFragment fragment = getFileFragment("testStripQuote.txt");
  Scanner scanner = TablespaceManager.getLocalFs().getScanner(meta, schema, fragment, null);
  scanner.init();

  try {
    Tuple tuple;
    int i = 0;
    while ((tuple = scanner.next()) != null) {
      assertEquals(baseTuple, tuple);
      i++;
    }
    assertEquals(6, i);
  } finally {
    scanner.close();
  }
}
/**
 * With {@code TEXT_SKIP_HEADER_LINE} set to {@code "2"}, scanning {@code testNormal.json} must
 * yield four tuples whose column 2 counts up from 19.
 *
 * <p>The row-count assertion runs inside the {@code try} block rather than in {@code finally}:
 * placed in {@code finally} it would replace any exception thrown by the loop with a misleading
 * count mismatch, and a failing count would skip {@code close()}.
 */
@Test
public void testSkippingHeaderWithJson() throws IOException {
  TableMeta meta = CatalogUtil.newTableMeta(BuiltinStorages.JSON, conf);
  meta.putProperty(StorageConstants.TEXT_SKIP_HEADER_LINE, "2");
  FileFragment fragment = getFileFragment("testNormal.json");
  Scanner scanner = TablespaceManager.getLocalFs().getScanner(meta, schema, fragment, null);
  scanner.init();

  try {
    int lines = 0;
    Tuple tuple;
    while ((tuple = scanner.next()) != null) {
      assertEquals(19 + lines, tuple.getInt2(2));
      lines++;
    }
    assertEquals(4, lines);
  } finally {
    scanner.close();
  }
}
/**
 * With {@code TEXT_SKIP_HEADER_LINE} set to {@code "1"} and a comma delimiter, scanning
 * {@code testSkip.txt} must yield six tuples whose column 2 counts up from 17.
 *
 * <p>The row-count assertion runs inside the {@code try} block rather than in {@code finally}:
 * placed in {@code finally} it would replace any exception thrown by the loop with a misleading
 * count mismatch, and a failing count would skip {@code close()}.
 */
@Test
public void testSkippingHeaderWithText() throws IOException {
  TableMeta meta = CatalogUtil.newTableMeta(BuiltinStorages.TEXT, conf);
  meta.putProperty(StorageConstants.TEXT_SKIP_HEADER_LINE, "1");
  meta.putProperty(StorageConstants.TEXT_DELIMITER, ",");
  FileFragment fragment = getFileFragment("testSkip.txt");
  Scanner scanner = TablespaceManager.getLocalFs().getScanner(meta, schema, fragment, null);
  scanner.init();

  try {
    int lines = 0;
    Tuple tuple;
    while ((tuple = scanner.next()) != null) {
      assertEquals(17 + lines, tuple.getInt2(2));
      lines++;
    }
    assertEquals(6, lines);
  } finally {
    scanner.close();
  }
}
}