/**
 * Moves on to the next fragment supplied by {@code iterator} and opens a scanner on it.
 *
 * <p>On success both {@code currentFragment} and {@code currentScanner} are replaced.
 *
 * @return the newly initialized scanner, or {@code null} when the iterator is exhausted
 * @throws IOException if the scanner cannot be created or initialized
 */
private Scanner getNextScanner() throws IOException {
  // Nothing left to read: signal the caller with null.
  if (!iterator.hasNext()) {
    return null;
  }

  currentFragment = iterator.next();
  currentScanner =
      TablespaceManager.getLocalFs().getScanner(meta, schema, currentFragment, target);
  currentScanner.init();
  return currentScanner;
}
/**
 * Opens a scanner over the next fragment, if the fragment iterator still has one.
 *
 * <p>The fields {@code currentFragment} and {@code currentScanner} are updated to the
 * fragment and scanner that were just opened.
 *
 * @return the initialized scanner for the next fragment, or {@code null} if no fragment remains
 * @throws IOException if creating or initializing the scanner fails
 */
private Scanner getNextScanner() throws IOException {
  final boolean exhausted = !iterator.hasNext();
  if (exhausted) {
    return null;
  }

  currentFragment = iterator.next();
  currentScanner =
      TablespaceManager.getLocalFs().getScanner(meta, schema, currentFragment, target);
  currentScanner.init();
  return currentScanner;
}
/**
 * Builds the fixture around the file system under test.
 *
 * <p>The configuration is derived from the file system's own configuration, the tablespace
 * is the local one returned by {@code TablespaceManager.getLocalFs()}, and the test
 * directory is obtained from {@code getTestDir} for {@code TEST_PATH}.
 *
 * @param fs the file system the tests run against
 * @throws IOException declared for the calls made while preparing the fixture
 */
public TestFileSystems(FileSystem fs) throws IOException {
  this.fs = fs;
  this.conf = new TajoConf(fs.getConf());
  this.sm = TablespaceManager.getLocalFs();
  this.testDir = getTestDir(this.fs, TEST_PATH);
}
/**
 * Returns a scanner over the given chunk.
 *
 * <p>A file-backed chunk is read through the local tablespace using the chunk's own meta,
 * schema and fragment. A memory chunk first sorts {@code inMemoryTable} with
 * {@code unSafeComparator} and is then exposed through a {@code MemTableScanner}.
 *
 * @param chunk the chunk to scan
 * @return a scanner over the chunk's rows
 * @throws IOException if the tablespace cannot create the scanner
 */
private Scanner getScanner(Chunk chunk) throws IOException {
  // File-backed chunk: delegate straight to the local tablespace.
  if (!chunk.isMemory()) {
    return TablespaceManager.getLocalFs()
        .getScanner(chunk.meta, chunk.schema, chunk.fragment, chunk.schema);
  }

  final long sortStart = System.currentTimeMillis();
  OffHeapRowBlockUtils.sort(inMemoryTable, unSafeComparator);
  Scanner memScanner =
      new MemTableScanner<>(inMemoryTable, inMemoryTable.size(), inMemoryTable.usedMem());

  // Only build the (string-heavy) message when debug logging is actually on.
  if (LOG.isDebugEnabled()) {
    debug(LOG,
        "Memory Chunk sort ("
            + FileUtil.humanReadableByteCount(inMemoryTable.usedMem(), false)
            + " bytes, "
            + inMemoryTable.size()
            + " rows, sort time: "
            + (System.currentTimeMillis() - sortStart)
            + " msec)");
  }
  return memScanner;
}
/**
 * Returns a scanner over the given chunk.
 *
 * <p>For a memory chunk, {@code inMemoryTable} is sorted via {@code this.sort(...)} and
 * wrapped in a {@code MemTableScanner}; otherwise the scanner comes from the local
 * tablespace, built from the chunk's meta, schema and fragment.
 *
 * @param chunk the chunk to scan
 * @return a scanner over the chunk's rows
 * @throws IOException if the tablespace cannot create the scanner
 */
private Scanner getScanner(Chunk chunk) throws IOException {
  // File-backed chunk: delegate straight to the local tablespace.
  if (!chunk.isMemory()) {
    return TablespaceManager.getLocalFs()
        .getScanner(chunk.meta, chunk.schema, chunk.fragment, chunk.schema);
  }

  final long sortStart = System.currentTimeMillis();
  this.sort(inMemoryTable);
  Scanner memScanner =
      new MemTableScanner<>(inMemoryTable, inMemoryTable.size(), inMemoryTable.usedMem());

  // Only build the (string-heavy) message when debug logging is actually on.
  if (LOG.isDebugEnabled()) {
    debug(LOG,
        "Memory Chunk sort ("
            + FileUtil.humanReadableByteCount(inMemoryTable.usedMem(), false)
            + " bytes, "
            + inMemoryTable.size()
            + " rows, sort time: "
            + (System.currentTimeMillis() - sortStart)
            + " msec)");
  }
  return memScanner;
}
/**
 * Prepares the per-test fixture: a fresh configuration whose root directory is
 * {@code TEST_PATH} and whose {@code tajo.storage.projectable-scanner} property lists
 * rcfile, parquet and avro; the test directory and its file system; and the local tablespace.
 *
 * @throws Exception if any part of the fixture cannot be created
 */
@Before
public void setup() throws Exception {
  this.conf = new TajoConf();
  this.conf.setVar(ConfVars.ROOT_DIR, TEST_PATH);
  this.conf.setStrings("tajo.storage.projectable-scanner", "rcfile", "parquet", "avro");

  this.testDir = CommonTestingUtil.getTestDir(TEST_PATH);
  this.fs = this.testDir.getFileSystem(this.conf);
  this.sm = TablespaceManager.getLocalFs();
}
/**
 * Scans {@code access.log} from the {@code TestRegexSerDe} dataset with the REGEX storage
 * type and checks that the first two tuples equal {@code rows[0]} and {@code rows[1]}.
 *
 * @throws IOException if the file cannot be read or scanned
 */
@Test
public void testApacheAccessLogScanner() throws IOException {
  TajoConf conf = new TajoConf();
  TableMeta meta = CatalogUtil.newTableMeta(BuiltinStorages.REGEX, conf);
  Path tablePath = new Path(getResourcePath("dataset", "TestRegexSerDe"), "access.log");
  FileSystem fs = FileSystem.getLocal(conf);
  FileStatus status = fs.getFileStatus(tablePath);
  FileFragment fragment = new FileFragment("table", tablePath, 0, status.getLen());

  Scanner scanner = TablespaceManager.getLocalFs().getScanner(meta, schema, fragment, null);
  // The regex pattern is set on the meta after the scanner is created but before init().
  meta.putProperty(StorageConstants.TEXT_REGEX, apacheWeblogPattern);
  scanner.init();

  // Close the scanner even when a read or an assertion fails.
  try {
    Tuple tuple = scanner.next();
    assertEquals(rows[0], tuple);

    tuple = scanner.next();
    assertNotNull(tuple);
    assertEquals(rows[1], tuple);
  } finally {
    scanner.close();
  }
}
/**
 * Scans {@code testIncompleteQuote.txt} as comma-delimited TEXT with {@code "} as the quote
 * character and expects exactly one tuple, whose string form keeps the unbalanced quotes.
 *
 * @throws IOException if the fragment cannot be read or scanned
 * @throws CloneNotSupportedException declared by the original signature
 */
@Test
public void testIncompleteQuote() throws IOException, CloneNotSupportedException {
  TableMeta meta = CatalogUtil.newTableMeta(BuiltinStorages.TEXT, conf);
  meta.putProperty(StorageUtil.TEXT_DELIMITER, ",");
  meta.putProperty(StorageUtil.QUOTE_CHAR, "\"");
  FileFragment fragment = getFileFragment("testIncompleteQuote.txt");

  Scanner scanner = TablespaceManager.getLocalFs().getScanner(meta, schema, fragment, null);
  scanner.init();

  // Close the scanner even when a read or an assertion fails.
  try {
    Tuple tuple;
    int i = 0;
    while ((tuple = scanner.next()) != null) {
      assertEquals("(f,hyunsik\",NULL,NULL,NULL,NULL,0.0,\"hyunsik,hyunsik)", tuple.toString());
      i++;
    }
    assertEquals(1, i);
  } finally {
    scanner.close();
  }
}
/**
 * Checks that {@code TablespaceManager} hands out the right tablespace for the local file
 * system and for a mini HDFS cluster, the latter looked up both by URI and by name.
 *
 * @throws Exception if the cluster or a tablespace cannot be set up
 */
@Test(timeout = 120000)
public void testGetFileTablespace() throws Exception {
  final Configuration hdfsConf = getTestHdfsConfiguration();
  final MiniDFSCluster cluster = new MiniDFSCluster.Builder(hdfsConf)
      .numDataNodes(1).format(true).build();

  try {
    // Resolved inside the try so the cluster is shut down even if getFileSystem() fails.
    URI uri = URI.create(cluster.getFileSystem().getUri() + "/tajo");

    /* Local FileSystem */
    FileTablespace space = TablespaceManager.getLocalFs();
    assertEquals(localFs.getUri(), space.getFileSystem().getUri());

    FileTablespace distTablespace = new FileTablespace("testGetFileTablespace", uri, null);
    distTablespace.init(conf);
    TablespaceManager.addTableSpaceForTest(distTablespace);

    /* Distributed FileSystem */
    space = TablespaceManager.get(uri);
    assertEquals(cluster.getFileSystem().getUri(), space.getFileSystem().getUri());

    space = TablespaceManager.getByName("testGetFileTablespace");
    assertEquals(cluster.getFileSystem().getUri(), space.getFileSystem().getUri());
  } finally {
    cluster.shutdown();
  }
}
}
/**
 * With {@code TEXT_ERROR_TOLERANCE_MAXNUM} set to {@code "0"}, the first read of
 * {@code testErrorTolerance2.json} must raise an {@link IOException}; the test fails if the
 * read returns normally.
 *
 * @throws IOException if creating, initializing or closing the scanner fails
 */
@Test
public void testNoErrorTolerance() throws IOException {
  TableMeta meta = CatalogUtil.newTableMeta(BuiltinStorages.JSON, conf);
  meta.putProperty(StorageUtil.TEXT_ERROR_TOLERANCE_MAXNUM, "0");
  FileFragment fragment = getFileFragment("testErrorTolerance2.json");

  Scanner scanner = TablespaceManager.getLocalFs().getScanner(meta, schema, fragment, null);
  scanner.init();

  boolean readRejected = false;
  try {
    scanner.next();
  } catch (IOException expected) {
    // This is the outcome the test is looking for.
    readRejected = true;
  } finally {
    scanner.close();
  }

  if (!readRejected) {
    fail();
  }
}
@Test public void testVarioutType() throws IOException { TajoConf conf = new TajoConf(); TableMeta meta = CatalogUtil.newTableMeta(BuiltinStorages.JSON, conf); Path tablePath = new Path(getResourcePath("dataset", "TestJsonSerDe"), "testVariousType.json"); FileSystem fs = FileSystem.getLocal(conf); FileStatus status = fs.getFileStatus(tablePath); FileFragment fragment = new FileFragment("table", tablePath, 0, status.getLen()); Scanner scanner = TablespaceManager.getLocalFs().getScanner(meta, schema, fragment, null); scanner.init(); Tuple tuple = scanner.next(); assertNotNull(tuple); assertNull(scanner.next()); scanner.close(); Tuple baseTuple = new VTuple(new Datum[] { DatumFactory.createBool(true), // 0 DatumFactory.createChar("hyunsik"), // 1 DatumFactory.createInt2((short) 17), // 2 DatumFactory.createInt4(59), // 3 DatumFactory.createInt8(23L), // 4 DatumFactory.createFloat4(77.9f), // 5 DatumFactory.createFloat8(271.9d), // 6 DatumFactory.createText("hyunsik"), // 7 DatumFactory.createBlob("hyunsik".getBytes()), // 8 NullDatum.get(), // 9 }); assertEquals(baseTuple, tuple); }
.add("col3", TajoDataTypes.Type.TEXT) .build(); Scanner scanner = TablespaceManager.getLocalFs().getScanner(meta, schema, fragment, null); scanner.init();
/**
 * With {@code TEXT_ERROR_TOLERANCE_MAXNUM} set to {@code "1"}, the first read of
 * {@code testErrorTolerance3.json} is expected to return {@code null} rather than throw.
 *
 * @throws IOException if the fragment cannot be read or scanned
 */
@Test
public void testIgnoreTruncatedValueErrorTolerance() throws IOException {
  TajoConf conf = new TajoConf();
  TableMeta meta = CatalogUtil.newTableMeta(BuiltinStorages.JSON, conf);
  meta.putProperty(StorageUtil.TEXT_ERROR_TOLERANCE_MAXNUM, "1");
  FileFragment fragment = getFileFragment("testErrorTolerance3.json");

  Scanner scanner = TablespaceManager.getLocalFs().getScanner(meta, schema, fragment, null);
  scanner.init();

  try {
    assertNull(scanner.next());
  } finally {
    scanner.close();
  }
}
/**
 * With {@code TEXT_ERROR_TOLERANCE_MAXNUM} set to {@code "1"}, the first two reads of
 * {@code testErrorTolerance1.json} must return tuples and the third read must raise an
 * {@link IOException}; the test fails if the third read returns normally.
 *
 * @throws IOException if the scanner fails anywhere other than the third read
 */
@Test
public void testIgnoreOneErrorTolerance() throws IOException {
  TableMeta meta = CatalogUtil.newTableMeta(BuiltinStorages.JSON, conf);
  meta.putProperty(StorageUtil.TEXT_ERROR_TOLERANCE_MAXNUM, "1");
  FileFragment fragment = getFileFragment("testErrorTolerance1.json");

  Scanner scanner = TablespaceManager.getLocalFs().getScanner(meta, schema, fragment, null);
  scanner.init();

  // The outer try guarantees close() even if one of the first two reads or assertions fails.
  try {
    assertNotNull(scanner.next());
    assertNotNull(scanner.next());

    // Only the third read is expected to throw, so the catch is scoped to it alone;
    // an IOException from the earlier reads must still fail the test.
    try {
      scanner.next();
    } catch (IOException ioe) {
      LOG.error(ioe);
      return;
    }
    fail();
  } finally {
    scanner.close();
  }
}
meta.putProperty(StorageConstants.TEXT_REGEX_OUTPUT_FORMAT_STRING, "%1$s %2$s %3$s %4$s %5$s %6$s %7$s %8$s %9$s"); FileTablespace sm = TablespaceManager.getLocalFs(); Path tablePath = new Path(testDir, "testSerializer.data"); Appender appender = sm.getAppender(meta, schema, tablePath);
/**
 * With {@code TEXT_ERROR_TOLERANCE_MAXNUM} set to {@code "-1"}, scanning
 * {@code testErrorTolerance1.json} must yield exactly three tuples, each equal to
 * {@code baseTuple}.
 *
 * @throws IOException if the fragment cannot be read or scanned
 */
@Test
public void testIgnoreAllErrors() throws IOException {
  TableMeta meta = CatalogUtil.newTableMeta(BuiltinStorages.JSON, conf);
  meta.putProperty(StorageUtil.TEXT_ERROR_TOLERANCE_MAXNUM, "-1");
  FileFragment fragment = getFileFragment("testErrorTolerance1.json");

  Scanner scanner = TablespaceManager.getLocalFs().getScanner(meta, schema, fragment, null);
  scanner.init();

  // Close the scanner even when a read or an assertion fails.
  try {
    Tuple tuple;
    int i = 0;
    while ((tuple = scanner.next()) != null) {
      assertEquals(baseTuple, tuple);
      i++;
    }
    assertEquals(3, i);
  } finally {
    scanner.close();
  }
}
/**
 * Scans {@code testStripQuote.txt} as comma-delimited TEXT with {@code "} as the quote
 * character and expects exactly six tuples, each equal to {@code baseTuple}.
 *
 * @throws IOException if the fragment cannot be read or scanned
 * @throws CloneNotSupportedException declared by the original signature
 */
@Test
public void testStripQuote() throws IOException, CloneNotSupportedException {
  TableMeta meta = CatalogUtil.newTableMeta(BuiltinStorages.TEXT, conf);
  meta.putProperty(StorageUtil.TEXT_DELIMITER, ",");
  meta.putProperty(StorageUtil.QUOTE_CHAR, "\"");
  FileFragment fragment = getFileFragment("testStripQuote.txt");

  Scanner scanner = TablespaceManager.getLocalFs().getScanner(meta, schema, fragment, null);
  scanner.init();

  // Close the scanner even when a read or an assertion fails.
  try {
    Tuple tuple;
    int i = 0;
    while ((tuple = scanner.next()) != null) {
      assertEquals(baseTuple, tuple);
      i++;
    }
    assertEquals(6, i);
  } finally {
    scanner.close();
  }
}
/**
 * With {@code TEXT_SKIP_HEADER_LINE} set to {@code "2"}, scanning {@code testNormal.json}
 * must yield exactly four tuples whose column 2 value starts at 19 and grows by one per row.
 *
 * @throws IOException if the fragment cannot be read or scanned
 */
@Test
public void testSkippingHeaderWithJson() throws IOException {
  TableMeta meta = CatalogUtil.newTableMeta(BuiltinStorages.JSON, conf);
  meta.putProperty(StorageConstants.TEXT_SKIP_HEADER_LINE, "2");
  FileFragment fragment = getFileFragment("testNormal.json");

  Scanner scanner = TablespaceManager.getLocalFs().getScanner(meta, schema, fragment, null);
  scanner.init();

  int lines = 0;
  try {
    Tuple tuple;
    while ((tuple = scanner.next()) != null) {
      assertEquals(19 + lines, tuple.getInt2(2));
      lines++;
    }
    // Checked inside the try, not in finally: an assertion in finally would hide the
    // original failure and prevent close() from running.
    assertEquals(4, lines);
  } finally {
    scanner.close();
  }
}
/**
 * Scans {@code access.log} from the {@code TestRegexSerDe} dataset with the REGEX storage
 * type, projecting onto the {@code time} and {@code status} columns, and checks that the
 * first two tuples each have two fields and equal the expected rows.
 *
 * @throws IOException if the file cannot be read or scanned
 */
@Test
public void testProjection() throws IOException {
  Schema target = SchemaBuilder.builder()
      .add("time", TajoDataTypes.Type.TEXT)
      .add("status", TajoDataTypes.Type.TEXT)
      .build();

  Tuple[] rows = new VTuple[]{
      new VTuple(new Datum[]{
          DatumFactory.createText("[10/Oct/2000:13:55:36 -0700]"),
          DatumFactory.createText("200")
      }),
      new VTuple(new Datum[]{
          DatumFactory.createText("[10/Oct/2000:13:55:36 -0700]"),
          DatumFactory.createText("200")
      })
  };

  TajoConf conf = new TajoConf();
  TableMeta meta = CatalogUtil.newTableMeta(BuiltinStorages.REGEX, conf);
  Path tablePath = new Path(getResourcePath("dataset", "TestRegexSerDe"), "access.log");
  FileSystem fs = FileSystem.getLocal(conf);
  FileStatus status = fs.getFileStatus(tablePath);
  FileFragment fragment = new FileFragment("table", tablePath, 0, status.getLen());

  Scanner scanner = TablespaceManager.getLocalFs().getScanner(meta, schema, fragment, target);
  // The regex pattern is set on the meta after the scanner is created but before init().
  meta.putProperty(StorageConstants.TEXT_REGEX, apacheWeblogPattern);
  scanner.init();

  // Close the scanner even when a read or an assertion fails.
  try {
    Tuple tuple = scanner.next();
    assertEquals(2, tuple.size());
    assertEquals(rows[0], tuple);

    tuple = scanner.next();
    assertNotNull(tuple);
    assertEquals(2, tuple.size());
    assertEquals(rows[1], tuple);
  } finally {
    scanner.close();
  }
}
/**
 * With {@code TEXT_SKIP_HEADER_LINE} set to {@code "1"} and a comma delimiter, scanning
 * {@code testSkip.txt} must yield exactly six tuples whose column 2 value starts at 17 and
 * grows by one per row.
 *
 * @throws IOException if the fragment cannot be read or scanned
 */
@Test
public void testSkippingHeaderWithText() throws IOException {
  TableMeta meta = CatalogUtil.newTableMeta(BuiltinStorages.TEXT, conf);
  meta.putProperty(StorageConstants.TEXT_SKIP_HEADER_LINE, "1");
  meta.putProperty(StorageConstants.TEXT_DELIMITER, ",");
  FileFragment fragment = getFileFragment("testSkip.txt");

  Scanner scanner = TablespaceManager.getLocalFs().getScanner(meta, schema, fragment, null);
  scanner.init();

  int lines = 0;
  try {
    Tuple tuple;
    while ((tuple = scanner.next()) != null) {
      assertEquals(17 + lines, tuple.getInt2(2));
      lines++;
    }
    // Checked inside the try, not in finally: an assertion in finally would hide the
    // original failure and prevent close() from running.
    assertEquals(6, lines);
  } finally {
    scanner.close();
  }
}
}