@Test public void testUse() throws Exception { File f = genCSVFile("testUse", "{\"columns\": [\"f\",\"g\"]}"); String jsonTableName = format("\"${WORKING_PATH}/%s\"", f.getPath()); // the extension is actually csv test("use dfs"); try { String[] jsonQueries = { format("select columns from table(%s ('JSON'))", jsonTableName), format("select columns from table(%s(type => 'JSON'))", jsonTableName), }; for (String jsonQuery : jsonQueries) { testWithResult(jsonQuery, listOf("f","g")); } testWithResult(format("select length(columns[0]) as columns from table(%s ('JSON'))", jsonTableName), 1); } finally { test("use sys"); } }
/**
 * Queries rows that each end in a carriage return, using a CR+LF line delimiter, and expects
 * the carriage returns to be absent from the returned values.
 */
@Test
public void testTextLineDelimiterWithCarriageReturn() throws Exception {
  final String tableName = genCSVTable(
      "testTextLineDelimiterWithCarriageReturn",
      "1, a\r",
      "2, b\r");

  // The Java escapes below put a literal CR and LF character inside the SQL string literal.
  final String query =
      format("select columns from table(%s(type=>'TeXT', lineDelimiter => '\r\n'))", tableName);

  testWithResult(query, listOf("1, a"), listOf("2, b"));
}
@Test // DX-3796 public void testLongRows() throws Exception { List<String> values = new ArrayList<>(); for (int i = 0; i < 100; i++) { values.add(format("%05d", i)); } String row = Joiner.on(",").join(values); String csvTable = genCSVTable("longRows", row, row); test(format("select * from table(%s (type => 'Text', fieldDelimiter => '\t', autoGenerateColumnNames => false, extractHeader => true, skipFirstLine => false))", csvTable), values); } }
/**
 * Uses the three-character line delimiter {@code abc} on the content {@code 1abc2abc3abc}
 * and expects the rows {@code 1}, {@code 2} and {@code 3}.
 */
@Test
public void testMultiByteLineDelimiter() throws Exception {
  final String tableName = genCSVTable("testMultiByteLineDelimiter", "1abc2abc3abc");
  final String query =
      format("select columns from table(%s(type=>'TeXT', lineDelimiter => 'abc'))", tableName);

  // NOTE(review): the query is first executed without any verification, then verified below;
  // the unverified run looks redundant - confirm whether it is intentional.
  test(query);

  testWithResult(query, listOf("1"), listOf("2"), listOf("3"));
}
/**
 * Exercises the two header-related options on a pipe-delimited file whose first line is
 * {@code b|a}: {@code skipFirstLine} (first line dropped, values exposed via {@code columns})
 * and {@code extractHeader} (first line supplies the column names {@code b} and {@code a}).
 */
@Test
public void testTextHeader() throws Exception {
  final String headerTableName = genCSVTable("testTextHeader", "b|a", "b|0", "b|1");

  // Case 1: skip the first line; only the two data rows are expected.
  final String skipFirstLineQuery = format("select columns from table(%s(type => 'TeXT', fieldDelimiter => '|', skipFirstLine => true))", headerTableName);
  testWithResult(skipFirstLineQuery, listOf("b", "0"), listOf("b", "1"));

  // Case 2: use the first line as column names and select the columns by name.
  final String extractHeaderQuery = format("select a, b from table(%s(type => 'TeXT', fieldDelimiter => '|', extractHeader => true))", headerTableName);
  testBuilder()
      .sqlQuery(extractHeaderQuery)
      .ordered()
      .baselineColumns("b", "a")
      .baselineValues("b", "0")
      .baselineValues("b", "1")
      .build()
      .run();
}
String headerTableName = genCSVTable("testTextHeader", "b|a", "b |0", "select count(*) as cnt from table(%s(type => 'TeXT', fieldDelimiter => '|', extractHeader => true))", headerTableName); testBuilder() .sqlQuery(query) .ordered() "select count(*) as cnt from table(%s(type => 'TeXT', fieldDelimiter => '|', extractHeader => false))", headerTableName); testBuilder() .sqlQuery(query) .ordered() "select count(*) as cnt from table(%s(type => 'TeXT', fieldDelimiter => '|', skipFirstLine => true))", headerTableName); testBuilder() .sqlQuery(query) .ordered() " extractHeader => true))", headerTableName); testBuilder() .sqlQuery(query) .ordered()
/**
 * Generates a data file through {@code genCSVFile} and returns a {@code dfs}-qualified,
 * double-quoted table reference pointing at it.
 *
 * @param name name forwarded to {@code genCSVFile}
 * @param rows content rows forwarded to {@code genCSVFile}
 * @return a reference of the form {@code dfs."${WORKING_PATH}/<file path>"}
 * @throws IOException declared because {@code genCSVFile} may fail while producing the file
 */
private String genCSVTable(String name, String... rows) throws IOException {
  final String generatedPath = genCSVFile(name, rows).getPath();
  return format("dfs.\"${WORKING_PATH}/%s\"", generatedPath);
}
/**
 * Runs {@code query} as an ordered test whose only baseline column is {@code columns}.
 * Each element of {@code expectedResult} becomes one expected row holding that single value.
 *
 * @param query the SQL text to execute
 * @param expectedResult expected values of the {@code columns} column, one per row, in order
 * @throws Exception if building or running the test fails
 */
private void testWithResult(String query, Object... expectedResult) throws Exception {
  TestBuilder expectation = testBuilder().sqlQuery(query).ordered().baselineColumns("columns");

  for (int rowIndex = 0; rowIndex < expectedResult.length; rowIndex++) {
    // The element is passed as a single Object so it forms a one-value row.
    final Object expectedRow = expectedResult[rowIndex];
    expectation = expectation.baselineValues(expectedRow);
  }

  expectation.build().run();
}
/**
 * Sets {@code comment => '@'} on a pipe-delimited file and expects the line that starts with
 * {@code @} to be missing from the result.
 */
@Test
public void testTextComment() throws Exception {
  final String commentTableName = genCSVTable(
      "testTextComment",
      "b|0",
      "@ this is a comment",
      "b|1");

  final String query = format("select columns from table(%s(type => 'TeXT', fieldDelimiter => '|', comment => '@'))", commentTableName);

  testWithResult(query, listOf("b", "0"), listOf("b", "1"));
}
/** Splits tab-separated rows into two values each by passing a tab as the field delimiter. */
@Test
public void testTabFieldDelimiter() throws Exception {
  final String tableName = genCSVTable(
      "testTabFieldDelimiter",
      "1\ta",
      "2\tb");

  // The Java escape below puts a literal tab character inside the SQL string literal.
  final String query =
      format("select columns from table(%s(type=>'TeXT', fieldDelimiter => '\t'))", tableName);

  testWithResult(query, listOf("1", "a"), listOf("2", "b"));
}
@Test public void testVariationsJSON() throws Exception { String jsonTableName = genCSVTable("testVariationsJSON", "{\"columns\": [\"f\",\"g\"]}"); // the extension is actually csv testWithResult(format("select columns from %s", jsonTableName), listOf("{\"columns\": [\"f\"", "g\"]}\n") ); String[] jsonQueries = { format("select columns from table(%s ('JSON'))", jsonTableName), format("select columns from table(%s(type => 'JSON'))", jsonTableName), // format("select columns from %s ('JSON')", jsonTableName), // format("select columns from %s (type => 'JSON')", jsonTableName), // format("select columns from %s(type => 'JSON')", jsonTableName), // we can use named format plugin configurations too! format("select columns from table(%s(type => 'Named', name => 'json'))", jsonTableName), }; for (String jsonQuery : jsonQueries) { testWithResult(jsonQuery, listOf("f","g")); } }
/**
 * Uses {@code |} as the line delimiter on the content {@code a|b|c} and expects three rows.
 */
@Test
public void testSingleTextLineDelimiter() throws Exception {
  final String tableName = genCSVTable("testSingleTextLineDelimiter", "a|b|c");
  final String query =
      format("select columns from table(%s(type => 'TeXT', lineDelimiter => '|'))", tableName);

  testWithResult(query, listOf("a"), listOf("b"), listOf("c"));
}
/**
 * The data starts with {@code ab}, a prefix of the line delimiter {@code abc}. The expected
 * first row {@code ab1} shows that a partial delimiter match must stay in the data.
 */
@Test
public void testDataWithPartOfMultiByteLineDelimiter() throws Exception {
  final String tableName =
      genCSVTable("testDataWithPartOfMultiByteLineDelimiter", "ab1abc2abc3abc");
  final String query =
      format("select columns from table(%s(type=>'TeXT', lineDelimiter => 'abc'))", tableName);

  testWithResult(query, listOf("ab1"), listOf("2"), listOf("3"));
}
@Test public void testTextQuote() throws Exception { String tableName = genCSVTable("testTextQuote", "\"b\"|\"0\"", "\"b\"|\"1\"", "\"b\"|\"2\""); testWithResult(format("select columns from table(%s(type => 'TeXT', fieldDelimiter => '|', quote => '@'))", tableName), listOf("\"b\"", "\"0\""), listOf("\"b\"", "\"1\""), listOf("\"b\"", "\"2\"") ); String quoteTableName = genCSVTable("testTextQuote2", "@b@|@0@", "@b$@c@|@1@"); // It seems that a parameter can not be called "escape" testWithResult(format("select columns from table(%s(\"escape\" => '$', type => 'TeXT', fieldDelimiter => '|', quote => '@'))", quoteTableName), listOf("b", "0"), listOf("b$@c", "1") // shouldn't $ be removed here? ); }
@Test // '\n' is treated as standard delimiter // if user has indicated custom line delimiter but input file contains '\n', split will occur on both public void testCustomTextLineDelimiterAndNewLine() throws Exception { String tableName = genCSVTable("testTextLineDelimiter", "b|1", "b|2"); testWithResult(format("select columns from table(%s(type => 'TeXT', lineDelimiter => '|'))", tableName), listOf("b"), listOf("1"), listOf("b"), listOf("2")); }
@Test public void testVariationsCSV() throws Exception { String csvTableName = genCSVTable("testVariationsCSV", "a,b", "c|d"); testWithResult(csvQuery, listOf("a,b"), listOf("c|d")); testWithResult(format("select columns from table(%s (type => 'TeXT', fieldDelimiter => ','))", csvTableName), listOf("a", "b"), listOf("c|d")); testWithResult(format("select columns from table(%s (type => 'TeXT', fieldDelimiter => '|'))", csvTableName), listOf("a,b"), listOf("c", "d"));
/**
 * Reads the same double-quoted, pipe-separated rows with two field delimiters. With
 * {@code ,} each row is expected as a single value; with {@code |} as two values.
 */
@Test
public void testTextFieldDelimiter() throws Exception {
  final String tableName = genCSVTable(
      "testTextFieldDelimiter",
      "\"b\"|\"0\"",
      "\"b\"|\"1\"",
      "\"b\"|\"2\"");
  final String queryTemplate = "select columns from table(%s (type => 'TeXT', fieldDelimiter => '%s'))";

  // Comma delimiter: the expected value keeps the inner quotes and pipe (b"|"0).
  final String commaDelimitedQuery = format(queryTemplate, tableName, ",");
  testWithResult(
      commaDelimitedQuery,
      listOf("b\"|\"0"),
      listOf("b\"|\"1"),
      listOf("b\"|\"2"));

  // Pipe delimiter: each row splits into two unquoted values.
  final String pipeDelimitedQuery = format(queryTemplate, tableName, "|");
  testWithResult(
      pipeDelimitedQuery,
      listOf("b", "0"),
      listOf("b", "1"),
      listOf("b", "2"));
}