" - {name: json, type: json, default: \"{\\\"foo\\\":\\\"bar\\\"}\" }"); Schema inputSchema = Schema.builder() .add("timestamp", TIMESTAMP) .add("string", STRING) .add("boolean", BOOLEAN) .add("long", LONG) .add("double", DOUBLE) .add("json", JSON) .add("remove_me", STRING) .build(); List<Object[]> records = filter(task, inputSchema, Timestamp.ofEpochSecond(1436745600), "string", Boolean.valueOf(true), Long.valueOf(0), Double.valueOf(0.5), ValueFactory.newString("json"), "remove_me",
" - {name: json}"); Schema inputSchema = Schema.builder() .add("timestamp", TIMESTAMP) .add("string", STRING) .add("boolean", BOOLEAN) .add("long", LONG) .add("double", DOUBLE) .add("json", JSON) .add("keep_me", STRING) .build(); List<Object[]> records = filter(task, inputSchema, Timestamp.ofEpochSecond(1436745600), "string", Boolean.valueOf(true), Long.valueOf(0), Double.valueOf(0.5), ValueFactory.newString("json"), "keep_me",
" - {name: json}"); Schema inputSchema = Schema.builder() .add("timestamp", TIMESTAMP) .add("string", STRING) .add("boolean", BOOLEAN) .add("long", LONG) .add("double", DOUBLE) .add("json", JSON) .add("remove_me", STRING) .build(); List<Object[]> records = filter(task, inputSchema, Timestamp.ofEpochSecond(0), "string", Boolean.valueOf(true), Long.valueOf(0), Double.valueOf(0.5), ValueFactory.newString("json"), "remove_me",
/**
 * With every typed column listed under {@code drop_columns}, the output
 * schema is expected to contain only the unlisted {@code keep_me} column.
 */
@Test
public void buildOutputSchema_DropColumns() {
    final PluginTask dropTask = taskFromYamlString(
            "type: column",
            "drop_columns:",
            " - {name: timestamp}",
            " - {name: string}",
            " - {name: boolean}",
            " - {name: long}",
            " - {name: double}",
            " - {name: json}");

    final Schema source = Schema.builder()
            .add("timestamp", TIMESTAMP)
            .add("string", STRING)
            .add("boolean", BOOLEAN)
            .add("long", LONG)
            .add("double", DOUBLE)
            .add("json", JSON)
            .add("keep_me", STRING)
            .build();

    final Schema result = ColumnFilterPlugin.buildOutputSchema(dropTask, source);

    // Only the column that was not listed for dropping survives.
    assertEquals(1, result.size());
    assertEquals("keep_me", result.getColumn(0).getName());
}
/**
 * Checks the visitor's {@code shouldVisit} answers for a task that mixes
 * {@code columns}, {@code add_columns} and {@code drop_columns} JSONPath
 * entries: each configured path and every ancestor of it is expected to be
 * visited, while the unreferenced {@code json2} column is not.
 */
@Test
public void buildShouldVisitSet() {
    final PluginTask mixedTask = taskFromYamlString(
            "type: column",
            "columns:",
            " - {name: \"$.json1.a.a.a\"}",
            "add_columns:",
            " - {name: \"$.json1.b.b[1].b\", type: string, default: foo}",
            "drop_columns:",
            " - {name: \"$.json1.c.c[*].c\"}");

    final Schema source = Schema.builder()
            .add("json1", JSON)
            .add("json2", JSON)
            .build();

    final JsonVisitor visitor = jsonVisitor(mixedTask, source);

    // Path configured under "columns" and its ancestors.
    assertTrue(visitor.shouldVisit("$['json1']['a']['a']['a']"));
    assertTrue(visitor.shouldVisit("$['json1']['a']['a']"));
    assertTrue(visitor.shouldVisit("$['json1']['a']"));

    // Path configured under "add_columns" and its ancestors.
    assertTrue(visitor.shouldVisit("$['json1']['b']['b'][1]['b']"));
    assertTrue(visitor.shouldVisit("$['json1']['b']['b'][1]"));
    assertTrue(visitor.shouldVisit("$['json1']['b']['b']"));
    assertTrue(visitor.shouldVisit("$['json1']['b']"));

    // Path configured under "drop_columns" and its ancestors.
    assertTrue(visitor.shouldVisit("$['json1']['c']['c'][*]['c']"));
    assertTrue(visitor.shouldVisit("$['json1']['c']['c'][*]"));
    assertTrue(visitor.shouldVisit("$['json1']['c']['c']"));
    assertTrue(visitor.shouldVisit("$['json1']['c']"));

    // Shared root of all three paths.
    assertTrue(visitor.shouldVisit("$['json1']"));

    // No configured path touches json2.
    assertFalse(visitor.shouldVisit("$['json2']"));
}
@Test public void visitArray_Columns() { PluginTask task = taskFromYamlString( "type: column", "columns:", " - {name: \"$.json1.k1[1]\", src: \"$.json1.k1[0]\"}", " - {name: \"$.json1.k2[0]\"}", " - {name: \"$.json1.k3[*].k1\"}", " - {name: \"$.json1.k3[*].k3\", src: \"$.json1.k3[*].k1\"}", " - {name: \"$.json1.k4[*].k1\", type: string, default: v}", " - {name: \"$.json1.k5[0].k1\", type: string, default: v}"); Schema inputSchema = Schema.builder() .add("json1", JSON) .add("json2", JSON) .build(); JsonVisitor subject = jsonVisitor(task, inputSchema); // {"k1":[{"k1":"v"},"v"],"k2":["v","v"],"k3":[{"k1":"v","k2":"v"}]} Value k1 = ValueFactory.newString("k1"); Value k2 = ValueFactory.newString("k2"); Value k3 = ValueFactory.newString("k3"); Value v = ValueFactory.newString("v"); Value map = ValueFactory.newMap( k1, ValueFactory.newArray(ValueFactory.newMap(k1, v), v), k2, ValueFactory.newArray(v, v), k3, ValueFactory.newArray(ValueFactory.newMap(k1, v, k2, v))); MapValue visited = subject.visit("$['json1']", map).asMapValue(); assertEquals("{\"k1\":[{\"k1\":\"v\"}],\"k2\":[\"v\"],\"k3\":[{\"k1\":\"v\",\"k3\":\"v\"}],\"k4\":[],\"k5\":[{\"k1\":\"v\"}]}", visited.toString()); }
/**
 * With six columns listed under {@code columns} and a seventh input column
 * left unlisted, the output schema is expected to hold six columns, the
 * first of which is {@code timestamp}.
 */
@Test
public void buildOutputSchema_Columns() {
    final PluginTask selectTask = taskFromYamlString(
            "type: column",
            "columns:",
            " - {name: timestamp}",
            " - {name: string}",
            " - {name: boolean}",
            " - {name: long}",
            " - {name: double}",
            " - {name: json}");

    final Schema source = Schema.builder()
            .add("timestamp", TIMESTAMP)
            .add("string", STRING)
            .add("boolean", BOOLEAN)
            .add("long", LONG)
            .add("double", DOUBLE)
            .add("json", JSON)
            .add("remove_me", STRING)
            .build();

    final Schema result = ColumnFilterPlugin.buildOutputSchema(selectTask, source);

    // "remove_me" is not listed, so only the six selected columns remain.
    assertEquals(6, result.size());
    assertEquals("timestamp", result.getColumn(0).getName());
}
@Test public void visitArray_AddColumns() { PluginTask task = taskFromYamlString( "type: column", "add_columns:", " - {name: \"$.json1.k1[1]\", src: \"$.json1.k1[0]\"}", " - {name: \"$.json1.k3[*].k2\", type: string, default: v}", " - {name: \"$.json1.k4[*].k1\", type: string, default: v}", " - {name: \"$.json1.k5[0].k1\", type: string, default: v}"); Schema inputSchema = Schema.builder() .add("json1", JSON) .add("json2", JSON) .build(); JsonVisitor subject = jsonVisitor(task, inputSchema); // {"k1":[{"k1":"v"}],"k2":["v","v"],"k3":[{"k1":"v"}]} Value k1 = ValueFactory.newString("k1"); Value k2 = ValueFactory.newString("k2"); Value k3 = ValueFactory.newString("k3"); Value v = ValueFactory.newString("v"); Value map = ValueFactory.newMap( k1, ValueFactory.newArray(ValueFactory.newMap(k1, v)), k2, ValueFactory.newArray(v, v), k3, ValueFactory.newArray(ValueFactory.newMap(k1, v))); MapValue visited = subject.visit("$['json1']", map).asMapValue(); assertEquals("{\"k1\":[{\"k1\":\"v\"},{\"k1\":\"v\"}],\"k2\":[\"v\",\"v\"],\"k3\":[{\"k1\":\"v\",\"k2\":\"v\"}],\"k4\":[],\"k5\":[{\"k1\":\"v\"}]}", visited.toString()); }
@Test public void visitArray_DropColumns() { PluginTask task = taskFromYamlString( "type: column", "drop_columns:", " - {name: \"$.json1.k1[0].k1\"}", " - {name: \"$.json1.k2[*]\"}", // ending with [*] is allowed for drop_columns, but not for others " - {name: \"$.json1.k3[*].k1\"}"); Schema inputSchema = Schema.builder() .add("json1", JSON) .add("json2", JSON) .build(); JsonVisitor subject = jsonVisitor(task, inputSchema); // {"k1":[{"k1":"v"}],"k2":["v","v"],"k3":[{"k3":"v"}]} Value k1 = ValueFactory.newString("k1"); Value k2 = ValueFactory.newString("k2"); Value k3 = ValueFactory.newString("k3"); Value v = ValueFactory.newString("v"); Value map = ValueFactory.newMap( k1, ValueFactory.newArray(ValueFactory.newMap(k1, v)), k2, ValueFactory.newArray(v, v), k3, ValueFactory.newArray(ValueFactory.newMap(k1, v))); MapValue visited = subject.visit("$['json1']", map).asMapValue(); assertEquals("{\"k1\":[{}],\"k2\":[],\"k3\":[{}]}", visited.toString()); }
/**
 * Inspects the visitor's {@code jsonDropColumns} map after configuring
 * three {@code drop_columns} JSONPaths: entries are expected to be keyed by
 * each path's parent, with the full paths as the set members.
 */
@Test
public void buildJsonDropColumns() {
    final PluginTask dropTask = taskFromYamlString(
            "type: column",
            "drop_columns:",
            " - {name: $.json1.a.default}",
            " - {name: $.json1.a.copy}",
            " - {name: \"$.json1.a.copy_array[1]\"}");

    final Schema source = Schema.builder()
            .add("json1", JSON)
            .add("json2", JSON)
            .build();

    final JsonVisitor visitor = jsonVisitor(dropTask, source);

    // Only the direct parents of dropped paths are expected as keys.
    assertFalse(visitor.jsonDropColumns.containsKey("$['json1']"));
    assertTrue(visitor.jsonDropColumns.containsKey("$['json1']['a']"));
    assertTrue(visitor.jsonDropColumns.containsKey("$['json1']['a']['copy_array']"));

    // Two members are dropped directly beneath $.json1.a.
    final HashSet<String> droppedUnderA = visitor.jsonDropColumns.get("$['json1']['a']");
    assertEquals(2, droppedUnderA.size());
    assertTrue(droppedUnderA.contains("$['json1']['a']['default']"));
    assertTrue(droppedUnderA.contains("$['json1']['a']['copy']"));

    // One indexed element is dropped beneath $.json1.a.copy_array.
    final HashSet<String> droppedUnderCopyArray =
            visitor.jsonDropColumns.get("$['json1']['a']['copy_array']");
    assertEquals(1, droppedUnderCopyArray.size());
    assertTrue(droppedUnderCopyArray.contains("$['json1']['a']['copy_array'][1]"));
}
@Test public void visitMap_columnsUsingBracketNotation() { PluginTask task = taskFromYamlString( "type: column", "columns:", " - {name: \"$['json1']['k1']\"}", " - {name: \"$['json1']['k2']['k2']\"}", " - {name: \"$['json1']['k3']['k3']\", type: string, default: v}", " - {name: \"$['json1']['k4']\", src: \"$['json1']['k2']\"}"); Schema inputSchema = Schema.builder() .add("json1", JSON) .build(); JsonVisitor subject = jsonVisitor(task, inputSchema); // {"k1":{"k1":"v"},"k2":{"k1":"v","k2":"v"}} Value k1 = ValueFactory.newString("k1"); Value k2 = ValueFactory.newString("k2"); Value v = ValueFactory.newString("v"); Value map = ValueFactory.newMap( k1, ValueFactory.newMap(k1, v), k2, ValueFactory.newMap(k2, v)); MapValue visited = subject.visit("$['json1']", map).asMapValue(); assertEquals("{\"k1\":{\"k1\":\"v\"},\"k2\":{\"k2\":\"v\"},\"k3\":{\"k3\":\"v\"},\"k4\":{\"k2\":\"v\"}}", visited.toString()); }
@Test public void visit_withComplexRename() { PluginTask task = taskFromYamlString( "type: column", "columns:", " - {name: \"$.json1['k____1']\", src: \"$.json1['k.-=+1']\"}", " - {name: \"$.json1['k____1'][0]['k____1']\", src: \"$.json1['k____1'][0]['k.-=+1']\"}", " - {name: \"$['json1']['k_2']\", src: \"$['json1']['k.2']\"}", " - {name: \"$['json1']['k_2']['k_2']\", src: \"$['json1']['k_2']['k.2']\"}"); Schema inputSchema = Schema.builder() .add("json1", JSON) .build(); JsonVisitor subject = jsonVisitor(task, inputSchema); // {"k.1":[{"k.1":"v"}], "k.2":{"k.2":"v"}} Value k1 = ValueFactory.newString("k.-=+1"); Value k2 = ValueFactory.newString("k.2"); Value v = ValueFactory.newString("v"); Value map = ValueFactory.newMap( k1, ValueFactory.newArray(ValueFactory.newMap(k1, v)), k2, ValueFactory.newMap(k2, v)); MapValue visited = subject.visit("$['json1']", map).asMapValue(); assertEquals("{\"k____1\":[{\"k____1\":\"v\"}],\"k_2\":{\"k_2\":\"v\"}}", visited.toString()); }
@Test public void visit_withSingleQuotesAndDoubleQuotes() { PluginTask task = taskFromYamlString( "type: column", "columns:", " - {name: \"$['json1']['k_1']\", src: \"$['json1']['k.1']\"}", " - {name: '$[\"json1\"][\"k_1\"][0][\"k_1\"]', src: '$[\"json1\"][\"k_1\"][0][\"k.1\"]'}", " - {name: '$[\"json1\"][\"k_2\"]', src: '$[\"json1\"][\"k.2\"]'}", " - {name: '$[\"json1\"][\"k_2\"][\"k_2\"]', src: '$[\"json1\"][\"k_2\"][\"k.2\"]'}"); Schema inputSchema = Schema.builder() .add("json1", JSON) .build(); JsonVisitor subject = jsonVisitor(task, inputSchema); // {"k.1":[{"k.1":"v"}], "k.2":{"k.2":"v"}} Value k1 = ValueFactory.newString("k.1"); Value k2 = ValueFactory.newString("k.2"); Value v = ValueFactory.newString("v"); Value map = ValueFactory.newMap( k1, ValueFactory.newArray(ValueFactory.newMap(k1, v)), k2, ValueFactory.newMap(k2, v)); MapValue visited = subject.visit("$['json1']", map).asMapValue(); assertEquals("{\"k_1\":[{\"k_1\":\"v\"}],\"k_2\":{\"k_2\":\"v\"}}", visited.toString()); }
/**
 * With six columns declared under {@code add_columns} on top of a
 * single-column input schema, the output schema is expected to hold seven
 * columns: the original {@code keep_me} first, then {@code timestamp}.
 */
@Test
public void buildOutputSchema_AddColumns() {
    final PluginTask addTask = taskFromYamlString(
            "type: column",
            "add_columns:",
            " - {name: timestamp, type: timestamp, default: 2015-07-13, format: \"%Y-%m-%d\", timezone: UTC}",
            " - {name: string, type: string, default: string}",
            " - {name: boolean, type: boolean, default: true}",
            " - {name: long, type: long, default: 0}",
            " - {name: double, type: double, default: 0.5}",
            " - {name: json, type: json, default: \"{\\\"foo\\\":\\\"bar\\\"}\" }");

    final Schema source = Schema.builder()
            .add("keep_me", STRING)
            .build();

    final Schema result = ColumnFilterPlugin.buildOutputSchema(addTask, source);

    // One existing column plus the six added ones.
    assertEquals(7, result.size());
    assertEquals("keep_me", result.getColumn(0).getName());
    assertEquals("timestamp", result.getColumn(1).getName());
}
@Test public void visitMap_addColumnsUsingBracketNotation() { PluginTask task = taskFromYamlString( "type: column", "add_columns:", " - {name: \"$['json1']['k3']['k3']\", type: string, default: v}", " - {name: \"$['json1']['k4']\", src: \"$['json1']['k2']\"}"); Schema inputSchema = Schema.builder() .add("json1", JSON) .add("json2", JSON) .build(); JsonVisitor subject = jsonVisitor(task, inputSchema); // {"k1":{"k1":"v"},"k2":{"k2":"v"}} Value k1 = ValueFactory.newString("k1"); Value k2 = ValueFactory.newString("k2"); Value v = ValueFactory.newString("v"); Value map = ValueFactory.newMap( k1, ValueFactory.newMap(k1, v), k2, ValueFactory.newMap(k2, v)); MapValue visited = subject.visit("$['json1']", map).asMapValue(); assertEquals("{\"k1\":{\"k1\":\"v\"},\"k2\":{\"k2\":\"v\"},\"k3\":{\"k3\":\"v\"},\"k4\":{\"k2\":\"v\"}}", visited.toString()); }
@Test public void visitMap_DropColumns() { PluginTask task = taskFromYamlString( "type: column", "drop_columns:", " - {name: $.json1.k1.k1}", " - {name: $.json1.k2}"); Schema inputSchema = Schema.builder() .add("json1", JSON) .add("json2", JSON) .build(); JsonVisitor subject = jsonVisitor(task, inputSchema); // {"k1":{"k1":"v"},"k2":{"k2":"v"}} Value k1 = ValueFactory.newString("k1"); Value k2 = ValueFactory.newString("k2"); Value v = ValueFactory.newString("v"); Value map = ValueFactory.newMap( k1, ValueFactory.newMap(k1, v), k2, ValueFactory.newMap(k2, v)); MapValue visited = subject.visit("$['json1']", map).asMapValue(); assertEquals("{\"k1\":{}}", visited.toString()); }