CSVProperties props = new CSVProperties.Builder() .charset(context.getProperty(CHARSET).evaluateAttributeExpressions(inputFlowFile).getValue()) .delimiter(context.getProperty(DELIMITER).evaluateAttributeExpressions(inputFlowFile).getValue())
CSVProperties props = new CSVProperties.Builder() .charset(context.getProperty(CHARSET).evaluateAttributeExpressions(incomingCSV).getValue()) .delimiter(context.getProperty(DELIMITER).evaluateAttributeExpressions(incomingCSV).getValue())
CSVProperties props = new CSVProperties.Builder() .delimiter(delimiter) .escape(escape)
CSVProperties props = new CSVProperties.Builder() .delimiter(delimiter) .escape(escape)
CSVProperties props = new CSVProperties.Builder() .charset(context.getProperty(CHARSET).evaluateAttributeExpressions(incomingCSV).getValue()) .delimiter(context.getProperty(DELIMITER).evaluateAttributeExpressions(incomingCSV).getValue())
CSVProperties props = new CSVProperties.Builder() .charset(context.getProperty(CHARSET).evaluateAttributeExpressions(inputFlowFile).getValue()) .delimiter(context.getProperty(DELIMITER).evaluateAttributeExpressions(inputFlowFile).getValue())
// Wrap the UTF-8 bytes of csvLines in an in-memory stream and infer a schema named "TestRecord" from it.
// The properties are built with linesToSkip(1) and without hasHeader().
// NOTE(review): "field_1" is presumably a generated positional column name used when no header is read -- confirm against CSVUtil.
// NOTE(review): the trailing set looks like the names of fields that must be required (non-null) -- verify.
InputStream stream = new ByteArrayInputStream(csvLines.getBytes("utf8")); Schema schema = CSVUtil.inferSchema("TestRecord", stream, new CSVProperties.Builder().linesToSkip(1).build(), ImmutableSet.of("long", "field_1"));
// Wrap the UTF-8 bytes of csvLines in an in-memory stream and run inferNullableSchema for a record named "TestRecord".
// The properties come from a Builder with no options set (no hasHeader(), no linesToSkip()).
// NOTE(review): the trailing set ("long", "field_1") is presumably the fields that stay required while the rest become nullable -- verify.
InputStream stream = new ByteArrayInputStream(csvLines.getBytes("utf8")); Schema schema = CSVUtil.inferNullableSchema("TestRecord", stream, new CSVProperties.Builder().build(), ImmutableSet.of("long", "field_1"));
// Wrap the UTF-8 bytes of csvLines in an in-memory stream and infer a schema named "TestRecord" from it.
// The properties come from a Builder with no options set (no hasHeader(), no linesToSkip()).
// NOTE(review): the trailing set ("float") is presumably the fields that must be required (non-null) -- verify.
InputStream stream = new ByteArrayInputStream(csvLines.getBytes("utf8")); Schema schema = CSVUtil.inferSchema("TestRecord", stream, new CSVProperties.Builder().build(), ImmutableSet.of("float"));
/**
 * Infers a schema named "TestRecord" from {@code csvLines}, using properties built with
 * {@code hasHeader()}, then checks the record name, the absence of a namespace, the
 * presence of every expected column and the schema inferred for each of them.
 */
@Test
public void testSchemaInference() throws Exception {
  InputStream csvInput = new ByteArrayInputStream(csvLines.getBytes("utf8"));
  CSVProperties headerProps = new CSVProperties.Builder().hasHeader().build();
  Schema inferred = CSVUtil.inferSchema("TestRecord", csvInput, headerProps);

  // Record-level naming.
  Assert.assertEquals("Should use name", "TestRecord", inferred.getName());
  Assert.assertNull("Should not have namespace", inferred.getNamespace());

  // Every column must be present before its type is inspected.
  String[] expectedColumns = {
    "long", "float", "double", "double2", "string", "nullable_long", "nullable_string"
  };
  for (String column : expectedColumns) {
    Assert.assertNotNull(inferred.getField(column));
  }

  // Per-column inferred types.
  Assert.assertEquals("Should infer a long",
      schema(Schema.Type.LONG),
      inferred.getField("long").schema());
  Assert.assertEquals("Should infer a float (ends in f)",
      schema(Schema.Type.FLOAT),
      inferred.getField("float").schema());
  Assert.assertEquals("Should infer a double (ends in d)",
      nullable(Schema.Type.DOUBLE),
      inferred.getField("double").schema());
  Assert.assertEquals("Should infer a double (decimal defaults to double)",
      nullable(Schema.Type.DOUBLE),
      inferred.getField("double2").schema());
  Assert.assertEquals("Should infer a non-null string (not numeric)",
      schema(Schema.Type.STRING),
      inferred.getField("string").schema());
  Assert.assertEquals("Should infer a nullable long (second line is a long)",
      nullable(Schema.Type.LONG),
      inferred.getField("nullable_long").schema());
  Assert.assertEquals("Should infer a nullable string (second is missing)",
      nullable(Schema.Type.STRING),
      inferred.getField("nullable_string").schema());
}
// Wrap the UTF-8 bytes of csvLines in an in-memory stream and run inferNullableSchema for a record named "TestRecord".
// The properties are built with hasHeader().
// NOTE(review): the trailing set ("float") is presumably the fields that stay required while the rest become nullable -- verify.
InputStream stream = new ByteArrayInputStream(csvLines.getBytes("utf8")); Schema schema = CSVUtil.inferNullableSchema("TestRecord", stream, new CSVProperties.Builder().hasHeader().build(), ImmutableSet.of("float"));
/**
 * Checks that each spelling of the single-quote character handed to
 * {@code CSVProperties.Builder#quote} ends up as a literal {@code '} in the built
 * properties' {@code quote} field.
 */
@Test
public void testCSVProperitesBuilderQuote() {
  // Spellings are tried in this order: unicode escape text, backslash-quote text,
  // and the plain character (written twice, once with a Java char escape).
  String[] spellings = {"\\u0027", "\\'", "\'", "'"};
  for (String spelling : spellings) {
    CSVProperties built = new CSVProperties.Builder()
        .quote(spelling)
        .build();
    Assert.assertEquals("Quote should be '", "'", built.quote);
  }
}
} // closes the enclosing class, whose header is outside this view
/**
 * Checks that each spelling of the tab character handed to
 * {@code CSVProperties.Builder#delimiter} ends up as a literal tab in the built
 * properties' {@code delimiter} field.
 */
@Test
public void testCSVProperitesBuilderDelimiter() {
  // Spellings are tried in this order: unicode escape text, backslash-t text,
  // and the actual tab character.
  String[] spellings = {"\\u0009", "\\t", "\t"};
  for (String spelling : spellings) {
    CSVProperties built = new CSVProperties.Builder()
        .delimiter(spelling)
        .build();
    Assert.assertEquals("Delimiter should be tab", "\t", built.delimiter);
  }
}
/**
 * Checks that each spelling of the backslash character handed to
 * {@code CSVProperties.Builder#escape} ends up as a single backslash in the built
 * properties' {@code escape} field.
 */
@Test
public void testCSVProperitesBuilderEscape() {
  // Spellings are tried in this order: unicode escape text, a doubled backslash,
  // and a single backslash.
  String[] spellings = {"\\u005c", "\\\\", "\\"};
  for (String spelling : spellings) {
    CSVProperties built = new CSVProperties.Builder()
        .escape(spelling)
        .build();
    Assert.assertEquals("Escape should be backslash", "\\", built.escape);
  }
}
/**
 * Infers a schema from a small inline CSV (a blank first line, the header "one,two",
 * and a single row "34," with no value for the second column) using properties built
 * with {@code linesToSkip(1)} and {@code hasHeader()}. Column "one" is expected to be a
 * long and column "two" a nullable string.
 */
@Test
public void testSchemaInferenceMissingExample() throws Exception {
  byte[] csvBytes = "\none,two\n34,\n".getBytes("utf8");
  InputStream csvInput = new ByteArrayInputStream(csvBytes);
  CSVProperties skipOneWithHeader = new CSVProperties.Builder()
      .linesToSkip(1)
      .hasHeader()
      .build();

  Schema inferred = CSVUtil.inferSchema("TestRecord", csvInput, skipOneWithHeader);

  // Both header columns must be present.
  Assert.assertNotNull(inferred.getField("one"));
  Assert.assertNotNull(inferred.getField("two"));

  Assert.assertEquals("Should infer a long",
      schema(Schema.Type.LONG),
      inferred.getField("one").schema());
  Assert.assertEquals("Should default to a string",
      nullable(Schema.Type.STRING),
      inferred.getField("two").schema());
}
/**
 * Runs schema inference over {@code csvLines}, using properties built with
 * {@code hasHeader()} and passing "nullable_long" in the field-name set. An
 * {@link IOException} is rethrown as a {@link RuntimeException} with the original as
 * its cause.
 */
@Override
public void run() {
  try {
    ByteArrayInputStream csvInput = new ByteArrayInputStream(csvLines.getBytes("utf8"));
    CSVProperties headerProps = new CSVProperties.Builder().hasHeader().build();
    CSVUtil.inferSchema("TestRecord", csvInput, headerProps, ImmutableSet.of("nullable_long"));
  } catch (IOException ioFailure) {
    throw new RuntimeException("Schema inference threw IOException", ioFailure);
  }
}
}); // closes the anonymous class and the enclosing call, both opened outside this view
/**
 * Runs {@code inferNullableSchema} with the dotted name "com.example.TestRecord" and
 * checks that the resulting schema reports "TestRecord" as its name and "com.example"
 * as its namespace.
 */
@Test
public void testSchemaNamespace() throws Exception {
  InputStream csvInput = new ByteArrayInputStream(csvLines.getBytes("utf8"));
  CSVProperties headerProps = new CSVProperties.Builder().hasHeader().build();

  Schema inferred = CSVUtil.inferNullableSchema("com.example.TestRecord", csvInput, headerProps);

  Assert.assertEquals("Should use name", "TestRecord", inferred.getName());
  Assert.assertEquals("Should set namespace", "com.example", inferred.getNamespace());
}
/**
 * Runs schema inference over {@code csvLines}, using properties built with
 * {@code hasHeader()} and passing "nullable_string" in the field-name set. An
 * {@link IOException} is rethrown as a {@link RuntimeException} with the original as
 * its cause.
 */
@Override
public void run() {
  try {
    ByteArrayInputStream csvInput = new ByteArrayInputStream(csvLines.getBytes("utf8"));
    CSVProperties headerProps = new CSVProperties.Builder().hasHeader().build();
    CSVUtil.inferSchema("TestRecord", csvInput, headerProps, ImmutableSet.of("nullable_string"));
  } catch (IOException ioFailure) {
    throw new RuntimeException("Schema inference threw IOException", ioFailure);
  }
}
}); // closes the anonymous class and the enclosing call, both opened outside this view