.linesToSkip(context.getProperty(HEADER_LINE_SKIP_COUNT).evaluateAttributeExpressions(inputFlowFile).asInteger()) .header(header.get()) .hasHeader(hasHeader.get()) .build();
.quote(context.getProperty(QUOTE).evaluateAttributeExpressions(incomingCSV).getValue()) .escape(context.getProperty(ESCAPE).evaluateAttributeExpressions(incomingCSV).getValue()) .hasHeader(context.getProperty(HAS_HEADER).evaluateAttributeExpressions(incomingCSV).asBoolean()) .linesToSkip(context.getProperty(LINES_TO_SKIP).evaluateAttributeExpressions(incomingCSV).asInteger()) .build();
.quote(quote) .header(header) .hasHeader(!noHeader) .linesToSkip(linesToSkip) .charset(charsetName)
.quote(context.getProperty(QUOTE).evaluateAttributeExpressions(incomingCSV).getValue()) .escape(context.getProperty(ESCAPE).evaluateAttributeExpressions(incomingCSV).getValue()) .hasHeader(context.getProperty(HAS_HEADER).evaluateAttributeExpressions(incomingCSV).asBoolean()) .linesToSkip(context.getProperty(LINES_TO_SKIP).evaluateAttributeExpressions(incomingCSV).asInteger()) .build();
.quote(quote) .header(header) .hasHeader(!noHeader) .linesToSkip(linesToSkip) .charset(charsetName)
.linesToSkip(context.getProperty(HEADER_LINE_SKIP_COUNT).evaluateAttributeExpressions(inputFlowFile).asInteger()) .header(header.get()) .hasHeader(hasHeader.get()) .build();
/**
 * Infers a schema named "TestRecord" from {@code csvLines} (header row enabled)
 * and checks the record name, the absence of a namespace, that every expected
 * column is present, and the schema inferred for each column.
 */
@Test
public void testSchemaInference() throws Exception {
  InputStream csvInput = new ByteArrayInputStream(csvLines.getBytes("utf8"));
  CSVProperties headerProps = new CSVProperties.Builder().hasHeader().build();
  Schema inferred = CSVUtil.inferSchema("TestRecord", csvInput, headerProps);

  // Record-level identity.
  Assert.assertEquals("Should use name", "TestRecord", inferred.getName());
  Assert.assertNull("Should not have namespace", inferred.getNamespace());

  // Every column must be present before its type is inspected.
  String[] expectedColumns = {
    "long", "float", "double", "double2", "string", "nullable_long", "nullable_string"
  };
  for (String column : expectedColumns) {
    Assert.assertNotNull(inferred.getField(column));
  }

  // Per-column inferred types.
  Assert.assertEquals("Should infer a long",
      schema(Schema.Type.LONG),
      inferred.getField("long").schema());
  Assert.assertEquals("Should infer a float (ends in f)",
      schema(Schema.Type.FLOAT),
      inferred.getField("float").schema());
  Assert.assertEquals("Should infer a double (ends in d)",
      nullable(Schema.Type.DOUBLE),
      inferred.getField("double").schema());
  Assert.assertEquals("Should infer a double (decimal defaults to double)",
      nullable(Schema.Type.DOUBLE),
      inferred.getField("double2").schema());
  Assert.assertEquals("Should infer a non-null string (not numeric)",
      schema(Schema.Type.STRING),
      inferred.getField("string").schema());
  Assert.assertEquals("Should infer a nullable long (second line is a long)",
      nullable(Schema.Type.LONG),
      inferred.getField("nullable_long").schema());
  Assert.assertEquals("Should infer a nullable string (second is missing)",
      nullable(Schema.Type.STRING),
      inferred.getField("nullable_string").schema());
}
InputStream stream = new ByteArrayInputStream(csvLines.getBytes("utf8")); Schema schema = CSVUtil.inferNullableSchema("TestRecord", stream, new CSVProperties.Builder().hasHeader().build(), ImmutableSet.of("float"));
/**
 * Infers a schema from a small CSV whose first line is empty (skipped via
 * {@code linesToSkip(1)}), followed by the header {@code one,two} and a single
 * data row in which column "two" has no value. Expects "one" to be a long and
 * "two" to fall back to a nullable string.
 */
@Test
public void testSchemaInferenceMissingExample() throws Exception {
  byte[] csvBytes = "\none,two\n34,\n".getBytes("utf8");
  InputStream csvInput = new ByteArrayInputStream(csvBytes);
  CSVProperties props = new CSVProperties.Builder()
      .linesToSkip(1)
      .hasHeader()
      .build();

  Schema inferred = CSVUtil.inferSchema("TestRecord", csvInput, props);

  // Both header columns must be present.
  Assert.assertNotNull(inferred.getField("one"));
  Assert.assertNotNull(inferred.getField("two"));

  Assert.assertEquals("Should infer a long",
      schema(Schema.Type.LONG),
      inferred.getField("one").schema());
  Assert.assertEquals("Should default to a string",
      nullable(Schema.Type.STRING),
      inferred.getField("two").schema());
}
// Runs CSVUtil.inferSchema for "TestRecord" over csvLines (header enabled), passing
// ImmutableSet.of("nullable_long") as the fourth argument -- presumably the set of
// required (non-null) field names; confirm against CSVUtil. An IOException from
// inference is rethrown as a RuntimeException with the original as its cause.
@Override public void run() { try { CSVUtil.inferSchema("TestRecord", new ByteArrayInputStream(csvLines.getBytes("utf8")), new CSVProperties.Builder().hasHeader().build(), ImmutableSet.of("nullable_long")); } catch (IOException e) { throw new RuntimeException("Schema inference threw IOException", e); } } });
/**
 * Infers a nullable schema using the dotted name "com.example.TestRecord" and
 * checks that the resulting schema reports "TestRecord" as its name and
 * "com.example" as its namespace.
 */
@Test
public void testSchemaNamespace() throws Exception {
  InputStream csvInput = new ByteArrayInputStream(csvLines.getBytes("utf8"));
  CSVProperties headerProps = new CSVProperties.Builder().hasHeader().build();

  Schema inferred =
      CSVUtil.inferNullableSchema("com.example.TestRecord", csvInput, headerProps);

  Assert.assertEquals("Should use name", "TestRecord", inferred.getName());
  Assert.assertEquals("Should set namespace", "com.example", inferred.getNamespace());
}
// Runs CSVUtil.inferSchema for "TestRecord" over csvLines (header enabled), passing
// ImmutableSet.of("nullable_string") as the fourth argument -- presumably the set of
// required (non-null) field names; confirm against CSVUtil. An IOException from
// inference is rethrown as a RuntimeException with the original as its cause.
@Override public void run() { try { CSVUtil.inferSchema("TestRecord", new ByteArrayInputStream(csvLines.getBytes("utf8")), new CSVProperties.Builder().hasHeader().build(), ImmutableSet.of("nullable_string")); } catch (IOException e) { throw new RuntimeException("Schema inference threw IOException", e); } } });