/**
 * Infers a schema from the CSV content of {@code in} and stores its string form in
 * {@code avroSchema}.
 *
 * @param in stream passed to {@code CSVUtil.inferSchema} as the CSV source
 * @throws IOException declared by this callback; presumably raised by schema inference
 */
@Override
public void process(InputStream in) throws IOException {
    // The record name is the RECORD_NAME property evaluated against inputFlowFile.
    final String recordName = context.getProperty(RECORD_NAME)
            .evaluateAttributeExpressions(inputFlowFile)
            .getValue();

    // PRETTY_AVRO_OUTPUT is read only after inference, and is handed to toString(...).
    avroSchema.set(
            CSVUtil.inferSchema(recordName, in, props)
                    .toString(context.getProperty(PRETTY_AVRO_OUTPUT).asBoolean()));
}
}); // closes the enclosing anonymous class and call expression that open before this method
/**
 * Infers a schema from the CSV content of {@code in} and stores its string form in
 * {@code avroSchema}.
 *
 * @param in stream passed to {@code CSVUtil.inferSchema} as the CSV source
 * @throws IOException declared by this callback; presumably raised by schema inference
 */
@Override
public void process(InputStream in) throws IOException {
    // The record name is the RECORD_NAME property evaluated against inputFlowFile.
    final String recordName = context.getProperty(RECORD_NAME)
            .evaluateAttributeExpressions(inputFlowFile)
            .getValue();

    // PRETTY_AVRO_OUTPUT is read only after inference, and is handed to toString(...).
    avroSchema.set(
            CSVUtil.inferSchema(recordName, in, props)
                    .toString(context.getProperty(PRETTY_AVRO_OUTPUT).asBoolean()));
}
}); // closes the enclosing anonymous class and call expression that open before this method
/**
 * Runs schema inference over {@code csvLines} (header enabled) with {@code "nullable_string"}
 * in the set passed as the last argument, rethrowing any {@link IOException} as a
 * {@link RuntimeException}.
 */
@Override
public void run() {
    try {
        final ByteArrayInputStream csvStream =
                new ByteArrayInputStream(csvLines.getBytes("utf8"));
        // NOTE(review): the trailing set is presumably the required field names; confirm
        // against CSVUtil.inferSchema.
        CSVUtil.inferSchema(
                "TestRecord",
                csvStream,
                new CSVProperties.Builder().hasHeader().build(),
                ImmutableSet.of("nullable_string"));
    } catch (IOException ioe) {
        throw new RuntimeException("Schema inference threw IOException", ioe);
    }
}
}); // closes the enclosing anonymous class and call expression that open before this method
/**
 * Runs schema inference over {@code csvLines} (header enabled) with {@code "nullable_long"}
 * in the set passed as the last argument, rethrowing any {@link IOException} as a
 * {@link RuntimeException}.
 */
@Override
public void run() {
    try {
        final ByteArrayInputStream csvStream =
                new ByteArrayInputStream(csvLines.getBytes("utf8"));
        // NOTE(review): the trailing set is presumably the required field names; confirm
        // against CSVUtil.inferSchema.
        CSVUtil.inferSchema(
                "TestRecord",
                csvStream,
                new CSVProperties.Builder().hasHeader().build(),
                ImmutableSet.of("nullable_long"));
    } catch (IOException ioe) {
        throw new RuntimeException("Schema inference threw IOException", ioe);
    }
}
}); // closes the enclosing anonymous class and call expression that open before this method
/**
 * Verifies inference when a column has no example value: with one line skipped and a header
 * row, column {@code one} (value {@code 34}) is expected to be a long and column {@code two}
 * (empty) is expected to be a nullable string.
 */
@Test
public void testSchemaInferenceMissingExample() throws Exception {
    // Leading blank line, then the header "one,two", then a single row with an empty second cell.
    final byte[] csvBytes = "\none,two\n34,\n".getBytes("utf8");
    final InputStream csvInput = new ByteArrayInputStream(csvBytes);

    final Schema inferred = CSVUtil.inferSchema(
            "TestRecord",
            csvInput,
            new CSVProperties.Builder().linesToSkip(1).hasHeader().build());

    // Both header columns must be present before their types are compared.
    for (String column : new String[] {"one", "two"}) {
        Assert.assertNotNull(inferred.getField(column));
    }

    Assert.assertEquals(
            "Should infer a long",
            schema(Schema.Type.LONG),
            inferred.getField("one").schema());
    Assert.assertEquals(
            "Should default to a string",
            nullable(Schema.Type.STRING),
            inferred.getField("two").schema());
}
/**
 * Verifies header-driven inference over {@code csvLines}: the record keeps the supplied name
 * with no namespace, every expected column is present, and each column gets the expected
 * (possibly nullable) type.
 */
@Test
public void testSchemaInference() throws Exception {
    final InputStream csvInput = new ByteArrayInputStream(csvLines.getBytes("utf8"));
    final Schema inferred = CSVUtil.inferSchema(
            "TestRecord",
            csvInput,
            new CSVProperties.Builder().hasHeader().build());

    Assert.assertEquals("Should use name", "TestRecord", inferred.getName());
    Assert.assertNull("Should not have namespace", inferred.getNamespace());

    // Presence checks run in the same order as the type checks below.
    final String[] expectedColumns = {
        "long", "float", "double", "double2", "string", "nullable_long", "nullable_string"
    };
    for (String column : expectedColumns) {
        Assert.assertNotNull(inferred.getField(column));
    }

    Assert.assertEquals(
            "Should infer a long",
            schema(Schema.Type.LONG),
            inferred.getField("long").schema());
    Assert.assertEquals(
            "Should infer a float (ends in f)",
            schema(Schema.Type.FLOAT),
            inferred.getField("float").schema());
    Assert.assertEquals(
            "Should infer a double (ends in d)",
            nullable(Schema.Type.DOUBLE),
            inferred.getField("double").schema());
    Assert.assertEquals(
            "Should infer a double (decimal defaults to double)",
            nullable(Schema.Type.DOUBLE),
            inferred.getField("double2").schema());
    Assert.assertEquals(
            "Should infer a non-null string (not numeric)",
            schema(Schema.Type.STRING),
            inferred.getField("string").schema());
    Assert.assertEquals(
            "Should infer a nullable long (second line is a long)",
            nullable(Schema.Type.LONG),
            inferred.getField("nullable_long").schema());
    Assert.assertEquals(
            "Should infer a nullable string (second is missing)",
            nullable(Schema.Type.STRING),
            inferred.getField("nullable_string").schema());
}
@Test public void testSchemaInferenceWithoutHeader() throws Exception { InputStream stream = new ByteArrayInputStream(csvLines.getBytes("utf8")); Schema schema = CSVUtil.inferSchema("TestRecord", stream, new CSVProperties.Builder().build(), ImmutableSet.of("float"));
@Test public void testSchemaInferenceSkipHeader() throws Exception { InputStream stream = new ByteArrayInputStream(csvLines.getBytes("utf8")); Schema schema = CSVUtil.inferSchema("TestRecord", stream, new CSVProperties.Builder().linesToSkip(1).build(), ImmutableSet.of("long", "field_1"));