/**
 * Adds a single-value dimension field with an explicit default null value to the schema being built.
 *
 * @param dimensionName name given to the new dimension field spec
 * @param dataType data type given to the new dimension field spec
 * @param defaultNullValue default null value handed to the new dimension field spec
 * @return this builder, so that calls can be chained
 */
public SchemaBuilder addSingleValueDimension(@Nonnull String dimensionName, @Nonnull DataType dataType,
    @Nonnull Object defaultNullValue) {
  DimensionFieldSpec singleValueSpec = new DimensionFieldSpec(dimensionName, dataType, true, defaultNullValue);
  _schema.addField(singleValueSpec);
  return this;
}
/**
 * Adds a multi-value dimension field with an explicit default null value to the schema being built.
 *
 * @param dimensionName name given to the new dimension field spec
 * @param dataType data type given to the new dimension field spec
 * @param defaultNullValue default null value handed to the new dimension field spec
 * @return this builder, so that calls can be chained
 */
public SchemaBuilder addMultiValueDimension(@Nonnull String dimensionName, @Nonnull DataType dataType,
    @Nonnull Object defaultNullValue) {
  DimensionFieldSpec multiValueSpec = new DimensionFieldSpec(dimensionName, dataType, false, defaultNullValue);
  _schema.addField(multiValueSpec);
  return this;
}
/**
 * Adds a single-value dimension field to the schema being built, without specifying a default null value.
 *
 * @param dimensionName name given to the new dimension field spec
 * @param dataType data type given to the new dimension field spec
 * @return this builder, so that calls can be chained
 */
public SchemaBuilder addSingleValueDimension(@Nonnull String dimensionName, @Nonnull DataType dataType) {
  DimensionFieldSpec singleValueSpec = new DimensionFieldSpec(dimensionName, dataType, true);
  _schema.addField(singleValueSpec);
  return this;
}
/**
 * Adds a multi-value dimension field to the schema being built, without specifying a default null value.
 *
 * @param dimensionName name given to the new dimension field spec
 * @param dataType data type given to the new dimension field spec
 * @return this builder, so that calls can be chained
 */
public SchemaBuilder addMultiValueDimension(@Nonnull String dimensionName, @Nonnull DataType dataType) {
  DimensionFieldSpec multiValueSpec = new DimensionFieldSpec(dimensionName, dataType, false);
  _schema.addField(multiValueSpec);
  return this;
}
/**
 * Adds the built-in virtual columns {@code $docId} (INT), {@code $hostName} (STRING) and
 * {@code $segmentName} (STRING) to the given schema. A column the schema already reports via
 * {@code hasColumn} is left untouched.
 *
 * @param schema schema to which the missing built-in virtual columns are added
 */
public static void addBuiltInVirtualColumnsToSchema(Schema schema) {
  if (!schema.hasColumn("$docId")) {
    DimensionFieldSpec docIdSpec =
        new DimensionFieldSpec("$docId", FieldSpec.DataType.INT, true, DocIdVirtualColumnProvider.class);
    schema.addField(docIdSpec);
  }

  if (!schema.hasColumn("$hostName")) {
    DimensionFieldSpec hostNameSpec =
        new DimensionFieldSpec("$hostName", FieldSpec.DataType.STRING, true, HostNameVirtualColumnProvider.class);
    schema.addField(hostNameSpec);
  }

  if (!schema.hasColumn("$segmentName")) {
    DimensionFieldSpec segmentNameSpec = new DimensionFieldSpec("$segmentName", FieldSpec.DataType.STRING, true,
        SegmentNameVirtualColumnProvider.class);
    schema.addField(segmentNameSpec);
  }
}
}
/**
 * Helper method to build a schema with one single-value dimension column for each of the
 * INT, LONG, FLOAT, DOUBLE and STRING data types.
 *
 * @return Schema containing the five dimension columns
 */
private static Schema buildSchema() {
  String[] columnNames = {INT_COLUMN, LONG_COLUMN, FLOAT_COLUMN, DOUBLE_COLUMN, STRING_COLUMN};
  FieldSpec.DataType[] columnTypes = {
      FieldSpec.DataType.INT, FieldSpec.DataType.LONG, FieldSpec.DataType.FLOAT, FieldSpec.DataType.DOUBLE,
      FieldSpec.DataType.STRING
  };

  Schema result = new Schema();
  // Columns are added in the same order as the parallel arrays above.
  for (int i = 0; i < columnNames.length; i++) {
    result.addField(new DimensionFieldSpec(columnNames[i], columnTypes[i], true));
  }
  return result;
}
}
/**
 * Setup to build a segment with raw indexes (no-dictionary) of various data types.
 * Creates a schema with INT, LONG, FLOAT, DOUBLE and STRING single-value dimension columns,
 * seeds the random generator and stores the record reader returned by {@code buildIndex}.
 *
 * @throws Exception
 */
@BeforeClass
public void setup() throws Exception {
  String[] columnNames = {INT_COLUMN, LONG_COLUMN, FLOAT_COLUMN, DOUBLE_COLUMN, STRING_COLUMN};
  FieldSpec.DataType[] columnTypes = {
      FieldSpec.DataType.INT, FieldSpec.DataType.LONG, FieldSpec.DataType.FLOAT, FieldSpec.DataType.DOUBLE,
      FieldSpec.DataType.STRING
  };

  Schema rawIndexSchema = new Schema();
  for (int i = 0; i < columnNames.length; i++) {
    rawIndexSchema.addField(new DimensionFieldSpec(columnNames[i], columnTypes[i], true));
  }

  // The random generator is assigned before the index is built, as in the original ordering.
  _random = new Random(System.nanoTime());
  _recordReader = buildIndex(rawIndexSchema);
}
/**
 * Builds the field spec for one column of the generator spec. The concrete spec class is chosen
 * from the column's field type (dimension, metric or time); the spec is then given the column
 * name and data type and is marked as single-value.
 *
 * @param genSpec generator spec providing the data type, field type and time unit maps
 * @param column name of the column to build the spec for
 * @return the populated field spec
 * @throws RuntimeException if the field type is not DIMENSION, METRIC or TIME
 */
private FieldSpec buildSpec(DataGeneratorSpec genSpec, String column) {
  final DataType columnDataType = genSpec.getDataTypesMap().get(column);
  final FieldType columnFieldType = genSpec.getFieldTypesMap().get(column);

  final FieldSpec fieldSpec;
  switch (columnFieldType) {
    case DIMENSION:
      fieldSpec = new DimensionFieldSpec();
      break;
    case METRIC:
      fieldSpec = new MetricFieldSpec();
      break;
    case TIME:
      fieldSpec = new TimeFieldSpec(column, columnDataType, genSpec.getTimeUnitMap().get(column));
      break;
    default:
      throw new RuntimeException("Invalid Field type.");
  }

  // Applied to every kind of spec, including the time spec that was already constructed with these values.
  fieldSpec.setName(column);
  fieldSpec.setDataType(columnDataType);
  fieldSpec.setSingleValueField(true);
  return fieldSpec;
}
/**
 * Setup to build a segment with raw indexes (no-dictionary) of various data types.
 * Registers four single-value BYTES dimension columns in the schema, builds the index and
 * loads the resulting segment in heap read mode.
 *
 * @throws Exception
 */
@BeforeClass
public void setup() throws Exception {
  _schema = new Schema();
  String[] bytesColumns = {
      FIXED_BYTE_SORTED_COLUMN, FIXED_BYTES_UNSORTED_COLUMN, FIXED_BYTES_NO_DICT_COLUMN, VARIABLE_BYTES_COLUMN
  };
  for (String bytesColumn : bytesColumns) {
    _schema.addField(new DimensionFieldSpec(bytesColumn, FieldSpec.DataType.BYTES, true));
  }

  // The random generator is assigned before the index is built, as in the original ordering.
  _random = new Random(System.nanoTime());
  _recordReader = buildIndex(_schema);

  File segmentDir = new File(SEGMENT_DIR_NAME, SEGMENT_NAME);
  _segment = ImmutableSegmentLoader.load(segmentDir, ReadMode.heap);
}
/**
 * Tests SegmentDictionaryCreator for case when there is only one string and it is empty.
 * The index directory is removed in a {@code finally} block so that a failing assertion
 * does not leave it behind for later tests that use the same path.
 */
@Test
public void testSingleEmptyString() throws Exception {
  File indexDir = new File("/tmp/dict.test");
  indexDir.deleteOnExit();
  FieldSpec fieldSpec = new DimensionFieldSpec("test", DataType.STRING, true);

  try (SegmentDictionaryCreator dictionaryCreator = new SegmentDictionaryCreator(new String[]{""}, fieldSpec,
      indexDir)) {
    dictionaryCreator.build();
    Assert.assertEquals(dictionaryCreator.getNumBytesPerEntry(), 0);
    Assert.assertEquals(dictionaryCreator.indexOfSV(""), 0);
  } finally {
    // Runs after the creator has been closed, whether or not the assertions passed.
    FileUtils.deleteQuietly(indexDir);
  }
}
/** * Test for ensuring that Strings with special characters can be handled * correctly. * * @throws Exception */ @Test public void testUTF8Characters() throws Exception { File indexDir = new File("/tmp/dict.test"); indexDir.deleteOnExit(); FieldSpec fieldSpec = new DimensionFieldSpec("test", DataType.STRING, true); String[] inputStrings = new String[3]; inputStrings[0] = new String(new byte[]{67, 97, 102, -61, -87}); // "Café"; inputStrings[1] = new String(new byte[]{70, 114, 97, 110, -61, -89, 111, 105, 115}); // "François"; inputStrings[2] = new String(new byte[]{67, -61, -76, 116, 101, 32, 100, 39, 73, 118, 111, 105, 114, 101}); // "Côte d'Ivoire"; Arrays.sort(inputStrings); try (SegmentDictionaryCreator dictionaryCreator = new SegmentDictionaryCreator(inputStrings, fieldSpec, indexDir)) { dictionaryCreator.build(); for (String inputString : inputStrings) { Assert .assertTrue(dictionaryCreator.indexOfSV(inputString) >= 0, "Value not found in dictionary " + inputString); } } FileUtils.deleteQuietly(indexDir); }
/**
 * Creates a small schema named after the given table, with two single-value STRING dimensions
 * ({@code dimA}, {@code dimB}) and two metrics ({@code metricA} of type INT, {@code metricB} of
 * type DOUBLE), each with an explicit default null value.
 *
 * @param tableName value used as the schema name
 * @return the populated schema
 */
protected Schema createDummySchema(String tableName) {
  Schema dummySchema = new Schema();
  dummySchema.setSchemaName(tableName);

  // Dimensions
  DimensionFieldSpec dimA = new DimensionFieldSpec("dimA", FieldSpec.DataType.STRING, true, "");
  DimensionFieldSpec dimB = new DimensionFieldSpec("dimB", FieldSpec.DataType.STRING, true, 0);
  dummySchema.addField(dimA);
  dummySchema.addField(dimB);

  // Metrics
  MetricFieldSpec metricA = new MetricFieldSpec("metricA", FieldSpec.DataType.INT, 0);
  MetricFieldSpec metricB = new MetricFieldSpec("metricB", FieldSpec.DataType.DOUBLE, -1);
  dummySchema.addField(metricA);
  dummySchema.addField(metricB);

  return dummySchema;
}
/**
 * Creates the schema named {@code "schema"} used by the tests: two single-value dimensions
 * (STRING and INT), one multi-value STRING dimension, an INT metric, a FLOAT metric and a
 * time field whose granularity spec is LONG / HOURS.
 *
 * @return the populated schema
 */
private Schema createPinotSchema() {
  Schema pinotSchema = new Schema();
  pinotSchema.setSchemaName("schema");

  // Dimensions: two single-value, one multi-value.
  pinotSchema.addField(new DimensionFieldSpec(D_SV_1, FieldSpec.DataType.STRING, true));
  pinotSchema.addField(new DimensionFieldSpec(D_SV_2, FieldSpec.DataType.INT, true));
  pinotSchema.addField(new DimensionFieldSpec(D_MV_1, FieldSpec.DataType.STRING, false));

  // Metrics.
  pinotSchema.addField(new MetricFieldSpec(M1, FieldSpec.DataType.INT));
  pinotSchema.addField(new MetricFieldSpec(M2, FieldSpec.DataType.FLOAT));

  // Time column.
  TimeGranularitySpec hourlyGranularity = new TimeGranularitySpec(FieldSpec.DataType.LONG, TimeUnit.HOURS, TIME);
  pinotSchema.addField(new TimeFieldSpec(hourlyGranularity));

  return pinotSchema;
}
/**
 * Builds a schema with a STRING dimension and a time field (incoming MILLISECONDS, outgoing DAYS),
 * adds the built-in virtual columns, and checks that the schema then has 5 columns. It then checks
 * that the schema returned by {@code RealtimeSegmentConverter.getUpdatedSchema} has only 2 columns
 * and reports DAYS as the incoming time type.
 */
@Test
public void testNoVirtualColumnsInSchema() {
  Schema originalSchema = new Schema();
  originalSchema.addField(new DimensionFieldSpec("col1", FieldSpec.DataType.STRING, true));
  originalSchema.addField(
      new TimeFieldSpec("col1", FieldSpec.DataType.LONG, TimeUnit.MILLISECONDS, "col2", FieldSpec.DataType.LONG,
          TimeUnit.DAYS));
  VirtualColumnProviderFactory.addBuiltInVirtualColumnsToSchema(originalSchema);

  // State of the schema before conversion.
  Assert.assertEquals(originalSchema.getColumnNames().size(), 5);
  Assert.assertEquals(originalSchema.getTimeFieldSpec().getIncomingGranularitySpec().getTimeType(),
      TimeUnit.MILLISECONDS);

  RealtimeSegmentConverter segmentConverter =
      new RealtimeSegmentConverter(null, "", originalSchema, "testTable", "col1", "segment1", "col1");
  Schema updatedSchema = segmentConverter.getUpdatedSchema(originalSchema);

  // State of the schema produced by the converter.
  Assert.assertEquals(updatedSchema.getColumnNames().size(), 2);
  Assert.assertEquals(updatedSchema.getTimeFieldSpec().getIncomingGranularitySpec().getTimeType(), TimeUnit.DAYS);
}
}
/**
 * Creates the schema named {@code "schema"} with two single-value STRING dimensions, an INT
 * metric, a FLOAT metric and the supplied time field spec, added in that order.
 *
 * @param timeSpec time field spec added to the schema after the dimensions and metrics
 * @return the populated schema
 */
private Schema createPinotSchemaWithTimeSpec(TimeFieldSpec timeSpec) {
  Schema pinotSchema = new Schema();
  pinotSchema.setSchemaName("schema");

  pinotSchema.addField(new DimensionFieldSpec(D1, DataType.STRING, true));
  pinotSchema.addField(new DimensionFieldSpec(D2, DataType.STRING, true));
  pinotSchema.addField(new MetricFieldSpec(M1, DataType.INT));
  pinotSchema.addField(new MetricFieldSpec(M2, DataType.FLOAT));
  pinotSchema.addField(timeSpec);

  return pinotSchema;
}
/**
 * Builds a schema from the fields of the Avro schema embedded in the given Avro data file.
 *
 * Every Avro field whose column type can be resolved becomes a field spec: a metric spec when the
 * field's {@code pinotType} property equals {@code "METRIC"}, a dimension spec otherwise. Fields
 * for which {@code getColumnType} throws are logged and left out of the schema.
 *
 * The file stream and the Avro data stream are opened in a try-with-resources statement so that
 * they are closed even if reading the Avro header or converting a field throws.
 *
 * @param avroFile Avro data file to read the schema from
 * @return schema containing one field spec per convertible Avro field
 * @throws IOException if the file cannot be opened or read, or if closing the streams fails
 */
public static Schema extractSchemaFromAvroWithoutTime(File avroFile) throws IOException {
  Schema schema = new Schema();

  try (FileInputStream fileStream = new FileInputStream(avroFile);
      DataFileStream<GenericRecord> dataStream =
          new DataFileStream<GenericRecord>(fileStream, new GenericDatumReader<GenericRecord>())) {
    for (final Field field : dataStream.getSchema().getFields()) {
      // Probe the type conversion first; unsupported fields are skipped rather than failing the extraction.
      try {
        getColumnType(field);
      } catch (Exception e) {
        LOGGER.warn("Caught exception while converting Avro field {} of type {}, field will not be in schema.",
            field.name(), field.schema().getType());
        continue;
      }

      // "METRIC".equals(null) is false, so no separate null check is needed.
      final FieldSpec fieldSpec;
      if ("METRIC".equals(field.getProp("pinotType"))) {
        fieldSpec = new MetricFieldSpec();
      } else {
        fieldSpec = new DimensionFieldSpec();
      }

      fieldSpec.setName(field.name());
      fieldSpec.setDataType(getColumnType(field));
      fieldSpec.setSingleValueField(isSingleValueField(field));
      schema.addField(fieldSpec);
    }
  }

  return schema;
}
/**
 * Creates the schema named {@code "schema"} used by the tests: one single-value STRING dimension,
 * one multi-value STRING dimension, an INT metric, a FLOAT metric and a time field whose
 * granularity spec is LONG / HOURS.
 *
 * @return the populated schema
 */
private Schema createPinotSchema() {
  Schema pinotSchema = new Schema();
  pinotSchema.setSchemaName("schema");

  // Dimensions: one single-value, one multi-value.
  pinotSchema.addField(new DimensionFieldSpec(D_SV_1, DataType.STRING, true));
  pinotSchema.addField(new DimensionFieldSpec(D_MV_1, FieldSpec.DataType.STRING, false));

  // Metrics.
  pinotSchema.addField(new MetricFieldSpec(M1, FieldSpec.DataType.INT));
  pinotSchema.addField(new MetricFieldSpec(M2, FieldSpec.DataType.FLOAT));

  // Time column.
  TimeGranularitySpec hourlyGranularity = new TimeGranularitySpec(FieldSpec.DataType.LONG, TimeUnit.HOURS, TIME);
  pinotSchema.addField(new TimeFieldSpec(hourlyGranularity));

  return pinotSchema;
}
/**
 * Tests loading default string column with empty ("") default null value.
 * The segment is loaded once with the v1 loading config and once with the v3 loading config; in
 * both cases the first dictionary entry of the added single-value and multi-value STRING columns
 * is expected to be the empty string.
 */
@Test
public void testDefaultEmptyValueStringColumn() throws Exception {
  Schema schemaWithDefaults = constructV1Segment();
  schemaWithDefaults.addField(new DimensionFieldSpec("SVString", FieldSpec.DataType.STRING, true, ""));
  schemaWithDefaults.addField(new DimensionFieldSpec("MVString", FieldSpec.DataType.STRING, false, ""));

  // Load with the v1 config.
  IndexSegment v1Segment = ImmutableSegmentLoader.load(_indexDir, _v1IndexLoadingConfig, schemaWithDefaults);
  Assert.assertEquals(v1Segment.getDataSource("SVString").getDictionary().get(0), "");
  Assert.assertEquals(v1Segment.getDataSource("MVString").getDictionary().get(0), "");
  v1Segment.destroy();

  // Load with the v3 config.
  IndexSegment v3Segment = ImmutableSegmentLoader.load(_indexDir, _v3IndexLoadingConfig, schemaWithDefaults);
  Assert.assertEquals(v3Segment.getDataSource("SVString").getDictionary().get(0), "");
  Assert.assertEquals(v3Segment.getDataSource("MVString").getDictionary().get(0), "");
  v3Segment.destroy();
}
/**
 * Creates a schema with one single-value STRING dimension and one time column.
 *
 * @param isSimpleDate when {@code true} the time column is INT / DAYS, otherwise it is
 *     LONG / MILLISECONDS
 * @return the populated schema
 */
private Schema createSchema(boolean isSimpleDate) {
  Schema result = new Schema();
  result.addField(new DimensionFieldSpec(STRING_COL_NAME, FieldSpec.DataType.STRING, true));

  TimeFieldSpec timeColumnSpec = isSimpleDate
      ? new TimeFieldSpec(TIME_COL_NAME, FieldSpec.DataType.INT, TimeUnit.DAYS)
      : new TimeFieldSpec(TIME_COL_NAME, FieldSpec.DataType.LONG, TimeUnit.MILLISECONDS);
  result.addField(timeColumnSpec);

  return result;
}
schema.addField(new DimensionFieldSpec(column, dataType, true)); StatsCollectorConfig statsCollectorConfig = new StatsCollectorConfig(schema, null);