/**
 * Looks up the field spec of a column in the schema held by this object.
 *
 * @param column name of the column to look up
 * @return the value returned by the schema lookup; may be {@code null}
 */
@Nullable
public FieldSpec getFieldSpecForColumn(String column) {
  FieldSpec fieldSpec = _schema.getFieldSpecFor(column);
  return fieldSpec;
}
/**
 * Registers a column for inverted index creation.
 *
 * <p>If a schema is set and it has no field spec for the column, a warning is logged and the column is not
 * registered. If no schema is set, a warning is logged and the column is registered anyway.
 *
 * @param column name of the column; must not be {@code null}
 */
public void createInvertedIndexForColumn(String column) {
  Preconditions.checkNotNull(column);

  if (_schema == null) {
    // Without a schema the column cannot be validated; accept it for now.
    LOGGER.warn("Schema has not been set, column {} might not exist in schema after all.", column);
  } else if (_schema.getFieldSpecFor(column) == null) {
    LOGGER.warn("Cannot find column {} in schema, will not create inverted index.", column);
    return;
  }

  _invertedIndexCreationColumns.add(column);
}
/**
 * Sets the schema and drops every inverted index column that the new schema has no field spec for.
 *
 * @param schema the schema to use; must not be {@code null}
 */
public void setSchema(Schema schema) {
  Preconditions.checkNotNull(schema);
  _schema = schema;

  if (_invertedIndexCreationColumns == null) {
    return;
  }

  // Iterate with an explicit iterator so that entries can be removed while traversing.
  for (Iterator<String> columns = _invertedIndexCreationColumns.iterator(); columns.hasNext(); ) {
    String candidate = columns.next();
    if (_schema.getFieldSpecFor(candidate) != null) {
      continue;
    }
    LOGGER.warn("Cannot find column {} in schema, will not create inverted index.", candidate);
    columns.remove();
  }
}
/**
 * Tells whether a column is a virtual column.
 *
 * <p>A column is virtual when its name starts with {@code "$"}, or when its field spec has a virtual column
 * provider that is neither {@code null} nor empty. The field spec returned by {@code getFieldSpecFor} is
 * dereferenced without a null check.
 *
 * @param columnName name of the column to test
 * @return {@code true} if the column is virtual, {@code false} otherwise
 */
public boolean isVirtualColumn(String columnName) {
  if (columnName.startsWith("$")) {
    return true;
  }
  if (getFieldSpecFor(columnName).getVirtualColumnProvider() == null) {
    return false;
  }
  return !getFieldSpecFor(columnName).getVirtualColumnProvider().isEmpty();
}
}
/**
 * Returns the data source for a column.
 *
 * <p>Columns that the schema reports as virtual are served by {@code getVirtualDataSource}; all other columns
 * get a data source assembled from the per-column maps of this object.
 *
 * @param columnName name of the column
 * @return the data source for the column
 */
@Override
public ColumnDataSource getDataSource(String columnName) {
  if (_schema.isVirtualColumn(columnName)) {
    return getVirtualDataSource(columnName);
  }
  return new ColumnDataSource(
      _schema.getFieldSpecFor(columnName),
      _numDocsIndexed,
      _maxNumValuesMap.get(columnName),
      _indexReaderWriterMap.get(columnName),
      _invertedIndexMap.get(columnName),
      _dictionaryMap.get(columnName),
      _bloomFilterMap.get(columnName));
}
/**
 * Logs the collected statistics of every column at INFO level.
 *
 * <p>Any exception raised while logging is caught and logged at ERROR level; columns that were not yet logged
 * at that point are skipped.
 */
@Override
public void logStats() {
  try {
    for (String columnName : columnStatsCollectorMap.keySet()) {
      AbstractColumnStatisticsCollector collector = columnStatsCollectorMap.get(columnName);

      LOGGER.info("********** logging for column : " + columnName + " ********************* ");
      LOGGER.info("min value : " + collector.getMinValue());
      LOGGER.info("max value : " + collector.getMaxValue());
      LOGGER.info("cardinality : " + collector.getCardinality());
      LOGGER.info("length of largest column : " + collector.getLengthOfLargestElement());
      LOGGER.info("is sorted : " + collector.isSorted());
      LOGGER.info("column type : " + _statsCollectorConfig.getSchema().getFieldSpecFor(columnName).getDataType());

      // Partitions are only reported for columns that have a partition function.
      if (collector.getPartitionFunction() != null) {
        LOGGER.info("partitions: " + collector.getPartitions().toString());
      }

      LOGGER.info("***********************************************");
    }
  } catch (Exception e) {
    LOGGER.error("Caught exception while logging column stats", e);
  }
}
}
_logger.warn("More than one sorted column configured. Using {}", sortedColumn); FieldSpec fieldSpec = schema.getFieldSpecFor(sortedColumn); if (!fieldSpec.isSingleValueField()) { _logger.error("Cannot configure multi-valued column {} as sorted column", sortedColumn);
@Override public int compare(int i1, int i2) { int docId1 = sortedDocIds[i1]; int docId2 = sortedDocIds[i2]; int compare = 0; for (int index : _sortOrder) { String dimensionName = _dimensionNames.get(index); FieldSpec fieldSpec = _schema.getFieldSpecFor(dimensionName); PinotSegmentColumnReader columnReader = _columnReaderMap.get(dimensionName); // Multi value column or no dictionary column is not supported boolean isMultiValueColumn = !fieldSpec.isSingleValueField(); boolean isNoDictionaryColumn = !columnReader.hasDictionary(); if (isMultiValueColumn || isNoDictionaryColumn) { throw new IllegalStateException( "Multi value column or no dictionary column is not supported. ( column name: " + dimensionName + ", multi value column: " + isMultiValueColumn + ", no dictionary column: " + isNoDictionaryColumn + " )"); } // Compute the order compare = columnReader.getDictionaryId(docId1) - columnReader.getDictionaryId(docId2); if (compare != 0) { return compare; } } return compare; }
int compare = 0; for (String column : _sortOrder) { FieldSpec fieldSpec = _schema.getFieldSpecFor(column); Object otherVal = o.getRow().getValue(column); Object thisVal = _row.getValue(column);
/**
 * Indexes one row into the forward indexes and, where configured, the inverted indexes of all columns, then
 * advances the document id counter.
 *
 * @param row the row to index
 * @throws RuntimeException if the row has a {@code null} value for any column that has a forward index creator
 */
@Override
public void indexRow(GenericRow row) {
  for (String columnName : _forwardIndexCreatorMap.keySet()) {
    Object value = row.getValue(columnName);
    if (value == null) {
      throw new RuntimeException("Null value for column:" + columnName);
    }

    SegmentDictionaryCreator dictionaryCreator = _dictionaryCreatorMap.get(columnName);

    // Multi-value column: always goes through the dictionary.
    if (!schema.getFieldSpecFor(columnName).isSingleValueField()) {
      int[] dictIds = dictionaryCreator.indexOfMV(value);
      ((MultiValueForwardIndexCreator) _forwardIndexCreatorMap.get(columnName)).index(docIdCounter, dictIds);
      if (_invertedIndexCreatorMap.containsKey(columnName)) {
        _invertedIndexCreatorMap.get(columnName).add(dictIds, dictIds.length);
      }
      continue;
    }

    // Single-value column without a dictionary: the value itself is written to the raw index.
    if (dictionaryCreator == null) {
      ((SingleValueRawIndexCreator) _forwardIndexCreatorMap.get(columnName)).index(docIdCounter, value);
      continue;
    }

    // Single-value column with a dictionary.
    int dictId = dictionaryCreator.indexOfSV(value);
    ((SingleValueForwardIndexCreator) _forwardIndexCreatorMap.get(columnName)).index(docIdCounter, dictId);
    if (_invertedIndexCreatorMap.containsKey(columnName)) {
      _invertedIndexCreatorMap.get(columnName).add(dictId);
    }
  }
  docIdCounter++;
}
@Override protected void updateDefaultColumn(String column, DefaultColumnAction action) throws Exception { LOGGER.info("Starting default column action: {} on column: {}", action, column); // For V3 segment format, only support ADD action // For UPDATE and REMOVE action, throw exception to drop and re-download the segment if (!action.isAddAction()) { throw new V3RemoveIndexException( "Default value indices for column: " + column + " cannot be removed for V3 format segment."); } // Create new dictionary and forward index, and update column metadata createColumnV1Indices(column); // Write index to V3 format. FieldSpec fieldSpec = _schema.getFieldSpecFor(column); Preconditions.checkNotNull(fieldSpec); boolean isSingleValue = fieldSpec.isSingleValueField(); File dictionaryFile = new File(_indexDir, column + V1Constants.Dict.FILE_EXTENSION); File forwardIndexFile; if (isSingleValue) { forwardIndexFile = new File(_indexDir, column + V1Constants.Indexes.SORTED_SV_FORWARD_INDEX_FILE_EXTENSION); } else { forwardIndexFile = new File(_indexDir, column + V1Constants.Indexes.UNSORTED_MV_FORWARD_INDEX_FILE_EXTENSION); } LoaderUtils.writeIndexToV3Format(_segmentWriter, column, dictionaryFile, ColumnIndexType.DICTIONARY); LoaderUtils.writeIndexToV3Format(_segmentWriter, column, forwardIndexFile, ColumnIndexType.FORWARD_INDEX); } }
FieldSpec fieldSpec = _schema.getFieldSpecFor(column); Object defaultNullValue = fieldSpec.getDefaultNullValue(); if (fieldSpec.isSingleValueField()) {
/** * Returns a new schema based on the original one. The new schema removes columns as needed (for ex, virtual cols) * and adds the new timespec to the schema. */ @VisibleForTesting public Schema getUpdatedSchema(Schema original) { TimeFieldSpec tfs = original.getTimeFieldSpec(); // Use outgoing granularity for creating segment TimeGranularitySpec outgoing = tfs.getOutgoingGranularitySpec(); TimeFieldSpec newTimeSpec = new TimeFieldSpec(outgoing); Schema newSchema = new Schema(); newSchema.addField(newTimeSpec); for (String col : original.getPhysicalColumnNames()) { if (!col.equals(tfs.getName())) { newSchema.addField(original.getFieldSpecFor(col)); } } return newSchema; } }
for (FieldSpec fieldSpec : fieldSpecs) { String columnName = fieldSpec.getName(); FieldSpec segmentFieldSpec = segmentSchema.getFieldSpecFor(columnName); Preconditions.checkState(fieldSpec.equals(segmentFieldSpec), "Field spec mismatch for column: %s, in the given schema: %s, in the segment schema: %s", columnName,
FieldSpec.DataType dataType = _schema.getFieldSpecFor(column).getDataType(); switch (dataType) { case INT:
@Test public void testByteType() throws DecoderException, IOException { Schema expectedSchema = new Schema(); byte[] expectedEmptyDefault = new byte[0]; byte[] expectedNonEmptyDefault = Hex.decodeHex("abcd1234".toCharArray()); expectedSchema.setSchemaName("test"); expectedSchema.addField(new MetricFieldSpec("noDefault", FieldSpec.DataType.BYTES)); expectedSchema.addField(new MetricFieldSpec("emptyDefault", FieldSpec.DataType.BYTES, expectedEmptyDefault)); expectedSchema.addField(new MetricFieldSpec("nonEmptyDefault", FieldSpec.DataType.BYTES, expectedNonEmptyDefault)); // Ensure that schema can be serialized and de-serialized (ie byte[] converted to String and back). String jsonSchema = expectedSchema.getJSONSchema(); Schema actualSchema = Schema.fromString(jsonSchema); Assert.assertEquals(actualSchema.getFieldSpecFor("noDefault").getDefaultNullValue(), expectedEmptyDefault); Assert.assertEquals(actualSchema.getFieldSpecFor("emptyDefault").getDefaultNullValue(), expectedEmptyDefault); Assert.assertEquals(actualSchema.getFieldSpecFor("nonEmptyDefault").getDefaultNullValue(), expectedNonEmptyDefault); Assert.assertEquals(actualSchema, expectedSchema); Assert.assertEquals(actualSchema.hashCode(), expectedSchema.hashCode()); } }
/** * Complete the stats gathering process and store the stats information in indexCreationInfoMap. */ void buildIndexCreationInfo() throws Exception { for (FieldSpec spec : dataSchema.getAllFieldSpecs()) { String column = spec.getName(); // Skip adding virtual columns, so that they don't get an on-disk representation if (dataSchema.isVirtualColumn(column)) { continue; } ColumnStatistics columnProfile = segmentStats.getColumnProfileFor(column); indexCreationInfoMap.put(column, new ColumnIndexCreationInfo(columnProfile, true/*createDictionary*/, ForwardIndexType.FIXED_BIT_COMPRESSED, InvertedIndexType.ROARING_BITMAPS, false/*isAutoGenerated*/, dataSchema.getFieldSpecFor(column).getDefaultNullValue())); } segmentIndexCreationInfo.setTotalDocs(totalDocs); segmentIndexCreationInfo.setTotalRawDocs(totalRawDocs); segmentIndexCreationInfo.setTotalAggDocs(totalAggDocs); segmentIndexCreationInfo.setStarTreeEnabled(createStarTree); }
/**
 * Builds the data source of a virtual column.
 *
 * <p>The provider named by the column's field spec is instantiated through {@code VirtualColumnProviderFactory}
 * and asked for both the column index container and the column metadata.
 *
 * @param column name of the virtual column
 * @return a data source backed by the virtual column provider
 */
private ColumnDataSource getVirtualDataSource(String column) {
  VirtualColumnContext context =
      new VirtualColumnContext(
          NetUtil.getHostnameOrAddress(),
          _segmentMetadata.getTableName(),
          getSegmentName(),
          column,
          _numDocsIndexed + 1);

  VirtualColumnProvider virtualColumnProvider =
      VirtualColumnProviderFactory.buildProvider(_schema.getFieldSpecFor(column).getVirtualColumnProvider());

  return new ColumnDataSource(
      virtualColumnProvider.buildColumnIndexContainer(context),
      virtualColumnProvider.buildMetadata(context));
}
val = ((Utf8) val).toString(); uniqueEntries.get(column).add(getAppropriateType(schema.getFieldSpecFor(column).getDataType(), val));
for (String column : recordRow.getFieldNames()) { String valueStringFromAvro = null; if (avroDataPublisher.getSchema().getFieldSpecFor(column).isSingleValueField()) { Object valueFromAvro = recordRow.getValue(column); valueStringFromAvro = valueFromAvro.toString();