@JsonCreator
public MapInputRowParser(
    @JsonProperty("parseSpec") ParseSpec parseSpec
)
{
  this.parseSpec = parseSpec;
  this.dimensions = parseSpec.getDimensionsSpec().getDimensionNames();
}
@JsonCreator
public ProtobufInputRowParser(
    @JsonProperty("parseSpec") ParseSpec parseSpec,
    @JsonProperty("descriptor") String descriptorFilePath,
    @JsonProperty("protoMessageType") String protoMessageType
)
{
  this.parseSpec = parseSpec;
  this.descriptorFilePath = descriptorFilePath;
  this.protoMessageType = protoMessageType;
  this.dimensions = parseSpec.getDimensionsSpec().getDimensionNames();
}
@JsonCreator
public ThriftInputRowParser(
    @JsonProperty("parseSpec") ParseSpec parseSpec,
    @JsonProperty("thriftJar") String jarPath,
    @JsonProperty("thriftClass") String thriftClassName
)
{
  this.jarPath = jarPath;
  this.thriftClassName = Preconditions.checkNotNull(thriftClassName, "thrift class name");
  this.parseSpec = parseSpec;
  this.dimensions = parseSpec.getDimensionsSpec().getDimensionNames();
}
public Builder withDimensionsSpec(InputRowParser parser)
{
  if (parser != null && parser.getParseSpec() != null && parser.getParseSpec().getDimensionsSpec() != null) {
    this.dimensionsSpec = parser.getParseSpec().getDimensionsSpec();
  } else {
    this.dimensionsSpec = new DimensionsSpec(null, null, null);
  }
  return this;
}
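// Hedged usage sketch (not from the source): how the builder above behaves with and
// without a parser. The DimensionsSpec overload used in the filter tests later in
// this section is assumed to exist alongside this InputRowParser overload; the
// "parser" variable is hypothetical.
IncrementalIndexSchema withParserDims = new IncrementalIndexSchema.Builder()
    .withDimensionsSpec(parser) // uses parser.getParseSpec().getDimensionsSpec()
    .build();
IncrementalIndexSchema schemaless = new IncrementalIndexSchema.Builder()
    .withDimensionsSpec((InputRowParser) null) // falls back to new DimensionsSpec(null, null, null)
    .build();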
@VisibleForTesting
static String typeStringFromParseSpec(ParseSpec parseSpec)
{
  StringBuilder builder = new StringBuilder("struct<");
  builder.append(parseSpec.getTimestampSpec().getTimestampColumn()).append(":string");
  // the typeString seems positionally dependent, so a repeated timestamp column would cause an incorrect mapping
  if (parseSpec.getDimensionsSpec().getDimensionNames().size() > 0) {
    builder.append(",");
    builder.append(String.join(
        ":string,",
        parseSpec.getDimensionsSpec()
                 .getDimensionNames()
                 .stream()
                 .filter(s -> !s.equals(parseSpec.getTimestampSpec().getTimestampColumn()))
                 .collect(Collectors.toList())
    ));
    builder.append(":string");
  }
  builder.append(">");
  return builder.toString();
}
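// A hedged illustration of typeStringFromParseSpec (the parse spec below is
// hypothetical; TimeAndDimsParseSpec and the DimensionsSpec helpers are borrowed
// from other snippets in this section):
ParseSpec spec = new TimeAndDimsParseSpec(
    new TimestampSpec("ts", "auto", null),
    new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("ts", "dim1", "dim2")), null, null)
);
// The timestamp column is emitted first and filtered out of the dimension list,
// since the ORC typeString maps columns by position; expected result:
// "struct<ts:string,dim1:string,dim2:string>"
String typeString = typeStringFromParseSpec(spec);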
@JsonCreator
public OrcHadoopInputRowParser(
    @JsonProperty("parseSpec") ParseSpec parseSpec,
    @JsonProperty("typeString") String typeString,
    @JsonProperty("mapFieldNameFormat") String mapFieldNameFormat
)
{
  this.parseSpec = parseSpec;
  this.typeString = typeString == null ? typeStringFromParseSpec(parseSpec) : typeString;
  this.mapFieldNameFormat =
      mapFieldNameFormat == null ||
      !mapFieldNameFormat.contains(MAP_PARENT_TAG) ||
      !mapFieldNameFormat.contains(MAP_CHILD_TAG)
      ? DEFAULT_MAP_FIELD_NAME_FORMAT
      : mapFieldNameFormat;
  this.mapParentFieldNameFormat = StringUtils.replace(this.mapFieldNameFormat, MAP_PARENT_TAG, "%s");
  this.dimensions = parseSpec.getDimensionsSpec().getDimensionNames();
  this.oip = makeObjectInspector(this.typeString);
}
@Override
public List<InputRow> parseBatch(Map<String, Object> theMap)
{
  final List<String> dimensions;
  if (!this.dimensions.isEmpty()) {
    dimensions = this.dimensions;
  } else {
    dimensions = Lists.newArrayList(
        Sets.difference(theMap.keySet(), parseSpec.getDimensionsSpec().getDimensionExclusions())
    );
  }

  final DateTime timestamp;
  try {
    timestamp = parseSpec.getTimestampSpec().extractTimestamp(theMap);
    if (timestamp == null) {
      final String input = theMap.toString();
      throw new NullPointerException(
          StringUtils.format(
              "Null timestamp in input: %s",
              input.length() < 100 ? input : input.substring(0, 100) + "..."
          )
      );
    }
  }
  catch (Exception e) {
    throw new ParseException(e, "Unparseable timestamp found! Event: %s", theMap);
  }

  return ImmutableList.of(new MapBasedInputRow(timestamp, dimensions, theMap));
}
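// Hedged sketch of the schemaless fallback above: when the configured dimension
// list is empty, every key of the event map that is not in dimensionExclusions
// becomes a dimension. The "parser" variable, event, and exclusion config are all
// hypothetical.
Map<String, Object> event = ImmutableMap.of(
    "timestamp", "2014-01-01T00:00:00Z",
    "dimA", "foo",
    "metric1", 1.0
);
// With "metric1" excluded, the row's dimensions are the remaining keys
// ("timestamp" and "dimA"); a null or unparseable timestamp surfaces as a
// ParseException rather than a silently dropped row.
InputRow row = parser.parseBatch(event).get(0);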
@JsonCreator
public ParquetAvroHadoopInputRowParser(
    @JsonProperty("parseSpec") ParseSpec parseSpec,
    @JsonProperty("binaryAsString") Boolean binaryAsString
)
{
  this.parseSpec = parseSpec;
  this.timestampSpec = parseSpec.getTimestampSpec();
  this.dimensions = parseSpec.getDimensionsSpec().getDimensionNames();
  this.binaryAsString = binaryAsString == null ? false : binaryAsString;

  final JSONPathSpec flattenSpec;
  if (parseSpec instanceof AvroParseSpec) {
    flattenSpec = ((AvroParseSpec) parseSpec).getFlattenSpec();
  } else {
    flattenSpec = JSONPathSpec.DEFAULT;
  }
  this.recordFlattener = ObjectFlatteners.create(
      flattenSpec,
      new AvroFlattenerMaker(false, this.binaryAsString)
  );
}
private static IndexIOConfig createIoConfig(TaskToolbox toolbox, DataSchema dataSchema, Interval interval)
{
  return new IndexIOConfig(
      new IngestSegmentFirehoseFactory(
          dataSchema.getDataSource(),
          interval,
          null, // no filter
          // set the dimension and metric names to make sure the generated dataSchema is used for the firehose
          dataSchema.getParser().getParseSpec().getDimensionsSpec().getDimensionNames(),
          Arrays.stream(dataSchema.getAggregators()).map(AggregatorFactory::getName).collect(Collectors.toList()),
          toolbox.getIndexIO()
      ),
      false
  );
}
/**
 * Imitates the avro extension {@link org.apache.druid.data.input.avro.AvroParsers#parseGenericRecord}.
 */
@Override
public List<InputRow> parseBatch(GenericRecord record)
{
  Map<String, Object> row = recordFlattener.flatten(record);

  final List<String> dimensions;
  if (!this.dimensions.isEmpty()) {
    dimensions = this.dimensions;
  } else {
    dimensions = Lists.newArrayList(
        Sets.difference(row.keySet(), parseSpec.getDimensionsSpec().getDimensionExclusions())
    );
  }

  // check for the parquet Date logical type:
  // https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#date
  LogicalType logicalType = determineTimestampSpecLogicalType(record.getSchema(), timestampSpec.getTimestampColumn());
  DateTime dateTime;
  if (logicalType instanceof LogicalTypes.Date) {
    int daysSinceEpoch = (Integer) record.get(timestampSpec.getTimestampColumn());
    dateTime = DateTimes.utc(TimeUnit.DAYS.toMillis(daysSinceEpoch));
  } else {
    // fall back to a binary format that will be parsed using joda-time
    dateTime = timestampSpec.extractTimestamp(row);
  }

  return ImmutableList.of(new MapBasedInputRow(dateTime, dimensions, row));
}
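// Hedged arithmetic check for the Date branch above: Parquet's DATE logical type
// stores an int count of days since the Unix epoch, so the conversion is a plain
// days-to-millis multiplication. 17897 is a hypothetical value.
int daysSinceEpoch = 17897; // 49 * 365 + 12 leap days
DateTime dateTime = DateTimes.utc(TimeUnit.DAYS.toMillis(daysSinceEpoch));
// dateTime -> 2019-01-01T00:00:00.000Z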
@Test
public void testSerde() throws IOException
{
  final String json = "{"
                      + "\"format\":\"timeAndDims\", "
                      + "\"timestampSpec\": {\"column\":\"timestamp\"}, "
                      + "\"dimensionsSpec\":{}"
                      + "}";

  final Object mapValue = mapper.readValue(json, JacksonUtils.TYPE_REFERENCE_MAP_STRING_OBJECT);
  final ParseSpec parseSpec = mapper.convertValue(mapValue, ParseSpec.class);

  Assert.assertEquals(TimeAndDimsParseSpec.class, parseSpec.getClass());
  Assert.assertEquals("timestamp", parseSpec.getTimestampSpec().getTimestampColumn());
  Assert.assertEquals(ImmutableList.of(), parseSpec.getDimensionsSpec().getDimensionNames());

  // Test round-trip.
  Assert.assertEquals(
      parseSpec,
      mapper.readValue(mapper.writeValueAsString(parseSpec), ParseSpec.class)
  );
}
@Override
protected void setup(Context context)
{
  config = HadoopDruidIndexerConfig.fromConfiguration(context.getConfiguration());
  aggregators = config.getSchema().getDataSchema().getAggregators();
  combiningAggs = new AggregatorFactory[aggregators.length];
  for (int i = 0; i < aggregators.length; ++i) {
    metricNames.add(aggregators[i].getName());
    combiningAggs[i] = aggregators[i].getCombiningFactory();
  }
  typeHelperMap = InputRowSerde.getTypeHelperMap(
      config.getSchema()
            .getDataSchema()
            .getParser()
            .getParseSpec()
            .getDimensionsSpec()
  );
}
@Override
protected void setup(Context context)
{
  config = HadoopDruidIndexerConfig.fromConfiguration(context.getConfiguration());
  aggregators = config.getSchema().getDataSchema().getAggregators();
  combiningAggs = new AggregatorFactory[aggregators.length];
  for (int i = 0; i < aggregators.length; ++i) {
    combiningAggs[i] = aggregators[i].getCombiningFactory();
  }
  typeHelperMap = InputRowSerde.getTypeHelperMap(
      config.getSchema()
            .getDataSchema()
            .getParser()
            .getParseSpec()
            .getDimensionsSpec()
  );
}
public SelectorFilterTest(
    String testName,
    IndexBuilder indexBuilder,
    Function<IndexBuilder, Pair<StorageAdapter, Closeable>> finisher,
    boolean cnf,
    boolean optimize
)
{
  super(
      testName,
      ROWS,
      indexBuilder.schema(
          new IncrementalIndexSchema.Builder()
              .withDimensionsSpec(PARSER.getParseSpec().getDimensionsSpec())
              .build()
      ),
      finisher,
      cnf,
      optimize
  );
}
public ExpressionFilterTest(
    String testName,
    IndexBuilder indexBuilder,
    Function<IndexBuilder, Pair<StorageAdapter, Closeable>> finisher,
    boolean cnf,
    boolean optimize
)
{
  super(
      testName,
      ROWS,
      indexBuilder.schema(
          new IncrementalIndexSchema.Builder()
              .withDimensionsSpec(PARSER.getParseSpec().getDimensionsSpec())
              .build()
      ),
      finisher,
      cnf,
      optimize
  );
}
public FloatAndDoubleFilteringTest(
    String testName,
    IndexBuilder indexBuilder,
    Function<IndexBuilder, Pair<StorageAdapter, Closeable>> finisher,
    boolean cnf,
    boolean optimize
)
{
  super(
      testName,
      ROWS,
      indexBuilder.schema(
          new IncrementalIndexSchema.Builder()
              .withDimensionsSpec(PARSER.getParseSpec().getDimensionsSpec())
              .build()
      ),
      finisher,
      cnf,
      optimize
  );
}
@Override
protected void setup(Context context) throws IOException, InterruptedException
{
  super.setup(context);
  aggregators = config.getSchema().getDataSchema().getAggregators();

  if (DatasourcePathSpec.checkIfReindexingAndIsUseAggEnabled(config.getSchema().getIOConfig().getPathSpec())) {
    aggsForSerializingSegmentInputRow = aggregators;
  } else {
    // Note: this is required for the "delta-ingestion" use case, where we are reading rows stored in Druid
    // as well as late-arriving data on HDFS etc.
    aggsForSerializingSegmentInputRow = new AggregatorFactory[aggregators.length];
    for (int i = 0; i < aggregators.length; ++i) {
      aggsForSerializingSegmentInputRow[i] = aggregators[i].getCombiningFactory();
    }
  }
  typeHelperMap = InputRowSerde.getTypeHelperMap(
      config.getSchema()
            .getDataSchema()
            .getParser()
            .getParseSpec()
            .getDimensionsSpec()
  );
}
@Test
public void testDefaultExclusions()
{
  Map<String, Object> parser = jsonMapper.convertValue(
      new StringInputRowParser(
          new JSONParseSpec(
              new TimestampSpec("time", "auto", null),
              new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("dimB", "dimA")), null, null),
              null,
              null
          ),
          null
      ),
      JacksonUtils.TYPE_REFERENCE_MAP_STRING_OBJECT
  );

  DataSchema schema = new DataSchema(
      "test",
      parser,
      new AggregatorFactory[]{
          new DoubleSumAggregatorFactory("metric1", "col1"),
          new DoubleSumAggregatorFactory("metric2", "col2"),
      },
      new ArbitraryGranularitySpec(Granularities.DAY, ImmutableList.of(Intervals.of("2014/2015"))),
      null,
      jsonMapper
  );

  Assert.assertEquals(
      ImmutableSet.of("time", "col1", "col2", "metric1", "metric2"),
      schema.getParser().getParseSpec().getDimensionsSpec().getDimensionExclusions()
  );
}