@JsonCreator
public MapInputRowParser(
    @JsonProperty("parseSpec") ParseSpec parseSpec
)
{
  this.parseSpec = parseSpec;
  this.dimensions = parseSpec.getDimensionsSpec().getDimensionNames();
}
@JsonCreator
public ProtobufInputRowParser(
    @JsonProperty("parseSpec") ParseSpec parseSpec,
    @JsonProperty("descriptor") String descriptorFilePath,
    @JsonProperty("protoMessageType") String protoMessageType
)
{
  this.parseSpec = parseSpec;
  this.descriptorFilePath = descriptorFilePath;
  this.protoMessageType = protoMessageType;
  this.dimensions = parseSpec.getDimensionsSpec().getDimensionNames();
}
@JsonCreator
public ThriftInputRowParser(
    @JsonProperty("parseSpec") ParseSpec parseSpec,
    @JsonProperty("thriftJar") String jarPath,
    @JsonProperty("thriftClass") String thriftClassName
)
{
  this.jarPath = jarPath;
  this.thriftClassName = Preconditions.checkNotNull(thriftClassName, "thrift class name");
  this.parseSpec = parseSpec;
  this.dimensions = parseSpec.getDimensionsSpec().getDimensionNames();
}
public Builder withDimensionsSpec(InputRowParser parser)
{
  if (parser != null && parser.getParseSpec() != null && parser.getParseSpec().getDimensionsSpec() != null) {
    this.dimensionsSpec = parser.getParseSpec().getDimensionsSpec();
  } else {
    this.dimensionsSpec = new DimensionsSpec(null, null, null);
  }
  return this;
}
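// Hedged usage sketch (not from the source): how the builder above behaves with and
// without a parser. The DimensionsSpec overload used in the filter tests later in
// this section is assumed to exist alongside this InputRowParser overload; the
// "parser" variable is hypothetical.
IncrementalIndexSchema withParserDims = new IncrementalIndexSchema.Builder()
    .withDimensionsSpec(parser) // uses parser.getParseSpec().getDimensionsSpec()
    .build();
IncrementalIndexSchema schemaless = new IncrementalIndexSchema.Builder()
    .withDimensionsSpec((InputRowParser) null) // falls back to new DimensionsSpec(null, null, null)
    .build();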
@VisibleForTesting
static String typeStringFromParseSpec(ParseSpec parseSpec)
{
  StringBuilder builder = new StringBuilder("struct<");
  builder.append(parseSpec.getTimestampSpec().getTimestampColumn()).append(":string");
  // the typeString seems positionally dependent, so a repeated timestamp column would cause an incorrect mapping
  if (parseSpec.getDimensionsSpec().getDimensionNames().size() > 0) {
    builder.append(",");
    builder.append(String.join(
        ":string,",
        parseSpec.getDimensionsSpec()
                 .getDimensionNames()
                 .stream()
                 .filter(s -> !s.equals(parseSpec.getTimestampSpec().getTimestampColumn()))
                 .collect(Collectors.toList())
    ));
    builder.append(":string");
  }
  builder.append(">");
  return builder.toString();
}
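// A hedged illustration of typeStringFromParseSpec (the parse spec below is
// hypothetical; TimeAndDimsParseSpec and the DimensionsSpec helpers are borrowed
// from other snippets in this section):
ParseSpec spec = new TimeAndDimsParseSpec(
    new TimestampSpec("ts", "auto", null),
    new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("ts", "dim1", "dim2")), null, null)
);
// The timestamp column is emitted first and filtered out of the dimension list,
// since the ORC typeString maps columns by position; expected result:
// "struct<ts:string,dim1:string,dim2:string>"
String typeString = typeStringFromParseSpec(spec);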
@JsonCreator
public OrcHadoopInputRowParser(
    @JsonProperty("parseSpec") ParseSpec parseSpec,
    @JsonProperty("typeString") String typeString,
    @JsonProperty("mapFieldNameFormat") String mapFieldNameFormat
)
{
  this.parseSpec = parseSpec;
  this.typeString = typeString == null ? typeStringFromParseSpec(parseSpec) : typeString;
  this.mapFieldNameFormat =
      mapFieldNameFormat == null ||
      !mapFieldNameFormat.contains(MAP_PARENT_TAG) ||
      !mapFieldNameFormat.contains(MAP_CHILD_TAG)
      ? DEFAULT_MAP_FIELD_NAME_FORMAT
      : mapFieldNameFormat;
  this.mapParentFieldNameFormat = StringUtils.replace(this.mapFieldNameFormat, MAP_PARENT_TAG, "%s");
  this.dimensions = parseSpec.getDimensionsSpec().getDimensionNames();
  this.oip = makeObjectInspector(this.typeString);
}
@Override
public List<InputRow> parseBatch(Map<String, Object> theMap)
{
  final List<String> dimensions;
  if (!this.dimensions.isEmpty()) {
    dimensions = this.dimensions;
  } else {
    dimensions = Lists.newArrayList(
        Sets.difference(theMap.keySet(), parseSpec.getDimensionsSpec().getDimensionExclusions())
    );
  }

  final DateTime timestamp;
  try {
    timestamp = parseSpec.getTimestampSpec().extractTimestamp(theMap);
    if (timestamp == null) {
      final String input = theMap.toString();
      throw new NullPointerException(
          StringUtils.format(
              "Null timestamp in input: %s",
              input.length() < 100 ? input : input.substring(0, 100) + "..."
          )
      );
    }
  }
  catch (Exception e) {
    throw new ParseException(e, "Unparseable timestamp found! Event: %s", theMap);
  }

  return ImmutableList.of(new MapBasedInputRow(timestamp, dimensions, theMap));
}
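// Hedged sketch of the schemaless fallback above: when the configured dimension
// list is empty, every key of the event map that is not in dimensionExclusions
// becomes a dimension. The "parser" variable, event, and exclusion config are all
// hypothetical.
Map<String, Object> event = ImmutableMap.of(
    "timestamp", "2014-01-01T00:00:00Z",
    "dimA", "foo",
    "metric1", 1.0
);
// With "metric1" excluded, the row's dimensions are the remaining keys
// ("timestamp" and "dimA"); a null or unparseable timestamp surfaces as a
// ParseException rather than a silently dropped row.
InputRow row = parser.parseBatch(event).get(0);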
@JsonCreator
public ParquetAvroHadoopInputRowParser(
    @JsonProperty("parseSpec") ParseSpec parseSpec,
    @JsonProperty("binaryAsString") Boolean binaryAsString
)
{
  this.parseSpec = parseSpec;
  this.timestampSpec = parseSpec.getTimestampSpec();
  this.dimensions = parseSpec.getDimensionsSpec().getDimensionNames();
  this.binaryAsString = binaryAsString == null ? false : binaryAsString;

  final JSONPathSpec flattenSpec;
  if (parseSpec instanceof AvroParseSpec) {
    flattenSpec = ((AvroParseSpec) parseSpec).getFlattenSpec();
  } else {
    flattenSpec = JSONPathSpec.DEFAULT;
  }
  this.recordFlattener = ObjectFlatteners.create(
      flattenSpec,
      new AvroFlattenerMaker(false, this.binaryAsString)
  );
}
private static IndexIOConfig createIoConfig(TaskToolbox toolbox, DataSchema dataSchema, Interval interval)
{
  return new IndexIOConfig(
      new IngestSegmentFirehoseFactory(
          dataSchema.getDataSource(),
          interval,
          null, // no filter
          // set the dimension and metric names to make sure the generated dataSchema is used for the firehose
          dataSchema.getParser().getParseSpec().getDimensionsSpec().getDimensionNames(),
          Arrays.stream(dataSchema.getAggregators()).map(AggregatorFactory::getName).collect(Collectors.toList()),
          toolbox.getIndexIO()
      ),
      false
  );
}
/**
 * Imitates the avro extension {@link org.apache.druid.data.input.avro.AvroParsers#parseGenericRecord}.
 */
@Override
public List<InputRow> parseBatch(GenericRecord record)
{
  Map<String, Object> row = recordFlattener.flatten(record);

  final List<String> dimensions;
  if (!this.dimensions.isEmpty()) {
    dimensions = this.dimensions;
  } else {
    dimensions = Lists.newArrayList(
        Sets.difference(row.keySet(), parseSpec.getDimensionsSpec().getDimensionExclusions())
    );
  }

  // check for the parquet Date logical type:
  // https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#date
  LogicalType logicalType = determineTimestampSpecLogicalType(record.getSchema(), timestampSpec.getTimestampColumn());
  DateTime dateTime;
  if (logicalType instanceof LogicalTypes.Date) {
    int daysSinceEpoch = (Integer) record.get(timestampSpec.getTimestampColumn());
    dateTime = DateTimes.utc(TimeUnit.DAYS.toMillis(daysSinceEpoch));
  } else {
    // fall back to a binary format that will be parsed using joda-time
    dateTime = timestampSpec.extractTimestamp(row);
  }

  return ImmutableList.of(new MapBasedInputRow(dateTime, dimensions, row));
}
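// Hedged arithmetic check for the Date branch above: Parquet's DATE logical type
// stores an int count of days since the Unix epoch, so the conversion is a plain
// days-to-millis multiplication. 17897 is a hypothetical value.
int daysSinceEpoch = 17897; // 49 * 365 + 12 leap days
DateTime dateTime = DateTimes.utc(TimeUnit.DAYS.toMillis(daysSinceEpoch));
// dateTime -> 2019-01-01T00:00:00.000Z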
@Test
public void testSerde() throws IOException
{
  final String json = "{"
                      + "\"format\":\"timeAndDims\", "
                      + "\"timestampSpec\": {\"column\":\"timestamp\"}, "
                      + "\"dimensionsSpec\":{}"
                      + "}";

  final Object mapValue = mapper.readValue(json, JacksonUtils.TYPE_REFERENCE_MAP_STRING_OBJECT);
  final ParseSpec parseSpec = mapper.convertValue(mapValue, ParseSpec.class);

  Assert.assertEquals(TimeAndDimsParseSpec.class, parseSpec.getClass());
  Assert.assertEquals("timestamp", parseSpec.getTimestampSpec().getTimestampColumn());
  Assert.assertEquals(ImmutableList.of(), parseSpec.getDimensionsSpec().getDimensionNames());

  // Test round-trip.
  Assert.assertEquals(
      parseSpec,
      mapper.readValue(mapper.writeValueAsString(parseSpec), ParseSpec.class)
  );
}
@Override
protected void setup(Context context)
{
  config = HadoopDruidIndexerConfig.fromConfiguration(context.getConfiguration());
  aggregators = config.getSchema().getDataSchema().getAggregators();
  combiningAggs = new AggregatorFactory[aggregators.length];
  for (int i = 0; i < aggregators.length; ++i) {
    metricNames.add(aggregators[i].getName());
    combiningAggs[i] = aggregators[i].getCombiningFactory();
  }
  typeHelperMap = InputRowSerde.getTypeHelperMap(
      config.getSchema()
            .getDataSchema()
            .getParser()
            .getParseSpec()
            .getDimensionsSpec()
  );
}
@Override
protected void setup(Context context)
{
  config = HadoopDruidIndexerConfig.fromConfiguration(context.getConfiguration());
  aggregators = config.getSchema().getDataSchema().getAggregators();
  combiningAggs = new AggregatorFactory[aggregators.length];
  for (int i = 0; i < aggregators.length; ++i) {
    combiningAggs[i] = aggregators[i].getCombiningFactory();
  }
  typeHelperMap = InputRowSerde.getTypeHelperMap(
      config.getSchema()
            .getDataSchema()
            .getParser()
            .getParseSpec()
            .getDimensionsSpec()
  );
}
public SelectorFilterTest(
    String testName,
    IndexBuilder indexBuilder,
    Function<IndexBuilder, Pair<StorageAdapter, Closeable>> finisher,
    boolean cnf,
    boolean optimize
)
{
  super(
      testName,
      ROWS,
      indexBuilder.schema(
          new IncrementalIndexSchema.Builder()
              .withDimensionsSpec(PARSER.getParseSpec().getDimensionsSpec())
              .build()
      ),
      finisher,
      cnf,
      optimize
  );
}
public ExpressionFilterTest(
    String testName,
    IndexBuilder indexBuilder,
    Function<IndexBuilder, Pair<StorageAdapter, Closeable>> finisher,
    boolean cnf,
    boolean optimize
)
{
  super(
      testName,
      ROWS,
      indexBuilder.schema(
          new IncrementalIndexSchema.Builder()
              .withDimensionsSpec(PARSER.getParseSpec().getDimensionsSpec())
              .build()
      ),
      finisher,
      cnf,
      optimize
  );
}
public FloatAndDoubleFilteringTest(
    String testName,
    IndexBuilder indexBuilder,
    Function<IndexBuilder, Pair<StorageAdapter, Closeable>> finisher,
    boolean cnf,
    boolean optimize
)
{
  super(
      testName,
      ROWS,
      indexBuilder.schema(
          new IncrementalIndexSchema.Builder()
              .withDimensionsSpec(PARSER.getParseSpec().getDimensionsSpec())
              .build()
      ),
      finisher,
      cnf,
      optimize
  );
}
@Override
protected void setup(Context context) throws IOException, InterruptedException
{
  super.setup(context);
  aggregators = config.getSchema().getDataSchema().getAggregators();

  if (DatasourcePathSpec.checkIfReindexingAndIsUseAggEnabled(config.getSchema().getIOConfig().getPathSpec())) {
    aggsForSerializingSegmentInputRow = aggregators;
  } else {
    // Note: this is required for the "delta-ingestion" use case, where we are reading rows stored in Druid
    // as well as late-arriving data on HDFS etc.
    aggsForSerializingSegmentInputRow = new AggregatorFactory[aggregators.length];
    for (int i = 0; i < aggregators.length; ++i) {
      aggsForSerializingSegmentInputRow[i] = aggregators[i].getCombiningFactory();
    }
  }
  typeHelperMap = InputRowSerde.getTypeHelperMap(
      config.getSchema()
            .getDataSchema()
            .getParser()
            .getParseSpec()
            .getDimensionsSpec()
  );
}
@Test
public void testDefaultExclusions()
{
  Map<String, Object> parser = jsonMapper.convertValue(
      new StringInputRowParser(
          new JSONParseSpec(
              new TimestampSpec("time", "auto", null),
              new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("dimB", "dimA")), null, null),
              null,
              null
          ),
          null
      ),
      JacksonUtils.TYPE_REFERENCE_MAP_STRING_OBJECT
  );

  DataSchema schema = new DataSchema(
      "test",
      parser,
      new AggregatorFactory[]{
          new DoubleSumAggregatorFactory("metric1", "col1"),
          new DoubleSumAggregatorFactory("metric2", "col2"),
      },
      new ArbitraryGranularitySpec(Granularities.DAY, ImmutableList.of(Intervals.of("2014/2015"))),
      null,
      jsonMapper
  );

  Assert.assertEquals(
      ImmutableSet.of("time", "col1", "col2", "metric1", "metric2"),
      schema.getParser().getParseSpec().getDimensionsSpec().getDimensionExclusions()
  );
}