/**
 * JSON creator for the protobuf row parser.
 *
 * @param parseSpec          parse spec; its dimensions spec is dereferenced here, so it must not be null
 * @param descriptorFilePath value of the {@code descriptor} JSON property, stored as-is
 * @param protoMessageType   value of the {@code protoMessageType} JSON property, stored as-is
 */
@JsonCreator
public ProtobufInputRowParser(
    @JsonProperty("parseSpec") ParseSpec parseSpec,
    @JsonProperty("descriptor") String descriptorFilePath,
    @JsonProperty("protoMessageType") String protoMessageType
)
{
  // Resolve the dimension names up front; this is the only statement that can fail (null parseSpec).
  this.dimensions = parseSpec.getDimensionsSpec().getDimensionNames();

  this.protoMessageType = protoMessageType;
  this.descriptorFilePath = descriptorFilePath;
  this.parseSpec = parseSpec;
}
/**
 * JSON creator for the map-based row parser.
 *
 * @param parseSpec parse spec to keep; its dimensions spec is dereferenced here, so it must not be null
 */
@JsonCreator
public MapInputRowParser(
    @JsonProperty("parseSpec") ParseSpec parseSpec
)
{
  // Keep the dimension names alongside the spec so they do not have to be looked up again.
  this.dimensions = parseSpec.getDimensionsSpec().getDimensionNames();
  this.parseSpec = parseSpec;
}
/**
 * JSON creator for the thrift row parser.
 *
 * @param parseSpec       parse spec; its dimensions spec is dereferenced here, so it must not be null
 * @param jarPath         value of the {@code thriftJar} JSON property, stored as-is (not validated here)
 * @param thriftClassName value of the {@code thriftClass} JSON property; must not be null
 * @throws NullPointerException if {@code thriftClassName} is null (message "thrift class name")
 */
@JsonCreator
public ThriftInputRowParser(
    @JsonProperty("parseSpec") ParseSpec parseSpec,
    @JsonProperty("thriftJar") String jarPath,
    @JsonProperty("thriftClass") String thriftClassName
)
{
  this.jarPath = jarPath;
  // The class-name check runs before parseSpec is touched, so a missing class name is reported first.
  this.thriftClassName = Preconditions.checkNotNull(thriftClassName, "thrift class name");

  this.dimensions = parseSpec.getDimensionsSpec().getDimensionNames();
  this.parseSpec = parseSpec;
}
@JsonCreator public RegexParseSpec( @JsonProperty("timestampSpec") TimestampSpec timestampSpec, @JsonProperty("dimensionsSpec") DimensionsSpec dimensionsSpec, @JsonProperty("listDelimiter") String listDelimiter, @JsonProperty("columns") List<String> columns, @JsonProperty("pattern") String pattern ) { super(timestampSpec, dimensionsSpec); this.listDelimiter = listDelimiter; this.columns = columns; this.pattern = pattern; verify(dimensionsSpec.getDimensionNames()); }
@VisibleForTesting static String typeStringFromParseSpec(ParseSpec parseSpec) { StringBuilder builder = new StringBuilder("struct<"); builder.append(parseSpec.getTimestampSpec().getTimestampColumn()).append(":string"); // the typeString seems positionally dependent, so repeated timestamp column causes incorrect mapping if (parseSpec.getDimensionsSpec().getDimensionNames().size() > 0) { builder.append(","); builder.append(String.join( ":string,", parseSpec.getDimensionsSpec() .getDimensionNames() .stream() .filter(s -> !s.equals(parseSpec.getTimestampSpec().getTimestampColumn())) .collect(Collectors.toList()))); builder.append(":string"); } builder.append(">"); return builder.toString(); }
@JsonCreator public OrcHadoopInputRowParser( @JsonProperty("parseSpec") ParseSpec parseSpec, @JsonProperty("typeString") String typeString, @JsonProperty("mapFieldNameFormat") String mapFieldNameFormat ) { this.parseSpec = parseSpec; this.typeString = typeString == null ? typeStringFromParseSpec(parseSpec) : typeString; this.mapFieldNameFormat = mapFieldNameFormat == null || !mapFieldNameFormat.contains(MAP_PARENT_TAG) || !mapFieldNameFormat.contains(MAP_CHILD_TAG) ? DEFAULT_MAP_FIELD_NAME_FORMAT : mapFieldNameFormat; this.mapParentFieldNameFormat = StringUtils.replace(this.mapFieldNameFormat, MAP_PARENT_TAG, "%s"); this.dimensions = parseSpec.getDimensionsSpec().getDimensionNames(); this.oip = makeObjectInspector(this.typeString); }
private void verify(List<SpatialDimensionSchema> spatialDimensions) { List<String> dimNames = getDimensionNames(); Preconditions.checkArgument( Sets.intersection(this.dimensionExclusions, Sets.newHashSet(dimNames)).isEmpty(), "dimensions and dimensions exclusions cannot overlap" ); ParserUtils.validateFields(dimNames); ParserUtils.validateFields(dimensionExclusions); List<String> spatialDimNames = Lists.transform( spatialDimensions, new Function<SpatialDimensionSchema, String>() { @Override public String apply(SpatialDimensionSchema input) { return input.getDimName(); } } ); // Don't allow duplicates between main list and deprecated spatial list ParserUtils.validateFields(Iterables.concat(dimNames, spatialDimNames)); }
Preconditions.checkArgument(!column.contains(","), "Column[%s] has a comma, it cannot", column); verify(dimensionsSpec.getDimensionNames()); } else { Preconditions.checkArgument(
Preconditions.checkArgument(!column.contains(","), "Column[%s] has a comma, it cannot", column); verify(dimensionsSpec.getDimensionNames()); } else { Preconditions.checkArgument(
/**
 * JSON creator for the Parquet/Avro row parser.
 *
 * @param parseSpec      parse spec; dereferenced here, so it must not be null. When it is an
 *                       {@code AvroParseSpec} its flatten spec is used, otherwise {@code JSONPathSpec.DEFAULT}
 * @param binaryAsString value of the {@code binaryAsString} JSON property; null is treated as {@code false}
 */
@JsonCreator
public ParquetAvroHadoopInputRowParser(
    @JsonProperty("parseSpec") ParseSpec parseSpec,
    @JsonProperty("binaryAsString") Boolean binaryAsString
)
{
  this.parseSpec = parseSpec;
  this.timestampSpec = parseSpec.getTimestampSpec();
  this.dimensions = parseSpec.getDimensionsSpec().getDimensionNames();
  this.binaryAsString = binaryAsString != null && binaryAsString;

  // instanceof is already false for null, so no separate null check is needed.
  final JSONPathSpec flattenSpec = parseSpec instanceof AvroParseSpec
                                   ? ((AvroParseSpec) parseSpec).getFlattenSpec()
                                   : JSONPathSpec.DEFAULT;

  final AvroFlattenerMaker flattenerMaker = new AvroFlattenerMaker(false, this.binaryAsString);
  this.recordFlattener = ObjectFlatteners.create(flattenSpec, flattenerMaker);
}
private static IndexIOConfig createIoConfig(TaskToolbox toolbox, DataSchema dataSchema, Interval interval) { return new IndexIOConfig( new IngestSegmentFirehoseFactory( dataSchema.getDataSource(), interval, null, // no filter // set dimensions and metrics names to make sure that the generated dataSchema is used for the firehose dataSchema.getParser().getParseSpec().getDimensionsSpec().getDimensionNames(), Arrays.stream(dataSchema.getAggregators()).map(AggregatorFactory::getName).collect(Collectors.toList()), toolbox.getIndexIO() ), false ); }
if (!(dimensionsSpec.hasCustomDimensions() && dimensionsSpec.getDimensionNames().contains(timestampColumn))) { dimensionExclusions.add(timestampColumn); metSet.add(aggregator.getName()); final Set<String> dimSet = Sets.newHashSet(dimensionsSpec.getDimensionNames()); final Set<String> overlap = Sets.intersection(metSet, dimSet); if (!overlap.isEmpty()) {
@Test public void testSerde() throws IOException { final String json = "{" + "\"format\":\"timeAndDims\", " + "\"timestampSpec\": {\"column\":\"timestamp\"}, " + "\"dimensionsSpec\":{}" + "}"; final Object mapValue = mapper.readValue(json, JacksonUtils.TYPE_REFERENCE_MAP_STRING_OBJECT); final ParseSpec parseSpec = mapper.convertValue(mapValue, ParseSpec.class); Assert.assertEquals(TimeAndDimsParseSpec.class, parseSpec.getClass()); Assert.assertEquals("timestamp", parseSpec.getTimestampSpec().getTimestampColumn()); Assert.assertEquals(ImmutableList.of(), parseSpec.getDimensionsSpec().getDimensionNames()); // Test round-trip. Assert.assertEquals( parseSpec, mapper.readValue(mapper.writeValueAsString(parseSpec), ParseSpec.class) ); }
List<String> dims; if (config.getParser().getParseSpec().getDimensionsSpec().hasCustomDimensions()) { dims = config.getParser().getParseSpec().getDimensionsSpec().getDimensionNames(); } else { Set<String> dimSet = Sets.newHashSet(
dims = dimensions; } else if (inputRowParser.getParseSpec().getDimensionsSpec().hasCustomDimensions()) { dims = inputRowParser.getParseSpec().getDimensionsSpec().getDimensionNames(); } else { dims = getUniqueDimensions(
/**
 * Round-trips a {@code JSONParseSpec} through the JSON mapper and checks that the timestamp column,
 * timestamp format, dimension names and feature spec survive.
 */
@Test
public void testSerde() throws IOException
{
  final HashMap<String, Boolean> feature = new HashMap<>();
  feature.put("ALLOW_UNQUOTED_CONTROL_CHARS", true);

  final TimestampSpec timestampSpec = new TimestampSpec("timestamp", "iso", null);
  final DimensionsSpec dimensionsSpec =
      new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("bar", "foo")), null, null);
  final JSONParseSpec original = new JSONParseSpec(timestampSpec, dimensionsSpec, null, feature);

  // Deserialize via the ParseSpec base type so the subtype has to be resolved by the mapper.
  final String serialized = jsonMapper.writeValueAsString(original);
  final JSONParseSpec roundTripped = (JSONParseSpec) jsonMapper.readValue(serialized, ParseSpec.class);

  Assert.assertEquals("timestamp", roundTripped.getTimestampSpec().getTimestampColumn());
  Assert.assertEquals("iso", roundTripped.getTimestampSpec().getTimestampFormat());
  Assert.assertEquals(Arrays.asList("bar", "foo"), roundTripped.getDimensionsSpec().getDimensionNames());
  Assert.assertEquals(feature, roundTripped.getFeatureSpec());
}
}
/**
 * Round-trips a {@code RegexParseSpec} through the JSON mapper and checks that the timestamp column,
 * timestamp format, pattern, list delimiter and dimension names survive.
 */
@Test
public void testSerde() throws IOException
{
  final TimestampSpec timestampSpec = new TimestampSpec("abc", "iso", null);
  final DimensionsSpec dimensionsSpec =
      new DimensionsSpec(DimensionsSpec.getDefaultSchemas(Collections.singletonList("abc")), null, null);
  final RegexParseSpec original = new RegexParseSpec(
      timestampSpec,
      dimensionsSpec,
      "\u0001",
      Collections.singletonList("abc"),
      "abc"
  );

  // Deserialize via the ParseSpec base type so the subtype has to be resolved by the mapper.
  final String serialized = jsonMapper.writeValueAsString(original);
  final RegexParseSpec roundTripped = (RegexParseSpec) jsonMapper.readValue(serialized, ParseSpec.class);

  Assert.assertEquals("abc", roundTripped.getTimestampSpec().getTimestampColumn());
  Assert.assertEquals("iso", roundTripped.getTimestampSpec().getTimestampFormat());

  Assert.assertEquals("abc", roundTripped.getPattern());
  Assert.assertEquals("\u0001", roundTripped.getListDelimiter());
  Assert.assertEquals(Collections.singletonList("abc"), roundTripped.getDimensionsSpec().getDimensionNames());
}
}
/**
 * Round-trips a {@code DelimitedParseSpec} through the JSON mapper and checks that the timestamp column,
 * timestamp format, columns, delimiter, list delimiter and dimension names survive.
 */
@Test
public void testSerde() throws IOException
{
  final TimestampSpec timestampSpec = new TimestampSpec("abc", "iso", null);
  final DimensionsSpec dimensionsSpec =
      new DimensionsSpec(DimensionsSpec.getDefaultSchemas(Collections.singletonList("abc")), null, null);
  final DelimitedParseSpec original = new DelimitedParseSpec(
      timestampSpec,
      dimensionsSpec,
      "\u0001",
      "\u0002",
      Collections.singletonList("abc"),
      false,
      0
  );

  // Here the concrete class is requested directly, so no cast is needed.
  final String serialized = jsonMapper.writeValueAsString(original);
  final DelimitedParseSpec roundTripped = jsonMapper.readValue(serialized, DelimitedParseSpec.class);

  Assert.assertEquals("abc", roundTripped.getTimestampSpec().getTimestampColumn());
  Assert.assertEquals("iso", roundTripped.getTimestampSpec().getTimestampFormat());

  Assert.assertEquals(Collections.singletonList("abc"), roundTripped.getColumns());
  Assert.assertEquals("\u0001", roundTripped.getDelimiter());
  Assert.assertEquals("\u0002", roundTripped.getListDelimiter());
  Assert.assertEquals(Collections.singletonList("abc"), roundTripped.getDimensionsSpec().getDimensionNames());
}
/**
 * Round-trips a {@code JavaScriptParseSpec} through the JSON mapper and checks that the timestamp column,
 * timestamp format, function and dimension names survive.
 */
@Test
public void testSerde() throws IOException
{
  // Register a JavaScriptConfig injectable on the mapper before deserializing.
  final InjectableValues.Std injectables = new InjectableValues.Std();
  jsonMapper.setInjectableValues(
      injectables.addValue(JavaScriptConfig.class, JavaScriptConfig.getEnabledInstance())
  );

  final TimestampSpec timestampSpec = new TimestampSpec("abc", "iso", null);
  final DimensionsSpec dimensionsSpec =
      new DimensionsSpec(DimensionsSpec.getDefaultSchemas(Collections.singletonList("abc")), null, null);
  final JavaScriptParseSpec original = new JavaScriptParseSpec(
      timestampSpec,
      dimensionsSpec,
      "abc",
      JavaScriptConfig.getEnabledInstance()
  );

  // Deserialize via the ParseSpec base type so the subtype has to be resolved by the mapper.
  final String serialized = jsonMapper.writeValueAsString(original);
  final JavaScriptParseSpec roundTripped =
      (JavaScriptParseSpec) jsonMapper.readValue(serialized, ParseSpec.class);

  Assert.assertEquals("abc", roundTripped.getTimestampSpec().getTimestampColumn());
  Assert.assertEquals("iso", roundTripped.getTimestampSpec().getTimestampFormat());

  Assert.assertEquals("abc", roundTripped.getFunction());
  Assert.assertEquals(Collections.singletonList("abc"), roundTripped.getDimensionsSpec().getDimensionNames());
}
new HashSet<>(expectedDimensionsSpec.getDimensionNames()), new HashSet<>(ingestSegmentFirehoseFactory.getDimensions()) );