/**
 * Stores the supplied configuration and builds the {@link TimestampSpec} used by this factory.
 *
 * @param name       stored as-is in {@code this.name}
 * @param fieldName  stored as-is and also passed as the first argument of the {@link TimestampSpec}
 * @param timeFormat stored as-is and also passed as the second argument of the {@link TimestampSpec}
 * @param comparator stored as-is in {@code this.comparator}
 * @param initValue  stored as-is in {@code this.initValue}
 */
TimestampAggregatorFactory(
    String name,
    String fieldName,
    String timeFormat,
    Comparator<Long> comparator,
    Long initValue
)
{
  // The spec is derived purely from the constructor arguments; its third argument is always null.
  this.timestampSpec = new TimestampSpec(fieldName, timeFormat, null);

  this.name = name;
  this.fieldName = fieldName;
  this.timeFormat = timeFormat;

  this.comparator = comparator;
  this.initValue = initValue;
}
/**
 * A spec built without a timestamp column or format, but with {@code DateTimes.EPOCH} as its third
 * argument, must extract 1970-01-01 from a row that only carries an unrelated "dim" entry.
 */
@Test
public void testExtractTimestampWithMissingTimestampColumn()
{
  final TimestampSpec spec = new TimestampSpec(null, null, DateTimes.EPOCH);

  // The row deliberately contains no timestamp field at all.
  final ImmutableMap<String, Object> rowWithoutTimestamp = ImmutableMap.of("dim", "foo");

  Assert.assertEquals(DateTimes.of("1970-01-01"), spec.extractTimestamp(rowWithoutTimestamp));
}
/** * imitate avro extension {@link org.apache.druid.data.input.avro.AvroParsers#parseGenericRecord} */ @Override public List<InputRow> parseBatch(GenericRecord record) { Map<String, Object> row = recordFlattener.flatten(record); final List<String> dimensions; if (!this.dimensions.isEmpty()) { dimensions = this.dimensions; } else { dimensions = Lists.newArrayList( Sets.difference(row.keySet(), parseSpec.getDimensionsSpec().getDimensionExclusions()) ); } // check for parquet Date // https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#date LogicalType logicalType = determineTimestampSpecLogicalType(record.getSchema(), timestampSpec.getTimestampColumn()); DateTime dateTime; if (logicalType instanceof LogicalTypes.Date) { int daysSinceEpoch = (Integer) record.get(timestampSpec.getTimestampColumn()); dateTime = DateTimes.utc(TimeUnit.DAYS.toMillis(daysSinceEpoch)); } else { // Fall back to a binary format that will be parsed using joda-time dateTime = timestampSpec.extractTimestamp(row); } return ImmutableList.of(new MapBasedInputRow(dateTime, dimensions, row)); }
@Test public void testSerde() throws IOException { HashMap<String, Boolean> feature = new HashMap<String, Boolean>(); feature.put("ALLOW_UNQUOTED_CONTROL_CHARS", true); JSONParseSpec spec = new JSONParseSpec( new TimestampSpec("timestamp", "iso", null), new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("bar", "foo")), null, null), null, feature ); final JSONParseSpec serde = (JSONParseSpec) jsonMapper.readValue( jsonMapper.writeValueAsString(spec), ParseSpec.class ); Assert.assertEquals("timestamp", serde.getTimestampSpec().getTimestampColumn()); Assert.assertEquals("iso", serde.getTimestampSpec().getTimestampFormat()); Assert.assertEquals(Arrays.asList("bar", "foo"), serde.getDimensionsSpec().getDimensionNames()); Assert.assertEquals(feature, serde.getFeatureSpec()); } }
/**
 * Converts one event map into a single-element list holding a {@link MapBasedInputRow}.
 *
 * <p>Dimensions are {@code this.dimensions} when that list is non-empty; otherwise they are the map's
 * keys minus the dimension exclusions of the parse spec.
 *
 * @param theMap the event to convert
 * @return an immutable list containing exactly one input row
 * @throws ParseException if timestamp extraction throws, or yields a null timestamp
 */
@Override
public List<InputRow> parseBatch(Map<String, Object> theMap)
{
  final List<String> dimensions = this.dimensions.isEmpty()
                                  ? Lists.newArrayList(
                                      Sets.difference(
                                          theMap.keySet(),
                                          parseSpec.getDimensionsSpec().getDimensionExclusions()
                                      )
                                  )
                                  : this.dimensions;

  final DateTime timestamp;
  try {
    timestamp = parseSpec.getTimestampSpec().extractTimestamp(theMap);
    if (timestamp == null) {
      // Only the first 100 characters of the event are quoted in the inner message.
      final String input = theMap.toString();
      final String preview;
      if (input.length() < 100) {
        preview = input;
      } else {
        preview = input.substring(0, 100) + "...";
      }
      throw new NullPointerException(StringUtils.format("Null timestamp in input: %s", preview));
    }
  }
  catch (Exception e) {
    // Any failure above, including the null-timestamp case, surfaces as a ParseException.
    throw new ParseException(e, "Unparseable timestamp found! Event: %s", theMap);
  }

  return ImmutableList.of(new MapBasedInputRow(timestamp, dimensions, theMap));
}
@VisibleForTesting static String typeStringFromParseSpec(ParseSpec parseSpec) { StringBuilder builder = new StringBuilder("struct<"); builder.append(parseSpec.getTimestampSpec().getTimestampColumn()).append(":string"); // the typeString seems positionally dependent, so repeated timestamp column causes incorrect mapping if (parseSpec.getDimensionsSpec().getDimensionNames().size() > 0) { builder.append(","); builder.append(String.join( ":string,", parseSpec.getDimensionsSpec() .getDimensionNames() .stream() .filter(s -> !s.equals(parseSpec.getTimestampSpec().getTimestampColumn())) .collect(Collectors.toList()))); builder.append(":string"); } builder.append(">"); return builder.toString(); }
/**
 * Looks up the value stored under {@code timestampColumn} in the given row and hands it to
 * {@code parseDateTime}.
 *
 * @param input the row to read the timestamp from
 * @return whatever {@code parseDateTime} returns for the looked-up value
 */
public DateTime extractTimestamp(Map<String, Object> input)
{
  final Object rawTimestamp = input.get(timestampColumn);
  return parseDateTime(rawTimestamp);
}
@Override public int hashCode() { int result = timestampSpec != null ? timestampSpec.hashCode() : 0; result = 31 * result + (dimensionsSpec != null ? dimensionsSpec.hashCode() : 0); return result; } }
/**
 * Two instances are equal when they have exactly the same class and both their {@code timestampSpec}
 * and {@code dimensionsSpec} fields are equal. Two null fields count as equal.
 */
@Override
public boolean equals(Object o)
{
  if (this == o) {
    return true;
  }
  if (o == null || getClass() != o.getClass()) {
    return false;
  }

  final ParseSpec that = (ParseSpec) o;

  final boolean sameTimestampSpec = timestampSpec == null
                                    ? that.timestampSpec == null
                                    : timestampSpec.equals(that.timestampSpec);
  if (!sameTimestampSpec) {
    return false;
  }

  return dimensionsSpec == null
         ? that.dimensionsSpec == null
         : dimensionsSpec.equals(that.dimensionsSpec);
}
@Test public void testSerde() throws IOException { RegexParseSpec spec = new RegexParseSpec( new TimestampSpec("abc", "iso", null), new DimensionsSpec(DimensionsSpec.getDefaultSchemas(Collections.singletonList("abc")), null, null), "\u0001", Collections.singletonList("abc"), "abc" ); final RegexParseSpec serde = (RegexParseSpec) jsonMapper.readValue( jsonMapper.writeValueAsString(spec), ParseSpec.class ); Assert.assertEquals("abc", serde.getTimestampSpec().getTimestampColumn()); Assert.assertEquals("iso", serde.getTimestampSpec().getTimestampFormat()); Assert.assertEquals("abc", serde.getPattern()); Assert.assertEquals("\u0001", serde.getListDelimiter()); Assert.assertEquals(Collections.singletonList("abc"), serde.getDimensionsSpec().getDimensionNames()); } }
parseSpec.getTimestampSpec().extractTimestamp(record), dimensions, record
final String timestampColumn = timestampSpec.getTimestampColumn(); if (!(dimensionsSpec.hasCustomDimensions() && dimensionsSpec.getDimensionNames().contains(timestampColumn))) { dimensionExclusions.add(timestampColumn);
static Long convertLong(TimestampSpec timestampSpec, Object input) { if (input instanceof Number) { return ((Number) input).longValue(); } else if (input instanceof DateTime) { return ((DateTime) input).getMillis(); } else if (input instanceof Timestamp) { return ((Timestamp) input).getTime(); } else if (input instanceof String) { return timestampSpec.parseDateTime(input).getMillis(); } return null; } }
@Override public int hashCode() { int result = timestampSpec != null ? timestampSpec.hashCode() : 0; result = 31 * result + (dimensionsSpec != null ? dimensionsSpec.hashCode() : 0); return result; } }
/**
 * Compares this spec with another object. They are equal only if the other object has exactly the
 * same class and its {@code timestampSpec} and {@code dimensionsSpec} are each equal to this
 * instance's (or both null).
 */
@Override
public boolean equals(Object o)
{
  if (this == o) {
    return true;
  }
  if (o == null || getClass() != o.getClass()) {
    return false;
  }

  final ParseSpec other = (ParseSpec) o;

  // The timestamp spec is compared first; the dimensions spec is only consulted when it matches.
  if (timestampSpec == null) {
    if (other.timestampSpec != null) {
      return false;
    }
  } else if (!timestampSpec.equals(other.timestampSpec)) {
    return false;
  }

  if (dimensionsSpec == null) {
    return other.dimensionsSpec == null;
  }
  return dimensionsSpec.equals(other.dimensionsSpec);
}
/**
 * JSON creator that substitutes a default for every argument that is null before delegating to the
 * superclass constructor.
 *
 * @param timestampSpec  replaced by {@code new TimestampSpec(null, null, null)} when null
 * @param dimensionsSpec replaced by {@code DimensionsSpec.EMPTY} when null
 * @param flattenSpec    replaced by {@code JSONPathSpec.DEFAULT} when null
 */
@JsonCreator
public AvroParseSpec(
    @JsonProperty("timestampSpec") TimestampSpec timestampSpec,
    @JsonProperty("dimensionsSpec") DimensionsSpec dimensionsSpec,
    @JsonProperty("flattenSpec") JSONPathSpec flattenSpec
)
{
  super(
      timestampSpec == null ? new TimestampSpec(null, null, null) : timestampSpec,
      dimensionsSpec == null ? DimensionsSpec.EMPTY : dimensionsSpec,
      flattenSpec == null ? JSONPathSpec.DEFAULT : flattenSpec
  );
}
/**
 * Serializes a {@link DelimitedParseSpec} with {@code jsonMapper}, reads it back, and checks that the
 * timestamp column, timestamp format, columns, delimiter, list delimiter and dimension names match.
 */
@Test
public void testSerde() throws IOException
{
  final TimestampSpec timestampSpec = new TimestampSpec("abc", "iso", null);
  final DimensionsSpec dimensionsSpec = new DimensionsSpec(
      DimensionsSpec.getDefaultSchemas(Collections.singletonList("abc")),
      null,
      null
  );
  final DelimitedParseSpec spec = new DelimitedParseSpec(
      timestampSpec,
      dimensionsSpec,
      "\u0001",
      "\u0002",
      Collections.singletonList("abc"),
      false,
      0
  );

  // Round trip through JSON using the concrete type.
  final String json = jsonMapper.writeValueAsString(spec);
  final DelimitedParseSpec serde = jsonMapper.readValue(json, DelimitedParseSpec.class);

  Assert.assertEquals("abc", serde.getTimestampSpec().getTimestampColumn());
  Assert.assertEquals("iso", serde.getTimestampSpec().getTimestampFormat());

  Assert.assertEquals(Collections.singletonList("abc"), serde.getColumns());
  Assert.assertEquals("\u0001", serde.getDelimiter());
  Assert.assertEquals("\u0002", serde.getListDelimiter());
  Assert.assertEquals(Collections.singletonList("abc"), serde.getDimensionsSpec().getDimensionNames());
}
/**
 * A spec for column "TIMEstamp" with format "yyyy-MM-dd" must extract 2014-03-01 from a row whose
 * "TIMEstamp" entry is the string "2014-03-01".
 */
@Test
public void testExtractTimestamp()
{
  final TimestampSpec spec = new TimestampSpec("TIMEstamp", "yyyy-MM-dd", null);

  // The row key uses exactly the same mixed-case spelling as the configured column.
  final ImmutableMap<String, Object> row = ImmutableMap.of("TIMEstamp", "2014-03-01");

  Assert.assertEquals(DateTimes.of("2014-03-01"), spec.extractTimestamp(row));
}
parseSpec.getTimestampSpec().extractTimestamp(record), dimensions, record
// Name of the timestamp column, as configured on the parse spec's timestamp spec.
String tsField = parseSpec.getTimestampSpec().getTimestampColumn();