/**
 * Adds a date-time field to the schema being built.
 *
 * @param name name of the new column
 * @param dataType data type of the new column
 * @param format date-time format string handed to {@link DateTimeFieldSpec}
 * @param granularity date-time granularity string handed to {@link DateTimeFieldSpec}
 * @return this builder, so that calls can be chained
 */
public SchemaBuilder addDateTime(@Nonnull String name, @Nonnull DataType dataType, @Nonnull String format,
    @Nonnull String granularity) {
  DateTimeFieldSpec dateTimeFieldSpec = new DateTimeFieldSpec(name, dataType, format, granularity);
  _schema.addField(dateTimeFieldSpec);
  return this;
}
/**
 * Translates a value of the time column into the representation used by the date-time column.
 *
 * <p>The outgoing granularity spec of {@code _timeFieldSpec} is first expressed as a {@link DateTimeFormatSpec}.
 * When its format string equals the format of {@code _dateTimeFieldSpec}, the value is returned untouched.
 * Otherwise the value is converted to milliseconds through the outgoing granularity spec and re-encoded in the
 * date-time column's format.
 *
 * @param timeColumnValue time column value, interpreted through the outgoing granularity spec of the time field
 * @return {@code timeColumnValue} itself when both formats are equal, otherwise the re-encoded value
 */
private Object convertTimeFieldToDateTimeFieldSpec(Object timeColumnValue) {
  TimeGranularitySpec outgoingSpec = _timeFieldSpec.getOutgoingGranularitySpec();
  DateTimeFormatSpec sourceFormatSpec =
      new DateTimeFormatSpec(outgoingSpec.getTimeUnitSize(), outgoingSpec.getTimeType().toString(),
          outgoingSpec.getTimeFormat());
  String sourceFormat = sourceFormatSpec.getFormat();
  String targetFormat = _dateTimeFieldSpec.getFormat();

  if (!sourceFormat.equals(targetFormat)) {
    // Formats differ: go through milliseconds as the common intermediate representation
    long millis = outgoingSpec.toMillis(timeColumnValue);
    DateTimeFormatSpec targetFormatSpec = new DateTimeFormatSpec(targetFormat);
    return targetFormatSpec.fromMillisToFormat(millis, Object.class);
  }

  // Formats are identical: nothing to convert
  return timeColumnValue;
}
public BackfillDateTimeRecordReader(RecordReader baseRecordReader, TimeFieldSpec timeFieldSpec, DateTimeFieldSpec dateTimeFieldSpec) { _baseRecordReader = baseRecordReader; _timeFieldSpec = timeFieldSpec; _dateTimeFieldSpec = dateTimeFieldSpec; _schema = baseRecordReader.getSchema(); // Add/replace the date time field spec to the schema _schema.removeField(_dateTimeFieldSpec.getName()); _schema.addField(_dateTimeFieldSpec); }
/**
 * Tests the {@link DateTimeFieldSpec} constructor together with its equality contract: two specs that differ
 * only in data type must not be equal, while two specs built from identical arguments must be equal.
 */
@Test
public void testDateTimeFieldSpecConstructor() {
  String name = "Date";
  String format = "1:HOURS:EPOCH";
  String granularity = "1:HOURS";

  // Same name/format/granularity, different data type -> must not be equal
  DateTimeFieldSpec longSpec = new DateTimeFieldSpec(name, LONG, format, granularity);
  DateTimeFieldSpec intSpec = new DateTimeFieldSpec(name, INT, format, granularity);
  Assert.assertNotEquals(longSpec, intSpec);

  // Identical constructor arguments -> must be equal
  DateTimeFieldSpec sameAsLongSpec = new DateTimeFieldSpec(name, LONG, format, granularity);
  Assert.assertEquals(longSpec, sameAsLongSpec);
}
// Check each property of the date-time field spec in turn: field type, column name, data type,
// single-value flag, default null value, format and granularity.
Assert.assertEquals(dateTimeFieldSpec.getFieldType(), FieldSpec.FieldType.DATE_TIME);
Assert.assertEquals(dateTimeFieldSpec.getName(), "dateTime");
Assert.assertEquals(dateTimeFieldSpec.getDataType(), FieldSpec.DataType.LONG);
Assert.assertEquals(dateTimeFieldSpec.isSingleValueField(), true);
// For this LONG column the default null value is expected to be Long.MIN_VALUE
Assert.assertEquals(dateTimeFieldSpec.getDefaultNullValue(), Long.MIN_VALUE);
Assert.assertEquals(dateTimeFieldSpec.getFormat(), "1:HOURS:EPOCH");
Assert.assertEquals(dateTimeFieldSpec.getGranularity(), "1:HOURS");
/**
 * Builds {@code NUM_ROWS} rows of random test data that carry both a time column and a date-time column.
 *
 * <p>Every row holds random values for the columns {@code D1}, {@code D2}, {@code M1} and {@code M2}, plus the
 * current wall-clock time encoded twice: once through the incoming granularity spec of {@code timeFieldSpec}
 * and once in the format of {@code dateTimeFieldSpec}.
 *
 * @param timeFieldSpec spec of the time column; its incoming granularity spec encodes the time column value
 * @param dateTimeFieldSpec spec of the date-time column; its format encodes the date-time column value
 * @return the generated rows
 */
private List<GenericRow> createTestDataWithTimespec(TimeFieldSpec timeFieldSpec,
    DateTimeFieldSpec dateTimeFieldSpec) {
  // The target format depends only on the field spec, so it is built once instead of once per row
  DateTimeFormatSpec toFormat = new DateTimeFormatSpec(dateTimeFieldSpec.getFormat());
  List<GenericRow> rows = new ArrayList<>();
  Random random = new Random();

  for (int i = 0; i < NUM_ROWS; i++) {
    Map<String, Object> fields = new HashMap<>();
    fields.put(D1, RandomStringUtils.randomAlphabetic(2));
    fields.put(D2, RandomStringUtils.randomAlphabetic(5));
    // nextInt(bound) is always non-negative; Math.abs(nextInt()) is not, because
    // Math.abs(Integer.MIN_VALUE) == Integer.MIN_VALUE
    fields.put(M1, random.nextInt(Integer.MAX_VALUE));
    // nextFloat() already lies in [0.0, 1.0), so no abs() is required
    fields.put(M2, random.nextFloat());

    // Encode the same instant for both the time column and the date-time column
    long timestamp = System.currentTimeMillis();
    Object timeColumnValue = timeFieldSpec.getIncomingGranularitySpec().fromMillis(timestamp);
    fields.put(timeFieldSpec.getName(), timeColumnValue);
    Object dateTimeColumnValue = toFormat.fromMillisToFormat(timestamp, Object.class);
    fields.put(dateTimeFieldSpec.getName(), dateTimeColumnValue);

    GenericRow row = new GenericRow();
    row.init(fields);
    rows.add(row);
  }
  return rows;
}
// Store the date-time column's format and granularity in `properties`, each under the per-column key
// derived from the column name (DATETIME_FORMAT and DATETIME_GRANULARITY respectively).
DateTimeFieldSpec dateTimeFieldSpec = (DateTimeFieldSpec) fieldSpec;
properties.setProperty(V1Constants.MetadataKeys.Column.getKeyFor(column, DATETIME_FORMAT),
    dateTimeFieldSpec.getFormat());
properties.setProperty(V1Constants.MetadataKeys.Column.getKeyFor(column, DATETIME_GRANULARITY),
    dateTimeFieldSpec.getGranularity());
@Test(dataProvider = "backfillRecordReaderDataProvider") public void testBackfillDateTimeRecordReader(RecordReader baseRecordReader, TimeFieldSpec timeFieldSpec, DateTimeFieldSpec dateTimeFieldSpec, Schema schemaExpected) throws Exception { BackfillDateTimeColumn backfillDateTimeColumn = new BackfillDateTimeColumn(new File("original"), new File("backup"), timeFieldSpec, dateTimeFieldSpec); try (BackfillDateTimeRecordReader wrapperReader = backfillDateTimeColumn .getBackfillDateTimeRecordReader(baseRecordReader)) { // check that schema has new column Schema schemaActual = wrapperReader.getSchema(); Assert.assertEquals(schemaActual, schemaExpected); DateTimeFieldSpec dateTimeFieldSpecActual = schemaActual.getDateTimeSpec(dateTimeFieldSpec.getName()); TimeFieldSpec timeFieldSpecActual = schemaActual.getTimeFieldSpec(); Assert.assertEquals(dateTimeFieldSpecActual, dateTimeFieldSpec); Assert.assertEquals(timeFieldSpecActual, timeFieldSpec); while (wrapperReader.hasNext()) { GenericRow next = wrapperReader.next(); // check that new datetime column is generated Object dateTimeColumnValueActual = next.getValue(dateTimeFieldSpec.getName()); Assert.assertNotNull(dateTimeColumnValueActual); Object timeColumnValueActual = next.getValue(timeFieldSpec.getName()); Assert.assertNotNull(timeColumnValueActual); // check that datetime column has correct value as per its format Long timeColumnValueMS = timeFieldSpec.getIncomingGranularitySpec().toMillis(timeColumnValueActual); DateTimeFormatSpec toFormat = new DateTimeFormatSpec(dateTimeFieldSpec.getFormat()); Object dateTimeColumnValueExpected = toFormat.fromMillisToFormat(timeColumnValueMS, Object.class); Assert.assertEquals(dateTimeColumnValueActual, dateTimeColumnValueExpected); } } }
/**
 * Checks how the {@link DateTimeFieldSpec} constructor reacts to a given format string: either it throws an
 * {@link IllegalArgumentException}, or it yields a spec equal to the expected one.
 *
 * @param name column name passed to the constructor
 * @param dataType data type passed to the constructor
 * @param format format string under test
 * @param granularity granularity string passed to the constructor
 * @param exceptionExpected whether construction is expected to throw {@link IllegalArgumentException}
 * @param dateTimeFieldExpected expected spec, or {@code null} when construction is expected to fail
 */
@Test(dataProvider = "testFormatDataProvider")
public void testDateTimeFormat(String name, FieldSpec.DataType dataType, String format, String granularity,
    boolean exceptionExpected, DateTimeFieldSpec dateTimeFieldExpected) {
  DateTimeFieldSpec constructedSpec;
  boolean exceptionThrown;
  try {
    constructedSpec = new DateTimeFieldSpec(name, dataType, format, granularity);
    exceptionThrown = false;
  } catch (IllegalArgumentException e) {
    // A rejected format leaves no spec behind; the assertions below compare against the expectation
    constructedSpec = null;
    exceptionThrown = true;
  }

  Assert.assertEquals(exceptionThrown, exceptionExpected);
  Assert.assertEquals(constructedSpec, dateTimeFieldExpected);
}
/**
 * Reads the next row from the base record reader and adds the date-time column to it. The date-time value is
 * derived from the row's time column value via {@code convertTimeFieldToDateTimeFieldSpec}.
 * {@inheritDoc}
 *
 * @param reuse row object handed to the base record reader
 * @return the row returned by the base record reader, with the date-time column added
 * @throws IOException if the base record reader fails to read the next row
 * @see org.apache.pinot.core.data.readers.RecordReader#next(org.apache.pinot.core.data.GenericRow)
 */
@Override
public GenericRow next(GenericRow reuse)
    throws IOException {
  GenericRow row = _baseRecordReader.next(reuse);

  // Deliberately no cast to Long: the converter accepts any Object, and a cast would throw a
  // ClassCastException for time columns whose values are not Long (for example Integer)
  Object timeColumnValue = row.getValue(_timeFieldSpec.getName());
  Object dateTimeColumnValue = convertTimeFieldToDateTimeFieldSpec(timeColumnValue);
  row.putField(_dateTimeFieldSpec.getName(), dateTimeColumnValue);
  return row;
}
/**
 * Supplies the cases for the "testFormatDataProvider" data provider.
 *
 * <p>Each entry is {@code {name, dataType, format, granularity, exceptionExpected, expectedSpec}}. Entries with
 * {@code exceptionExpected == true} carry a {@code null} expected spec; the remaining entries carry the spec
 * built from the same arguments.
 *
 * @return one row of arguments per format under test
 */
@DataProvider(name = "testFormatDataProvider")
public Object[][] provideTestFormatData() {
  String name = "Date";
  FieldSpec.DataType dataType = LONG;
  String granularity = "1:HOURS";

  return new Object[][]{
      // Formats for which construction is expected to throw
      {name, dataType, "1:hours", granularity, true, null},
      {name, dataType, "one_hours", granularity, true, null},
      {name, dataType, "1:HOURS:SIMPLE_DATE_FORMAT", granularity, true, null},
      {name, dataType, "1:hour:EPOCH", granularity, true, null},
      {name, dataType, "1:HOUR:EPOCH:yyyyMMdd", granularity, true, null},
      {name, dataType, "0:HOURS:EPOCH", granularity, true, null},
      {name, dataType, "-1:HOURS:EPOCH", granularity, true, null},
      {name, dataType, "0.1:HOURS:EPOCH", granularity, true, null},
      // Formats for which construction is expected to succeed
      {name, dataType, "1:HOURS:EPOCH", granularity, false,
          new DateTimeFieldSpec(name, dataType, "1:HOURS:EPOCH", granularity)},
      {name, dataType, "1:DAYS:SIMPLE_DATE_FORMAT:yyyyMMdd", granularity, false,
          new DateTimeFieldSpec(name, dataType, "1:DAYS:SIMPLE_DATE_FORMAT:yyyyMMdd", granularity)}
  };
}
break;
case DATE_TIME:
  // A date-time column is described by a DateTimeFieldSpec built from the column name, the data type,
  // the date-time format and the date-time granularity.
  this.fieldSpec = new DateTimeFieldSpec(columnName, dataType, dateTimeFormat, dateTimeGranularity);
  break;
default:
// The spec deserialized from the JSON string produced by getRandomOrderJsonString(dateTimeFields) must equal
// the spec constructed directly below.
// NOTE(review): presumably the helper shuffles the order of the JSON keys - confirm against its definition.
DateTimeFieldSpec dateTimeFieldSpec1 =
    JsonUtils.stringToObject(getRandomOrderJsonString(dateTimeFields), DateTimeFieldSpec.class);
DateTimeFieldSpec dateTimeFieldSpec2 = new DateTimeFieldSpec("Date", LONG, "1:MILLISECONDS:EPOCH", "5:MINUTES");
Assert.assertEquals(dateTimeFieldSpec1, dateTimeFieldSpec2, ERROR_MESSAGE);
// Each entry added below is {inputRecordReader, timeFieldSpec, dateTimeFieldSpec, wrapperSchema}: a reader over
// inputData, the two field specs, and the schema built from inputSchema plus the date-time spec.

// Date-time format "1:MILLISECONDS:EPOCH"
inputSchema = createPinotSchemaWithTimeSpec(timeFieldSpec);
inputRecordReader = new GenericRowRecordReader(inputData, inputSchema);
dateTimeFieldSpec = new DateTimeFieldSpec("timestampInEpoch", DataType.LONG, "1:MILLISECONDS:EPOCH", "1:HOURS");
wrapperSchema = createPinotSchemaWrapperWithDateTimeSpec(inputSchema, dateTimeFieldSpec);
entries.add(new Object[]{inputRecordReader, timeFieldSpec, dateTimeFieldSpec, wrapperSchema});

// Date-time format "1:DAYS:SIMPLE_DATE_FORMAT:yyyyMMdd"
// NOTE(review): unlike the other entries, this one reuses the inputSchema instance of the previous entry
// instead of creating a fresh one - confirm that this is intended.
inputRecordReader = new GenericRowRecordReader(inputData, inputSchema);
dateTimeFieldSpec = new DateTimeFieldSpec("timestampInEpoch", DataType.LONG, "1:DAYS:SIMPLE_DATE_FORMAT:yyyyMMdd", "1:HOURS");
wrapperSchema = createPinotSchemaWrapperWithDateTimeSpec(inputSchema, dateTimeFieldSpec);
entries.add(new Object[]{inputRecordReader, timeFieldSpec, dateTimeFieldSpec, wrapperSchema});

// Date-time format "1:HOURS:EPOCH"
inputSchema = createPinotSchemaWithTimeSpec(timeFieldSpec);
inputRecordReader = new GenericRowRecordReader(inputData, inputSchema);
dateTimeFieldSpec = new DateTimeFieldSpec("timestampInEpoch", DataType.LONG, "1:HOURS:EPOCH", "1:HOURS");
wrapperSchema = createPinotSchemaWrapperWithDateTimeSpec(inputSchema, dateTimeFieldSpec);
entries.add(new Object[]{inputRecordReader, timeFieldSpec, dateTimeFieldSpec, wrapperSchema});

// Date-time format "5:MILLISECONDS:EPOCH"
inputSchema = createPinotSchemaWithTimeSpec(timeFieldSpec);
inputRecordReader = new GenericRowRecordReader(inputData, inputSchema);
dateTimeFieldSpec = new DateTimeFieldSpec("timestampInEpoch", DataType.LONG, "5:MILLISECONDS:EPOCH", "1:HOURS");
wrapperSchema = createPinotSchemaWrapperWithDateTimeSpec(inputSchema, dateTimeFieldSpec);
entries.add(new Object[]{inputRecordReader, timeFieldSpec, dateTimeFieldSpec, wrapperSchema});

// From here on the input data and the input schema already contain the date-time column
// ("1:MILLISECONDS:EPOCH"); the remainder of this scenario lies outside the visible fragment.
dateTimeFieldSpec = new DateTimeFieldSpec("timestampInEpoch", DataType.LONG, "1:MILLISECONDS:EPOCH", "1:HOURS");
inputData = createTestDataWithTimespec(timeFieldSpec, dateTimeFieldSpec);
inputSchema = createPinotSchemaWithTimeSpec(timeFieldSpec, dateTimeFieldSpec);
dateTimeFieldSpec = new DateTimeFieldSpec("timestampInEpoch",
DataType.LONG, "1:MILLISECONDS:EPOCH", "1:HOURS");