/** Factory hook: supplies a fresh, empty {@link ReadDefinition} instance. */
public ReadDefinition newMessage() {
  return new ReadDefinition();
}
// Stub the namespace service: any dataset lookup returns a config whose read
// definition is a fresh, empty ReadDefinition (enough for code that only checks presence).
NamespaceService ns = mock(NamespaceService.class); when(ns.getDataset(any())) .thenReturn(new DatasetConfig().setReadDefinition(new ReadDefinition()));
// Fragment: builds a DatasetConfig at tag "0" with an empty read definition; the
// trailing ')' closes an enclosing call that is not visible in this chunk.
new DatasetConfig() .setTag("0") .setReadDefinition(new ReadDefinition()) );
// Give the dataset an empty read definition, then build the definition actually used:
// it carries the serialized table attributes as its extended property, starting at
// split version 0. NOTE(review): the empty definition set first looks like it is
// replaced by readDefinition later — confirm against the surrounding code.
dataset.setReadDefinition(new ReadDefinition()); final ReadDefinition readDefinition = new ReadDefinition() .setExtendedProperty(ByteString.copyFrom(tableAttributes.toByteArray())) .setSplitVersion(0L);
/**
 * Saves a physical dataset at {@code path} whose read definition is pinned to
 * {@code splitVersion}, then stores {@code count} generated splits under that version.
 *
 * @throws NamespaceException if the dataset cannot be saved into the namespace
 */
public void savePhysicalDataset(List<String> path, DatasetType type, long splitVersion, int count) throws NamespaceException {
  final ReadDefinition readDefinition = new ReadDefinition().setSplitVersion(splitVersion);
  final DatasetConfig datasetConfig = saveDataset(path, type, config -> config.setReadDefinition(readDefinition));
  // Each generated split is keyed by (dataset, split, version) so versions can coexist.
  generateSplits(splitVersion, count)
      .forEach(s -> splitsStore.put(DatasetSplitId.of(datasetConfig, s, splitVersion), s));
}
// Build the Parquet group scan over the selected files for this dataset (all columns,
// schema supplied; last argument is null — TODO confirm its meaning at the getGroupScan
// signature), then reset the cached read definition to an empty one.
final ParquetGroupScanUtils parquetGroupScanUtils = ((ParquetFormatPlugin) formatPlugin).getGroupScan(SYSTEM_USERNAME, fsPlugin, fileSelection, datasetConfig.getFullPathList(), GroupScan.ALL_COLUMNS, schema, null); this.cachedParquetReadDefinition = new ReadDefinition();
/**
 * Returns the dataset config for this table definition, reusing {@code oldDataset}
 * when present (so its id and path survive) or minting a fresh physical-dataset
 * config with a random id. Name, read definition, owner, schema and version are
 * (re)populated in both cases.
 *
 * @return the populated dataset config
 * @throws Exception if schema retrieval fails
 */
@Override
public DatasetConfig getDataset() throws Exception {
  final DatasetConfig dataset;
  if (oldDataset == null) {
    dataset = new DatasetConfig()
        .setFullPathList(key.getPathComponents())
        .setId(new EntityId(UUID.randomUUID().toString()))
        .setType(DatasetType.PHYSICAL_DATASET);
  } else {
    dataset = oldDataset;
  }
  // Record count 100 appears to be a placeholder estimate — TODO confirm.
  // Fixed: use uppercase 'L' literal; lowercase 'l' is easily misread as '1'.
  return dataset
      .setName(key.getName())
      .setReadDefinition(new ReadDefinition()
          .setScanStats(new ScanStats()
              .setRecordCount(100L)
              .setScanFactor(ScanCostFactor.OTHER.getFactor())))
      .setOwner(SystemUser.SYSTEM_USERNAME)
      .setPhysicalDataset(new PhysicalDataset())
      .setRecordSchema(getSchema().toByteString())
      .setSchemaVersion(DatasetHelper.CURRENT_VERSION);
}
/**
 * Returns the dataset config for this table definition, reusing {@code oldDataset}
 * when present (so its id and path survive) or minting a fresh physical-dataset
 * config with a random id. Name, read definition, owner, schema and version are
 * (re)populated in both cases.
 *
 * @return the populated dataset config
 */
@Override
public DatasetConfig getDataset() {
  final DatasetConfig dataset;
  if (oldDataset == null) {
    dataset = new DatasetConfig()
        .setFullPathList(key.getPathComponents())
        .setId(new EntityId(UUID.randomUUID().toString()))
        .setType(DatasetType.PHYSICAL_DATASET);
  } else {
    dataset = oldDataset;
  }
  // Record count 100 appears to be a placeholder estimate — TODO confirm.
  // Fixed: use uppercase 'L' literal; lowercase 'l' is easily misread as '1'.
  return dataset
      .setName(key.getName())
      .setReadDefinition(new ReadDefinition()
          .setScanStats(new ScanStats()
              .setRecordCount(100L)
              .setScanFactor(ScanCostFactor.OTHER.getFactor())))
      .setOwner(SystemUser.SYSTEM_USERNAME)
      .setPhysicalDataset(new PhysicalDataset())
      .setRecordSchema(getSchema().toByteString())
      .setSchemaVersion(DatasetHelper.CURRENT_VERSION);
}
private void buildAll(DatasetConfig datasetConfig) throws Exception { final EasyGroupScanUtils easyGroupScanUtils = ((EasyFormatPlugin) formatPlugin).getGroupScan(SYSTEM_USERNAME, fsPlugin, fileSelection, GroupScan.ALL_COLUMNS); cachedMetadata = new ReadDefinition() .setLastRefreshDate(System.currentTimeMillis()) .setScanStats(MetadataUtils.fromPojoScanStats(easyGroupScanUtils.getScanStats()).setScanFactor(ScanCostFactor.EASY.getFactor())) .setReadSignature(ByteString.copyFrom(FILE_UPDATE_KEY_SERIALIZER.serialize(updateKey))) .setPartitionColumnsList(MetadataUtils.getStringColumnNames(easyGroupScanUtils.getPartitionColumns())) .setExtendedProperty( ByteString.copyFrom(EasyDatasetXAttrSerDe.EASY_DATASET_XATTR_SERIALIZER.serialize( new EasyDatasetXAttr().setSelectionRoot(fileSelection.getSelectionRoot())))); //cachedMetadata.setSortColumnsList(easyGroupScanUtils.getSortColumns()); // TODO(AH) probably not needed since they are set in layout info? // compute splits this.cachedSplits = getSplits(datasetConfig, easyGroupScanUtils); this.builtAll = true; }
/**
 * Builds a mocked {@link SourceTableDefinition} for the dotted dataset path
 * {@code dsPath}: a saveable physical source-file dataset with a single nullable
 * string column and an empty read definition.
 *
 * @param dsPath dotted schema path, parsed via {@code SqlUtils.parseSchemaPath}
 * @return the configured mock
 */
private static SourceTableDefinition newDataset(final String dsPath) {
  final List<String> path = SqlUtils.parseSchemaPath(dsPath);
  final SourceTableDefinition ret = mock(SourceTableDefinition.class);
  when(ret.getName()).thenReturn(new NamespaceKey(path));

  final BatchSchema schema = BatchSchema.newBuilder()
      .addField(new Field("string", FieldType.nullable(ArrowType.Utf8.INSTANCE), null))
      .build();
  // Fixed: the original chain set setRecordSchema(ByteString.EMPTY) and then
  // overwrote it with the real schema later in the same chain; the dead first
  // write has been removed (last write wins, so behavior is unchanged).
  final DatasetConfig dsConfig = new DatasetConfig()
      .setName(Util.last(path))
      .setFullPathList(path)
      .setType(DatasetType.PHYSICAL_DATASET_SOURCE_FILE)
      .setPhysicalDataset(new PhysicalDataset().setFormatSettings(null))
      .setSchemaVersion(DatasetHelper.CURRENT_VERSION)
      .setRecordSchema(schema.toByteString())
      .setReadDefinition(new ReadDefinition());
  try {
    when(ret.getDataset()).thenReturn(dsConfig);
  } catch (Exception ignored) {
    // getDataset() declares a checked exception; stubbing the mock cannot actually throw.
  }
  when(ret.getType()).thenReturn(DatasetType.PHYSICAL_DATASET_SOURCE_FILE);
  when(ret.isSaveable()).thenReturn(true);
  return ret;
}
// Fragment: start of a ReadDefinition whose extended property is built from serialized
// bytes — the copyFrom argument continues past this chunk.
final ReadDefinition readDefinition = new ReadDefinition() .setExtendedProperty( ByteString.copyFrom(
// Fragment of a DatasetConfig builder chain: attaches a physical dataset, the record
// schema, and a read definition carrying partition columns and sort columns derived
// from the Hive table's storage descriptor (the mapping continues past this chunk).
.setPhysicalDataset(new PhysicalDataset()) .setRecordSchema(batchSchema.toByteString()) .setReadDefinition(new ReadDefinition() .setPartitionColumnsList(partitionColumns) .setSortColumnsList(FluentIterable.from(table.getSd().getSortCols())
private DatasetConfig addDataset(IndexedStore<byte[], NameSpaceContainer> namespace, IndexedStore<DatasetSplitId, DatasetSplit> splitsStore, String id, List<String> path, int splits) { DatasetConfig ds = new DatasetConfig() .setId(new EntityId(id)) .setName(last(path)) .setFullPathList(path) .setType(DatasetType.PHYSICAL_DATASET) .setReadDefinition(new ReadDefinition().setSplitVersion(42L)); namespace.put( NamespaceServiceImpl.getKey(new NamespaceKey(path)), new NameSpaceContainer().setType(NameSpaceContainer.Type.DATASET).setFullPathList(path).setDataset(ds)); for(int i = 0; i < splits; i++) { final String splitKey = Integer.toString(i); DatasetSplit split = new DatasetSplit() .setSplitVersion(42L) .setSplitKey(splitKey); // Generate an older dataset split id DatasetSplitId splitId = UnsafeDatasetSplitIdHelper.of(ds, splitKey); splitsStore.put(splitId, split); } return ds; }
@Test
public void testIdWithPercentageFromConfig() throws Exception {
  // A '%' in the dataset id must be percent-encoded ("%25") inside the split id.
  final DatasetConfig config = new DatasetConfig()
      .setId(new EntityId().setId("ds1%test"))
      .setReadDefinition(new ReadDefinition().setSplitVersion(0L));
  for (String key : new String[] {"s1", "s2", "s3"}) {
    final DatasetSplitId splitId = DatasetSplitId.of(config, new DatasetSplit().setSplitKey(key), 0L);
    assertEquals("ds1%25test_0_" + key, splitId.getSplitId());
  }
}
@Test
public void testIdFromConfig() throws Exception {
  // Plain dataset ids pass through unescaped: "<id>_<version>_<splitKey>".
  final DatasetConfig config = new DatasetConfig()
      .setId(new EntityId().setId("ds1"))
      .setReadDefinition(new ReadDefinition().setSplitVersion(0L));
  for (String key : new String[] {"s1", "s2", "s3"}) {
    final DatasetSplitId splitId = DatasetSplitId.of(config, new DatasetSplit().setSplitKey(key), 0L);
    assertEquals("ds1_0_" + key, splitId.getSplitId());
  }
}
@Test
public void testIdWithUnderscoreFromConfig() throws Exception {
  // '_' is the field separator, so an underscore inside the id is encoded as "%5F".
  final DatasetConfig config = new DatasetConfig()
      .setId(new EntityId().setId("ds1_test"))
      .setReadDefinition(new ReadDefinition().setSplitVersion(0L));
  for (String key : new String[] {"s1", "s2", "s3"}) {
    final DatasetSplitId splitId = DatasetSplitId.of(config, new DatasetSplit().setSplitKey(key), 0L);
    assertEquals("ds1%5Ftest_0_" + key, splitId.getSplitId());
  }
}
// Start from an empty read definition; presumably populated afterwards — confirm in the surrounding context.
ReadDefinition readDefinition = new ReadDefinition();