// Returns the file-split provider and partition constraint of the dataset's primary index,
// which shares its name with the dataset.
public Pair<IFileSplitProvider, AlgebricksPartitionConstraint> getSplitProviderAndConstraints(Dataset ds)
        throws AlgebricksException {
    return getSplitProviderAndConstraints(ds, ds.getDatasetName());
}
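// A minimal usage sketch (hypothetical caller; metadataProvider and dataset are assumed to be
// in scope): the one-argument overload resolves the primary index via the dataset name, and the
// returned pair describes where the index files live and which partitions may host operators.
Pair<IFileSplitProvider, AlgebricksPartitionConstraint> primary =
        metadataProvider.getSplitProviderAndConstraints(dataset);
IFileSplitProvider primarySplits = primary.first; // file splits backing the primary index
AlgebricksPartitionConstraint primaryLocations = primary.second; // where the operators may run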
// Initialization fragment: derive the serde of the optional meta record type, then resolve the
// split providers and partition constraints for the secondary and primary indexes. The two
// dropped left-hand sides (metaSerde and the primarySplitsAndConstraint declaration) are
// restored here as assumptions from the surrounding assignments.
metaSerde =
        metaType == null ? null : SerializerDeserializerProvider.INSTANCE.getSerializerDeserializer(metaType);
Pair<IFileSplitProvider, AlgebricksPartitionConstraint> secondarySplitsAndConstraint =
        metadataProvider.getSplitProviderAndConstraints(dataset, index.getIndexName());
secondaryFileSplitProvider = secondarySplitsAndConstraint.first;
secondaryPartitionConstraint = secondarySplitsAndConstraint.second;
Pair<IFileSplitProvider, AlgebricksPartitionConstraint> primarySplitsAndConstraint =
        metadataProvider.getSplitProviderAndConstraints(dataset);
primaryFileSplitProvider = primarySplitsAndConstraint.first;
primaryPartitionConstraint = primarySplitsAndConstraint.second;
// Builds a job that runs the commit operator over all indexes of an external dataset.
public static JobSpecification buildCommitJob(Dataset ds, List<Index> indexes, MetadataProvider metadataProvider)
        throws AlgebricksException {
    JobSpecification spec = RuntimeUtils.createJobSpecification(metadataProvider.getApplicationContext());
    IStorageManager storageMgr = metadataProvider.getStorageComponentProvider().getStorageManager();
    ArrayList<IIndexDataflowHelperFactory> treeDataflowHelperFactories = new ArrayList<>();
    AlgebricksPartitionConstraint constraints = null;
    for (Index index : indexes) {
        IFileSplitProvider indexSplitProvider;
        if (isValidIndexName(index.getDatasetName(), index.getIndexName())) {
            Pair<IFileSplitProvider, AlgebricksPartitionConstraint> sAndConstraints =
                    metadataProvider.getSplitProviderAndConstraints(ds, index.getIndexName());
            indexSplitProvider = sAndConstraints.first;
            constraints = sAndConstraints.second;
        } else {
            // Fall back to the reserved files-index name of the external dataset.
            indexSplitProvider = metadataProvider.getSplitProviderAndConstraints(ds,
                    IndexingConstants.getFilesIndexName(ds.getDatasetName())).first;
        }
        IIndexDataflowHelperFactory indexDataflowHelperFactory =
                new IndexDataflowHelperFactory(storageMgr, indexSplitProvider);
        treeDataflowHelperFactories.add(indexDataflowHelperFactory);
    }
    ExternalDatasetIndexesCommitOperatorDescriptor op =
            new ExternalDatasetIndexesCommitOperatorDescriptor(spec, treeDataflowHelperFactories);
    spec.addRoot(op);
    AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(spec, op, constraints);
    spec.setConnectorPolicyAssignmentPolicy(new ConnectorPolicyAssignmentPolicy());
    return spec;
}
// Builds a job that runs the abort operator over all indexes of an external dataset.
public static JobSpecification buildAbortOp(Dataset ds, List<Index> indexes, MetadataProvider metadataProvider)
        throws AlgebricksException {
    JobSpecification spec = RuntimeUtils.createJobSpecification(metadataProvider.getApplicationContext());
    IStorageManager storageMgr = metadataProvider.getStorageComponentProvider().getStorageManager();
    ArrayList<IIndexDataflowHelperFactory> treeDataflowHelperFactories = new ArrayList<>();
    AlgebricksPartitionConstraint constraints = null;
    for (Index index : indexes) {
        IFileSplitProvider indexSplitProvider;
        if (isValidIndexName(index.getDatasetName(), index.getIndexName())) {
            Pair<IFileSplitProvider, AlgebricksPartitionConstraint> sAndConstraints =
                    metadataProvider.getSplitProviderAndConstraints(ds, index.getIndexName());
            indexSplitProvider = sAndConstraints.first;
            constraints = sAndConstraints.second;
        } else {
            indexSplitProvider = metadataProvider.getSplitProviderAndConstraints(ds,
                    IndexingConstants.getFilesIndexName(ds.getDatasetName())).first;
        }
        IIndexDataflowHelperFactory indexDataflowHelperFactory =
                new IndexDataflowHelperFactory(storageMgr, indexSplitProvider);
        treeDataflowHelperFactories.add(indexDataflowHelperFactory);
    }
    ExternalDatasetIndexesAbortOperatorDescriptor op =
            new ExternalDatasetIndexesAbortOperatorDescriptor(spec, treeDataflowHelperFactories);
    spec.addRoot(op);
    AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(spec, op, constraints);
    spec.setConnectorPolicyAssignmentPolicy(new ConnectorPolicyAssignmentPolicy());
    return spec;
}
// Builds a job that runs the recover operator over all indexes of an external dataset.
public static JobSpecification buildRecoverOp(Dataset ds, List<Index> indexes, MetadataProvider metadataProvider)
        throws AlgebricksException {
    JobSpecification spec = RuntimeUtils.createJobSpecification(metadataProvider.getApplicationContext());
    IStorageManager storageMgr = metadataProvider.getStorageComponentProvider().getStorageManager();
    ArrayList<IIndexDataflowHelperFactory> treeDataflowHelperFactories = new ArrayList<>();
    AlgebricksPartitionConstraint constraints = null;
    for (Index index : indexes) {
        IFileSplitProvider indexSplitProvider;
        if (isValidIndexName(index.getDatasetName(), index.getIndexName())) {
            Pair<IFileSplitProvider, AlgebricksPartitionConstraint> sAndConstraints =
                    metadataProvider.getSplitProviderAndConstraints(ds, index.getIndexName());
            indexSplitProvider = sAndConstraints.first;
            constraints = sAndConstraints.second;
        } else {
            indexSplitProvider = metadataProvider.getSplitProviderAndConstraints(ds,
                    IndexingConstants.getFilesIndexName(ds.getDatasetName())).first;
        }
        IIndexDataflowHelperFactory indexDataflowHelperFactory =
                new IndexDataflowHelperFactory(storageMgr, indexSplitProvider);
        treeDataflowHelperFactories.add(indexDataflowHelperFactory);
    }
    ExternalDatasetIndexesRecoverOperatorDescriptor op =
            new ExternalDatasetIndexesRecoverOperatorDescriptor(spec, treeDataflowHelperFactories);
    spec.addRoot(op);
    AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(spec, op, constraints);
    spec.setConnectorPolicyAssignmentPolicy(new ConnectorPolicyAssignmentPolicy());
    return spec;
}
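// The three builders above (commit, abort, recover) are identical except for the operator
// descriptor they instantiate. A hedged refactoring sketch: the shared loop could be factored
// into a helper like the hypothetical collectIndexHelpers below (not part of the source).
private static Pair<List<IIndexDataflowHelperFactory>, AlgebricksPartitionConstraint> collectIndexHelpers(
        Dataset ds, List<Index> indexes, MetadataProvider metadataProvider) throws AlgebricksException {
    IStorageManager storageMgr = metadataProvider.getStorageComponentProvider().getStorageManager();
    List<IIndexDataflowHelperFactory> helpers = new ArrayList<>();
    AlgebricksPartitionConstraint constraints = null;
    for (Index index : indexes) {
        IFileSplitProvider indexSplitProvider;
        if (isValidIndexName(index.getDatasetName(), index.getIndexName())) {
            Pair<IFileSplitProvider, AlgebricksPartitionConstraint> sAndConstraints =
                    metadataProvider.getSplitProviderAndConstraints(ds, index.getIndexName());
            indexSplitProvider = sAndConstraints.first;
            constraints = sAndConstraints.second;
        } else {
            // Fall back to the reserved files-index name of the external dataset.
            indexSplitProvider = metadataProvider.getSplitProviderAndConstraints(ds,
                    IndexingConstants.getFilesIndexName(ds.getDatasetName())).first;
        }
        helpers.add(new IndexDataflowHelperFactory(storageMgr, indexSplitProvider));
    }
    return new Pair<>(helpers, constraints);
}
// Each builder would then only create its specific operator descriptor from the returned
// helpers and set the partition constraint as above.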
public static JobSpecification dropDatasetJobSpec(Dataset dataset, MetadataProvider metadataProvider)
        throws AlgebricksException, ACIDException {
    LOGGER.info("DROP DATASET: " + dataset);
    if (dataset.getDatasetType() == DatasetType.EXTERNAL) {
        // An external dataset has no locally stored primary index to drop; return an empty job.
        return RuntimeUtils.createJobSpecification(metadataProvider.getApplicationContext());
    }
    JobSpecification specPrimary = RuntimeUtils.createJobSpecification(metadataProvider.getApplicationContext());
    Pair<IFileSplitProvider, AlgebricksPartitionConstraint> splitsAndConstraint =
            metadataProvider.getSplitProviderAndConstraints(dataset);
    IIndexDataflowHelperFactory indexHelperFactory = new IndexDataflowHelperFactory(
            metadataProvider.getStorageComponentProvider().getStorageManager(), splitsAndConstraint.first);
    IndexDropOperatorDescriptor primaryBtreeDrop = new IndexDropOperatorDescriptor(specPrimary, indexHelperFactory);
    AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(specPrimary, primaryBtreeDrop,
            splitsAndConstraint.second);
    specPrimary.addRoot(primaryBtreeDrop);
    return specPrimary;
}
// Fragment: the left-hand side of the first call (the splitsAndConstraint declaration) was
// dropped and is restored here from its use on the next line.
Pair<IFileSplitProvider, AlgebricksPartitionConstraint> splitsAndConstraint =
        metadataProvider.getSplitProviderAndConstraints(dataset);
FileSplit[] fs = splitsAndConstraint.first.getFileSplits();
StringBuilder sb = new StringBuilder();
// Builds a job that drops the files index of an external dataset.
public static JobSpecification buildDropFilesIndexJobSpec(MetadataProvider metadataProvider, Dataset dataset)
        throws AlgebricksException {
    String indexName = IndexingConstants.getFilesIndexName(dataset.getDatasetName());
    JobSpecification spec = RuntimeUtils.createJobSpecification(metadataProvider.getApplicationContext());
    Pair<IFileSplitProvider, AlgebricksPartitionConstraint> splitsAndConstraint =
            metadataProvider.getSplitProviderAndConstraints(dataset, indexName);
    IIndexDataflowHelperFactory dataflowHelperFactory = new IndexDataflowHelperFactory(
            metadataProvider.getStorageComponentProvider().getStorageManager(), splitsAndConstraint.first);
    IndexDropOperatorDescriptor btreeDrop = new IndexDropOperatorDescriptor(spec, dataflowHelperFactory);
    AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(spec, btreeDrop,
            splitsAndConstraint.second);
    spec.addRoot(btreeDrop);
    return spec;
}
@Override
public JobSpecification buildDropJobSpec(Set<DropOption> options) throws AlgebricksException {
    JobSpecification spec = RuntimeUtils.createJobSpecification(metadataProvider.getApplicationContext());
    Pair<IFileSplitProvider, AlgebricksPartitionConstraint> splitsAndConstraint =
            metadataProvider.getSplitProviderAndConstraints(dataset, index.getIndexName());
    IIndexDataflowHelperFactory dataflowHelperFactory = new IndexDataflowHelperFactory(
            metadataProvider.getStorageComponentProvider().getStorageManager(), splitsAndConstraint.first);
    // The index drop operation should be persistent regardless of whether the dataset is
    // temporary or permanent.
    IndexDropOperatorDescriptor btreeDrop = new IndexDropOperatorDescriptor(spec, dataflowHelperFactory, options);
    btreeDrop.setSourceLocation(sourceLoc);
    AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(spec, btreeDrop,
            splitsAndConstraint.second);
    spec.addRoot(btreeDrop);
    return spec;
}
// Fragment: the dropped left-hand side is restored; the variable name splitsAndConstraint is
// assumed, following the pattern used throughout these snippets.
Pair<IFileSplitProvider, AlgebricksPartitionConstraint> splitsAndConstraint =
        getSplitProviderAndConstraints(dataset);
long numElementsHint = getCardinalityPerPartitionHint(dataset);
@Override
public JobSpecification buildCompactJobSpec() throws AlgebricksException {
    JobSpecification spec = RuntimeUtils.createJobSpecification(metadataProvider.getApplicationContext());
    Pair<IFileSplitProvider, AlgebricksPartitionConstraint> splitsAndConstraint =
            metadataProvider.getSplitProviderAndConstraints(dataset, index.getIndexName());
    IIndexDataflowHelperFactory dataflowHelperFactory = new IndexDataflowHelperFactory(
            metadataProvider.getStorageComponentProvider().getStorageManager(), splitsAndConstraint.first);
    LSMTreeIndexCompactOperatorDescriptor compactOp =
            new LSMTreeIndexCompactOperatorDescriptor(spec, dataflowHelperFactory);
    compactOp.setSourceLocation(sourceLoc);
    // secondaryPartitionConstraint is a helper field populated during initialization (see the
    // fragment above that assigns secondarySplitsAndConstraint.second).
    AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(spec, compactOp,
            secondaryPartitionConstraint);
    spec.addRoot(compactOp);
    spec.setConnectorPolicyAssignmentPolicy(new ConnectorPolicyAssignmentPolicy());
    return spec;
}
}
        Dataset dataset) throws AlgebricksException {
    Pair<IFileSplitProvider, AlgebricksPartitionConstraint> primarySplitsAndConstraint =
            metadataProvider.getSplitProviderAndConstraints(dataset);
    IFileSplitProvider primaryFileSplitProvider = primarySplitsAndConstraint.first;
    AlgebricksPartitionConstraint primaryPartitionConstraint = primarySplitsAndConstraint.second;
// Builds a job that replaces the content of an external dataset's files index with the given
// snapshot of external files.
public static JobSpecification buildFilesIndexUpdateJobSpec(Dataset dataset,
        List<ExternalFile> externalFilesSnapshot, MetadataProvider metadataProvider) throws AlgebricksException {
    IStorageComponentProvider storageComponentProvider = metadataProvider.getStorageComponentProvider();
    JobSpecification spec = RuntimeUtils.createJobSpecification(metadataProvider.getApplicationContext());
    Pair<IFileSplitProvider, AlgebricksPartitionConstraint> secondarySplitsAndConstraint = metadataProvider
            .getSplitProviderAndConstraints(dataset, IndexingConstants.getFilesIndexName(dataset.getDatasetName()));
    IFileSplitProvider secondaryFileSplitProvider = secondarySplitsAndConstraint.first;
    IIndexDataflowHelperFactory dataflowHelperFactory = new IndexDataflowHelperFactory(
            storageComponentProvider.getStorageManager(), secondaryFileSplitProvider);
    ExternalFilesIndexModificationOperatorDescriptor externalFilesOp =
            new ExternalFilesIndexModificationOperatorDescriptor(spec, dataflowHelperFactory,
                    externalFilesSnapshot);
    AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(spec, externalFilesOp,
            secondarySplitsAndConstraint.second);
    spec.addRoot(externalFilesOp);
    spec.setConnectorPolicyAssignmentPolicy(new ConnectorPolicyAssignmentPolicy());
    return spec;
}
public static JobSpecification compactDatasetJobSpec(Dataverse dataverse, String datasetName,
        MetadataProvider metadataProvider) throws AlgebricksException {
    String dataverseName = dataverse.getDataverseName();
    Dataset dataset = metadataProvider.findDataset(dataverseName, datasetName);
    if (dataset == null) {
        throw new AsterixException("Could not find dataset " + datasetName + " in dataverse " + dataverseName);
    }
    JobSpecification spec = RuntimeUtils.createJobSpecification(metadataProvider.getApplicationContext());
    Pair<IFileSplitProvider, AlgebricksPartitionConstraint> splitsAndConstraint =
            metadataProvider.getSplitProviderAndConstraints(dataset);
    IIndexDataflowHelperFactory indexHelperFactory = new IndexDataflowHelperFactory(
            metadataProvider.getStorageComponentProvider().getStorageManager(), splitsAndConstraint.first);
    LSMTreeIndexCompactOperatorDescriptor compactOp =
            new LSMTreeIndexCompactOperatorDescriptor(spec, indexHelperFactory);
    AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(spec, compactOp,
            splitsAndConstraint.second);
    spec.addRoot(compactOp);
    return spec;
}
        MetadataProvider metadataProvider) throws AlgebricksException {
    Pair<IFileSplitProvider, AlgebricksPartitionConstraint> primarySplitsAndConstraint =
            metadataProvider.getSplitProviderAndConstraints(dataset);
    AlgebricksPartitionConstraint primaryPartitionConstraint = primarySplitsAndConstraint.second;
RecordDescriptor outputRecDesc = JobGenHelper.mkRecordDescriptor(typeEnv, opSchema, context);
Pair<IFileSplitProvider, AlgebricksPartitionConstraint> secondarySplitsAndConstraint =
        metadataProvider.getSplitProviderAndConstraints(dataset, indexName);
// Builds and runs a job that flushes the given dataset: an empty tuple source is connected to
// the flush operator, constrained to the partitions that host the primary index.
public static void flushDataset(IHyracksClientConnection hcc, MetadataProvider metadataProvider, Dataset dataset)
        throws Exception {
    CompilerProperties compilerProperties = metadataProvider.getApplicationContext().getCompilerProperties();
    int frameSize = compilerProperties.getFrameSize();
    JobSpecification spec = new JobSpecification(frameSize);
    RecordDescriptor[] rDescs = new RecordDescriptor[] { new RecordDescriptor(new ISerializerDeserializer[] {}) };
    AlgebricksMetaOperatorDescriptor emptySource = new AlgebricksMetaOperatorDescriptor(spec, 0, 1,
            new IPushRuntimeFactory[] { new EmptyTupleSourceRuntimeFactory() }, rDescs);
    TxnId txnId = metadataProvider.getTxnIdFactory().create();
    FlushDatasetOperatorDescriptor flushOperator =
            new FlushDatasetOperatorDescriptor(spec, txnId, dataset.getDatasetId());
    spec.connect(new OneToOneConnectorDescriptor(spec), emptySource, 0, flushOperator, 0);
    Pair<IFileSplitProvider, AlgebricksPartitionConstraint> primarySplitsAndConstraint =
            metadataProvider.getSplitProviderAndConstraints(dataset, dataset.getDatasetName());
    AlgebricksPartitionConstraint primaryPartitionConstraint = primarySplitsAndConstraint.second;
    AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(spec, emptySource,
            primaryPartitionConstraint);
    JobEventListenerFactory jobEventListenerFactory = new JobEventListenerFactory(txnId, true);
    spec.setJobletEventListenerFactory(jobEventListenerFactory);
    JobUtils.runJob(hcc, spec, true);
}
String fileIndexName = IndexingConstants.getFilesIndexName(dataset.getDatasetName());
Pair<IFileSplitProvider, AlgebricksPartitionConstraint> spPc =
        metadataProvider.getSplitProviderAndConstraints(dataset, fileIndexName);
Index fileIndex = MetadataManager.INSTANCE.getIndex(mdTxnCtx, dataset.getDataverseName(),
        dataset.getDatasetName(), fileIndexName);
// Builds a job that creates the files index of an external dataset and bulk-loads it with the
// given snapshot of external files.
public static JobSpecification buildFilesIndexCreateJobSpec(Dataset dataset,
        List<ExternalFile> externalFilesSnapshot, MetadataProvider metadataProvider) throws AlgebricksException {
    IStorageComponentProvider storageComponentProvider = metadataProvider.getStorageComponentProvider();
    JobSpecification spec = RuntimeUtils.createJobSpecification(metadataProvider.getApplicationContext());
    Pair<ILSMMergePolicyFactory, Map<String, String>> compactionInfo =
            DatasetUtil.getMergePolicyFactory(dataset, metadataProvider.getMetadataTxnContext());
    ILSMMergePolicyFactory mergePolicyFactory = compactionInfo.first;
    Map<String, String> mergePolicyProperties = compactionInfo.second;
    Pair<IFileSplitProvider, AlgebricksPartitionConstraint> secondarySplitsAndConstraint = metadataProvider
            .getSplitProviderAndConstraints(dataset, IndexingConstants.getFilesIndexName(dataset.getDatasetName()));
    IFileSplitProvider secondaryFileSplitProvider = secondarySplitsAndConstraint.first;
    String fileIndexName = IndexingConstants.getFilesIndexName(dataset.getDatasetName());
    Index fileIndex = MetadataManager.INSTANCE.getIndex(metadataProvider.getMetadataTxnContext(),
            dataset.getDataverseName(), dataset.getDatasetName(), fileIndexName);
    ARecordType recordType = (ARecordType) metadataProvider.findType(dataset.getItemTypeDataverseName(),
            dataset.getItemTypeName());
    IResourceFactory resourceFactory = dataset.getResourceFactory(metadataProvider, fileIndex, recordType, null,
            mergePolicyFactory, mergePolicyProperties);
    IIndexBuilderFactory indexBuilderFactory = new IndexBuilderFactory(storageComponentProvider.getStorageManager(),
            secondaryFileSplitProvider, resourceFactory, true);
    IIndexDataflowHelperFactory dataflowHelperFactory = new IndexDataflowHelperFactory(
            storageComponentProvider.getStorageManager(), secondaryFileSplitProvider);
    ExternalFilesIndexCreateOperatorDescriptor externalFilesOp = new ExternalFilesIndexCreateOperatorDescriptor(
            spec, indexBuilderFactory, dataflowHelperFactory, externalFilesSnapshot);
    AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(spec, externalFilesOp,
            secondarySplitsAndConstraint.second);
    spec.addRoot(externalFilesOp);
    spec.setConnectorPolicyAssignmentPolicy(new ConnectorPolicyAssignmentPolicy());
    return spec;
}
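// A minimal usage sketch (hypothetical wiring; hcc, dataset, externalFilesSnapshot, and
// metadataProvider are assumed to be in scope): the returned specification is an ordinary
// Hyracks job and can be submitted the same way flushDataset runs its job above.
JobSpecification filesIndexSpec =
        buildFilesIndexCreateJobSpec(dataset, externalFilesSnapshot, metadataProvider);
JobUtils.runJob(hcc, filesIndexSpec, true);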