// Split length in bytes; delegates to the wrapped distributable.
@Override public long getLength() throws IOException, InterruptedException { return distributable.getLength(); }
// Preferred host locations for this split; delegates to the wrapped distributable.
@Override public String[] getLocations() throws IOException, InterruptedException { return distributable.getLocations(); } }
/**
 * Converts the given valid segments into distributable datamap units so the
 * pruning work can be spread across machines.
 *
 * <p>Each unit returned by the factory is tagged with the datamap schema, its
 * owning segment and the table path before being collected.
 *
 * @param segments valid segments to convert
 * @return all datamaps for the segments as distributable objects
 * @throws IOException if the factory fails while enumerating distributables
 */
public List<DataMapDistributable> toDistributable(List<Segment> segments) throws IOException {
  List<DataMapDistributable> result = new ArrayList<>();
  for (Segment seg : segments) {
    List<DataMapDistributable> perSegment = dataMapFactory.toDistributable(seg);
    for (DataMapDistributable unit : perSegment) {
      unit.setDataMapSchema(dataMapSchema);
      unit.setSegment(seg);
      unit.setTablePath(identifier.getTablePath());
    }
    result.addAll(perSegment);
  }
  return result;
}
@Override public List<DataMapDistributable> getAllUncachedDistributables( List<DataMapDistributable> distributables) throws IOException { List<DataMapDistributable> distributablesToBeLoaded = new ArrayList<>(distributables.size()); for (DataMapDistributable distributable : distributables) { Segment segment = distributable.getSegment(); Set<TableBlockIndexUniqueIdentifier> tableBlockIndexUniqueIdentifiers = getTableBlockIndexUniqueIdentifiers(segment); // filter out the tableBlockIndexUniqueIdentifiers based on distributable TableBlockIndexUniqueIdentifier validIdentifier = BlockletDataMapUtil .filterIdentifiersBasedOnDistributable(tableBlockIndexUniqueIdentifiers, (BlockletDataMapDistributable) distributable); if (null == cache.getIfPresent( new TableBlockIndexUniqueIdentifierWrapper(validIdentifier, this.getCarbonTable()))) { ((BlockletDataMapDistributable) distributable) .setTableBlockIndexUniqueIdentifier(validIdentifier); distributablesToBeLoaded.add(distributable); } } return distributablesToBeLoaded; } }
/**
 * Initializes the reader for one distributable split: clears invalid-segment
 * state from the executor cache, short-circuits when the job only clears
 * datamaps, and otherwise prunes blocklets for this split and exposes them
 * through {@code blockletIterator}.
 */
@Override public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext)
    throws IOException, InterruptedException {
  distributable = (DataMapDistributableWrapper) inputSplit;
  // clear the segmentMap and from cache in executor when there are invalid segments
  if (invalidSegments.size() > 0) {
    DataMapStoreManager.getInstance().clearInvalidSegments(table, invalidSegments);
  }
  TableDataMap tableDataMap = DataMapStoreManager.getInstance()
      .getDataMap(table, distributable.getDistributable().getDataMapSchema());
  if (isJobToClearDataMaps) {
    // if job is to clear datamaps just clear datamaps from cache and return
    DataMapStoreManager.getInstance()
        .clearDataMaps(table.getCarbonTableIdentifier().getTableUniqueName());
    // clear the segment properties cache from executor
    SegmentPropertiesAndSchemaHolder.getInstance()
        .invalidate(table.getAbsoluteTableIdentifier());
    // nothing to iterate when the job only clears caches
    blockletIterator = Collections.emptyIterator();
    return;
  }
  dataMaps = tableDataMap.getTableDataMaps(distributable.getDistributable());
  // prune blocklets for this split using the filter resolved for its unique id
  List<ExtendedBlocklet> blocklets = tableDataMap
      .prune(dataMaps, distributable.getDistributable(),
          dataMapExprWrapper.getFilterResolverIntf(distributable.getUniqueId()), partitions);
  for (ExtendedBlocklet blocklet : blocklets) {
    // tag each blocklet so results can be mapped back to this split
    blocklet.setDataMapUniqueId(distributable.getUniqueId());
  }
  blockletIterator = blocklets.iterator();
}
for (DataMap dataMap : dataMaps) { blocklets.addAll(dataMap.prune(filterExp, segmentPropertiesFetcher.getSegmentProperties(distributable.getSegment()), partitions)); .getExtendedBlocklet(blocklet, distributable.getSegment()); if (dataMapFactory.getDataMapLevel() == DataMapLevel.FG) { String blockletwritePath = serializer.serializeBlocklet((FineGrainBlocklet) blocklet, blockletwritePath); detailedBlocklet.setSegmentId(distributable.getSegment().toString()); detailedBlocklets.add(detailedBlocklet);