/**
 * Returns the residual filter expression for this file's partition — the portion of
 * the scan filter that cannot be answered by partition values alone and must still be
 * applied to rows in the file.
 */
@Override public Expression residual() { return residuals.residualFor(file.partition()); }
/**
 * Renders the file path, raw partition tuple, and residual expression for debugging.
 */
@Override
public String toString() {
  // NOTE(review): Guava's Objects.toStringHelper is deprecated in favor of
  // MoreObjects.toStringHelper — consider migrating when the import block is touched.
  Objects.ToStringHelper helper = Objects.toStringHelper(this);
  helper.add("file", file.path());
  helper.add("partition_data", file.partition());
  helper.add("residual", residual());
  return helper.toString();
}
}
/**
 * Adds a data file, replacing any existing data in its partition.
 *
 * <p>The file's partition is dropped first so that the new file becomes the sole
 * content of that partition in the resulting snapshot.
 *
 * @param file the data file to add
 * @return this for method chaining
 */
@Override
public ReplacePartitions addFile(DataFile file) {
  dropPartition(file.partition());
  add(file);
  return this;
}
/**
 * Iterates over data files that may match the filters, returning defensive copies.
 *
 * <p>Partition filtering is always pushed down to the reader; the in-memory partition
 * and metrics evaluators are an additional inclusive prune applied on top.
 */
@Override
public Iterator<DataFile> iterator() {
  boolean hasRowFilter = rowFilter != null && rowFilter != Expressions.alwaysTrue();
  boolean hasPartFilter = partFilter != null && partFilter != Expressions.alwaysTrue();
  // NOTE(review): evaluators run only when BOTH filters are non-trivial; with only a
  // row filter set, the metrics evaluator is skipped entirely. Safe (pruning here is
  // inclusive) but possibly unintended — confirm whether this should be an OR.
  if (hasRowFilter && hasPartFilter) {
    Evaluator partEvaluator = evaluator();
    InclusiveMetricsEvaluator metricsEval = metricsEvaluator();
    Iterator<DataFile> matching = Iterators.filter(
        reader.iterator(partFilter, columns),
        input -> input != null
            && partEvaluator.eval(input.partition())
            && metricsEval.eval(input));
    return Iterators.transform(matching, DataFile::copy);
  }
  return Iterators.transform(reader.iterator(partFilter, columns), DataFile::copy);
}
/**
 * Writes a manifest entry, tallying it into the per-status file counts and folding
 * its partition into the running partition statistics.
 *
 * @param entry the manifest entry to record and persist
 */
public void add(ManifestEntry entry) {
  switch (entry.status()) {
    case ADDED:
      addedFiles += 1;
      break;
    case EXISTING:
      existingFiles += 1;
      break;
    case DELETED:
      deletedFiles += 1;
      break;
  }
  // Counters are updated before any write so a writer failure leaves them consistent
  // with what was attempted.
  DataFile file = entry.file();
  stats.update(file.partition());
  writer.add(entry);
}
/**
 * Returns manifest entries that are not deleted, additionally pruned by the partition
 * and metrics evaluators when both filters are non-trivial.
 */
Iterable<ManifestEntry> liveEntries() {
  boolean hasRowFilter = rowFilter != null && rowFilter != Expressions.alwaysTrue();
  boolean hasPartFilter = partFilter != null && partFilter != Expressions.alwaysTrue();
  // NOTE(review): same AND-condition as iterator() — with only one non-trivial filter
  // the evaluators are skipped; confirm this is intended rather than an OR.
  if (hasRowFilter && hasPartFilter) {
    Evaluator partEvaluator = evaluator();
    InclusiveMetricsEvaluator metricsEval = metricsEvaluator();
    return Iterables.filter(
        reader.entries(columns),
        entry -> entry != null
            && entry.status() != Status.DELETED
            && partEvaluator.eval(entry.file().partition())
            && metricsEval.eval(entry.file()));
  }
  return Iterables.filter(
      reader.entries(columns),
      entry -> entry != null && entry.status() != Status.DELETED);
}
/**
 * Returns all manifest entries, including deleted ones, pruned by the partition and
 * metrics evaluators when both filters are non-trivial.
 */
Iterable<ManifestEntry> allEntries() {
  boolean hasRowFilter = rowFilter != null && rowFilter != Expressions.alwaysTrue();
  boolean hasPartFilter = partFilter != null && partFilter != Expressions.alwaysTrue();
  // NOTE(review): same AND-condition as iterator()/liveEntries() — confirm whether
  // a single non-trivial filter should also trigger evaluator-based pruning.
  if (hasRowFilter && hasPartFilter) {
    Evaluator partEvaluator = evaluator();
    InclusiveMetricsEvaluator metricsEval = metricsEvaluator();
    return Iterables.filter(
        reader.entries(columns),
        entry -> entry != null
            && partEvaluator.eval(entry.file().partition())
            && metricsEval.eval(entry.file()));
  }
  // Unlike liveEntries(), DELETED entries are intentionally included here.
  return reader.entries(columns);
}
/**
 * Initializes this builder from an existing data file, copying all file-level fields
 * and column-level metrics.
 *
 * @param toCopy the data file whose fields seed this builder
 * @return this for method chaining
 */
public Builder copy(DataFile toCopy) {
  if (isPartitioned) {
    // Reuse the current partition buffer when possible instead of always allocating.
    this.partitionData = copyPartitionData(spec, toCopy.partition(), partitionData);
  }
  // File-level fields.
  this.filePath = toCopy.path().toString();
  this.format = toCopy.format();
  this.recordCount = toCopy.recordCount();
  this.fileSizeInBytes = toCopy.fileSizeInBytes();
  this.blockSizeInBytes = toCopy.blockSizeInBytes();
  // Column-level metrics maps.
  this.columnSizes = toCopy.columnSizes();
  this.valueCounts = toCopy.valueCounts();
  this.nullValueCounts = toCopy.nullValueCounts();
  this.lowerBounds = toCopy.lowerBounds();
  this.upperBounds = toCopy.upperBounds();
  return this;
}
// NOTE(review): this span looks like a merge-garbled duplicate — two variants of the
// same delete-check fragment are concatenated. Concrete problems:
//   1. `DataFile file` is declared twice in the same scope (compile error).
//   2. The first `throw new DeleteException(...)` is unconditional, so everything after
//      it in this scope is unreachable; in the second variant it presumably belongs
//      under a guard (e.g. a fail-on-delete flag) not visible here.
//   3. Brace nesting is unbalanced within this view; the closing braces must be past
//      the end of this chunk. Reconcile against the upstream version before shipping.
// First variant (no entry-status check):
DataFile file = entry.file();
// A file is deleted either by explicit path or by having its whole partition dropped.
boolean fileDelete = (deletePaths.contains(pathWrapper.set(file.path())) ||
    dropPartitions.contains(partitionWrapper.set(file.partition())));
if (fileDelete || inclusive.eval(file.partition())) {
  // Deleting is only valid when ALL rows in the file match: strict projection or
  // metrics must prove it, unless the whole file was targeted directly.
  ValidationException.check(
      fileDelete || strict.eval(file.partition()) || metricsEvaluator.eval(file),
      "Cannot delete file where some, but not all, rows match filter %s: %s",
      this.deleteExpression, file.path());
  throw new DeleteException(writeSpec().partitionToPath(file.partition()));
  // Second variant (duplicate declarations below — see NOTE above):
  DataFile file = entry.file();
  boolean fileDelete = (deletePaths.contains(pathWrapper.set(file.path())) ||
      dropPartitions.contains(partitionWrapper.set(file.partition())));
  // This variant correctly skips entries that are already deleted.
  if (entry.status() != Status.DELETED) {
    if (fileDelete || inclusive.eval(file.partition())) {
      ValidationException.check(
          fileDelete || strict.eval(file.partition()) || metricsEvaluator.eval(file),
          "Cannot delete file where some, but not all, rows match filter %s: %s",
          this.deleteExpression, file.path());
// Aggregate this entry's file into the top-N table keyed by its partition path,
// creating a fresh PartitionMetrics on first sight of the partition.
String partition = spec.partitionToPath(entry.file().partition());
topN.update(partition, metrics -> {
  PartitionMetrics current = (metrics == null) ? new PartitionMetrics() : metrics;
  return current.updateFromFile(entry.file(), timestamp);
});
// Build a joined row whose right side is the file's partition tuple converted to a
// Spark InternalRow; the left side is presumably attached downstream — TODO confirm.
JoinedRow joined = new JoinedRow();
InternalRow partition = convertToRow.apply(file.partition());
joined.withRight(partition);
@Override public List<ManifestFile> apply(TableMetadata base) { if (validateAddedFiles) { PartitionSpec spec = writeSpec(); Expression rowFilter = rowFilter(); Expression inclusiveExpr = Projections.inclusive(spec).project(rowFilter); Evaluator inclusive = new Evaluator(spec.partitionType(), inclusiveExpr); Expression strictExpr = Projections.strict(spec).project(rowFilter); Evaluator strict = new Evaluator(spec.partitionType(), strictExpr); StrictMetricsEvaluator metrics = new StrictMetricsEvaluator( base.schema(), rowFilter); for (DataFile file : addedFiles()) { // the real test is that the strict or metrics test matches the file, indicating that all // records in the file match the filter. inclusive is used to avoid testing the metrics, // which is more complicated ValidationException.check( inclusive.eval(file.partition()) && (strict.eval(file.partition()) || metrics.eval(file)), "Cannot append file with rows that do not match filter: %s: %s", rowFilter, file.path()); } } return super.apply(base); } }
// Fold this entry's partition tuple into the running partition statistics.
stats.update(entry.file().partition());