// Schema factory hook: returns a fresh, empty instance for the deserializer to merge fields into.
public ColumnValueCount newMessage() { return new ColumnValueCount(); }
private static long getAccurateColumnCount(String name, Iterator<DatasetSplit> splits){ long def = 0; int splitCount = 0; int columnObservation = 0; while(splits.hasNext()){ DatasetSplit split = splits.next(); splitCount++; ParquetDatasetSplitXAttr xattr = ParquetDatasetXAttrSerDe.PARQUET_DATASET_SPLIT_XATTR_SERIALIZER.deserialize(split.getExtendedProperty().toByteArray()); List<ColumnValueCount> counts = xattr.getColumnValueCountsList(); for(ColumnValueCount c : counts){ if(c.getColumn().equalsIgnoreCase(name)){ def += c.getCount(); columnObservation++; continue; } } } if(splitCount != columnObservation){ // missing metadata observations, make sure to avoid wrong result. return GroupScan.NO_COLUMN_STATS; } return def; }
// Record this column's exact value count, keyed by the column's unescaped path.
columnValueCounts.add(new ColumnValueCount() .setColumn(entry.getKey().getAsUnescapedPath()) .setCount(entry.getValue()));
/**
 * Serializes a {@link ParquetDatasetXAttr}: field 1 = selection root, field 2 = dictionary-encoded
 * columns, field 3 = repeated per-column value counts. Null fields/elements are skipped.
 */
public void writeTo(Output output, ParquetDatasetXAttr message) throws IOException {
    if (message.selectionRoot != null) {
        output.writeString(1, message.selectionRoot, false);
    }
    if (message.dictionaryEncodedColumns != null) {
        output.writeObject(2, message.dictionaryEncodedColumns, DictionaryEncodedColumns.getSchema(), false);
    }
    if (message.columnValueCounts != null) {
        for (ColumnValueCount count : message.columnValueCounts) {
            if (count != null) {
                output.writeObject(3, count, ColumnValueCount.getSchema(), true);
            }
        }
    }
}
// Record this column's exact value count, keyed by the column's unescaped path.
columnValueCounts.add(new ColumnValueCount() .setColumn(entry.getKey().getAsUnescapedPath()) .setCount(entry.getValue()));
/**
 * Deserializes fields into {@code message} until field number 0 (end of message) is read.
 * Field 1 = selection root, field 2 = dictionary-encoded columns (merged in place),
 * field 3 = repeated column value counts (list allocated lazily); unknown fields are skipped.
 */
public void mergeFrom(Input input, ParquetDatasetXAttr message) throws IOException {
    while (true) {
        int number = input.readFieldNumber(this);
        switch (number) {
            case 0:
                return;
            case 1:
                message.selectionRoot = input.readString();
                break;
            case 2:
                message.dictionaryEncodedColumns = input.mergeObject(message.dictionaryEncodedColumns, DictionaryEncodedColumns.getSchema());
                break;
            case 3:
                if (message.columnValueCounts == null) {
                    message.columnValueCounts = new ArrayList<ColumnValueCount>();
                }
                message.columnValueCounts.add(input.mergeObject(null, ColumnValueCount.getSchema()));
                break;
            default:
                input.handleUnknownField(number, this);
        }
    }
}
// Lazily allocate the repeated-field list on first use, then append the next ColumnValueCount merged from the stream.
if(message.columnValueCounts == null) message.columnValueCounts = new ArrayList<ColumnValueCount>(); message.columnValueCounts.add(input.mergeObject(null, ColumnValueCount.getSchema())); break;
/**
 * Serializes a {@link ParquetDatasetSplitXAttr}: field 1 = file path, field 2 = start offset,
 * field 3 = length, field 4 = row-group index, field 5 = cached file-system entity (update key),
 * field 6 = repeated per-column value counts. Null fields/elements are skipped.
 */
public void writeTo(Output output, ParquetDatasetSplitXAttr message) throws IOException {
    if (message.path != null) {
        output.writeString(1, message.path, false);
    }
    if (message.start != null) {
        output.writeInt64(2, message.start, false);
    }
    if (message.length != null) {
        output.writeInt64(3, message.length, false);
    }
    if (message.rowGroupIndex != null) {
        output.writeInt32(4, message.rowGroupIndex, false);
    }
    if (message.updateKey != null) {
        output.writeObject(5, message.updateKey, FileSystemCachedEntity.getSchema(), false);
    }
    if (message.columnValueCounts != null) {
        for (ColumnValueCount count : message.columnValueCounts) {
            if (count != null) {
                output.writeObject(6, count, ColumnValueCount.getSchema(), true);
            }
        }
    }
}