/**
 * Returns true when the dataset config is non-null and carries both serialized
 * schema bytes and a read definition.
 */
private boolean isComplete(DatasetConfig config) {
  if (config == null) {
    return false;
  }
  return DatasetHelper.getSchemaBytes(config) != null && config.getReadDefinition() != null;
}
/**
 * Checks whether the folder at the given path inside the given source is backed by a
 * physical dataset.
 *
 * @param sourceName source containing the folder (used only for logging)
 * @param folderPath folder to check
 * @return true if a dataset exists at the folder path and its type is physical;
 *     false when the namespace lookup fails
 */
public boolean isPhysicalDataset(SourceName sourceName, SourceFolderPath folderPath) {
  try {
    DatasetConfig ds =
        namespaceService.getDataset(new PhysicalDatasetPath(folderPath).toNamespaceKey());
    return DatasetHelper.isPhysicalDataset(ds.getType());
  } catch (NamespaceException nse) {
    // Pass the exception itself as the final argument so SLF4J records the stack trace
    // instead of only nse.toString().
    logger.debug(
        "Error while checking physical dataset in source {} for folder {}",
        sourceName.getName(),
        folderPath.toPathString(),
        nse);
    return false;
  }
}
/**
 * Deserializes the batch schema stored in the given dataset config.
 *
 * @param config dataset whose serialized schema bytes are read
 * @return the deserialized schema
 * @throws IllegalStateException if no schema bytes are present ("unavailable") or if
 *     deserialization fails ("corrupt", with the underlying cause attached)
 */
public static BatchSchema fromDataset(DatasetConfig config) {
  // Check for missing bytes BEFORE the try block. The original threw the "unavailable"
  // IllegalStateException inside the try, where the generic catch immediately re-wrapped
  // it as "corrupt", masking the real cause of the failure.
  final ByteString bytes = DatasetHelper.getSchemaBytes(config);
  if (bytes == null) {
    throw new IllegalStateException(
        String.format(
            "Schema is currently unavailable for dataset %s.",
            PathUtils.constructFullPath(config.getFullPathList())));
  }
  try {
    return deserialize(bytes);
  } catch (Exception ex) {
    throw new IllegalStateException(
        String.format(
            "Schema for dataset %s is corrupt.",
            PathUtils.constructFullPath(config.getFullPathList())),
        ex);
  }
}
/**
 * Extracts the Arrow fields from the dataset config's serialized schema, skipping
 * Dremio system columns.
 *
 * @param datasetConfig dataset to read the schema from
 * @return the non-system Arrow fields, or null when the config has no schema bytes
 */
public static List<org.apache.arrow.vector.types.pojo.Field> getArrowFieldsFromDatasetConfig(
    DatasetConfig datasetConfig) {
  final ByteString schemaBytes = DatasetHelper.getSchemaBytes(datasetConfig);
  if (schemaBytes == null) {
    return null;
  }
  final BatchSchema batchSchema = BatchSchema.deserialize(schemaBytes);
  final List<org.apache.arrow.vector.types.pojo.Field> fields = Lists.newArrayList();
  for (int index = 0; index < batchSchema.getFieldCount(); index++) {
    final org.apache.arrow.vector.types.pojo.Field column = batchSchema.getColumn(index);
    if (!NamespaceTable.SYSTEM_COLUMNS.contains(column.getName())) {
      fields.add(column);
    }
  }
  return fields;
}
}
public static String[] getColumnsLowerCase(DatasetConfig datasetConfig) { final ByteString schemaBytes = DatasetHelper.getSchemaBytes(datasetConfig); if (schemaBytes != null) { Schema schema = Schema.getRootAsSchema(schemaBytes.asReadOnlyByteBuffer()); org.apache.arrow.vector.types.pojo.Schema s = org.apache.arrow.vector.types.pojo.Schema.convertSchema(schema); return s.getFields().stream().map(input -> input.getName().toLowerCase()).toArray(String[]::new); } else { // If virtual dataset was created with view fields if (datasetConfig.getType() == DatasetType.VIRTUAL_DATASET) { final List<ViewFieldType> viewFieldTypes = datasetConfig.getVirtualDataset().getSqlFieldsList(); if (notEmpty(viewFieldTypes)) { return viewFieldTypes.stream().map(input -> input.getName().toLowerCase()).toArray(String[]::new); } } } return new String[0]; }
/**
 * Produces a copy of the old config whose record schema reflects the newly observed
 * schema: adopted as-is when the old config has no schema bytes, otherwise merged
 * into the existing schema.
 *
 * @param oldConfig existing dataset config (must not be null)
 * @param expectedSchema schema the caller expected (unused here, part of the interface)
 * @param newlyObservedSchema schema just observed (must not be null)
 * @return a cloned config carrying the resulting serialized schema
 */
@Override
public DatasetConfig updateForSchemaChange(
    DatasetConfig oldConfig, BatchSchema expectedSchema, BatchSchema newlyObservedSchema) {
  Preconditions.checkNotNull(oldConfig);
  Preconditions.checkNotNull(newlyObservedSchema);
  final BatchSchema resultSchema =
      DatasetHelper.getSchemaBytes(oldConfig) == null
          ? newlyObservedSchema
          : BatchSchema.fromDataset(oldConfig).merge(newlyObservedSchema);
  final DatasetConfig updatedConfig = clone(oldConfig);
  updatedConfig.setRecordSchema(ByteString.copyFrom(resultSchema.serialize()));
  return updatedConfig;
}
/**
 * Rewrites the dataset's record schema from the old flatbuffer layout to the new one.
 *
 * @param datasetConfig config to migrate; may be null
 * @return the updated config, or null when there is no config, no schema bytes to
 *     migrate, or the conversion fails
 */
private DatasetConfig update(DatasetConfig datasetConfig) {
  if (datasetConfig == null) {
    return null;
  }
  final io.protostuff.ByteString schemaBytes = DatasetHelper.getSchemaBytes(datasetConfig);
  if (schemaBytes == null) {
    return null;
  }
  try {
    final OldSchema oldSchema = OldSchema.getRootAsOldSchema(schemaBytes.asReadOnlyByteBuffer());
    final byte[] convertedBytes = convertFromOldSchema(oldSchema);
    datasetConfig.setRecordSchema(ByteString.copyFrom(convertedBytes));
    return datasetConfig;
  } catch (Exception e) {
    // NOTE(review): errors go to stdout, presumably because this runs as an upgrade/CLI
    // task without a logger in scope — confirm before converting to SLF4J.
    final List<String> fullPath =
        Optional.ofNullable(datasetConfig.getFullPathList()).orElse(Lists.newArrayList());
    System.out.println("Unable to update Arrow Schema for: " + PathUtils.constructFullPath(fullPath));
    e.printStackTrace(System.out);
    return null;
  }
}
/**
 * Gets the view fields for a dataset: taken from the serialized batch schema when
 * present, otherwise falling back to the SQL field list saved with virtual datasets.
 *
 * @param config dataset config
 * @return list of view fields, or null when neither source is available
 */
public static List<ViewFieldType> getViewFields(DatasetConfig config) {
  final ByteString schemaBytes = DatasetHelper.getSchemaBytes(config);
  if (schemaBytes != null) {
    return getBatchSchemaFields(BatchSchema.deserialize(schemaBytes.toByteArray()));
  }
  if (config.getType() == DatasetType.VIRTUAL_DATASET) {
    return config.getVirtualDataset().getSqlFieldsList();
  }
  return null;
}
private ViewTable createTableFromVirtualDataset(DatasetConfig datasetConfig, MetadataRequestOptions options) { try { View view = Views.fieldTypesToView( Iterables.getLast(datasetConfig.getFullPathList()), datasetConfig.getVirtualDataset().getSql(), ViewFieldsHelper.getCalciteViewFields(datasetConfig), datasetConfig.getVirtualDataset().getContextList() ); // 1.4.0 and earlier didn't correctly save virtual dataset schema information. BatchSchema schema = DatasetHelper.getSchemaBytes(datasetConfig) != null ? BatchSchema.fromDataset(datasetConfig) : null; return new ViewTable(new NamespaceKey(datasetConfig.getFullPathList()), view, datasetConfig, schema); } catch (Exception e) { logger.warn("Failure parsing virtual dataset, not including in available schema.", e); return null; } }
Objects.equals(DatasetHelper.getSchemaBytes(currentConfig), DatasetHelper.getSchemaBytes(datasetConfig))) { return false;
.setAccelerationNeverRefresh(settings.getNeverRefresh()); final ByteString schemaBytes = DatasetHelper.getSchemaBytes(config); if (schemaBytes != null) { final BatchSchema schema = BatchSchema.deserialize(schemaBytes.toByteArray());
case PHYSICAL_DATASET_SOURCE_FILE: case PHYSICAL_DATASET_SOURCE_FOLDER: final ByteString schemaBytes = DatasetHelper.getSchemaBytes(datasetConfig); if (schemaBytes == null) { return null;
if(oldConfig == null || DatasetHelper.getSchemaBytes(oldConfig) == null){ schema = null; } else {
if (oldConfig != null && DatasetHelper.getSchemaBytes(oldConfig) != null) { schema = BatchSchema.fromDataset(oldConfig).merge(newSchema);
/**
 * Round-trips an Arrow schema through a physical dataset stored in the namespace and
 * verifies the bytes read back deserialize to the same schema.
 */
@Test
public void testDataSetSchema() throws Exception {
  try (final KVStoreProvider kvstore =
      new LocalKVStoreProvider(DremioTest.CLASSPATH_SCAN_RESULT, null, true, false)) {
    kvstore.start();
    final NamespaceService ns = new NamespaceServiceImpl(kvstore);
    Field field1 = new Field("a", true, new Int(32, true), null);
    Field child1 = new Field("c", true, Utf8.INSTANCE, null);
    Field field2 = new Field("b", true, Struct.INSTANCE, ImmutableList.of(child1));
    Schema schema = new Schema(ImmutableList.of(field1, field2));
    FlatBufferBuilder builder = new FlatBufferBuilder();
    // Serialize the schema exactly once and finish with that offset. The original called
    // schema.getSchema(builder) twice, writing the schema into the buffer twice and
    // discarding the first offset.
    builder.finish(schema.getSchema(builder));
    addSource(ns, "s");
    addPhysicalDS(ns, "s.foo", builder.sizedByteArray());
    ByteBuffer bb =
        ByteBuffer.wrap(
            DatasetHelper.getSchemaBytes(
                    ns.getDataset(new NamespaceKey(PathUtils.parseFullPath("s.foo"))))
                .toByteArray());
    Schema returnedSchema = Schema.convertSchema(org.apache.arrow.flatbuf.Schema.getRootAsSchema(bb));
    assertEquals(schema, returnedSchema);
  }
}
/**
 * Derives a batch schema for an HBase table by sampling up to one batch of rows.
 *
 * <p>Starts from the previously known schema (when the old config carries schema bytes),
 * adds the row-key column and one struct column per column family from the descriptor,
 * then reads a sample batch so the mutator can observe actual data.
 *
 * @param descriptor table descriptor supplying the column families
 * @param oldConfig previous dataset config; may be null (no prior schema)
 * @return the schema built from the sampled batch
 * @throws Exception from reader setup/teardown; read failures surface as UserException
 */
private BatchSchema getSampledSchema(HTableDescriptor descriptor, DatasetConfig oldConfig) throws Exception {
  BatchSchema oldSchema = null;
  ByteString bytes = oldConfig != null ? DatasetHelper.getSchemaBytes(oldConfig) : null;
  if(bytes != null) {
    oldSchema = BatchSchema.deserialize(bytes);
  }
  // Scan spec with no row-key bounds or filter: sample from the start of the table.
  final HBaseSubScanSpec spec = new HBaseSubScanSpec(getNamespace(), getTableName(), null, null, null);
  try (
    BufferAllocator allocator = context.getAllocator().newChildAllocator("hbase-sample", 0, Long.MAX_VALUE);
    SampleMutator mutator = new SampleMutator(allocator);
    HBaseRecordReader reader = new HBaseRecordReader(connect.getConnection(), spec, GroupScan.ALL_COLUMNS, null, true);
  ) {
    // Sample a single batch of at most 100 rows.
    reader.setNumRowsPerBatch(100);
    // Seed the mutator with vectors from the prior schema so known columns are retained.
    if(oldSchema != null) {
      oldSchema.materializeVectors(GroupScan.ALL_COLUMNS, mutator);
    }
    // add row key.
    mutator.addField(CompleteType.VARBINARY.toField(HBaseRecordReader.ROW_KEY), ValueVector.class);
    // add all column families.
    for (HColumnDescriptor col : descriptor.getFamilies()) {
      mutator.addField(CompleteType.struct().toField(col.getNameAsString()), ValueVector.class);
    }
    reader.setup(mutator);
    reader.next();
    // Build the schema from whatever the sampled batch populated.
    mutator.getContainer().buildSchema(SelectionVectorMode.NONE);
    return mutator.getContainer().getSchema();
  } catch (ExecutionSetupException e) {
    throw UserException.dataReadError(e).message("Unable to sample schema for table %s.", key).build(logger);
  }
}