/**
 * Imports an existing schema stored at the given path. This
 * is generally used to bring in schemas written by previous
 * versions of this library.
 *
 * @param schemaPath A path to a schema to import
 * @return The URI of the schema file managed by this manager.
 */
public URI importSchema(Path schemaPath) {
  // Load-then-write in one expression; writeSchema takes ownership of the copy.
  return writeSchema(loadSchema(schemaPath));
}
@Test
public void testSameSchemaUpdate() throws IOException {
  SchemaManager manager = SchemaManager.create(getConfiguration(), testDirectory);

  // Writing an identical schema twice must be idempotent.
  URI firstUri = manager.writeSchema(DatasetTestUtilities.USER_SCHEMA);
  URI secondUri = manager.writeSchema(DatasetTestUtilities.USER_SCHEMA);

  Assert.assertEquals("Updating with the same schema should not create a new URI",
      firstUri, secondUri);
}
}
new Path(metadataLocation, SCHEMA_DIRECTORY_NAME)); manager.writeSchema(descriptor.getSchema());
/**
 * Updates the named dataset's descriptor: writes the new schema into the
 * table's managed schema directory and alters the Hive table to match.
 *
 * @param namespace the dataset's namespace
 * @param name the dataset's name
 * @param descriptor the descriptor to store
 * @return the stored descriptor, including the managed schema URI
 * @throws DatasetNotFoundException if no matching Hive table exists
 * @throws DatasetIOException if the schema cannot be written
 */
@Override
public DatasetDescriptor update(String namespace, String name,
    DatasetDescriptor descriptor) {
  Compatibility.checkDatasetName(namespace, name);
  Compatibility.checkDescriptor(descriptor);

  String resolved = resolveNamespace(namespace, name);
  if (resolved == null) {
    throw new DatasetNotFoundException(
        "Hive table not found: " + namespace + "." + name);
  }

  Table table = getMetaStoreUtil().getTable(resolved, name);
  Path managerPath = new Path(new Path(table.getSd().getLocation()),
      SCHEMA_DIRECTORY);
  SchemaManager manager = SchemaManager.create(conf, managerPath);

  DatasetDescriptor newDescriptor;
  try {
    // Persist the schema under the manager and point the descriptor at it.
    URI schemaURI = manager.writeSchema(descriptor.getSchema());
    newDescriptor = new DatasetDescriptor.Builder(descriptor)
        .schemaUri(schemaURI).build();
  } catch (IOException e) {
    throw new DatasetIOException("Unable to create schema", e);
  }

  HiveUtils.updateTableSchema(table, newDescriptor);
  getMetaStoreUtil().alterTable(table);

  // Fix: return the descriptor that was actually persisted (with the managed
  // schema URI) instead of the caller's original, which lacks that URI.
  return newDescriptor;
}
/**
 * Updates the named dataset's descriptor: writes the new schema into the
 * table's managed schema directory and alters the Hive table to match.
 *
 * @param namespace the dataset's namespace
 * @param name the dataset's name
 * @param descriptor the descriptor to store
 * @return the stored descriptor, including the managed schema URI
 * @throws DatasetNotFoundException if no matching Hive table exists
 * @throws DatasetIOException if the schema cannot be written
 */
@Override
public DatasetDescriptor update(String namespace, String name,
    DatasetDescriptor descriptor) {
  Compatibility.checkDatasetName(namespace, name);
  Compatibility.checkDescriptor(descriptor);

  String resolved = resolveNamespace(namespace, name);
  if (resolved == null) {
    throw new DatasetNotFoundException(
        "Hive table not found: " + namespace + "." + name);
  }

  Table table = getMetaStoreUtil().getTable(resolved, name);
  Path managerPath = new Path(new Path(table.getSd().getLocation()),
      SCHEMA_DIRECTORY);
  SchemaManager manager = SchemaManager.create(conf, managerPath);

  DatasetDescriptor newDescriptor;
  try {
    // Persist the schema under the manager and point the descriptor at it.
    URI schemaURI = manager.writeSchema(descriptor.getSchema());
    newDescriptor = new DatasetDescriptor.Builder(descriptor)
        .schemaUri(schemaURI).build();
  } catch (IOException e) {
    throw new DatasetIOException("Unable to create schema", e);
  }

  HiveUtils.updateTableSchema(table, newDescriptor);
  getMetaStoreUtil().alterTable(table);

  // Fix: return the descriptor that was actually persisted (with the managed
  // schema URI) instead of the caller's original, which lacks that URI.
  return newDescriptor;
}
@Test
public void testCreateSchema() throws IOException {
  SchemaManager manager = SchemaManager.create(getConfiguration(), testDirectory);

  manager.writeSchema(DatasetTestUtilities.USER_SCHEMA);

  // The schema just written should round-trip as the newest schema.
  Assert.assertEquals(DatasetTestUtilities.USER_SCHEMA, manager.getNewestSchema());
}
URI managedSchemaUri = manager.writeSchema(descriptor.getSchema());
@Test(expected = IncompatibleSchemaException.class) public void testIncompatibleUpdate() { SchemaManager manager = SchemaManager.create(getConfiguration(), testDirectory); // Trivially incompatible schemas should yield an exception. manager.writeSchema(SchemaBuilder.record("test") .fields() .requiredString("foo") .endRecord()); manager.writeSchema(SchemaBuilder.record("test") .fields() .requiredString("bar") .endRecord()); }
URI managedSchemaUri = manager.writeSchema(descriptor.getSchema());
@Test(expected = IncompatibleSchemaException.class) public void testIndirectIncompatibleUpdate() { SchemaManager manager = SchemaManager.create(getConfiguration(), testDirectory); // Write two schemas that are compatible since they use optional fields. manager.writeSchema(SchemaBuilder.record("test") .fields() .optionalString("foo") .endRecord()); manager.writeSchema(SchemaBuilder.record("test") .fields() .optionalString("bar") .endRecord()); // This schema creates a schema compatible with the immediately previous // version, but incompatible with the original. manager.writeSchema(SchemaBuilder.record("test") .fields() .optionalInt("foo") .endRecord()); }
URI schemaLocation = manager.writeSchema(descriptor.getSchema());
URI schemaLocation = manager.writeSchema(descriptor.getSchema());
@Test public void testUpdateSchema() throws IOException { SchemaManager manager = SchemaManager.create(getConfiguration(), testDirectory); manager.writeSchema(DatasetTestUtilities.USER_SCHEMA); Schema schema = manager.getNewestSchema(); Assert.assertEquals(DatasetTestUtilities.USER_SCHEMA, schema); // Create an updated schema and ensure it can be written. Schema updatedSchema = SchemaBuilder.record(schema.getName()) .fields() .requiredString("username") .requiredString("email") .optionalBoolean("extra_field").endRecord(); manager.writeSchema(updatedSchema); Assert.assertEquals(updatedSchema, manager.getNewestSchema()); }
@Test public void testManyUpdates() throws IOException { SchemaManager manager = SchemaManager.create(getConfiguration(), testDirectory); // Create an updated schema and ensure it can be written. for (int i = 0; i < 20; ++i) { SchemaBuilder.FieldAssembler<Schema> fields = SchemaBuilder .record("test").fields(); for (int j = 0; j <= i; ++j) { fields.optionalString("field_" + j); } Schema schema = fields.endRecord(); manager.writeSchema(schema); // Ensure we always see the newest schema on load. Assert.assertEquals(schema, manager.getNewestSchema()); } // Make sure all of the updates are in place. Map<Integer, Schema> schemas = manager.getSchemas(); Assert.assertEquals(20, schemas.size()); }