/**
 * Signaling a single-field constraint must leave a file in the test signal
 * directory whose name is the normalized form of those constraints.
 */
@Test
public void testConstraintsSignaledReady() throws IOException {
  final SignalManager signals = new SignalManager(fileSystem, testDirectory);
  final Constraints emailOnly = new Constraints(DatasetTestUtilities.USER_SCHEMA)
      .with("email", "testConstraintsSignaledReady@domain.com");

  signals.signalReady(emailOnly);

  // the signal file is expected to be named after the normalized constraints
  final Path expectedSignal = new Path(
      this.testDirectory, SignalManager.getNormalizedConstraints(emailOnly));
  Assert.assertTrue(this.fileSystem.exists(expectedSignal));
}
/**
 * Reports whether this view's constraints have been signaled as ready.
 *
 * @return {@code true} when a signal manager is present and it reports a ready
 *         timestamp other than -1 for this view's constraints; {@code false}
 *         when there is no signal manager or no signal has been recorded
 */
@Override
public boolean isReady() {
  if (signalManager == null) {
    return false;
  }
  // -1 is the value getReadyTimestamp is compared against to mean "never signaled"
  return signalManager.getReadyTimestamp(getConstraints()) != -1;
}
}
@Test public void testConstraintsSignaledReadyPreviouslySignaled() throws IOException, InterruptedException { SignalManager manager = new SignalManager(fileSystem, testDirectory); Constraints constraints = new Constraints(DatasetTestUtilities.USER_SCHEMA) .with("email", "testConstraintsSignaledReadyPreviouslySignaled@domain.com"); String normalizedConstraints = SignalManager.getNormalizedConstraints(constraints); Path signalFilePath = new Path(this.testDirectory,normalizedConstraints); manager.signalReady(constraints); Assert.assertTrue(this.fileSystem.exists(signalFilePath)); long firstSignalContents = this.fileSystem.getFileStatus(signalFilePath).getModificationTime(); // the modified time depends on the filesystem, and may only be granular to the second // signal and check until the modified time is after the current time, or until // enough time has past that the signal should have been distinguishable long spinStart = System.currentTimeMillis(); long signaledTime = 0; while(firstSignalContents >= signaledTime && (System.currentTimeMillis() - spinStart) <= 2000) { manager.signalReady(constraints); signaledTime = manager.getReadyTimestamp(constraints); } Assert.assertFalse("Second signal should not match the first", signaledTime == firstSignalContents); }
/** * Check the last time the specified constraints have been signaled as ready. * * @param viewConstraints The constraints to check for a signal. * * @return the timestamp of the last time the constraints were signaled as ready. * if the constraints have never been signaled, -1 will be returned. * * @throws DatasetException if the signals could not be accessed. */ public long getReadyTimestamp(Constraints viewConstraints) { String normalizedConstraints = getNormalizedConstraints(viewConstraints); Path signalPath = new Path(signalDirectory, normalizedConstraints); // check if the signal exists try { try { FileStatus signalStatus = rootFileSystem.getFileStatus(signalPath); return signalStatus.getModificationTime(); } catch (final FileNotFoundException ex) { // empty, will be thrown when the signal path doesn't exist } return -1; } catch (IOException e) { throw new DatasetIOException("Could not access signal path: " + signalPath, e); } }
@Test public void testConstraintsGetReadyTimestamp() throws IOException { SignalManager manager = new SignalManager(fileSystem, testDirectory); Constraints constraints = new Constraints(DatasetTestUtilities.USER_SCHEMA). with("email", "testConstraintsReady@domain.com"); Path signalFilePath = new Path(this.testDirectory, "email=testConstraintsReady%40domain.com"); // drop a file at the signal path FSDataOutputStream stream = this.fileSystem.create(signalFilePath, true); stream.writeUTF(String.valueOf(System.currentTimeMillis())); stream.close(); Assert.assertTrue(manager.getReadyTimestamp(constraints) != -1); }
/**
 * The signal directory must not exist after merely constructing the manager,
 * and must exist once a signal has been written.
 */
@Test
public void testSignalDirectoryCreatedOnSignal() throws IOException {
  final SignalManager signals = new SignalManager(fileSystem, testDirectory);

  final boolean existsBeforeSignal = fileSystem.exists(testDirectory);
  Assert.assertFalse("Signal directory shouldn't exist before signals",
      existsBeforeSignal);

  final Constraints emailOnly = new Constraints(DatasetTestUtilities.USER_SCHEMA)
      .with("email", "signalCreatesDir@domain.com");
  signals.signalReady(emailOnly);

  final boolean existsAfterSignal = fileSystem.exists(testDirectory);
  Assert.assertTrue("Signal directory created on signals", existsAfterSignal);
}
/**
 * Signals this view's constraints as ready through the signal manager.
 * Does nothing when no signal manager is present.
 */
@Override
public void signalReady() {
  if (signalManager == null) {
    return;
  }
  signalManager.signalReady(getConstraints());
}
FileSystemDataset(FileSystem fileSystem, Path directory, String namespace, String name, DatasetDescriptor descriptor, URI uri, @Nullable PartitionListener partitionListener, Class<E> type) { super(type, descriptor.getSchema()); if (Formats.PARQUET.equals(descriptor.getFormat())) { Preconditions.checkArgument(IndexedRecord.class.isAssignableFrom(type) || type == Object.class, "Parquet only supports generic and specific data models, type" + " parameter must implement IndexedRecord"); } this.fileSystem = fileSystem; this.directory = directory; this.namespace = namespace; this.name = name; this.descriptor = descriptor; this.partitionStrategy = descriptor.isPartitioned() ? descriptor.getPartitionStrategy() : null; this.partitionListener = partitionListener; this.convert = new PathConversion(descriptor.getSchema()); this.uri = uri; Path signalsPath = new Path(getDirectory(fileSystem, directory), SIGNALS_DIRECTORY_NAME); this.signalManager = new SignalManager(fileSystem, signalsPath); this.unbounded = new FileSystemPartitionView<E>( this, partitionListener, signalManager, type); // remove this.partitionKey for 0.14.0 this.partitionKey = null; }
/** * Create a signal for the specified constraints. * * @param viewConstraints The constraints to create a signal for. * * @throws DatasetException if the signal could not be created. */ public void signalReady(Constraints viewConstraints) { try { rootFileSystem.mkdirs(signalDirectory); } catch (IOException e) { throw new DatasetIOException("Unable to create signal manager directory: " + signalDirectory, e); } String normalizedConstraints = getNormalizedConstraints(viewConstraints); Path signalPath = new Path(signalDirectory, normalizedConstraints); try{ // create the output stream to overwrite the current contents, if the directory or file // exists it will be overwritten to get a new timestamp FSDataOutputStream os = rootFileSystem.create(signalPath, true); os.close(); } catch (IOException e) { throw new DatasetIOException("Could not access signal path: " + signalPath, e); } }
/**
 * Constraints that were never signaled must report a ready timestamp of -1.
 */
@Test
public void testConstraintsGetReadyTimestampNotYetSignaled() throws IOException {
  final SignalManager signals = new SignalManager(fileSystem, testDirectory);
  final Constraints neverSignaled = new Constraints(DatasetTestUtilities.USER_SCHEMA)
      .with("email", "testConstraintsGetReadyTimestampNotYetSignaled@domain.com");

  final long readyTimestamp = signals.getReadyTimestamp(neverSignaled);

  Assert.assertEquals("A constraint that is not signaled should show -1",
      -1, readyTimestamp);
}
/**
 * Signaling constraints on two fields must leave a file in the test signal
 * directory named after the normalized form of those constraints.
 */
@Test
public void testMultiConstraintsSignaledReady() throws IOException {
  final SignalManager signals = new SignalManager(fileSystem, testDirectory);
  final Constraints emailAndUsername =
      new Constraints(DatasetTestUtilities.USER_SCHEMA)
          .with("email", "kiteuser@domain.com")
          .with("username", "kiteuser");

  signals.signalReady(emailAndUsername);

  final Path expectedSignal = new Path(
      this.testDirectory, SignalManager.getNormalizedConstraints(emailAndUsername));
  Assert.assertTrue(this.fileSystem.exists(expectedSignal));
}
/**
 * Constraints with nothing added to them must normalize to "unbounded".
 */
@Test
public void testNormalizeConstraintsUnbounded() throws IOException {
  final Constraints noConstraints = new Constraints(DatasetTestUtilities.USER_SCHEMA);

  final String actual = SignalManager.getNormalizedConstraints(noConstraints);

  Assert.assertEquals("unbounded", actual);
}
@Override public long getLastModified() { long lastMod = -1; for (Iterator<Path> i = dirIterator(); i.hasNext(); ) { Path dir = i.next(); try { for (FileStatus st : fs.listStatus(dir)) { if (lastMod < st.getModificationTime()) { lastMod = st.getModificationTime(); } } } catch (IOException e) { throw new DatasetIOException("Cannot find last modified time of of " + dir, e); } } // if view was marked ready more recently count it as the modified time if (signalManager != null) { long readyTimestamp = signalManager.getReadyTimestamp(getConstraints()); if (lastMod < readyTimestamp) { lastMod = readyTimestamp; } } return lastMod; }
/**
 * An "email" constraint built with an empty string value is expected to
 * normalize to "email=in()".
 */
@Test
public void testNormalizeConstraintsValueExists() throws IOException {
  final Constraints emptyEmail = new Constraints(DatasetTestUtilities.USER_SCHEMA)
      .with("email", "");

  final String actual = SignalManager.getNormalizedConstraints(emptyEmail);

  Assert.assertEquals("email=in()", actual);
}
/**
 * Multiple values given for one field in arbitrary order (7, 2, 3) are expected
 * to appear in ascending order in the normalized form.
 */
@Test
public void testNormalizeConstraintsOrderedSets() throws IOException {
  final Constraints unorderedValues =
      new Constraints(DatasetTestUtilities.OLD_VALUE_SCHEMA)
          .with("value", 7L, 2L, 3L);

  final String actual = SignalManager.getNormalizedConstraints(unorderedValues);

  Assert.assertEquals("value=2,3,7", actual);
}
/**
 * A single email constraint is expected to normalize to "field=value" with the
 * '@' in the value written as "%40".
 */
@Test
public void testNormalizeConstraints() throws IOException {
  final Constraints emailOnly = new Constraints(DatasetTestUtilities.USER_SCHEMA)
      .with("email", "user@domain.com");

  final String actual = SignalManager.getNormalizedConstraints(emailOnly);

  Assert.assertEquals("email=user%40domain.com", actual);
}
/**
 * A "before 12" constraint on "value" is expected to normalize to the interval
 * form "value=(,12)".
 */
@Test
public void testNormalizeConstraintsIntervals() throws IOException {
  final Constraints beforeTwelve =
      new Constraints(DatasetTestUtilities.OLD_VALUE_SCHEMA)
          .toBefore("value", 12L);

  final String actual = SignalManager.getNormalizedConstraints(beforeTwelve);

  Assert.assertEquals("value=(,12)", actual);
}
/**
 * Constraints added as username-then-email are expected to normalize with the
 * email field first, joined to the username field by '&amp;'.
 */
@Test
public void testNormalizeConstraintsOrderedKeys() throws IOException {
  final Constraints usernameThenEmail =
      new Constraints(DatasetTestUtilities.USER_SCHEMA)
          .with("username", "kite")
          .with("email", "kite@domain.com");

  final String actual = SignalManager.getNormalizedConstraints(usernameThenEmail);

  Assert.assertEquals("email=kite%40domain.com&username=kite", actual);
}