/**
 * Builds the directory path for a partition key by appending one directory
 * name per key value to {@code dir}.
 *
 * <p>Each directory name is produced by {@code PathConversion.dirnameForValue}
 * from the field partitioner at the same position as the key value.
 *
 * @param dir the starting directory, or {@code null} to build a path that
 *            starts at the first partition directory name
 * @param key the partition key whose values are turned into directory names
 * @return {@code dir} extended with one component per key value; {@code dir}
 *         itself (possibly {@code null}) when the key has no values
 */
@SuppressWarnings("unchecked")
private Path toDirectoryName(@Nullable Path dir, PartitionKey key) {
  final int length = key.getLength();
  if (length == 0) {
    // nothing to append, and no need to look at the partition strategy
    return dir;
  }

  // look the partitioners up once instead of once per loop iteration
  final java.util.List<FieldPartitioner> partitioners =
      Accessor.getDefault().getFieldPartitioners(partitionStrategy);

  Path result = dir;
  for (int i = 0; i < length; i++) {
    final String dirname =
        PathConversion.dirnameForValue(partitioners.get(i), key.get(i));
    // the first component has no parent when no starting directory was given
    result = (result == null) ? new Path(dirname) : new Path(result, dirname);
  }
  return result;
}
/**
 * Converts a partition directory name into the value it represents.
 *
 * <p>The name is first reduced to its value string by
 * {@code dirnameToValueString}, and that string is then converted to the
 * type reported by {@code SchemaUtil.getPartitionType} for the given field
 * partitioner and schema.
 *
 * @param field the field partitioner the directory belongs to
 * @param schema the schema used to determine the partition type
 * @param name the directory name to convert
 * @param <T> the value type of the field partitioner
 * @return the converted partition value
 */
public static <T> T valueForDirname(FieldPartitioner<?, T> field, Schema schema, String name) {
  final String valueString = dirnameToValueString(name);
  return Conversions.convert(valueString, SchemaUtil.getPartitionType(field, schema));
}
/**
 * Re-targets this key at {@code path}.
 *
 * <p>The key is handed to {@code conversion.toKey(path, this)}, the path is
 * then stored on this key, and this same instance is returned.
 *
 * @param path the path this key should now correspond to
 * @param conversion the conversion that is asked to process {@code path} for this key
 * @return this key
 */
public StorageKey reuseFor(Path path, PathConversion conversion) { conversion.toKey(path, this); this.path = path; return this; }
/**
 * Checks that whatever precedes the value in a directory name (nothing,
 * a bare {@code =}, or one or more {@code name=} prefixes) does not affect
 * the converted value.
 */
@Test
public void testIgnoresPartitionName() {
  // string-level conversion: every form must reduce to "10"
  final String[] stringCases = {"10", "=10", "anything=10"};
  for (String dirname : stringCases) {
    Assert.assertEquals("10", PathConversion.dirnameToValueString(dirname));
  }

  // typed conversion through a month partitioner: every form must yield 10
  final String[] monthCases = {"10", "=10", "anything=10", "even=strange=10"};
  for (String dirname : monthCases) {
    Assert.assertEquals(10, (int) convert.valueForDirname(
        new MonthFieldPartitioner("timestamp", "month"), dirname));
  }
}
/**
 * Converts a partition directory name into its value, using the schema held
 * by this instance.
 *
 * <p>Delegates to the three-argument {@code valueForDirname}.
 *
 * @param field the field partitioner the directory belongs to
 * @param name the directory name to convert
 * @param <T> the value type of the field partitioner
 * @return the converted partition value
 */
public <T> T valueForDirname(FieldPartitioner<?, T> field, String name) {
  return valueForDirname(field, this.schema, name);
}
/**
 * Checks the path produced for identity-partitioned string values that
 * contain a space and a slash: the expected directory names are
 * {@code name_copy=John+Doe} and {@code address_copy=NY%2FUSA}.
 */
@Test
@SuppressWarnings("unchecked")
public void toDirNameIdentityWithSlashes() {
  final PartitionStrategy identityStrategy = new PartitionStrategy.Builder()
      .identity("name")
      .identity("address")
      .build();

  final StorageKey storageKey = new StorageKey(identityStrategy);
  storageKey.replaceValues((List) Lists.newArrayList("John Doe", "NY/USA"));

  final Path expected = new Path("name_copy=John+Doe/address_copy=NY%2FUSA");
  final Path actual = convert.fromKey(storageKey);
  Assert.assertEquals(expected, actual);
}
PartitionStrategy strategy = descriptor.getPartitionStrategy(); PathConversion conversion = new PathConversion(schema); Iterator<String> parts = PATH_SPLITTER.split(relative.getRawPath()).iterator(); for (FieldPartitioner<?, ?> fp : Accessor.getDefault().getFieldPartitioners(strategy)) { fp.getName(), conversion.valueForDirname(fp, parts.next()));
/**
 * Builds the directory name for a single partition value.
 *
 * <p>The result is the partitioner's name and the string form of the value
 * (from {@code valueToString}), combined by {@code PART_JOIN}.
 *
 * @param field the field partitioner that produced the value
 * @param value the partition value
 * @param <T> the value type of the field partitioner
 * @return the directory name for the value
 */
public static <T> String dirnameForValue(FieldPartitioner<?, T> field, T value) {
  final String partitionName = field.getName();
  final String valueString = valueToString(field, value);
  return PART_JOIN.join(partitionName, valueString);
}
/**
 * Creates a {@code MakeKey} holding a single {@link StorageKey} built for
 * {@code strategy} (stored as the reusable key) and a {@link PathConversion}
 * built for {@code schema}.
 *
 * @param strategy the partition strategy passed to the reusable key
 * @param schema the schema passed to the path conversion
 */
public MakeKey(PartitionStrategy strategy, Schema schema) { this.reusableKey = new StorageKey(strategy); this.convert = new PathConversion(schema); }
/**
 * Builds a partition key for a directory directly below this partition.
 *
 * <p>The new key consists of the values of {@code partitionKey} (when it is
 * set) followed by the value converted from {@code name} using the first
 * field partitioner of {@code partitionStrategy}.
 *
 * @param name the directory name to convert into the last key value
 * @return a new partition key
 */
@SuppressWarnings("unchecked")
private PartitionKey keyFromDirectory(String name) {
  final FieldPartitioner firstPartitioner =
      Accessor.getDefault().getFieldPartitioners(partitionStrategy).get(0);

  final List<Object> keyValues = Lists.newArrayList();
  if (partitionKey != null) {
    // start from the values already fixed by this partition
    keyValues.addAll(partitionKey.getValues());
  }

  final Object directoryValue = convert.valueForDirname(firstPartitioner, name);
  keyValues.add(directoryValue);

  return new PartitionKey(keyValues.toArray());
}
/**
 * Checks the path produced for a year/month/day key holding 2013, 11 and 5:
 * the expected result is {@code year=2013/month=11/day=05}.
 */
@Test
@SuppressWarnings("unchecked")
public void testFromKey() {
  final PartitionStrategy dateStrategy = new PartitionStrategy.Builder()
      .year("timestamp")
      .month("timestamp")
      .day("timestamp")
      .build();

  final StorageKey storageKey = new StorageKey(dateStrategy);
  storageKey.replaceValues((List) Lists.newArrayList(2013, 11, 5));

  final Path expected = new Path("year=2013/month=11/day=05");
  final Path actual = convert.fromKey(storageKey);
  Assert.assertEquals(expected, actual);
}
Accessor.getDefault().getFieldPartitioners(partitionStrategy)) { hdrs.put(PARTITION_PREFIX + fp.getName(), PathConversion.valueToString(fp, key.get(i++)));
/**
 * Creates a cache loader for the given view.
 *
 * <p>A {@link PathConversion} is created from the schema of the descriptor
 * of the view's dataset.
 *
 * @param view the view that writers are loaded for
 * @param conf the configuration accessor kept for later use
 */
public IncrementalDatasetWriterCacheLoader(FileSystemView<E> view, ConfAccessor conf) {
  this.view = view;
  this.conf = conf;
  this.convert = new PathConversion(
      view.getDataset().getDescriptor().getSchema());
}
public StorageKey toKey(Path fromPath, StorageKey storage) { final List<FieldPartitioner> partitioners = Accessor.getDefault().getFieldPartitioners(storage.getPartitionStrategy()); final List<Object> values = Lists.newArrayList( new Object[partitioners.size()]); Path currentPath = fromPath; int index = partitioners.size() - 1; while (currentPath != null && index >= 0) { values.set(index, valueForDirname( (FieldPartitioner<?, ?>) partitioners.get(index), currentPath.getName())); // update currentPath = currentPath.getParent(); index -= 1; } storage.replaceValues(values); return storage; }
expected.replaceValues((List) Lists.newArrayList(1L, 2013, 11)); Assert.assertEquals(expected, convert.toKey( new Path(ROOT_PATH, "id=1/year=2013/month=11"), new StorageKey(strategy))); Assert.assertEquals(expected, convert.toKey( new Path(ROOT_PATH, "id=1/year=2013"), new StorageKey(strategy))); Assert.assertEquals(expected, convert.toKey( new Path(ROOT_PATH, "id=1"), new StorageKey(strategy))); Assert.assertEquals(expected, convert.toKey( ROOT_PATH, new StorageKey(strategy)));
/**
 * Builds the relative path that corresponds to a storage key.
 *
 * <p>One directory name is produced per field partitioner of the key's
 * strategy, using the key value at the same position, and the names are
 * joined with {@code Path.SEPARATOR_CHAR}.
 *
 * @param key the key to convert
 * @return the path made of the key's directory names
 */
public Path fromKey(StorageKey key) {
  final List<FieldPartitioner> partitioners =
      Accessor.getDefault().getFieldPartitioners(key.getPartitionStrategy());
  final StringBuilder relativePath = new StringBuilder();

  int position = 0;
  for (FieldPartitioner partitioner : partitioners) {
    if (position > 0) {
      relativePath.append(Path.SEPARATOR_CHAR);
    }
    @SuppressWarnings("unchecked")
    String dirname = dirnameForValue(partitioner, key.get(position));
    relativePath.append(dirname);
    position++;
  }

  return new Path(relativePath.toString());
}
@Override @edu.umd.cs.findbugs.annotations.SuppressWarnings( value="BC_UNCONFIRMED_CAST_OF_RETURN_VALUE", justification="Writer is known to be IncrementalWriter") public FileSystemWriter.IncrementalWriter<E> load(StorageKey key) throws Exception { Preconditions.checkState(view.getDataset() instanceof FileSystemDataset, "FileSystemWriters cannot create writer for " + view.getDataset()); FileSystemDataset dataset = (FileSystemDataset) view.getDataset(); Path partition = convert.fromKey(key); FileSystemWriter<E> writer = FileSystemWriter.newWriter( dataset.getFileSystem(), new Path(dataset.getDirectory(), partition), conf.getRollIntervalMillis(), conf.getTargetFileSize(), dataset.getDescriptor(), view.getAccessor().getWriteSchema()); PartitionListener listener = dataset.getPartitionListener(); if (listener != null) { listener.partitionAdded( dataset.getNamespace(), dataset.getName(), partition.toString()); } // initialize the writer after calling the listener // this lets the listener decide if and how to create the // partition directory writer.initialize(); return (FileSystemWriter.IncrementalWriter<E>) writer; }
Accessor.getDefault().getFieldPartitioners(partitionStrategy)) { hdrs.put(PARTITION_PREFIX + fp.getName(), PathConversion.valueToString(fp, key.get(i++)));
/**
 * Creates a cache loader for the given view.
 *
 * <p>A {@link PathConversion} is created from the schema of the descriptor
 * of the view's dataset.
 *
 * @param view the view that writers are loaded for
 * @param conf the configuration accessor kept for later use
 */
public DatasetWriterCacheLoader(FileSystemView<E> view, ConfAccessor conf) {
  this.view = view;
  this.conf = conf;
  this.convert = new PathConversion(
      view.getDataset().getDescriptor().getSchema());
}
/**
 * Builds the partition key for a partition directory of this dataset.
 *
 * <p>{@code dir} is made relative to the dataset directory and split into
 * its components. Each component is converted with the field partitioner at
 * the same position. When {@code partitionKey} is set, its values are placed
 * in front of the converted values.
 *
 * @param dir the partition directory
 * @return a new partition key
 * @throws IllegalStateException if {@code dir} is not relative to the dataset
 *         directory, or if it has more components than there are field
 *         partitioners
 */
@SuppressWarnings("unchecked")
public PartitionKey keyFromDirectory(Path dir) {
  final URI relativeUri = directory.toUri().relativize(dir.toUri());

  Path relativeDir = null;
  if (!relativeUri.toString().isEmpty()) {
    relativeDir = new Path(relativeUri);
    // relativize hands back the input unchanged when it cannot be made relative
    Preconditions.checkState(!relativeDir.equals(dir),
        "Partition directory %s is not " +
        "relative to dataset directory %s", dir, directory);
  }

  // collect the components root-first by inserting each one at the front
  final List<String> components = Lists.newArrayList();
  for (Path current = relativeDir;
       current != null && !"".equals(current.getName());
       current = current.getParent()) {
    components.add(0, current.getName());
  }

  final List<FieldPartitioner> partitioners =
      Accessor.getDefault().getFieldPartitioners(partitionStrategy);
  Preconditions.checkState(components.size() <= partitioners.size(),
      "Number of components in partition directory %s (%s) exceeds number of field " +
      "partitioners %s", dir, components, partitionStrategy);

  final List<Object> keyValues = Lists.newArrayList();
  int position = 0;
  for (String component : components) {
    keyValues.add(convert.valueForDirname(partitioners.get(position), component));
    position++;
  }

  if (partitionKey != null) {
    keyValues.addAll(0, partitionKey.getValues());
  }

  return new PartitionKey(keyValues.toArray());
}