Preconditions.checkArgument(allowedFormats().contains(format.getName()),
    "Unsupported format: " + format.getName());

// entities can safely be reused for every format except Parquet
this.reuseEntity = !(Formats.PARQUET.equals(format));

// commit-on-batch and sync-on-batch are only honored for Avro; the left-hand
// side of each assignment is truncated in the source snippet
/* ... */ DEFAULT_FLUSHABLE_COMMIT_ON_BATCH) && (Formats.AVRO.equals(format));
/* ... */ DEFAULT_SYNCABLE_SYNC_ON_BATCH) && (Formats.AVRO.equals(format));
public DatasetDefinition() {
  this(null, true, Formats.AVRO.getName());
}
@Override
public void sync() throws EventDeliveryException {
  if (nEventsHandled > 0) {
    if (Formats.PARQUET.equals(
        dataset.getDataset().getDescriptor().getFormat())) {
      // we need to close the writer on sync if we're writing to a Parquet
      // dataset
      close();
    } else if (writer instanceof Syncable) {
      ((Syncable) writer).sync();
    }
  }
}
private static void checkCompressionType(Format format,
    @Nullable CompressionType compressionType) {
  if (compressionType == null) {
    return;
  }
  ValidationException.check(
      format.getSupportedCompressionTypes().contains(compressionType),
      "Format %s doesn't support compression format %s",
      format.getName(), compressionType.getName());
}
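For illustration, a minimal sketch of how this check behaves; the compression choices below are assumptions, not values taken from the snippet above:

checkCompressionType(Formats.PARQUET, null);                   // no-op: null means "use the default"
checkCompressionType(Formats.PARQUET, CompressionType.Snappy); // passes, assuming Snappy is in
                                                               // Parquet's supported set
checkCompressionType(Formats.CSV, CompressionType.Snappy);     // throws ValidationException if CSV's
                                                               // supported set lacks Snappy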
// reconstructed ternary: the condition was truncated in the source snippet,
// but a null compressionType falling back to the format default matches the
// surrounding checkCompressionType logic
this.compressionType = (compressionType == null) ?
    this.format.getDefaultCompressionType() : compressionType;
private static String uniqueFilename(Format format) {
  return UUID.randomUUID() + "." + format.getExtension();
}
@Override
public Format newFormat(String name) {
  try {
    return Formats.fromString(name);
  } catch (IllegalArgumentException ex) {
    // unknown name: fall back to an uncompressed custom format
    return new Format(name, CompressionType.Uncompressed,
        new CompressionType[] { CompressionType.Uncompressed });
  }
}
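A hedged usage sketch of the fallback behavior; the receiver name formatFactory is hypothetical:

Format avro = formatFactory.newFormat("avro");        // resolves to the built-in Formats.AVRO
Format custom = formatFactory.newFormat("sequence");  // unrecognized: wrapped as an
                                                      // uncompressed pass-through Format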
public DatasetDefinition(Class<?> targetClass,
    PartitionStrategy partitionStrategy) {
  this(targetClass, Formats.AVRO.getName(), partitionStrategy);
}
@Override
Format directory(FileSystem fs, Path path, List<Format> formats)
    throws IOException {
  Format format = null;
  for (Format otherFormat : formats) {
    if (format == null) {
      format = otherFormat;
    } else if (!format.equals(otherFormat)) {
      throw new ValidationException(String.format(
          "Path contains multiple formats (%s, %s): %s",
          format, otherFormat, path));
    }
  }
  return format;
}
final Path path = new Path("hdfs://" + auth + "/tmp/test.avro"); AvroAppender<String> appender = new AvroAppender<String>( fs, path, schema, Formats.AVRO.getDefaultCompressionType());
@Override
Schema file(FileSystem fs, Path path) throws IOException {
  String filename = path.getName();
  if (filename.endsWith(Formats.AVRO.getExtension())) {
    return Schemas.fromAvro(fs, path);
  } else if (filename.endsWith(Formats.PARQUET.getExtension())) {
    return Schemas.fromParquet(fs, path);
  } else if (filename.endsWith(Formats.JSON.getExtension())) {
    // "name" is presumably a field supplied by the enclosing visitor class
    return Schemas.fromJSON(name, fs, path);
  }
  return null;
}
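A brief sketch of the extension dispatch above; the visitor receiver and paths are hypothetical:

// reads the schema embedded in an Avro data file
Schema users = visitor.file(fs, new Path("/data/users/part-00000.avro"));
// unrecognized extensions yield null rather than an exception
Schema none = visitor.file(fs, new Path("/data/users/_SUCCESS"));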
public DatasetDefinition(boolean allowNullValues) {
  this(null, allowNullValues, Formats.AVRO.getName());
}
private static void setConfigProperties(Configuration conf, Format format,
    Schema schema, Class<?> type) {
  GenericData model = DataModelUtil.getDataModelForType(type);
  if (Formats.AVRO.equals(format)) {
    setModel.invoke(conf, model.getClass());
    conf.set(AVRO_SCHEMA_INPUT_KEY, schema.toString());
  } else if (Formats.PARQUET.equals(format)) {
    // TODO: update to a version of Parquet with setAvroDataSupplier
    //AvroReadSupport.setAvroDataSupplier(conf,
    //    DataModelUtil.supplierClassFor(model));
    AvroReadSupport.setAvroReadSchema(conf, schema);
  }
}
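For illustration, a hypothetical call site; the record class and schema source are assumptions:

Configuration conf = new Configuration();
setConfigProperties(conf, Formats.PARQUET, record.getSchema(), MyRecord.class);
// conf now carries the Avro read schema that Parquet's AvroReadSupport will use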
final Path path = new Path("file:/tmp/test.avro"); AvroAppender<String> appender = new AvroAppender<String>( fs, path, schema, Formats.AVRO.getDefaultCompressionType());
private static Format formatFromExt(Path path) {
  String filename = path.getName();
  String ext = Iterables.getLast(DOT.split(filename));
  for (Format format : SUPPORTED_FORMATS) {
    if (ext.equals(format.getExtension())) {
      return format;
    }
  }
  return null;
}
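A minimal sketch of the extension lookup; the file names are hypothetical:

formatFromExt(new Path("/data/users/part-00000.parquet")); // Formats.PARQUET
formatFromExt(new Path("/data/users/_SUCCESS"));           // null: no recognized extension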
/**
 * Instantiates a new {@code DataStoreWriter} for writing Parquet records to a
 * {@code org.kitesdk.data.Dataset}.
 *
 * @param entityClass the {@code Class} that the writer will write to the Dataset
 * @param datasetRepositoryFactory the {@code DatasetRepositoryFactory} to be used for the writer
 */
public ParquetDatasetStoreWriter(Class<T> entityClass,
    DatasetRepositoryFactory datasetRepositoryFactory) {
  this(entityClass, datasetRepositoryFactory,
      new DatasetDefinition(entityClass, false, Formats.PARQUET.getName()));
}
static boolean isSupportedFormat(DatasetDescriptor descriptor) {
  Format format = descriptor.getFormat();
  return (SUPPORTED_FORMATS.contains(format) ||
      (Formats.CSV.equals(format) &&
       DescriptorUtil.isEnabled(FileSystemProperties.ALLOW_CSV_PROP, descriptor)));
}
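A hedged sketch of opting a CSV dataset in through the descriptor property; the schema value is a placeholder:

DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
    .schema(schema)
    .format(Formats.CSV)
    .property(FileSystemProperties.ALLOW_CSV_PROP, "true")
    .build();
// isSupportedFormat(descriptor) now returns true for the otherwise-excluded CSV format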
/**
 * Instantiates a new {@code DataStoreWriter} for writing Avro records to a
 * {@code org.kitesdk.data.Dataset}.
 *
 * @param entityClass the {@code Class} that the writer will write to the Dataset
 * @param datasetRepositoryFactory the {@code DatasetRepositoryFactory} to be used for the writer
 */
public AvroPojoDatasetStoreWriter(Class<T> entityClass,
    DatasetRepositoryFactory datasetRepositoryFactory) {
  this(entityClass, datasetRepositoryFactory,
      new DatasetDefinition(entityClass, false, Formats.AVRO.getName()));
}
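For illustration, hypothetical wiring of one of these writers; the factory setters, base path, and User type are assumptions rather than values from the snippets above:

DatasetRepositoryFactory factory = new DatasetRepositoryFactory();
factory.setConf(new Configuration());
factory.setBasePath("/tmp/datasets");  // hypothetical dataset location
DataStoreWriter<User> writer =
    new AvroPojoDatasetStoreWriter<User>(User.class, factory);
writer.write(user);
writer.close();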