// Reconstructed fragment: the enclosing method signature, the first branch's
// condition, and the final else are assumptions; the two branch bodies are
// from the original. Resolves a schema either from a dataset/view URI or from
// a classpath resource.
private static Schema loadSchema(URI uri) throws IOException {
  if ("dataset".equals(uri.getScheme()) || "view".equals(uri.getScheme())) {
    return Datasets.load(uri).getDataset().getDescriptor().getSchema();
  } else if ("resource".equals(uri.getScheme())) {
    try (InputStream in = Resources.getResource(
        uri.getSchemeSpecificPart()).openStream()) {
      return new Schema.Parser().parse(in); // assumed: resource is an Avro schema file
    }
  } else {
    throw new IllegalArgumentException("Unknown schema source: " + uri);
  }
}
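Example invocations of the reconstructed method above (both URIs are illustrative):

Schema fromDataset = loadSchema(URI.create("dataset:hdfs://nn:8020/data/default/events"));
Schema fromResource = loadSchema(URI.create("resource:schemas/event.avsc"));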
@Override
public void sync() throws EventDeliveryException {
  if (nEventsHandled > 0) {
    if (Formats.PARQUET.equals(
        dataset.getDataset().getDescriptor().getFormat())) {
      // We need to close the writer on sync if we're writing to a Parquet
      // dataset
      close();
    } else if (writer instanceof Syncable) {
      ((Syncable) writer).sync();
    }
  }
}
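For context, a minimal caller-side sketch under the same model (a Kite DatasetWriter obtained from a view, as elsewhere in this listing; the view and batch names are illustrative):

DatasetWriter<GenericRecord> writer = view.newWriter();
try {
  for (GenericRecord record : batch) {
    writer.write(record);
  }
  // Avro-backed writers implement Syncable and can make the batch durable;
  // Parquet writers cannot sync, so their data is only committed by close().
  if (writer instanceof Syncable) {
    ((Syncable) writer).sync();
  }
} finally {
  writer.close();
}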
DatasetDescriptor descriptor = view.getDataset().getDescriptor();
Format format = descriptor.getFormat();
Preconditions.checkArgument(allowedFormats().contains(format.getName()),
    "Unsupported format: " + format.getName());
// Reconstructed: the original line fused the format check with the
// sync-on-batch flag; reading the flag from the sink's context, and the
// DatasetSinkConstants names, are assumptions.
this.syncOnBatch = context.getBoolean(
    DatasetSinkConstants.CONFIG_SYNCABLE_SYNC_ON_BATCH,
    DatasetSinkConstants.DEFAULT_SYNCABLE_SYNC_ON_BATCH)
    && Formats.AVRO.equals(format);
this.datasetName = view.getDataset().getName();
final Schema schema = target.getDataset().getDescriptor().getSchema();
@SuppressWarnings("unchecked") private void checkCompactable(View<T> view) { Dataset<T> dataset = view.getDataset(); if (!(dataset instanceof Replaceable)) { throw new IllegalArgumentException("Cannot compact dataset: " + dataset); } Replaceable<View<T>> replaceable = ((Replaceable<View<T>>) dataset); Preconditions.checkArgument(replaceable.canReplace(view), "Cannot compact view: " + view); } }
@Override
@edu.umd.cs.findbugs.annotations.SuppressWarnings(value="NP_NULL_ON_SOME_PATH",
    justification="Null case checked by precondition")
public int run() throws IOException {
  Preconditions.checkArgument(
      datasets != null && !datasets.isEmpty(),
      "Missing dataset name");
  for (String name : datasets) {
    printInfo(console, load(name, GenericRecord.class).getDataset());
  }
  return 0;
}
@SuppressWarnings("unchecked") private static boolean canReplace(View<?> view) { if (Hadoop.isHadoop1()) { // can't use replace because it is called in the OutputCommitter. return false; } Dataset<?> dataset = view.getDataset(); return (dataset instanceof Replaceable && ((Replaceable<View<?>>) dataset).canReplace(view)); } }
@SuppressWarnings("unchecked") private static boolean canReplace(View<?> view) { if (Hadoop.isHadoop1()) { // can't use replace because it is called in the OutputCommitter. return false; } Dataset<?> dataset = view.getDataset(); return (dataset instanceof Replaceable && ((Replaceable<View<?>>) dataset).canReplace(view)); } }
private static <E> boolean usePerTaskAttemptDatasets(View<E> target,
    Configuration conf) {
  // For performance reasons we should skip the intermediate task attempt and
  // job output datasets if the file system does not support efficient
  // renaming (such as S3), and write to the target dataset directly.
  if (!FileSystemUtil.supportsRename(
      URI.create(target.getUri().getSchemeSpecificPart()), conf)) {
    return false;
  }
  // new API output committers are not called properly in Hadoop 1
  return !Hadoop.isHadoop1() && target.getDataset() instanceof Mergeable;
}
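For example (illustrative URI): given a target of dataset:s3n://bucket/datasets/events, the scheme-specific part s3n://bucket/datasets/events points at S3, where rename is implemented as copy-and-delete; supportsRename then returns false and the job writes directly to the target dataset instead of staging per-task-attempt datasets.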
@Override
@SuppressWarnings("unchecked")
public void configureSource(Job job, int inputId) throws IOException {
  Configuration conf = job.getConfiguration();
  if (inputId == -1) {
    job.setMapperClass(CrunchMapper.class);
    job.setInputFormatClass(formatBundle.getFormatClass());
    formatBundle.configure(conf);
  } else {
    Path dummy = new Path("/view/" + view.getDataset().getName());
    CrunchInputs.addInputPath(job, dummy, formatBundle, inputId);
  }
}
@Override
public void setConf(Configuration configuration) {
  conf = configuration;
  View<E> view = load(configuration);
  String partitionDir = conf.get(KITE_PARTITION_DIR);
  if (view.getDataset().getDescriptor().isPartitioned() && partitionDir != null) {
    delegate = getDelegateInputFormatForPartition(view.getDataset(),
        partitionDir, conf);
  } else {
    delegate = getDelegateInputFormat(view, conf);
  }
}
public CSVRecordParser(CSVProperties props, View<E> view,
    @Nullable List<String> header) {
  this(props, view.getDataset().getDescriptor().getSchema(),
      view.getType(), header);
}
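A hedged usage sketch: the properties builder options and the sample line are made up, and the read(line, reuse) entry point is an assumption about how the parser is invoked:

CSVProperties props = new CSVProperties.Builder()
    .delimiter(",") // assumed builder option
    .build();
CSVRecordParser<GenericRecord> parser =
    new CSVRecordParser<>(props, view, null /* header taken from the schema */);
GenericRecord record = parser.read("1,alice,42.0", null); // assumed signature: read(line, reuse)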
@SuppressWarnings("unchecked") private static <E> AvroType<E> toAvroType(View<E> view, Class<E> type) { if (type.isAssignableFrom(GenericData.Record.class)) { return (AvroType<E>) Avros.generics( view.getDataset().getDescriptor().getSchema()); } else { return Avros.records(type); } }
@SuppressWarnings("unchecked") private static <T> AvroType<T> ptype(View<T> view) { Class<T> recordClass = view.getType(); if (GenericRecord.class.isAssignableFrom(recordClass)) { return (AvroType<T>) Avros.generics( view.getDataset().getDescriptor().getSchema()); } else { return Avros.records(recordClass); } }
@Override
public boolean canReplace(View<E> part) {
  if (part instanceof FileSystemView) {
    return equals(part.getDataset()) &&
        ((FileSystemView) part).getConstraints().alignedWithBoundaries();
  } else if (part instanceof FileSystemDataset) {
    return equals(part);
  }
  return false;
}
public void setView(View<E> view) {
  this.descriptor = view.getDataset().getDescriptor();
  this.accessor = DataModelUtil.accessor(view.getType(), view.getSchema());
}
private static DatasetRepository getDatasetRepository(JobContext jobContext) {
  Configuration conf = Hadoop.JobContext.getConfiguration.invoke(jobContext);
  DatasetRepository repo = DatasetRepositories.repositoryFor(
      conf.get(KITE_OUTPUT_URI));
  if (repo instanceof TemporaryDatasetRepositoryAccessor) {
    Dataset<Object> dataset = load(jobContext).getDataset();
    String namespace = dataset.getNamespace();
    repo = ((TemporaryDatasetRepositoryAccessor) repo)
        .getTemporaryRepository(namespace, getJobDatasetName(jobContext));
  }
  return repo;
}
public DatasetRecordWriter(View<E> view, boolean copyRecords) {
  this.datasetWriter = view.newWriter();
  this.schema = view.getDataset().getDescriptor().getSchema();
  this.dataModel = DataModelUtil.getDataModelForType(view.getType());
  this.copyRecords = copyRecords;
}
public DatasetTarget(View<E> view) {
  this.view = view;
  Configuration temp = emptyConf();
  // use appendTo since handleExisting checks for existing data
  DatasetKeyOutputFormat.configure(temp).appendTo(view);
  this.formatBundle = outputBundle(temp);
  this.uri = view.getDataset().getUri();
}
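And a sketch of the target side: in user code the target is normally obtained via CrunchDatasets.asTarget rather than by constructing DatasetTarget directly. The copy method and CopyJob driver class here are hypothetical:

// Hypothetical job: copy records from one view into another, appending.
public static void copy(View<GenericRecord> from, View<GenericRecord> to) {
  Pipeline pipeline = new MRPipeline(CopyJob.class); // CopyJob is illustrative
  PCollection<GenericRecord> records = pipeline.read(CrunchDatasets.asSource(from));
  pipeline.write(records, CrunchDatasets.asTarget(to), Target.WriteMode.APPEND);
  pipeline.done();
}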