// Fragment: resolves a schema from a URI. The leading condition and the body of the
// "resource" branch were cut off in the original; the guard and the schema parse below
// are assumed completions, marked as such.
if (isDatasetOrViewUri(uri)) {  // assumed guard; the original starts mid-branch
  return Datasets.load(uri).getDataset().getDescriptor().getSchema();
} else if ("resource".equals(uri.getScheme())) {
  try (InputStream in = Resources.getResource(uri.getSchemeSpecificPart()).openStream()) {
    return new Schema.Parser().parse(in);  // assumed completion of the truncated branch
  }
}
// Garbled in the original: the precondition message and the sync-on-batch assignment were
// merged into a single statement. The split below is a hedged reconstruction; the "context"
// lookup and the CONFIG_SYNCABLE_SYNC_ON_BATCH constant are assumptions.
DatasetDescriptor descriptor = view.getDataset().getDescriptor();
Format format = descriptor.getFormat();
Preconditions.checkArgument(allowedFormats().contains(format.getName()),
    "Unsupported format: " + format.getName());  // message text is an assumption
this.syncOnBatch = context.getBoolean(
    CONFIG_SYNCABLE_SYNC_ON_BATCH, DEFAULT_SYNCABLE_SYNC_ON_BATCH)
    && Formats.AVRO.equals(format);  // sync-on-batch only applies to Avro-backed datasets
this.datasetName = view.getDataset().getName();
this.writer = view.newWriter();
final Schema schema = target.getDataset().getDescriptor().getSchema();
// Garbled in the original: the call below lost its receiver and leading arguments. The
// provenance-reporter send is a hedged reconstruction; "session" and "flowFile" are assumptions.
session.getProvenanceReporter().send(flowFile, target.getUri().toString(),
    timer.getDuration(TimeUnit.MILLISECONDS), true /* cannot roll back the write */);
@Override
public void handle(Event event, Throwable cause) throws EventDeliveryException {
  try {
    if (writer == null) {
      writer = dataset.newWriter();
    }
    // wrap the failed Flume event, including its headers, and store it in the dataset
    final AvroFlumeEvent avroEvent = new AvroFlumeEvent();
    avroEvent.setBody(ByteBuffer.wrap(event.getBody()));
    avroEvent.setHeaders(toCharSeqMap(event.getHeaders()));
    writer.write(avroEvent);
    nEventsHandled++;
  } catch (RuntimeException ex) {
    throw new EventDeliveryException(ex);
  }
}
public DatasetRecordWriter(View<E> view, boolean copyRecords) {
  this.datasetWriter = view.newWriter();
  this.schema = view.getDataset().getDescriptor().getSchema();
  this.dataModel = DataModelUtil.getDataModelForType(view.getType());
  this.copyRecords = copyRecords;
}
/**
 * Adds configuration for {@code DatasetKeyInputFormat} to read from the
 * given {@link Dataset} or {@link View} instance.
 *
 * @param view a dataset or view
 * @return this for method chaining
 */
public ConfigBuilder readFrom(View<?> view) {
  DatasetDescriptor descriptor = view.getDataset().getDescriptor();
  // if this is a partitioned dataset, add the partition location
  if (view instanceof FileSystemDataset) {
    conf.set(KITE_PARTITION_DIR, String.valueOf(descriptor.getLocation()));
  }
  // add descriptor properties to the config
  for (String property : descriptor.listProperties()) {
    conf.set(property, descriptor.getProperty(property));
  }
  if (DataModelUtil.isGeneric(view.getType())) {
    Schema datasetSchema = view.getDataset().getDescriptor().getSchema();
    // only set the read schema if the view is a projection
    if (!datasetSchema.equals(view.getSchema())) {
      withSchema(view.getSchema());
    }
  } else {
    withType(view.getType());
  }
  conf.set(KITE_INPUT_URI, view.getUri().toString());
  return this;
}
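// Usage sketch (not part of the original source): wiring the input side of a MapReduce job
// through the builder above. The static DatasetKeyInputFormat.configure(Job) entry point,
// the "dataset:hive:default/events" URI, and the GenericRecord type are illustrative
// assumptions, not taken from the snippet.
Job job = Job.getInstance(new Configuration(), "read-events");
DatasetKeyInputFormat.configure(job)
    .readFrom(Datasets.load("dataset:hive:default/events", GenericRecord.class));
job.setInputFormatClass(DatasetKeyInputFormat.class);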
public CSVRecordParser(CSVProperties props, View<E> view,
                       @Nullable List<String> header) {
  this(props, view.getDataset().getDescriptor().getSchema(), view.getType(), header);
}
public void setView(View<E> view) {
  this.descriptor = view.getDataset().getDescriptor();
  this.accessor = DataModelUtil.accessor(view.getType(), view.getSchema());
}
/**
 * Adds configuration for {@code DatasetKeyOutputFormat} to write to the
 * given {@link Dataset} or {@link View} instance.
 *
 * @param view a dataset or view
 * @return this for method chaining
 */
public ConfigBuilder writeTo(View<?> view) {
  if (view instanceof FileSystemDataset) {
    FileSystemDataset dataset = (FileSystemDataset) view;
    conf.set(KITE_PARTITION_DIR, String.valueOf(dataset.getDescriptor().getLocation()));
  }
  withType(view.getType());
  return writeTo(view.getUri());
}
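// Usage sketch (not part of the original source): the matching output-side configuration,
// continuing the Job from the input-side sketch above. DatasetKeyOutputFormat.configure(Job)
// and the "dataset:hive:default/summaries" URI are assumptions used only for illustration;
// writeTo(View) behaves as defined in the method above.
DatasetKeyOutputFormat.configure(job)
    .writeTo(Datasets.load("dataset:hive:default/summaries", GenericRecord.class));
job.setOutputFormatClass(DatasetKeyOutputFormat.class);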
@Override
public void replace(View<E> target, View<E> replacement) {
  // Garbled in the original: several statements were merged and their arguments lost.
  // The loop below is a hedged reconstruction of the partition-by-partition move that the
  // surviving fragments imply; the dest lookup and the stageMove destination argument are
  // assumptions, not recovered code.
  DatasetDescriptor updateDescriptor = replacement.getDataset().getDescriptor();
  for (PartitionView<E> src : replacement.getCoveringPartitions()) {
    if (src instanceof FileSystemPartitionView) {
      FileSystemPartitionView<E> srcPartition = (FileSystemPartitionView<E>) src;
      FileSystemPartitionView<E> dest = getPartitionView(srcPartition);  // assumed lookup
      List<Pair<Path, Path>> staged = FileSystemUtil.stageMove(fileSystem,
          new Path(srcPartition.getLocation().toString()),
          new Path(dest.getLocation().toString()));  // assumed destination argument
      // the commit of the staged moves is not part of the recovered fragment
    }
  }
}
@Override
public Iterable<E> read(Configuration configuration) throws IOException {
  // TODO: what to do with Configuration? create new view?
  return view.newReader(); // TODO: who calls close?
}
@Override
public void checkOutputSpecs(JobContext jobContext) {
  // The committer setup will fail if the output dataset does not exist
  View<E> target = load(jobContext);
  Configuration conf = Hadoop.JobContext.getConfiguration.invoke(jobContext);
  switch (conf.getEnum(KITE_WRITE_MODE, WriteMode.DEFAULT)) {
    case APPEND:
      break;
    case OVERWRITE:
      // if the merge won't use replace, then delete the existing data
      if (!canReplace(target)) {
        target.deleteAll();
      }
      break;
    default:
    case DEFAULT:
      boolean isReady = false;
      if (target instanceof Signalable) {
        isReady = ((Signalable) target).isReady();
      }
      if (isReady || !target.isEmpty()) {
        throw new DatasetException(
            "View is not empty or has been signaled as ready: " + target);
      }
      break;
  }
}
@Override
public int run() throws IOException {
  if (targets == null || targets.isEmpty()) {
    throw new IllegalArgumentException("No views or datasets were specified.");
  }
  for (String uriOrName : targets) {
    if (isViewUri(uriOrName)) {
      View view = Datasets.load(uriOrName);
      Preconditions.checkArgument(viewMatches(view.getUri(), uriOrName),
          "Resolved view does not match requested view: " + view.getUri());
      view.deleteAll();
    } else if (isDatasetUri(uriOrName)) {
      Datasets.delete(uriOrName);
    } else {
      getDatasetRepository().delete(namespace, uriOrName);
    }
    console.debug("Deleted {}", uriOrName);
  }
  return 0;
}
private static boolean isHive(View<?> view) {
  return "hive".equals(
      URI.create(view.getUri().getSchemeSpecificPart()).getScheme());
}
private void delete(View view) {
  try {
    boolean deleted = view.deleteAll();
    if (!deleted) {
      LOG.warn("No data was deleted.");
    }
  } catch (UnsupportedOperationException e) {
    LOG.error("Dataset view " + view + " cannot be deleted!");
    throw new CrunchRuntimeException("Dataset view cannot be deleted: " + view, e);
  }
}
public DatasetSourceTarget(View<E> view) {
  this(view, view.getType());
}
boolean exists = ready || !view.isEmpty();
if (exists) {
  switch (writeMode) {
    // truncated in the original: the write-mode cases (for example failing fast on DEFAULT
    // and deleting the existing data on OVERWRITE) are not part of the recovered fragment
@Override
public void process(InputStream in) throws IOException {
  try (DataFileStream<Record> stream = new DataFileStream<>(
      in, AvroUtil.newDatumReader(schema, Record.class))) {
    IncompatibleSchemaException.check(
        SchemaValidationUtil.canRead(stream.getSchema(), schema),
        "Incompatible file schema %s, expected %s",
        stream.getSchema(), schema);
    long written = 0L;
    try (DatasetWriter<Record> writer = target.newWriter()) {
      for (Record record : stream) {
        writer.write(record);
        written += 1;
      }
    } finally {
      session.adjustCounter("Stored records", written,
          true /* cannot roll back the write */);
    }
  }
}
});