// Reconstructed fragment: the enclosing method signature, the first branch's
// condition, and the final else are assumptions; the two branch bodies are
// from the original. Resolves a schema either from a dataset/view URI or from
// a classpath resource.
private static Schema loadSchema(URI uri) throws IOException {
  if ("dataset".equals(uri.getScheme()) || "view".equals(uri.getScheme())) {
    return Datasets.load(uri).getDataset().getDescriptor().getSchema();
  } else if ("resource".equals(uri.getScheme())) {
    try (InputStream in = Resources.getResource(
        uri.getSchemeSpecificPart()).openStream()) {
      return new Schema.Parser().parse(in); // assumed: resource is an Avro schema file
    }
  } else {
    throw new IllegalArgumentException("Unknown schema source: " + uri);
  }
}
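Example invocations of the reconstructed method above (both URIs are illustrative):

Schema fromDataset = loadSchema(URI.create("dataset:hdfs://nn:8020/data/default/events"));
Schema fromResource = loadSchema(URI.create("resource:schemas/event.avsc"));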
@Override
public void sync() throws EventDeliveryException {
  if (nEventsHandled > 0) {
    if (Formats.PARQUET.equals(
        dataset.getDataset().getDescriptor().getFormat())) {
      // We need to close the writer on sync if we're writing to a Parquet
      // dataset
      close();
    } else if (writer instanceof Syncable) {
      ((Syncable) writer).sync();
    }
  }
}
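For context, a minimal caller-side sketch under the same model (a Kite DatasetWriter obtained from a view, as elsewhere in this listing; the view and batch names are illustrative):

DatasetWriter<GenericRecord> writer = view.newWriter();
try {
  for (GenericRecord record : batch) {
    writer.write(record);
  }
  // Avro-backed writers implement Syncable and can make the batch durable;
  // Parquet writers cannot sync, so their data is only committed by close().
  if (writer instanceof Syncable) {
    ((Syncable) writer).sync();
  }
} finally {
  writer.close();
}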
DatasetDescriptor descriptor = view.getDataset().getDescriptor();
Format format = descriptor.getFormat();
Preconditions.checkArgument(allowedFormats().contains(format.getName()),
    "Unsupported format: " + format.getName());
// Reconstructed: the original line fused the format check with the
// sync-on-batch flag; reading the flag from the sink's context, and the
// DatasetSinkConstants names, are assumptions.
this.syncOnBatch = context.getBoolean(
    DatasetSinkConstants.CONFIG_SYNCABLE_SYNC_ON_BATCH,
    DatasetSinkConstants.DEFAULT_SYNCABLE_SYNC_ON_BATCH)
    && Formats.AVRO.equals(format);
this.datasetName = view.getDataset().getName();
final Schema schema = target.getDataset().getDescriptor().getSchema();
@SuppressWarnings("unchecked") private void checkCompactable(View<T> view) { Dataset<T> dataset = view.getDataset(); if (!(dataset instanceof Replaceable)) { throw new IllegalArgumentException("Cannot compact dataset: " + dataset); } Replaceable<View<T>> replaceable = ((Replaceable<View<T>>) dataset); Preconditions.checkArgument(replaceable.canReplace(view), "Cannot compact view: " + view); } }
@Override
@edu.umd.cs.findbugs.annotations.SuppressWarnings(value="NP_NULL_ON_SOME_PATH",
    justification="Null case checked by precondition")
public int run() throws IOException {
  Preconditions.checkArgument(
      datasets != null && !datasets.isEmpty(),
      "Missing dataset name");
  for (String name : datasets) {
    printInfo(console, load(name, GenericRecord.class).getDataset());
  }
  return 0;
}
@SuppressWarnings("unchecked") private static boolean canReplace(View<?> view) { if (Hadoop.isHadoop1()) { // can't use replace because it is called in the OutputCommitter. return false; } Dataset<?> dataset = view.getDataset(); return (dataset instanceof Replaceable && ((Replaceable<View<?>>) dataset).canReplace(view)); } }
@SuppressWarnings("unchecked") private static boolean canReplace(View<?> view) { if (Hadoop.isHadoop1()) { // can't use replace because it is called in the OutputCommitter. return false; } Dataset<?> dataset = view.getDataset(); return (dataset instanceof Replaceable && ((Replaceable<View<?>>) dataset).canReplace(view)); } }
private static <E> boolean usePerTaskAttemptDatasets(View<E> target,
    Configuration conf) {
  // For performance reasons we should skip the intermediate task attempt and
  // job output datasets if the file system does not support efficient
  // renaming (such as S3), and write to the target dataset directly.
  if (!FileSystemUtil.supportsRename(
      URI.create(target.getUri().getSchemeSpecificPart()), conf)) {
    return false;
  }
  // new API output committers are not called properly in Hadoop 1
  return !Hadoop.isHadoop1() && target.getDataset() instanceof Mergeable;
}
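For example (illustrative URI): given a target of dataset:s3n://bucket/datasets/events, the scheme-specific part s3n://bucket/datasets/events points at S3, where rename is implemented as copy-and-delete; supportsRename then returns false and the job writes directly to the target dataset instead of staging per-task-attempt datasets.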
@Override
@SuppressWarnings("unchecked")
public void configureSource(Job job, int inputId) throws IOException {
  Configuration conf = job.getConfiguration();
  if (inputId == -1) {
    job.setMapperClass(CrunchMapper.class);
    job.setInputFormatClass(formatBundle.getFormatClass());
    formatBundle.configure(conf);
  } else {
    Path dummy = new Path("/view/" + view.getDataset().getName());
    CrunchInputs.addInputPath(job, dummy, formatBundle, inputId);
  }
}
@Override
public void setConf(Configuration configuration) {
  conf = configuration;
  View<E> view = load(configuration);
  String partitionDir = conf.get(KITE_PARTITION_DIR);
  if (view.getDataset().getDescriptor().isPartitioned() && partitionDir != null) {
    delegate = getDelegateInputFormatForPartition(view.getDataset(),
        partitionDir, conf);
  } else {
    delegate = getDelegateInputFormat(view, conf);
  }
}
public CSVRecordParser(CSVProperties props, View<E> view,
    @Nullable List<String> header) {
  this(props, view.getDataset().getDescriptor().getSchema(),
      view.getType(), header);
}
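A hedged usage sketch: the properties builder options and the sample line are made up, and the read(line, reuse) entry point is an assumption about how the parser is invoked:

CSVProperties props = new CSVProperties.Builder()
    .delimiter(",") // assumed builder option
    .build();
CSVRecordParser<GenericRecord> parser =
    new CSVRecordParser<>(props, view, null /* header taken from the schema */);
GenericRecord record = parser.read("1,alice,42.0", null); // assumed signature: read(line, reuse)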
@SuppressWarnings("unchecked") private static <E> AvroType<E> toAvroType(View<E> view, Class<E> type) { if (type.isAssignableFrom(GenericData.Record.class)) { return (AvroType<E>) Avros.generics( view.getDataset().getDescriptor().getSchema()); } else { return Avros.records(type); } }
@SuppressWarnings("unchecked") private static <T> AvroType<T> ptype(View<T> view) { Class<T> recordClass = view.getType(); if (GenericRecord.class.isAssignableFrom(recordClass)) { return (AvroType<T>) Avros.generics( view.getDataset().getDescriptor().getSchema()); } else { return Avros.records(recordClass); } }
@Override
public boolean canReplace(View<E> part) {
  if (part instanceof FileSystemView) {
    return equals(part.getDataset()) &&
        ((FileSystemView) part).getConstraints().alignedWithBoundaries();
  } else if (part instanceof FileSystemDataset) {
    return equals(part);
  }
  return false;
}
public void setView(View<E> view) {
  this.descriptor = view.getDataset().getDescriptor();
  this.accessor = DataModelUtil.accessor(view.getType(), view.getSchema());
}
private static DatasetRepository getDatasetRepository(JobContext jobContext) {
  Configuration conf = Hadoop.JobContext.getConfiguration.invoke(jobContext);
  DatasetRepository repo = DatasetRepositories.repositoryFor(
      conf.get(KITE_OUTPUT_URI));
  if (repo instanceof TemporaryDatasetRepositoryAccessor) {
    Dataset<Object> dataset = load(jobContext).getDataset();
    String namespace = dataset.getNamespace();
    repo = ((TemporaryDatasetRepositoryAccessor) repo)
        .getTemporaryRepository(namespace, getJobDatasetName(jobContext));
  }
  return repo;
}
public DatasetRecordWriter(View<E> view, boolean copyRecords) {
  this.datasetWriter = view.newWriter();
  this.schema = view.getDataset().getDescriptor().getSchema();
  this.dataModel = DataModelUtil.getDataModelForType(view.getType());
  this.copyRecords = copyRecords;
}
public DatasetTarget(View<E> view) {
  this.view = view;
  Configuration temp = emptyConf();
  // use appendTo since handleExisting checks for existing data
  DatasetKeyOutputFormat.configure(temp).appendTo(view);
  this.formatBundle = outputBundle(temp);
  this.uri = view.getDataset().getUri();
}
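And a sketch of the target side: in user code the target is normally obtained via CrunchDatasets.asTarget rather than by constructing DatasetTarget directly. The copy method and CopyJob driver class here are hypothetical:

// Hypothetical job: copy records from one view into another, appending.
public static void copy(View<GenericRecord> from, View<GenericRecord> to) {
  Pipeline pipeline = new MRPipeline(CopyJob.class); // CopyJob is illustrative
  PCollection<GenericRecord> records = pipeline.read(CrunchDatasets.asSource(from));
  pipeline.write(records, CrunchDatasets.asTarget(to), Target.WriteMode.APPEND);
  pipeline.done();
}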