/**
 * Builds a {@link JavaPairRDD} backed by the named {@link Dataset}, configured with the supplied
 * dataset arguments. This is a convenience overload: it forwards to the three-argument
 * {@code fromDataset} variant, passing {@code null} as the last argument.
 *
 * @param datasetName name of the dataset
 * @param arguments arguments for the dataset
 * @param <K> key type
 * @param <V> value type
 * @return a new {@link JavaPairRDD} instance that reads from the given dataset
 * @throws DatasetInstantiationException if the dataset doesn't exist
 */
public <K, V> JavaPairRDD<K, V> fromDataset(String datasetName, Map<String, String> arguments) {
  JavaPairRDD<K, V> datasetRdd = this.fromDataset(datasetName, arguments, null);
  return datasetRdd;
}
/**
 * Builds a {@link JavaRDD} covering every event of the given stream. This is a convenience
 * overload: it forwards to the ranged {@code fromStream} variant with the bounds {@code 0} and
 * {@link Long#MAX_VALUE}.
 *
 * @param namespace namespace in which the stream exists
 * @param streamName name of the stream
 * @return a new {@link JavaRDD} instance that reads from the given stream
 * @throws DatasetInstantiationException if the stream doesn't exist
 */
public JavaRDD<StreamEvent> fromStream(String namespace, String streamName) {
  JavaRDD<StreamEvent> allEvents = this.fromStream(namespace, streamName, 0, Long.MAX_VALUE);
  return allEvents;
}
/**
 * Writes the supplied {@link JavaPairRDD} into the named {@link Dataset}. This is a convenience
 * overload: it forwards to the {@code saveAsDataset} variant that accepts dataset arguments,
 * passing an empty argument map.
 *
 * @param rdd the {@link JavaPairRDD} to be saved
 * @param datasetName name of the Dataset
 * @param <K> key type of the RDD
 * @param <V> value type of the RDD
 * @throws DatasetInstantiationException if the Dataset doesn't exist
 */
public <K, V> void saveAsDataset(JavaPairRDD<K, V> rdd, String datasetName) {
  final Map<String, String> noArguments = Collections.emptyMap();
  this.saveAsDataset(rdd, datasetName, noArguments);
}
/**
 * Writes the supplied {@link JavaPairRDD} into the named {@link Dataset}. This is a convenience
 * overload: it forwards to the {@code saveAsDataset} variant that accepts dataset arguments,
 * passing an empty argument map.
 *
 * @param rdd the {@link JavaPairRDD} to be saved
 * @param datasetName name of the Dataset
 * @param <K> key type of the RDD
 * @param <V> value type of the RDD
 * @throws DatasetInstantiationException if the Dataset doesn't exist
 */
public <K, V> void saveAsDataset(JavaPairRDD<K, V> rdd, String datasetName) {
  final Map<String, String> noArguments = Collections.emptyMap();
  this.saveAsDataset(rdd, datasetName, noArguments);
}
/**
 * Builds a {@link JavaPairRDD} backed by the named {@link Dataset}, configured with the supplied
 * dataset arguments. This is a convenience overload: it forwards to the three-argument
 * {@code fromDataset} variant, passing {@code null} as the last argument.
 *
 * @param datasetName name of the dataset
 * @param arguments arguments for the dataset
 * @param <K> key type
 * @param <V> value type
 * @return a new {@link JavaPairRDD} instance that reads from the given dataset
 * @throws DatasetInstantiationException if the dataset doesn't exist
 */
public <K, V> JavaPairRDD<K, V> fromDataset(String datasetName, Map<String, String> arguments) {
  JavaPairRDD<K, V> datasetRdd = this.fromDataset(datasetName, arguments, null);
  return datasetRdd;
}
/**
 * Writes the supplied {@link JavaPairRDD} into the named {@link Dataset}. This is a convenience
 * overload: it forwards to the {@code saveAsDataset} variant that accepts dataset arguments,
 * passing an empty argument map.
 *
 * @param rdd the {@link JavaPairRDD} to be saved
 * @param datasetName name of the Dataset
 * @param <K> key type of the RDD
 * @param <V> value type of the RDD
 * @throws DatasetInstantiationException if the Dataset doesn't exist
 */
public <K, V> void saveAsDataset(JavaPairRDD<K, V> rdd, String datasetName) {
  final Map<String, String> noArguments = Collections.emptyMap();
  this.saveAsDataset(rdd, datasetName, noArguments);
}
/**
 * Builds a {@link JavaRDD} covering every event of the given stream. This is a convenience
 * overload: it forwards to the ranged {@code fromStream} variant with the bounds {@code 0} and
 * {@link Long#MAX_VALUE}.
 *
 * @param streamName name of the stream
 * @return a new {@link JavaRDD} instance that reads from the given stream
 * @throws DatasetInstantiationException if the stream doesn't exist
 */
public JavaRDD<StreamEvent> fromStream(String streamName) {
  JavaRDD<StreamEvent> allEvents = this.fromStream(streamName, 0, Long.MAX_VALUE);
  return allEvents;
}
/**
 * Builds a {@link JavaPairRDD} backed by the named {@link Dataset}, configured with the supplied
 * dataset arguments. This is a convenience overload: it forwards to the three-argument
 * {@code fromDataset} variant, passing {@code null} as the last argument.
 *
 * @param datasetName name of the dataset
 * @param arguments arguments for the dataset
 * @param <K> key type
 * @param <V> value type
 * @return a new {@link JavaPairRDD} instance that reads from the given dataset
 * @throws DatasetInstantiationException if the dataset doesn't exist
 */
public <K, V> JavaPairRDD<K, V> fromDataset(String datasetName, Map<String, String> arguments) {
  JavaPairRDD<K, V> datasetRdd = this.fromDataset(datasetName, arguments, null);
  return datasetRdd;
}
/**
 * Writes the supplied {@link JavaPairRDD} into the named {@link Dataset} of the given namespace.
 * This is a convenience overload: it forwards to the {@code saveAsDataset} variant that accepts
 * dataset arguments, passing an empty argument map.
 *
 * @param rdd the {@link JavaPairRDD} to be saved
 * @param namespace the namespace in which the specified dataset is to be saved
 * @param datasetName name of the Dataset
 * @param <K> key type of the RDD
 * @param <V> value type of the RDD
 * @throws DatasetInstantiationException if the Dataset doesn't exist
 */
public <K, V> void saveAsDataset(JavaPairRDD<K, V> rdd, String namespace, String datasetName) {
  final Map<String, String> noArguments = Collections.emptyMap();
  this.saveAsDataset(rdd, namespace, datasetName, noArguments);
}
/**
 * Builds a {@link JavaPairRDD} covering every event of the given stream. Each pair is keyed by
 * the event timestamp, and the stream body is decoded as the given value type. Currently
 * {@link Text}, {@link String} and {@link ByteWritable} are supported.
 *
 * <p>This is a convenience overload: it forwards to the ranged {@code fromStream} variant with
 * the bounds {@code 0} and {@link Long#MAX_VALUE}.
 *
 * @param streamName name of the stream
 * @param valueType type of the stream body to decode to
 * @param <V> value type of the resulting pairs
 * @return a new {@link JavaPairRDD} instance that reads from the given stream
 * @throws DatasetInstantiationException if the stream doesn't exist
 */
public <V> JavaPairRDD<Long, V> fromStream(String streamName, Class<V> valueType) {
  JavaPairRDD<Long, V> decodedEvents = this.fromStream(streamName, 0, Long.MAX_VALUE, valueType);
  return decodedEvents;
}
/**
 * Builds a {@link JavaPairRDD} backed by the named {@link Dataset} in the given namespace,
 * configured with the supplied dataset arguments. This is a convenience overload: it forwards to
 * the four-argument {@code fromDataset} variant, passing {@code null} as the last argument.
 *
 * @param namespace namespace in which the dataset exists
 * @param datasetName name of the dataset
 * @param arguments arguments for the dataset
 * @param <K> key type
 * @param <V> value type
 * @return a new {@link JavaPairRDD} instance that reads from the given dataset
 * @throws DatasetInstantiationException if the dataset doesn't exist
 */
public <K, V> JavaPairRDD<K, V> fromDataset(String namespace, String datasetName,
                                            Map<String, String> arguments) {
  JavaPairRDD<K, V> datasetRdd = this.fromDataset(namespace, datasetName, arguments, null);
  return datasetRdd;
}
/**
 * Writes the supplied {@link JavaPairRDD} into the named {@link Dataset} of the given namespace.
 * This is a convenience overload: it forwards to the {@code saveAsDataset} variant that accepts
 * dataset arguments, passing an empty argument map.
 *
 * @param rdd the {@link JavaPairRDD} to be saved
 * @param namespace the namespace in which the specified dataset is to be saved
 * @param datasetName name of the Dataset
 * @param <K> key type of the RDD
 * @param <V> value type of the RDD
 * @throws DatasetInstantiationException if the Dataset doesn't exist
 */
public <K, V> void saveAsDataset(JavaPairRDD<K, V> rdd, String namespace, String datasetName) {
  final Map<String, String> noArguments = Collections.emptyMap();
  this.saveAsDataset(rdd, namespace, datasetName, noArguments);
}
/**
 * Builds a {@link JavaPairRDD} covering every event of the given stream. Each pair is keyed by
 * the event timestamp, and the stream body is decoded as the given value type. Currently
 * {@link Text}, {@link String} and {@link ByteWritable} are supported.
 *
 * <p>This is a convenience overload: it forwards to the ranged {@code fromStream} variant with
 * the bounds {@code 0} and {@link Long#MAX_VALUE}.
 *
 * @param namespace namespace in which the stream exists
 * @param streamName name of the stream
 * @param valueType type of the stream body to decode to
 * @param <V> value type of the resulting pairs
 * @return a new {@link JavaPairRDD} instance that reads from the given stream
 * @throws DatasetInstantiationException if the stream doesn't exist
 */
public <V> JavaPairRDD<Long, V> fromStream(String namespace, String streamName, Class<V> valueType) {
  JavaPairRDD<Long, V> decodedEvents =
    this.fromStream(namespace, streamName, 0, Long.MAX_VALUE, valueType);
  return decodedEvents;
}
/**
 * Builds a {@link JavaPairRDD} backed by the named {@link Dataset} in the given namespace,
 * configured with the supplied dataset arguments. This is a convenience overload: it forwards to
 * the four-argument {@code fromDataset} variant, passing {@code null} as the last argument.
 *
 * @param namespace namespace in which the dataset exists
 * @param datasetName name of the dataset
 * @param arguments arguments for the dataset
 * @param <K> key type
 * @param <V> value type
 * @return a new {@link JavaPairRDD} instance that reads from the given dataset
 * @throws DatasetInstantiationException if the dataset doesn't exist
 */
public <K, V> JavaPairRDD<K, V> fromDataset(String namespace, String datasetName,
                                            Map<String, String> arguments) {
  JavaPairRDD<K, V> datasetRdd = this.fromDataset(namespace, datasetName, arguments, null);
  return datasetRdd;
}
/**
 * Writes the supplied {@link JavaPairRDD} into the named {@link Dataset} of the given namespace.
 * This is a convenience overload: it forwards to the {@code saveAsDataset} variant that accepts
 * dataset arguments, passing an empty argument map.
 *
 * @param rdd the {@link JavaPairRDD} to be saved
 * @param namespace the namespace in which the specified dataset is to be saved
 * @param datasetName name of the Dataset
 * @param <K> key type of the RDD
 * @param <V> value type of the RDD
 * @throws DatasetInstantiationException if the Dataset doesn't exist
 */
public <K, V> void saveAsDataset(JavaPairRDD<K, V> rdd, String namespace, String datasetName) {
  final Map<String, String> noArguments = Collections.emptyMap();
  this.saveAsDataset(rdd, namespace, datasetName, noArguments);
}
/**
 * Builds a {@link JavaPairRDD} covering every event of the given stream. The first entry of each
 * pair is a {@link Long} holding the event timestamp; the second entry is a
 * {@link GenericStreamEventData} containing the data decoded from the stream event body based
 * on the given {@link FormatSpecification}.
 *
 * <p>This is a convenience overload: it forwards to the ranged {@code fromStream} variant with
 * the bounds {@code 0} and {@link Long#MAX_VALUE}.
 *
 * @param streamName name of the stream
 * @param formatSpec the {@link FormatSpecification} describing the format in the stream
 * @param dataType class object for the value type {@code T}
 * @param <T> value type
 * @return a new {@link JavaPairRDD} instance that reads from the given stream
 * @throws DatasetInstantiationException if the Stream doesn't exist
 */
public <T> JavaPairRDD<Long, GenericStreamEventData<T>> fromStream(String streamName,
                                                                   FormatSpecification formatSpec,
                                                                   Class<T> dataType) {
  JavaPairRDD<Long, GenericStreamEventData<T>> formattedEvents =
    this.fromStream(streamName, formatSpec, 0, Long.MAX_VALUE, dataType);
  return formattedEvents;
}
/**
 * Builds a {@link JavaPairRDD} backed by the named {@link Dataset} in the given namespace,
 * configured with the supplied dataset arguments. This is a convenience overload: it forwards to
 * the four-argument {@code fromDataset} variant, passing {@code null} as the last argument.
 *
 * @param namespace namespace in which the dataset exists
 * @param datasetName name of the dataset
 * @param arguments arguments for the dataset
 * @param <K> key type
 * @param <V> value type
 * @return a new {@link JavaPairRDD} instance that reads from the given dataset
 * @throws DatasetInstantiationException if the dataset doesn't exist
 */
public <K, V> JavaPairRDD<K, V> fromDataset(String namespace, String datasetName,
                                            Map<String, String> arguments) {
  JavaPairRDD<K, V> datasetRdd = this.fromDataset(namespace, datasetName, arguments, null);
  return datasetRdd;
}
/**
 * Builds a {@link JavaPairRDD} covering every event of the given stream. The first entry of each
 * pair is a {@link Long} holding the event timestamp; the second entry is a
 * {@link GenericStreamEventData} containing the data decoded from the stream event body based
 * on the given {@link FormatSpecification}.
 *
 * <p>This is a convenience overload: it forwards to the ranged {@code fromStream} variant with
 * the bounds {@code 0} and {@link Long#MAX_VALUE}.
 *
 * @param namespace namespace in which the stream exists
 * @param streamName name of the stream
 * @param formatSpec the {@link FormatSpecification} describing the format in the stream
 * @param dataType class object for the value type {@code T}
 * @param <T> value type
 * @return a new {@link JavaPairRDD} instance that reads from the given stream
 * @throws DatasetInstantiationException if the Stream doesn't exist
 */
public <T> JavaPairRDD<Long, GenericStreamEventData<T>> fromStream(String namespace,
                                                                   String streamName,
                                                                   FormatSpecification formatSpec,
                                                                   Class<T> dataType) {
  JavaPairRDD<Long, GenericStreamEventData<T>> formattedEvents =
    this.fromStream(namespace, streamName, formatSpec, 0, Long.MAX_VALUE, dataType);
  return formattedEvents;
}
/**
 * Builds a {@link JavaPairRDD} backed by the named {@link Dataset}. This is a convenience
 * overload: it forwards to the {@code fromDataset} variant that accepts dataset arguments,
 * passing an empty argument map.
 *
 * @param datasetName name of the dataset
 * @param <K> key type
 * @param <V> value type
 * @return a new {@link JavaPairRDD} instance that reads from the given dataset
 * @throws DatasetInstantiationException if the dataset doesn't exist
 */
public <K, V> JavaPairRDD<K, V> fromDataset(String datasetName) {
  final Map<String, String> noArguments = Collections.emptyMap();
  JavaPairRDD<K, V> datasetRdd = this.fromDataset(datasetName, noArguments);
  return datasetRdd;
}
/**
 * Builds a {@link JavaPairRDD} backed by the named {@link Dataset}. This is a convenience
 * overload: it forwards to the {@code fromDataset} variant that accepts dataset arguments,
 * passing an empty argument map.
 *
 * @param datasetName name of the dataset
 * @param <K> key type
 * @param <V> value type
 * @return a new {@link JavaPairRDD} instance that reads from the given dataset
 * @throws DatasetInstantiationException if the dataset doesn't exist
 */
public <K, V> JavaPairRDD<K, V> fromDataset(String datasetName) {
  final Map<String, String> noArguments = Collections.emptyMap();
  JavaPairRDD<K, V> datasetRdd = this.fromDataset(datasetName, noArguments);
  return datasetRdd;
}