/**
 * Builds the dataset properties for the sink's file set.
 *
 * <p>Applies the configured base path (when present), the output format class and
 * configuration reported by the given provider, and finally any subclass-specific
 * file set properties via {@link #addFileSetProperties}.
 *
 * @param outputFormatProvider supplies the output format class name and its configuration
 * @return the fully built dataset properties
 */
private DatasetProperties createProperties(OutputFormatProvider outputFormatProvider) {
  FileSetProperties.Builder builder = FileSetProperties.builder();
  // Only override the base path if one was explicitly configured.
  String basePath = tpfsSinkConfig.basePath;
  if (!Strings.isNullOrEmpty(basePath)) {
    builder.setBasePath(basePath);
  }
  builder.setOutputFormat(outputFormatProvider.getOutputFormatClassName());
  // Copy every output-format configuration entry onto the file set as an output property.
  for (Map.Entry<String, String> entry : outputFormatProvider.getOutputFormatConfiguration().entrySet()) {
    builder.setOutputProperty(entry.getKey(), entry.getValue());
  }
  addFileSetProperties(builder);
  return builder.build();
}
/**
 * Builds the dataset properties for the snapshot file set.
 *
 * <p>Starts from the base properties derived from the plugin config, lets the subclass
 * contribute its own file properties, then applies the output format class and its
 * configuration as reported by the given provider.
 *
 * @param outputFormatProvider supplies the output format class name and its configuration
 * @return the fully built dataset properties
 */
private DatasetProperties createProperties(OutputFormatProvider outputFormatProvider) {
  FileSetProperties.Builder builder = SnapshotFileSet.getBaseProperties(config);
  // Subclass hook runs before the output format so format settings take precedence.
  addFileProperties(builder);
  builder.setOutputFormat(outputFormatProvider.getOutputFormatClassName());
  // Copy every output-format configuration entry onto the file set as an output property.
  for (Map.Entry<String, String> entry : outputFormatProvider.getOutputFormatConfiguration().entrySet()) {
    builder.setOutputProperty(entry.getKey(), entry.getValue());
  }
  return builder.build();
}
.setInputFormat(TextInputFormat.class) .setOutputFormat(TextOutputFormat.class) .setOutputProperty(TextOutputFormat.SEPERATOR, ":") .build()); dsFramework.addInstance("fileSet", rtOutput1, FileSetProperties.builder() .setInputFormat(TextInputFormat.class) .setOutputFormat(TextOutputFormat.class) .setOutputProperty(TextOutputFormat.SEPERATOR, ":") .build()); .setInputFormat(TextInputFormat.class) .setOutputFormat(TextOutputFormat.class) .setOutputProperty(TextOutputFormat.SEPERATOR, ":") .build()); runtimeArguments = Maps.newHashMap();
.setEnableExploreOnCreate(true) .setOutputFormat(org.apache.hadoop.mapreduce.lib.output.TextOutputFormat.class) .setOutputProperty(org.apache.hadoop.mapreduce.lib.output.TextOutputFormat.SEPERATOR, ",") .setExploreFormat("csv") .setExploreSchema("key string, value string")
.setOutputProperty(DatasetSystemMetadataProvider.FILESET_AVRO_SCHEMA_OUTPUT_KEY, "avro-output-schema") .build(); assertDatasetSchema("avro-output-schema", filesetAvroOutputProps); .setOutputProperty(DatasetSystemMetadataProvider.FILESET_AVRO_SCHEMA_OUTPUT_KEY, "avro-output-schema") .add(DatasetProperties.SCHEMA, "avro-schema") .build(); .setOutputProperty(DatasetSystemMetadataProvider.FILESET_PARQUET_SCHEMA_OUTPUT_KEY, "parquet-output-schema") .build(); assertDatasetSchema("parquet-output-schema", filesetParquetProps); .setOutputProperty(DatasetSystemMetadataProvider.FILESET_PARQUET_SCHEMA_OUTPUT_KEY, "parquet-output-schema") .add(DatasetProperties.SCHEMA, "parquet-schema") .build();
.setOutputProperty(TextOutputFormat.SEPERATOR, ",") .build()); .setOutputProperty(TextOutputFormat.SEPERATOR, ",")
/**
 * Configures the application: four file-set-style datasets (all colon-separated text)
 * plus the Java and Scala Spark file-count programs.
 */
@Override
public void configure() {
  try {
    // Plain file set using the custom text input/output formats.
    createDataset("fs", FileSet.class, FileSetProperties.builder()
      .setInputFormat(MyTextInputFormat.class)
      .setOutputFormat(MyTextOutputFormat.class)
      .setOutputProperty(TextOutputFormat.SEPERATOR, ":").build());
    // Partitioned file set with a single string partition key "x".
    createDataset("pfs", PartitionedFileSet.class, PartitionedFileSetProperties.builder()
      .setPartitioning(Partitioning.builder().addStringField("x").build())
      .setInputFormat(MyTextInputFormat.class)
      .setOutputFormat(TextOutputFormat.class)
      .setOutputProperty(TextOutputFormat.SEPERATOR, ":").build());
    // Time-partitioned file set.
    createDataset("tpfs", TimePartitionedFileSet.class, FileSetProperties.builder()
      .setInputFormat(MyTextInputFormat.class)
      .setOutputFormat(TextOutputFormat.class)
      .setOutputProperty(TextOutputFormat.SEPERATOR, ":").build());
    // Custom file set type.
    createDataset("myfs", MyFileSet.class, FileSetProperties.builder()
      .setInputFormat(MyTextInputFormat.class)
      .setOutputFormat(TextOutputFormat.class)
      .setOutputProperty(TextOutputFormat.SEPERATOR, ":").build());
    addSpark(new FileCountSparkProgram());
    addSpark(new ScalaFileCountSparkProgram());
  } catch (Throwable t) {
    // Throwables.propagate() is deprecated in Guava; this is its documented replacement:
    // rethrow RuntimeException/Error unchanged, wrap anything else.
    Throwables.throwIfUnchecked(t);
    throw new RuntimeException(t);
  }
}
@Override public void configure() { setName("AppWithMapReduceUsingFile"); setDescription("Application with MapReduce job using file as dataset"); createDataset(INPUT, "table"); createDataset(OUTPUT, "table"); Class<? extends InputFormat> inputFormatClass = getConfig().isUseCombineFileInputFormat() ? CombineTextInputFormat.class : TextInputFormat.class; createDataset(PARTITIONED, "partitionedFileSet", PartitionedFileSetProperties.builder() .setPartitioning(Partitioning.builder() .addStringField("type") .addLongField("time") .build()) // properties for file set .setBasePath("partitioned") .setInputFormat(inputFormatClass) .setOutputFormat(TextOutputFormat.class) .setOutputProperty(TextOutputFormat.SEPERATOR, SEPARATOR) // don't configure properties for the Hive table - this is used in a context where explore is disabled .build()); addMapReduce(new PartitionWriter()); addMapReduce(new PartitionReader()); }
/**
 * Configures the application: a text file set for input and — when a distinct name is
 * configured — a second text file set for output, plus the sum-computing MapReduce job.
 */
@Override
public void configure() {
  setName("AppWithMapReduceUsingFile");
  setDescription("Application with MapReduce job using file as dataset");
  String input = getConfig().inputDataset;
  String output = getConfig().outputDataset;
  createDataset(input, "fileSet", FileSetProperties.builder()
    .setInputFormat(TextInputFormat.class)
    .setOutputFormat(TextOutputFormat.class)
    .setOutputProperty(TextOutputFormat.SEPERATOR, ":")
    .build());
  // When input and output share a name, a single dataset serves both roles.
  if (!output.equals(input)) {
    createDataset(output, "fileSet", FileSetProperties.builder()
      .setBasePath("foo/my-file-output")
      .setInputFormat(TextInputFormat.class)
      .setOutputFormat(TextOutputFormat.class)
      .setOutputProperty(TextOutputFormat.SEPERATOR, ":")
      .build());
  }
  addMapReduce(new ComputeSum(getConfig()));
}
/**
 * Configures the application: three identically configured text input file sets, one
 * space-separated output file set, and the two MapReduce jobs under test.
 */
@Override
public void configure() {
  setName("AppWithMapReduceUsingMultipleInputs");
  setDescription("Application with MapReduce job using multiple inputs");
  // All three inputs are plain text file sets with the same configuration.
  for (String inputName : new String[] { PURCHASES, PURCHASES2, CUSTOMERS }) {
    createDataset(inputName, "fileSet", FileSetProperties.builder()
      .setInputFormat(TextInputFormat.class)
      .build());
  }
  createDataset(OUTPUT_DATASET, "fileSet", FileSetProperties.builder()
    .setOutputFormat(TextOutputFormat.class)
    .setOutputProperty(TextOutputFormat.SEPERATOR, " ")
    .build());
  addMapReduce(new ComputeSum());
  addMapReduce(new InvalidMapReduce());
}
@Override public void configure() { addService(new PartitionService()); // Create a partitioned file set, configure it to work with MapReduce and with Explore createDataset("pfs", PartitionedFileSet.class, PartitionedFileSetProperties.builder() // Properties for partitioning .setPartitioning(Partitioning.builder().addStringField("partition").addIntField("sub-partition").build()) // Properties for file set .setInputFormat(TextInputFormat.class) .setOutputFormat(TextOutputFormat.class) .setOutputProperty(TextOutputFormat.SEPERATOR, ",") // Properties for Explore (to create a partitioned Hive table) .setEnableExploreOnCreate(true) .setExploreFormat("csv") .setExploreSchema("f1 STRING, f2 INT") .setDescription("App for testing authorization in partitioned filesets.") .build()); }
@Override public void configure() { setName("AppWithMapReduceUsingFile"); setDescription("Application with MapReduce job using file as dataset"); createDataset(INPUT, "table"); createDataset(OUTPUT, "table"); createDataset(TIME_PARTITIONED, "timePartitionedFileSet", FileSetProperties.builder() // properties for file set .setBasePath("partitioned") .setInputFormat(TextInputFormat.class) .setOutputFormat(TextOutputFormat.class) .setOutputProperty(TextOutputFormat.SEPERATOR, SEPARATOR) // don't configure properties for the Hive table - this is used in a context where explore is disabled .build()); addMapReduce(new PartitionWriter()); addMapReduce(new PartitionReader()); }
@Override public void configure() { createDataset(INPUT, KeyValueTable.class.getName(), DatasetProperties.EMPTY); // create two pfs, identical except for their (table) names for (String name : new String[] { PFS, OTHER }) { createDataset(name, PartitionedFileSet.class.getName(), PartitionedFileSetProperties.builder() .setPartitioning(Partitioning.builder().addIntField("number").build()) .setOutputFormat(TextOutputFormat.class) .setOutputProperty(TextOutputFormat.SEPERATOR, ",") .setEnableExploreOnCreate(true) .setExploreTableName(name) .setExploreSchema("key STRING, value STRING") .setExploreFormat("csv") .build()); } addMapReduce(new PartitionWriterMR()); }
/**
 * Configures the application: a text input file set, a space-separated output file set,
 * and the two MapReduce jobs under test.
 */
@Override
public void configure() {
  setName("AppWithMapReduceUsingMultipleOutputs");
  setDescription("Application with MapReduce job using multiple outputs");
  // Raw purchases come in as plain text.
  createDataset(PURCHASES, "fileSet", FileSetProperties.builder()
    .setInputFormat(TextInputFormat.class)
    .build());
  // Separated purchases are written back out space-delimited.
  DatasetProperties separatedProps = FileSetProperties.builder()
    .setOutputFormat(TextOutputFormat.class)
    .setOutputProperty(TextOutputFormat.SEPERATOR, " ")
    .build();
  createDataset(SEPARATED_PURCHASES, "fileSet", separatedProps);
  addMapReduce(new SeparatePurchases());
  addMapReduce(new InvalidMapReduce());
}
/**
 * Configures the Spark program: a colon-separated text input file set, an output table,
 * and a test plugin registration.
 */
@Override
protected void configure() {
  setName(SPARK);
  setMainClass(getClass());
  DatasetProperties inputProps = FileSetProperties.builder()
    .setInputFormat(TextInputFormat.class)
    .setOutputFormat(TextOutputFormat.class)
    .setOutputProperty(TextOutputFormat.SEPERATOR, ":")
    .build();
  createDataset(SPARK_INPUT, FileSet.class, inputProps);
  createDataset(SPARK_TABLE, Table.class);
  usePlugin("t1", "n1", "plugin", PluginProperties.builder().add(KEY, TEST).build());
}