/**
 * Gets the complete schema of the table specified in the HCatOutputFormat.setOutput
 * call on the specified job context.
 * Note: this is the full table schema — the record schema *and* the partitioning
 * schema combined (contrast with {@link #getTableSchema(Configuration)}).
 *
 * @param conf the Configuration object
 * @return the table schema, including the record-schema and partitioning schema
 * @throws IOException if HCatOutputFormat.setOutput has not been called for the passed context
 */
public static HCatSchema getTableSchemaWithPartitionColumns(Configuration conf) throws IOException {
  return getJobInfo(conf).getTableInfo().getAllColumns();
}
/** * Specifies a SQL-like filter condition on the table's partition columns. * Filter conditions on non-partition columns are invalid. * A partition filter can significantly reduce the amount of data to be read. * * @param filter A SQL-like filter condition on the table's partition columns. * @return This InputFormat with specified partition filter. * @throws java.io.IOException */ public HCatInputFormatBase<T> withFilter(String filter) throws IOException { // set filter this.hCatInputFormat.setFilter(filter); return this; }
/**
 * Gets the record schema of the table specified in the HCatOutputFormat.setOutput
 * call on the specified job context.
 * Note: this excludes the table's partition columns (contrast with
 * {@link #getTableSchemaWithPartitionColumns(Configuration)}).
 *
 * @param conf the Configuration object
 * @return the table schema, excluding partition columns
 * @throws IOException if HCatOutputFormat.setOutput has not been called for the passed context
 */
public static HCatSchema getTableSchema(Configuration conf) throws IOException {
  // Data columns only — partition columns are deliberately not included here.
  return getJobInfo(conf).getTableInfo().getDataColumns();
}
/**
 * Aborts the task attempt. For dynamically-partitioned writes, abort is routed
 * through the TaskCommitContextRegistry (with its per-task cleanup state always
 * discarded afterwards); otherwise the wrapped base committer handles it.
 */
@Override
public void abortTask(TaskAttemptContext context) throws IOException {
  if (dynamicPartitioningUsed) {
    try {
      TaskCommitContextRegistry.getInstance().abortTask(context);
    } finally {
      // Always drop the registered cleanup state, even if the abort itself failed.
      TaskCommitContextRegistry.getInstance().discardCleanupFor(context);
    }
  } else {
    // The work output path is normally set by the MR framework; restore it
    // before delegating to the base committer.
    FileOutputFormatContainer.setWorkOutputPath(context);
    getBaseOutputCommitter().abortTask(HCatMapRedUtil.createTaskAttemptContext(context));
  }
}
@Override public OutputCommitter getOutputCommitter(TaskAttemptContext context) throws IOException, InterruptedException { //this needs to be manually set, under normal circumstances MR Task does this setWorkOutputPath(context); return new FileOutputCommitterContainer(context, HCatBaseOutputFormat.getJobInfo(context.getConfiguration()).isDynamicPartitioningUsed() ? null : new JobConf(context.getConfiguration()).getOutputCommitter()); }
/**
 * Initializes the input with a null (i.e. match-everything) partition filter.
 * See {@link #setInput(org.apache.hadoop.conf.Configuration, String, String, String)}
 */
public static HCatInputFormat setInput(
    Configuration conf, String dbName, String tableName) throws IOException {
  return setInput(conf, dbName, tableName, null);
}
@Override public boolean needsTaskCommit(TaskAttemptContext context) throws IOException { if (!dynamicPartitioningUsed) { FileOutputFormatContainer.setWorkOutputPath(context); return getBaseOutputCommitter().needsTaskCommit(HCatMapRedUtil.createTaskAttemptContext(context)); } else { // called explicitly through FileRecordWriterContainer.close() if dynamic - return false by default return true; } }
@Override public void setupJob(JobContext context) throws IOException { if (getBaseOutputCommitter() != null && !dynamicPartitioningUsed) { getBaseOutputCommitter().setupJob(HCatMapRedUtil.createJobContext(context)); } // in dynamic usecase, called through FileRecordWriterContainer }
/**
 * Gets the table schema (all columns, via the partition's TableInfo).
 * @return the table schema
 */
public HCatSchema getTableSchema() {
  // TableInfo is expected to have been populated before clients ask for the
  // schema; the assert documents (and, with -ea, enforces) that invariant.
  assert this.partitionInfo.getTableInfo() != null : "TableInfo should have been set at this point.";
  return this.partitionInfo.getTableInfo().getAllColumns();
}
/**
 * Builds a LocalFileWriter for the given record.
 * Note: the record value itself is not inspected here — every record gets a
 * writer constructed from the shared base writer, object inspector, SerDe and
 * job info held by this container.
 */
@Override
protected LocalFileWriter getLocalFileWriter(HCatRecord value) throws IOException, HCatException {
  return new LocalFileWriter(getBaseRecordWriter(), objectInspector, serDe, jobInfo);
}
}
/**
 * Closes the wrapped record writer, adapting the mapreduce TaskAttemptContext
 * into the reporter object produced by InternalUtil.createReporter.
 */
@Override
public void close(TaskAttemptContext context) throws IOException, InterruptedException {
  getBaseRecordWriter().close(InternalUtil.createReporter(context));
}
/**
 * Returns the underlying HCatInputFormat, creating and caching it on first use.
 */
@Override
public InputFormat<?, ?> getInputFormat() throws IOException {
  if (hcatInputFormat != null) {
    return hcatInputFormat;
  }
  // Lazily instantiate on first access and reuse thereafter.
  hcatInputFormat = new HCatInputFormat();
  return hcatInputFormat;
}
/**
 * Gets the data schema, as reported by the underlying partition info's
 * partition schema.
 * @return the data schema
 */
public HCatSchema getDataSchema() {
  return this.partitionInfo.getPartitionSchema();
}
/**
 * Configures the output storage handler with no dynamic-partition values.
 * @param jobContext the job context
 * @throws IOException if configuration of the storage handler fails
 */
@SuppressWarnings("unchecked")
static void configureOutputStorageHandler(
    JobContext jobContext) throws IOException {
  // The null is cast to List<String> purely to select the intended overload.
  configureOutputStorageHandler(jobContext, (List<String>) null);
}
/**
 * Configures the storage handler for a dynamically-partitioned write by
 * forwarding the dynamic partition values to HCatOutputFormat.
 */
protected void configureDynamicStorageHandler(JobContext context, List<String> dynamicPartVals)
    throws IOException {
  HCatOutputFormat.configureOutputStorageHandler(context, dynamicPartVals);
}
}
/**
 * Creates a MultiOutputCommitter for this task attempt.
 * NOTE(review): presumably the committer fans commit operations out to each
 * configured sub-output — see MultiOutputCommitter for the actual behavior.
 */
@Override
public OutputCommitter getOutputCommitter(TaskAttemptContext context)
    throws IOException, InterruptedException {
  return new MultiOutputCommitter(context);
}
/**
 * Creates a MultiRecordWriter for this task attempt.
 * NOTE(review): presumably the writer routes records to the appropriate
 * sub-output — see MultiRecordWriter for the actual behavior.
 */
@Override
public RecordWriter<Writable, Writable> getRecordWriter(TaskAttemptContext context)
    throws IOException, InterruptedException {
  return new MultiRecordWriter(context);
}
/**
 * Sets up the external dynamic-partitioned test fixture: the table name is
 * suffixed with the storage format name so runs for different formats do not
 * collide, and the write records and data columns are pre-generated.
 */
public TestHCatExternalDynamicPartitioned(String formatName, String serdeClass,
    String inputFormatClass, String outputFormatClass) throws Exception {
  super(formatName, serdeClass, inputFormatClass, outputFormatClass);
  tableName = "testHCatExternalDynamicPartitionedTable_" + formatName;
  generateWriteRecords(NUM_RECORDS, NUM_PARTITIONS, 0);
  generateDataColumns();
}
/**
 * Sets up the dynamic-partitioned test fixture: the table name is suffixed
 * with the storage format name so runs for different formats do not collide,
 * and the write records and data columns are pre-generated.
 */
public TestHCatDynamicPartitioned(String formatName, String serdeClass,
    String inputFormatClass, String outputFormatClass) throws Exception {
  super(formatName, serdeClass, inputFormatClass, outputFormatClass);
  tableName = "testHCatDynamicPartitionedTable_" + formatName;
  generateWriteRecords(NUM_RECORDS, NUM_PARTITIONS, 0);
  generateDataColumns();
}
/**
 * Convenience overload of runMRCreate that delegates to the full overload with
 * defaults for the two trailing arguments (true, null) — see that overload for
 * their meaning.
 */
Job runMRCreate(Map<String, String> partitionValues,
    List<HCatFieldSchema> partitionColumns, List<HCatRecord> records,
    int writeCount, boolean assertWrite) throws Exception {
  return runMRCreate(partitionValues, partitionColumns, records, writeCount,
      assertWrite, true, null);
}