/**
 * Initializes the input with a null filter.
 * See {@link #setInput(org.apache.hadoop.conf.Configuration, String, String, String)}
 */
public static HCatInputFormat setInput(
    Configuration conf, String dbName, String tableName) throws IOException {
  return setInput(conf, dbName, tableName, null);
}
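A minimal usage sketch of the Configuration-based overload above; the metastore URI and the database/table names are placeholders, not taken from the snippets.

// Hypothetical usage: read the whole table (null filter) and fetch its schema
// from the same Configuration. "default"/"mytable" are placeholder names.
Configuration conf = new Configuration();
conf.set("hive.metastore.uris", "thrift://localhost:9083"); // placeholder metastore
HCatInputFormat.setInput(conf, "default", "mytable");       // filter defaults to null
HCatSchema schema = HCatInputFormat.getTableSchema(conf);   // schema of "mytable"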
@Override
public Iterator<HCatRecord> read() throws HCatException {
  HCatInputFormat inpFmt = new HCatInputFormat();
  RecordReader<WritableComparable, HCatRecord> rr;
  try {
    TaskAttemptContext cntxt = ShimLoader.getHadoopShims().getHCatShim()
        .createTaskAttemptContext(conf, new TaskAttemptID());
    rr = inpFmt.createRecordReader(split, cntxt);
    rr.initialize(split, cntxt);
  } catch (IOException e) {
    throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e);
  } catch (InterruptedException e) {
    throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e);
  }
  return new HCatRecordItr(rr);
}
@Override
public ReaderContext prepareRead() throws HCatException {
  try {
    Job job = new Job(conf);
    HCatInputFormat hcif = HCatInputFormat.setInput(
        job, re.getDbName(), re.getTableName(), re.getFilterString());
    ReaderContextImpl cntxt = new ReaderContextImpl();
    cntxt.setInputSplits(hcif.getSplits(
        ShimLoader.getHadoopShims().getHCatShim().createJobContext(job.getConfiguration(), null)));
    cntxt.setConf(job.getConfiguration());
    return cntxt;
  } catch (IOException e) {
    throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e);
  } catch (InterruptedException e) {
    throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e);
  }
}
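For context, prepareRead() and read() above back HCatalog's data-transfer reader API. A rough end-to-end sketch of that flow, with placeholder database/table names, config map, and slave index, might look like:

// Sketch of the reader-side data transfer flow (placeholders throughout).
ReadEntity entity = new ReadEntity.Builder()
    .withDatabase("default")
    .withTable("mytable")
    .build();
Map<String, String> config = new HashMap<String, String>();
HCatReader masterReader = DataTransferFactory.getHCatReader(entity, config);
ReaderContext cntxt = masterReader.prepareRead();   // master side: plans the input splits

int slaveNumber = 0;                                 // placeholder index of this slave/task
HCatReader splitReader = DataTransferFactory.getHCatReader(cntxt, slaveNumber);
Iterator<HCatRecord> records = splitReader.read();   // slave side: iterate one split
while (records.hasNext()) {
  HCatRecord record = records.next();
  // process the record
}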
/**
 * Creates a HCatInputFormat for the given database, table, and
 * {@link org.apache.hadoop.conf.Configuration}.
 * By default, the InputFormat returns {@link org.apache.hive.hcatalog.data.HCatRecord}.
 * The return type of the InputFormat can be changed to Flink-native tuples by calling
 * {@link HCatInputFormatBase#asFlinkTuples()}.
 *
 * @param database The name of the database to read from.
 * @param table The name of the table to read.
 * @param config The Configuration for the InputFormat.
 * @throws java.io.IOException
 */
public HCatInputFormatBase(String database, String table, Configuration config) throws IOException {
  super();
  this.configuration = config;
  HadoopUtils.mergeHadoopConf(this.configuration);

  this.hCatInputFormat =
      org.apache.hive.hcatalog.mapreduce.HCatInputFormat.setInput(this.configuration, database, table);
  this.outputSchema =
      org.apache.hive.hcatalog.mapreduce.HCatInputFormat.getTableSchema(this.configuration);

  // configure output schema of HCatFormat
  configuration.set("mapreduce.lib.hcat.output.schema", HCatUtil.serialize(outputSchema));
  // set type information
  this.resultType = new WritableTypeInfo(DefaultHCatRecord.class);
}
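A sketch of how this constructor is typically reached from a Flink batch job, assuming the concrete org.apache.flink.hcatalog.java.HCatInputFormat subclass and a placeholder table name:

// Hypothetical Flink usage; "default"/"mytable" are placeholders.
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
HCatInputFormat<HCatRecord> hcatFormat =
    new HCatInputFormat<HCatRecord>("default", "mytable", new Configuration());
DataSet<HCatRecord> records = env.createInput(hcatFormat);
records.print();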
// Fragment from a loader's setLocation(): both paths wrap the HCatInputFormat calls
// in a try/catch that rethrows as IOException. The enclosing method and the code
// between the two blocks are elided in the original fragment.
try {
  HCatInputFormat.setInput(job, dbName, tableName, getPartitionFilterString());
  HCatInputFormat.setOutputSchema(job, outputSchema);
} catch (Exception e) {
  throw new IOException(e);
}
// ... (intervening code elided in the original fragment)
try {
  // reuse the table schema cached in the UDF properties as the output schema
  HCatSchema hcatTableSchema = (HCatSchema) udfProps.get(HCatConstants.HCAT_TABLE_SCHEMA);
  outputSchema = hcatTableSchema;
  HCatInputFormat.setOutputSchema(job, outputSchema);
} catch (Exception e) {
  throw new IOException(e);
}
@Override
public InputFormat<?, ?> getInputFormat() throws IOException {
  if (hcatInputFormat == null) {
    hcatInputFormat = new HCatInputFormat();
  }
  return hcatInputFormat;
}
@Test
public void testGetPartitionAndDataColumns() throws Exception {
  Configuration conf = new Configuration();
  Job myJob = new Job(conf, "hcatTest");

  HCatInputFormat.setInput(myJob, "default", "testHCIFMethods");

  HCatSchema cols = HCatInputFormat.getDataColumns(myJob.getConfiguration());
  Assert.assertTrue(cols.getFields() != null);
  Assert.assertEquals(cols.getFields().size(), 2);
  Assert.assertTrue(cols.getFields().get(0).getName().equals("a"));
  Assert.assertTrue(cols.getFields().get(1).getName().equals("b"));
  Assert.assertTrue(cols.getFields().get(0).getType().equals(HCatFieldSchema.Type.STRING));
  Assert.assertTrue(cols.getFields().get(1).getType().equals(HCatFieldSchema.Type.INT));

  HCatSchema pcols = HCatInputFormat.getPartitionColumns(myJob.getConfiguration());
  Assert.assertTrue(pcols.getFields() != null);
  Assert.assertEquals(pcols.getFields().size(), 2);
  Assert.assertTrue(pcols.getFields().get(0).getName().equals("x"));
  Assert.assertTrue(pcols.getFields().get(1).getName().equals("y"));
  Assert.assertTrue(pcols.getFields().get(0).getType().equals(HCatFieldSchema.Type.STRING));
  Assert.assertTrue(pcols.getFields().get(1).getType().equals(HCatFieldSchema.Type.STRING));
}
@Override
protected void setup(Context context) throws IOException {
  super.publishConfiguration(context.getConfiguration());
  schema = HCatInputFormat.getTableSchema(context.getConfiguration());
  columnSize = schema.getFields().size();
}
private org.apache.hadoop.mapred.RecordReader createBaseRecordReader(HCatSplit hcatSplit,
    HiveStorageHandler storageHandler, TaskAttemptContext taskContext) throws IOException {
  JobConf jobConf = HCatUtil.getJobConfFromContext(taskContext);
  HCatUtil.copyJobPropertiesToJobConf(hcatSplit.getPartitionInfo().getJobProperties(), jobConf);
  org.apache.hadoop.mapred.InputFormat inputFormat =
      HCatInputFormat.getMapRedInputFormat(jobConf, storageHandler.getInputFormatClass());
  return inputFormat.getRecordReader(hcatSplit.getBaseSplit(), jobConf,
      InternalUtil.createReporter(taskContext));
}
@Override
public HadoopInputSplit[] createInputSplits(int minNumSplits) throws IOException {
  configuration.setInt("mapreduce.input.fileinputformat.split.minsize", minNumSplits);

  JobContext jobContext = new JobContextImpl(configuration, new JobID());

  List<InputSplit> splits;
  try {
    splits = this.hCatInputFormat.getSplits(jobContext);
  } catch (InterruptedException e) {
    throw new IOException("Could not get Splits.", e);
  }

  HadoopInputSplit[] hadoopInputSplits = new HadoopInputSplit[splits.size()];
  for (int i = 0; i < hadoopInputSplits.length; i++) {
    hadoopInputSplits[i] = new HadoopInputSplit(i, splits.get(i), jobContext);
  }
  return hadoopInputSplits;
}
@Override
public void open(HadoopInputSplit split) throws IOException {
  TaskAttemptContext context = new TaskAttemptContextImpl(configuration, new TaskAttemptID());

  try {
    this.recordReader = this.hCatInputFormat
        .createRecordReader(split.getHadoopInputSplit(), context);
    this.recordReader.initialize(split.getHadoopInputSplit(), context);
  } catch (InterruptedException e) {
    throw new IOException("Could not create RecordReader.", e);
  } finally {
    this.fetched = false;
  }
}
protected HCatSchema getTableSchema() throws Exception {
  Configuration conf = new Configuration();
  Job job = new Job(conf, "hcat mapreduce read schema test");
  job.setJarByClass(this.getClass());

  // input/output settings
  job.setInputFormatClass(HCatInputFormat.class);
  job.setOutputFormatClass(TextOutputFormat.class);

  HCatInputFormat.setInput(job, dbName, tableName);
  return HCatInputFormat.getTableSchema(job.getConfiguration());
}
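Given the helper above, the returned schema can be inspected field by field; a purely illustrative sketch using the getters that also appear in the test snippet earlier:

// Illustrative only: print each column's name and HCatalog type.
HCatSchema tableSchema = getTableSchema();
for (HCatFieldSchema field : tableSchema.getFields()) {
  System.out.println(field.getName() + " : " + field.getType());
}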
@SuppressWarnings("unchecked") private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException { this.fieldNames = new String[in.readInt()]; for (int i = 0; i < this.fieldNames.length; i++) { this.fieldNames[i] = in.readUTF(); } Configuration configuration = new Configuration(); configuration.readFields(in); if (this.configuration == null) { this.configuration = configuration; } this.hCatInputFormat = new org.apache.hive.hcatalog.mapreduce.HCatInputFormat(); this.outputSchema = (HCatSchema) HCatUtil.deserialize(this.configuration.get("mapreduce.lib.hcat.output.schema")); }
/**
 * Initializes the input with a null filter.
 * See {@link #setInput(org.apache.hadoop.conf.Configuration, String, String, String)}
 */
public static HCatInputFormat setInput(
    Job job, String dbName, String tableName) throws IOException {
  return setInput(job.getConfiguration(), dbName, tableName, null);
}
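The Job-based overload is commonly paired with setOutputSchema, as in the loader fragment earlier, to prune the columns actually read. A hedged sketch with placeholder database, table, and column subset:

// Hypothetical projection example; "default"/"mytable" and the chosen subset are placeholders.
Job job = Job.getInstance(new Configuration());
HCatInputFormat.setInput(job, "default", "mytable");
HCatSchema tableSchema = HCatInputFormat.getTableSchema(job.getConfiguration());
// keep only the first column of the table as the output schema
HCatSchema projection = new HCatSchema(tableSchema.getFields().subList(0, 1));
HCatInputFormat.setOutputSchema(job, projection);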
List<HCatFieldSchema> columns;
try {
  HCatInputFormat.setInput(conf, database == null ? "default" : database, table, partitionFilter);
  HCatSchema tableSchema = HCatInputFormat.getTableSchema(conf);
  columns = tableSchema.getFields();
} catch (IOException exc) {
  // the original fragment ends here; presumably the exception is wrapped and rethrown
  throw new RuntimeException(exc);
}