protected HCatSchema getTableSchema() throws Exception {
    Configuration conf = new Configuration();
    Job job = new Job(conf, "hcat mapreduce read schema test");
    job.setJarByClass(this.getClass());

    // input/output settings
    job.setInputFormatClass(HCatInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    HCatInputFormat.setInput(job, dbName, tableName);

    return HCatInputFormat.getTableSchema(job.getConfiguration());
}
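// A minimal, hedged sketch (not part of the test above) of how a returned HCatSchema is
// typically inspected; it assumes only the standard HCatSchema/HCatFieldSchema API from
// org.apache.hive.hcatalog.data.schema. The helper name is illustrative.
private static void printTableSchema(HCatSchema schema) {
    for (HCatFieldSchema field : schema.getFields()) {
        // Each field carries its column name and Hive type string (e.g. "int", "string").
        System.out.println(field.getName() + " : " + field.getTypeString());
    }
}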
/**
 * Creates a HCatInputFormat for the given database, table, and
 * {@link org.apache.hadoop.conf.Configuration}.
 * By default, the InputFormat returns {@link org.apache.hive.hcatalog.data.HCatRecord}.
 * The return type of the InputFormat can be changed to Flink-native tuples by calling
 * {@link HCatInputFormatBase#asFlinkTuples()}.
 *
 * @param database The name of the database to read from.
 * @param table The name of the table to read.
 * @param config The Configuration for the InputFormat.
 * @throws java.io.IOException
 */
public HCatInputFormatBase(String database, String table, Configuration config) throws IOException {
    super();
    this.configuration = config;
    HadoopUtils.mergeHadoopConf(this.configuration);

    this.hCatInputFormat = org.apache.hive.hcatalog.mapreduce.HCatInputFormat
            .setInput(this.configuration, database, table);
    this.outputSchema = org.apache.hive.hcatalog.mapreduce.HCatInputFormat
            .getTableSchema(this.configuration);

    // configure output schema of HCatFormat
    configuration.set("mapreduce.lib.hcat.output.schema", HCatUtil.serialize(outputSchema));
    // set type information
    this.resultType = new WritableTypeInfo(DefaultHCatRecord.class);
}
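// A hedged usage sketch, not part of the class above: reading a table with the
// flink-hcatalog connector's HCatInputFormat (a concrete subclass of HCatInputFormatBase)
// in the DataSet API. The database/table names are illustrative, not from the source.
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.hcatalog.java.HCatInputFormat;
import org.apache.hadoop.conf.Configuration;
import org.apache.hive.hcatalog.data.HCatRecord;

public class HCatReadExample {
    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        // By default the records arrive as HCatRecord; asFlinkTuples() would switch the
        // return type to Flink-native tuples, as the Javadoc above notes.
        env.createInput(new HCatInputFormat<HCatRecord>("default", "mytable", new Configuration()))
            .first(10)
            .print();
    }
}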
@Override
protected void setup(Context context) throws IOException {
    super.publishConfiguration(context.getConfiguration());

    schema = HCatInputFormat.getTableSchema(context.getConfiguration());
    columnSize = schema.getFields().size();
}
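// A hedged sketch (not the original mapper's code) of how the schema and columnSize
// captured in setup() are typically consumed in map(): the key type and what is done with
// each value are assumptions; only the HCatSchema/HCatRecord accessors are standard API.
@Override
public void map(LongWritable key, HCatRecord record, Context context) throws IOException, InterruptedException {
    for (int i = 0; i < columnSize; i++) {
        String fieldName = schema.get(i).getName(); // column name at position i
        Object fieldValue = record.get(i);          // value at the same position
        // ... emit or accumulate (fieldName, fieldValue) as the job requires
    }
}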
@Override
protected void setup(Context context) throws IOException {
    super.publishConfiguration(context.getConfiguration());

    Configuration conf = context.getConfiguration();

    KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata(conf);
    IIManager mgr = IIManager.getInstance(config);
    IIInstance ii = mgr.getII(conf.get(BatchConstants.CFG_II_NAME));
    IISegment seg = ii.getSegment(conf.get(BatchConstants.CFG_II_SEGMENT_NAME), SegmentStatusEnum.NEW);
    this.info = new TableRecordInfo(seg);
    this.rec = this.info.createTableRecord();

    outputKey = new LongWritable();
    outputValue = new ImmutableBytesWritable(rec.getBytes());

    schema = HCatInputFormat.getTableSchema(context.getConfiguration());
    fields = schema.getFields();
}
@Override
protected void setup(Context context) throws IOException {
    super.publishConfiguration(context.getConfiguration());

    Configuration conf = context.getConfiguration();

    KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata(conf);
    cubeName = conf.get(BatchConstants.CFG_CUBE_NAME);
    cube = CubeManager.getInstance(config).getCube(cubeName);
    cubeDesc = cube.getDescriptor();
    intermediateTableDesc = new CubeJoinedFlatTableDesc(cubeDesc, null);

    long baseCuboidId = Cuboid.getBaseCuboidId(cubeDesc);
    Cuboid baseCuboid = Cuboid.findById(cubeDesc, baseCuboidId);
    List<TblColRef> columns = baseCuboid.getColumns();

    ArrayList<Integer> factDictCols = new ArrayList<Integer>();
    RowKeyDesc rowkey = cubeDesc.getRowkey();
    DictionaryManager dictMgr = DictionaryManager.getInstance(config);
    for (int i = 0; i < columns.size(); i++) {
        TblColRef col = columns.get(i);
        if (!rowkey.isUseDictionary(col))
            continue;

        String scanTable = (String) dictMgr.decideSourceData(cubeDesc.getModel(), cubeDesc.getRowkey().getDictionary(col), col, null)[0];
        if (cubeDesc.getModel().isFactTable(scanTable)) {
            factDictCols.add(i);
        }
    }
    this.factDictCols = new int[factDictCols.size()];
    for (int i = 0; i < factDictCols.size(); i++)
        this.factDictCols[i] = factDictCols.get(i);

    schema = HCatInputFormat.getTableSchema(context.getConfiguration());
}
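// A hedged sketch, not from the original job: map() typically visits only the
// dictionary-backed fact-table column positions computed in setup() and emits their raw
// values for distinct-value collection. The key type and the output key/value classes
// (ShortWritable/Text) are assumptions for illustration.
@Override
public void map(LongWritable key, HCatRecord record, Context context) throws IOException, InterruptedException {
    for (int i : factDictCols) {
        Object fieldValue = record.get(i); // value of the i-th flat-table column
        if (fieldValue != null) {
            context.write(new ShortWritable((short) i), new Text(fieldValue.toString()));
        }
    }
}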
try {
    HCatInputFormat.setInput(conf, database == null ? "default" : database, table, partitionFilter);
    HCatSchema tableSchema = HCatInputFormat.getTableSchema(conf);
    columns = tableSchema.getFields();
} catch (IOException exc) {