protected HCatSchema getTableSchema() throws Exception {
    Configuration conf = new Configuration();
    Job job = new Job(conf, "hcat mapreduce read schema test");
    job.setJarByClass(this.getClass());

    // input/output settings
    job.setInputFormatClass(HCatInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    HCatInputFormat.setInput(job, dbName, tableName);

    return HCatInputFormat.getTableSchema(job.getConfiguration());
}
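// A minimal, hedged sketch (not part of the test above) of how a returned HCatSchema is
// typically inspected; it assumes only the standard HCatSchema/HCatFieldSchema API from
// org.apache.hive.hcatalog.data.schema. The helper name is illustrative.
private static void printTableSchema(HCatSchema schema) {
    for (HCatFieldSchema field : schema.getFields()) {
        // Each field carries its column name and Hive type string (e.g. "int", "string").
        System.out.println(field.getName() + " : " + field.getTypeString());
    }
}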
/**
 * Creates a HCatInputFormat for the given database, table, and
 * {@link org.apache.hadoop.conf.Configuration}.
 * By default, the InputFormat returns {@link org.apache.hive.hcatalog.data.HCatRecord}.
 * The return type of the InputFormat can be changed to Flink-native tuples by calling
 * {@link HCatInputFormatBase#asFlinkTuples()}.
 *
 * @param database The name of the database to read from.
 * @param table The name of the table to read.
 * @param config The Configuration for the InputFormat.
 * @throws java.io.IOException
 */
public HCatInputFormatBase(String database, String table, Configuration config) throws IOException {
    super();
    this.configuration = config;
    HadoopUtils.mergeHadoopConf(this.configuration);

    this.hCatInputFormat = org.apache.hive.hcatalog.mapreduce.HCatInputFormat
            .setInput(this.configuration, database, table);
    this.outputSchema = org.apache.hive.hcatalog.mapreduce.HCatInputFormat
            .getTableSchema(this.configuration);

    // configure output schema of HCatFormat
    configuration.set("mapreduce.lib.hcat.output.schema", HCatUtil.serialize(outputSchema));
    // set type information
    this.resultType = new WritableTypeInfo(DefaultHCatRecord.class);
}
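// A hedged usage sketch, not part of the class above: reading a table with the
// flink-hcatalog connector's HCatInputFormat (a concrete subclass of HCatInputFormatBase)
// in the DataSet API. The database/table names are illustrative, not from the source.
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.hcatalog.java.HCatInputFormat;
import org.apache.hadoop.conf.Configuration;
import org.apache.hive.hcatalog.data.HCatRecord;

public class HCatReadExample {
    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        // By default the records arrive as HCatRecord; asFlinkTuples() would switch the
        // return type to Flink-native tuples, as the Javadoc above notes.
        env.createInput(new HCatInputFormat<HCatRecord>("default", "mytable", new Configuration()))
            .first(10)
            .print();
    }
}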
@Override
protected void setup(Context context) throws IOException {
    super.publishConfiguration(context.getConfiguration());

    schema = HCatInputFormat.getTableSchema(context.getConfiguration());
    columnSize = schema.getFields().size();
}
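// A hedged sketch (not the original mapper's code) of how the schema and columnSize
// captured in setup() are typically consumed in map(): the key type and what is done with
// each value are assumptions; only the HCatSchema/HCatRecord accessors are standard API.
@Override
public void map(LongWritable key, HCatRecord record, Context context) throws IOException, InterruptedException {
    for (int i = 0; i < columnSize; i++) {
        String fieldName = schema.get(i).getName(); // column name at position i
        Object fieldValue = record.get(i);          // value at the same position
        // ... emit or accumulate (fieldName, fieldValue) as the job requires
    }
}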
@Override
protected void setup(Context context) throws IOException {
    super.publishConfiguration(context.getConfiguration());

    Configuration conf = context.getConfiguration();

    KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata(conf);
    IIManager mgr = IIManager.getInstance(config);
    IIInstance ii = mgr.getII(conf.get(BatchConstants.CFG_II_NAME));
    IISegment seg = ii.getSegment(conf.get(BatchConstants.CFG_II_SEGMENT_NAME), SegmentStatusEnum.NEW);
    this.info = new TableRecordInfo(seg);
    this.rec = this.info.createTableRecord();

    outputKey = new LongWritable();
    outputValue = new ImmutableBytesWritable(rec.getBytes());

    schema = HCatInputFormat.getTableSchema(context.getConfiguration());
    fields = schema.getFields();
}
@Override
protected void setup(Context context) throws IOException {
    super.publishConfiguration(context.getConfiguration());

    Configuration conf = context.getConfiguration();

    KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata(conf);
    cubeName = conf.get(BatchConstants.CFG_CUBE_NAME);
    cube = CubeManager.getInstance(config).getCube(cubeName);
    cubeDesc = cube.getDescriptor();
    intermediateTableDesc = new CubeJoinedFlatTableDesc(cubeDesc, null);

    long baseCuboidId = Cuboid.getBaseCuboidId(cubeDesc);
    Cuboid baseCuboid = Cuboid.findById(cubeDesc, baseCuboidId);
    List<TblColRef> columns = baseCuboid.getColumns();

    ArrayList<Integer> factDictCols = new ArrayList<Integer>();
    RowKeyDesc rowkey = cubeDesc.getRowkey();
    DictionaryManager dictMgr = DictionaryManager.getInstance(config);
    for (int i = 0; i < columns.size(); i++) {
        TblColRef col = columns.get(i);
        if (!rowkey.isUseDictionary(col))
            continue;

        String scanTable = (String) dictMgr.decideSourceData(cubeDesc.getModel(), cubeDesc.getRowkey().getDictionary(col), col, null)[0];
        if (cubeDesc.getModel().isFactTable(scanTable)) {
            factDictCols.add(i);
        }
    }
    this.factDictCols = new int[factDictCols.size()];
    for (int i = 0; i < factDictCols.size(); i++)
        this.factDictCols[i] = factDictCols.get(i);

    schema = HCatInputFormat.getTableSchema(context.getConfiguration());
}
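// A hedged sketch, not from the original job: map() typically visits only the
// dictionary-backed fact-table column positions computed in setup() and emits their raw
// values for distinct-value collection. The key type and the output key/value classes
// (ShortWritable/Text) are assumptions for illustration.
@Override
public void map(LongWritable key, HCatRecord record, Context context) throws IOException, InterruptedException {
    for (int i : factDictCols) {
        Object fieldValue = record.get(i); // value of the i-th flat-table column
        if (fieldValue != null) {
            context.write(new ShortWritable((short) i), new Text(fieldValue.toString()));
        }
    }
}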
try {
    HCatInputFormat.setInput(conf, database == null ? "default" : database, table, partitionFilter);
    HCatSchema tableSchema = HCatInputFormat.getTableSchema(conf);
    columns = tableSchema.getFields();
} catch (IOException exc) {