public PartitionDesc(final Partition part) throws HiveException {
  this(part, getTableDesc(part.getTable()));
}
public JobConf pushProjectionsAndFilters(JobConf jobConf, Path path) throws IOException {
  updateMrWork(jobConf);  // TODO: refactor this in HIVE-6366
  final JobConf cloneJobConf = new JobConf(jobConf);
  final PartitionDesc part = pathToPartitionInfo.get(path);
  if ((part != null) && (part.getTableDesc() != null)) {
    Utilities.copyTableJobPropertiesToConf(part.getTableDesc(), cloneJobConf);
  }
  pushProjectionsAndFilters(cloneJobConf, path.toString(), path.toUri().getPath());
  return cloneJobConf;
}
public JobConf pushProjectionsAndFilters(JobConf jobConf, Path path) throws IOException {
  updateMrWork(jobConf);  // TODO: refactor this in HIVE-6366
  final JobConf cloneJobConf = new JobConf(jobConf);
  final PartitionDesc part = HiveFileFormatUtils.getFromPathRecursively(
      pathToPartitionInfo, path, null, false, true);
  try {
    if ((part != null) && (part.getTableDesc() != null)) {
      Utilities.copyTableJobPropertiesToConf(part.getTableDesc(), cloneJobConf);
    }
  } catch (Exception e) {
    throw new IOException(e);
  }
  pushProjectionsAndFilters(cloneJobConf, path.toString(), path.toUri().getPath());
  return cloneJobConf;
}
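// Hedged usage sketch (not from the source): the two methods above match the shape of
// Hive's ProjectionPusher, which Parquet-style record readers call once per split so
// that each split gets a cloned JobConf carrying only its own partition's table
// properties and pushed-down filters. 'pusher' is a hypothetical instance of the class
// declaring pushProjectionsAndFilters.
static JobConf confForSplit(ProjectionPusher pusher, JobConf jobConf, Path filePath)
    throws IOException {
  // The parent directory is the partition path keyed in pathToPartitionInfo.
  return pusher.pushProjectionsAndFilters(jobConf, filePath.getParent());
}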
private VectorPartitionContext(PartitionDesc partDesc) {
  this.partDesc = partDesc;
  TableDesc td = partDesc.getTableDesc();
  // Use table properties in case of unpartitioned tables,
  // and the union of table properties and partition properties, with partition
  // taking precedence, in the case of partitioned tables
  Properties overlayedProps =
      SerDeUtils.createOverlayedProperties(td.getProperties(), partDesc.getProperties());
  Map<String, String> partSpec = partDesc.getPartSpec();
  tableName = String.valueOf(overlayedProps.getProperty("name"));
  partName = String.valueOf(partSpec);
}
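// A minimal plain-java.util sketch of the overlay semantics described in the comment
// above, assuming SerDeUtils.createOverlayedProperties gives partition entries
// precedence over table entries (as the comment states); not the Hive implementation.
static java.util.Properties overlay(java.util.Properties tableProps,
    java.util.Properties partProps) {
  java.util.Properties merged = new java.util.Properties();
  merged.putAll(tableProps);      // table-level defaults first
  if (partProps != null) {
    merged.putAll(partProps);     // partition-level values win on key collisions
  }
  return merged;
}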
private boolean needConversion(PartitionDesc partitionDesc) {
  boolean isAcid =
      AcidUtils.isTablePropertyTransactional(partitionDesc.getTableDesc().getProperties());
  if (Utilities.isSchemaEvolutionEnabled(job, isAcid)
      && Utilities.isInputFileFormatSelfDescribing(partitionDesc)) {
    return false;
  }
  return needConversion(partitionDesc.getTableDesc(), Arrays.asList(partitionDesc));
}
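// Hedged approximation of the self-describing check used above: formats such as ORC
// carry their own schema and handle evolution internally, so Hive can skip row
// conversion for them. The marker-interface test is an assumption about how
// Utilities.isInputFileFormatSelfDescribing decides.
static boolean isSelfDescribing(Class<?> inputFormatClass) {
  return org.apache.hadoop.hive.ql.io.SelfDescribingInputFormatInterface.class
      .isAssignableFrom(inputFormatClass);
}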
private static DruidSerDe createAndInitializeSerde(Configuration jobConf) {
  DruidSerDe serDe = new DruidSerDe();
  MapWork mapWork =
      Preconditions.checkNotNull(Utilities.getMapWork(jobConf), "Map work is null");
  Properties properties = mapWork.getPartitionDescs()
      .stream()
      .map(partitionDesc -> partitionDesc.getTableDesc().getProperties())
      .findAny()
      .orElseThrow(() -> new RuntimeException("Cannot find table properties in the map work"));
  try {
    serDe.initialize(jobConf, properties, null);
  } catch (SerDeException e) {
    throw new RuntimeException("Cannot initialize the serde", e);
  }
  return serDe;
}
@SuppressWarnings("Duplicates") private static KafkaSerDe createAndInitializeSerde(Configuration jobConf) { KafkaSerDe serDe = new KafkaSerDe(); MapWork mapWork = Preconditions.checkNotNull(Utilities.getMapWork(jobConf), "Map work is null"); Properties properties = mapWork.getPartitionDescs() .stream() .map(partitionDesc -> partitionDesc.getTableDesc().getProperties()) .findAny() .orElseThrow(() -> new RuntimeException("Can not find table property at the map work")); try { serDe.initialize(jobConf, properties, null); } catch (SerDeException e) { throw new RuntimeException("Can not initialized the serde", e); } return serDe; } }
private boolean getNextPath() throws Exception {
  while (iterPath.hasNext()) {
    currPath = iterPath.next();
    currDesc = iterPartDesc.next();
    if (isNonNativeTable) {
      return true;
    }
    FileSystem fs = currPath.getFileSystem(job);
    if (fs.exists(currPath)) {
      if (extractValidWriteIdList() != null
          && AcidUtils.isInsertOnlyTable(currDesc.getTableDesc().getProperties())) {
        return true;
      }
      for (FileStatus fStat : listStatusUnderPath(fs, currPath)) {
        if (fStat.getLen() > 0) {
          return true;
        }
      }
    }
  }
  return false;
}
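// Standalone sketch of the "any non-empty file?" test in the loop above, assuming a
// plain fs.listStatus in place of the private listStatusUnderPath helper (which may
// also recurse); shown only to isolate the zero-length-file skip.
static boolean hasNonEmptyFile(FileSystem fs, Path dir) throws IOException {
  for (FileStatus stat : fs.listStatus(dir)) {
    if (stat.getLen() > 0) {
      return true;   // found at least one file with bytes to read
    }
  }
  return false;      // only missing or zero-length files under this path
}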
private void processCurrPathForMmWriteIds(InputFormat inputFormat,
    List<Path> dirs, List<Path> dirsWithOriginals) throws IOException {
  if (inputFormat instanceof HiveInputFormat) {
    dirs.add(currPath);  // No need to process here.
  }
  ValidWriteIdList validWriteIdList;
  if (AcidUtils.isInsertOnlyTable(currDesc.getTableDesc().getProperties())) {
    validWriteIdList = extractValidWriteIdList();
  } else {
    validWriteIdList = null;  // non-MM case
  }
  if (validWriteIdList != null) {
    Utilities.FILE_OP_LOGGER.info("Processing " + currDesc.getTableName() + " for MM paths");
  }
  HiveInputFormat.processPathsForMmRead(
      Lists.newArrayList(currPath), job, validWriteIdList, dirs, dirsWithOriginals);
}
@SuppressWarnings("rawtypes") private static Path createDummyFileForEmptyPartition(Path path, JobConf job, PartitionDesc partDesc, Path hiveScratchDir) throws Exception { String strPath = path.toString(); // The input file does not exist, replace it by a empty file if (partDesc.getTableDesc().isNonNative()) { // if this isn't a hive table we can't create an empty file for it. return path; } Properties props = SerDeUtils.createOverlayedProperties( partDesc.getTableDesc().getProperties(), partDesc.getProperties()); HiveOutputFormat outFileFormat = HiveFileFormatUtils.getHiveOutputFormat(job, partDesc); boolean oneRow = partDesc.getInputFileFormatClass() == OneNullRowInputFormat.class; Path newPath = createEmptyFile(hiveScratchDir, outFileFormat, job, props, oneRow); LOG.info("Changed input file {} to empty file {} ({})", strPath, newPath, oneRow); return newPath; }
@Override
public void configureJobConf(JobConf job) {
  for (PartitionDesc partition : aliasToPartnInfo.values()) {
    PlanUtils.configureJobConf(partition.getTableDesc(), job);
  }
  Collection<Operator<?>> mappers = aliasToWork.values();
  for (FileSinkOperator fs : OperatorUtils.findOperators(mappers, FileSinkOperator.class)) {
    PlanUtils.configureJobConf(fs.getConf().getTableInfo(), job);
  }
}
@Override
public void configureJobConf(JobConf job) {
  for (PartitionDesc partition : aliasToPartnInfo.values()) {
    PlanUtils.configureJobConf(partition.getTableDesc(), job);
  }
  Collection<Operator<?>> mappers = aliasToWork.values();
  for (FileSinkOperator fs : OperatorUtils.findOperators(mappers, FileSinkOperator.class)) {
    PlanUtils.configureJobConf(fs.getConf().getTableInfo(), job);
  }
  for (IConfigureJobConf icjc : OperatorUtils.findOperators(mappers, IConfigureJobConf.class)) {
    icjc.configureJobConf(job);
  }
}
/**
 * Return a deserializer object corresponding to the partitionDesc.
 */
public Deserializer getDeserializer(Configuration conf) throws Exception {
  Properties schema = getProperties();
  String clazzName = getDeserializerClassName();
  Deserializer deserializer = ReflectionUtil.newInstance(
      conf.getClassByName(clazzName).asSubclass(Deserializer.class), conf);
  SerDeUtils.initializeSerDe(deserializer, conf, getTableDesc().getProperties(), schema);
  return deserializer;
}
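// Hedged usage sketch (not from the source): resolve a partition's deserializer and
// inspect the row schema it will produce. 'partDesc' and 'conf' are assumed inputs;
// the cast assumes a table serde, whose top-level inspector is a struct.
static void describeRowSchema(PartitionDesc partDesc, Configuration conf) throws Exception {
  Deserializer deserializer = partDesc.getDeserializer(conf);
  StructObjectInspector rowOI = (StructObjectInspector) deserializer.getObjectInspector();
  System.out.println("Row type: " + rowOI.getTypeName());
}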
public void initEmptyInputChildren(List<Operator<?>> children, Configuration hconf)
    throws SerDeException, Exception {
  setChildOperators(children);
  Map<String, Configuration> tableNameToConf = cloneConfsForNestedColPruning(hconf);
  for (Operator<?> child : children) {
    TableScanOperator tsOp = (TableScanOperator) child;
    StructObjectInspector soi = null;
    PartitionDesc partDesc = conf.getAliasToPartnInfo().get(tsOp.getConf().getAlias());
    Configuration newConf = tableNameToConf.get(partDesc.getTableDesc().getTableName());
    Deserializer serde = partDesc.getTableDesc().getDeserializer();
    partDesc.setProperties(partDesc.getProperties());
    MapOpCtx opCtx = new MapOpCtx(tsOp.getConf().getAlias(), child, partDesc);
    StructObjectInspector tableRowOI = (StructObjectInspector) serde.getObjectInspector();
    initObjectInspector(newConf, opCtx, tableRowOI);
    soi = opCtx.rowObjectInspector;
    child.getParentOperators().add(this);
    childrenOpToOI.put(child, soi);
  }
}
@Override
public int hashCode() {
  final int prime = 31;
  int result = 1;
  result = result * prime
      + (getInputFileFormatClass() == null ? 0 : getInputFileFormatClass().hashCode());
  result = result * prime
      + (getOutputFileFormatClass() == null ? 0 : getOutputFileFormatClass().hashCode());
  result = result * prime + (getProperties() == null ? 0 : getProperties().hashCode());
  result = result * prime + (getTableDesc() == null ? 0 : getTableDesc().hashCode());
  result = result * prime + (getPartSpec() == null ? 0 : getPartSpec().hashCode());
  result = result * prime
      + (getVectorPartitionDesc() == null ? 0 : getVectorPartitionDesc().hashCode());
  return result;
}
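// A matching equals() should compare exactly the six fields hashed above so the
// hashCode/equals contract holds. This is a hedged sketch, not the class's actual
// implementation; it assumes java.util.Objects is imported.
@Override
public boolean equals(Object obj) {
  if (this == obj) {
    return true;
  }
  if (!(obj instanceof PartitionDesc)) {
    return false;
  }
  PartitionDesc other = (PartitionDesc) obj;
  return Objects.equals(getInputFileFormatClass(), other.getInputFileFormatClass())
      && Objects.equals(getOutputFileFormatClass(), other.getOutputFileFormatClass())
      && Objects.equals(getProperties(), other.getProperties())
      && Objects.equals(getTableDesc(), other.getTableDesc())
      && Objects.equals(getPartSpec(), other.getPartSpec())
      && Objects.equals(getVectorPartitionDesc(), other.getVectorPartitionDesc());
}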
public PartitionDesc(final Partition part) throws HiveException {
  PartitionDescConstructorHelper(part, getTableDesc(part.getTable()), true);
  if (Utilities.isInputFileFormatSelfDescribing(this)) {
    // If the input format is self-describing, there is no need to send column info
    // per partition, since it is not used anyway.
    Table tbl = part.getTable();
    setProperties(MetaStoreUtils.getSchemaWithoutCols(part.getTPartition().getSd(),
        part.getTPartition().getSd(), part.getParameters(),
        tbl.getDbName(), tbl.getTableName(), tbl.getPartitionKeys()));
  } else {
    setProperties(part.getMetadataFromPartitionSchema());
  }
}