/**
 * Throughput of the csv-input step for one partition.
 *
 * @param partitionID partition whose csv-input step time is looked up
 * @return records per second, truncated to int
 */
private int getReadCSVSpeed(String partitionID) {
  double csvStepSeconds = getCsvInputStepTime(partitionID);
  double recordsPerSecond = totalRecords / csvStepSeconds;
  return (int) recordsPerSecond;
}
/**
 * Sums the per-stage timings for one partition, caches the result in
 * the {@code totalTime} field, and returns it.
 *
 * @param partitionID partition whose stage times are accumulated
 * @return total load time in seconds
 */
private double getTotalTime(String partitionID) {
  double accumulated = getLoadCsvfilesToDfTime()
      + getDicShuffleAndWriteFileTotalTime()
      + getLruCacheLoadTime()
      + getDictionaryValuesTotalTime(partitionID)
      + getDictionaryValue2MdkAdd2FileTime(partitionID);
  this.totalTime = accumulated;
  return this.totalTime;
}
/**
 * Throughput of the surrogate-key generation step for one partition.
 *
 * @param partitionID partition whose dictionary-value generation time is used
 * @return records per second, truncated to int
 */
private int getGenSurKeySpeed(String partitionID) {
  double surrogateKeySeconds = getGeneratingDictionaryValuesTime(partitionID);
  return (int) (totalRecords / surrogateKeySeconds);
}
/**
 * Throughput of the dictionary-generation phase: records per second over the
 * combined time of loading csv files to a DataFrame and the global dictionary
 * shuffle/write.
 *
 * <p>Fix: the original expression {@code totalRecords / getLoadCsvfilesToDfTime()
 * + getDicShuffleAndWriteFileTotalTime()} divided by only the first stage's time
 * and then ADDED the second stage's time to the speed (operator precedence);
 * the divisor must be the sum of both stage times, matching the other
 * {@code get*Speed} helpers in this class.
 *
 * @return records per second, truncated to int
 */
private int getGenDicSpeed() {
  double dictionaryPhaseSeconds =
      getLoadCsvfilesToDfTime() + getDicShuffleAndWriteFileTotalTime();
  return (int) (totalRecords / dictionaryPhaseSeconds);
}
// Logs a summary banner of overall load throughput for one partition.
// NOTE(review): statement order matters here — getTotalTime(partitionID) caches
// its result in the totalTime field before getLoadSpeed() is called, which
// presumably reads that field; do not reorder these calls. TODO confirm against
// getLoadSpeed's implementation.
// NOTE(review): the first two speed lines lack a space before "records/s"
// while the later ones have one — likely an oversight in the log text.
private void printLoadSpeedInfo(String partitionID) { LOGGER.info("===============Load_Speed_Info==============="); LOGGER.info("Total Num of Records Processed: " + getTotalRecords()); LOGGER.info("Total Time Cost: " + getTotalTime(partitionID) + "(s)"); LOGGER.info("Total Load Speed: " + getLoadSpeed() + "records/s"); LOGGER.info("Generate Dictionaries Speed: " + getGenDicSpeed() + "records/s"); LOGGER.info("Read CSV Speed: " + getReadCSVSpeed(partitionID) + " records/s"); LOGGER.info("Generate Surrogate Key Speed: " + getGenSurKeySpeed(partitionID) + " records/s"); LOGGER.info("Sort Key/Write Temp Files Speed: " + getSortKeySpeed(partitionID) + " records/s"); LOGGER.info("MDK Step Speed: " + getMDKSpeed(partitionID) + " records/s"); LOGGER.info("============================================="); }
/**
 * Logs the stage-4 timing breakdown for one partition: the combined cost of
 * generating dictionary values, sorting, and writing temp files, followed by
 * its two sub-steps (csv read, surrogate-key transform).
 *
 * @param partitionID partition whose stage-4 timings are logged
 */
private void printDictionaryValuesGenStatisticsInfo(String partitionID) {
  double stage4TotalSeconds = getDictionaryValuesTotalTime(partitionID);
  LOGGER.info("STAGE 4 ->Total cost of gen dictionary values, sort and write to temp files: "
      + stage4TotalSeconds + "(s)");
  double readCsvSeconds = getCsvInputStepTime(partitionID);
  double surrogateKeySeconds = getGeneratingDictionaryValuesTime(partitionID);
  LOGGER.info("STAGE 4.1 -> |_read csv file: " + readCsvSeconds + "(s)");
  LOGGER.info("STAGE 4.2 -> |_transform to surrogate key: " + surrogateKeySeconds + "(s)");
}
/**
 * Logs block-distribution info: per-host block counts when the host map is
 * populated, otherwise per-partition block counts when the partition map is
 * populated. Logs only the header when both maps are empty.
 */
private void printHostBlockMapInfo() {
  LOGGER.info("========== BLOCK_INFO ==========");
  boolean hasHostInfo = getHostBlockMap().size() > 0;
  boolean hasPartitionInfo = getPartitionBlockMap().size() > 0;
  if (hasHostInfo) {
    for (String nodeHost : getHostBlockMap().keySet()) {
      LOGGER.info("BLOCK_INFO ->Node host: " + nodeHost);
      LOGGER.info("BLOCK_INFO ->The block count in this node: "
          + getHostBlockMap().get(nodeHost));
    }
  } else if (hasPartitionInfo) {
    for (String partitionId : getPartitionBlockMap().keySet()) {
      LOGGER.info("BLOCK_INFO ->Partition ID: " + partitionId);
      LOGGER.info("BLOCK_INFO ->The block count in this partition: "
          + getPartitionBlockMap().get(partitionId));
    }
  }
}
/**
 * Throughput of the MDK (multi-dimensional key) step for one partition.
 *
 * @param partitionID partition whose MDK/fact-file write time is used
 * @return records per second, truncated to int
 */
private int getMDKSpeed(String partitionID) {
  double mdkStepSeconds = getDictionaryValue2MdkAdd2FileTime(partitionID);
  return (int) (totalRecords / mdkStepSeconds);
}
/**
 * Chooses the load-statistics implementation: the real recorder when the
 * configuration flag is "true" (any case), otherwise the no-op dummy.
 *
 * <p>Fix: the original called {@code loadStatisticsInstanceType.equalsIgnoreCase("true")},
 * which throws NullPointerException when the flag is null/unset.
 * {@link Boolean#parseBoolean} performs the same case-insensitive "true" check
 * but is null-safe (returns false), so an unset flag now falls back to the
 * dummy instance instead of crashing.
 *
 * @return the statistics instance to use for this load
 */
private static LoadStatistics genLoadStatisticsInstance() {
  if (Boolean.parseBoolean(loadStatisticsInstanceType)) {
    return CarbonLoadStatisticsImpl.getInstance();
  } else {
    return CarbonLoadStatisticsDummy.getInstance();
  }
}
/**
 * Logs the timings of the two dictionary-generation stages: csv-to-DataFrame
 * load (stage 1) and global dictionary shuffle/write (stage 2).
 *
 * <p>Fix: the original STAGE 2 line contained a stray unary plus
 * ({@code "... " + + dicShuffleAndWriteFileTotalTime}) — a no-op on the double
 * but clearly a typo; removed. The needlessly split STAGE 1 string constant is
 * merged; the logged text is unchanged.
 */
private void printDicGenStatisticsInfo() {
  double loadCsvfilesToDfTime = getLoadCsvfilesToDfTime();
  LOGGER.info("STAGE 1 ->Load csv to DataFrame and generate block distinct values: "
      + loadCsvfilesToDfTime + "(s)");
  double dicShuffleAndWriteFileTotalTime = getDicShuffleAndWriteFileTotalTime();
  LOGGER.info("STAGE 2 ->Global dict shuffle and write dict file: "
      + dicShuffleAndWriteFileTotalTime + "(s)");
}
/**
 * Logs the stage-5 timing for one partition: transforming to MDK,
 * compressing, and writing the fact files.
 *
 * @param partitionID partition whose stage-5 timing is logged
 */
private void printGenMdkStatisticsInfo(String partitionID) {
  double stage5Seconds = getDictionaryValue2MdkAdd2FileTime(partitionID);
  LOGGER.info("STAGE 5 ->Transform to MDK, compress and write fact files: "
      + stage5Seconds + "(s)");
}