/**
 * CLI entry point: runs the HashTable tool via {@link ToolRunner} and exits
 * the JVM with the tool's return code.
 */
public static void main(String[] args) throws Exception {
  int exitCode = ToolRunner.run(new HashTable(HBaseConfiguration.create()), args);
  System.exit(exitCode);
}
/**
 * Tool entry: parses arguments, runs the hashing job, and publishes the
 * manifest on success. Returns 0 on success, 1 on bad args or job failure.
 */
@Override
public int run(String[] args) throws Exception {
  // Strip generic Hadoop options; only tool-specific arguments remain.
  String[] remainingArgs = new GenericOptionsParser(getConf(), args).getRemainingArgs();
  if (!doCommandLine(remainingArgs)) {
    return 1;
  }

  Job job = createSubmittableJob(remainingArgs);
  // Record the hash parameters before launching so an interrupted run is
  // distinguishable from a completed one (temp vs. final manifest name).
  writeTempManifestFile();

  boolean succeeded = job.waitForCompletion(true);
  if (!succeeded) {
    LOG.info("Map-reduce job failed!");
    return 1;
  }

  // Promote the temp manifest only after the job completed successfully.
  completeManifest();
  return 0;
}
public Job createSubmittableJob(String[] args) throws IOException { Path partitionsPath = new Path(destPath, PARTITIONS_FILE_NAME); generatePartitions(partitionsPath); Job job = Job.getInstance(getConf(), getConf().get("mapreduce.job.name", "hashTable_" + tableHash.tableName)); Configuration jobConf = job.getConfiguration(); jobConf.setLong(HASH_BATCH_SIZE_CONF_KEY, tableHash.batchSize); job.setJarByClass(HashTable.class); TableMapReduceUtil.initTableMapperJob(tableHash.tableName, tableHash.initScan(), HashMapper.class, ImmutableBytesWritable.class, ImmutableBytesWritable.class, job); // use a TotalOrderPartitioner and reducers to group region output into hash files job.setPartitionerClass(TotalOrderPartitioner.class); TotalOrderPartitioner.setPartitionFile(jobConf, partitionsPath); job.setReducerClass(Reducer.class); // identity reducer job.setNumReduceTasks(tableHash.numHashFiles); job.setOutputKeyClass(ImmutableBytesWritable.class); job.setOutputValueClass(ImmutableBytesWritable.class); job.setOutputFormatClass(MapFileOutputFormat.class); FileOutputFormat.setOutputPath(job, new Path(destPath, HASH_DATA_DIR)); return job; }
/**
 * Human-readable summary of the hash parameters; optional settings are
 * included only when they differ from their defaults.
 */
@Override
public String toString() {
  StringBuilder sb = new StringBuilder();
  sb.append("tableName=").append(tableName);
  if (families != null) {
    sb.append(", families=").append(families);
  }
  sb.append(", batchSize=").append(batchSize);
  sb.append(", numHashFiles=").append(numHashFiles);
  if (!isTableStartRow(startRow)) {
    sb.append(", startRowHex=").append(Bytes.toHex(startRow));
  }
  if (!isTableEndRow(stopRow)) {
    sb.append(", stopRowHex=").append(Bytes.toHex(stopRow));
  }
  if (scanBatch >= 0) {
    sb.append(", scanBatch=").append(scanBatch);
  }
  if (versions >= 0) {
    sb.append(", versions=").append(versions);
  }
  // BUG FIX: the original omitted the ", " separator before startTime/endTime,
  // fusing them with the preceding field (e.g. "...versions=1startTime=...").
  if (startTime != 0) {
    sb.append(", startTime=").append(startTime);
  }
  if (endTime != 0) {
    sb.append(", endTime=").append(endTime);
  }
  return sb.toString();
}
private void hashSourceTable(TableName sourceTableName, Path testDir) throws Exception, IOException { int numHashFiles = 3; long batchSize = 100; // should be 2 batches per region int scanBatch = 1; HashTable hashTable = new HashTable(TEST_UTIL.getConfiguration()); int code = hashTable.run(new String[] { "--batchsize=" + batchSize, "--numhashfiles=" + numHashFiles, "--scanbatch=" + scanBatch, sourceTableName.getNameAsString(), testDir.toString()}); assertEquals("hash table job failed", 0, code); FileSystem fs = TEST_UTIL.getTestFileSystem(); HashTable.TableHash tableHash = HashTable.TableHash.read(fs.getConf(), testDir); assertEquals(sourceTableName.getNameAsString(), tableHash.tableName); assertEquals(batchSize, tableHash.batchSize); assertEquals(numHashFiles, tableHash.numHashFiles); assertEquals(numHashFiles - 1, tableHash.partitions.size()); LOG.info("Hash table completed"); }
// Finds the mode (most frequently occurring value) of `array` by building a
// value -> count frequency table, then scanning for the largest count.
//
// BUG FIXES vs. original:
//  - modeCnt was never updated inside the loop, so the comparison always ran
//    against int.MIN and the reported count was wrong.
//  - `int.MIN` is not valid C#; the constant is `int.MinValue`.
//  - modeKey/modeCnt are now initialized so they are definitely assigned.
var frequencyDistribution = new Dictionary<int, int>();
for (int i = 0; i < array.Length; i++) {
    int key = array[i];
    if (!frequencyDistribution.ContainsKey(key)) {
        frequencyDistribution[key] = 0;
    }
    frequencyDistribution[key]++;
}

int modeKey = 0;
int modeCnt = int.MinValue;
foreach (int key in frequencyDistribution.Keys) {
    int cnt = frequencyDistribution[key];
    if (cnt > modeCnt) {
        modeKey = key;
        modeCnt = cnt; // record the count as well as the key
    }
}
Console.WriteLine("Most frequent is: {0} as it appears {1} times.", modeKey, modeCnt);
public boolean findCommon(int[] arr1, int[] arr2) { HashTable hash = new HashTable(); for (item : arr1){ if !(hash.containsKey(item)){ hash.put(item, "foo"); } } for (item : arr2){ if (hash.containsKey(item)){ return(true); } } return(false); }
//initialize hash table HashTable ht = new HashTable(); //instead of outputBuffer.append, put the values in to the table like ht.put("NEPE", value); ht.put("DK0", value); //etc //print the values after the while loop System.out.println("MREDID:" + ht.get("MREDID")); System.out.println("DK0:" + ht.get("DK0")); System.out.println("NEPE:" + ht.get("NEPE"));
/**
 * Atomically publishes the manifest by renaming the temp manifest file to
 * its final name, marking the hash output as complete.
 *
 * BUG FIX: the original ignored the boolean result of
 * {@link FileSystem#rename}, which reports many failures by returning false
 * rather than throwing — a failed rename would silently leave no manifest.
 *
 * @throws IOException if the rename fails
 */
private void completeManifest() throws IOException {
  Path tempManifestPath = new Path(destPath, TMP_MANIFEST_FILE_NAME);
  Path manifestPath = new Path(destPath, MANIFEST_FILE_NAME);
  FileSystem fs = tempManifestPath.getFileSystem(getConf());
  if (!fs.rename(tempManifestPath, manifestPath)) {
    throw new IOException(
        "Failed to rename manifest " + tempManifestPath + " to " + manifestPath);
  }
}
// Determine whether this mapper's split extends to the end of the table:
// isTableEndRow treats the empty byte array as the open-ended table boundary,
// so a split whose end row is empty covers the last region.
TableSplit split = (TableSplit) context.getInputSplit();
byte[] splitEndRow = split.getEndRow();
boolean reachedEndOfTable = HashTable.isTableEndRow(splitEndRow);
// NOTE(review): this excerpt is elided — the argument-parsing loop header,
// closing braces, option-matching branches, and the try/catch that defines
// `e` are missing, so the fragment is not syntactically complete as shown.
// Only comments are added below; the code text is unchanged.
private boolean doCommandLine(final String[] args) {
  // Reject invocations with fewer than the required positional arguments.
  if (args.length < NUM_ARGS) { printUsage(null); return false;
  // Per-argument dispatch (enclosing loop elided in this excerpt).
  String cmd = args[i];
  if (cmd.equals("-h") || cmd.startsWith("--h")) { printUsage(null); return false;
  // Fallthrough for an unrecognized option.
  printUsage("Invalid argument '" + cmd + "'"); return false;
  // starttime must be strictly less than endtime when both are supplied.
  printUsage("Invalid time range filter: starttime=" + tableHash.startTime + " >= endtime=" + tableHash.endTime); return false;
  // Catch-all error path (the try/catch providing `e` is elided).
  printUsage("Can't start because " + e.getMessage()); return false;
/**
 * Serializes this hash's parameters as a Java properties file at
 * {@code path}. Optional settings are written only when set, mirroring how
 * {@code read} applies defaults for absent keys.
 *
 * @param fs   filesystem to create the file on
 * @param path destination path of the properties file
 * @throws IOException if the file cannot be created or written
 */
void writePropertiesFile(FileSystem fs, Path path) throws IOException {
  Properties props = new Properties();
  props.setProperty("table", tableName);
  if (families != null) {
    props.setProperty("columnFamilies", families);
  }
  props.setProperty("targetBatchSize", Long.toString(batchSize));
  props.setProperty("numHashFiles", Integer.toString(numHashFiles));
  if (!isTableStartRow(startRow)) {
    props.setProperty("startRowHex", Bytes.toHex(startRow));
  }
  if (!isTableEndRow(stopRow)) {
    props.setProperty("stopRowHex", Bytes.toHex(stopRow));
  }
  if (scanBatch > 0) {
    props.setProperty("scanBatch", Integer.toString(scanBatch));
  }
  if (versions >= 0) {
    props.setProperty("versions", Integer.toString(versions));
  }
  if (startTime != 0) {
    props.setProperty("startTimestamp", Long.toString(startTime));
  }
  if (endTime != 0) {
    props.setProperty("endTimestamp", Long.toString(endTime));
  }
  // try-with-resources closes the writer (and underlying stream) on all paths.
  try (OutputStreamWriter writer = new OutputStreamWriter(fs.create(path), Charsets.UTF_8)) {
    props.store(writer, null);
  }
}
/**
 * Writes the hash parameters to the temporary manifest file under
 * {@code destPath}; it is renamed to the final name by completeManifest()
 * once the job succeeds.
 */
private void writeTempManifestFile() throws IOException {
  Path tmpManifest = new Path(destPath, TMP_MANIFEST_FILE_NAME);
  tableHash.writePropertiesFile(tmpManifest.getFileSystem(getConf()), tmpManifest);
}
// Check whether this split is the table's final one: an end row that
// isTableEndRow recognizes (the empty byte array) marks the open-ended
// upper boundary of the table.
TableSplit split = (TableSplit) context.getInputSplit();
byte[] splitEndRow = split.getEndRow();
boolean reachedEndOfTable = HashTable.isTableEndRow(splitEndRow);
// NOTE(review): elided excerpt — the option-parsing loop, closing braces,
// and the try/catch supplying `e` are not shown, so this fragment does not
// compile standalone. Comments only are added; code text is unchanged.
private boolean doCommandLine(final String[] args) {
  // Require the minimum number of positional arguments.
  if (args.length < NUM_ARGS) { printUsage(null); return false;
  // Current argument under inspection (loop header elided).
  String cmd = args[i];
  // Help flag short-circuits with usage text and a false return.
  if (cmd.equals("-h") || cmd.startsWith("--h")) { printUsage(null); return false;
  // Unrecognized option.
  printUsage("Invalid argument '" + cmd + "'"); return false;
  // Validate the time-range filter: start must precede end.
  printUsage("Invalid time range filter: starttime=" + tableHash.startTime + " >= endtime=" + tableHash.endTime); return false;
  // Generic failure path from the enclosing (elided) try/catch.
  printUsage("Can't start because " + e.getMessage()); return false;
/**
 * Executes the tool: validates the command line, submits the hashing job,
 * and finalizes the manifest when the job succeeds.
 *
 * @return 0 on success; 1 on invalid arguments or job failure
 */
@Override
public int run(String[] args) throws Exception {
  GenericOptionsParser parser = new GenericOptionsParser(getConf(), args);
  String[] toolArgs = parser.getRemainingArgs();
  if (!doCommandLine(toolArgs)) {
    return 1;
  }

  Job job = createSubmittableJob(toolArgs);
  // The manifest is written under a temp name first; it is only renamed to
  // its final name after the job completes, signaling a usable output.
  writeTempManifestFile();

  if (!job.waitForCompletion(true)) {
    LOG.info("Map-reduce job failed!");
    return 1;
  }

  completeManifest();
  return 0;
}
// Include this region only if it overlaps the requested [startRow, stopRow)
// range: the scan's start must precede the region's end key AND the scan's
// stop must follow the region's start key. isTableStartRow/isTableEndRow
// treat the empty key as the table's open-ended boundary on either side.
// NOTE(review): fragment — the closing braces/else of this if are elided.
if ((isTableStartRow(startRow) || isTableEndRow(regionEndKey)
  || Bytes.compareTo(startRow, regionEndKey) < 0)
  && (isTableEndRow(stopRow) || isTableStartRow(regionStartKey)
  || Bytes.compareTo(stopRow, regionStartKey) > 0)) {
  startKeys.add(regionStartKey);
# Report the most common pixel color: build a color -> count frequency table,
# then pick the color with the highest count.
#
# BUG FIXES vs. original pseudocode:
#  - `result = table[table.keys[0]]` initialized result to a *count*, not a
#    color key, so the later `table[result]` lookup was wrong.
#  - `table[pixel.color]++` and `table.keys` are not valid Python; a plain
#    dict with .get() replaces the HashTable pseudoclass.
table = {}
for pixel in pixels:  # `pixels` is assumed to be provided by surrounding code
    table[pixel.color] = table.get(pixel.color, 0) + 1

result = None
for color in table:
    if result is None or table[color] > table[result]:
        result = color
print(result)