/**
 * Checks the location metadata of the first split computed for the input
 * directory {@code test:///a1/a2}: the split must report two locations, and
 * the matching {@link SplitLocationInfo} entries must mark "localhost" as both
 * on disk and in memory, and the other host as on disk only.
 */
@Test
public void testSplitLocationInfo() throws Exception {
  Configuration conf = getConfiguration();
  conf.set(org.apache.hadoop.mapreduce.lib.input.FileInputFormat.INPUT_DIR,
      "test:///a1/a2");
  Job job = Job.getInstance(conf);

  TextInputFormat inputFormat = new TextInputFormat();
  List<InputSplit> computedSplits = inputFormat.getSplits(job);
  InputSplit firstSplit = computedSplits.get(0);

  String[] hosts = firstSplit.getLocations();
  Assert.assertEquals(2, hosts.length);
  SplitLocationInfo[] infos = firstSplit.getLocationInfo();
  Assert.assertEquals(2, infos.length);

  // The order of the two hosts is not fixed, so pick each info entry by
  // looking at which hostname sits in slot 0.
  SplitLocationInfo localhostInfo;
  if (hosts[0].equals("localhost")) {
    localhostInfo = infos[0];
  } else {
    localhostInfo = infos[1];
  }
  SplitLocationInfo otherhostInfo;
  if (hosts[0].equals("otherhost")) {
    otherhostInfo = infos[0];
  } else {
    otherhostInfo = infos[1];
  }

  Assert.assertTrue(localhostInfo.isOnDisk());
  Assert.assertTrue(localhostInfo.isInMemory());
  Assert.assertTrue(otherhostInfo.isOnDisk());
  Assert.assertFalse(otherhostInfo.isInMemory());
}
/**
 * Builds a map from hostname to the indices of the splits assigned to that host.
 *
 * <p>Splits are numbered from 0 in the iteration order of {@code this.splits}.
 * Each split is attributed to the location of the <em>first</em> entry of its
 * {@code getLocationInfo()} array. A split whose location info is {@code null}
 * or empty is left out of the map instead of failing, but it still consumes
 * its index so the numbering of the remaining splits is unaffected.
 *
 * @return a map whose keys are hostnames and whose values are the indices, in
 *         ascending order, of the splits attributed to that hostname
 * @throws IOException if reading a split's location info fails
 */
public HashMap<String, ArrayList<Integer>> getLocationsOfSplits() throws IOException {
  HashMap<String, ArrayList<Integer>> splitsByHost = new HashMap<String, ArrayList<Integer>>();
  int index = 0;
  for (InputSplit split : this.splits) {
    SplitLocationInfo[] info = split.getLocationInfo();
    // getLocationInfo() may legitimately return null (or nothing); such a
    // split has no host to be filed under, so skip it.
    if (info != null && info.length > 0) {
      String hostname = info[0].getLocation();
      ArrayList<Integer> indices = splitsByHost.get(hostname);
      if (indices == null) {
        // First split seen for this host.
        indices = new ArrayList<Integer>();
        splitsByHost.put(hostname, indices);
      }
      indices.add(index);
    }
    // Advance even for skipped splits so indices keep matching positions.
    index++;
  }
  return splitsByHost;
}
Long lenInMap; for (InputSplit split : wrappedSplits) { SplitLocationInfo[] locs = split.getLocationInfo(); if( locs != null) { for (SplitLocationInfo loc : locs) {