/**
 * Converts the hosts of a task location hint into split location entries.
 *
 * <p>Each host becomes one {@link SplitLocationInfo} flagged as not in memory. A warning is
 * logged when the hint does not carry exactly one host, but all hosts are still converted.
 *
 * @param hint the location hint whose hosts are converted
 * @return one entry per host; an empty array when the hint carries no host set
 */
private SplitLocationInfo[] makeLocationHints(TaskLocationHint hint) {
  Set<String> hosts = hint.getHosts();
  // A hint may carry no host set at all; treat that as "no locations" instead of
  // failing with a NullPointerException on hosts.size().
  if (hosts == null) {
    LOG.warn("No hosts");
    return new SplitLocationInfo[0];
  }
  if (hosts.size() != 1) {
    LOG.warn("Bad # of locations: " + hosts.size());
  }
  SplitLocationInfo[] locations = new SplitLocationInfo[hosts.size()];
  int j = 0;
  for (String host : hosts) {
    locations[j++] = new SplitLocationInfo(host, false);
  }
  return locations;
}
private static String[] extractHosts(FileSplit split, boolean isInMemory) throws IOException { SplitLocationInfo[] locInfo = split.getLocationInfo(); if (locInfo == null) return new String[0]; List<String> hosts = null; // TODO: most of the time, there's no in-memory. Use an array? for (int i = 0; i < locInfo.length; i++) { if (locInfo[i].isInMemory() != isInMemory) continue; if (hosts == null) { hosts = new ArrayList<>(); } hosts.add(locInfo[i].getLocation()); } if (hosts == null) return new String[0]; return hosts.toArray(new String[hosts.size()]); }
/**
 * Returns the location string of every entry in {@code locations}, in the same order.
 *
 * @return a newly allocated array with one location per entry
 * @throws IOException declared by the overridden method; not thrown by this implementation
 */
@Override
public String[] getLocations() throws IOException {
  final int count = locations.length;
  String[] hostNames = new String[count];
  int idx = 0;
  while (idx < count) {
    hostNames[idx] = locations[idx].getLocation();
    idx++;
  }
  return hostNames;
}
/**
 * Checks that the first split produced for {@code test:///a1/a2} reports two locations and
 * that the "localhost" entry is both on disk and in memory while the other entry is on disk
 * only.
 */
@Test
public void testSplitLocationInfo() throws Exception {
  Configuration conf = getConfiguration();
  conf.set(org.apache.hadoop.mapreduce.lib.input.FileInputFormat.INPUT_DIR,
      "test:///a1/a2");
  Job job = Job.getInstance(conf);
  TextInputFormat fileInputFormat = new TextInputFormat();
  List<InputSplit> splits = fileInputFormat.getSplits(job);

  String[] hostNames = splits.get(0).getLocations();
  Assert.assertEquals(2, hostNames.length);
  SplitLocationInfo[] infos = splits.get(0).getLocationInfo();
  Assert.assertEquals(2, infos.length);

  // The order of the two hosts is not fixed, so pick each entry based on the first name.
  SplitLocationInfo localhostInfo;
  if (hostNames[0].equals("localhost")) {
    localhostInfo = infos[0];
  } else {
    localhostInfo = infos[1];
  }
  SplitLocationInfo otherhostInfo;
  if (hostNames[0].equals("otherhost")) {
    otherhostInfo = infos[0];
  } else {
    otherhostInfo = infos[1];
  }

  Assert.assertTrue(localhostInfo.isOnDisk());
  Assert.assertTrue(localhostInfo.isInMemory());
  Assert.assertTrue(otherhostInfo.isOnDisk());
  Assert.assertFalse(otherhostInfo.isInMemory());
}
/**
 * Serializes this split to {@code out}.
 *
 * <p>Layout, in order: split number; length-prefixed plan bytes; length-prefixed fragment
 * bytes; length-prefixed fragment signature (length 0 when absent); location count followed
 * by each location string; the schema; the LLAP user; length-prefixed token bytes (length 0
 * when absent). Only the location string of each entry is written.
 *
 * @param out the sink to write to
 * @throws IOException if writing to {@code out} fails
 */
@Override
public void write(DataOutput out) throws IOException {
  out.writeInt(splitNum);

  out.writeInt(planBytes.length);
  out.write(planBytes);

  out.writeInt(fragmentBytes.length);
  out.write(fragmentBytes);

  // Optional field: a missing signature is encoded as a zero length.
  if (fragmentBytesSignature == null) {
    out.writeInt(0);
  } else {
    out.writeInt(fragmentBytesSignature.length);
    out.write(fragmentBytesSignature);
  }

  final int locationCount = locations.length;
  out.writeInt(locationCount);
  int idx = 0;
  while (idx < locationCount) {
    out.writeUTF(locations[idx].getLocation());
    idx++;
  }

  schema.write(out);
  out.writeUTF(llapUser);

  // Optional field: a missing token is encoded as a zero length.
  if (tokenBytes == null) {
    out.writeInt(0);
  } else {
    out.writeInt(tokenBytes.length);
    out.write(tokenBytes);
  }
}
/**
 * Checks that the first split produced for {@code test:///a1/a2} reports two locations and
 * that the "localhost" entry is both on disk and in memory while the other entry is on disk
 * only.
 */
@Test
public void testSplitLocationInfo() throws Exception {
  Configuration conf = getConfiguration();
  conf.set(org.apache.hadoop.mapreduce.lib.input.FileInputFormat.INPUT_DIR,
      "test:///a1/a2");
  JobConf job = new JobConf(conf);
  TextInputFormat fileInputFormat = new TextInputFormat();
  fileInputFormat.configure(job);
  FileSplit[] splits = (FileSplit[]) fileInputFormat.getSplits(job, 1);

  String[] hostNames = splits[0].getLocations();
  Assert.assertEquals(2, hostNames.length);
  SplitLocationInfo[] infos = splits[0].getLocationInfo();
  Assert.assertEquals(2, infos.length);

  // The order of the two hosts is not fixed, so pick each entry based on the first name.
  SplitLocationInfo localhostInfo;
  if (hostNames[0].equals("localhost")) {
    localhostInfo = infos[0];
  } else {
    localhostInfo = infos[1];
  }
  SplitLocationInfo otherhostInfo;
  if (hostNames[0].equals("otherhost")) {
    otherhostInfo = infos[0];
  } else {
    otherhostInfo = infos[1];
  }

  Assert.assertTrue(localhostInfo.isOnDisk());
  Assert.assertTrue(localhostInfo.isInMemory());
  Assert.assertTrue(otherhostInfo.isOnDisk());
  Assert.assertFalse(otherhostInfo.isInMemory());
}
/**
 * Builds split location entries from the hosts of a task location hint.
 *
 * <p>Every host yields one {@link SplitLocationInfo} flagged as not in memory. A warning is
 * logged when the host set is missing or does not contain exactly one host.
 *
 * @param hint the location hint to convert
 * @return one entry per host, or an empty array when the hint has no host set
 */
private SplitLocationInfo[] makeLocationHints(TaskLocationHint hint) {
  final Set<String> hostSet = hint.getHosts();
  if (hostSet == null) {
    LOG.warn("No hosts");
    return new SplitLocationInfo[0];
  }

  final int hostCount = hostSet.size();
  if (hostCount != 1) {
    LOG.warn("Bad # of locations: " + hostCount);
  }

  SplitLocationInfo[] result = new SplitLocationInfo[hostCount];
  int next = 0;
  for (String hostName : hostSet) {
    result[next] = new SplitLocationInfo(hostName, false);
    next++;
  }
  return result;
}
private static String[] extractHosts(FileSplit split, boolean isInMemory) throws IOException { SplitLocationInfo[] locInfo = split.getLocationInfo(); if (locInfo == null) return new String[0]; List<String> hosts = null; // TODO: most of the time, there's no in-memory. Use an array? for (int i = 0; i < locInfo.length; i++) { if (locInfo[i].isInMemory() != isInMemory) continue; if (hosts == null) { hosts = new ArrayList<>(); } hosts.add(locInfo[i].getLocation()); } if (hosts == null) return new String[0]; return hosts.toArray(new String[hosts.size()]); }
/**
 * Returns the location string of every entry in {@code locations}, in the same order.
 *
 * @return a newly allocated array with one location per entry
 * @throws IOException declared by the overridden method; not thrown by this implementation
 */
@Override
public String[] getLocations() throws IOException {
  final int count = locations.length;
  String[] hostNames = new String[count];
  int idx = 0;
  while (idx < count) {
    hostNames[idx] = locations[idx].getLocation();
    idx++;
  }
  return hostNames;
}
/**
 * Restores this split from {@code in}, reading the fields in the order they are serialized:
 * split number; length-prefixed plan bytes; length-prefixed fragment bytes; length-prefixed
 * fragment signature; location count followed by each location string; the schema; the LLAP
 * user; length-prefixed token bytes.
 *
 * <p>Locations are rebuilt with the in-memory flag set to {@code false}. A zero length for
 * the signature or the token means "absent" and leaves the corresponding field {@code null}.
 *
 * @param in the source to read from
 * @throws IOException if reading from {@code in} fails or the stream ends prematurely
 */
@Override
public void readFields(DataInput in) throws IOException {
  splitNum = in.readInt();

  int length = in.readInt();
  planBytes = new byte[length];
  in.readFully(planBytes);

  length = in.readInt();
  fragmentBytes = new byte[length];
  in.readFully(fragmentBytes);

  length = in.readInt();
  if (length > 0) {
    fragmentBytesSignature = new byte[length];
    in.readFully(fragmentBytesSignature);
  } else {
    // Clear any value left over from a previous readFields call on this instance;
    // otherwise a reused object would keep a stale signature.
    fragmentBytesSignature = null;
  }

  length = in.readInt();
  locations = new SplitLocationInfo[length];
  for (int i = 0; i < length; ++i) {
    locations[i] = new SplitLocationInfo(in.readUTF(), false);
  }

  schema = new Schema();
  schema.readFields(in);
  llapUser = in.readUTF();

  length = in.readInt();
  if (length > 0) {
    tokenBytes = new byte[length];
    in.readFully(tokenBytes);
  } else {
    // Same reasoning as for the signature: do not keep a token from an earlier read.
    tokenBytes = null;
  }
}
/**
 * Groups the indices of {@code this.splits} by the host of each split's first location.
 *
 * <p>The index of a split is its position in the iteration order of {@code this.splits},
 * starting at 0. Splits that report no location info (a {@code null} or empty array) are
 * left out of the map, but they still consume an index so the remaining indices keep
 * referring to the correct splits.
 *
 * @return a map from hostname to the indices of the splits whose first location is that host
 * @throws IOException if a split's location info cannot be obtained
 */
public HashMap<String, ArrayList<Integer>> getLocationsOfSplits() throws IOException {
  HashMap<String, ArrayList<Integer>> splitsByHost =
      new HashMap<String, ArrayList<Integer>>();
  int index = 0;
  for (InputSplit split : this.splits) {
    SplitLocationInfo[] info = split.getLocationInfo();
    // Location info is optional for a split; without this guard info[0] would throw.
    if (info != null && info.length > 0) {
      String hostname = info[0].getLocation();
      ArrayList<Integer> indices = splitsByHost.get(hostname);
      if (indices == null) {
        indices = new ArrayList<Integer>();
        splitsByHost.put(hostname, indices);
      }
      indices.add(index);
    }
    index++;
  }
  return splitsByHost;
}
/** Constructs a split with host and cached-blocks information * * @param file the file name * @param start the position of the first byte in the file to process * @param length the number of bytes in the file to process * @param hosts the list of hosts containing the block * @param inMemoryHosts the list of hosts containing the block in memory */ public FileSplit(Path file, long start, long length, String[] hosts, String[] inMemoryHosts) { this(file, start, length, hosts); hostInfos = new SplitLocationInfo[hosts.length]; for (int i = 0; i < hosts.length; i++) { // because N will be tiny, scanning is probably faster than a HashSet boolean inMemory = false; for (String inMemoryHost : inMemoryHosts) { if (inMemoryHost.equals(hosts[i])) { inMemory = true; break; } } hostInfos[i] = new SplitLocationInfo(hosts[i], inMemory); } }
/**
 * Serializes this split to {@code out}.
 *
 * <p>Layout, in order: split number; length-prefixed plan bytes; length-prefixed fragment
 * bytes; length-prefixed fragment signature (length 0 when absent); location count followed
 * by each location string; the schema; the LLAP user; length-prefixed token bytes (length 0
 * when absent). Only the location string of each entry is written.
 *
 * @param out the sink to write to
 * @throws IOException if writing to {@code out} fails
 */
@Override
public void write(DataOutput out) throws IOException {
  out.writeInt(splitNum);

  out.writeInt(planBytes.length);
  out.write(planBytes);

  out.writeInt(fragmentBytes.length);
  out.write(fragmentBytes);

  // Optional field: a missing signature is encoded as a zero length.
  if (fragmentBytesSignature == null) {
    out.writeInt(0);
  } else {
    out.writeInt(fragmentBytesSignature.length);
    out.write(fragmentBytesSignature);
  }

  final int locationCount = locations.length;
  out.writeInt(locationCount);
  int idx = 0;
  while (idx < locationCount) {
    out.writeUTF(locations[idx].getLocation());
    idx++;
  }

  schema.write(out);
  out.writeUTF(llapUser);

  // Optional field: a missing token is encoded as a zero length.
  if (tokenBytes == null) {
    out.writeInt(0);
  } else {
    out.writeInt(tokenBytes.length);
    out.write(tokenBytes);
  }
}
/** Constructs a split with host and cached-blocks information * * @param file the file name * @param start the position of the first byte in the file to process * @param length the number of bytes in the file to process * @param hosts the list of hosts containing the block * @param inMemoryHosts the list of hosts containing the block in memory */ public FileSplit(Path file, long start, long length, String[] hosts, String[] inMemoryHosts) { this(file, start, length, hosts); hostInfos = new SplitLocationInfo[hosts.length]; for (int i = 0; i < hosts.length; i++) { // because N will be tiny, scanning is probably faster than a HashSet boolean inMemory = false; for (String inMemoryHost : inMemoryHosts) { if (inMemoryHost.equals(hosts[i])) { inMemory = true; break; } } hostInfos[i] = new SplitLocationInfo(hosts[i], inMemory); } }
/** Constructs a split with host and cached-blocks information * * @param file the file name * @param start the position of the first byte in the file to process * @param length the number of bytes in the file to process * @param hosts the list of hosts containing the block * @param inMemoryHosts the list of hosts containing the block in memory */ public FileSplit(Path file, long start, long length, String[] hosts, String[] inMemoryHosts) { this(file, start, length, hosts); hostInfos = new SplitLocationInfo[hosts.length]; for (int i = 0; i < hosts.length; i++) { // because N will be tiny, scanning is probably faster than a HashSet boolean inMemory = false; for (String inMemoryHost : inMemoryHosts) { if (inMemoryHost.equals(hosts[i])) { inMemory = true; break; } } hostInfos[i] = new SplitLocationInfo(hosts[i], inMemory); } }
/** Constructs a split with host and cached-blocks information * * @param file the file name * @param start the position of the first byte in the file to process * @param length the number of bytes in the file to process * @param hosts the list of hosts containing the block * @param inMemoryHosts the list of hosts containing the block in memory */ public FileSplit(Path file, long start, long length, String[] hosts, String[] inMemoryHosts) { this(file, start, length, hosts); hostInfos = new SplitLocationInfo[hosts.length]; for (int i = 0; i < hosts.length; i++) { // because N will be tiny, scanning is probably faster than a HashSet boolean inMemory = false; for (String inMemoryHost : inMemoryHosts) { if (inMemoryHost.equals(hosts[i])) { inMemory = true; break; } } hostInfos[i] = new SplitLocationInfo(hosts[i], inMemory); } }
/**
 * Restores this split from {@code in}, reading the fields in the order they are serialized:
 * split number; length-prefixed plan bytes; length-prefixed fragment bytes; length-prefixed
 * fragment signature; location count followed by each location string; the schema; the LLAP
 * user; length-prefixed token bytes.
 *
 * <p>Locations are rebuilt with the in-memory flag set to {@code false}. A zero length for
 * the signature or the token means "absent" and leaves the corresponding field {@code null}.
 *
 * @param in the source to read from
 * @throws IOException if reading from {@code in} fails or the stream ends prematurely
 */
@Override
public void readFields(DataInput in) throws IOException {
  splitNum = in.readInt();

  int length = in.readInt();
  planBytes = new byte[length];
  in.readFully(planBytes);

  length = in.readInt();
  fragmentBytes = new byte[length];
  in.readFully(fragmentBytes);

  length = in.readInt();
  if (length > 0) {
    fragmentBytesSignature = new byte[length];
    in.readFully(fragmentBytesSignature);
  } else {
    // Clear any value left over from a previous readFields call on this instance;
    // otherwise a reused object would keep a stale signature.
    fragmentBytesSignature = null;
  }

  length = in.readInt();
  locations = new SplitLocationInfo[length];
  for (int i = 0; i < length; ++i) {
    locations[i] = new SplitLocationInfo(in.readUTF(), false);
  }

  schema = new Schema();
  schema.readFields(in);
  llapUser = in.readUTF();

  length = in.readInt();
  if (length > 0) {
    tokenBytes = new byte[length];
    in.readFully(tokenBytes);
  } else {
    // Same reasoning as for the signature: do not keep a token from an earlier read.
    tokenBytes = null;
  }
}