/**
 * Collects the data-locality hints of this split.
 *
 * @return the host names reported by every non-null file split, concatenated in file-split order (the same host
 *         may therefore appear more than once); an empty array when there are no file splits at all
 * @throws IOException
 *             if a file split fails to report its locations
 */
@Override
public String[] getLocations() throws IOException, InterruptedException {
    List<String> hosts = new ArrayList<String>();
    if(this.getFileSplits() != null && this.getFileSplits().length != 0) {
        for(FileSplit current: this.getFileSplits()) {
            if(current == null) {
                // holes in the array contribute no hosts
                continue;
            }
            for(String host: current.getLocations()) {
                hosts.add(host);
            }
        }
    }
    return hosts.toArray(new String[hosts.size()]);
}
/**
 * Collects the data-locality hints of this split.
 *
 * @return the host names reported by every non-null file split, concatenated in file-split order (the same host
 *         may therefore appear more than once); an empty array when there are no file splits at all
 * @throws IOException
 *             if a file split fails to report its locations
 */
@Override
public String[] getLocations() throws IOException, InterruptedException {
    List<String> hosts = new ArrayList<String>();
    if(this.getFileSplits() != null && this.getFileSplits().length != 0) {
        for(FileSplit current: this.getFileSplits()) {
            if(current == null) {
                // holes in the array contribute no hosts
                continue;
            }
            for(String host: current.getLocations()) {
                hosts.add(host);
            }
        }
    }
    return hosts.toArray(new String[hosts.size()]);
}
/**
 * Collects the data-locality hints of this split.
 *
 * @return the host names reported by every non-null file split, concatenated in file-split order (the same host
 *         may therefore appear more than once); an empty array when there are no file splits at all
 * @throws IOException
 *             if a file split fails to report its locations
 */
@Override
public String[] getLocations() throws IOException, InterruptedException {
    List<String> hosts = new ArrayList<String>();
    if(this.getFileSplits() != null && this.getFileSplits().length != 0) {
        for(FileSplit current: this.getFileSplits()) {
            if(current == null) {
                // holes in the array contribute no hosts
                continue;
            }
            for(String host: current.getLocations()) {
                hosts.add(host);
            }
        }
    }
    return hosts.toArray(new String[hosts.size()]);
}
/**
 * Collects the data-locality hints of this split.
 *
 * @return the host names reported by every non-null file split, concatenated in file-split order (the same host
 *         may therefore appear more than once); an empty array when there are no file splits at all
 * @throws IOException
 *             if a file split fails to report its locations
 */
@Override
public String[] getLocations() throws IOException, InterruptedException {
    List<String> hosts = new ArrayList<String>();
    if(this.getFileSplits() != null && this.getFileSplits().length != 0) {
        for(FileSplit current: this.getFileSplits()) {
            if(current == null) {
                // holes in the array contribute no hosts
                continue;
            }
            for(String host: current.getLocations()) {
                hosts.add(host);
            }
        }
    }
    return hosts.toArray(new String[hosts.size()]);
}
/**
 * Serializes this split to {@code out}.
 * <p>
 * The master flag is always written first as a boolean. For a non-master split it is followed by the number of
 * file splits (int) and then every file split, in array order. Nothing else is written for a master split.
 *
 * @param out
 *            the sink to serialize into
 * @throws IOException
 *             if writing to {@code out} fails
 */
@Override
public void write(DataOutput out) throws IOException {
    out.writeBoolean(this.isMaster());
    if(this.isMaster()) {
        // master split carries no file splits on the wire
        return;
    }

    int splitCount = this.getFileSplits().length;
    out.writeInt(splitCount);
    int index = 0;
    while(index < splitCount) {
        this.getFileSplits()[index].write(out);
        index++;
    }
}
/**
 * Serializes this split to {@code out}.
 * <p>
 * The master flag is always written first as a boolean. For a non-master split it is followed by the number of
 * file splits (int) and then every file split, in array order. Nothing else is written for a master split.
 *
 * @param out
 *            the sink to serialize into
 * @throws IOException
 *             if writing to {@code out} fails
 */
@Override
public void write(DataOutput out) throws IOException {
    out.writeBoolean(this.isMaster());
    if(this.isMaster()) {
        // master split carries no file splits on the wire
        return;
    }

    int splitCount = this.getFileSplits().length;
    out.writeInt(splitCount);
    int index = 0;
    while(index < splitCount) {
        this.getFileSplits()[index].write(out);
        index++;
    }
}
/**
 * Serializes this split to {@code out}.
 * <p>
 * Layout: the master flag (boolean) is always written. For a non-master split it is followed by
 * <ol>
 * <li>the number of file splits (int) and each file split in array order;</li>
 * <li>the number of extensions (int, {@code 0} when {@code extensions} is {@code null}) and, per extension, the
 * length (int) of its Java-serialized form followed by those bytes.</li>
 * </ol>
 * Nothing else is written for a master split.
 *
 * @param out
 *            the sink to serialize into
 * @throws IOException
 *             if writing to {@code out} or Java-serializing an extension fails
 */
@Override
public void write(DataOutput out) throws IOException {
    out.writeBoolean(this.isMaster());
    if(!this.isMaster()) {
        int length = this.getFileSplits().length;
        out.writeInt(length);
        for(int i = 0; i < length; i++) {
            this.getFileSplits()[i].write(out);
        }

        if(this.extensions != null) {
            out.writeInt(extensions.length);
            for(int i = 0; i < extensions.length; i++) {
                ByteArrayOutputStream bos = new ByteArrayOutputStream();
                ObjectOutput ext = null;
                try {
                    ext = new ObjectOutputStream(bos);
                    ext.writeObject(extensions[i]);
                    // ObjectOutputStream buffers internally; flush explicitly so the snapshot taken from the
                    // backing byte array below is guaranteed to contain the whole serialized object.
                    ext.flush();
                    byte[] bytes = bos.toByteArray();
                    out.writeInt(bytes.length);
                    out.write(bytes);
                } finally {
                    IOUtils.closeQuietly(bos);
                }
            }
        } else {
            // no extensions: still emit a count so the reader always finds one
            out.writeInt(0);
        }
    }
}
/**
 * Serializes this split to {@code out}.
 * <p>
 * Layout: the master flag (boolean) is always written. For a non-master split it is followed by
 * <ol>
 * <li>the number of file splits (int) and each file split in array order;</li>
 * <li>the number of extensions (int, {@code 0} when {@code extensions} is {@code null}) and, per extension, the
 * length (int) of its Java-serialized form followed by those bytes.</li>
 * </ol>
 * Nothing else is written for a master split.
 *
 * @param out
 *            the sink to serialize into
 * @throws IOException
 *             if writing to {@code out} or Java-serializing an extension fails
 */
@Override
public void write(DataOutput out) throws IOException {
    out.writeBoolean(this.isMaster());
    if(!this.isMaster()) {
        int length = this.getFileSplits().length;
        out.writeInt(length);
        for(int i = 0; i < length; i++) {
            this.getFileSplits()[i].write(out);
        }

        if(this.extensions != null) {
            out.writeInt(extensions.length);
            for(int i = 0; i < extensions.length; i++) {
                ByteArrayOutputStream bos = new ByteArrayOutputStream();
                ObjectOutput ext = null;
                try {
                    ext = new ObjectOutputStream(bos);
                    ext.writeObject(extensions[i]);
                    // ObjectOutputStream buffers internally; flush explicitly so the snapshot taken from the
                    // backing byte array below is guaranteed to contain the whole serialized object.
                    ext.flush();
                    byte[] bytes = bos.toByteArray();
                    out.writeInt(bytes.length);
                    out.write(bytes);
                } finally {
                    IOUtils.closeQuietly(bos);
                }
            }
        } else {
            // no extensions: still emit a count so the reader always finds one
            out.writeInt(0);
        }
    }
}
/** * Check this case: all file splits are gzip files, but they are empty with only 20 bytes. Usally if it is 0 byte * can be found in getGuaguaSplits, while gzip empty file is not real empty. */ private boolean isAllFileSplitsEmptyGzip(GuaguaInputSplit guaguaInputSplit) { boolean isGzipEmptyFile = true;// by default true for(FileSplit fileSplit: guaguaInputSplit.getFileSplits()) { if(fileSplit.getPath().getName().toLowerCase().endsWith("gz") && fileSplit.getStart() == 0L && fileSplit.getLength() <= 20) { // just set to true isGzipEmptyFile = true; } else { // if found one is not gzip or gzip but not 20 bytes, return false to denote it is not empty gzip file return false; } } return isGzipEmptyFile; }
GuaguaInputSplit inputSplit = (GuaguaInputSplit) (this.inputSplits.get(currentPartition - 1)); String host = null; FileSplit[] fileSplits = inputSplit.getFileSplits(); if(fileSplits != null) { try {
/**
 * Reports the size of this split.
 * <p>
 * A master split reports {@code Long.MAX_VALUE} so that it becomes the first task of the Hadoop job, which makes
 * the master easy to find in the Hadoop UI. Any other split reports the summed length of its file splits.
 *
 * @return {@code Long.MAX_VALUE} for the master split, otherwise the total length of all file splits
 */
@Override
public long getLength() throws IOException, InterruptedException {
    if(!isMaster()) {
        long total = 0;
        for(FileSplit fileSplit: this.getFileSplits()) {
            total = total + fileSplit.getLength();
        }
        return total;
    }
    return Long.MAX_VALUE;
}
/**
 * Appends cross-validation file splits to the non-master training splits and flags which file splits are
 * validation data.
 * <p>
 * The i-th list returned by {@code getCrossValidationSplits} is appended to the file splits of the i-th
 * non-master training split. Each updated split then receives a {@code Boolean[]} extension array, parallel to
 * its file splits: {@code false} for the original training file splits, {@code true} for the appended ones.
 *
 * @param trainingSplit
 *            all training input splits, possibly including master splits (which are skipped)
 * @param context
 *            job context handed to {@code getCrossValidationSplits}
 * @throws IOException
 *             if the cross-validation splits cannot be computed
 */
protected void addCrossValidationDataset(List<InputSplit> trainingSplit, JobContext context) throws IOException {
    // master splits carry no data, so only the remaining splits receive validation files
    List<InputSplit> nonMasterSplits = new ArrayList<InputSplit>();
    for(InputSplit candidate: trainingSplit) {
        if(!((GuaguaInputSplit) candidate).isMaster()) {
            nonMasterSplits.add(candidate);
        }
    }

    List<List<FileSplit>> csSplits = this.getCrossValidationSplits(context, nonMasterSplits.size());
    for(int index = 0; index < csSplits.size(); index++) {
        List<FileSplit> validationFiles = csSplits.get(index);
        GuaguaInputSplit target = (GuaguaInputSplit) nonMasterSplits.get(index);

        FileSplit[] trainingFiles = target.getFileSplits();
        int trainingCount = trainingFiles.length;
        FileSplit[] merged = (FileSplit[]) ArrayUtils.addAll(trainingFiles,
                validationFiles.toArray(new FileSplit[0]));
        target.setFileSplits(merged);

        // positions at or beyond the original training count hold validation data
        Boolean[] validationFlags = new Boolean[merged.length];
        for(int position = 0; position < merged.length; position++) {
            validationFlags[position] = position >= trainingCount;
        }
        target.setExtensions(validationFlags);
    }

    LOG.info("Training input split size is: {}.", trainingSplit.size());
    LOG.info("Validation input split size is {}.", csSplits.size());
}
/**
 * Reports the size of this split.
 * <p>
 * A master split reports {@code Long.MAX_VALUE} so that it becomes the first task of the Hadoop job, which makes
 * the master easy to find in the Hadoop UI. Any other split reports the summed length of its file splits.
 *
 * @return {@code Long.MAX_VALUE} for the master split, otherwise the total length of all file splits
 */
@Override
public long getLength() throws IOException, InterruptedException {
    if(!isMaster()) {
        long total = 0;
        for(FileSplit fileSplit: this.getFileSplits()) {
            total = total + fileSplit.getLength();
        }
        return total;
    }
    return Long.MAX_VALUE;
}
/**
 * Reports the size of this split.
 * <p>
 * A master split reports {@code Long.MAX_VALUE} so that it becomes the first task of the Hadoop job, which makes
 * the master easy to find in the Hadoop UI. Any other split reports the summed length of its file splits.
 *
 * @return {@code Long.MAX_VALUE} for the master split, otherwise the total length of all file splits
 */
@Override
public long getLength() throws IOException, InterruptedException {
    if(!isMaster()) {
        long total = 0;
        for(FileSplit fileSplit: this.getFileSplits()) {
            total = total + fileSplit.getLength();
        }
        return total;
    }
    return Long.MAX_VALUE;
}
/**
 * Reports the size of this split.
 * <p>
 * A master split reports {@code Long.MAX_VALUE} so that it becomes the first task of the Hadoop job, which makes
 * the master easy to find in the Hadoop UI. Any other split reports the summed length of its file splits.
 *
 * @return {@code Long.MAX_VALUE} for the master split, otherwise the total length of all file splits
 */
@Override
public long getLength() throws IOException, InterruptedException {
    if(!isMaster()) {
        long total = 0;
        for(FileSplit fileSplit: this.getFileSplits()) {
            total = total + fileSplit.getLength();
        }
        return total;
    }
    return Long.MAX_VALUE;
}
/** * Check this case: all file splits are gzip files, but they are empty with only 20 bytes. Usally if it is 0 byte * can be found in getGuaguaSplits, while gzip empty file is not real empty. */ private boolean isAllFileSplitsEmptyGzip(GuaguaInputSplit guaguaInputSplit) { boolean isGzipEmptyFile = true;// by default true for(FileSplit fileSplit: guaguaInputSplit.getFileSplits()) { if(fileSplit.getPath().getName().toLowerCase().endsWith("gz") && fileSplit.getStart() == 0L && fileSplit.getLength() <= 20) { // just set to true isGzipEmptyFile = true; } else { // if found one is not gzip or gzip but not 20 bytes, return false to denote it is not empty gzip file return false; } } return isGzipEmptyFile; }
/**
 * Creates, configures and starts the guagua service for this task.
 * <p>
 * The role is taken from the task's {@link GuaguaInputSplit}: a master split gets a {@code GuaguaMasterService};
 * any other split gets a {@code GuaguaWorkerService} that is handed one {@code GuaguaFileSplit} per file split,
 * each carrying the extension at the same index when one exists. The service then receives the job id as app
 * id and the task partition as container id before it is initialized and started.
 *
 * @param context
 *            the task context supplying the input split and the configuration
 */
@Override
protected void setup(Context context) throws java.io.IOException, InterruptedException {
    GuaguaInputSplit inputSplit = (GuaguaInputSplit) context.getInputSplit();
    this.setMaster(inputSplit.isMaster());

    if(!this.isMaster()) {
        context.setStatus("Worker initializing ...");
        this.setGuaguaService(new GuaguaWorkerService<MASTER_RESULT, WORKER_RESULT>());

        List<GuaguaFileSplit> splits = new LinkedList<GuaguaFileSplit>();
        FileSplit[] fileSplits = inputSplit.getFileSplits();
        int index = 0;
        for(FileSplit fileSplit: fileSplits) {
            GuaguaFileSplit guaguaFileSplit = new GuaguaFileSplit(fileSplit.getPath().toString(),
                    fileSplit.getStart(), fileSplit.getLength());
            // extensions are optional and may be shorter than the file split array
            if(inputSplit.getExtensions() != null) {
                if(index < inputSplit.getExtensions().length) {
                    guaguaFileSplit.setExtension(inputSplit.getExtensions()[index]);
                }
            }
            splits.add(guaguaFileSplit);
            index++;
        }
        this.getGuaguaService().setSplits(splits);
    } else {
        context.setStatus("Master initializing ...");
        this.setGuaguaService(new GuaguaMasterService<MASTER_RESULT, WORKER_RESULT>());
    }

    Properties props = replaceConfToProps(context.getConfiguration());
    String appId = context.getConfiguration().get(GuaguaMapReduceConstants.MAPRED_JOB_ID);
    this.getGuaguaService().setAppId(appId);
    String containerId = context.getConfiguration().get(GuaguaMapReduceConstants.MAPRED_TASK_PARTITION);
    this.getGuaguaService().setContainerId(containerId);
    this.getGuaguaService().init(props);
    this.getGuaguaService().start();
}
/**
 * Creates, configures and starts the guagua service for this task.
 * <p>
 * The role is taken from the task's {@link GuaguaInputSplit}: a master split gets a {@code GuaguaMasterService};
 * any other split gets a {@code GuaguaWorkerService} that is handed one {@code GuaguaFileSplit} per file split,
 * each carrying the extension at the same index when one exists. The service then receives the job id as app
 * id and the task partition as container id before it is initialized and started.
 *
 * @param context
 *            the task context supplying the input split and the configuration
 */
@Override
protected void setup(Context context) throws java.io.IOException, InterruptedException {
    GuaguaInputSplit inputSplit = (GuaguaInputSplit) context.getInputSplit();
    this.setMaster(inputSplit.isMaster());

    if(!this.isMaster()) {
        context.setStatus("Worker initializing ...");
        this.setGuaguaService(new GuaguaWorkerService<MASTER_RESULT, WORKER_RESULT>());

        List<GuaguaFileSplit> splits = new LinkedList<GuaguaFileSplit>();
        FileSplit[] fileSplits = inputSplit.getFileSplits();
        int index = 0;
        for(FileSplit fileSplit: fileSplits) {
            GuaguaFileSplit guaguaFileSplit = new GuaguaFileSplit(fileSplit.getPath().toString(),
                    fileSplit.getStart(), fileSplit.getLength());
            // extensions are optional and may be shorter than the file split array
            if(inputSplit.getExtensions() != null) {
                if(index < inputSplit.getExtensions().length) {
                    guaguaFileSplit.setExtension(inputSplit.getExtensions()[index]);
                }
            }
            splits.add(guaguaFileSplit);
            index++;
        }
        this.getGuaguaService().setSplits(splits);
    } else {
        context.setStatus("Master initializing ...");
        this.setGuaguaService(new GuaguaMasterService<MASTER_RESULT, WORKER_RESULT>());
    }

    Properties props = replaceConfToProps(context.getConfiguration());
    String appId = context.getConfiguration().get(GuaguaMapReduceConstants.MAPRED_JOB_ID);
    this.getGuaguaService().setAppId(appId);
    String containerId = context.getConfiguration().get(GuaguaMapReduceConstants.MAPRED_TASK_PARTITION);
    this.getGuaguaService().setContainerId(containerId);
    this.getGuaguaService().init(props);
    this.getGuaguaService().start();
}
/**
 * Sets up the guagua service for this task.
 * <p>
 * The role comes from the input split: a master split gets a {@code GuaguaMasterService}; any other split gets a
 * {@code GuaguaWorkerService} that is handed one {@code GuaguaFileSplit} (path, start, length) per file split.
 * The service then receives the app id and the partition as container id, is initialized with the properties
 * from {@code replaceConfToProps()} and started. Finally the RPC client is initialized.
 */
protected void setup() {
    this.setMaster(this.getInputSplit().isMaster());

    if(!this.isMaster()) {
        this.setGuaguaService(new GuaguaWorkerService<MASTER_RESULT, WORKER_RESULT>());

        List<GuaguaFileSplit> splits = new LinkedList<GuaguaFileSplit>();
        for(FileSplit current: getInputSplit().getFileSplits()) {
            String path = current.getPath().toString();
            long start = current.getStart();
            long length = current.getLength();
            splits.add(new GuaguaFileSplit(path, start, length));
        }
        this.getGuaguaService().setSplits(splits);
    } else {
        this.setGuaguaService(new GuaguaMasterService<MASTER_RESULT, WORKER_RESULT>());
    }

    Properties props = replaceConfToProps();
    this.getGuaguaService().setAppId(this.getAppId().toString());
    this.getGuaguaService().setContainerId(this.getPartition() + "");
    this.getGuaguaService().init(props);
    this.getGuaguaService().start();

    initRPCClient();
}
/**
 * Sets up the guagua service for this task.
 * <p>
 * The role comes from the input split: a master split gets a {@code GuaguaMasterService}; any other split gets a
 * {@code GuaguaWorkerService} that is handed one {@code GuaguaFileSplit} (path, start, length) per file split.
 * The service then receives the app id and the partition as container id, is initialized with the properties
 * from {@code replaceConfToProps()} and started. Finally the RPC client is initialized.
 */
protected void setup() {
    this.setMaster(this.getInputSplit().isMaster());

    if(!this.isMaster()) {
        this.setGuaguaService(new GuaguaWorkerService<MASTER_RESULT, WORKER_RESULT>());

        List<GuaguaFileSplit> splits = new LinkedList<GuaguaFileSplit>();
        for(FileSplit current: getInputSplit().getFileSplits()) {
            String path = current.getPath().toString();
            long start = current.getStart();
            long length = current.getLength();
            splits.add(new GuaguaFileSplit(path, start, length));
        }
        this.getGuaguaService().setSplits(splits);
    } else {
        this.setGuaguaService(new GuaguaMasterService<MASTER_RESULT, WORKER_RESULT>());
    }

    Properties props = replaceConfToProps();
    this.getGuaguaService().setAppId(this.getAppId().toString());
    this.getGuaguaService().setContainerId(this.getPartition() + "");
    this.getGuaguaService().init(props);
    this.getGuaguaService().start();

    initRPCClient();
}