/**
 * Builds Guagua input splits from the default file splits: each data file split becomes one worker
 * split (repeated {@code NN_TEST_SCALE} times for scale testing), and a single trailing master split
 * with no file data is appended. Also records the worker count in the job configuration.
 *
 * @param job the job context providing the configuration and the raw file splits
 * @return worker splits followed by one master split
 * @throws IOException if the underlying split computation fails
 */
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    List<InputSplit> splits = super.getSplits(job);
    List<InputSplit> newSplits = new ArrayList<InputSplit>();
    // Read the scale factor once instead of on every evaluation of the loop condition.
    int testScale = job.getConfiguration().getInt(NNConstants.NN_TEST_SCALE, 1);
    for(int i = 0; i < testScale; i++) {
        for(InputSplit inputSplit: splits) {
            // Skip pig/hadoop meta files so they are not handed to workers as data.
            if(isNotPigOrHadoopMetaFile(((FileSplit) inputSplit).getPath())) {
                newSplits.add(new GuaguaInputSplit(false, new FileSplit[] { (FileSplit) inputSplit }));
            }
        }
    }
    // The master split carries no file splits; it only marks the coordinating task.
    newSplits.add(new GuaguaInputSplit(true, (FileSplit) null));
    int mapperSize = newSplits.size();
    LOG.info("inputs size including master: {}", mapperSize);
    LOG.debug("input splits including: {}", newSplits); // fixed typo "inclduing"
    // Worker number excludes the single master split.
    job.getConfiguration().set(GuaguaConstants.GUAGUA_WORKER_NUMBER, (mapperSize - 1) + "");
    return newSplits;
}
/**
 * Serializes this split: the master flag first; for worker splits only, the number of wrapped file
 * splits followed by each file split in order.
 *
 * @param out the output sink to serialize into
 * @throws IOException if writing to the sink fails
 */
@Override
public void write(DataOutput out) throws IOException {
    boolean master = this.isMaster();
    out.writeBoolean(master);
    if(master) {
        // Master splits carry no file data.
        return;
    }
    FileSplit[] fileSplits = this.getFileSplits();
    out.writeInt(fileSplits.length);
    for(FileSplit fileSplit: fileSplits) {
        fileSplit.write(out);
    }
}
/**
 * Appends cross-validation file splits to each worker training split and tags every file split with
 * a Boolean extension flag indicating whether it belongs to the validation set.
 *
 * @param trainingSplit all splits including the master split; only worker splits get validation data
 * @param context       the job context used to compute the cross-validation splits
 * @throws IOException if computing the cross-validation splits fails
 */
protected void addCrossValidationDataset(List<InputSplit> trainingSplit, JobContext context) throws IOException {
    List<InputSplit> trainingNoMaster = new ArrayList<InputSplit>();
    for(InputSplit split: trainingSplit) {
        GuaguaInputSplit guaguaInput = (GuaguaInputSplit) split;
        // The master split carries no data, so it receives no validation files.
        if(guaguaInput.isMaster()) {
            continue;
        }
        trainingNoMaster.add(guaguaInput);
    }
    List<List<FileSplit>> csSplits = this.getCrossValidationSplits(context, trainingNoMaster.size());
    for(int i = 0; i < csSplits.size(); i++) {
        List<FileSplit> oneInput = csSplits.get(i);
        GuaguaInputSplit guaguaInput = (GuaguaInputSplit) trainingNoMaster.get(i);
        int trainingSize = guaguaInput.getFileSplits().length;
        FileSplit[] finalSplits = (FileSplit[]) ArrayUtils.addAll(guaguaInput.getFileSplits(),
                oneInput.toArray(new FileSplit[0]));
        guaguaInput.setFileSplits(finalSplits);
        // Indices >= trainingSize are the appended validation splits.
        Boolean[] validationFlags = new Boolean[finalSplits.length];
        for(int j = 0; j < finalSplits.length; j++) {
            validationFlags[j] = j >= trainingSize; // was the redundant "j < trainingSize ? false : true"
        }
        guaguaInput.setExtensions(validationFlags);
    }
    LOG.info("Training input split size is: {}.", trainingSplit.size());
    LOG.info("Validation input split size is {}.", csSplits.size());
}
/**
 * Deserializes this split: reads the master flag and, for worker splits only, the wrapped file
 * splits in the order they were written.
 *
 * @param in the input source to deserialize from
 * @throws IOException if reading from the source fails
 */
@Override
public void readFields(DataInput in) throws IOException {
    boolean master = in.readBoolean();
    this.setMaster(master);
    if(master) {
        // Master splits carry no file data.
        return;
    }
    int count = in.readInt();
    FileSplit[] restored = new FileSplit[count];
    for(int index = 0; index < count; index++) {
        // Empty FileSplit shell; its fields are populated from the stream.
        FileSplit fileSplit = new FileSplit(null, 0, 0, (String[]) null);
        fileSplit.readFields(in);
        restored[index] = fileSplit;
    }
    this.setFileSplits(restored);
}
@Override public void readFields(DataInput in) throws IOException { this.setMaster(in.readBoolean()); if(!isMaster()) { int len = in.readInt(); FileSplit[] splits = new FileSplit[len]; splits[i].readFields(in); this.setFileSplits(splits); int extLen = in.readInt(); if(extLen > 0) { this.setExtensions(exts);
/**
 * Initializes this mapper from its input split: a master split gets a GuaguaMasterService, a worker
 * split gets a GuaguaWorkerService wired with its file splits (and optional validation-flag
 * extensions); the service is then configured from the job configuration and started.
 *
 * @param context the Hadoop task context
 * @throws java.io.IOException  if split or configuration access fails
 * @throws InterruptedException if the task is interrupted during setup
 */
@Override
protected void setup(Context context) throws java.io.IOException, InterruptedException {
    GuaguaInputSplit currentSplit = (GuaguaInputSplit) context.getInputSplit();
    boolean master = currentSplit.isMaster();
    this.setMaster(master);
    if(master) {
        context.setStatus("Master initializing ...");
        this.setGuaguaService(new GuaguaMasterService<MASTER_RESULT, WORKER_RESULT>());
    } else {
        context.setStatus("Worker initializing ...");
        this.setGuaguaService(new GuaguaWorkerService<MASTER_RESULT, WORKER_RESULT>());
        FileSplit[] fileSplits = currentSplit.getFileSplits();
        List<GuaguaFileSplit> workerSplits = new LinkedList<GuaguaFileSplit>();
        for(int index = 0; index < fileSplits.length; index++) {
            FileSplit fileSplit = fileSplits[index];
            GuaguaFileSplit workerSplit = new GuaguaFileSplit(fileSplit.getPath().toString(), fileSplit.getStart(),
                    fileSplit.getLength());
            // Extensions, when present, tag individual splits (e.g. as validation data).
            if(currentSplit.getExtensions() != null && index < currentSplit.getExtensions().length) {
                workerSplit.setExtension(currentSplit.getExtensions()[index]);
            }
            workerSplits.add(workerSplit);
        }
        this.getGuaguaService().setSplits(workerSplits);
    }
    Properties props = replaceConfToProps(context.getConfiguration());
    this.getGuaguaService().setAppId(context.getConfiguration().get(GuaguaMapReduceConstants.MAPRED_JOB_ID));
    this.getGuaguaService().setContainerId(
            context.getConfiguration().get(GuaguaMapReduceConstants.MAPRED_TASK_PARTITION));
    // init must precede start: the service is configured before it is launched.
    this.getGuaguaService().init(props);
    this.getGuaguaService().start();
}
/**
 * Data locality function: returns the union (duplicates kept) of every wrapped file split's hosts,
 * or an empty array for a split with no file splits (e.g. the master split).
 */
@Override
public String[] getLocations() throws IOException, InterruptedException {
    FileSplit[] fileSplits = this.getFileSplits();
    if(fileSplits == null || fileSplits.length == 0) {
        return new String[0];
    }
    List<String> hostList = new ArrayList<String>();
    for(FileSplit fileSplit: fileSplits) {
        if(fileSplit == null) {
            continue;
        }
        for(String host: fileSplit.getLocations()) {
            hostList.add(host);
        }
    }
    return hostList.toArray(new String[hostList.size()]);
}
@Override public void readFields(DataInput in) throws IOException { this.setMaster(in.readBoolean()); if(!isMaster()) { int len = in.readInt(); FileSplit[] splits = new FileSplit[len]; splits[i].readFields(in); this.setFileSplits(splits); int extLen = in.readInt(); if(extLen > 0) { this.setExtensions(exts);
/**
 * Restores this split from its serialized form: master flag first; worker splits additionally carry
 * a count and that many serialized file splits.
 *
 * @param in the input source to deserialize from
 * @throws IOException if reading from the source fails
 */
@Override
public void readFields(DataInput in) throws IOException {
    this.setMaster(in.readBoolean());
    if(this.isMaster()) {
        // Nothing else is serialized for a master split.
        return;
    }
    final int splitCount = in.readInt();
    FileSplit[] deserialized = new FileSplit[splitCount];
    for(int idx = 0; idx < splitCount; idx++) {
        // Blank FileSplit shell filled in from the stream.
        deserialized[idx] = new FileSplit(null, 0, 0, (String[]) null);
        deserialized[idx].readFields(in);
    }
    this.setFileSplits(deserialized);
}
/**
 * Task setup: selects master or worker service from the input split, hands the worker its file
 * splits (with optional per-split extensions), then configures and launches the chosen service.
 *
 * @param context the Hadoop task context
 * @throws java.io.IOException  if split or configuration access fails
 * @throws InterruptedException if the task is interrupted during setup
 */
@Override
protected void setup(Context context) throws java.io.IOException, InterruptedException {
    GuaguaInputSplit guaguaSplit = (GuaguaInputSplit) context.getInputSplit();
    this.setMaster(guaguaSplit.isMaster());
    if(!this.isMaster()) {
        context.setStatus("Worker initializing ...");
        this.setGuaguaService(new GuaguaWorkerService<MASTER_RESULT, WORKER_RESULT>());
        List<GuaguaFileSplit> converted = new LinkedList<GuaguaFileSplit>();
        FileSplit[] rawSplits = guaguaSplit.getFileSplits();
        for(int pos = 0; pos < rawSplits.length; pos++) {
            FileSplit raw = rawSplits[pos];
            GuaguaFileSplit target = new GuaguaFileSplit(raw.getPath().toString(), raw.getStart(), raw.getLength());
            // Copy the matching extension flag when one exists for this position.
            if(guaguaSplit.getExtensions() != null && pos < guaguaSplit.getExtensions().length) {
                target.setExtension(guaguaSplit.getExtensions()[pos]);
            }
            converted.add(target);
        }
        this.getGuaguaService().setSplits(converted);
    } else {
        context.setStatus("Master initializing ...");
        this.setGuaguaService(new GuaguaMasterService<MASTER_RESULT, WORKER_RESULT>());
    }
    Properties props = replaceConfToProps(context.getConfiguration());
    this.getGuaguaService().setAppId(context.getConfiguration().get(GuaguaMapReduceConstants.MAPRED_JOB_ID));
    this.getGuaguaService().setContainerId(
            context.getConfiguration().get(GuaguaMapReduceConstants.MAPRED_TASK_PARTITION));
    // Configure before launching.
    this.getGuaguaService().init(props);
    this.getGuaguaService().start();
}
/**
 * Data locality function: collects the hosts of every non-null wrapped file split. An empty array is
 * returned when there are no file splits (e.g. for the master split).
 */
@Override
public String[] getLocations() throws IOException, InterruptedException {
    List<String> allHosts = new ArrayList<String>();
    FileSplit[] wrapped = this.getFileSplits();
    if(wrapped != null) {
        for(FileSplit split: wrapped) {
            if(split != null) {
                allHosts.addAll(Arrays.asList(split.getLocations()));
            }
        }
    }
    return allHosts.toArray(new String[allHosts.size()]);
}
/**
 * Writes this split's serialized form: the master flag, then — for worker splits only — the file
 * split count followed by each file split.
 *
 * @param out the output sink to serialize into
 * @throws IOException if writing to the sink fails
 */
@Override
public void write(DataOutput out) throws IOException {
    out.writeBoolean(this.isMaster());
    if(this.isMaster()) {
        // A master split serializes nothing beyond the flag.
        return;
    }
    FileSplit[] wrapped = this.getFileSplits();
    int count = wrapped.length;
    out.writeInt(count);
    int index = 0;
    while(index < count) {
        wrapped[index].write(out);
        index++;
    }
}
/**
 * Combines small input splits into larger GuaguaInputSplit instances bounded by the combine size.
 * Copy from pig implementation, need to check this code logic.
 *
 * @param newSplits   the raw splits to combine
 * @param combineSize the maximum combined size per resulting split
 * @return the combined worker splits (no master split included)
 * @throws IOException if split combination fails
 */
public static List<InputSplit> getFinalCombineGuaguaSplits(List<InputSplit> newSplits, long combineSize)
        throws IOException {
    List<List<InputSplit>> combinePigSplits;
    try {
        combinePigSplits = getCombineGuaguaSplits(newSplits, combineSize);
    } catch (InterruptedException e) {
        // Restore the interrupt flag before translating to an unchecked exception,
        // consistent with the non-static overload of this method.
        Thread.currentThread().interrupt();
        throw new GuaguaRuntimeException(e);
    }
    newSplits = new ArrayList<InputSplit>();
    for(List<InputSplit> inputSplits: combinePigSplits) {
        FileSplit[] fss = new FileSplit[inputSplits.size()];
        for(int i = 0; i < inputSplits.size(); i++) {
            fss[i] = (FileSplit) (inputSplits.get(i));
        }
        newSplits.add(new GuaguaInputSplit(false, fss));
    }
    return newSplits;
}
/**
 * Data locality function: gathers every host of every non-null wrapped file split into one array.
 * Returns an empty array when no file splits are present (e.g. the master split).
 */
@Override
public String[] getLocations() throws IOException, InterruptedException {
    FileSplit[] parts = this.getFileSplits();
    if(parts == null || parts.length == 0) {
        return new String[0];
    }
    List<String> collected = new ArrayList<String>();
    for(int i = 0; i < parts.length; i++) {
        FileSplit part = parts[i];
        if(part != null) {
            String[] partHosts = part.getLocations();
            for(int j = 0; j < partHosts.length; j++) {
                collected.add(partHosts[j]);
            }
        }
    }
    return collected.toArray(new String[0]);
}
/**
 * For master split, use <code>Long.MAX_VALUE</code> as its length to make it is the first task for
 * Hadoop job. It is convenient for users to check master in Hadoop UI. Worker splits report the sum
 * of their wrapped file split lengths.
 */
@Override
public long getLength() throws IOException, InterruptedException {
    if(this.isMaster()) {
        return Long.MAX_VALUE;
    }
    long total = 0L;
    for(FileSplit fileSplit: this.getFileSplits()) {
        total += fileSplit.getLength();
    }
    return total;
}
/**
 * Copy from pig implementation, need to check this code logic. Groups the given splits by combine
 * size and wraps each group into one worker GuaguaInputSplit.
 *
 * @param newSplits   the raw splits to combine
 * @param combineSize the maximum combined size per resulting split
 * @return the combined worker splits
 * @throws IOException if split combination fails
 */
protected List<InputSplit> getFinalCombineGuaguaSplits(List<InputSplit> newSplits, long combineSize)
        throws IOException {
    List<List<InputSplit>> grouped;
    try {
        grouped = getCombineGuaguaSplits(newSplits, combineSize);
    } catch (InterruptedException e) {
        // Keep the interrupt flag set while converting to an unchecked exception.
        Thread.currentThread().interrupt();
        throw new GuaguaRuntimeException(e);
    }
    List<InputSplit> combined = new ArrayList<InputSplit>();
    for(List<InputSplit> group: grouped) {
        FileSplit[] fileSplits = new FileSplit[group.size()];
        int pos = 0;
        for(InputSplit member: group) {
            fileSplits[pos++] = (FileSplit) member;
        }
        combined.add(new GuaguaInputSplit(false, fileSplits));
    }
    return combined;
}
/**
 * Data locality function: the concatenation of all wrapped file splits' host lists; empty when this
 * split wraps no file splits (e.g. the master split).
 */
@Override
public String[] getLocations() throws IOException, InterruptedException {
    FileSplit[] innerSplits = this.getFileSplits();
    boolean empty = innerSplits == null || innerSplits.length == 0;
    if(empty) {
        return new String[0];
    }
    List<String> result = new ArrayList<String>();
    for(FileSplit inner: innerSplits) {
        if(inner == null) {
            continue;
        }
        result.addAll(Arrays.asList(inner.getLocations()));
    }
    return result.toArray(new String[result.size()]);
}
/**
 * For master split, use <code>Long.MAX_VALUE</code> as its length to make it is the first task for
 * Hadoop job. It is convenient for users to check master in Hadoop UI. For worker splits the length
 * is the total of the wrapped file split lengths.
 */
@Override
public long getLength() throws IOException, InterruptedException {
    if(!isMaster()) {
        FileSplit[] parts = this.getFileSplits();
        long sum = 0L;
        for(int i = 0; i < parts.length; i++) {
            sum += parts[i].getLength();
        }
        return sum;
    }
    return Long.MAX_VALUE;
}
/**
 * Copy from pig implementation, need to check this code logic. Combines the raw splits into larger
 * groups bounded by the combine size, each wrapped as one worker GuaguaInputSplit.
 *
 * @param newSplits   the raw splits to combine
 * @param combineSize the maximum combined size per resulting split
 * @return the combined worker splits
 * @throws IOException if split combination fails
 */
protected List<InputSplit> getFinalCombineGuaguaSplits(List<InputSplit> newSplits, long combineSize)
        throws IOException {
    List<List<InputSplit>> buckets;
    try {
        buckets = getCombineGuaguaSplits(newSplits, combineSize);
    } catch (InterruptedException e) {
        // Re-assert the interrupt status before rethrowing unchecked.
        Thread.currentThread().interrupt();
        throw new GuaguaRuntimeException(e);
    }
    List<InputSplit> results = new ArrayList<InputSplit>(buckets.size());
    for(int b = 0; b < buckets.size(); b++) {
        List<InputSplit> bucket = buckets.get(b);
        int size = bucket.size();
        FileSplit[] members = new FileSplit[size];
        for(int m = 0; m < size; m++) {
            members[m] = (FileSplit) bucket.get(m);
        }
        results.add(new GuaguaInputSplit(false, members));
    }
    return results;
}
// Fragment is truncated in this view: the try block opened below continues past the visible source,
// so the code is left byte-identical and only annotated.
// NOTE(review): currentPartition is offset by 1 when indexing this.inputSplits — presumably
// partition numbering starts at 1 (or slot 0 is reserved, e.g. for the master task); confirm
// against the caller that supplies currentPartition.
GuaguaInputSplit inputSplit = (GuaguaInputSplit) (this.inputSplits.get(currentPartition - 1));
// host stays null until a location is resolved from the file splits below.
String host = null;
FileSplit[] fileSplits = inputSplit.getFileSplits();
// Master splits carry no file splits, so guard before touching them.
if(fileSplits != null) {
    try {