private void setLoadDataMap() { // This function sets up the LO-TO-Data map, eq. class, and lineage for the base data used in the coming runner // this must be called after logToDataMap has been properly (re)set and before the runner is started if (baseData != null) { if (poToEqclassesMap == null) poToEqclassesMap = new HashMap<PhysicalOperator, Collection<IdentityHashSet<Tuple>>>(); else poToEqclassesMap.clear(); for (LOLoad lo : baseData.keySet()) { logToDataMap.get(lo).addAll(baseData.get(lo)); LinkedList<IdentityHashSet<Tuple>> equivalenceClasses = new LinkedList<IdentityHashSet<Tuple>>(); IdentityHashSet<Tuple> equivalenceClass = new IdentityHashSet<Tuple>(); equivalenceClasses.add(equivalenceClass); for (Tuple t : baseData.get(lo)) { lineage.insert(t); equivalenceClass.add(t); } poToEqclassesMap.put(logToPhyMap.get(lo), equivalenceClasses); } } }
public void attachInput(Object key, DataBag[] bags, boolean[] readOnce) throws ExecException { checkBagType(); this.key = key; this.bags = bags; this.readOnce = readOnce; // We assume that we need all bags materialized. Specialized subclasses // may choose to handle this differently for (int i = 0; i < bags.length; i++) { if (readOnce[i]) { DataBag materializedBag = getBag(); materializedBag.addAll(bags[i]); bags[i] = materializedBag; } } }
@Override public void attachInput(Object key, DataBag[] bags, boolean[] readOnce) throws ExecException { checkBagType(); this.key = key; this.bags = bags; this.readOnce = readOnce; // JoinPackager expects all but the last bag to be materialized for (int i = 0; i < bags.length - 1; i++) { if (readOnce[i]) { DataBag materializedBag = getBag(); materializedBag.addAll(bags[i]); bags[i] = materializedBag; } } if (readOnce[numInputs - 1] != true) { throw new ExecException( "JoinPackager expects the last input to be streamed"); } this.newKey = true; }
@SuppressWarnings("unchecked") @Override public int compareTo(Object other) { if (this == other) return 0; if (other instanceof DataBag) { DataBag bOther = (DataBag) other; if (this.size() != bOther.size()) { if (this.size() > bOther.size()) return 1; else return -1; } // if we got this far, both bags should have same size // make a LimitedSortedBag for the other bag with same comparator and limit // so that both bag are sorted and we can loop through both iterators DataBag otherCloneDataBag = new LimitedSortedDataBag(mComp, limit); otherCloneDataBag.addAll((DataBag) other); Iterator<Tuple> thisIt = this.iterator(); Iterator<Tuple> otherIt = otherCloneDataBag.iterator(); while (thisIt.hasNext() && otherIt.hasNext()) { Tuple thisT = thisIt.next(); Tuple otherT = otherIt.next(); int c = thisT.compareTo(otherT); if (c != 0) return c; } return 0; // if we got this far, they must be equal } else { return DataType.compare(this, other); } }
private boolean checkNewBaseData(DataBag data, Map<LOLoad, DataBag> newBaseData, Set<Tuple> loadData) throws FrontendException { List<Pair<Tuple, Double>> sortedBase = new LinkedList<Pair<Tuple, Double>>(); DataBag oldData = BagFactory.getInstance().newDefaultBag(); oldData.addAll(data); double tmpCompleteness = completeness; for (Tuple t : loadData) { data.addAll(oldData); for (Pair<Tuple, Double> p : sortedBase) { data.add(p.first);
newBaseData.put(e.getKey(), bag); bag.addAll(e.getValue());
/**
 * Merges the intermediate results and produces the final sample bag.
 *
 * <p>Field 0 of {@code input} is a bag of intermediate tuples. Each of them
 * carries a count (field 0, {@code Long}), a bag of already selected tuples
 * (field 1) and a bag of waiting tuples (field 2). Counts are summed, selected
 * bags are concatenated, and waiting bags are merged into a bag sorted by
 * {@code ScoredTupleComparator}. The target size is
 * {@code ceil(_samplingProbability * n)}; if the selected bag is short of
 * that, it is topped up from the waiting bag in its sorted order.
 *
 * @param input tuple whose first field is the bag of intermediate tuples
 * @return the bag of selected tuples
 * @throws IOException declared by the signature; may be raised by the tuple
 *                     and bag operations used here
 */
@Override
public DataBag exec(Tuple input) throws IOException {
    DataBag partials = (DataBag) input.get(0);

    long total = 0L;
    DataBag selected = bagFactory.newDefaultBag();
    DataBag waiting = bagFactory.newSortedBag(new ScoredTupleComparator());

    for (Tuple partial : partials) {
        total += (Long) partial.get(0);
        selected.addAll((DataBag) partial.get(1));
        waiting.addAll((DataBag) partial.get(2));
    }

    long targetSize = (long) Math.ceil(_samplingProbability * total);
    long stillNeeded = targetSize - selected.size();

    // Top up from the waiting bag until the target size is reached.
    for (Tuple scored : waiting) {
        if (stillNeeded > 0) {
            selected.add(ScoredTuple.fromIntermediateTuple(scored).getTuple());
            stillNeeded--;
        } else {
            break;
        }
    }

    return selected;
}
}
inputConstraints.addAll(inputData);
/**
 * Converts List objects to DataBag to keep Pig happy.
 *
 * <p>Elements that are themselves lists are converted recursively and their
 * tuples are added to the result. All other elements are appended, in order,
 * to one shared tuple, which is added to the result last and only if it
 * received at least one element.
 *
 * @param l the list to convert; may contain nested lists
 * @return a bag holding the tuples of every nested list, followed by one
 *         tuple of the non-list elements when there are any
 */
@SuppressWarnings("unchecked")
private DataBag convertListToBag(List<Object> l) {
    DataBag result = bagFactory.newDefaultBag();
    Tuple scalars = tupleFactory.newTuple();

    for (Object element : l) {
        if (!(element instanceof List)) {
            scalars.append(element);
            continue;
        }
        result.addAll(convertListToBag((List<Object>) element));
    }

    if (scalars.size() > 0) {
        result.add(scalars);
    }
    return result;
}
/**
 * Merges the candidate bags of all intermediate outputs and emits one tuple
 * per position.
 *
 * <p>Field 0 of {@code tuple} is a bag of intermediate output tuples, each
 * holding a bag of candidates in its field 0. All candidates are collected
 * into a bag sorted by {@code CandidateComparator}. Walking that bag, a
 * candidate is accepted only when its position (field 0, {@code Integer}) is
 * strictly greater than the last accepted position; the payload stored in its
 * field 2 is then added to the output.
 *
 * @param tuple tuple whose first field is the bag of intermediate outputs
 * @return bag with the field-2 payload of each accepted candidate
 * @throws IOException declared by the signature; may be raised by the tuple
 *                     and bag operations used here
 */
@Override
public DataBag exec(Tuple tuple) throws IOException {
    DataBag candidates = bagFactory.newSortedBag(CandidateComparator.get());
    DataBag partials = (DataBag) tuple.get(0);
    for (Tuple partial : partials) {
        candidates.addAll((DataBag) partial.get(0));
    }

    DataBag outputBag = bagFactory.newDefaultBag();
    int lastAccepted = -1;
    for (Tuple candidate : candidates) {
        int pos = (Integer) candidate.get(0);
        if (pos <= lastAccepted) {
            // A candidate for this position (or a later one) was already taken.
            continue;
        }
        outputBag.add((Tuple) candidate.get(2));
        lastAccepted = pos;
    }
    return outputBag;
}
}
@Override public Tuple exec(Tuple tuple) throws IOException { // sort candidates first by index, then by key DataBag candidates = bagFactory.newSortedBag(CandidateComparator.get()); for (Tuple intermediateOutputTuple : (DataBag) tuple.get(0)) { candidates.addAll((DataBag) intermediateOutputTuple.get(0)); } DataBag outputBag = bagFactory.newDefaultBag(); int i = -1; for (Tuple candidate : candidates) { int pos = (Integer) candidate.get(0); if (pos > i) { outputBag.add(candidate); i = pos; } } return tupleFactory.newTuple(outputBag); }
selected.addAll((DataBag) innerTuple.get(1));