Refine search
@Override public Tuple exec(Tuple input) throws IOException { // Since Initial is guaranteed to be called // only in the map, it will be called with an // input of a bag with a single tuple - the // count should always be 1 if bag is non empty DataBag bag = (DataBag)input.get(0); return mTupleFactory.newTuple(bag.iterator().hasNext()? Long.valueOf(1L) : Long.valueOf(0L)); } }
/**
 * Builds a priority queue containing one {@code pair} for every non-empty
 * bag found in {@code input}.
 *
 * <p>Each field of {@code input} must be a {@code DataBag}. Bags whose
 * iterator has no elements contribute nothing, so the returned queue may
 * hold fewer entries than {@code input.size()}.
 *
 * @param input tuple whose fields are all expected to be bags
 * @return a queue of {@code pair} objects, one per non-empty bag; empty when
 *         {@code input} has no fields or every bag is empty
 * @throws RuntimeException if any field is not a {@code DataBag}
 * @throws IOException if reading a field of {@code input} fails
 */
private PriorityQueue<pair> load_bags(Tuple input) throws IOException {
    final int bagCount = input.size();
    // PriorityQueue(int) throws IllegalArgumentException for a capacity below 1,
    // so clamp it: a zero-field input now yields an empty queue instead of failing.
    PriorityQueue<pair> pq = new PriorityQueue<pair>(Math.max(1, bagCount));
    for (int i = 0; i < bagCount; i++) {
        Object o = input.get(i);
        if (!(o instanceof DataBag)) {
            throw new RuntimeException("parameters must be databags");
        }
        Iterator<Tuple> inputIterator = ((DataBag) o).iterator();
        // Only bags that actually have a first element are queued.
        if (inputIterator.hasNext()) {
            pq.add(new pair(inputIterator));
        }
    }
    return pq;
}
@Override public Object getListElement(Object list, int i) { if (i==0 || list!=cachedObject) { cachedObject = list; index = -1; DataBag db = (DataBag)list; iter = db.iterator(); } if (i==index+1) { index++; try { Tuple t = iter.next(); // If single item tuple, take the item directly from list if (t.size() == 1) { return t.get(0); } else { return t; } } catch (Exception e) { throw new RuntimeException(e); } } else { throw new RuntimeException("Only sequential read is supported"); } }
/**
 * Renders a bag as a two-dimensional table of shortened field strings.
 *
 * <p>The table has one row per tuple in {@code bag} and one column per field
 * in the schema of {@code op}; every cell is produced by {@code ShortenField}.
 *
 * @param op  operator cast to {@code LogicalRelationalOperator} to obtain the schema
 * @param bag bag whose tuples supply the rows
 * @return a {@code rows x cols} array of cell strings
 * @throws Exception if reading the schema or a tuple field fails
 */
static String[][] MakeArray(Operator op, DataBag bag) throws Exception {
    final int rowCount = (int) bag.size();
    final int colCount = ((LogicalRelationalOperator) op).getSchema().getFields().size();
    final String[][] cells = new String[rowCount][colCount];

    final Iterator<Tuple> tuples = bag.iterator();
    int row = 0;
    while (row < rowCount) {
        Tuple tuple = tuples.next();
        String[] line = cells[row];
        for (int col = 0; col < colCount; col++) {
            line[col] = ShortenField(tuple.get(col));
        }
        row++;
    }
    return cells;
}
/**
 * Runs the Ruby method for the current stage on the first field of the first
 * tuple in the input bag and wraps the converted result in a tuple.
 *
 * @param input tuple whose first field is cast to a {@code DataBag}
 * @return a tuple built from the Ruby result converted back to a Pig value
 * @throws IOException wrapping any exception raised while extracting the
 *                     argument, calling into Ruby, or converting the result
 */
@Override
public Tuple exec(Tuple input) throws IOException {
    if (!isInitialized()) {
        initialize();
    }

    try {
        // Everything, including argument extraction, stays inside the try so
        // that any failure surfaces as an IOException.
        DataBag bag = (DataBag) input.get(0);
        Tuple first = bag.iterator().next();
        IRubyObject inp = PigJrubyLibrary.pigToRuby(ruby, first.get(0));
        IRubyObject rubyResult =
                rubyEngine.callMethod(getReceiver(), getStage(), inp, IRubyObject.class);
        return mTupleFactory.newTuple(PigJrubyLibrary.rubyToPig(rubyResult));
    } catch (Exception e) {
        throw new IOException("Error executing initial function", e);
    }
}
}
/**
 * Formats a bag as a brace-delimited, comma-separated list of its tuples.
 *
 * <p>Each tuple is rendered by {@code TupleFormat.format}; an empty bag
 * produces {@code "{}"}.
 *
 * @param bag the bag to render
 * @return the textual form, e.g. <code>{t1,t2}</code>
 */
public static String format(DataBag bag) {
    // The buffer never leaves this method, so the unsynchronized StringBuilder
    // is used in place of StringBuffer.
    StringBuilder sb = new StringBuilder();
    sb.append('{');

    Iterator<Tuple> it = bag.iterator();
    while (it.hasNext()) {
        sb.append(TupleFormat.format(it.next()));
        // Separator only between elements, never after the last one.
        if (it.hasNext()) {
            sb.append(',');
        }
    }

    sb.append('}');
    return sb.toString();
}
}
@Override public Tuple exec(Tuple input) throws IOException { if (input == null || input.size() == 0) return null; // Strip off the initial level of bag DataBag values = (DataBag)input.get(0); Iterator<Tuple> it = values.iterator(); Tuple t = it.next(); // If the input tuple has only one field, then we'll extract // that field and serialize it into a key. If it has multiple // fields, we'll serialize the whole tuple. byte[] b; if (t.size() == 1) b = DataType.toBytes(t.get(0)); else b = DataType.toBytes(t, DataType.TUPLE); Key k = new Key(b); filter = new BloomFilter(vSize, numHash, hType); filter.add(k); return TupleFactory.getInstance().newTuple(bloomOut()); } }
private void computeDiff( DataBag bag1, DataBag bag2, DataBag emitTo) { // Build two hash tables and probe with first one, then the other. // This does make the assumption that the distinct set of keys from // each bag will fit in memory. Set<Tuple> s1 = new HashSet<Tuple>(); Iterator<Tuple> i1 = bag1.iterator(); while (i1.hasNext()) s1.add(i1.next()); Set<Tuple> s2 = new HashSet<Tuple>(); Iterator<Tuple> i2 = bag2.iterator(); while (i2.hasNext()) s2.add(i2.next()); for (Tuple t : s1) if (!s2.contains(t)) emitTo.add(t); for (Tuple t : s2) if (!s1.contains(t)) emitTo.add(t); }
static protected Long sum(Tuple input) throws ExecException, NumberFormatException { DataBag values = (DataBag)input.get(0); long sum = 0; for (Iterator<Tuple> it = values.iterator(); it.hasNext();) { Tuple t = it.next(); // Have faith here. Checking each value before the cast is // just too much. sum += (Long)t.get(0); } return sum; }