org.apache.pig.data.DataBag.iterator java code examples

Refine search

boolean needTuple = tupFS.getType() == Type.STRUCT;
List<Object> bagContents = new ArrayList<Object>((int) pigBag.size());
Iterator<Tuple> bagItr = pigBag.iterator();
while (bagItr.hasNext()) {
 bagContents.add(getJavaObj(needTuple ? bagItr.next() : bagItr.next().get(0), tupFS));

 /**
  * Returns the first tuple produced by iterating the bag held in field 0 of the input.
  *
  * @param input tuple whose first field is cast to a {@code DataBag}
  * @return the first tuple handed out by that bag's iterator
  * @throws IOException declared by the overridden signature
  */
 @Override
 public Tuple exec(Tuple input) throws IOException {
  DataBag firstField = (DataBag) input.get(0);
  // No emptiness check is made here: next() is called unconditionally.
  return firstField.iterator().next();
 }
}

  /**
   * Emits a single-field tuple holding 1 when the input bag has at least one
   * tuple and 0 when it has none.
   *
   * @param input tuple whose first field is cast to a {@code DataBag}
   * @return a tuple built by {@code mTupleFactory} wrapping the {@code Long} count
   * @throws IOException declared by the overridden signature
   */
  @Override
  public Tuple exec(Tuple input) throws IOException {
    // Initial is guaranteed to run only in the map, where the bag carries a
    // single tuple, so a non-empty bag always counts as exactly 1.
    DataBag bag = (DataBag) input.get(0);
    boolean nonEmpty = bag.iterator().hasNext();
    Long count;
    if (nonEmpty) {
      count = Long.valueOf(1L);
    } else {
      count = Long.valueOf(0L);
    }
    return mTupleFactory.newTuple(count);
  }
}

/**
 * Collects every tuple of a bag into a new {@code HashSet}.
 *
 * @param bag bag whose iterator is drained
 * @return a fresh set containing each tuple the iterator produced
 */
private static Set<Tuple> toSet(DataBag bag) {
  Set<Tuple> tuples = new HashSet<Tuple>();
  for (Iterator<Tuple> cursor = bag.iterator(); cursor.hasNext();) {
    tuples.add(cursor.next());
  }
  return tuples;
}

 /**
  * Picks the first tuple out of the bag stored in the input's first field.
  *
  * @param input tuple whose field 0 is cast to a {@code DataBag}
  * @return the tuple returned by the first {@code next()} call on the bag's iterator
  * @throws IOException declared by the overridden signature
  */
 @Override
 public Tuple exec(Tuple input) throws IOException {
  Object field = input.get(0);
  DataBag bag = (DataBag) field;
  // The iterator is advanced without a hasNext() check.
  return bag.iterator().next();
 }
}

/**
 * Builds a priority queue holding one {@code pair} per non-empty bag in the input.
 *
 * Every field of the input must be a {@code DataBag}; bags whose iterator has
 * no elements are skipped.
 *
 * @param input tuple whose fields are all expected to be bags
 * @return queue of {@code pair} objects, one per non-empty bag (possibly empty)
 * @throws IOException declared by the signature
 * @throws RuntimeException if any field is not a {@code DataBag}
 */
private PriorityQueue<pair> load_bags(Tuple input) throws IOException
{
 // PriorityQueue rejects an initial capacity below 1 with
 // IllegalArgumentException, so a zero-field input must be clamped to 1.
 int capacity = Math.max(1, input.size());
 PriorityQueue<pair> pq = new PriorityQueue<pair>(capacity);
 for (int i=0; i < input.size(); i++) {
  Object o = input.get(i);
  if (!(o instanceof DataBag)) {
   throw new RuntimeException("parameters must be databags");
  }
  Iterator<Tuple> inputIterator = ((DataBag) o).iterator();
  // Only bags that can supply at least one tuple take part.
  if (inputIterator.hasNext()) {
   pq.add(new pair(inputIterator));
  }
 }
 return pq;
}

/**
 * Feeds every tuple of a bag into a bounded priority queue.
 *
 * After each insertion the queue is trimmed back to {@code limit} entries by
 * polling its head.
 *
 * @param store queue that accumulates the tuples
 * @param limit maximum number of entries kept in {@code store}
 * @param inputBag bag whose tuples are offered to the queue
 */
protected static void updateTop(PriorityQueue<Tuple> store, int limit, DataBag inputBag) {
  for (Iterator<Tuple> tuples = inputBag.iterator(); tuples.hasNext();) {
    store.add(tuples.next());
    // One element over the limit: drop the queue's head.
    if (store.size() > limit) {
      store.poll();
    }
  }
}

 /**
  * Hands back the leading tuple of the bag found in field 0 of the input.
  *
  * @param input tuple whose first field is cast to a {@code DataBag}
  * @return whatever the bag's iterator yields on its first {@code next()} call
  * @throws IOException declared by the overridden signature
  */
 @Override
 public Tuple exec(Tuple input) throws IOException {
  DataBag bag = (DataBag) input.get(0);
  Tuple head = bag.iterator().next();
  return head;
 }
}

/**
 * Advances to the next value.
 *
 * With a non-null {@code input}, the iterator {@code it} is created on first
 * use and each call stores the next element in {@code value}. With a null
 * {@code input}, the method reports {@code true} exactly once (tracked by
 * {@code init}) and {@code false} afterwards.
 *
 * @return {@code true} if a value (or the single null-input record) is available
 */
@Override
public boolean nextKeyValue() throws IOException, InterruptedException {
  if (input != null) {
    // Lazily open the iterator the first time through.
    if (it == null) {
      it = input.iterator();
    }
    if (it.hasNext()) {
      value = it.next();
      return true;
    }
    return false;
  }

  // No input at all: answer true once, then false on later calls.
  if (init) {
    return false;
  }
  init = true;
  return true;
}

/**
 * Returns element {@code i} of a bag-backed list, supporting sequential reads only.
 *
 * A request for index 0, or for a list other than the cached one, restarts
 * iteration over the bag. Any other index must be exactly one past the last
 * index served.
 *
 * @param list the list object; cast to {@code DataBag}
 * @param i index requested
 * @return the tuple's only field when the tuple has one field, otherwise the tuple
 * @throws RuntimeException if {@code i} is not the next sequential index, or
 *         wrapping any exception raised while reading the tuple
 */
@Override
public Object getListElement(Object list, int i) {
  boolean restart = (i == 0) || (list != cachedObject);
  if (restart) {
    cachedObject = list;
    index = -1;
    DataBag bag = (DataBag) list;
    iter = bag.iterator();
  }

  if (i != index + 1) {
    throw new RuntimeException("Only sequential read is supported");
  }

  index++;
  try {
    Tuple current = iter.next();
    // Single-field tuples are unwrapped to their one field.
    if (current.size() != 1) {
      return current;
    }
    return current.get(0);
  } catch (Exception e) {
    throw new RuntimeException(e);
  }
}

/**
 * Add every tuple of another bag to this bag.
 * @param b bag whose tuples are added, in the order its iterator yields them.
 */
@Override
public void addAll(DataBag b) {
  for (Iterator<Tuple> source = b.iterator(); source.hasNext();) {
    add(source.next());
  }
}

/**
 * Renders a bag as a rows-by-columns table of shortened field strings.
 *
 * The row count is the bag's size narrowed to {@code int}; the column count is
 * the number of fields in the operator's schema.
 *
 * @param op operator, cast to {@code LogicalRelationalOperator} to read its schema
 * @param bag bag supplying one tuple per row
 * @return table where cell {@code [i][j]} is {@code ShortenField} of field j of tuple i
 * @throws Exception declared by the signature
 */
static String[][] MakeArray(Operator op, DataBag bag)
    throws Exception {
  final int rowCount = (int) bag.size();
  final int colCount = ((LogicalRelationalOperator) op).getSchema().getFields().size();
  final String[][] table = new String[rowCount][colCount];

  Iterator<Tuple> tuples = bag.iterator();
  int r = 0;
  while (r < rowCount) {
    Tuple tuple = tuples.next();
    String[] row = table[r];
    for (int c = 0; c < colCount; c++) {
      row[c] = ShortenField(tuple.get(c));
    }
    r++;
  }
  return table;
}

/**
 * Finds the first non-null tuple in the bag held in field 0 of the input.
 *
 * @param input tuple whose first field is cast to a {@code DataBag}
 * @return the first non-null tuple, or {@code null} if the bag yields none
 * @throws IOException declared by the signature
 */
static private Tuple realexec(Tuple input) throws IOException
{
  DataBag bag = (DataBag) input.get(0);
  for (Iterator<Tuple> cursor = bag.iterator(); cursor.hasNext();) {
    Tuple candidate = cursor.next();
    if (candidate == null) {
      continue;
    }
    return candidate;
  }
  return null;
}

  /**
   * Converts the first field of the first tuple in the input bag to a Ruby
   * object, invokes the stage method on the receiver, and wraps the converted
   * result in a tuple.
   *
   * @param input tuple whose first field is cast to a {@code DataBag}
   * @return a single-field tuple holding the Ruby result converted back to Pig
   * @throws IOException wrapping any exception raised during conversion or the call
   */
  @Override
  public Tuple exec(Tuple input) throws IOException {
    if (!isInitialized()) {
      initialize();
    }
    try {
      DataBag bag = (DataBag) input.get(0);
      Object firstField = bag.iterator().next().get(0);
      IRubyObject rubyInput = PigJrubyLibrary.pigToRuby(ruby, firstField);
      IRubyObject rubyOutput =
          rubyEngine.callMethod(getReceiver(), getStage(), rubyInput, IRubyObject.class);
      Object pigOutput = PigJrubyLibrary.rubyToPig(rubyOutput);
      return mTupleFactory.newTuple(pigOutput);
    } catch (Exception e) {
      throw new IOException("Error executing initial function",  e);
    }
  }
}

  /**
   * Formats a bag as {@code {t1,t2,...}}, rendering each tuple with
   * {@code TupleFormat.format} and separating tuples with commas.
   *
   * @param bag bag to render
   * @return the brace-delimited, comma-separated string; {@code "{}"} when the
   *         bag's iterator yields nothing
   */
  public static String format(DataBag bag) {
    // The buffer never leaves this method, so the unsynchronized
    // StringBuilder replaces StringBuffer with identical output.
    StringBuilder sb = new StringBuilder();
    sb.append('{');

    Iterator<Tuple> it = bag.iterator();
    while (it.hasNext()) {
      Tuple t = it.next();
      sb.append(TupleFormat.format(t));
      // A separator goes only between tuples, never after the last one.
      if (it.hasNext()) {
        sb.append(',');
      }
    }
    sb.append('}');
    return sb.toString();
  }
}

  /**
   * Builds a new bloom filter containing a key derived from the first tuple of
   * the input bag and returns the filter output wrapped in a tuple.
   *
   * @param input tuple whose first field is cast to a {@code DataBag}
   * @return {@code null} when the input is null or has no fields; otherwise a
   *         tuple wrapping {@code bloomOut()}
   * @throws IOException declared by the overridden signature
   */
  @Override
  public Tuple exec(Tuple input) throws IOException {
    if (input == null || input.size() == 0) {
      return null;
    }

    // Peel off the outer bag and take its first tuple.
    DataBag values = (DataBag) input.get(0);
    Tuple first = values.iterator().next();

    // A one-field tuple is keyed by that field alone; a wider tuple is
    // serialized whole.
    byte[] keyBytes;
    if (first.size() != 1) {
      keyBytes = DataType.toBytes(first, DataType.TUPLE);
    } else {
      keyBytes = DataType.toBytes(first.get(0));
    }

    Key key = new Key(keyBytes);
    filter = new BloomFilter(vSize, numHash, hType);
    filter.add(key);
    return TupleFactory.getInstance().newTuple(bloomOut());
  }
}

/**
 * Emits the tuples that appear in exactly one of the two bags.
 *
 * Each bag is first loaded into a hash set, which assumes the distinct tuples
 * of each bag fit in memory. Each set is then probed against the other.
 *
 * @param bag1 first bag
 * @param bag2 second bag
 * @param emitTo bag receiving tuples found only in {@code bag1}, followed by
 *        tuples found only in {@code bag2}
 */
private void computeDiff(
    DataBag bag1,
    DataBag bag2,
    DataBag emitTo) {
  Set<Tuple> first = new HashSet<Tuple>();
  for (Iterator<Tuple> it = bag1.iterator(); it.hasNext();) {
    first.add(it.next());
  }

  Set<Tuple> second = new HashSet<Tuple>();
  for (Iterator<Tuple> it = bag2.iterator(); it.hasNext();) {
    second.add(it.next());
  }

  // Tuples only in the first bag.
  for (Tuple t : first) {
    if (second.contains(t)) {
      continue;
    }
    emitTo.add(t);
  }
  // Tuples only in the second bag.
  for (Tuple t : second) {
    if (first.contains(t)) {
      continue;
    }
    emitTo.add(t);
  }
}

/**
 * Adds up field 0 of every tuple in the bag held in the input's first field.
 *
 * @param input tuple whose first field is cast to a {@code DataBag}
 * @return the total as a boxed {@code Long}; 0 when the bag yields no tuples
 * @throws ExecException declared by the signature
 * @throws NumberFormatException declared by the signature
 */
static protected Long sum(Tuple input) throws ExecException, NumberFormatException {
  DataBag values = (DataBag) input.get(0);
  long total = 0;
  Iterator<Tuple> cursor = values.iterator();
  while (cursor.hasNext()) {
    // Each field is cast straight to Long without a prior type check.
    Long field = (Long) cursor.next().get(0);
    total += field;
  }
  return total;
}

  /**
   * Returns field 0 of the first tuple in the input bag, cast to {@code String}.
   *
   * @param input tuple whose first field is cast to a {@code DataBag}
   * @return the string, or {@code null} when the bag's iterator yields nothing
   * @throws IOException declared by the signature
   */
  public String exec(Tuple input) throws IOException {
    DataBag bag = (DataBag) input.get(0);
    Iterator<Tuple> tuples = bag.iterator();
    if (tuples.hasNext()) {
      Tuple first = tuples.next();
      return (String) first.get(0);
    }
    return null;
  }
}

 /**
  * Returns field 0 of the second tuple in the input bag, cast to {@code String}.
  *
  * @param input tuple whose first field is cast to a {@code DataBag}
  * @return the string, or {@code null} when the bag yields fewer than two tuples
  * @throws IOException declared by the signature
  */
 public String exec(Tuple input) throws IOException {
  DataBag bag = (DataBag) input.get(0);
  Iterator<Tuple> tuples = bag.iterator();
  if (tuples.hasNext()) {
   // Discard the first tuple; only the second one is of interest.
   tuples.next();
   if (tuples.hasNext()) {
    Tuple second = tuples.next();
    return (String) second.get(0);
   }
  }
  return null;
 }
}

Javadoc

Get an iterator to the bag. For default and distinct bags, no particular order is guaranteed. For sorted bags the order is guaranteed to be sorted according to the provided comparator.

Popular methods of DataBag

add
Add a tuple to the bag.
size
Get the number of elements in the bag, both in memory and on disk.
addAll
Add contents of a bag to the bag.
clear
Clear out the contents of the bag, both on disk and in memory. Any attempts to read after this is ca
compareTo
getMemorySize
isDistinct
Find out if the bag is distinct.
isSorted
Find out if the bag is sorted.
spill

Popular in Java

Parsing JSON documents to java classes using gson
scheduleAtFixedRate (ScheduledExecutorService)
getSharedPreferences (Context)
findViewById (Activity)
Arrays (java.util)
This class contains various methods for manipulating arrays (such as sorting and searching). This cl
Iterator (java.util)
An iterator over a sequence of objects, such as a collection. If a collection has been changed since
LinkedHashMap (java.util)
LinkedHashMap is an implementation of Map that guarantees iteration order. All optional operations a
Random (java.util)
This class provides methods that return pseudo-random values. It is dangerous to seed Random with the
Callable (java.util.concurrent)
A task that returns a result and may throw an exception. Implementors define a single method with no
ThreadPoolExecutor (java.util.concurrent)
An ExecutorService that executes each submitted task using one of possibly several pooled threads, n
Top 12 Jupyter Notebook extensions

How to use the iterator method in org.apache.pig.data.DataBag

Best Java code snippets using org.apache.pig.data.DataBag.iterator (Showing top 20 results out of 315)

Refine search

How to use
iterator
method
in
org.apache.pig.data.DataBag