Refine search
private static DataBag transformToBag(List<?> list, HCatFieldSchema hfs) throws Exception { if (list == null) { return null; } HCatFieldSchema elementSubFieldSchema = hfs.getArrayElementSchema().getFields().get(0); DataBag db = new DefaultDataBag(); for (Object o : list) { Tuple tuple; if (elementSubFieldSchema.getType() == Type.STRUCT) { tuple = transformToTuple((List<?>) o, elementSubFieldSchema); } else { // bags always contain tuples tuple = tupFac.newTuple(extractPigObject(o, elementSubFieldSchema)); } db.add(tuple); } return db; }
// Elided excerpt: the enclosing method, the closing braces and several
// statements between the ones below are not shown in this listing.
// Visible steps: read the struct sub-schema, copy out all fields of the Pig
// tuple into a presized list, and, for a bag, take the first entry of the
// array element schema, decide whether elements are structs (needTuple), and
// convert each bag entry through getJavaObj -- passing the whole tuple when
// needTuple is true, otherwise only its first field.
HCatSchema structSubSchema = hcatFS.getStructSubSchema(); List<Object> all = ((Tuple) pigObj).getAll(); ArrayList<Object> converted = new ArrayList<Object>(all.size()); for (int i = 0; i < all.size(); i++) { HCatFieldSchema tupFS = hcatFS.getArrayElementSchema().get(0); boolean needTuple = tupFS.getType() == Type.STRUCT; List<Object> bagContents = new ArrayList<Object>((int) pigBag.size()); Iterator<Tuple> bagItr = pigBag.iterator(); bagContents.add(getJavaObj(needTuple ? bagItr.next() : bagItr.next().get(0), tupFS));
@Override public Tuple exec(Tuple input) throws IOException { // Initial is called in the map. // we just send the tuple down try { // input is a bag with one tuple containing // the column we are trying to operate on DataBag bg = (DataBag) input.get(0); if (bg.iterator().hasNext()) { return bg.iterator().next(); } else { // make sure that we call the object constructor, not the list constructor return tfact.newTuple((Object) null); } } catch (ExecException e) { throw e; } catch (Exception e) { int errCode = 2106; throw new ExecException("Error executing an algebraic function", errCode, PigException.BUG, e); } } }
@Override public DataBag exec(Tuple input) throws IOException { // initialize a reverse mapping HashMap<Integer, String> positionToAlias = new HashMap<Integer, String>(); for (String alias : getFieldAliases().keySet()) { positionToAlias.put(getFieldAliases().get(alias), alias); } DataBag output = BagFactory.getInstance().newDefaultBag(); for (int i=0; i<input.size(); i++) { Tuple tuple = TupleFactory.getInstance().newTuple(); tuple.append(positionToAlias.get(i)); tuple.append(input.get(i)); output.add(tuple); } return output; }
// Elided excerpt: the enclosing method, the closing braces and several
// statements are missing from this listing (for example, d1 is used below
// but its declaration is not shown).
// Visible steps: reject any input that does not have exactly two fields with
// an ExecException (error code 2107, PigException.BUG); then inspect the
// first field, treating it as a bag when it is a DataBag; when d1 and d2 are
// not equal, add each of them to the output wrapped in its own tuple.
if (input.size() != 2) { int errCode = 2107; String msg = "DIFF expected two inputs but received " + input.size() + " inputs."; throw new ExecException(msg, errCode, PigException.BUG); Object o1 = input.get(0); if (o1 instanceof DataBag) { DataBag bag1 = (DataBag)o1; Object d2 = input.get(1); if (!d1.equals(d2)) { output.add(mTupleFactory.newTuple(d1)); output.add(mTupleFactory.newTuple(d2));
/**
 * Produces {@code numKeys} (key, bucket) tuples derived from the first input
 * field.
 *
 * @param input a tuple whose first field is the string used as key prefix
 * @return a bag of two-field tuples: the prefix followed by the running index
 *         {@code 0..numKeys-1}, and a value drawn from
 *         {@code random.nextInt(maxRandom)}
 * @throws IOException if the first field cannot be read or a field cannot be set
 */
public DataBag exec(Tuple input) throws IOException {
  final DataBag result = bagFactory.newDefaultBag();
  final String prefix = (String) input.get(0);

  int index = 0;
  while (index < numKeys) {
    final String key = prefix + index;
    final int bucket = random.nextInt(maxRandom);

    final Tuple pair = tupleFactory.newTuple(2);
    pair.set(0, key);
    pair.set(1, bucket);
    result.add(pair);

    index++;
  }
  return result;
}
/**
 * Flattens the values of all classification codes into one bag.
 *
 * @param classifCodeList the classification codes to read values from
 * @return a bag containing one tuple per value, in iteration order of the
 *         codes and of each code's value list
 */
private DataBag getCategories(List<ClassifCode> classifCodeList) {
  final TupleFactory tuples = TupleFactory.getInstance();
  final DataBag categories = new DefaultDataBag();
  for (ClassifCode classification : classifCodeList) {
    for (String value : classification.getValueList()) {
      categories.add(tuples.newTuple(value));
    }
  }
  return categories;
}
// Elided excerpt: the enclosing method, the closing braces and the
// try/catch that owns the trailing log.warn(...) are not shown in this
// listing.
// Visible steps: read a map from the first field of t, fetch the bag stored
// under PartitionSkewedKeys.PARTITION_LIST, and for every tuple in it take
// the last field as maxIndex, the second-to-last as minIndex, and copy all
// preceding fields into a fresh key tuple.
final TupleFactory tf = TupleFactory.getInstance(); Map<String, Object> distMap = (Map<String, Object>) t.get(0); DataBag partitionList = (DataBag) distMap.get(PartitionSkewedKeys.PARTITION_LIST); Iterator<Tuple> it = partitionList.iterator(); while (it.hasNext()) { Tuple idxTuple = it.next(); Integer maxIndex = (Integer) idxTuple.get(idxTuple.size() - 1); Integer minIndex = (Integer) idxTuple.get(idxTuple.size() - 2); Tuple keyTuple = tf.newTuple(); for (int i = 0; i < idxTuple.size() - 2; i++) { keyTuple.append(idxTuple.get(i)); log.warn(e.getMessage());
/**
 * Accumulates one batch of tuples: each is rebuilt from its fields with the
 * running index {@code i} appended, then added to {@code outputBag}.
 *
 * @param arg0 a tuple whose first field is the bag of tuples to number
 * @throws IOException if the bag cannot be read from {@code arg0}
 */
@Override
public void accumulate(Tuple arg0) throws IOException {
  final DataBag batch = (DataBag) arg0.get(0);
  final TupleFactory factory = TupleFactory.getInstance();

  for (Tuple source : batch) {
    final Tuple numbered = factory.newTuple(source.getAll());
    numbered.append(i);
    outputBag.add(numbered);

    // Periodically push the accumulated output to disk.
    // NOTE(review): if count is 0 on entry, this condition is already true
    // for the very first tuple, so the bag is spilled immediately -- confirm
    // that this is intended.
    if (count % 1000000 == 0) {
      outputBag.spill();
      count = 0;
    }

    i++;
    count++;
  }
}
/**
 * Flattens a bag of tuples into a single tuple by appending every field of
 * every tuple, in iteration order.
 *
 * @param input a tuple whose first field is the bag to flatten
 * @return the flattened tuple (empty when the bag is empty), or {@code null}
 *         when {@code input} is {@code null}, has no fields, or its first
 *         field is {@code null}
 * @throws IOException if the bag cannot be read from {@code input}
 */
@Override
public Tuple exec(Tuple input) throws IOException {
  if (input == null || input.size() == 0) {
    return null;
  }

  DataBag db = (DataBag) input.get(0);
  if (db == null) {
    // A null bag field previously caused a NullPointerException at
    // db.iterator(); treat it like the other "no input" cases instead.
    return null;
  }

  Tuple output = tupleFactory.newTuple();
  for (Tuple t : db) {
    for (Object o : t.getAll()) {
      output.append(o);
    }
  }
  return output;
}
public DataBag exec(Tuple input) throws IOException { if (input == null || input.size() < 1 || input.isNull(0)) return null; // Output bag DataBag bagOfTokens = bagFactory.newDefaultBag(); StringReader textInput = new StringReader(input.get(0).toString()); PTBTokenizer ptbt = new PTBTokenizer(textInput, new CoreLabelTokenFactory(), ""); for (CoreLabel label; ptbt.hasNext(); ) { label = (CoreLabel)ptbt.next(); Tuple termText = tupleFactory.newTuple(label.toString()); bagOfTokens.add(termText); } return bagOfTokens; } }
/**
 * Numbers the tuples of a bag in descending order: the first tuple gets
 * {@code inputBag.size() - 1 + start}, and each following tuple gets one less.
 *
 * @param inputBag the bag whose tuples are numbered
 * @return a new bag in which every tuple is rebuilt from the fields of the
 *         corresponding input tuple with its index appended as last field
 * @throws IOException declared by the signature
 */
public DataBag call(DataBag inputBag) throws IOException {
  final long spillInterval = 1000000;
  final TupleFactory tupleFactory = TupleFactory.getInstance();

  DataBag outputBag = BagFactory.getInstance().newDefaultBag();
  long i = inputBag.size() - 1 + start;
  long count = 0;

  for (Tuple t : inputBag) {
    Tuple t1 = tupleFactory.newTuple(t.getAll());
    t1.append(i);
    outputBag.add(t1);

    // Count the tuple before testing the threshold. The previous form tested
    // "count % 1000000 == 0" with count starting at 0, which forced a spill
    // right after the very first tuple of every call, even for tiny bags.
    count++;
    if (count == spillInterval) {
      outputBag.spill();
      count = 0;
    }

    i--;
  }

  return outputBag;
}
// Elided excerpt: the method's opening brace, its loops/conditionals, the
// declarations of n, innerTuple, scored and t, and the closing braces are
// not shown in this listing.
// Visible steps: read the input bag from the first field; create a default
// bag (selected) and two sorted bags ordered by ScoredTupleComparator
// (aggWaiting, waiting); accumulate a running total n and merge inner
// "selected" bags; add tuples to the selected/waiting bags; append the
// waiting bag to the output tuple; and report totals on System.err.
@Override public Tuple exec(Tuple input) throws IOException DataBag bag = (DataBag) input.get(0); DataBag selected = bagFactory.newDefaultBag(); DataBag aggWaiting = bagFactory.newSortedBag(new ScoredTupleComparator()); DataBag waiting = bagFactory.newSortedBag(new ScoredTupleComparator()); Tuple output = tupleFactory.newTuple(); n += (Long) innerTuple.get(0); selected.addAll((DataBag) innerTuple.get(1)); selected.add(scored.getTuple()); aggWaiting.add(t); selected.add(scored.getTuple()); waiting.add(t); output.append(waiting); System.err.println("Read " + n + " items, selected " + selected.size() + ", and wait-listed " + aggWaiting.size() + ".");
@Override public Tuple exec(Tuple input) throws IOException { // Since Initial is guaranteed to be called // only in the map, it will be called with an // input of a bag with a single tuple - the // count should always be 1 if bag is non empty DataBag bag = (DataBag)input.get(0); return mTupleFactory.newTuple(bag.iterator().hasNext()? Long.valueOf(1L) : Long.valueOf(0L)); } }
@Override public Tuple exec(Tuple input) throws IOException { // Since Initial is guaranteed to be called // only in the map, it will be called with an // input of a bag with a single tuple - the // count should always be 1 if bag is non empty DataBag bag = (DataBag)input.get(0); Iterator it = bag.iterator(); if (it.hasNext()){ Tuple t = (Tuple)it.next(); if (t != null && t.size() > 0 && t.get(0) != null) return mTupleFactory.newTuple(Long.valueOf(1)); } return mTupleFactory.newTuple(Long.valueOf(0)); } }
// Elided excerpt: the enclosing method, the closing braces and several
// statements are not shown in this listing (for example the assignment of
// tupleOfMaxSchemaSize and the loop that declares e); the two declarations
// of t belong to separate scopes in the full source.
// Visible steps: scan the bags registered for fs, using the first tuple of
// each non-empty bag to track the largest tuple size seen (maxSchemaSize);
// then, for tuples shorter than maxSchemaSize, append the missing trailing
// fields taken from tupleOfMaxSchemaSize; finally store or merge bags into
// newBaseData keyed by e.getKey().
Tuple tupleOfMaxSchemaSize = null; for (DataBag bag : inputDataMap.get(fs)) { if (bag.size() > 0) { int size = 0; Tuple t = null; t = bag.iterator().next(); size = t.size(); if (size > maxSchemaSize) { maxSchemaSize = size; if (bag.size() > 0) { for (Iterator<Tuple> it = bag.iterator(); it.hasNext();) { Tuple t = it.next(); for (int i = t.size(); i < maxSchemaSize; ++i) { t.append(tupleOfMaxSchemaSize.get(i)); newBaseData.put(e.getKey(), bag); bag.addAll(e.getValue());
/**
 * Zips two lists into a bag of two-field tuples.
 *
 * @param list1 supplies the first field of each tuple
 * @param list2 supplies the second field of each tuple
 * @return a bag with one tuple per index, up to the length of the shorter
 *         list; surplus elements of the longer list are ignored
 * @throws ExecException if a tuple field cannot be set
 */
private <T1, T2> DataBag listToDataBag(List<T1> list1, List<T2> list2) throws ExecException {
  final DataBag zipped = BagFactory.getInstance().newDefaultBag();
  final TupleFactory tuples = TupleFactory.getInstance();
  final int pairCount = Math.min(list1.size(), list2.size());

  int index = 0;
  while (index < pairCount) {
    final Tuple pair = tuples.newTuple(2);
    pair.set(0, list1.get(index));
    pair.set(1, list2.get(index));
    zipped.add(pair);
    index++;
  }
  return zipped;
}
/**
 * Converts List objects to DataBag to keep Pig happy.
 *
 * <p>Elements that are themselves lists are converted recursively and their
 * tuples are merged into the result. All remaining elements are collected,
 * in order, into a single tuple that is added last, and only when it is
 * non-empty.
 *
 * @param l the list to convert; nested lists are allowed
 * @return a bag holding the tuples produced from nested lists followed by
 *         the tuple of non-list elements, if any
 */
@SuppressWarnings("unchecked")
private DataBag convertListToBag(List<Object> l) {
  final DataBag bag = bagFactory.newDefaultBag();
  final Tuple scalars = tupleFactory.newTuple();

  for (Object element : l) {
    if (!(element instanceof List)) {
      scalars.append(element);
      continue;
    }
    bag.addAll(convertListToBag((List<Object>) element));
  }

  if (scalars.size() > 0) {
    bag.add(scalars);
  }
  return bag;
}