private static DataBag transformToBag(List<?> list, HCatFieldSchema hfs) throws Exception { if (list == null) { return null; } HCatFieldSchema elementSubFieldSchema = hfs.getArrayElementSchema().getFields().get(0); DataBag db = new DefaultDataBag(); for (Object o : list) { Tuple tuple; if (elementSubFieldSchema.getType() == Type.STRUCT) { tuple = transformToTuple((List<?>) o, elementSubFieldSchema); } else { // bags always contain tuples tuple = tupFac.newTuple(extractPigObject(o, elementSubFieldSchema)); } db.add(tuple); } return db; }
/**
 * Adds {@code t} to {@code inputBag} and returns that same bag.
 *
 * @param inputBag the bag to add to; it is modified in place
 * @param t        the tuple to add
 * @return {@code inputBag}, now containing {@code t}
 * @throws IOException declared by the signature; nothing in this body throws it explicitly
 */
public DataBag call(DataBag inputBag, Tuple t) throws IOException {
    final DataBag accumulator = inputBag;
    accumulator.add(t);
    return accumulator;
}
/**
 * Adds a tuple to this object's {@code data} collection.
 *
 * @param t the tuple to add
 */
public void addData(Tuple t) {
    this.data.add(t);
}
/**
 * Wraps {@code element} in a new tuple created by {@code TF} and adds that tuple to {@code bag}.
 *
 * @param bag     the bag receiving the wrapped element
 * @param element the value to wrap
 */
@Override
protected void addElement(DataBag bag, T element) {
    bag.add(
        TF.newTuple(element)
    );
}
/**
 * Builds a bag containing {@code numGroups} tuples, each created from the integer {@code 1}.
 *
 * @param input not read by this method
 * @return a new default bag with {@code numGroups} tuples
 * @throws IOException declared by the signature; nothing in this body throws it explicitly
 */
@Override
public DataBag exec(Tuple input) throws IOException {
    final DataBag ones = mBagFactory.newDefaultBag();
    int remaining = numGroups;
    while (remaining > 0) {
        ones.add(mTupleFactory.newTuple(Integer.valueOf(1)));
        remaining--;
    }
    return ones;
}
/**
 * Reads a bag from {@code in}: a {@code long} element count followed by that many datums,
 * each of which is read with {@code readDatum} and cast to {@link Tuple}.
 *
 * @param in the input to read from
 * @return a new default bag holding the tuples in the order they were read
 * @throws IOException if reading the count or any datum fails
 */
public static DataBag bytesToBag(DataInput in) throws IOException {
    final DataBag result = mBagFactory.newDefaultBag();
    final long count = in.readLong();
    long read = 0;
    while (read < count) {
        // Any exception raised while reading a datum propagates to the caller unchanged.
        result.add((Tuple) readDatum(in));
        read++;
    }
    return result;
}
/**
 * Turns the map stored in field 0 of {@code input} into a bag of two-field tuples.
 *
 * <p>For every map entry one tuple is produced whose first field is the key and whose second
 * field is the value's {@code toString()} form. A {@code null} map or a {@code null} value is
 * not guarded against and results in a {@link NullPointerException}.
 *
 * @param input tuple whose field 0 holds a {@code Map<String, Object>}
 * @return a new default bag with one (key, value-as-string) tuple per map entry
 */
public DataBag exec(Tuple input) {
    DataBag resultBag = bagFactory.newDefaultBag();
    // Cast to the Map interface rather than HashMap so any map implementation is accepted.
    @SuppressWarnings("unchecked")
    Map<String, Object> map = (Map<String, Object>) input.get(0);
    // A Map is not Iterable; its entries have to be obtained through entrySet().
    for (Map.Entry<String, Object> entry : map.entrySet()) {
        Tuple t = tupleFactory.newTuple();
        t.append(entry.getKey());
        t.append(entry.getValue().toString());
        resultBag.add(t);
    }
    return resultBag;
}
/**
 * Pairs up the elements of two lists position by position and returns the pairs as a bag.
 *
 * <p>Only the first {@code min(list1.size(), list2.size())} positions are used; surplus
 * elements of the longer list are ignored.
 *
 * @param list1 supplies field 0 of every tuple
 * @param list2 supplies field 1 of every tuple
 * @return a new default bag of two-field tuples
 * @throws ExecException if setting a tuple field fails
 */
private <T1, T2> DataBag listToDataBag(List<T1> list1, List<T2> list2) throws ExecException {
    final DataBag pairs = BagFactory.getInstance().newDefaultBag();
    final int pairCount = Math.min(list1.size(), list2.size());
    int idx = 0;
    while (idx < pairCount) {
        final Tuple pair = TupleFactory.getInstance().newTuple(2);
        pair.set(0, list1.get(idx));
        pair.set(1, list2.get(idx));
        pairs.add(pair);
        idx++;
    }
    return pairs;
}
/**
 * Adds to {@code output} a new tuple made of the fields of {@code lTuple} followed by the
 * fields of {@code rTuple}.
 *
 * <p>Neither input tuple is modified. The fields are concatenated into a fresh list because
 * {@code Tuple.getAll()} may hand out the tuple's live backing list (NOTE(review): confirm
 * for the Tuple implementations in use); appending to that list directly would grow
 * {@code lTuple} itself on every call.
 *
 * @param output the bag receiving the combined tuple
 * @param lTuple supplies the leading fields
 * @param rTuple supplies the trailing fields
 */
private void addToAnswer(DataBag output, Tuple lTuple, Tuple rTuple) {
    List<Object> leftFields = lTuple.getAll();
    List<Object> rightFields = rTuple.getAll();
    List<Object> attrs = new java.util.ArrayList<Object>(leftFields.size() + rightFields.size());
    attrs.addAll(leftFields);
    attrs.addAll(rightFields);
    Tuple outTuple = tupleFactory.newTuple(attrs);
    output.add(outTuple);
}
/**
 * Converts a Hive output object to a Pig tuple and adds it to {@code bag}.
 *
 * <p>When the converted tuple has exactly one field and that field is itself a tuple, the
 * inner tuple is added instead of the outer one.
 *
 * @param input the Hive object to convert
 * @throws HiveException wrapping any exception raised during conversion or insertion
 */
@Override
public void collect(Object input) throws HiveException {
    try {
        final Tuple converted =
            (Tuple) HiveUtils.convertHiveToPig(input, schemaInfo.outputObjectInspector, null);
        final boolean wrapsSingleTuple =
            converted.size() == 1 && converted.get(0) instanceof Tuple;
        final Tuple toAdd = wrapsSingleTuple ? (Tuple) converted.get(0) : converted;
        bag.add(toAdd);
    } catch (Exception e) {
        throw new HiveException(e);
    }
}
/**
 * Builds a two-field tuple from a (key, values) pair: field 0 is the key, field 1 is a bag
 * containing every tuple from the pair's iterable. The result is logged at INFO level.
 *
 * @param next the key and the tuples associated with it
 * @return the newly built tuple
 * @throws Exception declared by the signature
 */
@Override
public Tuple call(Tuple2<String, Iterable<Tuple>> next) throws Exception {
    final Tuple result = tf.newTuple();
    result.append(next._1());

    final DataBag values = bf.newDefaultBag();
    for (Tuple value : next._2()) {
        values.add(value);
    }
    result.append(values);

    LOG.info("ToValueFunction1 out:" + result);
    return result;
}
} // closes the enclosing type, whose header lies outside this excerpt
/**
 * Flattens the values of every classification code into a bag of single-field tuples.
 *
 * @param classifCodeList the codes whose value lists are collected
 * @return a new bag with one tuple per value, in iteration order
 */
private DataBag getCategories(List<ClassifCode> classifCodeList) {
    final DataBag categories = new DefaultDataBag();
    for (ClassifCode classifCode : classifCodeList) {
        for (String value : classifCode.getValueList()) {
            categories.add(TupleFactory.getInstance().newTuple(value));
        }
    }
    return categories;
}
/**
 * Returns the tuples of all samples currently in the reservoir.
 *
 * @return a new default bag holding {@code getTuple()} of every sample from
 *         {@code getReservoir()}
 */
@Override
public DataBag getValue() {
    final DataBag samples = BagFactory.getInstance().newDefaultBag();
    for (ScoredTuple scored : getReservoir()) {
        samples.add(scored.getTuple());
    }
    return samples;
}
/**
 * Generates {@code numKeys} (key, bucket) tuples from the string in field 0 of {@code input}.
 *
 * <p>The k-th key is the base string with {@code k} appended; its bucket is drawn from
 * {@code random.nextInt(maxRandom)}.
 *
 * @param input tuple whose field 0 is the base string for the keys
 * @return a new default bag with {@code numKeys} two-field tuples
 * @throws IOException if reading or setting a tuple field fails
 */
public DataBag exec(Tuple input) throws IOException {
    final DataBag generated = bagFactory.newDefaultBag();
    final String prefix = (String) input.get(0);
    int k = 0;
    while (k < numKeys) {
        final String key = prefix + k;
        final int bucket = random.nextInt(maxRandom);
        final Tuple pair = tupleFactory.newTuple(2);
        pair.set(0, key);
        pair.set(1, bucket);
        generated.add(pair);
        k++;
    }
    return generated;
}
/**
 * Flattens the values of every classification code into a bag of single-field tuples.
 *
 * @param classifCodeList the codes whose value lists are collected
 * @return a new bag with one tuple per value, in iteration order
 */
private static DataBag getCategories(List<ClassifCode> classifCodeList) {
    final DataBag categories = new DefaultDataBag();
    for (ClassifCode classifCode : classifCodeList) {
        for (String value : classifCode.getValueList()) {
            categories.add(TupleFactory.getInstance().newTuple(value));
        }
    }
    return categories;
}
/**
 * Flattens the values of every classification code into a bag of single-field tuples.
 *
 * @param classifCodeList the codes whose value lists are collected
 * @return a new bag with one tuple per value, in iteration order
 */
private DataBag getCategories(List<ClassifCode> classifCodeList) {
    final DataBag categories = new DefaultDataBag();
    for (ClassifCode classifCode : classifCodeList) {
        for (String value : classifCode.getValueList()) {
            categories.add(TupleFactory.getInstance().newTuple(value));
        }
    }
    return categories;
}
/**
 * Wraps every element of {@code list} in its own tuple and collects the tuples in a bag.
 *
 * @param list the elements to wrap
 * @return a new default bag with one tuple per list element
 */
private <T> DataBag listToDataBag(List<T> list) {
    final DataBag wrapped = BagFactory.getInstance().newDefaultBag();
    for (T item : list) {
        wrapped.add(TupleFactory.getInstance().newTuple(item));
    }
    return wrapped;
}
/**
 * Reads every tuple that {@code lfunc} produces for {@code file} and returns them in a bag.
 *
 * <p>The load function is driven through a {@code ReadToEndLoader} configured from the
 * properties of {@code pigContext}; tuples are pulled until the loader returns {@code null}.
 *
 * @param lfunc      the load function used to read the data
 * @param pigContext supplies the properties the configuration is built from
 * @return a new default bag holding all tuples in the order they were read
 * @throws IOException if the loader fails
 */
public DataBag load(LoadFunc lfunc, PigContext pigContext) throws IOException {
    final DataBag rows = BagFactory.getInstance().newDefaultBag();
    final ReadToEndLoader reader = new ReadToEndLoader(
        lfunc,
        ConfigurationUtil.toConfiguration(pigContext.getProperties()),
        file,
        0);
    for (Tuple row = reader.getNext(); row != null; row = reader.getNext()) {
        rows.add(row);
    }
    return rows;
}
/**
 * Randomly assigns {@code input} to bagging rounds.
 *
 * <p>For each index from 0 up to the configured bagging number, a random double is drawn;
 * when it is less than or equal to the configured sample rate, a tuple (index, input) is
 * added to the result.
 *
 * @param input the tuple to sample; it is embedded as the second field of each emitted tuple
 * @return a new default bag with one tuple per index that selected {@code input}
 * @throws IOException declared by the signature
 */
public DataBag exec(Tuple input) throws IOException {
    final int bagCount = modelConfig.getBaggingNum();
    final double sampleRate = modelConfig.getBaggingSampleRate();
    final DataBag selected = BagFactory.getInstance().newDefaultBag();
    int bagIndex = 0;
    while (bagIndex < bagCount) {
        final boolean picked = rand.nextDouble() <= sampleRate;
        if (picked) {
            final Tuple assignment = TupleFactory.getInstance().newTuple();
            assignment.append(bagIndex);
            assignment.append(input);
            selected.add(assignment);
        }
        bagIndex++;
    }
    return selected;
}