/**
 * Builds a bag holding the given tuples, in argument order.
 *
 * <p>The tuples are copied into a fresh, growable list instead of being wrapped
 * with {@code Arrays.asList}. That view is fixed-size and backed by the caller's
 * varargs array; if the bag keeps the list it is handed as its storage
 * (NOTE(review): this is how NonSpillableDataBag(List) is understood to behave --
 * confirm), adding to the returned bag would fail and later writes to the
 * caller's array would show through the bag.
 *
 * @param tuples the tuples to put in the bag
 * @return a bag containing the provided objects
 */
public static DataBag bag(Tuple... tuples) {
  return new NonSpillableDataBag(new java.util.ArrayList<Tuple>(Arrays.asList(tuples)));
}
/**
 * Wraps a copy of the buffered tuples in a new bag and adds that bag to
 * {@code parent}.
 */
@Override
public void end() {
  // Copy first so the bag gets its own list rather than the buffer itself.
  final ArrayList<Tuple> snapshot = new ArrayList<Tuple>(buffer);
  final NonSpillableDataBag collected = new NonSpillableDataBag(snapshot);
  parent.add(collected);
}
/**
 * Creates the bag implementation selected by the job configuration.
 *
 * <p>On the first call only, the job configuration (when one is available) is
 * consulted: if {@code PigConfiguration.PIG_CACHEDBAG_TYPE} is set to
 * "default" (case-insensitive), {@code useDefaultBag} is switched on. Later
 * calls reuse that decision.
 *
 * @param numBags passed to the {@code InternalCachedBag} constructor when the
 *        default bag is not in use
 * @return a {@code NonSpillableDataBag} when the default bag type is selected,
 *         otherwise an {@code InternalCachedBag}
 */
private DataBag createDataBag(int numBags) {
  if (!initialized) {
    initialized = true;
    if (PigMapReduce.sJobConfInternal.get() != null) {
      String configuredType =
          PigMapReduce.sJobConfInternal.get().get(PigConfiguration.PIG_CACHEDBAG_TYPE);
      // Literal-first comparison is null-safe, so no separate null check is needed.
      if ("default".equalsIgnoreCase(configuredType)) {
        useDefaultBag = true;
      }
    }
  }

  if (useDefaultBag) {
    return new NonSpillableDataBag();
  }
  return new InternalCachedBag(numBags);
}
private static DataBag toPigBag(Field field, Collection<Object> values, boolean lazy) { List<Tuple> tuples = Lists.newArrayListWithExpectedSize(values.size()); for(Object value : values) { Object pValue = toPigObject(field, value, lazy); if (pValue instanceof Tuple) { // DataBag should contain Tuples tuples.add((Tuple)pValue); } else { tuples.add(tupleFactory.newTuple(pValue)); } } return new NonSpillableDataBag(tuples); }
public POFRJoin(OperatorKey k, int rp, List<PhysicalOperator> inp, List<List<PhysicalPlan>> ppLists, List<List<Byte>> keyTypes, FileSpec[] replFiles, int fragment, boolean isLeftOuter, Tuple nullTuple, Schema[] inputSchemas, Schema[] keySchemas) throws ExecException { super(k, rp, inp); phyPlanLists = ppLists; this.fragment = fragment; this.keyTypes = keyTypes; this.replFiles = replFiles; LRs = new POLocalRearrange[ppLists.size()]; constExps = new ConstantExpression[ppLists.size()]; createJoinPlans(k); List<Tuple> tupList = new ArrayList<Tuple>(); tupList.add(nullTuple); nullBag = new NonSpillableDataBag(tupList); this.isLeftOuterJoin = isLeftOuter; if (inputSchemas != null) { this.inputSchemas = inputSchemas; } else { this.inputSchemas = new Schema[replFiles == null ? 0 : replFiles.length]; } if (keySchemas != null) { this.keySchemas = keySchemas; } else { this.keySchemas = new Schema[replFiles == null ? 0 : replFiles.length]; } }
private HashMap<String, DataBag> doInverse(Map<String,Object> original) throws ExecException { final HashMap<String, DataBag> inverseMap = new HashMap<String, DataBag>(original.size()); for (Map.Entry<String, Object> entry : original.entrySet()) { Object o = entry.getValue(); String newKey; // Call toString for all primitive types, else throw an Exception if (!(o instanceof Tuple || o instanceof DataBag)) { newKey = o.toString(); } else { throw new ExecException("Wrong type. Value is of type " + o.getClass()); } // Create a new bag if "newKey" does not exist in Map DataBag bag = inverseMap.get(newKey); if (bag == null) { bag = new NonSpillableDataBag(); bag.add(TUPLE_FACTORY.newTuple(entry.getKey())); inverseMap.put(newKey, bag); } else { bag.add(TUPLE_FACTORY.newTuple(entry.getKey())); } } return inverseMap; }
@SuppressWarnings("unchecked") @Override public DataBag exec(Tuple input) throws IOException { if(input == null || input.size() == 0) { return null; } Map<String, Object> m = null; //Input must be of type Map. This is verified at compile time m = (Map<String, Object>)(input.get(0)); if(m == null) { return null; } Collection c = m.values(); DataBag bag = new NonSpillableDataBag(c.size()); Iterator<Object> iter = c.iterator(); while(iter.hasNext()) { Tuple t = TUPLE_FACTORY.newTuple(iter.next()); bag.add(t); } return bag; }
@SuppressWarnings("unchecked") @Override public DataBag exec(Tuple input) throws IOException { if (input == null || input.size() == 0) { return null; } Map<String, Object> m = null; // Input must be of type Map. This is verified at compile time m = (Map<String, Object>) (input.get(0)); if (m == null) { return null; } int initialSetSize = getInitialSetSize(m.values()); Set<Object> uniqueElements = new HashSet<Object>(initialSetSize); DataBag bag = new NonSpillableDataBag(); Iterator<Object> iter = m.values().iterator(); while (iter.hasNext()) { Object val = iter.next(); if (!uniqueElements.contains(val)) { uniqueElements.add(val); Tuple t = TUPLE_FACTORY.newTuple(val); bag.add(t); } } return bag; }
@SuppressWarnings("unchecked") @Override public DataBag exec(Tuple input) throws IOException { if(input == null || input.size() == 0) { return null; } Map<String, Object> m = null; //Input must be of type Map. This is verified at compile time m = (Map<String, Object>)(input.get(0)); if(m == null) { return null; } DataBag bag = new NonSpillableDataBag(m.size()); for (String s : m.keySet()) { Tuple t = TUPLE_FACTORY.newTuple(s); bag.add(t); } return bag; }
/**
 * Converts a parsed JSON value into the object handed to Pig.
 *
 * <p>With nested loading enabled, a {@code JSONObject} is converted by
 * {@code walkJson} and a {@code JSONArray} becomes a bag whose tuples each hold
 * one recursively wrapped element. In every other case the value's string form
 * is returned.
 *
 * @param value the parsed value; may be {@code null}
 * @return the converted value, or {@code null} when {@code value} is null
 */
private Object wrap(Object value) {
  if (isNestedLoadEnabled) {
    if (value instanceof JSONObject) {
      return walkJson((JSONObject) value);
    }
    if (value instanceof JSONArray) {
      JSONArray elements = (JSONArray) value;
      DataBag wrapped = new NonSpillableDataBag(elements.size());
      for (int idx = 0; idx < elements.size(); idx++) {
        // Each array element is wrapped recursively and placed in its own tuple.
        wrapped.add(tupleFactory.newTuple(wrap(elements.get(idx))));
      }
      return wrapped;
    }
  }

  // Nested loading disabled, or a scalar: fall back to the string form.
  if (value == null) {
    return null;
  }
  return value.toString();
}
/** * Translate a nested message to a tuple. If the field is repeated, it walks the list and adds each to a bag. * Otherwise, it just adds the given one. * @param fieldDescriptor the descriptor object for the given field. * @param fieldValue the object representing the value of this field, possibly null. * @return the object representing fieldValue in Pig -- either a bag or a tuple. */ @SuppressWarnings("unchecked") protected Object messageToTuple(FieldDescriptor fieldDescriptor, Object fieldValue) { if (fieldValue == null) { // protobufs unofficially ensures values are not null. just in case: return null; } assert fieldDescriptor.getType() == FieldDescriptor.Type.MESSAGE : "messageToTuple called with field of type " + fieldDescriptor.getType(); if (fieldDescriptor.isRepeated()) { // The protobuf contract is that if the field is repeated, then the object returned is actually a List // of the underlying datatype, which in this case is a nested message. List<Message> messageList = (List<Message>) (fieldValue != null ? fieldValue : Lists.newArrayList()); DataBag bag = new NonSpillableDataBag(messageList.size()); for (Message m : messageList) { bag.add(new ProtobufTuple(m)); } return bag; } else { return new ProtobufTuple((Message)fieldValue); } }
DataBag bag = new NonSpillableDataBag(fieldValueList.size()); for (Object singleFieldValue : fieldValueList) { Object nonEnumFieldValue = coerceToPigTypes(fieldDescriptor, singleFieldValue);
// Store the quantiles (wrapped in a bag) under QUANTILES_LIST and the weighted parts under WEIGHTED_PARTS, then return output.
output.put(QUANTILES_LIST, new NonSpillableDataBag(quantilesList)); output.put(WEIGHTED_PARTS, weightedParts); return output;
// Set the result to a new, empty bag and mark the return status as OK.
input.result = new NonSpillableDataBag(); input.returnStatus = POStatus.STATUS_OK;
@Override public DataBag exec(Tuple input) throws IOException { try { // The assumption is that if the bag contents fits into // an input tuple, it will not need to be spilled. DataBag bag = new NonSpillableDataBag(input.size()); for (int i = 0; i < input.size(); ++i) { final Object object = input.get(i); if (object instanceof Tuple) { bag.add( (Tuple) object); } else { Tuple tp2 = TupleFactory.getInstance().newTuple(1); tp2.set(0, object); bag.add(tp2); } } return bag; } catch (Exception ee) { throw new RuntimeException("Error while creating a bag", ee); } }
break; ce.setValue(new NonSpillableDataBag(replicate.get(key)));
// Put a new bag in slot lastBagIndex; chunkSize is narrowed to int for the constructor (presumably an initial size hint -- confirm).
dbs[lastBagIndex] = new NonSpillableDataBag((int)chunkSize);
// Wrap bagContents in a bag, set it as ce's value, and mark ce's result type as BAG.
DataBag bg = new NonSpillableDataBag(bagContents); ce.setValue(bg); ce.setResultType(DataType.BAG);