Refine search
public static boolean removeTupleFromBag(HCatFieldSchema hcatFieldSchema, FieldSchema bagFieldSchema) throws HCatException { if (hcatFieldSchema != null && hcatFieldSchema.getArrayElementSchema().get(0).getType() != Type.STRUCT) { return true; } // Column was not found in table schema. Its a new column List<FieldSchema> tupSchema = bagFieldSchema.schema.getFields(); if (hcatFieldSchema == null && tupSchema.size() == 1 && (tupSchema.get(0).schema == null || (tupSchema.get(0).type == DataType.TUPLE && tupSchema.get(0).schema.size() == 1))) { return true; } return false; } /**
// NOTE(review): truncated snippet from validateSchema's complex-type handling — the
// switch-case labels and closing braces are cut off here, so the code is kept
// byte-identical. It appears to recurse into the inner fields of a BAG's tuple
// (against the HCat array-element schema) and of a TUPLE (against the struct
// sub-schema) — confirm against the full source before relying on this reading.
validateAlias(pigField.alias); byte type = pigField.type; if (DataType.isComplex(type)) { switch (type) { for (FieldSchema innerField : pigField.schema.getField(0).schema.getFields()) { validateSchema(innerField, getColFromSchema(pigField.alias, arrayElementSchema), topLevelPigSchema, topLevelHCatSchema, columnPos); for (FieldSchema innerField : pigField.schema.getFields()) { validateSchema(innerField, getColFromSchema(pigField.alias, structSubSchema), topLevelPigSchema, topLevelHCatSchema, columnPos);
/**
 * Verifies that the schema Pig computes at store time matches the schema the user
 * declared in the store statement (if any), then publishes the agreed schema into
 * the UDF context so backend tasks can retrieve it.
 *
 * @param resourceSchema the schema Pig computed for the relation being stored
 * @throws IOException if the two schemas disagree, or if serialization fails
 */
@Override
public void checkSchema(ResourceSchema resourceSchema) throws IOException {
    /* Schema provided by user and the schema computed by Pig
     * at the time of calling store must match. */
    Schema runtimeSchema = Schema.getPigSchema(resourceSchema);
    if (pigSchema != null) {
        if (!Schema.equals(runtimeSchema, pigSchema, false, true)) {
            // BUG FIX: the two concatenated literals previously ran together as
            // "...doesn't match with the Schemareturned by..."; add the missing space.
            throw new FrontendException("Schema provided in store statement doesn't match with the Schema "
                + "returned by Pig run-time. Schema provided in HCatStorer: " + pigSchema.toString()
                + " Schema received from Pig runtime: " + runtimeSchema.toString(),
                PigHCatUtil.PIG_EXCEPTION_CODE);
        }
    } else {
        pigSchema = runtimeSchema;
    }
    // Persist the agreed schema keyed by this storer's signature for the backend.
    UDFContext.getUDFContext().getUDFProperties(this.getClass(), new String[]{sign})
        .setProperty(PIG_SCHEMA, ObjectSerializer.serialize(pigSchema));
}
// NOTE(review): truncated snippet — the closing braces of the validation blocks, the
// one-input return path after the size()==2 branch, and whatever follows the
// printStackTrace() in the catch are all cut off, so the code is kept byte-identical.
// As far as visible it validates a (bag[, delimiter]) input and returns a chararray
// schema; the catch block appears to swallow FrontendException with only a stack
// trace — confirm against the full source.
@Override public Schema outputSchema(Schema inputSchema) { try { if ((inputSchema == null) || ((inputSchema.size() != 1) && (inputSchema.size() != 2))) { throw new RuntimeException("Expecting 2 inputs, found: " + ((inputSchema == null) ? 0 : inputSchema.size())); FieldSchema inputFieldSchema = inputSchema.getField(0); if (inputFieldSchema.type != DataType.BAG) { throw new RuntimeException("Expecting a bag of tuples: {()}, found data type: " + DataType.findTypeName(inputFieldSchema.type)); FieldSchema firstFieldSchema = inputFieldSchema.schema.getField(0); if ((firstFieldSchema == null) || (firstFieldSchema.schema == null) || firstFieldSchema.schema.size() < 1) { throw new RuntimeException("Expecting a bag and a delimeter, found: " + inputSchema); if (inputSchema.size() == 2) { FieldSchema secondInputFieldSchema = inputSchema.getField(1); return new Schema(new Schema.FieldSchema(null, DataType.CHARARRAY)); } catch (FrontendException e) { e.printStackTrace();
/**
 * Declares the argument signatures this UDF accepts: a single mapping for
 * invocation with two chararray arguments.
 *
 * @return the list of supported function specs
 * @throws FrontendException never thrown here; required by the interface
 */
@Override
public List<FuncSpec> getArgToFuncMapping() throws FrontendException {
    Schema twoCharArrays = new Schema();
    twoCharArrays.add(new Schema.FieldSchema(null, DataType.CHARARRAY));
    twoCharArrays.add(new Schema.FieldSchema(null, DataType.CHARARRAY));
    List<FuncSpec> mappings = new ArrayList<FuncSpec>();
    mappings.add(new FuncSpec(this.getClass().getName(), twoCharArrays));
    return mappings;
}
/**
 * Requires the single input field to be a map and declares a map as output.
 *
 * @param input the schema of the UDF's arguments
 * @return a schema containing one untyped map field
 */
@Override
public Schema outputSchema(Schema input) {
    try {
        byte argType = input.getField(0).type;
        if (argType != DataType.MAP) {
            // Anything other than a map is a usage error.
            throw new RuntimeException("Expected map, received schema " + DataType.findTypeName(argType));
        }
    } catch (FrontendException e) {
        throw new RuntimeException(e);
    }
    return new Schema(new Schema.FieldSchema(null, DataType.MAP));
}
// NOTE(review): truncated snippet — the enclosing method header, the closing braces
// of the validation blocks, and the declaration of inputBagSchema are cut off, so
// the code is kept byte-identical. It appears to validate a single bag-of-tuples
// input, cache the inner tuple schema, and declare an output bag whose tuples carry
// the input fields plus x_transform/rsvd_l/rsvd_s/rsvd_e doubles — confirm against
// the full source.
if (input.size() != 1) { throw new RuntimeException("Expected input to have only a single field"); Schema.FieldSchema inputFieldSchema = input.getField(0); if (inputFieldSchema.type != DataType.BAG) { throw new RuntimeException("Expected a BAG as input"); if (inputBagSchema.getField(0).type != DataType.TUPLE) { throw new RuntimeException(String.format("Expected input bag to contain a TUPLE, but instead found %s", DataType.findTypeName(inputBagSchema.getField(0).type))); this.dataBagSchema = inputBagSchema.getField(0).schema; this.dataBagSchema.prettyPrint(); fieldSchemas.addAll(dataBagSchema.getFields()); fieldSchemas.add(new Schema.FieldSchema("x_transform", DataType.DOUBLE)); fieldSchemas.add(new Schema.FieldSchema("rsvd_l", DataType.DOUBLE)); fieldSchemas.add(new Schema.FieldSchema("rsvd_s", DataType.DOUBLE)); fieldSchemas.add(new Schema.FieldSchema("rsvd_e", DataType.DOUBLE)); FieldSchema tupleFieldSchema = new FieldSchema(null, new Schema(fieldSchemas), DataType.TUPLE); FieldSchema bagFieldSchema = new FieldSchema(this.getClass().getName().toLowerCase().replace(".", "_"), new Schema(tupleFieldSchema), DataType.BAG); Schema outputSchema = new Schema(bagFieldSchema); return outputSchema;
/**
 * Requires every input field to be a bag and declares an output bag whose tuple
 * schema is taken from the first input bag.
 *
 * @param input the schema of the UDF's arguments
 * @return the output bag schema, or {@code null} if it cannot be determined
 */
@Override
public Schema outputSchema(Schema input) {
    try {
        // Reject any argument that is not a bag.
        for (Schema.FieldSchema field : input.getFields()) {
            if (field.type != DataType.BAG) {
                throw new RuntimeException("Expected a bag but got: " + DataType.findTypeName(field.type));
            }
        }
        Schema firstBagSchema = input.getField(0).schema;
        String alias = getSchemaName(this.getClass().getName().toLowerCase(), input);
        return new Schema(new Schema.FieldSchema(alias, firstBagSchema, DataType.BAG));
    } catch (Exception e) {
        // Pig treats a null output schema as "unknown" rather than an error.
        return null;
    }
}
}
// NOTE(review): truncated snippet — the condition guarding the "types must match"
// throw (presumably an else-if comparing fieldSchema.type to the remembered type),
// the loop's closing braces, and the tail of the return expression are cut off, so
// the code is kept byte-identical. It appears to require homogeneous input field
// types and emit (key: chararray, value: <type>) tuples — confirm against the full
// source.
for (FieldSchema fieldSchema : input.getFields()) { if (type == null) { type = fieldSchema.type; throw new RuntimeException( String.format("Expected all input types to match. Got both %s and %s.", DataType.findTypeName(type.byteValue()), DataType.findTypeName(fieldSchema.type))); Schema outputTupleSchema = new Schema(); outputTupleSchema.add(new Schema.FieldSchema("key", DataType.CHARARRAY)); outputTupleSchema.add(new Schema.FieldSchema("value", type)); return new Schema(new Schema.FieldSchema( getSchemaName(this.getClass().getName().toLowerCase(), input), outputTupleSchema,
/**
 * Requires the input to be a bag and declares an output bag of (elem1, elem2)
 * pairs, each element carrying the schema of the bag's inner tuple.
 *
 * @param input the schema of the UDF's arguments
 * @return the output bag schema, or {@code null} if it cannot be determined
 */
@Override
public Schema outputSchema(Schema input) {
    try {
        Schema pairSchema = new Schema();
        FieldSchema bagField = input.getField(0);
        if (bagField.type != DataType.BAG) {
            throw new RuntimeException(String.format("Expected input schema to be BAG, but instead found %s",
                DataType.findTypeName(bagField.type)));
        }
        // Both pair elements reuse the inner tuple's schema.
        FieldSchema innerTuple = bagField.schema.getField(0);
        pairSchema.add(new Schema.FieldSchema("elem1", innerTuple.schema));
        pairSchema.add(new Schema.FieldSchema("elem2", innerTuple.schema));
        return new Schema(new Schema.FieldSchema(getSchemaName(this.getClass().getName().toLowerCase(), input),
            pairSchema, DataType.BAG));
    } catch (Exception e) {
        // Null tells Pig the schema is unknown.
        return null;
    }
}
/**
 * Declares an output tuple that mirrors every field of the input schema.
 *
 * @param input the schema of the UDF's arguments
 * @return a schema with one tuple field wrapping all inputs, or {@code null}
 */
@Override
public Schema outputSchema(Schema input) {
    try {
        Schema wrapped = new Schema();
        // Copy each input field, in order, into the output tuple.
        for (int idx = 0; idx < input.size(); idx++) {
            wrapped.add(input.getField(idx));
        }
        return new Schema(new Schema.FieldSchema(
            getSchemaName(this.getClass().getName().toLowerCase(), input),
            wrapped, DataType.TUPLE));
    } catch (Exception e) {
        // Null tells Pig the schema is unknown.
        return null;
    }
}
@Override public Schema outputSchema(Schema input) { try { if (input.size() != 1) { throw new RuntimeException("Expected only a single field as input"); } if (input.getField(0).type != DataType.BAG) { throw new RuntimeException("Expected a BAG as input, but found " + DataType.findTypeName(input.getField(0).type)); } // get the size of the tuple within the bag int innerTupleSize = input.getField(0).schema.getField(0).schema.getFields().size(); getInstanceProperties().put("tuplesize", innerTupleSize); } catch (FrontendException e) { throw new RuntimeException(e); } return input; } }
protected void doSchemaValidations(Schema pigSchema, HCatSchema tblSchema) throws FrontendException, HCatException { // Iterate through all the elements in Pig Schema and do validations as // dictated by semantics, consult HCatSchema of table when need be. int columnPos = 0;//helps with debug messages for (FieldSchema pigField : pigSchema.getFields()) { HCatFieldSchema hcatField = getColFromSchema(pigField.alias, tblSchema); validateSchema(pigField, hcatField, pigSchema, tblSchema, columnPos++); } try { PigHCatUtil.validateHCatTableSchemaFollowsPigRules(tblSchema); } catch (IOException e) { throw new FrontendException("HCatalog schema is not compatible with Pig: " + e.getMessage(), PigHCatUtil.PIG_EXCEPTION_CODE, e); } }
/**
 * Declares an output tuple carrying the first three fields of the input schema.
 *
 * @param input the schema of the UDF's arguments
 * @return a schema with one tuple of the first three input fields, or {@code null}
 */
@Override
public Schema outputSchema(Schema input) {
    try {
        Schema tripleSchema = new Schema();
        // Fields 0..2 pass through unchanged.
        for (int pos = 0; pos < 3; pos++) {
            tripleSchema.add(input.getField(pos));
        }
        return new Schema(new Schema.FieldSchema(
            getSchemaName(this.getClass().getName().toLowerCase(), input),
            tripleSchema, DataType.TUPLE));
    } catch (Exception e) {
        // Null tells Pig the schema is unknown.
        return null;
    }
}
}
/**
 * Derives the output schema from the invoker's declared return type.
 *
 * @param input ignored; the schema depends only on the invoked method
 * @return a one-field schema of the return type, or {@code null} if the invoker
 *         has not been initialized yet
 */
@Override
public Schema outputSchema(Schema input) {
    if (invoker_ == null) {
        return null;
    }
    byte returnType = DataType.findType(invoker_.getReturnType());
    return new Schema(new FieldSchema(null, returnType));
}
}
public static Schema outputSchemaForThrift(TypeRef<? extends TBase<?,?>> typeRef) { Schema outSchema; try { outSchema = ThriftToPig.toSchema(typeRef.getRawClass()); // wrap the schema if size > 1 if(outSchema.size() > 1) { outSchema = new Schema(new Schema.FieldSchema(typeRef.getRawClass().getSimpleName(), outSchema, DataType.TUPLE)); } } catch (FrontendException e) { throw new RuntimeException(e); } return outSchema; } }
/**
 * Recursively rewrites every empty tuple in the field schema into a tuple with a
 * single untyped bytearray field, since Pig cannot represent a zero-field tuple.
 *
 * @param fs the field schema to fix up in place
 */
private static void convertEmptyTupleToBytearrayTuple(FieldSchema fs) {
    // An empty tuple gets a single bytearray placeholder and needs no recursion.
    if (fs.type == DataType.TUPLE && fs.schema != null && fs.schema.size() == 0) {
        fs.schema.add(new FieldSchema(null, DataType.BYTEARRAY));
        return;
    }
    // Otherwise descend into any nested schema.
    if (fs.schema == null) {
        return;
    }
    for (FieldSchema child : fs.schema.getFields()) {
        convertEmptyTupleToBytearrayTuple(child);
    }
}