public static boolean removeTupleFromBag(HCatFieldSchema hcatFieldSchema, FieldSchema bagFieldSchema) throws HCatException { if (hcatFieldSchema != null && hcatFieldSchema.getArrayElementSchema().get(0).getType() != Type.STRUCT) { return true; } // Column was not found in table schema. Its a new column List<FieldSchema> tupSchema = bagFieldSchema.schema.getFields(); if (hcatFieldSchema == null && tupSchema.size() == 1 && (tupSchema.get(0).schema == null || (tupSchema.get(0).type == DataType.TUPLE && tupSchema.get(0).schema.size() == 1))) { return true; } return false; } /**
/**
 * Checks the schema computed by Pig at store time against the schema supplied by the
 * user (if any), then records the effective schema in the UDF properties.
 *
 * <p>If {@code pigSchema} is already set it must equal the runtime schema according to
 * {@code Schema.equals(runtimeSchema, pigSchema, false, true)}; otherwise the runtime
 * schema is adopted as {@code pigSchema}. The resulting schema is serialized and stored
 * under {@code PIG_SCHEMA} in the UDF properties looked up with this class and
 * {@code sign}.
 *
 * @param resourceSchema the schema handed over by Pig
 * @throws IOException a {@code FrontendException} when the two schemas differ, or any
 *                     failure raised by the schema conversion / serialization calls
 */
@Override
public void checkSchema(ResourceSchema resourceSchema) throws IOException {
    /* Schema provided by user and the schema computed by Pig
     * at the time of calling store must match.
     */
    Schema runtimeSchema = Schema.getPigSchema(resourceSchema);
    if (pigSchema != null) {
        if (!Schema.equals(runtimeSchema, pigSchema, false, true)) {
            // The literals are split mid-sentence; the trailing space keeps the
            // words "Schema" and "returned" apart in the final message.
            throw new FrontendException("Schema provided in store statement doesn't match with the Schema "
                + "returned by Pig run-time. Schema provided in HCatStorer: " + pigSchema.toString()
                + " Schema received from Pig runtime: " + runtimeSchema.toString(),
                PigHCatUtil.PIG_EXCEPTION_CODE);
        }
    } else {
        pigSchema = runtimeSchema;
    }
    UDFContext.getUDFContext().getUDFProperties(this.getClass(), new String[]{sign})
        .setProperty(PIG_SCHEMA, ObjectSerializer.serialize(pigSchema));
}
protected void doSchemaValidations(Schema pigSchema, HCatSchema tblSchema) throws FrontendException, HCatException { // Iterate through all the elements in Pig Schema and do validations as // dictated by semantics, consult HCatSchema of table when need be. int columnPos = 0;//helps with debug messages for (FieldSchema pigField : pigSchema.getFields()) { HCatFieldSchema hcatField = getColFromSchema(pigField.alias, tblSchema); validateSchema(pigField, hcatField, pigSchema, tblSchema, columnPos++); } try { PigHCatUtil.validateHCatTableSchemaFollowsPigRules(tblSchema); } catch (IOException e) { throw new FrontendException("HCatalog schema is not compatible with Pig: " + e.getMessage(), PigHCatUtil.PIG_EXCEPTION_CODE, e); } }
/**
 * Declares the output schema: a single field named {@code uniqueid} of type
 * {@code CHARARRAY}. The input schema is not consulted.
 *
 * @param input the input schema (unused)
 * @return a one-field schema
 */
@Override
public Schema outputSchema(Schema input) {
    Schema.FieldSchema uniqueIdField = new Schema.FieldSchema("uniqueid", DataType.CHARARRAY);
    return new Schema(uniqueIdField);
}
}
for (FieldSchema innerField : pigField.schema.getField(0).schema.getFields()) { validateSchema(innerField, getColFromSchema(pigField.alias, arrayElementSchema), topLevelPigSchema, topLevelHCatSchema, columnPos); for (FieldSchema innerField : pigField.schema.getFields()) { validateSchema(innerField, getColFromSchema(pigField.alias, structSubSchema), topLevelPigSchema, topLevelHCatSchema, columnPos);
/**
 * Recursively sets the NULL type to the specified type in a schema by applying
 * {@code FieldSchema.setFieldSchemaDefaultType} to each of its fields.
 * A {@code null} schema is ignored.
 *
 * @param s the schema whose NULL type has to be set
 * @param t the specified type
 */
public static void setSchemaDefaultType(Schema s, byte t) {
    if (s != null) {
        for (Schema.FieldSchema field : s.getFields()) {
            FieldSchema.setFieldSchemaDefaultType(field, t);
        }
    }
}
/**
 * Round-trips a parsed schema through Base64 serialization and checks that the
 * restored schema has the same string form as the original.
 */
@Test
public void testSchemaSerializationPlusBase64() throws Exception {
    Schema original = Utils.getSchemaFromString("name:bytearray,links:{(missing:chararray)}");
    Schema restored = IOUtils.deserializeFromBase64(IOUtils.serializeToBase64(original));

    String expectedText = original.toString();
    String actualText = restored.toString();
    assertEquals(expectedText, actualText);
}
/**
 * Declares the output schema: a single field named {@code dist} of type
 * {@code DOUBLE}. The input schema is not consulted.
 *
 * @param input the input schema (unused)
 * @return a one-field schema
 */
@Override
public Schema outputSchema(Schema input) {
    Schema.FieldSchema distField = new Schema.FieldSchema("dist", DataType.DOUBLE);
    return new Schema(distField);
}
}
private static void addField(Schema schema, List<String> fields, FieldAlias fa, String currentNode) { for (FieldSchema field : schema.getFields()) { String node; if (field.alias != null) { // if no field node = fa.toES(field.alias); node = (currentNode != null ? currentNode + "." + node : node); } else { node = currentNode; } // && field.type != DataType.TUPLE if (field.schema != null) { addField(field.schema, fields, fa, node); } else { if (!StringUtils.hasText(node)) { LogFactory.getLog(PigUtils.class).warn("Cannot detect alias for field in schema" + schema); } if (node != null) { fields.add(fa.toES(node)); } } } }
field = bagSchema.getField(0).schema.getField(0); } else { field = bagSchema.getField(0); List<HCatFieldSchema> hcatFSs = new ArrayList<HCatFieldSchema>(); HCatSchema structSubSchema = hcatFieldSchema == null ? null : hcatFieldSchema.getStructSubSchema(); List<FieldSchema> fields = fSchema.schema.getFields(); for (int i = 0; i < fields.size(); i++) { FieldSchema fieldSchema = fields.get(i);
/** Constructs HCatSchema from pigSchema. Passed tableSchema is the existing * schema of the table in metastore. */ protected HCatSchema convertPigSchemaToHCatSchema(Schema pigSchema, HCatSchema tableSchema) throws FrontendException { if(LOG.isDebugEnabled()) { LOG.debug("convertPigSchemaToHCatSchema(pigSchema,tblSchema)=(" + pigSchema + "," + tableSchema + ")"); } List<HCatFieldSchema> fieldSchemas = new ArrayList<HCatFieldSchema>(pigSchema.size()); for (FieldSchema fSchema : pigSchema.getFields()) { try { HCatFieldSchema hcatFieldSchema = getColFromSchema(fSchema.alias, tableSchema); //if writing to a partitioned table, then pigSchema will have more columns than tableSchema //partition columns are not part of tableSchema... e.g. TestHCatStorer#testPartColsInData() // HCatUtil.assertNotNull(hcatFieldSchema, "Nothing matching '" + fSchema.alias + "' found " + // "in target table schema", LOG); fieldSchemas.add(getHCatFSFromPigFS(fSchema, hcatFieldSchema, pigSchema, tableSchema)); } catch (HCatException he) { throw new FrontendException(he.getMessage(), PigHCatUtil.PIG_EXCEPTION_CODE, he); } } HCatSchema s = new HCatSchema(fieldSchemas); LOG.debug("convertPigSchemaToHCatSchema(computed)=(" + s + ")"); return s; }
/**
 * Parses a schema containing a bag, renders it with {@code toString()} and feeds that
 * text back into the parser. The test is declared to pass only if an exception is
 * thrown somewhere along the way.
 */
@Test(expected = Exception.class)
public void testLoadingOfBagSchema() throws Exception {
    String rendered = Utils.getSchemaFromString("name:bytearray,links:{(missing:chararray)}").toString();
    assertNotNull(Utils.getSchemaFromString(rendered));
}
/**
 * Declares the output schema: a single field named {@code rand} of type
 * {@code INTEGER}. The input schema is not consulted.
 *
 * @param input the input schema (unused)
 * @return a one-field schema
 */
@Override
public Schema outputSchema(Schema input) {
    Schema.FieldSchema randField = new Schema.FieldSchema("rand", DataType.INTEGER);
    return new Schema(randField);
}
/** * Validates the schema returned when specific columns of a table are given as part of LOAD . * @throws Exception */ @Test public void testSchemaForTableWithSpecificColumns() throws Exception { //create the table final String TABLE = "TABLE2"; final String ddl = "CREATE TABLE " + TABLE + " (ID INTEGER NOT NULL PRIMARY KEY,NAME VARCHAR, AGE INTEGER) "; conn.createStatement().execute(ddl); final String selectColumns = "ID,NAME"; pigServer.registerQuery(String.format( "A = load 'hbase://table/%s/%s' using " + PhoenixHBaseLoader.class.getName() + "('%s');", TABLE, selectColumns, zkQuorum)); Schema schema = pigServer.dumpSchema("A"); List<FieldSchema> fields = schema.getFields(); assertEquals(2, fields.size()); assertTrue(fields.get(0).alias.equalsIgnoreCase("ID")); assertTrue(fields.get(0).type == DataType.INTEGER); assertTrue(fields.get(1).alias.equalsIgnoreCase("NAME")); assertTrue(fields.get(1).type == DataType.CHARARRAY); }
/**
 * Declares the output schema: a single field named {@code binning_info} of type
 * {@code CHARARRAY}. The input schema is not consulted.
 *
 * @param input the input schema (unused)
 * @return a one-field schema
 */
@Override
public Schema outputSchema(Schema input) {
    Schema.FieldSchema binningInfoField = new Schema.FieldSchema("binning_info", DataType.CHARARRAY);
    return new Schema(binningInfoField);
}
}
/**
 * Validates the schema returned for a table with Pig data types.
 * Creates a four-column table, loads it through {@code PhoenixHBaseLoader} and checks
 * the alias and Pig type of every returned field, in column order.
 *
 * @throws Exception
 */
@Test
public void testSchemaForTable() throws Exception {
    final String TABLE = "TABLE1";
    final String ddl = String.format("CREATE TABLE %s "
            + " (a_string varchar not null, a_binary varbinary not null, a_integer integer, cf1.a_float float"
            + " CONSTRAINT pk PRIMARY KEY (a_string, a_binary))\n", TABLE);
    conn.createStatement().execute(ddl);
    conn.commit();

    final String loadStatement = String.format(
            "A = load 'hbase://table/%s' using " + PhoenixHBaseLoader.class.getName() + "('%s');",
            TABLE, zkQuorum);
    pigServer.registerQuery(loadStatement);

    final Schema schema = pigServer.dumpSchema("A");
    List<FieldSchema> fields = schema.getFields();
    assertEquals(4, fields.size());

    // Expected alias / type per column, checked in the same order as the DDL.
    final String[] expectedAliases = {"a_string", "a_binary", "a_integer", "a_float"};
    final byte[] expectedTypes = {
        DataType.CHARARRAY, DataType.BYTEARRAY, DataType.INTEGER, DataType.FLOAT
    };
    for (int i = 0; i < expectedAliases.length; i++) {
        FieldSchema column = fields.get(i);
        assertTrue(column.alias.equalsIgnoreCase(expectedAliases[i]));
        assertTrue(column.type == expectedTypes[i]);
    }
}
/** * Validates the schema returned when a SQL SELECT query is given as part of LOAD . * @throws Exception */ @Test public void testSchemaForQuery() throws Exception { //create the table. final String TABLE = "TABLE3"; String ddl = String.format("CREATE TABLE " + TABLE + " (A_STRING VARCHAR NOT NULL, A_DECIMAL DECIMAL NOT NULL, CF1.A_INTEGER INTEGER, CF2.A_DOUBLE DOUBLE" + " CONSTRAINT pk PRIMARY KEY (A_STRING, A_DECIMAL))\n", TABLE); conn.createStatement().execute(ddl); //sql query for LOAD final String sqlQuery = "SELECT A_STRING,CF1.A_INTEGER,CF2.A_DOUBLE FROM " + TABLE; pigServer.registerQuery(String.format( "A = load 'hbase://query/%s' using " + PhoenixHBaseLoader.class.getName() + "('%s');", sqlQuery, zkQuorum)); //assert the schema. Schema schema = pigServer.dumpSchema("A"); List<FieldSchema> fields = schema.getFields(); assertEquals(3, fields.size()); assertTrue(fields.get(0).alias.equalsIgnoreCase("a_string")); assertTrue(fields.get(0).type == DataType.CHARARRAY); assertTrue(fields.get(1).alias.equalsIgnoreCase("a_integer")); assertTrue(fields.get(1).type == DataType.INTEGER); assertTrue(fields.get(2).alias.equalsIgnoreCase("a_double")); assertTrue(fields.get(2).type == DataType.DOUBLE); }
List<FieldSchema> fields = schema.getFields(); assertEquals(4, fields.size()); assertTrue(fields.get(0).alias.equalsIgnoreCase("a"));
List<FieldSchema> fields = schema.getFields(); assertEquals(2, fields.size()); assertTrue(fields.get(0).alias.equalsIgnoreCase("FOO"));
List<FieldSchema> fields = schema.getFields(); assertEquals(4, fields.size()); assertTrue(fields.get(0).alias.equalsIgnoreCase("ID"));