Refine search
/**
 * Converts an HCatalog schema into a Pig {@link ResourceSchema} by mapping
 * each HCatalog field through {@code getResourceSchemaFromFieldSchema}.
 *
 * @param hcatSchema the HCatalog schema to translate
 * @return a ResourceSchema whose fields mirror {@code hcatSchema.getFields()}
 * @throws IOException if a field cannot be converted
 */
public static ResourceSchema getResourceSchema(HCatSchema hcatSchema) throws IOException {
    List<ResourceFieldSchema> converted = new ArrayList<ResourceFieldSchema>();
    for (HCatFieldSchema hcatField : hcatSchema.getFields()) {
        converted.add(getResourceSchemaFromFieldSchema(hcatField));
    }
    ResourceSchema result = new ResourceSchema();
    result.setFields(converted.toArray(new ResourceFieldSchema[converted.size()]));
    return result;
}
/**
 * Writes one Pig tuple as a PhoenixRecord via the underlying record writer.
 * Fields are copied positionally; a null schema means the record is built
 * without field metadata.
 *
 * @param t the tuple to persist
 * @throws IOException if the underlying writer fails
 */
@Override
public void putNext(Tuple t) throws IOException {
    ResourceFieldSchema[] fieldSchemas = (schema == null) ? null : schema.getFields();
    PhoenixRecord record = new PhoenixRecord(fieldSchemas);
    for (int i = 0; i < t.size(); i++) {
        record.add(t.get(i));
    }
    try {
        writer.write(null, record);
    } catch (InterruptedException e) {
        // Restore the interrupt flag so callers up the stack can still observe
        // the interruption; wrapping alone would silently swallow it.
        Thread.currentThread().interrupt();
        throw new RuntimeException(e);
    }
}
if (serializedSchema == null) return tup; try { schema = new ResourceSchema(Utils.getSchemaFromString(serializedSchema)); } catch (ParserException e) { mLog.error("Unable to parse serialized schema " + serializedSchema, e); ResourceFieldSchema[] fieldSchemas = schema.getFields(); int tupleIdx = 0; if (tupleIdx >= tup.size()) { tup.append(null); if(tup.get(tupleIdx) != null){ byte[] bytes = ((DataByteArray) tup.get(tupleIdx)).get(); val = CastUtils.convertToType(caster, bytes, fieldSchemas[i], fieldSchemas[i].getType()); tup.set(tupleIdx, val);
switch (fs.getType()) { case DataType.BAG: if (obj instanceof DataBag) { DataBag db = (DataBag)obj; if (fs.getSchema()!=null) { ResourceFieldSchema tupleFs = fs.getSchema().getFields()[0]; Iterator<Tuple> iter = db.iterator(); if (innerSchema==null) return t; if (innerSchema.getFields().length!=t.size()) return null; int i=0; for (ResourceFieldSchema fieldSchema : innerSchema.getFields()) { Object field = convertWithSchema(t.get(i), fieldSchema); t.set(i, field); i++; if (obj instanceof Map) { if (fs!=null && fs.getSchema()!=null) { ResourceFieldSchema innerFieldSchema = fs.getSchema().getFields()[0]; Map m = (Map)obj; for (Object entry : m.entrySet()) {
int numFields = t.size(); Schema s = new Schema(); for (int i = 0; i < numFields; i++) { try { s.add(DataType.determineFieldSchema(t.get(i))); return new ResourceSchema(s);
if (target instanceof PigTuple) { PigTuple pt = (PigTuple) target; ResourceFieldSchema[] fields = pt.getSchema().getSchema().getFields(); if (fieldName.equals(field.getName())) { foundField = true; byte type = field.getType(); try { Object object = pt.getTuple().get(i); if (DataType.isAtomic(type)) { target = object.toString();
ResourceSchema tupleSchema = schema.getSchema(); ResourceFieldSchema[] fields = tupleSchema.getFields(); Assert.isTrue(fields.length == 1, "When using JSON input, only one field is expected"); object = pt.getTuple().get(0); type = pt.getTuple().getType(0); } catch (Exception ex) { throw new EsHadoopIllegalStateException("Encountered exception while processing tuple", ex);
/**
 * Captures the record writer and restores the {@link ResourceSchema} that was
 * stashed in the UDF properties during front-end planning; falls back to an
 * empty schema (with a warning) when none was serialized.
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
@Override
public void prepareToWrite(RecordWriter writer) throws IOException {
    this.writer = writer;

    String serialized = getUDFProperties().getProperty(ResourceSchema.class.getName());
    if (StringUtils.hasText(serialized)) {
        this.schema = IOUtils.deserializeFromBase64(serialized);
    } else {
        log.warn("No resource schema found; using an empty one....");
        this.schema = new ResourceSchema();
    }
    this.pigTuple = new PigTuple(schema);
}
@Test public void testSchema() throws SQLException, IOException { final Configuration configuration = mock(Configuration.class); when(configuration.get(PhoenixConfigurationUtil.SCHEMA_TYPE)).thenReturn(SchemaType.TABLE.name()); final ResourceSchema actual = PhoenixPigSchemaUtil.getResourceSchema( configuration, new Dependencies() { List<ColumnInfo> getSelectColumnMetadataList( Configuration configuration) throws SQLException { return Lists.newArrayList(ID_COLUMN, NAME_COLUMN); } }); // expected schema. final ResourceFieldSchema[] fields = new ResourceFieldSchema[2]; fields[0] = new ResourceFieldSchema().setName("ID") .setType(DataType.LONG); fields[1] = new ResourceFieldSchema().setName("NAME") .setType(DataType.CHARARRAY); final ResourceSchema expected = new ResourceSchema().setFields(fields); assertEquals(expected.toString(), actual.toString()); }
continue; Schema toGen = Schema.getPigSchema(new ResourceSchema(logicalSchema)); nullTuple.set(j, null); Schema leftSchema = null; if (logicalSchema != null) { leftSchema = Schema.getPigSchema(new ResourceSchema(logicalSchema)); rightSchema = Schema.getPigSchema(new ResourceSchema(logicalSchema)); mergedSchema = Schema.getPigSchema(new ResourceSchema(logicalSchema));
private Result write(Object object, ResourceFieldSchema field, Generator generator) { byte type = (field != null ? field.getType() : DataType.findType(object)); throw new EsHadoopSerializationException("Big decimals are not supported by Elasticsearch - consider using a different type (such as string)"); case DataType.MAP: ResourceSchema nestedSchema = field.getSchema(); ResourceFieldSchema valueType = (nestedSchema != null ? nestedSchema.getFields()[0] : null); nestedSchema = field.getSchema(); ResourceFieldSchema bagType = nestedSchema.getFields()[0];
private Result writeTuple(Object object, ResourceFieldSchema field, Generator generator, boolean writeTupleFieldNames, boolean isRoot) { ResourceSchema nestedSchema = field.getSchema(); for (ResourceFieldSchema nestedField : nestedSchema.getFields()) { allEmpty = (nestedField.getSchema() == null && !isPopulatedMixedValueMap(nestedField, currentField, currentTuple) && PigUtils.isComplexType(nestedField)); ResourceFieldSchema[] nestedFields = nestedSchema.getFields(); List<Object> tuples = ((Tuple) object).getAll(); String name = nestedFields[i].getName();
/**
 * Binds the Omniture record reader and parses the instance's serialized
 * schema string into the field descriptors used while reading.
 */
@Override
@SuppressWarnings("rawtypes")
public void prepareToRead(RecordReader reader, PigSplit split) throws IOException {
    this.reader = (OmnitureDataFileRecordReader) reader;
    // Local renamed to avoid shadowing the instance's schema string.
    ResourceSchema parsedSchema = new ResourceSchema(Utils.getSchemaFromString(this.schema));
    fields = parsedSchema.getFields();
}
/**
 * This method adds FieldSchema of 'input source tag/path' as the first
 * field. This will be called only when PigStorage is invoked with
 * '-tagFile' or '-tagPath' option and the schema file is present to be
 * loaded.
 *
 * @param schema    schema whose existing fields are shifted right by one slot
 * @param fieldName name of the prepended CHARARRAY source-tag field
 * @return the same ResourceSchema instance with the tag field prepended
 */
public static ResourceSchema getSchemaWithInputSourceTag(ResourceSchema schema, String fieldName) {
    ResourceFieldSchema[] fieldSchemas = schema.getFields();
    ResourceFieldSchema sourceTagSchema =
            new ResourceFieldSchema(new FieldSchema(fieldName, DataType.CHARARRAY));
    ResourceFieldSchema[] fieldSchemasWithSourceTag =
            new ResourceFieldSchema[fieldSchemas.length + 1];
    fieldSchemasWithSourceTag[0] = sourceTagSchema;
    // Bulk copy replaces the original element-by-element loop.
    System.arraycopy(fieldSchemas, 0, fieldSchemasWithSourceTag, 1, fieldSchemas.length);
    return schema.setFields(fieldSchemasWithSourceTag);
}
case DataType.BAG: Schema innerBagSchema = resourceSchemaToAvroSchema( schema.getFields()[0].getSchema(), name, null, definedRecordNames, doubleColonsToDoubleUnderscores); throw new IOException("AvroStorage can't save maps with untyped values; please specify a value type or a schema."); byte innerType = schema.getFields()[0].getType(); String desc = schema.getFields()[0].getDescription(); if (desc != null) { if (desc.equals("autogenerated from Pig Field Schema")) { innerSchema = createNullableUnion( Schema.createMap(resourceSchemaToAvroSchema( schema.getFields()[0].getSchema(), name, nameSpace, definedRecordNames, doubleColonsToDoubleUnderscores))); "Don't know how to encode type " + DataType.findTypeName(type) + " in schema " + ((schema == null) ? "" : schema.toString()) + "\n");
/**
 * Returns the resource schema for the given location, computing it once and
 * caching it on the instance; the computed schema is also serialized into the
 * UDF context so back-end tasks can retrieve it.
 *
 * @param location the load location being queried
 * @param job      the job carrying the Hadoop configuration
 * @return the (possibly cached) ResourceSchema
 * @throws IOException if schema generation or serialization fails
 */
@Override
public ResourceSchema getSchema(String location, Job job) throws IOException {
    // Fast path: reuse the schema when it was already computed.
    if (schema != null) {
        return schema;
    }
    PhoenixConfigurationUtil.loadHBaseConfiguration(job);
    final Configuration conf = job.getConfiguration();
    this.initializePhoenixPigConfiguration(location, conf);
    this.schema = PhoenixPigSchemaUtil.getResourceSchema(this.config);
    if (LOG.isDebugEnabled()) {
        LOG.debug(String.format("Resource Schema generated for location [%s] is [%s]",
                location, schema.toString()));
    }
    this.storeInUDFContext(this.contextSignature, RESOURCE_SCHEMA_SIGNATURE,
            ObjectSerializer.serialize(schema));
    return schema;
}
if (rs1.getVersion() != rs2.getVersion() || !Arrays.equals(rs1.getSortKeys(), rs2.getSortKeys()) || !Arrays.equals(rs1.getSortKeyOrders(), rs2.getSortKeyOrders())) { return false; ResourceFieldSchema[] rfs1 = rs1.getFields(); ResourceFieldSchema[] rfs2 = rs1.getFields(); if (rfs1[i].getName()==null && rfs2[i].getName()!=null || rfs1[i].getName()!=null && rfs2[i].getName()==null) return false; if (rfs1[i].getName()==null && rfs2[i].getName()==null) { return false; if (!equals(rfs1[i].getSchema(), rfs2[i].getSchema())) { return false;