org.apache.gobblin.source.workunit.Extract java code examples

/**
 * Returns the {@link org.apache.gobblin.source.workunit.Extract} of the wrapped {@link WorkUnit}.
 *
 * <p>
 *   The result is a new {@link Extract} constructed from the work unit's extract, not the
 *   work unit's own instance.
 * </p>
 *
 * @return a new {@link org.apache.gobblin.source.workunit.Extract} built from the one associated
 *         with the {@link WorkUnit}
 */
public Extract getExtract() {
 Extract workUnitExtract = this.workUnit.getExtract();
 return new Extract(workUnitExtract);
}

/**
 * Computes the hash code from the namespace, table name and extract ID.
 *
 * @return hash code of the string formed by concatenating namespace, table and extract ID
 */
@Override
public int hashCode() {
 // The three identifying values are concatenated (without separators) and hashed as one string.
 String identity = this.getNamespace() + this.getTable() + this.getExtractId();
 return identity.hashCode();
}

/**
 * Assigns the ID of this {@link Extract}. The ID is unique, but not globally unique.
 *
 * <p>
 *   The value is stored under {@link ConfigurationKeys#EXTRACT_EXTRACT_ID_KEY}.
 * </p>
 *
 * @param extractId (non-globally) unique ID to assign to this {@link Extract}
 */
public void setExtractId(String extractId) {
 this.setProp(ConfigurationKeys.EXTRACT_EXTRACT_ID_KEY, extractId);
}

/**
 * Get the writer output file path corresponding to this {@link Extract}.
 *
 * <p>
 *   The path has the form {@code <namespace with dots replaced by slashes>/<table>/<extractId>_<suffix>},
 *   where the suffix is {@code full} when {@link #getIsFull()} returns {@code true} and
 *   {@code append} otherwise.
 * </p>
 *
 * @return writer output file path corresponding to this {@link Extract}
 * @deprecated As {@code this.getIsFull} is deprecated.
 */
@Deprecated
public String getOutputFilePath() {
 // A plain character replacement is enough to turn the dot-separated namespace into a
 // directory path; no regular expression is needed.
 String namespacePath = this.getNamespace().replace('.', '/');
 return namespacePath + "/" + this.getTable() + "/" + this.getExtractId() + "_"
   + (this.getIsFull() ? "full" : "append");
}

/**
 * Add more primary keys to the existing set of primary keys.
 *
 * <p>
 *   The primary keys are stored as a single comma-separated string under
 *   {@link ConfigurationKeys#EXTRACT_PRIMARY_KEY_FIELDS_KEY}. The given names are appended to the
 *   value already stored there, separated from it by a comma.
 * </p>
 *
 * @param primaryKeyFieldName primary key names
 * @deprecated It is recommended to add primary keys in {@code WorkUnit} instead of {@code Extract}.
 */
@Deprecated
public void addPrimaryKey(String... primaryKeyFieldName) {
 StringBuilder sb = new StringBuilder(getProp(ConfigurationKeys.EXTRACT_PRIMARY_KEY_FIELDS_KEY, ""));
 // Separate the new names from the keys that are already stored; without this the last
 // existing key and the first new key would be fused into a single name.
 if (sb.length() > 0 && primaryKeyFieldName.length > 0) {
  sb.append(",");
 }
 Joiner.on(",").appendTo(sb, primaryKeyFieldName);
 setProp(ConfigurationKeys.EXTRACT_PRIMARY_KEY_FIELDS_KEY, sb.toString());
}

 /**
  * Creates an {@link Extract} that differs from every {@link Extract} previously returned by
  * this {@link ExtractFactory} instance.
  *
  * <p>
  *   While the candidate is already contained in the set of created instances, its extract ID is
  *   changed: an empty ID is replaced by the current time printed with this factory's formatter,
  *   and a non-empty ID is parsed with that formatter and moved forward by one second.
  * </p>
  *
  * @param type {@link TableType}
  * @param namespace dot separated namespace path
  * @param table table name
  * @return a unique {@link Extract} instance
  */
 public synchronized Extract getUniqueExtract(TableType type, String namespace, String table) {
  Extract candidate = new Extract(type, namespace, table);
  while (this.createdInstances.contains(candidate)) {
   String currentId = candidate.getExtractId();
   // First collision: start from "now"; later collisions: bump the previous ID by one second.
   DateTime nextTimestamp = Strings.isNullOrEmpty(currentId)
     ? new DateTime()
     : this.dtf.parseDateTime(currentId).plusSeconds(1);
   candidate.setExtractId(this.dtf.print(nextTimestamp));
  }
  this.createdInstances.add(candidate);
  return candidate;
 }
}

if (previousExtract.getNamespace().equals(namespace) && previousExtract.getTable().equals(table)) {
 this.previousTableState.addAll(pre);

/**
 * Checks the default writer file path computed from a {@link WorkUnit}: with arguments
 * {@code (0, 0)} it equals the extract's output file path, and with arguments {@code (2, 0)} it
 * is that path with a child named {@code DEFAULT_FORK_BRANCH_NAME + "0"}.
 */
@Test
public void testGetDefaultWriterFilePath() {
 String ns = "gobblin.test";
 String table = "test-table";
 WorkUnit workUnit = WorkUnit.create(new Extract(new SourceState(), TableType.APPEND_ONLY, ns, table));

 Assert.assertEquals(
   WriterUtils.getWriterFilePath(workUnit, 0, 0),
   new Path(workUnit.getExtract().getOutputFilePath()));

 Assert.assertEquals(
   WriterUtils.getWriterFilePath(workUnit, 2, 0),
   new Path(workUnit.getExtract().getOutputFilePath(), ConfigurationKeys.DEFAULT_FORK_BRANCH_NAME + "0"));
}

/**
 * Looks up the {@link Extract} registered for the file's watermark, creating and registering a
 * new one when none exists yet.
 *
 * @param file file whose watermark (milliseconds since epoch) is used as the lookup key
 * @param topicName table name for a newly created {@link Extract}
 * @param namespace namespace for a newly created {@link Extract}
 * @param extractMap map from watermark to {@link Extract}; updated when a new extract is created
 * @return the existing or newly created {@link Extract} for the file's watermark
 */
private Extract getExtractForFile(PartitionAwareFileRetriever.FileInfo file,
  String topicName,
  String namespace,
  Map<Long, Extract> extractMap) {
 Extract known = extractMap.get(file.getWatermarkMsSinceEpoch());
 if (known != null) {
  return known;
 }

 // Nothing registered for this watermark yet: create an extract object for the dayPath.
 Extract created = new Extract(this.tableType, namespace, topicName);
 LOG.info("Created extract: " + created.getExtractId() + " for path " + topicName);
 extractMap.put(file.getWatermarkMsSinceEpoch(), created);
 return created;
}

table.setNamespace(extract.getNamespace());
table.setName(extract.getTable());
if (extract.hasType()) {
 table.setType(TableTypeEnum.valueOf(extract.getType().name()));

/**
 * Converts the "schemaWithRecordOfEnum" input schema with a {@link RecordConverter} and compares
 * the resulting schema, rendered as JSON, with the expected schema for that test case.
 */
@Test
public void schemaWithRecordOfEnum()
  throws Exception {
 final String caseName = "schemaWithRecordOfEnum";
 JsonObject inputSchema = getSchemaData(caseName).getAsJsonObject();
 JsonObject expectedSchema = getExpectedSchema(caseName).getAsJsonObject();

 RecordConverter underTest = new RecordConverter(
   new JsonSchema(inputSchema),
   state,
   buildNamespace(state.getExtract().getNamespace(), "something"));

 Assert.assertEquals(avroSchemaToJsonElement(underTest), expectedSchema);
}

/**
 * Builds a {@link RecordConverter} for the input schema and returns its schema.
 *
 * <p>
 *   The column name of the {@link JsonSchema} is set to the table name of the work unit's
 *   extract. The created converter is kept in {@code recordConverter}.
 * </p>
 *
 * @param inputSchema input schema as a {@link JsonArray}
 * @param workUnit work unit state whose extract supplies the table name
 * @return the schema of the created {@link RecordConverter}, cast to {@link MessageType}
 * @throws SchemaConversionException declared by the overridden method
 */
@Override
public MessageType convertSchema(JsonArray inputSchema, WorkUnitState workUnit)
  throws SchemaConversionException {
 String tableName = workUnit.getExtract().getTable();

 JsonSchema rootSchema = new JsonSchema(inputSchema);
 rootSchema.setColumnName(tableName);

 recordConverter = new RecordConverter(rootSchema, ROOT);
 return (MessageType) recordConverter.schema();
}

 /**
  * Verify that each {@link Extract} created by an {@link ExtractFactory} has a unique ID:
  * 100 extracts requested for the same type, namespace and table must yield 100 distinct IDs.
  */
 @Test
 public void testGetUniqueExtract() {
  final int numOfExtracts = 100;
  ExtractFactory factory = new ExtractFactory("yyyyMMddHHmmss");
  Set<String> seenIds = Sets.newHashSet();

  for (int remaining = numOfExtracts; remaining > 0; remaining--) {
   Extract created = factory.getUniqueExtract(Extract.TableType.APPEND_ONLY, "namespace", "table");
   seenIds.add(created.getExtractId());
  }

  // A set drops duplicates, so its size equals the number of distinct IDs handed out.
  Assert.assertEquals(seenIds.size(), numOfExtracts);
 }
}

/**
 * Create a new properly populated {@link Extract} instance.
 *
 * <p>
 *   This method should always return a new unique {@link Extract} instance. While the candidate
 *   is already contained in {@code EXTRACT_SET}, its extract ID is changed: an empty ID is
 *   replaced by the current time printed with {@code DTF}, and a non-empty ID is parsed with
 *   {@code DTF} and moved forward by one second.
 * </p>
 *
 * @param type {@link org.apache.gobblin.source.workunit.Extract.TableType}
 * @param namespace namespace of the table this extract belongs to
 * @param table name of the table this extract belongs to
 * @return a new unique {@link Extract} instance
 *
 * @deprecated Use {@link org.apache.gobblin.source.extractor.extract.AbstractSource#createExtract(org.apache.gobblin.source.workunit.Extract.TableType, String, String)}
 */
@Deprecated
public synchronized Extract createExtract(Extract.TableType type, String namespace, String table) {
 Extract candidate = new Extract(this, type, namespace, table);
 while (EXTRACT_SET.contains(candidate)) {
  String currentId = candidate.getExtractId();
  // First collision: start from "now"; later collisions: bump the previous ID by one second.
  DateTime nextTimestamp = Strings.isNullOrEmpty(currentId)
    ? new DateTime()
    : DTF.parseDateTime(currentId).plusSeconds(1);
  candidate.setExtractId(DTF.print(nextTimestamp));
 }
 EXTRACT_SET.add(candidate);
 return candidate;
}

/**
 * Get the writer output file path corresponding to this {@link Extract}.
 *
 * <p>
 *   The path has the form {@code <namespace with dots replaced by slashes>/<table>/<extractId>_<suffix>},
 *   where the suffix is {@code full} when {@link #getIsFull()} returns {@code true} and
 *   {@code append} otherwise.
 * </p>
 *
 * @return writer output file path corresponding to this {@link Extract}
 * @deprecated As {@code this.getIsFull} is deprecated.
 */
@Deprecated
public String getOutputFilePath() {
 // A plain character replacement is enough to turn the dot-separated namespace into a
 // directory path; no regular expression is needed.
 String namespacePath = this.getNamespace().replace('.', '/');
 return namespacePath + "/" + this.getTable() + "/" + this.getExtractId() + "_"
   + (this.getIsFull() ? "full" : "append");
}

/**
 * Builds a {@link RecordConverter} for the input schema and returns its schema, optionally
 * passed through {@code generateSchemaWithNullifiedField}.
 *
 * <p>
 *   The column name of the {@link JsonSchema} is set to the table name of the work unit's
 *   extract, and the extract's namespace is passed to the converter. The created converter is
 *   kept in {@code recordConverter}.
 * </p>
 *
 * @param schema input schema as a {@link JsonArray}
 * @param workUnit work unit state supplying the extract and the nullify-fields setting
 * @return the converter's schema, or the result of {@code generateSchemaWithNullifiedField}
 *         when {@code CONVERTER_AVRO_NULLIFY_FIELDS_ENABLED} is set to true
 * @throws SchemaConversionException if an {@link UnsupportedDateTypeException} is raised while
 *         the converter is being set up
 */
@Override
public Schema convertSchema(JsonArray schema, WorkUnitState workUnit)
  throws SchemaConversionException {
 try {
  JsonSchema tableSchema = new JsonSchema(schema);
  tableSchema.setColumnName(workUnit.getExtract().getTable());
  recordConverter = new RecordConverter(tableSchema, workUnit, workUnit.getExtract().getNamespace());
 } catch (UnsupportedDateTypeException unsupported) {
  throw new SchemaConversionException(unsupported);
 }

 Schema convertedSchema = recordConverter.schema();
 boolean nullifyFields =
   workUnit.getPropAsBoolean(CONVERTER_AVRO_NULLIFY_FIELDS_ENABLED, DEFAULT_CONVERTER_AVRO_NULLIFY_FIELDS_ENABLED);
 return nullifyFields ? this.generateSchemaWithNullifiedField(workUnit, convertedSchema) : convertedSchema;
}

/**
 * Checks the default writer file path computed from a {@link WorkUnitState}: with arguments
 * {@code (0, 0)} it equals the extract's output file path, and with arguments {@code (2, 0)} it
 * is that path with a child named {@code DEFAULT_FORK_BRANCH_NAME + "0"}.
 */
@Test
public void testGetDefaultWriterFilePathWithWorkUnitState() {
 String ns = "gobblin.test";
 String table = "test-table";
 WorkUnit sourceWorkUnit = WorkUnit.create(new Extract(new SourceState(), TableType.APPEND_ONLY, ns, table));
 WorkUnitState state = new WorkUnitState(sourceWorkUnit);

 Assert.assertEquals(
   WriterUtils.getWriterFilePath(state, 0, 0),
   new Path(state.getExtract().getOutputFilePath()));

 Assert.assertEquals(
   WriterUtils.getWriterFilePath(state, 2, 0),
   new Path(state.getExtract().getOutputFilePath(), ConfigurationKeys.DEFAULT_FORK_BRANCH_NAME + "0"));
}

/**
 * Looks up the {@link Extract} registered for the file's watermark, creating and registering a
 * new one when none exists yet.
 *
 * @param file file whose watermark (milliseconds since epoch) is used as the lookup key
 * @param topicName table name for a newly created {@link Extract}
 * @param namespace namespace for a newly created {@link Extract}
 * @param extractMap map from watermark to {@link Extract}; updated when a new extract is created
 * @return the existing or newly created {@link Extract} for the file's watermark
 */
private Extract getExtractForFile(PartitionAwareFileRetriever.FileInfo file,
  String topicName,
  String namespace,
  Map<Long, Extract> extractMap) {
 Extract known = extractMap.get(file.getWatermarkMsSinceEpoch());
 if (known != null) {
  return known;
 }

 // Nothing registered for this watermark yet: create an extract object for the dayPath.
 Extract created = new Extract(this.tableType, namespace, topicName);
 LOG.info("Created extract: " + created.getExtractId() + " for path " + topicName);
 extractMap.put(file.getWatermarkMsSinceEpoch(), created);
 return created;
}

table.setNamespace(extract.getNamespace());
table.setName(extract.getTable());
if (extract.hasType()) {
 table.setType(TableTypeEnum.valueOf(extract.getType().name()));

/**
 * Converts the "schemaWithRecordOfArray" input schema with a {@link RecordConverter} and compares
 * the resulting schema, rendered as JSON, with the expected schema for that test case.
 */
@Test
public void schemaWithRecordOfArray()
  throws Exception {
 final String caseName = "schemaWithRecordOfArray";
 JsonObject inputSchema = getSchemaData(caseName).getAsJsonObject();
 JsonObject expectedSchema = getExpectedSchema(caseName).getAsJsonObject();

 RecordConverter underTest = new RecordConverter(
   new JsonSchema(inputSchema),
   state,
   buildNamespace(state.getExtract().getNamespace(), "something"));

 Assert.assertEquals(avroSchemaToJsonElement(underTest), expectedSchema);
}

Javadoc

A class representing all the base attributes required by all table types. Subclasses will be expected to validate each table type for their respective required attributes.

The extract ID only needs to be unique for Extracts belonging to the same namespace/table. One or more WorkUnits can share the same extract ID. WorkUnits that do share an extract ID will be considered parts of a single Extract for the purpose of applying publishing policies.

Most used methods

<init>
Deep copy constructor.
getTable
Get the name of the table.
getNamespace
Get the dot-separated namespace of the table.
getExtractId
Get a (non-globally) unique ID for this Extract.
setProp
contains
equals
getIsFull
Check if this Extract represents the full contents of the source table.
getOutputFilePath
Get the writer output file path corresponding to this Extract.
getPreviousTableState
Get the previous table State.
getProp
getPropAsBoolean

Popular in Java

Finding current android device location
setContentView (Activity)
getSupportFragmentManager (FragmentActivity)
compareTo (BigDecimal)
Comparator (java.util)
A Comparator is used to compare two objects to determine their ordering with respect to each other.
Vector (java.util)
Vector is an implementation of List, backed by an array and synchronized. All optional operations in
SSLHandshakeException (javax.net.ssl)
The exception that is thrown when a handshake could not be completed successfully.
DataSource (javax.sql)
An interface for the creation of Connection objects which represent a connection to a database. This
BufferedImage (java.awt.image)
The BufferedImage subclass describes a java.awt.Image with an accessible buffer of image data. All
ImageIO (javax.imageio)
Top PhpStorm plugins

How to use Extract in org.apache.gobblin.source.workunit

Best Java code snippets using org.apache.gobblin.source.workunit.Extract (Showing top 20 results out of 315)

How to use
Extract
in
org.apache.gobblin.source.workunit