/**
 * Returns the {@link org.apache.gobblin.source.workunit.Extract} of the wrapped {@link WorkUnit}.
 *
 * <p>
 *   The returned object is a new {@link org.apache.gobblin.source.workunit.Extract} instance constructed
 *   from the one held by the work unit.
 * </p>
 *
 * @return a new {@link org.apache.gobblin.source.workunit.Extract} built from the {@link WorkUnit}'s extract
 */
public Extract getExtract() {
  final Extract workUnitExtract = this.workUnit.getExtract();
  return new Extract(workUnitExtract);
}
/**
 * Computes the hash code from the namespace, table and extract ID.
 *
 * @return hash code of the string formed by concatenating namespace, table and extract ID
 */
@Override
public int hashCode() {
  // The three components are concatenated without a delimiter; different triples may therefore
  // produce the same string, which is acceptable for a hash code.
  final String identity = this.getNamespace() + this.getTable() + this.getExtractId();
  return identity.hashCode();
}
/**
 * Assigns the ID of this {@link Extract}. The ID is expected to be unique, though not globally so.
 *
 * <p>
 *   The value is stored under {@link ConfigurationKeys#EXTRACT_EXTRACT_ID_KEY}.
 * </p>
 *
 * @param extractId unique ID for this {@link Extract}
 */
public void setExtractId(String extractId) {
  this.setProp(ConfigurationKeys.EXTRACT_EXTRACT_ID_KEY, extractId);
}
/**
 * Builds the writer output file path for this {@link Extract}.
 *
 * <p>
 *   The path has the form {@code <namespace with '.' replaced by '/'>/<table>/<extractId>_<full|append>}.
 * </p>
 *
 * @return writer output file path corresponding to this {@link Extract}
 * @deprecated As {@code this.getIsFull} is deprecated.
 */
@Deprecated
public String getOutputFilePath() {
  StringBuilder path = new StringBuilder();

  // Dots in the namespace become directory separators.
  path.append(this.getNamespace().replaceAll("\\.", "/"));
  path.append("/").append(this.getTable());
  path.append("/").append(this.getExtractId());
  path.append("_");

  if (this.getIsFull()) {
    path.append("full");
  } else {
    path.append("append");
  }
  return path.toString();
}
/**
 * Add more primary keys to the existing set of primary keys.
 *
 * <p>
 *   The primary keys are stored as a single comma-separated string under
 *   {@link ConfigurationKeys#EXTRACT_PRIMARY_KEY_FIELDS_KEY}. The given names are appended to the
 *   value already stored there (an empty string if none is set).
 * </p>
 *
 * @param primaryKeyFieldName primary key names
 * @deprecated It is recommended to add primary keys in {@code WorkUnit} instead of {@code Extract}.
 */
@Deprecated
public void addPrimaryKey(String... primaryKeyFieldName) {
  StringBuilder sb = new StringBuilder(getProp(ConfigurationKeys.EXTRACT_PRIMARY_KEY_FIELDS_KEY, ""));

  // Separate the already-stored keys from the new ones; without this the last existing key and
  // the first new key would be fused into one name (e.g. "a,b" + "c" -> "a,bc").
  if (sb.length() > 0 && primaryKeyFieldName.length > 0) {
    sb.append(",");
  }

  Joiner.on(",").appendTo(sb, primaryKeyFieldName);
  setProp(ConfigurationKeys.EXTRACT_PRIMARY_KEY_FIELDS_KEY, sb.toString());
}
/**
 * Returns a unique {@link Extract} instance.
 * Any two calls of this method from the same {@link ExtractFactory} instance guarantees to
 * return {@link Extract}s with different IDs.
 *
 * <p>
 *   A freshly constructed {@link Extract} is checked against the instances this factory has already
 *   handed out. While an equal instance exists, its extract ID is replaced: by the current time if
 *   the ID is null or empty, otherwise by the current ID advanced by one second.
 * </p>
 *
 * @param type {@link TableType}
 * @param namespace dot separated namespace path
 * @param table table name
 * @return a unique {@link Extract} instance
 */
public synchronized Extract getUniqueExtract(TableType type, String namespace, String table) {
  Extract candidate = new Extract(type, namespace, table);

  while (this.createdInstances.contains(candidate)) {
    String currentId = candidate.getExtractId();
    String nextId = Strings.isNullOrEmpty(currentId)
        ? this.dtf.print(new DateTime())
        : this.dtf.print(this.dtf.parseDateTime(currentId).plusSeconds(1));
    candidate.setExtractId(nextId);
  }

  this.createdInstances.add(candidate);
  return candidate;
}
}
if (previousExtract.getNamespace().equals(namespace) && previousExtract.getTable().equals(table)) { this.previousTableState.addAll(pre);
/**
 * Checks that {@code WriterUtils.getWriterFilePath} falls back to the extract's output file path
 * for a {@link WorkUnit} that carries no explicit writer file path.
 */
@Test
public void testGetDefaultWriterFilePath() {
  final String namespace = "gobblin.test";
  final String tableName = "test-table";

  Extract extract = new Extract(new SourceState(), TableType.APPEND_ONLY, namespace, tableName);
  WorkUnit state = WorkUnit.create(extract);

  // NOTE(review): the two int arguments are presumably (numBranches, branchId) — confirm against WriterUtils.
  Path expectedWithoutBranch = new Path(state.getExtract().getOutputFilePath());
  Assert.assertEquals(WriterUtils.getWriterFilePath(state, 0, 0), expectedWithoutBranch);

  // With a first argument of 2 the expected path gains a child named after the default fork branch plus "0".
  Path expectedWithBranch =
      new Path(state.getExtract().getOutputFilePath(), ConfigurationKeys.DEFAULT_FORK_BRANCH_NAME + "0");
  Assert.assertEquals(WriterUtils.getWriterFilePath(state, 2, 0), expectedWithBranch);
}
private Extract getExtractForFile(PartitionAwareFileRetriever.FileInfo file, String topicName, String namespace, Map<Long, Extract> extractMap) { Extract extract = extractMap.get(file.getWatermarkMsSinceEpoch()); if (extract == null) { // Create an extract object for the dayPath extract = new Extract(this.tableType, namespace, topicName); LOG.info("Created extract: " + extract.getExtractId() + " for path " + topicName); extractMap.put(file.getWatermarkMsSinceEpoch(), extract); } return extract; }
table.setNamespace(extract.getNamespace()); table.setName(extract.getTable()); if (extract.hasType()) { table.setType(TableTypeEnum.valueOf(extract.getType().name()));
/**
 * Converts the "schemaWithRecordOfEnum" fixture with a {@link RecordConverter} and compares the
 * resulting Avro schema, rendered as JSON, with the expected fixture.
 */
@Test
public void schemaWithRecordOfEnum()
    throws Exception {
  final String testName = "schemaWithRecordOfEnum";
  JsonObject inputSchema = getSchemaData(testName).getAsJsonObject();
  JsonObject expectedSchema = getExpectedSchema(testName).getAsJsonObject();

  JsonSchema jsonSchema = new JsonSchema(inputSchema);
  RecordConverter converter =
      new RecordConverter(jsonSchema, state, buildNamespace(state.getExtract().getNamespace(), "something"));

  Assert.assertEquals(avroSchemaToJsonElement(converter), expectedSchema);
}
/**
 * Converts the input JSON schema into a {@link MessageType}.
 *
 * <p>
 *   The schema is wrapped in a {@link JsonSchema} whose column name is the table name of the work
 *   unit's extract. The {@link RecordConverter} built from it is kept in {@code recordConverter}.
 * </p>
 *
 * @param inputSchema JSON schema to convert
 * @param workUnit work unit state supplying the extract's table name
 * @return the schema produced by the record converter, cast to {@link MessageType}
 * @throws SchemaConversionException declared by the overridden method
 */
@Override
public MessageType convertSchema(JsonArray inputSchema, WorkUnitState workUnit)
    throws SchemaConversionException {
  final String tableName = workUnit.getExtract().getTable();

  JsonSchema rootSchema = new JsonSchema(inputSchema);
  rootSchema.setColumnName(tableName);

  recordConverter = new RecordConverter(rootSchema, ROOT);
  return (MessageType) recordConverter.schema();
}
/**
 * Verify that each {@link Extract} created by an {@link ExtractFactory} has a unique ID.
 *
 * <p>
 *   Requests 100 extracts for the same type, namespace and table from one factory and checks that
 *   100 distinct extract IDs were collected.
 * </p>
 */
@Test
public void testGetUniqueExtract() {
  ExtractFactory factory = new ExtractFactory("yyyyMMddHHmmss");
  Set<String> seenIds = Sets.newHashSet();
  final int expectedCount = 100;

  int requested = 0;
  while (requested < expectedCount) {
    Extract extract = factory.getUniqueExtract(Extract.TableType.APPEND_ONLY, "namespace", "table");
    seenIds.add(extract.getExtractId());
    requested++;
  }

  // A set drops duplicates, so equal sizes mean every ID was distinct.
  Assert.assertEquals(seenIds.size(), expectedCount);
}
}
/**
 * Create a new properly populated {@link Extract} instance.
 *
 * <p>
 *   This method should always return a new unique {@link Extract} instance. A freshly constructed
 *   {@link Extract} is checked against {@code EXTRACT_SET}; while an equal instance is present, its
 *   extract ID is replaced by the current time if the ID is null or empty, otherwise by the current
 *   ID advanced by one second.
 * </p>
 *
 * @param type {@link org.apache.gobblin.source.workunit.Extract.TableType}
 * @param namespace namespace of the table this extract belongs to
 * @param table name of the table this extract belongs to
 * @return a new unique {@link Extract} instance
 *
 * @deprecated Use {@link org.apache.gobblin.source.extractor.extract.AbstractSource#createExtract(
 * org.apache.gobblin.source.workunit.Extract.TableType, String, String)}
 */
@Deprecated
public synchronized Extract createExtract(Extract.TableType type, String namespace, String table) {
  Extract candidate = new Extract(this, type, namespace, table);

  while (EXTRACT_SET.contains(candidate)) {
    String currentId = candidate.getExtractId();
    String nextId = Strings.isNullOrEmpty(currentId)
        ? DTF.print(new DateTime())
        : DTF.print(DTF.parseDateTime(currentId).plusSeconds(1));
    candidate.setExtractId(nextId);
  }

  EXTRACT_SET.add(candidate);
  return candidate;
}
/**
 * Builds the writer output file path for this {@link Extract}.
 *
 * <p>
 *   The path is the namespace with every {@code '.'} replaced by {@code '/'}, followed by the table
 *   name, followed by a file name made of the extract ID and a {@code _full} or {@code _append} suffix.
 * </p>
 *
 * @return writer output file path corresponding to this {@link Extract}
 * @deprecated As {@code this.getIsFull} is deprecated.
 */
@Deprecated
public String getOutputFilePath() {
  final String prefix =
      this.getNamespace().replaceAll("\\.", "/") + "/" + this.getTable() + "/" + this.getExtractId() + "_";

  if (this.getIsFull()) {
    return prefix + "full";
  }
  return prefix + "append";
}
/**
 * Converts the input JSON schema into an Avro {@link Schema}.
 *
 * <p>
 *   The schema is wrapped in a {@link JsonSchema} named after the extract's table, and a
 *   {@link RecordConverter} is built for it using the extract's namespace. When
 *   {@code CONVERTER_AVRO_NULLIFY_FIELDS_ENABLED} is set on the work unit, the schema is passed
 *   through {@code generateSchemaWithNullifiedField} before being returned.
 * </p>
 *
 * @param schema JSON schema to convert
 * @param workUnit work unit state supplying the extract and the nullify-fields setting
 * @return the converted schema, optionally with nullified fields
 * @throws SchemaConversionException if building the record converter fails with an
 *         {@link UnsupportedDateTypeException}
 */
@Override
public Schema convertSchema(JsonArray schema, WorkUnitState workUnit)
    throws SchemaConversionException {
  try {
    JsonSchema rootSchema = new JsonSchema(schema);
    rootSchema.setColumnName(workUnit.getExtract().getTable());
    recordConverter = new RecordConverter(rootSchema, workUnit, workUnit.getExtract().getNamespace());
  } catch (UnsupportedDateTypeException e) {
    throw new SchemaConversionException(e);
  }

  Schema recordSchema = recordConverter.schema();
  boolean nullifyFields =
      workUnit.getPropAsBoolean(CONVERTER_AVRO_NULLIFY_FIELDS_ENABLED, DEFAULT_CONVERTER_AVRO_NULLIFY_FIELDS_ENABLED);
  if (!nullifyFields) {
    return recordSchema;
  }
  return this.generateSchemaWithNullifiedField(workUnit, recordSchema);
}
/**
 * Checks that {@code WriterUtils.getWriterFilePath} falls back to the extract's output file path
 * for a {@link WorkUnitState} that carries no explicit writer file path.
 */
@Test
public void testGetDefaultWriterFilePathWithWorkUnitState() {
  final String namespace = "gobblin.test";
  final String tableName = "test-table";

  Extract extract = new Extract(new SourceState(), TableType.APPEND_ONLY, namespace, tableName);
  WorkUnit workUnit = WorkUnit.create(extract);
  WorkUnitState workUnitState = new WorkUnitState(workUnit);

  // NOTE(review): the two int arguments are presumably (numBranches, branchId) — confirm against WriterUtils.
  Path expectedWithoutBranch = new Path(workUnitState.getExtract().getOutputFilePath());
  Assert.assertEquals(WriterUtils.getWriterFilePath(workUnitState, 0, 0), expectedWithoutBranch);

  // With a first argument of 2 the expected path gains a child named after the default fork branch plus "0".
  Path expectedWithBranch = new Path(workUnitState.getExtract().getOutputFilePath(),
      ConfigurationKeys.DEFAULT_FORK_BRANCH_NAME + "0");
  Assert.assertEquals(WriterUtils.getWriterFilePath(workUnitState, 2, 0), expectedWithBranch);
}
private Extract getExtractForFile(PartitionAwareFileRetriever.FileInfo file, String topicName, String namespace, Map<Long, Extract> extractMap) { Extract extract = extractMap.get(file.getWatermarkMsSinceEpoch()); if (extract == null) { // Create an extract object for the dayPath extract = new Extract(this.tableType, namespace, topicName); LOG.info("Created extract: " + extract.getExtractId() + " for path " + topicName); extractMap.put(file.getWatermarkMsSinceEpoch(), extract); } return extract; }
table.setNamespace(extract.getNamespace()); table.setName(extract.getTable()); if (extract.hasType()) { table.setType(TableTypeEnum.valueOf(extract.getType().name()));
/**
 * Converts the "schemaWithRecordOfArray" fixture with a {@link RecordConverter} and compares the
 * resulting Avro schema, rendered as JSON, with the expected fixture.
 */
@Test
public void schemaWithRecordOfArray()
    throws Exception {
  final String testName = "schemaWithRecordOfArray";
  JsonObject inputSchema = getSchemaData(testName).getAsJsonObject();
  JsonObject expectedSchema = getExpectedSchema(testName).getAsJsonObject();

  JsonSchema jsonSchema = new JsonSchema(inputSchema);
  RecordConverter converter =
      new RecordConverter(jsonSchema, state, buildNamespace(state.getExtract().getNamespace(), "something"));

  Assert.assertEquals(avroSchemaToJsonElement(converter), expectedSchema);
}