@Override
public Dataset<GenericRecord> run() {
  return Datasets.load(datasetUri);
}
});
@Override
public void sync() throws EventDeliveryException {
  if (nEventsHandled > 0) {
    if (Formats.PARQUET.equals(
        dataset.getDataset().getDescriptor().getFormat())) {
      // We need to close the writer on sync if we're writing to a Parquet
      // dataset
      close();
    } else {
      if (writer instanceof Syncable) {
        ((Syncable) writer).sync();
      }
    }
  }
}
private SavePolicy(Context context) {
  String uri = context.getString(CONFIG_KITE_ERROR_DATASET_URI);
  Preconditions.checkArgument(uri != null, "Must set "
      + CONFIG_KITE_ERROR_DATASET_URI + " when "
      + CONFIG_FAILURE_POLICY + "=save");

  if (Datasets.exists(uri)) {
    dataset = Datasets.load(uri, AvroFlumeEvent.class);
  } else {
    DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
        .schema(AvroFlumeEvent.class)
        .build();
    dataset = Datasets.create(uri, descriptor, AvroFlumeEvent.class);
  }

  nEventsHandled = 0;
}
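// Hedged usage sketch for the constructor above: the config key comes from
// the snippet, Context is Flume's org.apache.flume.Context, and the error
// dataset URI value is made up for illustration. The constructor is private,
// so in practice it would be invoked through whatever factory or builder the
// enclosing class exposes (not shown in these snippets).
Context context = new Context();
context.put(CONFIG_KITE_ERROR_DATASET_URI,
    "dataset:hdfs:/datasets/flume/failed_events");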
DatasetDescriptor descriptor = view.getDataset().getDescriptor();
Format format = descriptor.getFormat();
Preconditions.checkArgument(allowedFormats().contains(format.getName()),
    "Unsupported format: " + format.getName());

Schema newSchema = descriptor.getSchema();
if (datasetSchema == null || !newSchema.equals(datasetSchema)) {
  this.datasetSchema = descriptor.getSchema();
}

this.reuseEntity = !(Formats.PARQUET.equals(format));

this.commitOnBatch = context.getBoolean(CONFIG_FLUSHABLE_COMMIT_ON_BATCH,
    DEFAULT_FLUSHABLE_COMMIT_ON_BATCH) && (Formats.AVRO.equals(format));
this.syncOnBatch = context.getBoolean(CONFIG_SYNCABLE_SYNC_ON_BATCH,
    DEFAULT_SYNCABLE_SYNC_ON_BATCH) && (Formats.AVRO.equals(format));

this.datasetName = view.getDataset().getName();
this.writer = view.newWriter();
if (uri.getScheme() == null) {
  throw new SchemaNotFoundException(
      "If the schema is not a JSON string, a scheme must be specified in the URI "
      + "(ex: dataset:, view:, resource:, file:, hdfs:, etc).");
}

try {
  if ("dataset".equals(uri.getScheme()) || "view".equals(uri.getScheme())) {
    return Datasets.load(uri).getDataset().getDescriptor().getSchema();
  } else if ("resource".equals(uri.getScheme())) {
    try (InputStream in = Resources.getResource(
        uri.getSchemeSpecificPart()).openStream()) {
      return parseSchema(uri, in);
    }
  }
} catch (DatasetNotFoundException e) {
  throw new SchemaNotFoundException(
      "Cannot read schema of missing dataset: " + uri, e);
} catch (IOException e) {
  throw new SchemaNotFoundException(
      "Failed while reading " + uri + ": " + e.getMessage(), e);
}
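// Hedged usage sketch: the validator later in this section calls this logic
// as getSchema(uri, conf), with a String URI and a Hadoop Configuration.
// All URI values below are illustrative examples of the supported schemes.
Schema fromDataset  = getSchema("dataset:hdfs:/datasets/default/events", conf);
Schema fromResource = getSchema("resource:schemas/event.avsc", conf);
Schema fromHdfs     = getSchema("hdfs:/schemas/event.avsc", conf);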
final Schema schema = target.getDataset().getDescriptor().getSchema();
// ...
session.getProvenanceReporter().send(flowFile,
    target.getUri().toString(),
    timer.getDuration(TimeUnit.MILLISECONDS),
    true /* cannot roll back the write */);
// ...
} catch (ValidationException e) {
  getLogger().error(e.getMessage());
  getLogger().debug("Incompatible schema error", e);
  session.transfer(flowFile, INCOMPATIBLE);
}
@Override
public void process(InputStream in) throws IOException {
  try (DataFileStream<Record> stream = new DataFileStream<>(
      in, AvroUtil.newDatumReader(schema, Record.class))) {
    IncompatibleSchemaException.check(
        SchemaValidationUtil.canRead(stream.getSchema(), schema),
        "Incompatible file schema %s, expected %s",
        stream.getSchema(), schema);

    long written = 0L;
    try (DatasetWriter<Record> writer = target.newWriter()) {
      for (Record record : stream) {
        writer.write(record);
        written += 1;
      }
    } finally {
      session.adjustCounter("Stored records", written,
          true /* cannot roll back the write */);
    }
  }
}
});
@Override
public void handle(Event event, Throwable cause) throws EventDeliveryException {
  try {
    if (writer == null) {
      writer = dataset.newWriter();
    }

    final AvroFlumeEvent avroEvent = new AvroFlumeEvent();
    avroEvent.setBody(ByteBuffer.wrap(event.getBody()));
    avroEvent.setHeaders(toCharSeqMap(event.getHeaders()));

    writer.write(avroEvent);
    nEventsHandled++;
  } catch (RuntimeException ex) {
    throw new EventDeliveryException(ex);
  }
}
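// toCharSeqMap is not defined in these snippets; a minimal sketch of what it
// presumably does, since the generated AvroFlumeEvent headers field is typed
// Map<CharSequence, CharSequence> (uses java.util.Map / java.util.HashMap):
private static Map<CharSequence, CharSequence> toCharSeqMap(Map<String, String> map) {
  Map<CharSequence, CharSequence> result = new HashMap<>(map.size());
  for (Map.Entry<String, String> entry : map.entrySet()) {
    result.put(entry.getKey(), entry.getValue());
  }
  return result;
}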
@Override
public ValidationResult validate(String subject, String uri, ValidationContext context) {
  String message = "not set";
  boolean isValid = true;

  if (uri.trim().isEmpty()) {
    isValid = false;
  } else {
    final boolean elPresent = context.isExpressionLanguageSupported(subject)
        && context.isExpressionLanguagePresent(uri);
    if (!elPresent) {
      try {
        new URIBuilder(URI.create(uri)).build();
      } catch (RuntimeException e) {
        message = e.getMessage();
        isValid = false;
      }
    }
  }

  return new ValidationResult.Builder()
      .subject(subject)
      .input(uri)
      .explanation("Dataset URI is invalid: " + message)
      .valid(isValid)
      .build();
}
};
@Override
public void close() throws EventDeliveryException {
  if (nEventsHandled > 0) {
    try {
      writer.close();
    } catch (RuntimeException ex) {
      throw new EventDeliveryException(ex);
    } finally {
      writer = null;
      nEventsHandled = 0;
    }
  }
}
/**
 * Parse the event using the entity parser and write the entity to the dataset.
 *
 * @param event The event to write
 * @throws EventDeliveryException An error occurred trying to write to the
 *                                dataset that couldn't or shouldn't be
 *                                handled by the failure policy.
 */
@VisibleForTesting
void write(Event event) throws EventDeliveryException {
  try {
    this.entity = parser.parse(event, reuseEntity ? entity : null);
    this.bytesParsed += event.getBody().length;

    // writeEncoded would be an optimization in some cases, but HBase
    // will not support it and partitioned Datasets need to get partition
    // info from the entity Object. We may be able to avoid the
    // serialization round-trip otherwise.
    writer.write(entity);
  } catch (NonRecoverableEventException ex) {
    failurePolicy.handle(event, ex);
  } catch (DataFileWriter.AppendWriteException ex) {
    failurePolicy.handle(event, ex);
  } catch (RuntimeException ex) {
    Throwables.propagateIfInstanceOf(ex, EventDeliveryException.class);
    throw new EventDeliveryException(ex);
  }
}
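// FailurePolicy itself is not shown in these snippets. Judging from the
// failurePolicy.handle(event, ex) calls above and the handle/sync/close
// implementations elsewhere in this section, its contract is roughly the
// following sketch (inferred, not the authoritative definition):
interface FailurePolicy {
  // Handle an event that could not be written to the dataset.
  void handle(Event event, Throwable cause) throws EventDeliveryException;
  // Make previously handled events durable.
  void sync() throws EventDeliveryException;
  // Release any resources held by the policy.
  void close() throws EventDeliveryException;
}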
ObjectMapper mapper = new ObjectMapper();
mapper.disable(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES);

JsonNode node = mapper.readTree(new URL(url));
node = node.get("duObjects");

TypeReference<List<Dataset>> typeRef = new TypeReference<List<Dataset>>() {};
List<Dataset> list = mapper.readValue(node.traverse(), typeRef);
for (Dataset dataSet : list) {
  System.out.println(dataSet.getName());
}
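// The Jackson snippet above binds the "duObjects" array to a Dataset POJO
// that is not shown (distinct from Kite's Dataset). A minimal sketch that
// would satisfy it: only the name field is inferred from the getName() call;
// any other JSON properties are silently ignored because
// FAIL_ON_UNKNOWN_PROPERTIES is disabled.
public class Dataset {
  private String name;

  public String getName() { return name; }
  public void setName(String name) { this.name = name; }
}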
private static Schema parseSchema(String literal) {
  try {
    return new Schema.Parser().parse(literal);
  } catch (RuntimeException e) {
    throw new SchemaNotFoundException("Failed to parse schema: " + literal, e);
  }
}
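// Usage sketch for the helper above; the Avro schema literal is illustrative.
Schema schema = parseSchema(
    "{\"type\": \"record\", \"name\": \"Event\", \"fields\": ["
    + "{\"name\": \"body\", \"type\": \"string\"}]}");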
private View<Record> load(ProcessContext context, FlowFile file) {
  String uri = context.getProperty(KITE_DATASET_URI)
      .evaluateAttributeExpressions(file)
      .getValue();
  return Datasets.load(uri, Record.class);
}
}
String namespace = context.getString(CONFIG_KITE_DATASET_NAMESPACE,
    DEFAULT_NAMESPACE);

this.datasetUri = new URIBuilder(repositoryURI, namespace, datasetName)
    .build();
if (writer != null) {
  try {
    writer.close();
  } catch (RuntimeException ex) {
    throw new EventDeliveryException(ex);
  } finally {
    writer = null;
  }
}
private static Schema parseSchema(URI uri, InputStream in) throws IOException {
  try {
    return new Schema.Parser().parse(in);
  } catch (RuntimeException e) {
    throw new SchemaNotFoundException("Failed to parse schema at " + uri, e);
  }
}
@Override
public ValidationResult validate(String subject, String uri, ValidationContext context) {
  Configuration conf = getConfiguration(
      context.getProperty(CONF_XML_FILES).evaluateAttributeExpressions().getValue());
  String error = null;

  if (StringUtils.isBlank(uri)) {
    return new ValidationResult.Builder()
        .subject(subject)
        .input(uri)
        .explanation("Schema cannot be null.")
        .valid(false)
        .build();
  }

  final boolean elPresent = context.isExpressionLanguageSupported(subject)
      && context.isExpressionLanguagePresent(uri);
  if (!elPresent) {
    try {
      getSchema(uri, conf);
    } catch (SchemaNotFoundException e) {
      error = e.getMessage();
    }
  }

  return new ValidationResult.Builder()
      .subject(subject)
      .input(uri)
      .explanation(error)
      .valid(error == null)
      .build();
}
};